Source code for robot.utils.filereader

#  Copyright 2008-2015 Nokia Networks
#  Copyright 2016-     Robot Framework Foundation
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

from collections.abc import Iterator
from io import StringIO
from pathlib import Path
from typing import TextIO, Union

Source = Union[Path, str, TextIO]



[docs]
class FileReader:  # FIXME: Rename to SourceReader
    """Utility to ease reading different kind of source files.

    Supports different sources where to read the data:

    - The source can be a path to a file, either as a string or as a
      ``pathlib.Path`` instance. The file itself must be UTF-8 encoded.

    - Alternatively the source can be an already opened file object,
      including a StringIO or BytesIO object. The file can contain either
      Unicode text or UTF-8 encoded bytes.

    - The third options is giving the source as Unicode text directly.
      This requires setting ``accept_text=True`` when creating the reader.

    In all cases bytes are automatically decoded to Unicode and possible
    BOM removed.
    """

    def __init__(self, source: Source, accept_text: bool = False):
        self.file, self._opened = self._get_file(source, accept_text)

    def _get_file(self, source: Source, accept_text: bool) -> "tuple[TextIO, bool]":
        path = self._get_path(source, accept_text)
        if path:
            file = open(path, "rb")
            opened = True
        elif isinstance(source, str):
            file = StringIO(source)
            opened = True
        else:
            file = source
            opened = False
        return file, opened

    def _get_path(self, source: Source, accept_text: bool):
        if isinstance(source, Path):
            return str(source)
        if not isinstance(source, str):
            return None
        if not accept_text:
            return source
        if "\n" in source:
            return None
        path = Path(source)
        try:
            is_path = path.is_absolute() or path.exists()
        except OSError:  # Can happen on Windows w/ Python < 3.10.
            is_path = False
        return source if is_path else None

    @property
    def name(self) -> str:
        return getattr(self.file, "name", "<in-memory file>")

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        if self._opened:
            self.file.close()


[docs]
    def read(self) -> str:
        return self._decode(self.file.read())



[docs]
    def readlines(self) -> "Iterator[str]":
        first_line = True
        for line in self.file:
            yield self._decode(line, remove_bom=first_line)
            first_line = False


    def _decode(self, content: "str|bytes", remove_bom: bool = True) -> str:
        if isinstance(content, bytes):
            content = content.decode("UTF-8")
        if remove_bom and content.startswith("\ufeff"):
            content = content[1:]
        if "\r\n" in content:
            content = content.replace("\r\n", "\n")
        return content