# Source code for robot.libdocpkg.htmlutils

#  Copyright 2008-2015 Nokia Networks
#  Copyright 2016-     Robot Framework Foundation
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import re
from urllib.parse import quote

from robot.errors import DataError
from robot.utils import html_escape, html_format, NormalizedDict
from robot.utils.htmlformatters import HeaderFormatter



[docs]
class DocFormatter:
    """Convert library documentation to HTML with working internal links.

    On construction a mapping from linkable names to ``#fragment`` targets
    is built and handed to a `DocToHtml` instance, which does the actual
    formatting and link generation when `html` is called.
    """

    # Matches <h2>, <h3> and <h4> elements; group 1 is the level and
    # group 2 is the header content.
    _header_regexp = re.compile(r"<h([234])>(.+?)</h\1>")

    def __init__(self, keywords, type_info, introduction, doc_format="ROBOT"):
        """Build the link targets and the underlying `DocToHtml` converter.

        :param keywords: Iterable of objects having a ``name`` attribute.
        :param type_info: Iterable of objects having a ``name`` attribute.
        :param introduction: Introduction text; scanned for headers only
            when ``doc_format`` is ``"ROBOT"``.
        :param doc_format: Documentation format passed on to `DocToHtml`.
        """
        targets = self._get_targets(
            keywords,
            type_info,
            introduction,
            robot_format=doc_format == "ROBOT",
        )
        self._doc_to_html = DocToHtml(doc_format, targets)

    def _get_targets(self, keywords, type_info, introduction, robot_format):
        """Return a dict mapping HTML-escaped names to ``#``-prefixed targets.

        Entries are added in the order: fixed section names, type names,
        introduction headers (only if ``robot_format`` is true) and keyword
        names. A later entry with the same key replaces an earlier one.
        """
        targets = {
            "introduction": "Introduction",
            "library introduction": "Introduction",
            "importing": "Importing",
            "library importing": "Importing",
            "keywords": "Keywords",
        }
        for info in type_info:
            targets[info.name] = "type-" + info.name
        if robot_format:
            for header in self._yield_header_targets(introduction):
                targets[header] = header
        for kw in keywords:
            targets[kw.name] = kw.name
        # Keys are escaped because they are later compared against names
        # picked up from already formatted HTML; values become fragments.
        return {
            html_escape(key): "#" + self._encode_uri_component(value)
            for key, value in targets.items()
        }

    def _yield_header_targets(self, introduction):
        """Yield ``group(2)`` of every stripped line `HeaderFormatter` matches.

        NOTE(review): group 2 is presumably the header text -- confirm
        against `HeaderFormatter`.
        """
        headers = HeaderFormatter()
        for line in introduction.splitlines():
            match = headers.match(line.strip())
            if match:
                yield match.group(2)

    def _encode_uri_component(self, value):
        """Percent-encode ``value`` as UTF-8, leaving ``-_.!~*'()`` intact."""
        # Emulates encodeURIComponent javascript function
        return quote(value.encode("UTF-8"), safe="-_.!~*'()")

    def html(self, doc, intro=False):
        """Return ``doc`` converted to HTML.

        :param doc: Documentation text in the configured format.
        :param intro: When true, every ``<h2>``-``<h4>`` element in the
            result gets an ``id`` attribute equal to its content.
        """
        doc = self._doc_to_html(doc)
        if intro:
            doc = self._header_regexp.sub(r'<h\1 id="\2">\2</h\1>', doc)
        return doc





[docs]
class DocToHtml:
    """Callable turning documentation into HTML and linking quoted names.

    Calling an instance formats the text with the formatter selected by
    ``doc_format`` and then rewrites every backtick-quoted name in the
    result either as a link (name found in ``targets``) or as a plain
    ``<span>``.
    """

    # Non-greedy so that each `name` on a line is handled separately.
    _name_regexp = re.compile("`(.+?)`")

    def __init__(self, doc_format, targets=None):
        """Select the formatter and wrap ``targets`` in a `NormalizedDict`.

        :raises DataError: If ``doc_format`` is not a supported format.
        """
        self._formatter = self._get_formatter(doc_format)
        self._targets = NormalizedDict(targets)

    def _get_formatter(self, doc_format):
        """Return the callable that formats ``doc_format`` text as HTML."""
        formatters = {
            "ROBOT": html_format,
            "TEXT": self._format_text,
            "HTML": lambda doc: doc,
            "REST": self._format_rest,
        }
        try:
            return formatters[doc_format]
        except KeyError:
            raise DataError(f"Invalid documentation format '{doc_format}'.")

    def _format_text(self, doc):
        """Escape plain text and wrap it in a whitespace-preserving ``<p>``."""
        escaped = html_escape(doc)
        return f'<p style="white-space: pre-wrap">{escaped}</p>'

    def _format_rest(self, doc):
        """Render reStructuredText with docutils and return its HTML body.

        :raises DataError: If the ``docutils`` module cannot be imported.
        """
        # Imported lazily: docutils is needed only for the REST format.
        try:
            from docutils.core import publish_parts
        except ImportError:
            raise DataError("reST format requires 'docutils' module to be installed.")
        overrides = {"syntax_highlight": "short"}
        published = publish_parts(
            doc,
            writer_name="html",
            settings_overrides=overrides,
        )
        return published["html_body"]

    def __call__(self, doc):
        """Format ``doc`` and replace backtick-quoted names in the result."""
        formatted = self._formatter(doc)
        return self._name_regexp.sub(self._link_keywords, formatted)

    def _link_keywords(self, match):
        """Return the replacement markup for one backtick-quoted name."""
        name = match.group(1)
        href = self._targets.get(name)
        if not href:
            return f'<span class="name">{name}</span>'
        return f'<a href="{href}" class="name">{name}</a>'




[docs]
class HtmlToText:
    """Convert a small subset of HTML to plain text with simple markup."""

    # Regular expression for a tag name (without angle brackets) mapped to
    # the marker that replaces both the opening and the closing tag.
    html_tags = {
        "b": "*",
        "i": "_",
        "strong": "*",
        "em": "_",
        "code": "``",
        "div.*?": "",
    }
    # Regular expression mapped to its plain text replacement. Entries are
    # applied in this order, so "&amp;" must stay last: decoding it earlier
    # would turn e.g. "&amp;lt;" into "&lt;" and then wrongly into "<".
    html_chars = {
        "<br */?>": "\n",
        "&lt;": "<",
        "&gt;": ">",
        "&quot;": '"',
        "&apos;": "'",
        "&amp;": "&",
    }

    def get_short_doc_from_html(self, doc):
        """Return the content of the first ``<p>`` element as plain text.

        If ``doc`` contains no paragraph, the whole ``doc`` is converted.
        """
        match = re.search(r"<p.*?>(.*?)</?p>", doc, re.DOTALL)
        if match:
            doc = match.group(1)
        return self.html_to_plain_text(doc)

    def html_to_plain_text(self, doc):
        """Replace the tags in `html_tags` and the patterns in `html_chars`.

        Tags are handled first so that angle brackets produced by decoding
        ``&lt;`` and ``&gt;`` are never mistaken for markup.
        """
        for tag, repl in self.html_tags.items():
            doc = re.sub(
                rf"<{tag}>(.*?)</{tag}>",
                rf"{repl}\1{repl}",
                doc,
                flags=re.DOTALL,
            )
        for html, text in self.html_chars.items():
            doc = re.sub(html, text, doc)
        return doc