# Source code for robot.libdocpkg.htmlutils
# Copyright 2008-2015 Nokia Networks
# Copyright 2016- Robot Framework Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from urllib.parse import quote
from robot.errors import DataError
from robot.utils import html_escape, html_format, NormalizedDict
from robot.utils.htmlformatters import HeaderFormatter
[docs]
class DocFormatter:
    """Render documentation as HTML and link backtick-quoted names.

    The documentation is first converted to HTML with ``DocToHtml``. After
    that every `` `name` `` occurrence is replaced with a link if ``name``
    is a known target (a fixed section name, a keyword name, an
    introduction header, or a type name) and with a plain
    ``<span class="name">`` element otherwise.
    """

    # Matches h2-h4 elements so that they can be given an ``id`` attribute.
    _header_regexp = re.compile(r"<h([234])>(.+?)</h\1>")
    # Matches names quoted with backticks.
    _name_regexp = re.compile("`(.+?)`")

    def __init__(self, keywords, type_info, introduction, doc_format="ROBOT"):
        """Prepare the HTML converter and the lookup tables of link targets.

        :param keywords: Iterable of objects having a ``name`` attribute.
        :param type_info: Iterable of objects having a ``name`` attribute.
        :param introduction: Introduction text; scanned for headers only
            when ``doc_format`` is ``"ROBOT"``.
        :param doc_format: Documentation format passed to ``DocToHtml``.
        """
        self._doc_to_html = DocToHtml(doc_format)
        uses_robot_format = doc_format == "ROBOT"
        self._targets = self._get_targets(
            keywords,
            introduction,
            robot_format=uses_robot_format,
        )
        self._type_info_targets = self._get_type_info_targets(type_info)

    def _get_targets(self, keywords, introduction, robot_format):
        """Collect section, keyword and header targets into one lookup table."""
        section_targets = {
            "introduction": "Introduction",
            "library introduction": "Introduction",
            "importing": "Importing",
            "library importing": "Importing",
            "keywords": "Keywords",
        }
        keyword_targets = {kw.name: kw.name for kw in keywords}
        header_targets = {}
        if robot_format:
            header_targets = {
                title: title for title in self._yield_header_targets(introduction)
            }
        # Later groups override earlier ones, exactly like successive
        # item assignments into a single dictionary would.
        combined = {**section_targets, **keyword_targets, **header_targets}
        return self._escape_and_encode_targets(combined)

    def _get_type_info_targets(self, type_info):
        """Build a lookup table mapping each type name to itself."""
        type_names = [info.name for info in type_info]
        return self._escape_and_encode_targets(dict(zip(type_names, type_names)))

    def _yield_header_targets(self, introduction):
        """Yield the second match group of every line recognized as a header."""
        header_matcher = HeaderFormatter()
        for raw_line in introduction.splitlines():
            found = header_matcher.match(raw_line.strip())
            if not found:
                continue
            yield found.group(2)

    def _escape_and_encode_targets(self, targets):
        """Return targets with HTML-escaped keys and URI-encoded values."""
        encoded_pairs = (
            (html_escape(name), self._encode_uri_component(target))
            for name, target in targets.items()
        )
        return NormalizedDict(encoded_pairs)

    def _encode_uri_component(self, value):
        """Percent-encode ``value`` like JavaScript's ``encodeURIComponent``."""
        utf8_bytes = value.encode("UTF-8")
        return quote(utf8_bytes, safe="-_.!~*'()")

    def html(self, doc, intro=False):
        """Convert ``doc`` to HTML and link the names quoted with backticks.

        :param doc: Documentation text in the configured format.
        :param intro: When true, h2-h4 headers get an ``id`` attribute
            matching the header content.
        :return: The resulting HTML string.
        """
        rendered = self._doc_to_html(doc)
        if intro:
            rendered = self._header_regexp.sub(r'<h\1 id="\2">\2</h\1>', rendered)
        return self._name_regexp.sub(self._link_keywords, rendered)

    def _link_keywords(self, match):
        """Return the HTML replacing one backtick-quoted name.

        General targets take precedence over type targets; names matching
        neither are wrapped in a ``span``.
        """
        name = match.group(1)
        if name in self._targets:
            anchor = self._targets[name]
            return f'<a href="#{anchor}" class="name">{name}</a>'
        if name in self._type_info_targets:
            anchor = self._type_info_targets[name]
            return f'<a href="#type-{anchor}" class="name">{name}</a>'
        return f'<span class="name">{name}</span>'
[docs]
class DocToHtml:
    """Callable converting documentation in a given format to HTML.

    Supported formats are ``ROBOT``, ``TEXT``, ``HTML`` and ``REST``.
    """

    def __init__(self, doc_format):
        """Select the formatter for ``doc_format``.

        :raises DataError: If ``doc_format`` is not a supported format.
        """
        self._formatter = self._get_formatter(doc_format)

    def __call__(self, doc):
        """Return ``doc`` converted to HTML by the selected formatter."""
        return self._formatter(doc)

    def _get_formatter(self, doc_format):
        """Return the callable that handles ``doc_format``."""
        formatters = {
            "ROBOT": html_format,
            "TEXT": self._format_text,
            # HTML documentation is used as-is.
            "HTML": lambda html: html,
            "REST": self._format_rest,
        }
        try:
            return formatters[doc_format]
        except KeyError:
            raise DataError(f"Invalid documentation format '{doc_format}'.")

    def _format_text(self, doc):
        """Escape ``doc`` and wrap it in a whitespace-preserving paragraph."""
        escaped = html_escape(doc)
        return f'<p style="white-space: pre-wrap">{escaped}</p>'

    def _format_rest(self, doc):
        """Convert reStructuredText to HTML using docutils.

        :raises DataError: If the ``docutils`` module cannot be imported.
        """
        # Imported lazily so that docutils is needed only with this format.
        try:
            from docutils.core import publish_parts
        except ImportError:
            raise DataError("reST format requires 'docutils' module to be installed.")
        overrides = {"syntax_highlight": "short"}
        published = publish_parts(
            doc,
            writer_name="html",
            settings_overrides=overrides,
        )
        return published["html_body"]
[docs]
class HtmlToText:
    """Convert a small subset of HTML to plain text.

    Content of bold, italic and code elements is wrapped in ``*``, ``_``
    and double backticks, respectively, ``div`` elements are unwrapped,
    line breaks become newlines and the listed character entities are
    decoded.
    """

    # Tag patterns mapped to the marker placed around the element content.
    # Keys are embedded into a regular expression, so they may be patterns.
    html_tags = {
        "b": "*",
        "i": "_",
        "strong": "*",
        "em": "_",
        "code": "``",
        "div.*?": "",
    }
    # Regular expressions mapped to their plain text replacements. They are
    # applied in this order, and ``&amp;`` must be the last one. Otherwise
    # an escaped entity such as ``&amp;lt;`` would first become ``&lt;`` and
    # then be decoded a second time to ``<``.
    html_chars = {
        "<br */?>": "\n",
        "&lt;": "<",
        "&gt;": ">",
        "&quot;": '"',
        "&apos;": "'",
        "&amp;": "&",
    }

    def get_short_doc_from_html(self, doc):
        """Return the first paragraph of ``doc`` as plain text.

        If ``doc`` contains no paragraph element, the whole text is converted.
        """
        match = re.search(r"<p.*?>(.*?)</?p>", doc, re.DOTALL)
        if match:
            doc = match.group(1)
        return self.html_to_plain_text(doc)

    def html_to_plain_text(self, doc):
        """Replace supported tags and entities in ``doc`` with plain text.

        :param doc: HTML string to convert.
        :return: The converted string. Markup not listed in ``html_tags``
            or ``html_chars`` is left untouched.
        """
        for tag, repl in self.html_tags.items():
            doc = re.sub(
                rf"<{tag}>(.*?)</{tag}>",
                rf"{repl}\1{repl}",
                doc,
                flags=re.DOTALL,
            )
        for html, text in self.html_chars.items():
            doc = re.sub(html, text, doc)
        return doc