# Copyright 2008-2015 Nokia Networks
# Copyright 2016- Robot Framework Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from urllib.parse import quote
from robot.errors import DataError
from robot.utils import html_escape, html_format, NormalizedDict
from robot.utils.htmlformatters import HeaderFormatter
[docs]
class DocToHtml:
    """Callable that renders documentation in a given source format as HTML.

    After the format-specific conversion, every name enclosed in backticks
    is replaced with a link when a target is known for it, or with a plain
    ``<span class="name">`` otherwise.
    """

    # Non-greedy so that each backtick pair is matched separately.
    _name_regexp = re.compile("`(.+?)`")

    def __init__(self, doc_format, targets=None):
        """Select the formatter for ``doc_format`` and store link targets.

        ``targets`` maps names to link targets and is wrapped in a
        ``NormalizedDict``. Raises ``DataError`` for an unknown format.
        """
        self._formatter = self._get_formatter(doc_format)
        self._targets = NormalizedDict(targets)

    def _get_formatter(self, doc_format):
        """Return the callable converting ``doc_format`` text to HTML."""
        formatters = {
            "ROBOT": html_format,
            "TEXT": self._format_text,
            "HTML": lambda doc: doc,
            "REST": self._format_rest,
        }
        try:
            formatter = formatters[doc_format]
        except KeyError:
            raise DataError(f"Invalid documentation format '{doc_format}'.")
        return formatter

    def _format_text(self, doc):
        """Escape plain text and wrap it in a whitespace-preserving paragraph."""
        escaped = html_escape(doc)
        return f'<p style="white-space: pre-wrap">{escaped}</p>'

    def _format_rest(self, doc):
        """Render reStructuredText with docutils and return its HTML body."""
        # Imported lazily so docutils is needed only when reST is used.
        try:
            from docutils.core import publish_parts
        except ImportError:
            raise DataError("reST format requires 'docutils' module to be installed.")
        overrides = {"syntax_highlight": "short"}
        rendered = publish_parts(
            doc, writer_name="html", settings_overrides=overrides
        )
        return rendered["html_body"]

    def __call__(self, doc):
        """Convert ``doc`` to HTML and mark up backtick-quoted names."""
        html = self._formatter(doc)
        return self._name_regexp.sub(self._link_keywords, html)

    def _link_keywords(self, match):
        """Return the HTML replacement for a single backtick-quoted name."""
        keyword = match.group(1)
        href = self._targets.get(keyword)
        if not href:
            # Unknown (or empty) target: highlight the name without linking.
            return f'<span class="name">{keyword}</span>'
        return f'<a href="{href}" class="name">{keyword}</a>'
[docs]
class HtmlToText:
    """Convert a small subset of HTML markup to plain text using regexps."""

    # Tag name patterns mapped to the marker placed on both sides of the
    # tag's content in the plain-text output. Keys are regexp fragments.
    html_tags = {
        "b": "*",
        "i": "_",
        "strong": "*",
        "em": "_",
        "code": "``",
        "div.*?": "",
    }
    # Regexp patterns mapped to their plain-text replacements. They are
    # applied in order, so ``&amp;`` must stay last: decoding it earlier
    # would turn e.g. ``&amp;lt;`` into ``&lt;`` and then wrongly into ``<``.
    html_chars = {
        "<br */?>": "\n",
        "&lt;": "<",
        "&gt;": ">",
        "&quot;": '"',
        "&apos;": "'",
        "&amp;": "&",
    }

    def get_short_doc_from_html(self, doc):
        """Return the first paragraph of ``doc`` converted to plain text.

        If ``doc`` contains no ``<p ...>`` element, the whole string is
        converted instead.
        """
        match = re.search(r"<p.*?>(.*?)</?p>", doc, re.DOTALL)
        if match:
            doc = match.group(1)
        return self.html_to_plain_text(doc)

    def html_to_plain_text(self, doc):
        """Replace the tags in ``html_tags`` and entities in ``html_chars``.

        Markup not listed in either mapping is left in the result unchanged.
        """
        for tag, repl in self.html_tags.items():
            doc = re.sub(
                rf"<{tag}>(.*?)</{tag}>",
                rf"{repl}\1{repl}",
                doc,
                flags=re.DOTALL,
            )
        for html, text in self.html_chars.items():
            doc = re.sub(html, text, doc)
        return doc