In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 39f232e5 authored by di68kap's avatar di68kap

- syntaxtree.Node.as_xml() sanitizes strings

parent 511ec92a
......@@ -1215,6 +1215,9 @@ class Series(NaryOperator):
Matches if each of a series of parsers matches exactly in the order of
the series.
Attributes:
mandatory (int): Starting
Example::
>>> variable_name = RegExp('(?!\d)\w') + RE('\w*')
......
......@@ -199,6 +199,9 @@ def flatten_xml(xml: str) -> str:
return re.sub(r'\s+(?=<[\w:])', '', re.sub(r'(?P<closing_tag></:?\w+>)\s+', tag_only, xml))
# Matches every ampersand that does NOT already start an entity reference of
# the form `&name;` (e.g. `&lt;`), so that only "bare" ampersands get escaped.
# NOTE: `#` is not matched by `\w`, so the ampersand of a numeric character
# reference such as `&#60;` is treated as a bare ampersand as well.
# A raw string is used so that `\w` reaches the regex engine verbatim instead
# of being an invalid string escape sequence.
RX_AMP = re.compile(r'&(?!\w+;)')
class Node(collections.abc.Sized):
"""
Represents a node in the concrete or abstract syntax tree.
......@@ -539,7 +542,7 @@ class Node(collections.abc.Sized):
content.append((sep + usetab).join(s for s in subtree))
return head + usetab + (sep + usetab).join(content) + tail
res = cast(str, self.result) # safe, because if there are no children, result is a string
res = self.content # cast(str, self.result) # safe, because if there are no children, result is a string
if not inline and not head:
# strip whitespace for omitted non inline node, e.g. CharData in mixed elements
res = res.strip()
......@@ -652,6 +655,13 @@ class Node(collections.abc.Sized):
return ''
return ('\n</') + node.tag_name + '>'
def sanitizer(content: str) -> str:
    """Return `content` with XML markup characters replaced by entities.

    Ampersands matched by `RX_AMP` become "&amp;"; afterwards every "<"
    is turned into "&lt;" and every ">" into "&gt;".
    """
    escaped = RX_AMP.sub('&amp;', content)
    for char, entity in (('<', '&lt;'), ('>', '&gt;')):
        escaped = escaped.replace(char, entity)
    return escaped
def inlining(node):
"""Returns True, if `node`'s tag name is contained in `inline_tags`,
thereby signalling that the children of this node shall not be
......@@ -661,7 +671,7 @@ class Node(collections.abc.Sized):
and node.attr.get('xml:space', 'default') == 'preserve')
line_breaks = linebreaks(src) if src else []
return self._tree_repr(' ' * indentation, opening, closing,
return self._tree_repr(' ' * indentation, opening, closing, sanitizer,
density=1, inline_fn=inlining)
......
......@@ -23,7 +23,8 @@ import copy
import sys
sys.path.extend(['../', './'])
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml, TOKEN_PTYPE
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml, \
MockParser
from DHParser.transform import traverse, reduce_single_child, \
replace_by_single_child, flatten, remove_expendables
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
......@@ -154,6 +155,10 @@ class TestNode:
assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos
def test_xml_sanitizer(self):
    """Node.as_xml() must escape "<", "&" and ">" occurring in leaf content."""
    node = Node(MockParser('tag'), '<&>')
    xml = node.as_xml()
    # The raw markup characters must not survive serialization ...
    assert '<&>' not in xml, xml
    # ... and must show up as their respective entities instead.
    assert '&lt;&amp;&gt;' in xml, xml
class TestRootNode:
def test_error_handling(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment