05.11., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 9f1872d8 authored by eckhart

- syntaxtree: added flatten_xml(); parse_xml still buggy!

parent 1c42afde
......@@ -44,7 +44,9 @@ __all__ = ('ParserBase',
'Node',
'RootNode',
'parse_sxpr',
'flatten_sxpr')
'parse_xml',
'flatten_sxpr',
'flatten_xml')
#######################################################################
......@@ -182,6 +184,13 @@ def flatten_sxpr(sxpr: str) -> str:
return re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', sxpr)).strip()
def flatten_xml(xml: str) -> str:
    """Returns an XML-tree as a one-liner without unnecessary whitespace.

    Whitespace is removed in exactly two places: directly after a closing
    tag and directly before an opening tag. All other whitespace, in
    particular whitespace inside the text of leaf-nodes, is preserved.

    Args:
        xml: The (possibly pretty-printed) XML source.

    Returns:
        The XML source with the whitespace between tags stripped.
    """
    # A capture group with a back-reference is used instead of a look-behind
    # assertion: the tag name has variable length, and the standard library
    # `re` module only accepts fixed-width look-behind patterns.
    without_trailing_ws = re.sub(r'(</\w+>)\s+', r'\1', xml)
    # Drop the whitespace (indentation, line breaks) preceding opening tags.
    return re.sub(r'\s+(?=<\w)', '', without_trailing_ws)
class Node(collections.abc.Sized):
"""
Represents a node in the concrete or abstract syntax tree.
......@@ -869,7 +878,7 @@ def parse_xml(xml: str) -> Node:
Generates a tree of nodes from a (Pseudo-)XML-source.
"""
xml = StringView(xml)
PlainText = MockParser('', 'PlainText')
PlainText = MockParser('', ':PlainText')
mock_parsers = {':PlainText': PlainText}
def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]:
......@@ -894,10 +903,10 @@ def parse_xml(xml: str) -> Node:
s, attributes = parse_attributes(s[match.end() - s.begin:])
i = s.find('>')
assert i >= 0
return s[i+1,], tagname, attributes, s[i-1] == "/"
return s[i+1:], tagname, attributes, s[i-1] == "/"
def parse_closing_tag(s: StringView) -> Tuple[StringView, str]:
"""Parses a closing tag returns the string segment, just after
"""Parses a closing tag and returns the string segment, just after
the closing tag."""
match = s.match(re.compile(r'</\s*(?P<tagname>[\w:]+)>'))
assert match
......@@ -935,7 +944,9 @@ def parse_xml(xml: str) -> Node:
result = tuple(result)
return Node(mock_parsers.setdefault(tagname, MockParser(name, ":" + class_name)), result)
return parse_full_content(xml[xml.search(re.compile(r'<(?!\?)')):])
match_header = xml.search(re.compile(r'<(?!\?)'))
start = match_header.start() if match_header else 0
return parse_full_content(xml[start:])
# if __name__ == "__main__":
# st = parse_sxpr("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))")
......
......@@ -24,15 +24,15 @@ import sys
sys.path.extend(['../', './'])
from DHParser.error import Error
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, TOKEN_PTYPE
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml, TOKEN_PTYPE
from DHParser.transform import traverse, reduce_single_child, \
replace_by_single_child, flatten, remove_expendables
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider
class TestMockSyntaxTree:
def test_mock_syntax_tree(self):
class TestParseSxpression:
def test_parse_s_expression(self):
tree = parse_sxpr('(a (b c))')
assert flatten_sxpr(tree.as_sxpr()) == '(a (b "c"))', flatten_sxpr(tree.as_sxpr())
tree = parse_sxpr('(a i\nj\nk)')
......@@ -44,6 +44,14 @@ class TestMockSyntaxTree:
except ValueError:
pass
class TestParseXML:
    """Tests for the XML serialization and the XML parser."""

    def test_roundtrip(self):
        """Serialize an S-expression tree as XML, flatten it and parse it back."""
        flat = flatten_xml(parse_sxpr('(a (b c) (d (e f) (h i)))').as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        # The re-parsed tree is only printed for inspection; it is not
        # compared against the source tree.
        reparsed = parse_xml(flat)
        print(reparsed.as_sxpr())
class TestNode:
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment