10.12., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 9f1872d8 authored by eckhart's avatar eckhart

- syntaxtree: added flatten_xml(); parse_xml still buggy!

parent 1c42afde
...@@ -44,7 +44,9 @@ __all__ = ('ParserBase', ...@@ -44,7 +44,9 @@ __all__ = ('ParserBase',
'Node', 'Node',
'RootNode', 'RootNode',
'parse_sxpr', 'parse_sxpr',
'flatten_sxpr') 'parse_xml',
'flatten_sxpr',
'flatten_xml')
####################################################################### #######################################################################
...@@ -182,6 +184,13 @@ def flatten_sxpr(sxpr: str) -> str: ...@@ -182,6 +184,13 @@ def flatten_sxpr(sxpr: str) -> str:
return re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', sxpr)).strip() return re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', sxpr)).strip()
def flatten_xml(xml: str) -> str:
    """Returns an XML-tree as a one-liner without unnecessary whitespace,
    i.e. only whitespace within leaf-nodes is preserved.

    Two passes are made over the string:

    1. whitespace directly following a closing tag (``</name>``) is removed;
    2. whitespace directly preceding an opening tag (``<name``) is removed.

    :param xml: the XML source as a string.
    :returns: the same XML with the whitespace between tags stripped.
    """
    # A capturing group plus back-reference is used rather than the
    # look-behind `(?<=</\w+>)`: Python's standard `re` module only accepts
    # fixed-width look-behind patterns and raises `re.error` for `\w+`.
    without_trailing_ws = re.sub(r'(</\w+>)\s+', r'\1', xml)
    return re.sub(r'\s+(?=<\w)', '', without_trailing_ws)
class Node(collections.abc.Sized): class Node(collections.abc.Sized):
""" """
Represents a node in the concrete or abstract syntax tree. Represents a node in the concrete or abstract syntax tree.
...@@ -869,7 +878,7 @@ def parse_xml(xml: str) -> Node: ...@@ -869,7 +878,7 @@ def parse_xml(xml: str) -> Node:
Generates a tree of nodes from a (Pseudo-)XML-source. Generates a tree of nodes from a (Pseudo-)XML-source.
""" """
xml = StringView(xml) xml = StringView(xml)
PlainText = MockParser('', 'PlainText') PlainText = MockParser('', ':PlainText')
mock_parsers = {':PlainText': PlainText} mock_parsers = {':PlainText': PlainText}
def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]: def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]:
...@@ -894,10 +903,10 @@ def parse_xml(xml: str) -> Node: ...@@ -894,10 +903,10 @@ def parse_xml(xml: str) -> Node:
s, attributes = parse_attributes(s[match.end() - s.begin:]) s, attributes = parse_attributes(s[match.end() - s.begin:])
i = s.find('>') i = s.find('>')
assert i >= 0 assert i >= 0
return s[i+1,], tagname, attributes, s[i-1] == "/" return s[i+1:], tagname, attributes, s[i-1] == "/"
def parse_closing_tag(s: StringView) -> Tuple[StringView, str]: def parse_closing_tag(s: StringView) -> Tuple[StringView, str]:
"""Parses a closing tag returns the string segment, just after """Parses a closing tag and returns the string segment, just after
the closing tag.""" the closing tag."""
match = s.match(re.compile(r'</\s*(?P<tagname>[\w:]+)>')) match = s.match(re.compile(r'</\s*(?P<tagname>[\w:]+)>'))
assert match assert match
...@@ -935,7 +944,9 @@ def parse_xml(xml: str) -> Node: ...@@ -935,7 +944,9 @@ def parse_xml(xml: str) -> Node:
result = tuple(result) result = tuple(result)
return Node(mock_parsers.setdefault(tagname, MockParser(name, ":" + class_name)), result) return Node(mock_parsers.setdefault(tagname, MockParser(name, ":" + class_name)), result)
return parse_full_content(xml[xml.search(re.compile(r'<(?!\?)')):]) match_header = xml.search(re.compile(r'<(?!\?)'))
start = match_header.start() if match_header else 0
return parse_full_content(xml[start:])
# if __name__ == "__main__": # if __name__ == "__main__":
# st = parse_sxpr("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))") # st = parse_sxpr("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))")
......
...@@ -24,15 +24,15 @@ import sys ...@@ -24,15 +24,15 @@ import sys
sys.path.extend(['../', './']) sys.path.extend(['../', './'])
from DHParser.error import Error from DHParser.error import Error
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, TOKEN_PTYPE from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml, TOKEN_PTYPE
from DHParser.transform import traverse, reduce_single_child, \ from DHParser.transform import traverse, reduce_single_child, \
replace_by_single_child, flatten, remove_expendables replace_by_single_child, flatten, remove_expendables
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider from DHParser.dsl import grammar_provider
class TestMockSyntaxTree: class TestParseSxpression:
def test_mock_syntax_tree(self): def test_parse_s_expression(self):
tree = parse_sxpr('(a (b c))') tree = parse_sxpr('(a (b c))')
assert flatten_sxpr(tree.as_sxpr()) == '(a (b "c"))', flatten_sxpr(tree.as_sxpr()) assert flatten_sxpr(tree.as_sxpr()) == '(a (b "c"))', flatten_sxpr(tree.as_sxpr())
tree = parse_sxpr('(a i\nj\nk)') tree = parse_sxpr('(a i\nj\nk)')
...@@ -44,6 +44,14 @@ class TestMockSyntaxTree: ...@@ -44,6 +44,14 @@ class TestMockSyntaxTree:
except ValueError: except ValueError:
pass pass
class TestParseXML:
    """Tests for the XML serialization and the XML reader."""

    def test_roundtrip(self):
        """Serializes an S-expression tree as XML, flattens it and reads
        the flattened XML back in."""
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(source_tree.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        # The re-parsed tree is only printed, not compared with the source.
        reparsed = parse_xml(flat)
        print(reparsed.as_sxpr())
class TestNode: class TestNode:
""" """
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.