Commit bf89fbf2 authored by Eckhart Arnold
Browse files

- error handling simplified; XML parser continued

parent 43cb9807
...@@ -38,7 +38,7 @@ import os ...@@ -38,7 +38,7 @@ import os
import re import re
from DHParser.preprocess import strip_tokens, with_source_mapping, PreprocessorFunc from DHParser.preprocess import strip_tokens, with_source_mapping, PreprocessorFunc
from DHParser.syntaxtree import Node, RootNode from DHParser.syntaxtree import Node, RootNode, StrictResultType
from DHParser.transform import TransformationFunc from DHParser.transform import TransformationFunc
from DHParser.parse import Grammar from DHParser.parse import Grammar
from DHParser.error import adjust_error_locations, is_error, Error from DHParser.error import adjust_error_locations, is_error, Error
...@@ -152,6 +152,16 @@ class Compiler: ...@@ -152,6 +152,16 @@ class Compiler:
""" """
return 'on_' + node_name return 'on_' + node_name
def compile_children(self, node: Node) -> StrictResultType:
    """Compile every child of ``node`` and return the outcomes as a tuple.

    A node without children has nothing to compile; in that case its
    own ``result`` (which may be empty) is handed back unchanged.
    """
    if not node.children:
        return node.result
    return tuple(map(self.compile, node.children))
def fallback_compiler(self, node: Node) -> Any: def fallback_compiler(self, node: Node) -> Any:
"""This is a generic compiler function which will be called on """This is a generic compiler function which will be called on
all those node types for which no compiler method `on_XXX` has all those node types for which no compiler method `on_XXX` has
......
...@@ -368,6 +368,20 @@ class Node(collections.abc.Sized): ...@@ -368,6 +368,20 @@ class Node(collections.abc.Sized):
# return False # return False
def get(self, index_or_tagname: Union[int, str],
        surrogate: Union['Node', Iterator['Node']]) -> Union['Node', Iterator['Node']]:
    """Return the child selected by ``index_or_tagname`` or ``surrogate``.

    If ``index_or_tagname`` is an integer, the child node with that index
    is returned; if it is a string, the first child node with that tag
    name is returned. If no such child exists, ``surrogate`` is returned
    instead. This mimics the behaviour of the ``get``-method of Python's
    dictionaries.

    Args:
        index_or_tagname: Position or tag name of the wanted child.
        surrogate: The value to return when the lookup fails.

    Returns:
        The result of ``self[index_or_tagname]`` or, if that lookup
        fails, ``surrogate``.
    """
    try:
        return self[index_or_tagname]
    except (KeyError, IndexError):
        # A missing tag name surfaces as KeyError. An out-of-range integer
        # index presumably surfaces as IndexError (the item lookup itself
        # is not visible here -- confirm); both mean "no such child".
        return surrogate
@property # this needs to be a (dynamic) property, in case sef.parser gets updated @property # this needs to be a (dynamic) property, in case sef.parser gets updated
def tag_name(self) -> str: def tag_name(self) -> str:
""" """
...@@ -525,7 +539,9 @@ class Node(collections.abc.Sized): ...@@ -525,7 +539,9 @@ class Node(collections.abc.Sized):
return head + '\n'.join([tab + data_fn(s) for s in res.split('\n')]) + tail return head + '\n'.join([tab + data_fn(s) for s in res.split('\n')]) + tail
def as_sxpr(self, src: str = None, showerrors: bool = True, indentation: int = 2, def as_sxpr(self, src: str = None,
showerrors: bool = True,
indentation: int = 2,
compact: bool = False) -> str: compact: bool = False) -> str:
""" """
Returns content as S-expression, i.e. in lisp-like form. Returns content as S-expression, i.e. in lisp-like form.
...@@ -534,7 +550,9 @@ class Node(collections.abc.Sized): ...@@ -534,7 +550,9 @@ class Node(collections.abc.Sized):
src: The source text or `None`. In case the source text is src: The source text or `None`. In case the source text is
given the position of the element in the text will be given the position of the element in the text will be
reported as line and column. reported as line and column.
compact: If True a compact representation is returned where showerrors: If True, error messages will be shown.
indentation: The number of whitespaces for indentation
compact: If True, a compact representation is returned where
brackets are omitted and only the indentation indicates the brackets are omitted and only the indentation indicates the
tree structure. tree structure.
""" """
...@@ -567,8 +585,12 @@ class Node(collections.abc.Sized): ...@@ -567,8 +585,12 @@ class Node(collections.abc.Sized):
return self._tree_repr(' ' * indentation, opening, closing, pretty, density=density) return self._tree_repr(' ' * indentation, opening, closing, pretty, density=density)
def as_xml(self, src: str = None, showerrors: bool = True, indentation: int = 2, def as_xml(self, src: str = None,
inline_tags: Set[str]=set(), omit_tags: Set[str]=set()) -> str: showerrors: bool = True,
indentation: int = 2,
inline_tags: Set[str]=set(),
omit_tags: Set[str]=set(),
empty_tags: Set[str]=set()) -> str:
""" """
Returns content as XML-tree. Returns content as XML-tree.
...@@ -576,6 +598,8 @@ class Node(collections.abc.Sized): ...@@ -576,6 +598,8 @@ class Node(collections.abc.Sized):
src: The source text or `None`. In case the source text is src: The source text or `None`. In case the source text is
given the position will also be reported as line and given the position will also be reported as line and
column. column.
showerrors: If True, error messages will be shown.
indentation: The number of whitespaces for indentation
inline_tags: A set of tag names, the content of which will always be written inline_tags: A set of tag names, the content of which will always be written
on a single line, unless it contains explicit line feeds ('\n'). on a single line, unless it contains explicit line feeds ('\n').
omit_tags: A set of tags from which only the content will be printed, but omit_tags: A set of tags from which only the content will be printed, but
...@@ -583,6 +607,8 @@ class Node(collections.abc.Sized): ...@@ -583,6 +607,8 @@ class Node(collections.abc.Sized):
allows producing a mix of plain text and child tags in the output, allows producing a mix of plain text and child tags in the output,
which otherwise is not supported by the Node object, because it which otherwise is not supported by the Node object, because it
requires its content to be either a tuple of children or string content. requires its content to be either a tuple of children or string content.
empty_tags: A set of tags which shall be rendered as empty elements, e.g.
"<empty/>" instead of "<empty></empty>".
""" """
def opening(node) -> str: def opening(node) -> str:
...@@ -599,11 +625,17 @@ class Node(collections.abc.Sized): ...@@ -599,11 +625,17 @@ class Node(collections.abc.Sized):
if showerrors and node.errors and not has_reserved_attrs: if showerrors and node.errors and not has_reserved_attrs:
txt.append(' err="%s"' % ''.join(str(err).replace('"', r'\"') txt.append(' err="%s"' % ''.join(str(err).replace('"', r'\"')
for err in node.errors)) for err in node.errors))
return "".join(txt + [">\n"]) if node.tag_name in empty_tags:
assert not node.result, ("Node %s with content %s is not an empty element!" %
(node.tag_name, str(node)))
ending = "/>\n"
else:
ending = ">\n"
return "".join(txt + [ending])
def closing(node): def closing(node):
"""Returns the closing string for the representation of `node`.""" """Returns the closing string for the representation of `node`."""
if node.tag_name in omit_tags: if node.tag_name in omit_tags or node.tag_name in empty_tags:
return '' return ''
return ('\n</') + node.tag_name + '>' return ('\n</') + node.tag_name + '>'
...@@ -716,6 +748,10 @@ class RootNode(Node): ...@@ -716,6 +748,10 @@ class RootNode(Node):
self.error_flag = 0 self.error_flag = 0
if node is not None: if node is not None:
self.swallow(node) self.swallow(node)
# customization for XML-Representation
self.inline_tags = set()
self.omit_tags = set()
self.empty_tags = set()
def swallow(self, node: Node) -> 'RootNode': def swallow(self, node: Node) -> 'RootNode':
"""Put `self` in the place of `node` by copying all its data. """Put `self` in the place of `node` by copying all its data.
...@@ -766,6 +802,14 @@ class RootNode(Node): ...@@ -766,6 +802,14 @@ class RootNode(Node):
self.all_errors.sort(key=lambda e: e.pos) self.all_errors.sort(key=lambda e: e.pos)
return self.all_errors return self.all_errors
def customized_XML(self):
    """Return the XML serialization of this tree using its stored settings.

    The sets kept in ``self.inline_tags``, ``self.omit_tags`` and
    ``self.empty_tags`` are forwarded as the like-named keyword arguments
    of ``as_xml()``; see the docstring of `Node.as_xml()` for what each
    of these customizations does.
    """
    xml_options = {
        'inline_tags': self.inline_tags,
        'omit_tags': self.omit_tags,
        'empty_tags': self.empty_tags,
    }
    return self.as_xml(**xml_options)
ZOMBIE_NODE = Node(ZOMBIE_PARSER, '') ZOMBIE_NODE = Node(ZOMBIE_PARSER, '')
......
...@@ -207,7 +207,7 @@ def create_project(path: str): ...@@ -207,7 +207,7 @@ def create_project(path: str):
create_file('README.md', README_TEMPLATE.format(name=name)) create_file('README.md', README_TEMPLATE.format(name=name))
create_file('tst_%s_grammar.py' % name, create_file('tst_%s_grammar.py' % name,
GRAMMAR_TEST_TEMPLATE.format(name=name, dhparserdir=dhparserdir)) GRAMMAR_TEST_TEMPLATE.format(name=name, dhparserdir=dhparserdir))
create_file('example.dsl', 'Life is but a walking shadow\n') create_file('example.xml', 'Life is but a walking shadow\n')
os.chmod('tst_%s_grammar.py' % name, 0o755) os.chmod('tst_%s_grammar.py' % name, 0o755)
# The following is left to the user as an exercise # The following is left to the user as an exercise
# print('Creating file "%s".' % (name + 'Compiler.py')) # print('Creating file "%s".' % (name + 'Compiler.py'))
......
...@@ -32,7 +32,8 @@ from DHParser import logging, is_filename, load_if_file, \ ...@@ -32,7 +32,8 @@ from DHParser import logging, is_filename, load_if_file, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \ is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \ remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \ keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, MockParser remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, MockParser, \
ZOMBIE_NODE
####################################################################### #######################################################################
...@@ -680,14 +681,15 @@ class XMLCompiler(Compiler): ...@@ -680,14 +681,15 @@ class XMLCompiler(Compiler):
self.mock_parsers = dict() self.mock_parsers = dict()
def on_document(self, node):
    """Compile the ``document`` node.

    Adds 'CharData' to the ``omit_tags`` set of ``self.tree`` and then
    hands the node to the generic fallback compiler, whose result is
    returned.
    """
    omitted_tags = self.tree.omit_tags
    omitted_tags.add('CharData')
    return self.fallback_compiler(node)
def extract_attributes(self, node_sequence):
    """Collect the attributes contained in a sequence of nodes.

    Every node whose tag name is "Attribute" is expected to hold a
    "Name" node at index 0 and an "AttValue" node at index 1. All other
    nodes in the sequence are ignored.

    Args:
        node_sequence: An iterable of nodes to scan for attributes.

    Returns:
        An ``OrderedDict`` mapping the content of each attribute's name
        node to the content of its value node, in order of occurrence.

    Raises:
        AssertionError: If an "Attribute" node does not have the expected
            structure. The message is the node's S-expression.
    """
    attributes = OrderedDict()
    for node in node_sequence:
        if node.tag_name == "Attribute":
            # The message expression is evaluated only when a check fails,
            # so it must name an existing method (`as_sxpr`), otherwise the
            # structural error would be masked by an AttributeError.
            assert node[0].tag_name == "Name", node.as_sxpr()
            assert node[1].tag_name == "AttValue", node.as_sxpr()
            attributes[node[0].content] = node[1].content
    return attributes
...@@ -875,7 +877,7 @@ class XMLCompiler(Compiler): ...@@ -875,7 +877,7 @@ class XMLCompiler(Compiler):
if attributes: if attributes:
node.attributes.update(attributes) node.attributes.update(attributes)
node.parser = self.get_parser(stag['Name'].content) node.parser = self.get_parser(stag['Name'].content)
node.result = node['content'].result node.result = self.compile_children(node.get('content', ZOMBIE_NODE))
return node return node
# def on_STag(self, node): # def on_STag(self, node):
...@@ -890,6 +892,7 @@ class XMLCompiler(Compiler): ...@@ -890,6 +892,7 @@ class XMLCompiler(Compiler):
node.attributes.update(attributes) node.attributes.update(attributes)
node.parser = self.get_parser(node['Name'].content) node.parser = self.get_parser(node['Name'].content)
node.result = '' node.result = ''
self.tree.empty_tags.add(node.tag_name)
return node return node
# def on_TagName(self, node): # def on_TagName(self, node):
...@@ -1046,6 +1049,6 @@ if __name__ == "__main__": ...@@ -1046,6 +1049,6 @@ if __name__ == "__main__":
print(rel_path + ':' + str(error)) print(rel_path + ':' + str(error))
sys.exit(1) sys.exit(1)
else: else:
print(result.as_xml() if isinstance(result, Node) else result) print(result.customized_XML() if isinstance(result, Node) else result)
else: else:
print("Usage: XMLCompiler.py [FILENAME]") print("Usage: XMLCompiler.py [FILENAME]")
Life is but a walking shadow
<?xml version="1.0" encoding="UTF-8"?>
<note date="2018-06-14">
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Don't forget me this weekend!</body>
<priority level="high" />
<remark></remark>
</note>
\ No newline at end of file
...@@ -45,7 +45,7 @@ class TestDHParserCommandLineTool: ...@@ -45,7 +45,7 @@ class TestDHParserCommandLineTool:
def test_dhparser(self): def test_dhparser(self):
os.system('python ../dhparser.py testdata/neu >/dev/null') os.system('python ../dhparser.py testdata/neu >/dev/null')
os.system('python testdata/neu/tst_neu_grammar.py >/dev/null') os.system('python testdata/neu/tst_neu_grammar.py >/dev/null')
os.system('python testdata/neu/neuCompiler.py testdata/neu/example.dsl >testdata/neu/example.xml') os.system('python testdata/neu/neuCompiler.py testdata/neu/example.xml >testdata/neu/example.xml')
with open('testdata/neu/example.xml', 'r', encoding='utf-8') as f: with open('testdata/neu/example.xml', 'r', encoding='utf-8') as f:
xml = f.read() xml = f.read()
assert xml.find('<document>') >= 0 assert xml.find('<document>') >= 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment