...
 
Commits (6)
......@@ -1074,7 +1074,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
@cython.locals(i=cython.int, k=cython.int, N=cython.int)
def _tree_repr(self, tab, open_fn, close_fn, data_fn=lambda i: i,
density=0, inline=False, inline_fn=lambda node: False,
allow_ommissions=False) -> str:
allow_ommissions=False) -> List[str]:
"""
Generates a tree representation of this node and its children
in string form.
......@@ -1087,7 +1087,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
tab (str): The indentation string, e.g. '\t' or ' '
open_fn: (Node->str) A function that returns an opening
string (e.g. an XML-tag_name) for a given node
close_fn: (Node->str) A function that returns a closeF
close_fn: (Node->str) A function that returns a closing
string (e.g. an XML-tag_name) for a given node.
data_fn: (str->str) A function that filters the data string
before printing, e.g. to add quotation marks
......@@ -1100,28 +1100,38 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
tail = close_fn(self)
if not self.result:
return head.rstrip() + tail.lstrip()
tail = tail.lstrip(None if density & 2 else '')
return [head + tail]
inline = inline or inline_fn(self)
if inline:
head = head.rstrip()
tail = tail.lstrip()
usetab, sep = '', ''
hlf, tlf = '', ''
else:
usetab = tab if head else '' # no indentation if tag is already omitted
sep = '\n'
hlf = '\n'
tlf = '\n' if density == 0 or (tail[0:1] == '<') else ''
if self._children:
content = []
content = [head]
first_child = self._children[0]
for child in self._children:
subtree = child._tree_repr(tab, open_fn, close_fn, data_fn,
density, inline, inline_fn)
if subtree:
st = [subtree] if inline else subtree.split('\n')
content.append((sep + usetab).join(s for s in st))
return head + usetab + (sep + usetab).join(content) + tail
if inline:
content[-1] += '\n'.join(subtree)
else:
if sep:
for item in subtree:
content.append(usetab + item)
else:
content[-1] += ''.join(subtree)
if tlf:
content.append(tail)
else:
content[-1] += tail
return content
res = self.content
if not inline and not head and allow_ommissions:
......@@ -1129,21 +1139,30 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
res = res.strip() # WARNING: This changes the data in subtle ways
if density & 1 and res.find('\n') < 0:
# except for XML, add a gap between opening statement and content
gap = ' ' if not inline and head and head.rstrip()[-1:] != '>' else ''
return head.rstrip() + gap + data_fn(res) + tail.lstrip()
gap = ' ' if not inline and head and head[-1:] != '>' else ''
return [''.join((head, gap, data_fn(res), tail))]
else:
lines = [data_fn(s) for s in res.split('\n')]
N = len(lines)
i, k = 0, N - 1
if not inline and allow_ommissions:
# Strip preceding and succeding whitespace.
# Strip preceding and succeeding whitespace.
# WARNING: This changes the data in subtle ways
while i < N and not lines[i]:
i += 1
while k >= 0 and not lines[k]:
k -= 1
lines = [usetab + line for line in lines[i:k + 1]]
return head + '\n'.join(lines) + tail
content = [head, usetab] if hlf else [head + usetab]
for line in lines[i:k]:
content[-1] += line
content.append(usetab)
content[-1] += lines[k]
if tlf:
content.append(tail)
else:
content[-1] += tail
return content
def as_sxpr(self, src: Optional[str] = None,
indentation: int = 2,
......@@ -1167,7 +1186,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
the flattened expression does not exceed the threshold length.
A negative number means that it will always be flattened.
"""
left_bracket, right_bracket, density = ('(', ')', 1) if compact else ('(', '\n)', 0)
left_bracket, right_bracket, density = ('(', ')', 1) if compact else ('(', ')', 0)
lbreaks = linebreaks(src) if src else [] # type: List[int]
root = cast(RootNode, self) if isinstance(self, RootNode) \
else None # type: Optional[RootNode]
......@@ -1188,7 +1207,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
# print(node.pos, id(node), id(node) in root.error_nodes, root.get_errors(node))
if root and id(node) in root.error_nodes and not node.has_attr('err'):
txt.append(" `(%s)" % '; '.join(str(err) for err in root.get_errors(node)))
return "".join(txt) + '\n'
return "".join(txt)
def closing(node: Node) -> str:
"""Returns the closing string for the representation of `node`."""
......@@ -1200,7 +1219,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
else "'%s'" % strg if strg.find("'") < 0 \
else '"%s"' % strg.replace('"', r'\"')
sxpr = self._tree_repr(' ' * indentation, opening, closing, pretty, density=density)
sxpr = '\n'.join(self._tree_repr(' ' * indentation, opening, closing, pretty, density=density))
return sxpr if compact else flatten_sxpr(sxpr, flatten_threshold)
def as_xml(self, src: str = None,
......@@ -1244,16 +1263,16 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
if node.tag_name in empty_tags:
assert not node.result, ("Node %s with content %s is not an empty element!" %
(node.tag_name, str(node)))
ending = "/>\n" if not node.tag_name[0] == '?' else "?>\n"
ending = "/>" if not node.tag_name[0] == '?' else "?>"
else:
ending = ">\n"
ending = ">"
return "".join(txt + [ending])
def closing(node: Node):
"""Returns the closing string for the representation of `node`."""
if node.tag_name in empty_tags or (node.tag_name in omit_tags and not node.has_attr()):
return ''
return '\n</' + clean_anonymous_tag_name(node.tag_name) + '>'
return '</' + clean_anonymous_tag_name(node.tag_name) + '>'
def sanitizer(content: str) -> str:
"""Substitute "&", "<", ">" in XML-content by the respective entities."""
......@@ -1271,8 +1290,9 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
and node.attr.get('xml:space', 'default') == 'preserve')
line_breaks = linebreaks(src) if src else []
return self._tree_repr(' ' * indentation, opening, closing, sanitizer,
density=1, inline_fn=inlining, allow_ommissions=bool(omit_tags))
return '\n'.join(self._tree_repr(
' ' * indentation, opening, closing, sanitizer, density=1, inline_fn=inlining,
allow_ommissions=bool(omit_tags)))
# JSON serialization ###
......@@ -1969,8 +1989,9 @@ def parse_xml(xml: Union[str, StringView], ignore_pos: bool = False) -> Node:
closing or solitary tag is reached.
"""
i = 0
while s[i] != "<" or s[max(0, i - 1)] == "\\":
i = s.find("<", i)
while s[i] != "<": # or s[max(0, i - 1)] == "\\":
i = s.find("<", i + 1)
assert i > 0
return s[i:], s[:i]
def parse_full_content(s: StringView) -> Tuple[StringView, Node]:
......
......@@ -227,10 +227,10 @@ def transform_generic_command(context: List[Node]):
def transform_generic_block(context: List[Node]):
node = context[-1]
assert node.children[0].tag_name == "begin_generic_block"
assert node.children[0].children[0].tag_name == "begin_environment"
assert node.children[-1].tag_name == "end_generic_block"
assert node.children[-1].children[0].tag_name == "end_environment"
# assert node.children[0].tag_name == "begin_generic_block"
# assert node.children[0].children[0].tag_name == "begin_environment"
# assert node.children[-1].tag_name == "end_generic_block"
# assert node.children[-1].children[0].tag_name == "end_environment"
node.tag_name = 'env_' + node.children[0].children[0].content.lstrip('\\')
node.result = node.children[1:-1]
......@@ -240,6 +240,10 @@ def is_expendable(context: List[Node]):
return not node._result and not node.tag_name.startswith('cmd_')
def show(context: List[Node]):
    """Debugging helper: print the XML serialization of the last node
    in `context` to stdout."""
    current_node = context[-1]
    xml_text = current_node.as_xml()
    print(xml_text)
LaTeX_AST_transformation_table = {
# AST Transformations for the LaTeX-grammar
"<": [flatten, flatten_structure, remove_children_if(is_expendable)],
......
......@@ -29,7 +29,7 @@
"""
12: """\begin{generic}
invalid enivronment \end{generic}
invalid environment \end{generic}
"""
......
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/env python3
"""test_syntaxtree.py - profiling of syntaxtree-module of DHParser
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import sys
scriptpath = os.path.dirname(__file__) or '.'
sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from DHParser.syntaxtree import parse_xml
def cpu_profile(func, repetitions=1):
    """Profile repeated calls of `func` and print the collected statistics.

    `func` is called without arguments up to `repetitions` times while a
    `cProfile.Profile` is recording. The loop stops as soon as one call
    returns a falsy value. After profiling has been switched off, the
    statistics are printed to stdout with directory names stripped,
    sorted by the 'time' key and limited to 80 entries.

    Args:
        func: A callable that takes no arguments. A falsy return value
            aborts the remaining repetitions.
        repetitions: The maximum number of times `func` is called.

    Returns:
        The return value of the last call of `func`, or `True` if
        `func` was never called (i.e. `repetitions` < 1).

    Raises:
        Any exception raised by `func` is passed through unchanged; no
        statistics are printed in that case.
    """
    import cProfile
    import pstats
    profile = cProfile.Profile()
    success = True
    profile.enable()
    try:
        for _ in range(repetitions):
            success = func()
            if not success:
                break
    finally:
        # Always switch the profiler off again, even if `func` raises.
        # Otherwise the profiling hook would stay installed and distort
        # or block any later profiling run in the same process.
        profile.disable()
    # Evaluate the statistics only after the profiler has been stopped.
    stats = pstats.Stats(profile)
    stats.strip_dirs()
    stats.sort_stats('time').print_stats(80)
    return success
def profile_serializing():
    """Profile the XML-serialization of the sample documents.

    Each sample file in the `data` directory next to this script is
    read, parsed with `parse_xml`, and the `as_xml`-method of the
    resulting tree is then profiled over 100 repetitions with
    `cpu_profile`, which prints the statistics to stdout.
    """
    for filename in ('inferus.ausgabe.xml', 'testdoc3.xml'):
        # Read with an explicit encoding so that the result does not
        # depend on the platform's default encoding.
        # NOTE(review): assumes the sample files are UTF-8 encoded (the
        # XML default) -- confirm against the files in `data`.
        with open(os.path.join(scriptpath, 'data', filename), encoding='utf-8') as f:
            data = f.read()
        tree = parse_xml(data)
        cpu_profile(tree.as_xml, 100)
# Run the serialization profiling only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    profile_serializing()
......@@ -66,6 +66,10 @@ class TestParseSxpression:
assert str(tree) == "LIUTPR. leg. 21 ..."
assert tree.attr['unterbedeutungstiefe'] == '0'
def test_endlessloop_error(self):
    """Parsing an S-expression whose string content is a backslash
    must return and yield a truthy tree.
    (Presumably a regression test against an endless loop, judging
    by the test's name.)"""
    assert parse_sxpr(r'(LINEFEED "\\")')
class TestParseXML:
def test_roundtrip(self):
......@@ -92,6 +96,10 @@ class TestParseXML:
flat_xml = flatten_xml(tree.as_xml())
assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml
def test_endlessloop_error(self):
    """Parsing XML in which backslashes directly precede a tag must
    return and yield a truthy tree.
    (Presumably a regression test against an endless loop, judging
    by the test's name.)"""
    assert parse_xml(r'<LINEFEED>\\</LINEFEED>')
class TestParseJSON:
tree = parse_sxpr('(a (b ä) (d (e ö) (h über)))').with_pos(0)
......@@ -519,7 +527,6 @@ class TestSerialization:
xml = tree.as_xml(inline_tags={'A'})
assert xml == "<A><B>C</B><D>E</D></A>", xml
assert tree.as_xml() == "<A>\n <B>C</B>\n <D>E</D>\n</A>", xml
tree.attr['xml:space'] = 'preserve'
......