Commit af62f375 authored by eckhart

syntaxtree.py: parse_xml() bugfix: endless loop

parent 02d3dc78
......@@ -1100,7 +1100,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
tail = close_fn(self)
if not self.result:
return [head, tail]
return [head + tail]
inline = inline or inline_fn(self)
if inline:
......@@ -1143,7 +1143,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
if density & 1 and res.find('\n') < 0:
# except for XML, add a gap between opening statement and content
gap = ' ' if not inline and head and head[-1:] != '>' else ''
return [head + gap + data_fn(res) + tail]
return [''.join((head, gap, data_fn(res), tail))]
else:
lines = [data_fn(s) for s in res.split('\n')]
N = len(lines)
......@@ -1155,7 +1155,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
i += 1
while k >= 0 and not lines[k]:
k -= 1
content = [head, hlf, usetab] if hlf else [head, usetab]
content = [head, hlf, usetab] if hlf else [head + usetab]
for line in lines[i:k]:
content.append(line)
content.append('\n')
......@@ -1992,8 +1992,9 @@ def parse_xml(xml: Union[str, StringView], ignore_pos: bool = False) -> Node:
closing or solitary tag is reached.
"""
i = 0
while s[i] != "<" or s[max(0, i - 1)] == "\\":
i = s.find("<", i)
while s[i] != "<": # or s[max(0, i - 1)] == "\\":
i = s.find("<", i + 1)
assert i > 0
return s[i:], s[:i]
def parse_full_content(s: StringView) -> Tuple[StringView, Node]:
......
This diff is collapsed.
......@@ -23,6 +23,7 @@ import os
import sys
scriptpath = os.path.dirname(__file__) or '.'
sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from DHParser.syntaxtree import parse_xml
......@@ -51,6 +52,10 @@ def profile_serializing():
data = f.read()
tree = parse_xml(data)
success = cpu_profile(tree.as_xml, 100)
with open(os.path.join(scriptpath, 'data', 'testdoc3.xml')) as f:
data = f.read()
tree = parse_xml(data)
success = cpu_profile(tree.as_xml, 100)
if __name__ == "__main__":
......
......@@ -66,6 +66,10 @@ class TestParseSxpression:
assert str(tree) == "LIUTPR. leg. 21 ..."
assert tree.attr['unterbedeutungstiefe'] == '0'
def test_endlessloop_error(self):
    """Regression check for the endless-loop bug in S-expression parsing.

    Parses an S-expression whose quoted content consists only of
    backslashes and asserts that a truthy tree comes back.
    """
    parsed = parse_sxpr(r'(LINEFEED "\\")')
    assert parsed
class TestParseXML:
def test_roundtrip(self):
......@@ -92,6 +96,10 @@ class TestParseXML:
flat_xml = flatten_xml(tree.as_xml())
assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml
def test_endlessloop_error(self):
    """Regression check for the endless-loop bug in ``parse_xml``.

    Parses an XML element whose text content consists only of
    backslashes and asserts that a truthy tree comes back.
    """
    parsed = parse_xml(r'<LINEFEED>\\</LINEFEED>')
    assert parsed
class TestParseJSON:
tree = parse_sxpr('(a (b ä) (d (e ö) (h über)))').with_pos(0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment