05.11., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit d93e749e authored by Eckhart Arnold

- syntaxtree: mock_syntax_tree renamed to parse_sxpr; ad-hoc xml-parser parse_xml added

parent ae91c19e
......@@ -54,7 +54,7 @@ def last_char(text, begin: int, end: int) -> int:
"""Returns the index just past the last non-whitespace character in
string `text` within the bounds [begin, end].
"""
while end > begin and text[end] in ' \n\t':
while end > begin and text[end-1] in ' \n\t':
end -= 1
return end
......@@ -94,14 +94,17 @@ class StringView(collections.abc.Sized):
copying, i.e. slices are just a view on a section of the sliced
string.
"""
__slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']
__slots__ = ['text', 'begin', 'end', 'len', 'fullstring']
def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
assert isinstance(text, str)
# assert isinstance(text, str)
self.text = text # type: str
self.begin, self.end = real_indices(begin, end, len(text))
self.len = max(self.end - self.begin, 0) # type: int
self.fullstring_flag = (self.begin == 0 and self.len == len(self.text)) # type: bool
if (self.begin == 0 and self.len == len(self.text)):
self.fullstring = self.text # type: str
else:
self.fullstring = ''
def __bool__(self):
return self.end > self.begin # and bool(self.text)
......@@ -111,16 +114,12 @@ class StringView(collections.abc.Sized):
def __str__(self):
# PERFORMANCE WARNING: This creates a copy of the string-slice
if self.fullstring_flag: # optimization: avoid slicing/copying
return self.text
if self.fullstring: # optimization: avoid slicing/copying
return self.fullstring
# since the slice is being copied now, anyway, the copy might
# as well be stored in the string view
self.text = self.text[self.begin:self.end]
self.begin = 0
self.len = len(self.text)
self.end = self.len
self.fullstring_flag = True
return self.text
self.fullstring = self.text[self.begin:self.end]
return self.fullstring
def __eq__(self, other):
# PERFORMANCE WARNING: This creates copies of the strings
......@@ -146,16 +145,19 @@ class StringView(collections.abc.Sized):
# assert isinstance(index, slice), "As of now, StringView only allows slicing."
# assert index.step is None or index.step == 1, \
# "Step sizes other than 1 are not yet supported by StringView"
start, stop = real_indices(index.start, index.stop, self.len)
return StringView(self.text, self.begin + start, self.begin + stop)
try:
start, stop = real_indices(index.start, index.stop, self.len)
return StringView(self.text, self.begin + start, self.begin + stop)
except AttributeError:
return self.text[self.begin + index]
def count(self, sub: str, start=None, end=None) -> int:
"""Returns the number of non-overlapping occurrences of substring
`sub` in StringView S[start:end]. Optional arguments start and end
are interpreted as in slice notation.
"""
if self.fullstring_flag:
return self.text.count(sub, start, end)
if self.fullstring:
return self.fullstring.count(sub, start, end)
elif start is None and end is None:
return self.text.count(sub, self.begin, self.end)
else:
......@@ -168,8 +170,8 @@ class StringView(collections.abc.Sized):
arguments `start` and `end` are interpreted as in slice notation.
Returns -1 on failure.
"""
if self.fullstring_flag:
return self.text.find(sub, start, end)
if self.fullstring:
return self.fullstring.find(sub, start, end)
elif start is None and end is None:
return self.text.find(sub, self.begin, self.end) - self.begin
else:
......@@ -182,8 +184,8 @@ class StringView(collections.abc.Sized):
arguments `start` and `end` are interpreted as in slice notation.
Returns -1 on failure.
"""
if self.fullstring_flag:
return self.text.rfind(sub, start, end)
if self.fullstring:
return self.fullstring.rfind(sub, start, end)
if start is None and end is None:
return self.text.rfind(sub, self.begin, self.end) - self.begin
else:
......@@ -203,9 +205,11 @@ class StringView(collections.abc.Sized):
end = self.end if end is None else self.begin + end
return self.text.startswith(prefix, start, end)
def match(self, regex):
def match(self, regex, flags=0):
"""Executes `regex.match` on the StringView object and returns the
result, which is either a match-object or None.
WARNING: match.end(), match.span() etc. are mapped to the underlying text,
not the StringView-object!!!
"""
return regex.match(self.text, pos=self.begin, endpos=self.end)
......@@ -232,19 +236,36 @@ class StringView(collections.abc.Sized):
def search(self, regex):
"""Executes regex.search on the StringView object and returns the
result, which is either a match-object or None.
WARNING: match.end(), match.span() etc. are mapped to the underlying text,
not the StringView-object!!!
"""
return regex.search(self.text, pos=self.begin, endpos=self.end)
def finditer(self, regex):
"""Executes regex.finditer on the StringView object and returns the
iterator of match objects.
WARNING: match.end(), match.span() etc. are mapped to the underlying text,
not the StringView-object!!!
"""
return regex.finditer(self.text, pos=self.begin, endpos=self.end)
def strip(self):
"""Returns a copy of the StringView `self` with leading and trailing
whitespace removed.
"""
if self.fullstring_flag:
return self.text.strip()
else:
begin = first_char(self.text, self.begin, self.end)
end = last_char(self.text, self.begin, self.end)
return self.text[begin:end]
begin = first_char(self.text, self.begin, self.end) - self.begin
end = last_char(self.text, self.begin, self.end) - self.begin
return self if begin == 0 and end == self.len else self[begin:end]
def lstrip(self):
"""Returns a copy of `self` with leading whitespace removed."""
begin = first_char(self.text, self.begin, self.end) - self.begin
return self if begin == 0 else self[begin:]
def rstrip(self):
"""Returns a copy of `self` with trailing whitespace removed."""
end = last_char(self.text, self.begin, self.end) - self.begin
return self if end == self.len else self[:end]
def split(self, sep=None):
"""Returns a list of the words in `self`, using `sep` as the
......@@ -252,8 +273,8 @@ class StringView(collections.abc.Sized):
whitespace string is a separator and empty strings are
removed from the result.
"""
if self.fullstring_flag:
return self.text.split(sep)
if self.fullstring:
return self.fullstring.split(sep)
else:
pieces = []
l = len(sep)
......
This diff is collapsed.
......@@ -39,7 +39,7 @@ import sys
from DHParser.error import is_error, adjust_error_locations
from DHParser.log import is_logging, clear_logs, log_ST, log_parsing_history
from DHParser.parse import UnknownParserError
from DHParser.syntaxtree import Node, mock_syntax_tree, flatten_sxpr, ZOMBIE_PARSER
from DHParser.syntaxtree import Node, parse_sxpr, flatten_sxpr, ZOMBIE_PARSER
from DHParser.toolkit import re, typing
from typing import Tuple
......@@ -315,12 +315,12 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
# write parsing-history log only in case of failure!
if is_logging():
log_parsing_history(parser, "match_%s_%s.log" % (parser_name, clean_test_name))
elif "cst" in tests and mock_syntax_tree(tests["cst"][test_name]) != cst:
elif "cst" in tests and parse_sxpr(tests["cst"][test_name]) != cst:
errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
(test_name, parser_name, cst.as_sxpr()))
elif "ast" in tests:
try:
compare = mock_syntax_tree(tests["ast"][test_name])
compare = parse_sxpr(tests["ast"][test_name])
except KeyError:
pass
if compare != ast:
......
......@@ -728,7 +728,7 @@ node’s parser’s <cite>ptype</cite>.</p>
<dl class="function">
<dt id="syntaxtree.mock_syntax_tree">
<code class="descname">mock_syntax_tree</code><span class="sig-paren">(</span><em>sxpr</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/syntaxtree.html#mock_syntax_tree"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#syntaxtree.mock_syntax_tree" title="Permalink to this definition"></a></dt>
<code class="descname">mock_syntax_tree</code><span class="sig-paren">(</span><em>sxpr</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/syntaxtree.html#parse_sxpr"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#syntaxtree.parse_sxpr" title="Permalink to this definition"></a></dt>
<dd><p>Generates a tree of nodes from an S-expression. The main purpose of this is
to generate test data.</p>
<p>Example:
......
......@@ -561,7 +561,7 @@
</li>
<li><a href="ModuleReference.html#parse.mixin_comment">mixin_comment() (in module parse)</a>
</li>
<li><a href="ModuleReference.html#syntaxtree.mock_syntax_tree">mock_syntax_tree() (in module syntaxtree)</a>
<li><a href="ModuleReference.html#syntaxtree.mock_syntax_tree">parse_sxpr() (in module syntaxtree)</a>
</li>
<li><a href="ModuleReference.html#syntaxtree.MockParser">MockParser (class in syntaxtree)</a>
</li>
......
......@@ -20,12 +20,12 @@ limitations under the License.
"""
from DHParser import mock_syntax_tree, Compiler
from DHParser import parse_sxpr, Compiler
class TestCompilerClass:
def test_error_propagations(self):
tree = mock_syntax_tree('(A (B 1) (C (D (E 2) (F 3))))')
tree = parse_sxpr('(A (B 1) (C (D (E 2) (F 3))))')
A = tree
B = next(tree.select(lambda node: str(node) == "1"))
D = next(tree.select(lambda node: node.parser.name == "D"))
......
......@@ -123,12 +123,16 @@ class TestStringView:
assert EMPTY_STRING_VIEW.match(re.compile(r'.*'))
assert len(EMPTY_STRING_VIEW[0:1]) == 0
def text_strip(self):
def test_strip(self):
s = StringView(' test ', 1, -1)
assert s.strip() == "test"
assert s.lstrip() == "test "
assert s.rstrip() == " test"
s = StringView(' test ', 1, -1)
assert s.strip() == "test"
s = StringView('(a (b c))')
assert s.strip() == '(a (b c))'
assert s[1:].strip() == 'a (b c))'
def text_split(self):
s = StringView(' 1,2,3,4,5 ', 1, -1)
......
......@@ -24,7 +24,7 @@ import sys
sys.path.extend(['../', './'])
from DHParser.error import Error
from DHParser.syntaxtree import Node, mock_syntax_tree, flatten_sxpr, TOKEN_PTYPE
from DHParser.syntaxtree import Node, parse_sxpr, flatten_sxpr, TOKEN_PTYPE
from DHParser.transform import traverse, reduce_single_child, \
replace_by_single_child, flatten, remove_expendables
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
......@@ -33,11 +33,13 @@ from DHParser.dsl import grammar_provider
class TestMockSyntaxTree:
def test_mock_syntax_tree(self):
tree = mock_syntax_tree('(a (b c))')
tree = mock_syntax_tree('(a i\nj\nk)')
tree = parse_sxpr('(a (b c))')
assert flatten_sxpr(tree.as_sxpr()) == '(a (b "c"))', flatten_sxpr(tree.as_sxpr())
tree = parse_sxpr('(a i\nj\nk)')
assert flatten_sxpr(tree.as_sxpr()) == '(a "i" "j" "k")', flatten_sxpr(tree.as_sxpr())
try:
tree = mock_syntax_tree('a b c')
assert False, "mock_syntax_tree() should raise a ValueError " \
tree = parse_sxpr('a b c')
assert False, "parse_sxpr() should raise a ValueError " \
"if argument is not a tree!"
except ValueError:
pass
......@@ -49,9 +51,9 @@ class TestNode:
"""
def setup(self):
self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
self.unique_tree = mock_syntax_tree(self.unique_nodes_sexpr)
self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
self.recurr_tree = mock_syntax_tree(self.recurring_nodes_sexpr)
self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)
def test_str(self):
assert str(self.unique_tree) == "ceh"
......@@ -68,8 +70,8 @@ class TestNode:
def test_equality1(self):
assert self.unique_tree == self.unique_tree
assert self.recurr_tree != self.unique_tree
assert mock_syntax_tree('(a (b c))') != mock_syntax_tree('(a (b d))')
assert mock_syntax_tree('(a (b c))') == mock_syntax_tree('(a (b c))')
assert parse_sxpr('(a (b c))') != parse_sxpr('(a (b d))')
assert parse_sxpr('(a (b c))') == parse_sxpr('(a (b c))')
def test_equality2(self):
ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
......@@ -80,7 +82,7 @@ class TestNode:
parser = grammar_provider(ebnf)()
tree = parser("20 / 4 * 3")
traverse(tree, att)
compare_tree = mock_syntax_tree("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
compare_tree = parse_sxpr("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
assert tree == compare_tree, tree.as_sxpr()
def test_copy(self):
......@@ -126,7 +128,7 @@ class TestNode:
assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos
def test_collect_errors(self):
tree = mock_syntax_tree('(A (B 1) (C (D (E 2) (F 3))))')
tree = parse_sxpr('(A (B 1) (C (D (E 2) (F 3))))')
A = tree
B = next(tree.select(lambda node: str(node) == "1"))
D = next(tree.select(lambda node: node.parser.name == "D"))
......@@ -145,7 +147,7 @@ class TestNode:
class TestErrorHandling:
def test_error_flag_propagation(self):
tree = mock_syntax_tree('(a (b c) (d (e (f (g h)))))')
tree = parse_sxpr('(a (b c) (d (e (f (g h)))))')
def find_h(context):
node = context[-1]
......@@ -154,7 +156,7 @@ class TestErrorHandling:
assert not tree.error_flag
traverse(tree, {"*": find_h})
assert tree.error_flag
assert tree.error_flag, tree.as_sxpr()
class TestNodeFind():
......@@ -165,33 +167,34 @@ class TestNodeFind():
def match_tag_name(node, tag_name):
return node.tag_name == tag_name
matchf = lambda node: match_tag_name(node, "X")
tree = mock_syntax_tree('(a (b X) (X (c d)) (e (X F)))')
tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
matches = list(tree.select(matchf))
assert len(matches) == 2, len(matches)
assert str(matches[0]) == 'd', str(matches[0])
assert str(matches[1]) == 'F', str(matches[1])
assert matches[0] == mock_syntax_tree('(X (c d))')
assert matches[1] == mock_syntax_tree('(X F)')
assert matches[0] == parse_sxpr('(X (c d))')
assert matches[1] == parse_sxpr('(X F)')
# check default: root is included in search:
matchf2 = lambda node: match_tag_name(node, 'a')
assert list(tree.select(matchf2))
assert not list(tree.select(matchf2, include_root=False))
def test_getitem(self):
tree = mock_syntax_tree('(a (b X) (X (c d)) (e (X F)))')
assert tree[0] == mock_syntax_tree('(b X)')
assert tree[2] == mock_syntax_tree('(e (X F))')
tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
# print(tree.as_sxpr())
assert tree[0] == parse_sxpr('(b X)')
assert tree[2] == parse_sxpr('(e (X F))')
try:
node = tree[3]
assert False, "IndexError expected!"
except IndexError:
pass
matches = list(tree.select_by_tag('X', False))
assert matches[0] == mock_syntax_tree('(X (c d))')
assert matches[1] == mock_syntax_tree('(X F)')
assert matches[0] == parse_sxpr('(X (c d))')
assert matches[1] == parse_sxpr('(X F)')
def test_contains(self):
tree = mock_syntax_tree('(a (b X) (X (c d)) (e (X F)))')
tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
assert 'a' not in tree
assert any(tree.select_by_tag('a', True))
assert not any(tree.select_by_tag('a', False))
......@@ -204,12 +207,12 @@ class TestNodeFind():
class TestSerialization:
def test_attributes(self):
tree = mock_syntax_tree('(A "B")')
tree = parse_sxpr('(A "B")')
tree.attributes['attr'] = "value"
tree2 = mock_syntax_tree('(A `(attr "value") "B")')
tree2 = parse_sxpr('(A `(attr "value") "B")')
assert tree.as_sxpr() == tree2.as_sxpr()
tree.attributes['attr2'] = "value2"
tree3 = mock_syntax_tree('(A `(attr "value") `(attr2 "value2") "B")')
tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
assert tree.as_sxpr() == tree3.as_sxpr()
......
......@@ -26,7 +26,7 @@ from functools import partial
sys.path.extend(['../', './'])
from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr, TOKEN_PTYPE
from DHParser.syntaxtree import parse_sxpr, flatten_sxpr, TOKEN_PTYPE
from DHParser.transform import traverse, remove_expendables, \
replace_by_single_child, reduce_single_child, flatten
from DHParser.dsl import grammar_provider
......@@ -240,25 +240,25 @@ class TestSExpr:
def test_mock_syntax_tree(self):
sexpr = '(a (b c) (d e) (f (g h)))'
tree = mock_syntax_tree(sexpr)
tree = parse_sxpr(sexpr)
assert flatten_sxpr(tree.as_sxpr().replace('"', '')) == sexpr
# test different quotation marks
sexpr = '''(a (b """c""" 'k' "l") (d e) (f (g h)))'''
sexpr_stripped = '(a (b c k l) (d e) (f (g h)))'
tree = mock_syntax_tree(sexpr)
tree = parse_sxpr(sexpr)
assert flatten_sxpr(tree.as_sxpr().replace('"', '')) == sexpr_stripped
sexpr_clean = '(a (b "c" "k" "l") (d "e") (f (g "h")))'
tree = mock_syntax_tree(sexpr_clean)
tree = parse_sxpr(sexpr_clean)
assert flatten_sxpr(tree.as_sxpr()) == sexpr_clean
tree = mock_syntax_tree(sexpr_stripped)
tree = parse_sxpr(sexpr_stripped)
assert flatten_sxpr(tree.as_sxpr()) == '(a (b "c k l") (d "e") (f (g "h")))'
def test_mock_syntax_tree_with_classes(self):
sexpr = '(a:class1 (b:class2 x) (:class3 y) (c z))'
tree = mock_syntax_tree(sexpr)
tree = parse_sxpr(sexpr)
assert tree.tag_name == 'a'
assert tree.result[0].tag_name == 'b'
assert tree.result[1].tag_name == ':class3'
......
......@@ -24,7 +24,7 @@ import sys
sys.path.extend(['../', './'])
from DHParser.syntaxtree import mock_syntax_tree
from DHParser.syntaxtree import parse_sxpr
from DHParser.transform import traverse, reduce_single_child, remove_whitespace, \
traverse_locally, collapse, lstrip, rstrip, remove_content
......@@ -33,46 +33,46 @@ class TestRemoval:
"""Tests removing transformations."""
def test_lstrip(self):
cst = mock_syntax_tree('(Token (:Whitespace " ") (:Re test))')
cst = parse_sxpr('(Token (:Whitespace " ") (:Re test))')
lstrip([cst])
assert cst.as_sxpr().find(":Whitespace") < 0
sxpr1 = cst.as_sxpr()
lstrip([cst])
assert sxpr1 == cst.as_sxpr()
cst = mock_syntax_tree('(Token)')
cst = parse_sxpr('(Token)')
lstrip([cst])
assert cst.as_sxpr() == '(Token)'
cst = mock_syntax_tree('(Token (:Whitespace " ") (:Whitespace " ") (:Re test))')
cst = parse_sxpr('(Token (:Whitespace " ") (:Whitespace " ") (:Re test))')
lstrip([cst])
assert cst.as_sxpr().find(":Whitespace") < 0
cst = mock_syntax_tree('(Token (:Whitespace " ") (Deeper (:Whitespace " ")) '
cst = parse_sxpr('(Token (:Whitespace " ") (Deeper (:Whitespace " ")) '
'(:Whitespace " ") (:Re test))')
lstrip([cst])
assert cst.as_sxpr().find(":Whitespace") < 0
cst = mock_syntax_tree('(Token (:Re ein) (:Whitespace " ") (:Re test))')
cst = parse_sxpr('(Token (:Re ein) (:Whitespace " ") (:Re test))')
lstrip([cst])
assert cst.as_sxpr().find(":Whitespace") >= 0
def test_rstrip(self):
cst = mock_syntax_tree('(Token (:Re test) (:Whitespace " "))')
cst = parse_sxpr('(Token (:Re test) (:Whitespace " "))')
rstrip([cst])
assert cst.as_sxpr().find(":Whitespace") < 0, cst.as_sxpr()
sxpr1 = cst.as_sxpr()
rstrip([cst])
assert sxpr1 == cst.as_sxpr()
cst = mock_syntax_tree('(Token)')
cst = parse_sxpr('(Token)')
rstrip([cst])
assert cst.as_sxpr() == '(Token)'
cst = mock_syntax_tree('(Token (:Re test) (:Whitespace " ") (:Whitespace " "))')
cst = parse_sxpr('(Token (:Re test) (:Whitespace " ") (:Whitespace " "))')
rstrip([cst])
assert cst.as_sxpr().find(":Whitespace") < 0
cst = mock_syntax_tree('(Token (:Re test) (:Whitespace " ") (Deeper (:Whitespace " ")) '
cst = parse_sxpr('(Token (:Re test) (:Whitespace " ") (Deeper (:Whitespace " ")) '
'(:Whitespace " "))')
rstrip([cst])
assert cst.as_sxpr().find(":Whitespace") < 0, cst.as_sxpr()
def test_remove_content(self):
cst = mock_syntax_tree('(BelegLemma (:Series (:RegExp "#") (LAT_WORT (:RegExp "facitergula"))))')
cst = parse_sxpr('(BelegLemma (:Series (:RegExp "#") (LAT_WORT (:RegExp "facitergula"))))')
remove_content([cst], '#')
assert cst.content == "#facitergula", str(cst.content)
reduce_single_child([cst])
......@@ -84,7 +84,7 @@ class TestConditionalTransformations:
"""Tests conditional transformations."""
def test_traverse_locally(self):
cst = mock_syntax_tree("""
cst = parse_sxpr("""
(Lemma
(LemmaVariante
(LAT_WORT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment