Commit 8dd3f2de authored by di68kap

- DHParser/syntaxtree.py: refactoring of select and pick

parent a903b09d
......@@ -21,8 +21,8 @@ cdef class Node:
# cpdef _tree_repr(self, tab, open_fn, close_fn, data_fn, density, inline, inline_fn)
# cpdef as_sxpr(self, src, indentation, compact)
# cpdef as_xml(self, src, indentation, inline_tags, omit_tags, empty_tags)
# cpdef select(self, match_function, include_root, reverse)
# cpdef select_by_tag(self, tag_names, include_root)
# cpdef select_if(self, match_function, include_root, reverse)
# cpdef select(self, tag_names, include_root)
cpdef pick(self, tag_names)
# cpdef tree_size(self)
cpdef to_json_obj(self)
......
......@@ -507,27 +507,11 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
i += 1
raise ValueError("Node with tag name '%s' not among child-nodes." % tag_name)
def select(self, match_function: Callable, include_root: bool = False, reverse: bool = False) \
def select_if(self, match_function: Callable, include_root: bool = False, reverse: bool = False) \
-> Iterator['Node']:
"""
Finds nodes in the tree that fulfill a given criterion.
`select` is a generator that yields all nodes for which the
given `match_function` evaluates to True. The tree is
traversed pre-order.
See function `Node.select_by_tag` for some examples.
Args:
match_function (function): A function that takes a Node
object as argument and returns True or False
include_root (bool): If False, only descendant nodes will be
checked for a match.
reverse (bool): If True, the tree will be walked in reverse
order, i.e. last children first.
Yields:
Node: All nodes of the tree for which
``match_function(node)`` returns True
Finds nodes in the tree for which `match_function` returns True.
See the more general function `Node.select()` for a detailed description.
"""
if include_root and match_function(self):
yield self
......@@ -535,21 +519,20 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
for child in child_iterator:
if match_function(child):
yield child
yield from child.select(match_function, False, reverse)
yield from child.select_if(match_function, False, reverse)
def select_by_tag(self, criterion: Union[str, Container[str], Callable],
include_root: bool = False, reverse: bool = False) -> Iterator['Node']:
def select(self, criterion: Union[str, Container[str], Callable],
include_root: bool = False, reverse: bool = False) -> Iterator['Node']:
"""
Finds nodes in the tree that fulfill a given criterion. This criterion
can either be general, if criterion is a Callable or a tag_name or
a set of tag_names.
`select` is a generator that yields all nodes for which the
`select` returns an iterator over all nodes that fulfill the
given `criterion`. The tree is
traversed pre-order.
See function `Node.select_by_tag` for some examples.
See the examples below.
Args:
criterion: A function that takes a Node
......@@ -565,15 +548,15 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
Examples::
>>> tree = parse_sxpr('(a (b "X") (X (c "d")) (e (X "F")))')
>>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_by_tag("X", False))
>>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select("X", False))
['(X (c "d"))', '(X "F")']
>>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_by_tag({"X", "b"}, False))
>>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select({"X", "b"}, False))
['(b "X")', '(X (c "d"))', '(X "F")']
>>> any(tree.select_by_tag('a', False))
>>> any(tree.select('a', False))
False
>>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_by_tag('a', True))
>>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select('a', True))
['(a (b "X") (X (c "d")) (e (X "F")))']
>>> flatten_sxpr(next(tree.select_by_tag("X", False)).as_sxpr())
>>> flatten_sxpr(next(tree.select("X", False)).as_sxpr())
'(X (c "d"))'
Args:
......@@ -585,24 +568,27 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
Node: All nodes which have a given tag name.
"""
if isinstance(criterion, str):
return self.select(lambda node: node.tag_name == criterion, include_root, reverse)
return self.select_if(lambda node: node.tag_name == criterion, include_root, reverse)
elif isinstance(criterion, Container):
return self.select(lambda node: node.tag_name in criterion, include_root, reverse)
return self.select_if(lambda node: node.tag_name in criterion, include_root, reverse)
else:
assert isinstance(criterion, Callable)
return self._select(criterion, include_root, reverse)
return self.select_if(criterion, include_root, reverse)
def pick(self, tag_names: Union[str, Set[str]], reverse: bool = False) -> Optional['Node']:
def pick(self, criterion: Union[str, Container[str], Callable],
reverse: bool = False) -> Optional['Node']:
"""
Picks the first descendant with one of the given tag_names.
Picks the first (or last if run in reverse mode) descendant that fulfills
the given criterion which can be either a match-function or a tag-name or
a container of tag-names.
This function is mostly just syntactic sugar for
``next(node.select_by_tag(tag_names, False))``. However, rather than
``next(node.select(criterion, False))``. However, rather than
raising a StopIteration exception if no descendant that fulfills the criterion
exists, it returns None.
"""
try:
return next(self.select_by_tag(tag_names, False, reverse))
return next(self.select(criterion, False, reverse))
except StopIteration:
return None
......@@ -927,7 +913,7 @@ def tree_sanity_check(tree: Node) -> bool:
:return: True, if the tree is `sane`, False otherwise.
"""
node_set = set() # type: Set[Node]
for node in tree.select(lambda nd: True, include_root=True):
for node in tree.select_if(lambda nd: True, include_root=True):
if node in node_set or isinstance(Node, FrozenNode):
return False
node_set.add(node)
......@@ -1065,7 +1051,7 @@ class RootNode(Node):
if nid == node_id:
errors.extend(self.error_nodes[nid])
else:
for nd in node.select(lambda n: id(n) == nid):
for nd in node.select_if(lambda n: id(n) == nid):
break
else:
# node is not connected to tree any more, but since errors
......
......@@ -360,9 +360,9 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
if is_artifact:
# don't remove zombie node with error message at the end
# but change its tag_name to indicate that it is an artifact!
for parent in st.select(lambda node: any(child.tag_name == ZOMBIE_TAG
for child in node.children),
include_root=True, reverse=True):
for parent in st.select_if(lambda node: any(child.tag_name == ZOMBIE_TAG
for child in node.children),
include_root=True, reverse=True):
zombie = parent[ZOMBIE_TAG]
zombie.tag_name = '__TESTING_ARTIFACT__'
zombie.result = 'Artifact can be ignored. Be aware, though, that also the' \
......
......@@ -178,7 +178,7 @@ def is_filename(strg: str) -> bool:
return strg.find('\n') < 0 and strg[:1] != " " and strg[-1:] != " " \
and all(strg.find(ch) < 0 for ch in '*?"<>|')
# and strg.select('*') < 0 and strg.select('?') < 0
# and strg.find('*') < 0 and strg.find('?') < 0
def concurrent_ident() -> str:
......
......@@ -30,9 +30,9 @@ from DHParser import parse_sxpr, Compiler
# def test_error_propagations(self):
# tree = parse_sxpr('(A (B 1) (C (D (E 2) (F 3))))')
# A = tree
# B = next(tree.select(lambda node: str(node) == "1"))
# D = next(tree.select(lambda node: node.parser.name == "D"))
# F = next(tree.select(lambda node: str(node) == "3"))
# B = next(tree.select_if(lambda node: str(node) == "1"))
# D = next(tree.select_if(lambda node: node.parser.name == "D"))
# F = next(tree.select_if(lambda node: str(node) == "3"))
# B.new_error("Error in child node")
# F.new_error("Error in child's child node")
# Compiler.propagate_error_flags(tree, lazy=True)
......
......@@ -154,7 +154,7 @@ class TestEBNFParser:
result = self.EBNF(snippet, 'literal')
assert not result.error_flag
assert str(result) == snippet.strip()
assert result.select(lambda node: node.parser.ptype == WHITESPACE_PTYPE)
assert result.select_if(lambda node: node.parser.ptype == WHITESPACE_PTYPE)
result = self.EBNF('"text" ', 'literal')
assert not result.error_flag
......
......@@ -558,8 +558,8 @@ class TestPopRetrieve:
teststr = "Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende"
syntax_tree = self.minilang_parser(teststr)
assert not syntax_tree.errors_sorted
delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="delimiter"))))
pop = str(next(syntax_tree.select(self.closing_delimiter)))
delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="delimiter"))))
pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
assert delim == pop
if is_logging():
log_ST(syntax_tree, "test_PopRetrieve_single_line.cst")
......@@ -575,8 +575,8 @@ class TestPopRetrieve:
"""
syntax_tree = self.minilang_parser(teststr)
assert not syntax_tree.errors_sorted
delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="delimiter"))))
pop = str(next(syntax_tree.select(self.closing_delimiter)))
delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="delimiter"))))
pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
assert delim == pop
if is_logging():
log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst")
......@@ -585,8 +585,8 @@ class TestPopRetrieve:
teststr = "Anfang {{{code block }} <- keine Ende-Zeichen ! }}} Ende"
syntax_tree = self.minilang_parser2(teststr)
assert not syntax_tree.errors_sorted
delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="braces"))))
pop = str(next(syntax_tree.select(self.closing_delimiter)))
delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="braces"))))
pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
assert len(delim) == len(pop) and delim != pop
if is_logging():
log_ST(syntax_tree, "test_PopRetrieve_single_line.cst")
......@@ -602,8 +602,8 @@ class TestPopRetrieve:
"""
syntax_tree = self.minilang_parser2(teststr)
assert not syntax_tree.errors_sorted
delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="braces"))))
pop = str(next(syntax_tree.select(self.closing_delimiter)))
delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="braces"))))
pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
assert len(delim) == len(pop) and delim != pop
if is_logging():
log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst")
......@@ -823,12 +823,12 @@ class TestEarlyTokenWhitespaceDrop:
assert not cst.pick(':Whitespace')
cst = self.gr('A + B')
try:
_ = next(cst.select(lambda node: node.content == 'A'))
_ = next(cst.select_if(lambda node: node.content == 'A'))
assert False, "Tokens in compound expressions should be dropped!"
except StopIteration:
pass
cst = self.gr('X * y')
assert next(cst.select(lambda node: node.content == 'X'))
assert next(cst.select_if(lambda node: node.content == 'X'))
class TestMetaParser:
......
......@@ -172,14 +172,14 @@ class TestNode:
def test_select_subnodes(self):
tags = [node.tag_name
for node in self.unique_tree.select(lambda nd: True, include_root=True)]
for node in self.unique_tree.select_if(lambda nd: True, include_root=True)]
assert ''.join(tags) == "abdfg", ''.join(tags)
def test_find(self):
found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e"))
found = list(self.unique_tree.select_if(lambda nd: not nd.children and nd.result == "e"))
assert len(found) == 1
assert found[0].result == 'e'
found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
found = list(self.recurr_tree.select_if(lambda nd: nd.tag_name == 'b'))
assert len(found) == 2
assert found[0].result == 'x' and found[1].result == 'y'
......@@ -274,7 +274,7 @@ class TestRootNode:
class TestNodeFind():
"""Test the select-functions of class Node.
"""Test the select_if-functions of class Node.
"""
def test_find(self):
......@@ -282,7 +282,7 @@ class TestNodeFind():
return node.tag_name == tag_name
matchf = lambda node: match_tag_name(node, "X")
tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
matches = list(tree.select(matchf))
matches = list(tree.select_if(matchf))
assert len(matches) == 2, len(matches)
assert str(matches[0]) == 'd', str(matches[0])
assert str(matches[1]) == 'F', str(matches[1])
......@@ -290,8 +290,8 @@ class TestNodeFind():
assert matches[1].equals(parse_sxpr('(X F)'))
# check that the root is only included in the search if include_root is True:
matchf2 = lambda node: match_tag_name(node, 'a')
assert list(tree.select(matchf2, include_root=True))
assert not list(tree.select(matchf2, include_root=False))
assert list(tree.select_if(matchf2, include_root=True))
assert not list(tree.select_if(matchf2, include_root=False))
def test_getitem(self):
tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
......@@ -302,20 +302,20 @@ class TestNodeFind():
assert False, "IndexError expected!"
except IndexError:
pass
matches = list(tree.select_by_tag('X', False))
matches = list(tree.select('X', False))
assert matches[0].equals(parse_sxpr('(X (c d))'))
assert matches[1].equals(parse_sxpr('(X F)'))
def test_contains(self):
tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
assert 'a' not in tree
assert any(tree.select_by_tag('a', True))
assert not any(tree.select_by_tag('a', False))
assert any(tree.select('a', True))
assert not any(tree.select('a', False))
assert 'b' in tree
assert 'X' in tree
assert 'e' in tree
assert 'c' not in tree
assert any(tree.select_by_tag('c', False))
assert any(tree.select('c', False))
def test_index(self):
tree = parse_sxpr('(a (b 0) (c 1) (d 2))')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment