Commit 8dd3f2de authored by di68kap's avatar di68kap

- DHParser/syntaxtree.py: refactoring of select and pick

parent a903b09d
...@@ -21,8 +21,8 @@ cdef class Node: ...@@ -21,8 +21,8 @@ cdef class Node:
# cpdef _tree_repr(self, tab, open_fn, close_fn, data_fn, density, inline, inline_fn) # cpdef _tree_repr(self, tab, open_fn, close_fn, data_fn, density, inline, inline_fn)
# cpdef as_sxpr(self, src, indentation, compact) # cpdef as_sxpr(self, src, indentation, compact)
# cpdef as_xml(self, src, indentation, inline_tags, omit_tags, empty_tags) # cpdef as_xml(self, src, indentation, inline_tags, omit_tags, empty_tags)
# cpdef select(self, match_function, include_root, reverse) # cpdef select_if(self, match_function, include_root, reverse)
# cpdef select_by_tag(self, tag_names, include_root) # cpdef select(self, tag_names, include_root)
cpdef pick(self, tag_names) cpdef pick(self, tag_names)
# cpdef tree_size(self) # cpdef tree_size(self)
cpdef to_json_obj(self) cpdef to_json_obj(self)
......
...@@ -507,27 +507,11 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -507,27 +507,11 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
i += 1 i += 1
raise ValueError("Node with tag name '%s' not among child-nodes." % tag_name) raise ValueError("Node with tag name '%s' not among child-nodes." % tag_name)
def select(self, match_function: Callable, include_root: bool = False, reverse: bool = False) \ def select_if(self, match_function: Callable, include_root: bool = False, reverse: bool = False) \
-> Iterator['Node']: -> Iterator['Node']:
""" """
Finds nodes in the tree that fulfill a given criterion. Finds nodes in the tree for which `match_function` returns True.
See the more general function `Node.select()` for a detailed description.
`select` is a generator that yields all nodes for which the
given `match_function` evaluates to True. The tree is
traversed pre-order.
See function `Node.select_by_tag` for some examples.
Args:
match_function (function): A function that takes as Node
object as argument and returns True or False
include_root (bool): If False, only descendant nodes will be
checked for a match.
reverse (bool): If True, the tree will be walked in reverse
order, i.e. last children first.
Yields:
Node: All nodes of the tree for which
``match_function(node)`` returns True
""" """
if include_root and match_function(self): if include_root and match_function(self):
yield self yield self
...@@ -535,21 +519,20 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -535,21 +519,20 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
for child in child_iterator: for child in child_iterator:
if match_function(child): if match_function(child):
yield child yield child
yield from child.select(match_function, False, reverse) yield from child.select_if(match_function, False, reverse)
def select_by_tag(self, criterion: Union[str, Container[str], Callable], def select(self, criterion: Union[str, Container[str], Callable],
include_root: bool = False, reverse: bool = False) -> Iterator['Node']: include_root: bool = False, reverse: bool = False) -> Iterator['Node']:
""" """
Finds nodes in the tree that fulfill a given criterion. This criterion Finds nodes in the tree that fulfill a given criterion. This criterion
can either be general, if criterion is a Callable or a tag_name or can either be general, if criterion is a Callable or a tag_name or
a set of tag_names. a set of tag_names.
`select` is a generator that yields all nodes for which the `select_if` is a generator that yields all nodes for which the
given `match_function` evaluates to True. The tree is given `match_function` evaluates to True. The tree is
traversed pre-order. traversed pre-order.
See function `Node.select_by_tag` for some examples. See function `Node.select` for some examples.
Args: Args:
criterion: A function that takes as Node criterion: A function that takes as Node
...@@ -565,15 +548,15 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -565,15 +548,15 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
Examples:: Examples::
>>> tree = parse_sxpr('(a (b "X") (X (c "d")) (e (X "F")))') >>> tree = parse_sxpr('(a (b "X") (X (c "d")) (e (X "F")))')
>>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_by_tag("X", False)) >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select("X", False))
['(X (c "d"))', '(X "F")'] ['(X (c "d"))', '(X "F")']
>>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_by_tag({"X", "b"}, False)) >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select({"X", "b"}, False))
['(b "X")', '(X (c "d"))', '(X "F")'] ['(b "X")', '(X (c "d"))', '(X "F")']
>>> any(tree.select_by_tag('a', False)) >>> any(tree.select('a', False))
False False
>>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select_by_tag('a', True)) >>> list(flatten_sxpr(item.as_sxpr()) for item in tree.select('a', True))
['(a (b "X") (X (c "d")) (e (X "F")))'] ['(a (b "X") (X (c "d")) (e (X "F")))']
>>> flatten_sxpr(next(tree.select_by_tag("X", False)).as_sxpr()) >>> flatten_sxpr(next(tree.select("X", False)).as_sxpr())
'(X (c "d"))' '(X (c "d"))'
Args: Args:
...@@ -585,24 +568,27 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -585,24 +568,27 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
Node: All nodes which have a given tag name. Node: All nodes which have a given tag name.
""" """
if isinstance(criterion, str): if isinstance(criterion, str):
return self.select(lambda node: node.tag_name == criterion, include_root, reverse) return self.select_if(lambda node: node.tag_name == criterion, include_root, reverse)
elif isinstance(criterion, Container): elif isinstance(criterion, Container):
return self.select(lambda node: node.tag_name in criterion, include_root, reverse) return self.select_if(lambda node: node.tag_name in criterion, include_root, reverse)
else: else:
assert isinstance(criterion, Callable) assert isinstance(criterion, Callable)
return self._select(criterion, include_root, reverse) return self.select_if(criterion, include_root, reverse)
def pick(self, tag_names: Union[str, Set[str]], reverse: bool = False) -> Optional['Node']: def pick(self, criterion: Union[str, Container[str], Callable],
reverse: bool = False) -> Optional['Node']:
""" """
Picks the first descendant with one of the given tag_names. Picks the first (or last if run in reverse mode) descendant that fulfills
the given criterion which can be either a match-function or a tag-name or
a container of tag-names.
This function is mostly just syntactic sugar for This function is mostly just syntactic sugar for
``next(node.select_by_tag(tag_names, False))``. However, rather than ``next(node.select(criterion, False))``. However, rather than
raising a StopIteration exception if no descendant with the given tag-name raising a StopIteration exception if no descendant with the given tag-name
exists, it returns None. exists, it returns None.
""" """
try: try:
return next(self.select_by_tag(tag_names, False, reverse)) return next(self.select(criterion, False, reverse))
except StopIteration: except StopIteration:
return None return None
...@@ -927,7 +913,7 @@ def tree_sanity_check(tree: Node) -> bool: ...@@ -927,7 +913,7 @@ def tree_sanity_check(tree: Node) -> bool:
:return: True, if the tree is `sane`, False otherwise. :return: True, if the tree is `sane`, False otherwise.
""" """
node_set = set() # type: Set[Node] node_set = set() # type: Set[Node]
for node in tree.select(lambda nd: True, include_root=True): for node in tree.select_if(lambda nd: True, include_root=True):
if node in node_set or isinstance(Node, FrozenNode): if node in node_set or isinstance(Node, FrozenNode):
return False return False
node_set.add(node) node_set.add(node)
...@@ -1065,7 +1051,7 @@ class RootNode(Node): ...@@ -1065,7 +1051,7 @@ class RootNode(Node):
if nid == node_id: if nid == node_id:
errors.extend(self.error_nodes[nid]) errors.extend(self.error_nodes[nid])
else: else:
for nd in node.select(lambda n: id(n) == nid): for nd in node.select_if(lambda n: id(n) == nid):
break break
else: else:
# node is not connected to tree any more, but since errors # node is not connected to tree any more, but since errors
......
...@@ -360,9 +360,9 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve ...@@ -360,9 +360,9 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
if is_artifact: if is_artifact:
# don't remove zombie node with error message at the end # don't remove zombie node with error message at the end
# but change its tag_name to indicate that it is an artifact! # but change its tag_name to indicate that it is an artifact!
for parent in st.select(lambda node: any(child.tag_name == ZOMBIE_TAG for parent in st.select_if(lambda node: any(child.tag_name == ZOMBIE_TAG
for child in node.children), for child in node.children),
include_root=True, reverse=True): include_root=True, reverse=True):
zombie = parent[ZOMBIE_TAG] zombie = parent[ZOMBIE_TAG]
zombie.tag_name = '__TESTING_ARTIFACT__' zombie.tag_name = '__TESTING_ARTIFACT__'
zombie.result = 'Artifact can be ignored. Be aware, though, that also the' \ zombie.result = 'Artifact can be ignored. Be aware, though, that also the' \
......
...@@ -178,7 +178,7 @@ def is_filename(strg: str) -> bool: ...@@ -178,7 +178,7 @@ def is_filename(strg: str) -> bool:
return strg.find('\n') < 0 and strg[:1] != " " and strg[-1:] != " " \ return strg.find('\n') < 0 and strg[:1] != " " and strg[-1:] != " " \
and all(strg.find(ch) < 0 for ch in '*?"<>|') and all(strg.find(ch) < 0 for ch in '*?"<>|')
# and strg.select('*') < 0 and strg.select('?') < 0 # and strg.select_if('*') < 0 and strg.select_if('?') < 0
def concurrent_ident() -> str: def concurrent_ident() -> str:
......
...@@ -30,9 +30,9 @@ from DHParser import parse_sxpr, Compiler ...@@ -30,9 +30,9 @@ from DHParser import parse_sxpr, Compiler
# def test_error_propagations(self): # def test_error_propagations(self):
# tree = parse_sxpr('(A (B 1) (C (D (E 2) (F 3))))') # tree = parse_sxpr('(A (B 1) (C (D (E 2) (F 3))))')
# A = tree # A = tree
# B = next(tree.select(lambda node: str(node) == "1")) # B = next(tree.select_if(lambda node: str(node) == "1"))
# D = next(tree.select(lambda node: node.parser.name == "D")) # D = next(tree.select_if(lambda node: node.parser.name == "D"))
# F = next(tree.select(lambda node: str(node) == "3")) # F = next(tree.select_if(lambda node: str(node) == "3"))
# B.new_error("Error in child node") # B.new_error("Error in child node")
# F.new_error("Error in child's child node") # F.new_error("Error in child's child node")
# Compiler.propagate_error_flags(tree, lazy=True) # Compiler.propagate_error_flags(tree, lazy=True)
......
...@@ -154,7 +154,7 @@ class TestEBNFParser: ...@@ -154,7 +154,7 @@ class TestEBNFParser:
result = self.EBNF(snippet, 'literal') result = self.EBNF(snippet, 'literal')
assert not result.error_flag assert not result.error_flag
assert str(result) == snippet.strip() assert str(result) == snippet.strip()
assert result.select(lambda node: node.parser.ptype == WHITESPACE_PTYPE) assert result.select_if(lambda node: node.parser.ptype == WHITESPACE_PTYPE)
result = self.EBNF('"text" ', 'literal') result = self.EBNF('"text" ', 'literal')
assert not result.error_flag assert not result.error_flag
......
...@@ -558,8 +558,8 @@ class TestPopRetrieve: ...@@ -558,8 +558,8 @@ class TestPopRetrieve:
teststr = "Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende" teststr = "Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende"
syntax_tree = self.minilang_parser(teststr) syntax_tree = self.minilang_parser(teststr)
assert not syntax_tree.errors_sorted assert not syntax_tree.errors_sorted
delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="delimiter")))) delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="delimiter"))))
pop = str(next(syntax_tree.select(self.closing_delimiter))) pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
assert delim == pop assert delim == pop
if is_logging(): if is_logging():
log_ST(syntax_tree, "test_PopRetrieve_single_line.cst") log_ST(syntax_tree, "test_PopRetrieve_single_line.cst")
...@@ -575,8 +575,8 @@ class TestPopRetrieve: ...@@ -575,8 +575,8 @@ class TestPopRetrieve:
""" """
syntax_tree = self.minilang_parser(teststr) syntax_tree = self.minilang_parser(teststr)
assert not syntax_tree.errors_sorted assert not syntax_tree.errors_sorted
delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="delimiter")))) delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="delimiter"))))
pop = str(next(syntax_tree.select(self.closing_delimiter))) pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
assert delim == pop assert delim == pop
if is_logging(): if is_logging():
log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst") log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst")
...@@ -585,8 +585,8 @@ class TestPopRetrieve: ...@@ -585,8 +585,8 @@ class TestPopRetrieve:
teststr = "Anfang {{{code block }} <- keine Ende-Zeichen ! }}} Ende" teststr = "Anfang {{{code block }} <- keine Ende-Zeichen ! }}} Ende"
syntax_tree = self.minilang_parser2(teststr) syntax_tree = self.minilang_parser2(teststr)
assert not syntax_tree.errors_sorted assert not syntax_tree.errors_sorted
delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="braces")))) delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="braces"))))
pop = str(next(syntax_tree.select(self.closing_delimiter))) pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
assert len(delim) == len(pop) and delim != pop assert len(delim) == len(pop) and delim != pop
if is_logging(): if is_logging():
log_ST(syntax_tree, "test_PopRetrieve_single_line.cst") log_ST(syntax_tree, "test_PopRetrieve_single_line.cst")
...@@ -602,8 +602,8 @@ class TestPopRetrieve: ...@@ -602,8 +602,8 @@ class TestPopRetrieve:
""" """
syntax_tree = self.minilang_parser2(teststr) syntax_tree = self.minilang_parser2(teststr)
assert not syntax_tree.errors_sorted assert not syntax_tree.errors_sorted
delim = str(next(syntax_tree.select(partial(self.opening_delimiter, name="braces")))) delim = str(next(syntax_tree.select_if(partial(self.opening_delimiter, name="braces"))))
pop = str(next(syntax_tree.select(self.closing_delimiter))) pop = str(next(syntax_tree.select_if(self.closing_delimiter)))
assert len(delim) == len(pop) and delim != pop assert len(delim) == len(pop) and delim != pop
if is_logging(): if is_logging():
log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst") log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst")
...@@ -823,12 +823,12 @@ class TestEarlyTokenWhitespaceDrop: ...@@ -823,12 +823,12 @@ class TestEarlyTokenWhitespaceDrop:
assert not cst.pick(':Whitespace') assert not cst.pick(':Whitespace')
cst = self.gr('A + B') cst = self.gr('A + B')
try: try:
_ = next(cst.select(lambda node: node.content == 'A')) _ = next(cst.select_if(lambda node: node.content == 'A'))
assert False, "Tokens in compound expressions should be dropped!" assert False, "Tokens in compound expressions should be dropped!"
except StopIteration: except StopIteration:
pass pass
cst = self.gr('X * y') cst = self.gr('X * y')
assert next(cst.select(lambda node: node.content == 'X')) assert next(cst.select_if(lambda node: node.content == 'X'))
class TestMetaParser: class TestMetaParser:
......
...@@ -172,14 +172,14 @@ class TestNode: ...@@ -172,14 +172,14 @@ class TestNode:
def test_select_subnodes(self): def test_select_subnodes(self):
tags = [node.tag_name tags = [node.tag_name
for node in self.unique_tree.select(lambda nd: True, include_root=True)] for node in self.unique_tree.select_if(lambda nd: True, include_root=True)]
assert ''.join(tags) == "abdfg", ''.join(tags) assert ''.join(tags) == "abdfg", ''.join(tags)
def test_find(self): def test_find(self):
found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e")) found = list(self.unique_tree.select_if(lambda nd: not nd.children and nd.result == "e"))
assert len(found) == 1 assert len(found) == 1
assert found[0].result == 'e' assert found[0].result == 'e'
found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b')) found = list(self.recurr_tree.select_if(lambda nd: nd.tag_name == 'b'))
assert len(found) == 2 assert len(found) == 2
assert found[0].result == 'x' and found[1].result == 'y' assert found[0].result == 'x' and found[1].result == 'y'
...@@ -274,7 +274,7 @@ class TestRootNode: ...@@ -274,7 +274,7 @@ class TestRootNode:
class TestNodeFind(): class TestNodeFind():
"""Test the select-functions of class Node. """Test the select_if-functions of class Node.
""" """
def test_find(self): def test_find(self):
...@@ -282,7 +282,7 @@ class TestNodeFind(): ...@@ -282,7 +282,7 @@ class TestNodeFind():
return node.tag_name == tag_name return node.tag_name == tag_name
matchf = lambda node: match_tag_name(node, "X") matchf = lambda node: match_tag_name(node, "X")
tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))') tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
matches = list(tree.select(matchf)) matches = list(tree.select_if(matchf))
assert len(matches) == 2, len(matches) assert len(matches) == 2, len(matches)
assert str(matches[0]) == 'd', str(matches[0]) assert str(matches[0]) == 'd', str(matches[0])
assert str(matches[1]) == 'F', str(matches[1]) assert str(matches[1]) == 'F', str(matches[1])
...@@ -290,8 +290,8 @@ class TestNodeFind(): ...@@ -290,8 +290,8 @@ class TestNodeFind():
assert matches[1].equals(parse_sxpr('(X F)')) assert matches[1].equals(parse_sxpr('(X F)'))
# check default: root is included in search: # check default: root is included in search:
matchf2 = lambda node: match_tag_name(node, 'a') matchf2 = lambda node: match_tag_name(node, 'a')
assert list(tree.select(matchf2, include_root=True)) assert list(tree.select_if(matchf2, include_root=True))
assert not list(tree.select(matchf2, include_root=False)) assert not list(tree.select_if(matchf2, include_root=False))
def test_getitem(self): def test_getitem(self):
tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))') tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
...@@ -302,20 +302,20 @@ class TestNodeFind(): ...@@ -302,20 +302,20 @@ class TestNodeFind():
assert False, "IndexError expected!" assert False, "IndexError expected!"
except IndexError: except IndexError:
pass pass
matches = list(tree.select_by_tag('X', False)) matches = list(tree.select('X', False))
assert matches[0].equals(parse_sxpr('(X (c d))')) assert matches[0].equals(parse_sxpr('(X (c d))'))
assert matches[1].equals(parse_sxpr('(X F)')) assert matches[1].equals(parse_sxpr('(X F)'))
def test_contains(self): def test_contains(self):
tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))') tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
assert 'a' not in tree assert 'a' not in tree
assert any(tree.select_by_tag('a', True)) assert any(tree.select('a', True))
assert not any(tree.select_by_tag('a', False)) assert not any(tree.select('a', False))
assert 'b' in tree assert 'b' in tree
assert 'X' in tree assert 'X' in tree
assert 'e' in tree assert 'e' in tree
assert 'c' not in tree assert 'c' not in tree
assert any(tree.select_by_tag('c', False)) assert any(tree.select('c', False))
def test_index(self): def test_index(self):
tree = parse_sxpr('(a (b 0) (c 1) (d 2))') tree = parse_sxpr('(a (b 0) (c 1) (d 2))')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment