Commit a903b09d authored by di68kap's avatar di68kap

- syntaxtree.py: select_by_tag generalized...

parent e9506364
...@@ -28,7 +28,7 @@ import copy ...@@ -28,7 +28,7 @@ import copy
import json import json
import sys import sys
from typing import Callable, cast, Iterator, Sequence, List, AbstractSet, Set, Union, Tuple, \ from typing import Callable, cast, Iterator, Sequence, List, AbstractSet, Set, Union, Tuple, \
Optional, Dict Container, Optional, Dict
from DHParser.configuration import SERIALIZATIONS, XML_SERIALIZATION, SXPRESSION_SERIALIZATION, \ from DHParser.configuration import SERIALIZATIONS, XML_SERIALIZATION, SXPRESSION_SERIALIZATION, \
COMPACT_SERIALIZATION, JSON_SERIALIZATION COMPACT_SERIALIZATION, JSON_SERIALIZATION
...@@ -536,15 +536,31 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -536,15 +536,31 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
if match_function(child): if match_function(child):
yield child yield child
yield from child.select(match_function, False, reverse) yield from child.select(match_function, False, reverse)
# The above variant is slightly faster
# for child in child_iterator:
# yield from child.select(match_function, True, reverse)
def select_by_tag(self, tag_names: Union[str, AbstractSet[str]],
def select_by_tag(self, criterion: Union[str, Container[str], Callable],
include_root: bool = False, reverse: bool = False) -> Iterator['Node']: include_root: bool = False, reverse: bool = False) -> Iterator['Node']:
""" """
Returns an iterator that runs through all descendants that have one Finds nodes in the tree that fulfill a given criterion. This criterion
of the given tag names. can either be general, if criterion is a Callable or a tag_name or
a set of tag_names.
`select_by_tag` returns an iterator over all nodes that
fulfill the given `criterion`. The tree is
traversed pre-order.
See the examples below.
Args:
criterion: A tag name, a container of tag names, or a
function that takes a Node object as argument and
returns True or False
include_root (bool): If False, only descendant nodes will be
checked for a match.
reverse (bool): If True, the tree will be walked in reverse
order, i.e. last children first.
Yields:
Node: All nodes of the tree which
fulfill the given ``criterion``
Examples:: Examples::
...@@ -568,9 +584,13 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -568,9 +584,13 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
Yields: Yields:
Node: All nodes which have a given tag name. Node: All nodes which have a given tag name.
""" """
if isinstance(tag_names, str): if isinstance(criterion, str):
tag_names = frozenset({tag_names}) return self.select(lambda node: node.tag_name == criterion, include_root, reverse)
return self.select(lambda node: node.tag_name in tag_names, include_root, reverse) elif isinstance(criterion, Container):
return self.select(lambda node: node.tag_name in criterion, include_root, reverse)
else:
assert isinstance(criterion, Callable)
return self._select(criterion, include_root, reverse)
def pick(self, tag_names: Union[str, Set[str]], reverse: bool = False) -> Optional['Node']: def pick(self, tag_names: Union[str, Set[str]], reverse: bool = False) -> Optional['Node']:
""" """
......
...@@ -77,7 +77,7 @@ Match test "entry" for parser "entry" failed: ...@@ -77,7 +77,7 @@ Match test "entry" for parser "entry" failed:
### AST ### AST
<__ZOMBIE__> <ZOMBIE__>
<entry> <entry>
<:RegExp>@</:RegExp> <:RegExp>@</:RegExp>
<type> <type>
...@@ -183,15 +183,15 @@ Match test "entry" for parser "entry" failed: ...@@ -183,15 +183,15 @@ Match test "entry" for parser "entry" failed:
<plain_content> <plain_content>
<COMMA_TERMINATED_STRING>{https://en.wikipedia.org/w/index.php?title=Duhem\</COMMA_TERMINATED_STRING> <COMMA_TERMINATED_STRING>{https://en.wikipedia.org/w/index.php?title=Duhem\</COMMA_TERMINATED_STRING>
</plain_content> </plain_content>
<__ZOMBIE__>%</__ZOMBIE__> <ZOMBIE__>%</ZOMBIE__>
</entry> </entry>
<__ZOMBIE__> <ZOMBIE__>
E2\%80\%93Quine\_thesis\&amp;oldid=772834991}, E2\%80\%93Quine\_thesis\&amp;oldid=772834991},
</__ZOMBIE__> </ZOMBIE__>
<__ZOMBIE__> <ZOMBIE__>
organization = {Wikipedia} organization = {Wikipedia}
</__ZOMBIE__> </ZOMBIE__>
<__ZOMBIE__>}</__ZOMBIE__> <ZOMBIE__>}</ZOMBIE__>
</__ZOMBIE__> </ZOMBIE__>
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment