Commit 5d946f25 authored by eckhart
Browse files

- syntaxtree.Node.select: performance slightly improved

parent 8f72f453
...@@ -70,12 +70,6 @@ ZOMBIE_TAG = "__ZOMBIE__" ...@@ -70,12 +70,6 @@ ZOMBIE_TAG = "__ZOMBIE__"
####################################################################### #######################################################################
# Type aliases describing what a node may hold as its result.
# ChildrenType: a tuple of any length whose items are Node objects.
ChildrenType = Tuple['Node', ...]
# NoChildren: the empty tuple, cast so that type checkers treat it as ChildrenType.
NoChildren = cast(ChildrenType, ()) # type: ChildrenType
# StrictResultType: either a tuple of child nodes, a StringView, or a str.
StrictResultType = Union[ChildrenType, StringView, str]
# ResultType: like StrictResultType, but additionally admits a single Node or None.
ResultType = Union[ChildrenType, 'Node', StringView, str, None]
def flatten_sxpr(sxpr: str, threshold: int = -1) -> str: def flatten_sxpr(sxpr: str, threshold: int = -1) -> str:
""" """
Returns S-expression ``sxpr`` as a one-liner without unnecessary Returns S-expression ``sxpr`` as a one-liner without unnecessary
...@@ -114,6 +108,11 @@ def flatten_xml(xml: str) -> str: ...@@ -114,6 +108,11 @@ def flatten_xml(xml: str) -> str:
return re.sub(r'\s+(?=<[\w:])', '', re.sub(r'(?P<closing_tag></:?\w+>)\s+', tag_only, xml)) return re.sub(r'\s+(?=<[\w:])', '', re.sub(r'(?P<closing_tag></:?\w+>)\s+', tag_only, xml))
# Type aliases describing what a node may hold as its result.
# ChildrenType: a tuple of any length whose items are Node objects.
ChildrenType = Tuple['Node', ...]
# NoChildren: the empty tuple, cast so that type checkers treat it as ChildrenType.
NoChildren = cast(ChildrenType, ()) # type: ChildrenType
# StrictResultType: either a tuple of child nodes, a StringView, or a str.
StrictResultType = Union[ChildrenType, StringView, str]
# ResultType: like StrictResultType, but additionally admits a single Node or None.
ResultType = Union[ChildrenType, 'Node', StringView, str, None]
RX_AMP = re.compile(r'&(?!\w+;)') RX_AMP = re.compile(r'&(?!\w+;)')
...@@ -365,6 +364,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -365,6 +364,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
self.children = NoChildren self.children = NoChildren
self._result = result # cast(StrictResultType, result) self._result = result # cast(StrictResultType, result)
def _content(self) -> List[str]: def _content(self) -> List[str]:
""" """
Returns string content as list of string fragments Returns string content as list of string fragments
...@@ -378,6 +378,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -378,6 +378,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
self._result = str(self._result) self._result = str(self._result)
return [self._result] return [self._result]
@property @property
def content(self) -> str: def content(self) -> str:
""" """
...@@ -697,8 +698,12 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil ...@@ -697,8 +698,12 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
yield self yield self
child_iterator = reversed(self.children) if reverse else self.children child_iterator = reversed(self.children) if reverse else self.children
for child in child_iterator: for child in child_iterator:
for node in child.select(match_function, True, reverse): if match_function(child):
yield node yield child
yield from child.select(match_function, False, reverse)
# The above variant is slightly faster
# for child in child_iterator:
# yield from child.select(match_function, True, reverse)
def select_by_tag(self, tag_names: Union[str, AbstractSet[str]], def select_by_tag(self, tag_names: Union[str, AbstractSet[str]],
......
...@@ -55,13 +55,6 @@ def fail_on_error(src, result): ...@@ -55,13 +55,6 @@ def fail_on_error(src, result):
sys.exit(1) sys.exit(1)
def count_nodes(tree, condition=lambda n: True):
    """Return how many nodes of ``tree`` satisfy ``condition``.

    The nodes are obtained from ``tree.select(condition, include_root=True)``,
    i.e. the root is passed to ``select`` as a candidate as well.

    :param tree: any object offering a ``select(condition, include_root=...)``
        method that returns an iterable of nodes.
    :param condition: a predicate handed through to ``tree.select``. The
        default accepts every node, so the total number of selected nodes
        is returned.
    :return: the number of items yielded by ``tree.select``.
    """
    # Count lazily: the selected nodes themselves are not needed, only
    # their number, so no intermediate list is built.
    return sum(1 for _ in tree.select(condition, include_root=True))
def tst_func(): def tst_func():
with DHParser.log.logging(LOGGING): with DHParser.log.logging(LOGGING):
files = os.listdir('testdata') files = os.listdir('testdata')
...@@ -73,7 +66,7 @@ def tst_func(): ...@@ -73,7 +66,7 @@ def tst_func():
print('\n\nParsing document: "%s"' % file) print('\n\nParsing document: "%s"' % file)
result = parser(doc) result = parser(doc)
print("Number of CST-nodes: " + str(count_nodes(result))) print("Number of CST-nodes: " + str(result.tree_size()))
# print("Number of empty nodes: " + str(count_nodes(result, # print("Number of empty nodes: " + str(count_nodes(result,
# lambda n: not bool(n.result)))) # lambda n: not bool(n.result))))
if DHParser.log.is_logging(): if DHParser.log.is_logging():
...@@ -87,7 +80,7 @@ def tst_func(): ...@@ -87,7 +80,7 @@ def tst_func():
fail_on_error(doc, result) fail_on_error(doc, result)
transformer(result) transformer(result)
fail_on_error(doc, result) fail_on_error(doc, result)
print("Number of AST-nodes: " + str(count_nodes(result))) print("Number of AST-nodes: " + str(result.tree_size()))
if DHParser.log.is_logging(): if DHParser.log.is_logging():
print('Saving AST') print('Saving AST')
with open('LOGS/' + file[:-4] + '.ast', 'w', encoding='utf-8') as f: with open('LOGS/' + file[:-4] + '.ast', 'w', encoding='utf-8') as f:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment