Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
5d946f25
Commit
5d946f25
authored
Feb 26, 2019
by
eckhart
Browse files
- syntaxtree.Node.select: performance slightly improved
parent
8f72f453
Changes
2
Hide whitespace changes
Inline
Side-by-side
DHParser/syntaxtree.py
View file @
5d946f25
...
...
@@ -70,12 +70,6 @@ ZOMBIE_TAG = "__ZOMBIE__"
#######################################################################
ChildrenType
=
Tuple
[
'Node'
,
...]
NoChildren
=
cast
(
ChildrenType
,
())
# type: ChildrenType
StrictResultType
=
Union
[
ChildrenType
,
StringView
,
str
]
ResultType
=
Union
[
ChildrenType
,
'Node'
,
StringView
,
str
,
None
]
def
flatten_sxpr
(
sxpr
:
str
,
threshold
:
int
=
-
1
)
->
str
:
"""
Returns S-expression ``sxpr`` as a one-liner without unnecessary
...
...
@@ -114,6 +108,11 @@ def flatten_xml(xml: str) -> str:
return
re
.
sub
(
r
'\s+(?=<[\w:])'
,
''
,
re
.
sub
(
r
'(?P<closing_tag></:?\w+>)\s+'
,
tag_only
,
xml
))
ChildrenType
=
Tuple
[
'Node'
,
...]
NoChildren
=
cast
(
ChildrenType
,
())
# type: ChildrenType
StrictResultType
=
Union
[
ChildrenType
,
StringView
,
str
]
ResultType
=
Union
[
ChildrenType
,
'Node'
,
StringView
,
str
,
None
]
RX_AMP
=
re
.
compile
(
r
'&(?!\w+;)'
)
...
...
@@ -365,6 +364,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
self
.
children
=
NoChildren
self
.
_result
=
result
# cast(StrictResultType, result)
def
_content
(
self
)
->
List
[
str
]:
"""
Returns string content as list of string fragments
...
...
@@ -378,6 +378,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
self
.
_result
=
str
(
self
.
_result
)
return
[
self
.
_result
]
@
property
def
content
(
self
)
->
str
:
"""
...
...
@@ -697,8 +698,12 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
yield
self
child_iterator
=
reversed
(
self
.
children
)
if
reverse
else
self
.
children
for
child
in
child_iterator
:
for
node
in
child
.
select
(
match_function
,
True
,
reverse
):
yield
node
if
match_function
(
child
):
yield
child
yield
from
child
.
select
(
match_function
,
False
,
reverse
)
# The above variant is slightly faster
# for child in child_iterator:
# yield from child.select(match_function, True, reverse)
def
select_by_tag
(
self
,
tag_names
:
Union
[
str
,
AbstractSet
[
str
]],
...
...
examples/LaTeX/tst_LaTeX_docs.py
View file @
5d946f25
...
...
@@ -55,13 +55,6 @@ def fail_on_error(src, result):
sys
.
exit
(
1
)
def
count_nodes
(
tree
,
condition
=
lambda
n
:
True
):
N
=
0
for
nd
in
tree
.
select
(
condition
,
include_root
=
True
):
N
+=
1
return
N
def
tst_func
():
with
DHParser
.
log
.
logging
(
LOGGING
):
files
=
os
.
listdir
(
'testdata'
)
...
...
@@ -73,7 +66,7 @@ def tst_func():
print
(
'
\n\n
Parsing document: "%s"'
%
file
)
result
=
parser
(
doc
)
print
(
"Number of CST-nodes: "
+
str
(
count_nodes
(
result
)))
print
(
"Number of CST-nodes: "
+
str
(
result
.
tree_size
(
)))
# print("Number of empty nodes: " + str(count_nodes(result,
# lambda n: not bool(n.result))))
if
DHParser
.
log
.
is_logging
():
...
...
@@ -87,7 +80,7 @@ def tst_func():
fail_on_error
(
doc
,
result
)
transformer
(
result
)
fail_on_error
(
doc
,
result
)
print
(
"Number of AST-nodes: "
+
str
(
count_nodes
(
result
)))
print
(
"Number of AST-nodes: "
+
str
(
result
.
tree_size
(
)))
if
DHParser
.
log
.
is_logging
():
print
(
'Saving AST'
)
with
open
(
'LOGS/'
+
file
[:
-
4
]
+
'.ast'
,
'w'
,
encoding
=
'utf-8'
)
as
f
:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment