Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
b75d6237
Commit
b75d6237
authored
Aug 30, 2017
by
Eckhart Arnold
Browse files
- parser.py: memoization streamlined and optimized
parent
bcd332a8
Changes
3
Hide whitespace changes
Inline
Side-by-side
DHParser/parser.py
View file @
b75d6237
...
...
@@ -230,6 +230,12 @@ def add_parser_guard(parser_func):
(aka "history tracking") of parser calls. Returns the wrapped call.
"""
def
guarded_call
(
parser
:
'Parser'
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
def
memoized
(
parser
,
location
):
node
=
parser
.
visited
[
location
]
rlen
=
location
-
(
0
if
node
is
None
else
node
.
len
)
rest
=
grammar
.
document__
[
-
rlen
:]
if
rlen
else
''
return
node
,
rest
try
:
location
=
len
(
text
)
# mind that location is always the distance to the end
grammar
=
parser
.
grammar
# grammar may be 'None' for unconnected parsers!
...
...
@@ -240,10 +246,7 @@ def add_parser_guard(parser_func):
# if location has already been visited by the current parser,
# return saved result
if
location
in
parser
.
visited
:
node
=
parser
.
visited
[
location
]
rlen
=
location
-
(
0
if
node
is
None
else
node
.
len
)
rest
=
grammar
.
document__
[
-
rlen
:]
if
rlen
else
''
return
node
,
rest
return
memoized
(
parser
,
location
)
# break left recursion at the maximum allowed depth
if
parser
.
recursion_counter
.
setdefault
(
location
,
0
)
>
LEFT_RECURSION_DEPTH
:
...
...
@@ -261,24 +264,21 @@ def add_parser_guard(parser_func):
if
node
is
None
:
# retrieve an earlier match result (from left recursion) if it exists
node
=
parser
.
visited
.
get
(
location
,
None
)
rlen
=
location
-
(
0
if
node
is
None
else
node
.
len
)
rest
=
grammar
.
document__
[
-
rlen
:]
if
rlen
else
''
# don't overwrite any positive match (i.e. node not None) in the cache
# and don't add empty entries for parsers returning from left recursive calls!
# COMMENT THIS TO TURN FULL MEMOIZATION OFF
if
node
is
None
and
location
not
in
grammar
.
recursion_locations__
:
if
location
in
grammar
.
recursion_locations__
:
if
location
in
parser
.
visited
:
node
,
rest
=
memoized
(
parser
,
location
)
# don't overwrite any positive match (i.e. node not None) in the cache
# and don't add empty entries for parsers returning from left recursive calls!
elif
grammar
.
memoization__
:
# otherwise also cache None-results
parser
.
visited
[
location
]
=
None
else
:
# variable manipulating parsers will be excluded, though,
# because caching would interfere with changes of variable state
if
grammar
.
last_rb__loc__
>
location
:
# in case of left recursion, the first recursive step that
# matches will store its result in the cache
# UNCOMMENT THIS TO TURN FULL MEMOIZATION OFF
# if location in grammar.recursion_locations__:
parser
.
visited
[
location
]
=
node
elif
((
grammar
.
memoization__
or
location
in
grammar
.
recursion_locations__
)
and
grammar
.
last_rb__loc__
>
location
):
# - variable manipulating parsers will not be entered into the cache,
# because caching would interfere with changes of variable state
# - in case of left recursion, the first recursive step that
# matches will store its result in the cache
parser
.
visited
[
location
]
=
node
parser
.
recursion_counter
[
location
]
-=
1
...
...
@@ -616,6 +616,10 @@ class Grammar:
recursion detection algorithm, but, strictly speaking, superfluous
if full memoization is enabled. (See `add_parser_guard` and its
local function `guarded_call`)
memoization__: Turns full memoization on or off. Turning memoization off
results in less memory usage and sometimes reduced parsing time.
In some situations it may drastically increase parsing time, so
it is safer to leave it on.
"""
root__
=
None
# type: Union[Parser, None]
# root__ must be overwritten with the root-parser by grammar subclass
...
...
@@ -672,6 +676,7 @@ class Grammar:
self
.
all_parsers__
=
set
()
# type: Set[Parser]
self
.
_dirty_flag__
=
False
# type: bool
self
.
history_tracking__
=
False
# type: bool
self
.
memoization__
=
True
# type: bool
self
.
_reset__
()
# prepare parsers in the class, first
...
...
dhparser.py
View file @
b75d6237
...
...
@@ -208,11 +208,11 @@ def create_project(path,
print
(
'ready.'
)
def
profile
(
func
):
def
cpu_
profile
(
func
,
repetitions
=
1
):
import
cProfile
,
pstats
pr
=
cProfile
.
Profile
()
pr
.
enable
()
for
i
in
range
(
1
):
for
i
in
range
(
repetitions
):
success
=
func
()
if
not
success
:
break
...
...
@@ -220,11 +220,11 @@ def profile(func):
# after your program ends
st
=
pstats
.
Stats
(
pr
)
st
.
strip_dirs
()
st
.
sort_stats
(
'time'
).
print_stats
(
1
0
)
st
.
sort_stats
(
'time'
).
print_stats
(
4
0
)
return
success
def
mem_profile
(
func
):
def
mem_profile
(
func
,
dummy
=
0
):
import
tracemalloc
tracemalloc
.
start
()
success
=
func
()
...
...
@@ -250,6 +250,6 @@ if __name__ == "__main__":
# run self test
# selftest('EBNF/EBNF.ebnf')
with
logging
(
False
):
if
not
mem
_profile
(
selftest
):
if
not
cpu
_profile
(
selftest
,
1
):
sys
.
exit
(
1
)
examples/LaTeX/tst_LaTeX_docs.py
View file @
b75d6237
...
...
@@ -73,7 +73,7 @@ def cpu_profile(func):
pr
.
disable
()
st
=
pstats
.
Stats
(
pr
)
st
.
strip_dirs
()
st
.
sort_stats
(
'time'
).
print_stats
(
2
0
)
st
.
sort_stats
(
'time'
).
print_stats
(
4
0
)
def
mem_profile
(
func
):
...
...
@@ -87,7 +87,7 @@ def mem_profile(func):
print
(
stat
)
if
__name__
==
"__main__"
:
mem
_profile
(
test
)
cpu
_profile
(
test
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment