Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
93a863ef
Commit
93a863ef
authored
Aug 30, 2017
by
di68kap
Browse files
- parser.py: turned full memoization off, because of excessive memory usage
parent
ac919989
Changes
3
Hide whitespace changes
Inline
Side-by-side
DHParser/parser.py
View file @
93a863ef
...
...
@@ -173,7 +173,7 @@ class HistoryRecord:
@
property
def
stack
(
self
)
->
str
:
return
"->"
.
join
((
repr
(
p
)
if
p
.
ptype
==
':RegExp'
else
p
.
name
or
p
.
ptype
)
return
"->"
.
join
((
p
.
repr
if
p
.
ptype
==
':RegExp'
else
p
.
name
or
p
.
ptype
)
for
p
in
self
.
call_stack
)
@
property
...
...
@@ -234,25 +234,22 @@ def add_parser_guard(parser_func):
location
=
len
(
text
)
# mind that location is always the distance to the end
grammar
=
parser
.
grammar
# grammar may be 'None' for unconnected parsers!
if
not
grammar
.
moving_forward__
:
# rollback variable changes from discarded parser passes
if
grammar
.
last_rb__loc__
<=
location
:
grammar
.
rollback_to__
(
location
)
grammar
.
moving_forward__
=
True
if
grammar
.
last_rb__loc__
<=
location
:
grammar
.
rollback_to__
(
location
)
# if location has already been visited by the current parser,
# return saved result
if
location
in
parser
.
visited
:
grammar
.
moving_forward__
=
False
return
parser
.
visited
[
location
]
# break left recursion at the maximum allowed depth
if
parser
.
recursion_counter
.
setdefault
(
location
,
0
)
>
LEFT_RECURSION_DEPTH
:
grammar
.
moving_forward__
=
False
grammar
.
recursion_locations__
.
add
(
location
)
return
None
,
text
if
grammar
.
history_tracking__
:
grammar
.
call_stack__
.
append
(
parser
)
grammar
.
moving_forward__
=
True
parser
.
recursion_counter
[
location
]
+=
1
...
...
@@ -264,25 +261,30 @@ def add_parser_guard(parser_func):
node
,
rest
=
parser
.
visited
.
get
(
location
,
(
None
,
rest
))
# don't overwrite any positive match (i.e. node not None) in the cache
# and don't add empty entries for parsers returning from left recursive calls!
if
node
is
None
and
grammar
.
moving_forward__
:
# otherwise also cache None-results
parser
.
visited
[
location
]
=
None
,
rest
# TODO: uncomment the following for full memoization
# if node is None and location not in grammar.recursion_locations__:
# # otherwise also cache None-results
# parser.visited[location] = None, rest
else
:
# variable manipulating parsers will be excluded, though,
# because caching would interfere with changes of variable state
if
grammar
.
last_rb__loc__
>
location
:
# in case of left recursion, the first recursive step that
# matches will store its result in the cache
parser
.
visited
[
location
]
=
(
node
,
rest
)
# TODO: remove if clause for full memoization
if
location
in
grammar
.
recursion_locations__
:
parser
.
visited
[
location
]
=
(
node
,
rest
)
parser
.
recursion_counter
[
location
]
-=
1
if
grammar
.
history_tracking__
:
# don't track returning parsers except in case an error has occurred
remaining
=
len
(
rest
)
if
grammar
.
moving_forward__
or
(
node
and
node
.
_errors
):
record
=
HistoryRecord
(
grammar
.
call_stack__
,
node
,
len
(
rest
)
)
record
=
HistoryRecord
(
grammar
.
call_stack__
,
node
,
remaining
)
grammar
.
history__
.
append
(
record
)
# print(record.stack, record.status, rest[:20].replace('\n', '|'))
grammar
.
moving_forward__
=
False
grammar
.
call_stack__
.
pop
()
except
RecursionError
:
...
...
@@ -291,7 +293,6 @@ def add_parser_guard(parser_func):
"potentially due to too many errors!"
)
rest
=
''
grammar
.
moving_forward__
=
False
return
node
,
rest
return
guarded_call
...
...
@@ -600,11 +601,16 @@ class Grammar:
history__: A list of parser-call-stacks. A parser-call-stack is
appended to the list each time a parser either matches, fails
or if a parser-error occurs.
moving_forward__: This flag indicates that the parsing process is currently
moving forward. This information is needed among other things to
trigger the rollback of variables, which happens stepwise when the
parser is retreating from a dead end, i.e. not moving forward.
(See `add_parser_guard` and its local function `guarded_call`)
moving_forward__: This flag indicates that the parsing process is currently
moving forward. It is needed to reduce noise in history recording
and should not be considered as having a valid value if history
recording is turned off! (See `add_parser_guard` and its local
function `guarded_call`)
recursion_locations__: Stores the locations where left recursion was
detected. Needed to provide minimal memoization for the left
recursion detection algorithm, but, strictly speaking, superfluous
if full memoization is enabled. (See `add_parser_guard` and its
local function `guarded_call`)
"""
root__
=
None
# type: Union[Parser, None]
# root__ must be overwritten with the root-parser by grammar subclass
...
...
@@ -713,7 +719,8 @@ class Grammar:
# snapshots of call stacks
self
.
history__
=
[]
# type: List[HistoryRecord]
# also needed for call stack tracing
self
.
moving_forward__
=
True
# type: bool
self
.
moving_forward__
=
False
# type: bool
self
.
recursion_locations__
=
set
()
# type: Set[int]
@
property
...
...
dhparser.py
View file @
93a863ef
...
...
@@ -224,6 +224,18 @@ def profile(func):
return
success
def mem_profile(func):
    """Run ``func`` under ``tracemalloc`` and print the top 20 allocation sites.

    Args:
        func: A callable taking no arguments.

    Returns:
        Whatever ``func()`` returned.

    Tracing is switched off again before returning (also when ``func``
    raises), unless it was already active when this function was called.
    """
    import tracemalloc

    # Remember whether tracing was already active, so that only tracing
    # started here gets stopped here.
    was_tracing = tracemalloc.is_tracing()
    if not was_tracing:
        tracemalloc.start()
    try:
        success = func()
        snapshot = tracemalloc.take_snapshot()
    finally:
        if not was_tracing:
            # The snapshot has been taken before, so it is not affected
            # by switching the tracer off.
            tracemalloc.stop()
    top_stats = snapshot.statistics('lineno')
    print("[ Top 20 ]")
    for stat in top_stats[:20]:
        print(stat)
    return success
if
__name__
==
"__main__"
:
if
len
(
sys
.
argv
)
>
1
:
if
os
.
path
.
exists
(
sys
.
argv
[
1
])
and
os
.
path
.
isfile
(
sys
.
argv
[
1
]):
...
...
@@ -238,6 +250,6 @@ if __name__ == "__main__":
# run self test
# selftest('EBNF/EBNF.ebnf')
with
logging
(
False
):
if
not
profile
(
selftest
):
if
not
mem_
profile
(
selftest
):
sys
.
exit
(
1
)
examples/LaTeX/tst_LaTeX_docs.py
View file @
93a863ef
...
...
@@ -49,23 +49,45 @@ def fail_on_error(src, result):
sys
.
exit
(
1
)
with
toolkit
.
logging
(
False
):
files
=
os
.
listdir
(
'testdata'
)
files
.
sort
()
def test():
    """Parse and transform every regular ``.tex`` document in ``testdata``.

    The directory listing is processed in sorted order. Files whose
    lower-cased name does not end in ``.tex`` or contains ``error`` are
    skipped. Each remaining document is read as UTF-8, parsed, its parsing
    history is logged, and the result is passed to ``fail_on_error`` once
    after parsing and once more after the transformation.
    """
    # NOTE(review): nesting reconstructed from a view without indentation;
    # the loop is assumed to run inside the logging context -- confirm.
    with toolkit.logging(False):
        for filename in sorted(os.listdir('testdata')):
            lowered = filename.lower()
            if not lowered.endswith('.tex') or 'error' in lowered:
                continue
            path = os.path.join('testdata', filename)
            with open(path, 'r', encoding="utf-8") as f:
                doc = f.read()
            print('\n\nParsing document: "%s"\n' % filename)
            result = parser(doc)
            parser.log_parsing_history__()
            fail_on_error(doc, result)
            transformer(result)
            fail_on_error(doc, result)
            # print(result.as_sxpr())
def cpu_profile(func):
    """Run ``func`` under the profiler and print the top 20 entries.

    Args:
        func: A callable taking no arguments; its return value is discarded.

    The collected statistics are printed with directory names stripped,
    sorted by the ``'time'`` key and limited to 20 lines.
    """
    pr = profile.Profile()
    pr.enable()
    try:
        # Only the callable itself is profiled; the work to be measured
        # belongs into ``func`` rather than into this helper.
        func()
    finally:
        # Always switch the profiler off again, even if ``func`` raises.
        pr.disable()
    st = pstats.Stats(pr)
    st.strip_dirs()
    st.sort_stats('time').print_stats(20)
def mem_profile(func):
    """Run ``func`` under ``tracemalloc`` and print the top 20 allocation sites.

    Args:
        func: A callable taking no arguments; its return value is discarded.

    Tracing is switched off again before returning (also when ``func``
    raises), unless it was already active when this function was called.
    """
    import tracemalloc

    # Remember whether tracing was already active, so that only tracing
    # started here gets stopped here.
    was_tracing = tracemalloc.is_tracing()
    if not was_tracing:
        tracemalloc.start()
    try:
        func()
        snapshot = tracemalloc.take_snapshot()
    finally:
        if not was_tracing:
            # The snapshot has been taken before, so it is not affected
            # by switching the tracer off.
            tracemalloc.stop()
    top_stats = snapshot.statistics('lineno')
    print("[ Top 20 ]")
    for stat in top_stats[:20]:
        print(stat)
# Script entry point: run the document test under the memory profiler.
if __name__ == "__main__":
    mem_profile(test)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment