Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
D
DHParser
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Iterations
Merge Requests
0
Merge Requests
0
Requirements
Requirements
List
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Operations
Operations
Incidents
Analytics
Analytics
Code Review
Insights
Issue
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
badw-it
DHParser
Commits
781802c9
Commit
781802c9
authored
Jun 13, 2020
by
eckhart
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
parse.py: use memoize as flag
parent
1215f1f6
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
22 additions
and
39 deletions
+22
-39
DHParser/configuration.py
DHParser/configuration.py
+0
-10
DHParser/parse.py
DHParser/parse.py
+20
-27
tests/test_ebnf.py
tests/test_ebnf.py
+1
-1
tests/test_parse.py
tests/test_parse.py
+1
-1
No files found.
DHParser/configuration.py
View file @
781802c9
...
...
@@ -224,16 +224,6 @@ CONFIG_PRESET['history_tracking'] = False
# Default value: False
CONFIG_PRESET
[
'resume_notices'
]
=
False
# Turns on memoization. This means that whenever a parser returns, the
# result it yielded at the location at which it was called will be
# stored in the parser's "visited"-cache, so that in case the same
# parser is called again at the same location, the result can be
# retrieved from the cache. This kind of caching is the central
# design feature of pack-rat parsers, which ensures their finishing
# in linear time (https://pdos.csail.mit.edu/~baford/packrat/thesis/)
# Default value: True
CONFIG_PRESET
[
'memoization'
]
=
True
# Turns on the left-recursion-handling algorithm. This allows the use
# of left-recursion in grammars, which otherwise would run a recursive
# descent parser into an infinite-loop.
...
...
DHParser/parse.py
View file @
781802c9
...
...
@@ -450,6 +450,8 @@ class Parser:
recursion_state
=
grammar
.
returning_from_recursion__
grammar
.
returning_from_recursion__
=
False
memoization_state
=
grammar
.
memoization__
grammar
.
memoization__
=
True
# now, the actual parser call!
try
:
...
...
@@ -504,16 +506,14 @@ class Parser:
if
node
is
not
None
:
node
.
_pos
=
location
if
(
grammar
.
memoization__
and
not
grammar
.
returning_from_recursion__
# Variable-manipulating parsers will not be entered into the cache,
# because caching would interfere with changes of variable state.
# See `_rollback_location()` for the added compensation term.
and
location
>
grammar
.
last_rb__loc__
+
int
(
text
.
_len
==
rest
.
_len
)):
visited
[
location
]
=
(
node
,
rest
)
if
not
grammar
.
returning_from_recursion__
:
# grammar.memoization__ = location > (grammar.last_rb__loc__
# + int(text._len == rest._len))
if
grammar
.
memoization__
:
visited
[
location
]
=
(
node
,
rest
)
grammar
.
returning_from_recursion__
=
recursion_state
if
grammar
.
memoization__
:
grammar
.
memoization__
=
memoization_state
except
RecursionError
:
node
=
Node
(
ZOMBIE_TAG
,
str
(
text
[:
min
(
10
,
max
(
1
,
text
.
find
(
"
\n
"
)))])
+
" ..."
)
...
...
@@ -990,10 +990,10 @@ class Grammar:
(resulting in a maximum recursion depth reached error) when
the grammar definition contains left recursions.
memoization__:
Turns full memoization on or off. Turning memoization off
results in less memory usage and sometimes reduced parsing time.
In some situations it may drastically increase parsing time, so
it is safer to leave it on. (Default: on)
memoization__:
A flag that signals whether return values shall be
memoized. This flag is set to `True` when the parser is going
forward in the call sequence, but may be set to `False` by
context-sensitive parsers that are incompatible with memoization.
# mirrored class attributes:
...
...
@@ -1152,7 +1152,6 @@ class Grammar:
or
(
not
self
.
__class__
.
COMMENT__
and
self
.
comment_rx__
==
RX_NEVER_MATCH
))
self
.
start_parser__
=
None
# type: Optional[Parser]
self
.
_dirty_flag__
=
False
# type: bool
self
.
memoization__
=
get_config_value
(
'memoization'
)
# type: bool
self
.
left_recursion__
=
get_config_value
(
'left_recursion'
)
# type: bool
self
.
history_tracking__
=
get_config_value
(
'history_tracking'
)
# type: bool
self
.
resume_notices__
=
get_config_value
(
'resume_notices'
)
# type: bool
...
...
@@ -1233,6 +1232,7 @@ class Grammar:
self
.
variables__
=
defaultdict
(
lambda
:
[])
# type: DefaultDict[str, List[str]]
self
.
rollback__
=
[]
# type: List[Tuple[int, Callable]]
self
.
last_rb__loc__
=
-
2
# type: int
self
.
memoization__
=
True
# type: bool
# support for call stack tracing
self
.
call_stack__
=
[]
# type: List[CallItem] # tag_name, location
# snapshots of call stacks
...
...
@@ -1441,6 +1441,7 @@ class Grammar:
"""
self
.
rollback__
.
append
((
location
,
func
))
self
.
last_rb__loc__
=
location
self
.
memoization__
=
False
@
property
...
...
@@ -2930,12 +2931,6 @@ class ContextSensitive(UnaryParser):
starts to move forward again. Only those variable changes should be
rolled back the locations of which have been passed when backtracking.
The rollback location is furthermore used to block memoizing. Since
the result returned by a variable changing parser (or a parser
that directly or indirectly calls a variable changing parser), should
never be memoized, memoizing is only triggered, when the location of
a returning parser is greater than the last rollback location.
Usually, the rollback location is exactly the location, where the parser
started parsing. However, the rollback-location must lie before the
location where the parser stopped, because otherwise variable changes
...
...
@@ -2943,12 +2938,7 @@ class ContextSensitive(UnaryParser):
zero length data. In order to avoid this, the rollback location is
artificially reduced by one in case the parser did not capture any text
(either of the two equivalent criteria len(text) == len(rest) or
len(node) == 0 identifies this case). As this in turn could lead
to the return values of variable changing parsers being memoized, because
memoizing is triggered if the location of a returning parser is greater
than the last rollback location, this must be compensated again in
`Parser.__call__()` (and, likewise, `Forward.__call__()`) before
memoizing is triggered.
len(node) == 0 identifies this case).
"""
L
=
text
.
_len
rb_loc
=
self
.
grammar
.
document_length__
-
L
...
...
@@ -3320,6 +3310,7 @@ class Forward(UnaryParser):
else
:
recursion_state
=
grammar
.
returning_from_recursion__
self
.
recursion_counter
[
location
]
=
0
# fail on the first recursion
grammar
.
memoization__
=
True
result
=
self
.
parser
(
text
)
if
result
[
0
]
is
not
None
:
# keep calling the (potentially left-)recursive parser and increase
...
...
@@ -3330,6 +3321,7 @@ class Forward(UnaryParser):
self
.
recursion_counter
[
location
]
=
depth
grammar
.
returning_from_recursion__
=
False
rb_stack_size
=
len
(
grammar
.
rollback__
)
grammar
.
memoization__
=
True
next_result
=
self
.
parser
(
text
)
# discard next_result if it is not the longest match and return
if
len
(
next_result
[
1
])
>=
len
(
result
[
1
]):
# also true, if no match
...
...
@@ -3354,8 +3346,9 @@ class Forward(UnaryParser):
break
result
=
next_result
depth
+=
1
if
(
grammar
.
memoization__
# see `_rollback_location()` for added compensation term
and
location
>
grammar
.
last_rb__loc__
+
int
(
text
.
_len
==
result
[
1
].
_len
)):
grammar
.
memoization__
=
location
>
(
grammar
.
last_rb__loc__
+
int
(
text
.
_len
==
result
[
1
].
_len
))
if
grammar
.
memoization__
:
visited
[
location
]
=
result
grammar
.
returning_from_recursion__
=
recursion_state
return
result
...
...
tests/test_ebnf.py
View file @
781802c9
...
...
@@ -988,7 +988,7 @@ VARIABLE ::= /[A-Za-z]/, ~;
class
TestAlternativeEBNFSyntax
:
def
test_alt_syntax
(
self
):
code
,
errors
,
ast
=
compile_ebnf
(
ArithmeticEBNF
,
preserve_AST
=
True
)
assert
not
ast
.
error_flag
assert
not
ast
.
error_flag
,
str
(
ast
.
errors
)
arithmetic_grammer
=
compile_python_object
(
DHPARSER_IMPORTS
.
format
(
dhparser_parentdir
=
DHPARSER_PARENTDIR
)
+
code
)
arithmetic_parser
=
arithmetic_grammer
()
...
...
tests/test_parse.py
View file @
781802c9
...
...
@@ -892,7 +892,7 @@ class TestPopRetrieve:
st
=
gr
(
case
)
# log_parsing_history(gr, 'test_cache_neutrality_3')
# print(st.as_sxpr())
assert
not
st
.
errors
assert
not
st
.
errors
,
str
(
errors
)
case
=
'AXXX!'
st
=
gr
(
case
)
assert
not
st
.
errors
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment