Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
27c232b9
Commit
27c232b9
authored
May 23, 2020
by
eckhart
Browse files
parse.py: bug fix left recursion algorithm
parent
e8e5f974
Changes
6
Hide whitespace changes
Inline
Side-by-side
DHParser/compile.py
View file @
27c232b9
...
...
@@ -301,7 +301,11 @@ def compile_source(source: str,
ast
=
None
# type: Optional[Node]
original_text
=
load_if_file
(
source
)
# type: str
log_file_name
=
logfile_basename
(
source
,
compiler
)
if
is_logging
()
else
''
# type: str
log_syntax_trees
=
get_config_value
(
'log_syntax_trees'
)
if
not
hasattr
(
parser
,
'free_char_parsefunc__'
)
or
parser
.
history_tracking__
:
# log only for custom parser/transformer/compilers
log_syntax_trees
=
get_config_value
(
'log_syntax_trees'
)
else
:
log_syntax_trees
=
set
()
# preprocessing
...
...
@@ -316,7 +320,7 @@ def compile_source(source: str,
syntax_tree
=
parser
(
source_text
)
# type: RootNode
if
'cst'
in
log_syntax_trees
:
log_ST
(
syntax_tree
,
log_file_name
+
'.cst'
)
if
get_config_value
(
'
history_tracking
'
)
:
if
parser
.
history_tracking
__
:
log_parsing_history
(
parser
,
log_file_name
)
# assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
...
...
DHParser/error.py
View file @
27c232b9
...
...
@@ -87,7 +87,8 @@ __all__ = ('ErrorCode',
'BAD_REPETITION_COUNT'
,
'TREE_PROCESSING_CRASH'
,
'COMPILER_CRASH'
,
'AST_TRANSFORM_CRASH'
)
'AST_TRANSFORM_CRASH'
,
'RECURSION_DEPTH_LIMIT_HIT'
)
class
ErrorCode
(
int
):
...
...
@@ -152,6 +153,7 @@ BAD_REPETITION_COUNT = ErrorCode(1580)
TREE_PROCESSING_CRASH
=
ErrorCode
(
10100
)
COMPILER_CRASH
=
ErrorCode
(
10200
)
AST_TRANSFORM_CRASH
=
ErrorCode
(
10300
)
RECURSION_DEPTH_LIMIT_HIT
=
ErrorCode
(
10400
)
class
Error
:
...
...
DHParser/parse.py
View file @
27c232b9
...
...
@@ -42,7 +42,8 @@ from DHParser.error import Error, ErrorCode, is_error, MANDATORY_CONTINUATION, \
MALFORMED_ERROR_STRING
,
MANDATORY_CONTINUATION_AT_EOF
,
DUPLICATE_PARSERS_IN_ALTERNATIVE
,
\
CAPTURE_WITHOUT_PARSERNAME
,
CAPTURE_DROPPED_CONTENT_WARNING
,
LOOKAHEAD_WITH_OPTIONAL_PARSER
,
\
BADLY_NESTED_OPTIONAL_PARSER
,
BAD_ORDER_OF_ALTERNATIVES
,
BAD_MANDATORY_SETUP
,
\
OPTIONAL_REDUNDANTLY_NESTED_WARNING
,
CAPTURE_STACK_NOT_EMPTY
,
BAD_REPETITION_COUNT
,
AUTORETRIEVED_SYMBOL_NOT_CLEARED
OPTIONAL_REDUNDANTLY_NESTED_WARNING
,
CAPTURE_STACK_NOT_EMPTY
,
BAD_REPETITION_COUNT
,
\
AUTORETRIEVED_SYMBOL_NOT_CLEARED
,
RECURSION_DEPTH_LIMIT_HIT
from
DHParser.log
import
CallItem
,
HistoryRecord
from
DHParser.preprocess
import
BEGIN_TOKEN
,
END_TOKEN
,
RX_TOKEN_NAME
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
...
...
@@ -482,11 +483,13 @@ class Parser:
# and left recursion algorithm?
visited
[
location
]
=
(
node
,
rest
)
except
RecursionError
:
except
RecursionError
as
e
:
node
=
Node
(
ZOMBIE_TAG
,
str
(
text
[:
min
(
10
,
max
(
1
,
text
.
find
(
"
\n
"
)))])
+
" ..."
)
node
.
_pos
=
location
grammar
.
tree__
.
new_error
(
node
,
"maximum recursion depth of parser reached; "
"potentially due to too many errors!"
)
error
=
Error
(
"maximum recursion depth of parser reached; potentially due to too many "
"errors or left recursion!"
,
location
,
RECURSION_DEPTH_LIMIT_HIT
)
grammar
.
tree__
.
add_error
(
node
,
error
)
grammar
.
most_recent_error__
=
ParserError
(
node
,
text
,
error
,
first_throw
=
False
)
rest
=
EMPTY_STRING_VIEW
return
node
,
rest
...
...
@@ -3198,48 +3201,8 @@ class Forward(UnaryParser):
# TODO: need a unit-test concerning interference of variable manipulation
# and left recursion algorithm?
visited
[
location
]
=
(
node
,
rest
)
return
node
,
rest
# # TODO: For indirect recursion, recursion counters should not only
# # depend on location, but on location and call stack depth
# location = self.grammar.document_length__ - text._len
# depth, oracle = self.recursion.get(location, (-1, -1))
# if oracle >= 0:
# if depth >= oracle:
# self.recursion[location] = (0, oracle + 1)
# node, _text = None, text
# else:
# self.recursion[location] = (depth + 1, oracle)
# node, _text = self.parser(text)
# oracle = self.recursion[location][1]
# self.recursion[location] = (depth, oracle)
# self.memoization = self.grammar.memoization__
# self.grammar.memoization__ = False
# return node, _text
# else:
# self.recursion[location] = (0, 0)
# longest = None, text
# length = 0
# while True:
# node, text_ = self.parser(text)
# depth, oracle = self.recursion[location]
# if oracle == 0:
# longest = node, text_
# break
# elif node is None:
# break
# else:
# l = len(node)
# if l <= length:
# break
# length = l
# longest = node, text_
# self.recursion[location] = (-1, -1)
# self.grammar.memoization__ = self.memoization
# return longest
def
set_proxy
(
self
,
proxy
:
Optional
[
ParseFunc
]):
"""`set_proxy` has no effects on Forward-objects!"""
return
...
...
DHParser/syntaxtree.py
View file @
27c232b9
...
...
@@ -1655,6 +1655,7 @@ class RootNode(Node):
else
:
assert
isinstance
(
node
,
Node
)
assert
isinstance
(
node
,
FrozenNode
)
or
node
.
pos
<=
error
.
pos
,
\
"Wrong error position when processing error: %s
\n
"
%
str
(
error
)
+
\
"%i <= %i <= %i ?"
%
(
node
.
pos
,
error
.
pos
,
node
.
pos
+
max
(
1
,
len
(
node
)
-
1
))
# assert node.pos == error.pos or isinstance(node, FrozenNode)
self
.
error_nodes
.
setdefault
(
id
(
node
),
[]).
append
(
error
)
...
...
DHParser/trace.py
View file @
27c232b9
...
...
@@ -26,7 +26,7 @@ Grammar-object.
from
typing
import
Tuple
,
Optional
,
List
,
Iterable
,
Union
from
DHParser.error
import
Error
,
RESUME_NOTICE
from
DHParser.error
import
Error
,
RESUME_NOTICE
,
RECURSION_DEPTH_LIMIT_HIT
from
DHParser.stringview
import
StringView
from
DHParser.syntaxtree
import
Node
,
REGEXP_PTYPE
,
TOKEN_PTYPE
,
WHITESPACE_PTYPE
,
ZOMBIE_TAG
from
DHParser.log
import
freeze_callstack
,
HistoryRecord
,
NONE_NODE
...
...
@@ -46,6 +46,9 @@ def trace_history(self: Parser, text: StringView) -> Tuple[Optional[Node], Strin
# add resume notice (mind that skip notices are added by
# `parse.MandatoryElementsParser.mandatory_violation()`)
mre
=
grammar
.
most_recent_error__
# type: ParserError
if
mre
.
error
.
code
==
RECURSION_DEPTH_LIMIT_HIT
:
return
mre
.
node
,
text
grammar
.
most_recent_error__
=
None
errors
=
[
mre
.
error
]
# type: List[Error]
text_
=
grammar
.
document__
[
mre
.
error
.
pos
:]
...
...
tests/test_parse.py
View file @
27c232b9
...
...
@@ -111,18 +111,26 @@ class TestParserClass:
class
TestInfiLoopsAndRecursion
:
def
setup
(
self
):
pass
# set_config_value('history_tracking', True)
# set_config_value('resume_notices', True)
# set_config_value('log_syntax_trees', set(('cst', 'ast')))
# start_logging('LOGS')
def
test_very_simple
(
self
):
minilang
=
"""
term = term (`*`|`/`) factor | factor
factor = /[0-9]+/
"""
parser
=
grammar_provider
(
minilang
)()
grammar_factory
=
grammar_provider
(
minilang
)
parser
=
grammar_factory
()
snippet
=
"5*4*3*2"
parser
.
history_tracking__
=
True
set_tracer
(
parser
,
trace_history
)
start_logging
(
'LOGS'
)
# set_tracer(parser, trace_history)
st
=
parser
(
snippet
)
log_parsing_history
(
parser
,
'recursion_simple_test'
)
if
is_logging
():
log_ST
(
st
,
'test_LeftRecursion_very_simple.cst'
)
log_parsing_history
(
parser
,
'test_LeftRecurion_very_simple'
)
assert
not
is_error
(
st
.
error_flag
),
str
(
st
.
errors
)
def
test_direct_left_recursion1
(
self
):
...
...
@@ -139,8 +147,8 @@ class TestInfiLoopsAndRecursion:
assert
not
is_error
(
syntax_tree
.
error_flag
),
str
(
syntax_tree
.
errors_sorted
)
assert
snippet
==
syntax_tree
.
content
,
str
(
syntax_tree
)
if
is_logging
():
log_ST
(
syntax_tree
,
"test_LeftRecursion_direct.cst"
)
log_parsing_history
(
parser
,
"test_LeftRecursion_direct"
)
log_ST
(
syntax_tree
,
"test_LeftRecursion_direct
1
.cst"
)
log_parsing_history
(
parser
,
"test_LeftRecursion_direct
1
"
)
def
test_direct_left_recursion2
(
self
):
minilang
=
"""@literalws = right
...
...
@@ -155,6 +163,9 @@ class TestInfiLoopsAndRecursion:
syntax_tree
=
parser
(
snippet
)
assert
not
is_error
(
syntax_tree
.
error_flag
),
syntax_tree
.
errors_sorted
assert
snippet
==
syntax_tree
.
content
if
is_logging
():
log_ST
(
syntax_tree
,
"test_LeftRecursion_direct2.cst"
)
log_parsing_history
(
parser
,
"test_LeftRecursion_direct2"
)
def
test_indirect_left_recursion1
(
self
):
minilang
=
"""@literalws = right
...
...
@@ -175,30 +186,38 @@ class TestInfiLoopsAndRecursion:
syntax_tree
=
parser
(
snippet
)
assert
not
is_error
(
syntax_tree
.
error_flag
),
syntax_tree
.
errors_sorted
assert
snippet
==
syntax_tree
.
content
snippet
=
"9 + 8 * (4 + 3 * (5 + 1))"
syntax_tree
=
parser
(
snippet
)
assert
not
is_error
(
syntax_tree
.
error_flag
),
syntax_tree
.
errors_sorted
assert
snippet
==
syntax_tree
.
content
if
is_logging
():
log_ST
(
syntax_tree
,
"test_LeftRecursion_indirect.cst"
)
log_parsing_history
(
parser
,
"test_LeftRecursion_indirect"
)
# # BEWARE: EXPERIMENTAL TEST can be long running
# def test_indirect_left_recursion2(self):
# arithmetic_syntax = """@literalws = right
# expression = addition | subtraction
# addition = (expression | term) "+" (expression | term)
# subtraction = (expression | term) "-" (expression | term)
# term = multiplication | division
# multiplication = (term | factor) "*" (term | factor)
# division = (term | factor) "/" (term | factor)
# factor = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
# group = "(" expression ")"
# SIGN = /[+-]/
# NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
# VARIABLE = /[A-Za-z]/~
# """
# arithmetic = grammar_provider(arithmetic_syntax)()
# arithmetic.left_recursion_depth__ = 2
# assert arithmetic
# syntax_tree = arithmetic("(a + b) * (a - b)")
# assert syntax_tree.errors
log_ST
(
syntax_tree
,
"test_LeftRecursion_indirect1.cst"
)
log_parsing_history
(
parser
,
"test_LeftRecursion_indirect1"
)
# BEWARE: EXPERIMENTAL TEST can be long running
def test_indirect_left_recursion2(self):
    """Parse an arithmetic expression with an indirectly left-recursive grammar.

    `expression` and `term` reach themselves only through their
    alternatives (`addition`/`subtraction`, `multiplication`/`division`).
    The grammar is instantiated with `left_recursion_depth__` set to 2,
    and the test passes when the resulting syntax tree reports errors.

    Experimental; may run for a long time.
    """
    # The grammar text is kept exactly as written, including the
    # commented-out `| term` / `| factor` alternatives.
    grammar_source = """@literalws = right
expression = addition | subtraction # | term
addition = (expression | term) "+" (expression | term)
subtraction = (expression | term) "-" (expression | term)
term = multiplication | division # | factor
multiplication = (term | factor) "*" (term | factor)
division = (term | factor) "/" (term | factor)
factor = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
group = "(" expression ")"
SIGN = /[+-]/
NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
VARIABLE = /[A-Za-z]/~
"""
    grammar_factory = grammar_provider(grammar_source)
    grammar = grammar_factory()
    grammar.left_recursion_depth__ = 2
    assert grammar

    tree = grammar("(a + b) * (a - b)")
    assert tree.errors

    # Dump the tree and the parsing history only when logging is switched on.
    if not is_logging():
        return
    log_ST(tree, "test_LeftRecursion_indirect2.cst")
    log_parsing_history(grammar, "test_LeftRecursion_indirect2")
def
test_break_inifnite_loop_ZeroOrMore
(
self
):
forever
=
ZeroOrMore
(
RegExp
(
''
))
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment