Commit 27c232b9 authored by eckhart
Browse files

parse.py: bug fix left recursion algorithm

parent e8e5f974
......@@ -301,7 +301,11 @@ def compile_source(source: str,
ast = None # type: Optional[Node]
original_text = load_if_file(source) # type: str
log_file_name = logfile_basename(source, compiler) if is_logging() else '' # type: str
log_syntax_trees = get_config_value('log_syntax_trees')
if not hasattr(parser, 'free_char_parsefunc__') or parser.history_tracking__:
# log only for custom parser/transformer/compilers
log_syntax_trees = get_config_value('log_syntax_trees')
else:
log_syntax_trees = set()
# preprocessing
......@@ -316,7 +320,7 @@ def compile_source(source: str,
syntax_tree = parser(source_text) # type: RootNode
if 'cst' in log_syntax_trees:
log_ST(syntax_tree, log_file_name + '.cst')
if get_config_value('history_tracking'):
if parser.history_tracking__:
log_parsing_history(parser, log_file_name)
# assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
......
......@@ -87,7 +87,8 @@ __all__ = ('ErrorCode',
'BAD_REPETITION_COUNT',
'TREE_PROCESSING_CRASH',
'COMPILER_CRASH',
'AST_TRANSFORM_CRASH')
'AST_TRANSFORM_CRASH',
'RECURSION_DEPTH_LIMIT_HIT')
class ErrorCode(int):
......@@ -152,6 +153,7 @@ BAD_REPETITION_COUNT = ErrorCode(1580)
TREE_PROCESSING_CRASH = ErrorCode(10100)
COMPILER_CRASH = ErrorCode(10200)
AST_TRANSFORM_CRASH = ErrorCode(10300)
RECURSION_DEPTH_LIMIT_HIT = ErrorCode(10400)
class Error:
......
......@@ -42,7 +42,8 @@ from DHParser.error import Error, ErrorCode, is_error, MANDATORY_CONTINUATION, \
MALFORMED_ERROR_STRING, MANDATORY_CONTINUATION_AT_EOF, DUPLICATE_PARSERS_IN_ALTERNATIVE, \
CAPTURE_WITHOUT_PARSERNAME, CAPTURE_DROPPED_CONTENT_WARNING, LOOKAHEAD_WITH_OPTIONAL_PARSER, \
BADLY_NESTED_OPTIONAL_PARSER, BAD_ORDER_OF_ALTERNATIVES, BAD_MANDATORY_SETUP, \
OPTIONAL_REDUNDANTLY_NESTED_WARNING, CAPTURE_STACK_NOT_EMPTY, BAD_REPETITION_COUNT, AUTORETRIEVED_SYMBOL_NOT_CLEARED
OPTIONAL_REDUNDANTLY_NESTED_WARNING, CAPTURE_STACK_NOT_EMPTY, BAD_REPETITION_COUNT, \
AUTORETRIEVED_SYMBOL_NOT_CLEARED, RECURSION_DEPTH_LIMIT_HIT
from DHParser.log import CallItem, HistoryRecord
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
......@@ -482,11 +483,13 @@ class Parser:
# and left recursion algorithm?
visited[location] = (node, rest)
except RecursionError:
except RecursionError as e:
node = Node(ZOMBIE_TAG, str(text[:min(10, max(1, text.find("\n")))]) + " ...")
node._pos = location
grammar.tree__.new_error(node, "maximum recursion depth of parser reached; "
"potentially due to too many errors!")
error = Error("maximum recursion depth of parser reached; potentially due to too many "
"errors or left recursion!", location, RECURSION_DEPTH_LIMIT_HIT)
grammar.tree__.add_error(node, error)
grammar.most_recent_error__ = ParserError(node, text, error, first_throw=False)
rest = EMPTY_STRING_VIEW
return node, rest
......@@ -3198,48 +3201,8 @@ class Forward(UnaryParser):
# TODO: need a unit-test concerning interference of variable manipulation
# and left recursion algorithm?
visited[location] = (node, rest)
return node, rest
# # TODO: For indirect recursion, recursion counters should not only
# # depend on location, but on location and call stack depth
# location = self.grammar.document_length__ - text._len
# depth, oracle = self.recursion.get(location, (-1, -1))
# if oracle >= 0:
# if depth >= oracle:
# self.recursion[location] = (0, oracle + 1)
# node, _text = None, text
# else:
# self.recursion[location] = (depth + 1, oracle)
# node, _text = self.parser(text)
# oracle = self.recursion[location][1]
# self.recursion[location] = (depth, oracle)
# self.memoization = self.grammar.memoization__
# self.grammar.memoization__ = False
# return node, _text
# else:
# self.recursion[location] = (0, 0)
# longest = None, text
# length = 0
# while True:
# node, text_ = self.parser(text)
# depth, oracle = self.recursion[location]
# if oracle == 0:
# longest = node, text_
# break
# elif node is None:
# break
# else:
# l = len(node)
# if l <= length:
# break
# length = l
# longest = node, text_
# self.recursion[location] = (-1, -1)
# self.grammar.memoization__ = self.memoization
# return longest
def set_proxy(self, proxy: Optional[ParseFunc]):
"""`set_proxy` has no effects on Forward-objects!"""
return
......
......@@ -1655,6 +1655,7 @@ class RootNode(Node):
else:
assert isinstance(node, Node)
assert isinstance(node, FrozenNode) or node.pos <= error.pos, \
"Wrong error position when processing error: %s\n" % str(error) + \
"%i <= %i <= %i ?" % (node.pos, error.pos, node.pos + max(1, len(node) - 1))
# assert node.pos == error.pos or isinstance(node, FrozenNode)
self.error_nodes.setdefault(id(node), []).append(error)
......
......@@ -26,7 +26,7 @@ Grammar-object.
from typing import Tuple, Optional, List, Iterable, Union
from DHParser.error import Error, RESUME_NOTICE
from DHParser.error import Error, RESUME_NOTICE, RECURSION_DEPTH_LIMIT_HIT
from DHParser.stringview import StringView
from DHParser.syntaxtree import Node, REGEXP_PTYPE, TOKEN_PTYPE, WHITESPACE_PTYPE, ZOMBIE_TAG
from DHParser.log import freeze_callstack, HistoryRecord, NONE_NODE
......@@ -46,6 +46,9 @@ def trace_history(self: Parser, text: StringView) -> Tuple[Optional[Node], Strin
# add resume notice (mind that skip notices are added by
# `parse.MandatoryElementsParser.mandatory_violation()`
mre = grammar.most_recent_error__ # type: ParserError
if mre.error.code == RECURSION_DEPTH_LIMIT_HIT:
return mre.node, text
grammar.most_recent_error__ = None
errors = [mre.error] # type: List[Error]
text_ = grammar.document__[mre.error.pos:]
......
......@@ -111,18 +111,26 @@ class TestParserClass:
class TestInfiLoopsAndRecursion:
def setup(self):
pass
# set_config_value('history_tracking', True)
# set_config_value('resume_notices', True)
# set_config_value('log_syntax_trees', set(('cst', 'ast')))
# start_logging('LOGS')
def test_very_simple(self):
minilang = """
term = term (`*`|`/`) factor | factor
factor = /[0-9]+/
"""
parser = grammar_provider(minilang)()
grammar_factory = grammar_provider(minilang)
parser = grammar_factory()
snippet = "5*4*3*2"
parser.history_tracking__ = True
set_tracer(parser, trace_history)
start_logging('LOGS')
# set_tracer(parser, trace_history)
st = parser(snippet)
log_parsing_history(parser, 'recursion_simple_test')
if is_logging():
log_ST(st, 'test_LeftRecursion_very_simple.cst')
log_parsing_history(parser, 'test_LeftRecurion_very_simple')
assert not is_error(st.error_flag), str(st.errors)
def test_direct_left_recursion1(self):
......@@ -139,8 +147,8 @@ class TestInfiLoopsAndRecursion:
assert not is_error(syntax_tree.error_flag), str(syntax_tree.errors_sorted)
assert snippet == syntax_tree.content, str(syntax_tree)
if is_logging():
log_ST(syntax_tree, "test_LeftRecursion_direct.cst")
log_parsing_history(parser, "test_LeftRecursion_direct")
log_ST(syntax_tree, "test_LeftRecursion_direct1.cst")
log_parsing_history(parser, "test_LeftRecursion_direct1")
def test_direct_left_recursion2(self):
minilang = """@literalws = right
......@@ -155,6 +163,9 @@ class TestInfiLoopsAndRecursion:
syntax_tree = parser(snippet)
assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
assert snippet == syntax_tree.content
if is_logging():
log_ST(syntax_tree, "test_LeftRecursion_direct2.cst")
log_parsing_history(parser, "test_LeftRecursion_direct2")
def test_indirect_left_recursion1(self):
minilang = """@literalws = right
......@@ -175,30 +186,38 @@ class TestInfiLoopsAndRecursion:
syntax_tree = parser(snippet)
assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
assert snippet == syntax_tree.content
snippet = "9 + 8 * (4 + 3 * (5 + 1))"
syntax_tree = parser(snippet)
assert not is_error(syntax_tree.error_flag), syntax_tree.errors_sorted
assert snippet == syntax_tree.content
if is_logging():
log_ST(syntax_tree, "test_LeftRecursion_indirect.cst")
log_parsing_history(parser, "test_LeftRecursion_indirect")
# # BEWARE: EXPERIMENTAL TEST can be long running
# def test_indirect_left_recursion2(self):
# arithmetic_syntax = """@literalws = right
# expression = addition | subtraction
# addition = (expression | term) "+" (expression | term)
# subtraction = (expression | term) "-" (expression | term)
# term = multiplication | division
# multiplication = (term | factor) "*" (term | factor)
# division = (term | factor) "/" (term | factor)
# factor = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
# group = "(" expression ")"
# SIGN = /[+-]/
# NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
# VARIABLE = /[A-Za-z]/~
# """
# arithmetic = grammar_provider(arithmetic_syntax)()
# arithmetic.left_recursion_depth__ = 2
# assert arithmetic
# syntax_tree = arithmetic("(a + b) * (a - b)")
# assert syntax_tree.errors
log_ST(syntax_tree, "test_LeftRecursion_indirect1.cst")
log_parsing_history(parser, "test_LeftRecursion_indirect1")
# BEWARE: EXPERIMENTAL TEST can be long running
def test_indirect_left_recursion2(self):
arithmetic_syntax = """@literalws = right
expression = addition | subtraction # | term
addition = (expression | term) "+" (expression | term)
subtraction = (expression | term) "-" (expression | term)
term = multiplication | division # | factor
multiplication = (term | factor) "*" (term | factor)
division = (term | factor) "/" (term | factor)
factor = [SIGN] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
group = "(" expression ")"
SIGN = /[+-]/
NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
VARIABLE = /[A-Za-z]/~
"""
arithmetic = grammar_provider(arithmetic_syntax)()
arithmetic.left_recursion_depth__ = 2
assert arithmetic
syntax_tree = arithmetic("(a + b) * (a - b)")
assert syntax_tree.errors
if is_logging():
log_ST(syntax_tree, "test_LeftRecursion_indirect2.cst")
log_parsing_history(arithmetic, "test_LeftRecursion_indirect2")
def test_break_inifnite_loop_ZeroOrMore(self):
forever = ZeroOrMore(RegExp(''))
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment