05.11., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 7f66e761 authored by eckhart's avatar eckhart

- In-Series-parser-recovery added and tested (not really powerful, unfortunately!)

parent 5a5efa7b
This diff is collapsed.
......@@ -73,7 +73,7 @@ class Error:
REDEFINED_DIRECTIVE_WARNING = ErrorCode(110)
REDECLARED_TOKEN_WARNING = ErrorCode(120)
UNUSED_ERROR_MSG_WARNING = ErrorCode(130)
UNUSED_ERROR_HANDLING_WARNING = ErrorCode(130)
UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING = ErrorCode(610)
......@@ -86,7 +86,7 @@ class Error:
PARSER_STOPPED_BEFORE_END = ErrorCode(1040)
CAPTURE_STACK_NOT_EMPTY = ErrorCode(1050)
MALFORMED_ERROR_STRING = ErrorCode(1060)
AMBIGUOUS_ERROR_MSG = ErrorCode(1070)
AMBIGUOUS_ERROR_HANDLING = ErrorCode(1070)
def __init__(self, message: str, pos, code: ErrorCode = ERROR,
orig_pos: int = -1, line: int = -1, column: int = -1) -> None:
......
......@@ -1377,24 +1377,31 @@ class Series(NaryOperator):
RX_ARGUMENT = re.compile(r'\s(\S)')
NOPE = 1000
MessageType = List[Tuple[Union[str, Any], str]]
MessagesType = List[Tuple[Union[str, Any], str]]
# Constructor of Series.
# NOTE: this span is a diff hunk rendered without +/- markers; where a
# statement appears twice in a row (signature, first assert message, the
# attribute assignments), the first occurrence is the pre-commit line and the
# second is its post-commit replacement.
#
# Parameters (post-commit signature):
#   parsers   -- the parsers making up the series, passed on to the superclass.
#   mandatory -- index of the first mandatory item; a negative value is counted
#                from the end (length is added); Series.NOPE means "not set".
#   err_msgs  -- custom error messages; only allowed if `mandatory` is set.
#   skip      -- search expressions for skipping text; only allowed if
#                `mandatory` is set.
# NOTE(review): `err_msgs` and `skip` default to list literals, and the very
# same objects are stored on the instance below, so all instances created with
# the defaults share one list -- safe only as long as nobody mutates it.
def __init__(self, *parsers: Parser, mandatory: int = NOPE, err_msgs: MessageType=[]) -> None:
def __init__(self, *parsers: Parser,
mandatory: int = NOPE,
err_msgs: MessagesType=[],
skip: ResumeList = []) -> None:
super().__init__(*parsers)
# Customizations are rejected unless a mandatory position was given.
assert not (mandatory == Series.NOPE and err_msgs), \
'Custom error messages only make sense if parameter "mandatory" is set!'
'Custom error messages require that parameter "mandatory" is set!'
assert not (mandatory == Series.NOPE and skip), \
'Search expressions for skipping text require that parameter "mandatory" is set!'
length = len(self.parsers)
# NOPE doubles as the "not set" sentinel, so a series must be shorter than it.
assert 1 <= length < Series.NOPE, \
'Length %i of series exceeds maximum length of %i' % (length, Series.NOPE)
# Normalize a negative (from-the-end) index to a non-negative one.
if mandatory < 0:
mandatory += length
assert 0 <= mandatory < length or mandatory == Series.NOPE
self.mandatory = mandatory
self.err_msgs = err_msgs
self.mandatory = mandatory # type: int
self.err_msgs = err_msgs # type: Series.MessagesType
self.skip = skip # type: ResumeList
# Deep-copy support: builds a new instance of the same class from deep copies
# of the contained parsers and carries over `name` and `ptype`.
# NOTE: this span is a diff hunk rendered without +/- markers; the first
# `duplicate = ...` line is the pre-commit call, the following two-line
# `duplicate = ...` statement (which also forwards `skip`) is its replacement.
def __deepcopy__(self, memo):
# Only the sub-parsers are deep-copied (using the shared memo).
parsers = copy.deepcopy(self.parsers, memo)
# `mandatory`, `err_msgs` and `skip` are handed over as they are, i.e. the
# copy references the same `err_msgs`/`skip` objects as the original.
duplicate = self.__class__(*parsers, mandatory=self.mandatory, err_msgs=self.err_msgs)
duplicate = self.__class__(*parsers, mandatory=self.mandatory,
err_msgs=self.err_msgs, skip=self.skip)
duplicate.name = self.name
duplicate.ptype = self.ptype
return duplicate
......@@ -1409,7 +1416,8 @@ class Series(NaryOperator):
if pos < self.mandatory:
return None, text
else:
i = 0
k = reentry_point(text_, self.skip) if self.skip else -1
i = k if k >= 0 else 0
location = self.grammar.document_length__ - len(text_)
node = Node(None, text_[:i]).init_pos(location)
found = text_[:10].replace('\n', '\\n ')
......@@ -1431,17 +1439,21 @@ class Series(NaryOperator):
else Error.MANDATORY_CONTINUATION_AT_EOF)
self.grammar.tree__.add_error(node, mandatory_violation)
text_ = text_[i:]
results += (node,)
# TODO: Add queue-jumping here (XXX_skip = Regex, Regex, Regex...)
break
# check if parsing of the series can be resumed somewhere
if k >= 0:
nd, text_ = parser(text_) # try current parser again
if nd:
results += (node,)
node = nd
else:
results += (node,)
break
results += (node,)
# if node.error_flag: # break on first error
# break
assert len(results) <= len(self.parsers)
assert len(results) <= len(self.parsers) \
or len(self.parsers) >= len([p for p in results if p.parser != ZOMBIE_PARSER])
node = Node(self, results)
if mandatory_violation:
raise ParserError(node, text, first_throw=True)
# self.grammar.tree__.add_error(node, mandatory_violation)
return node, text_
def __repr__(self):
......
......@@ -179,6 +179,16 @@ class TestParserNameOverwriteBug:
messages = st.collect_errors()
assert not has_errors(messages), str(messages)
def test_single_mandatory_bug(self):
    """Check a series that consists of a single mandatory item.

    The one-rule grammar ``series = § /B/`` is compiled; the compilation
    result must not contain the text 'Required', and a parser created from
    the same grammar must parse 'B' without setting the error flag.
    """
    grammar_source = """series = § /B/"""
    compiled, _messages, _ast = compile_ebnf(grammar_source)
    assert compiled.find('Required') < 0
    grammar = grammar_provider(grammar_source)()
    syntax_tree = grammar('B')
    assert not syntax_tree.error_flag
class TestSemanticValidation:
def check(self, minilang, bool_filter=lambda x: x):
......@@ -496,7 +506,7 @@ class TestErrorCustomization:
assert False, "CompilationError because of ambiguous error message exptected!"
except CompilationError as compilation_error:
err = next(compilation_error.errors)
assert err.code == Error.AMBIGUOUS_ERROR_MSG, str(compilation_error)
assert err.code == Error.AMBIGUOUS_ERROR_HANDLING, str(compilation_error)
def test_unsed_error_customization(self):
lang = """
......@@ -506,7 +516,7 @@ class TestErrorCustomization:
other = "X" | "Y" | "Z"
"""
result, messages, ast = compile_ebnf(lang)
assert messages[0].code == Error.UNUSED_ERROR_MSG_WARNING
assert messages[0].code == Error.UNUSED_ERROR_HANDLING_WARNING
class TestCustomizedResumeParsing:
......@@ -562,6 +572,48 @@ class TestCustomizedResumeParsing:
assert len(cst.collect_errors()) == 1
class TestInSeriesResume:
    """Tests for a series grammar that carries an ``@series_skip`` directive.

    Every test feeds a variation of the string 'ABCDEFG' to the parser built
    in :meth:`setup` and inspects the errors collected from the result.
    """

    def setup(self):
        """Compile the test grammar and keep a parser instance in ``self.gr``.

        A CompilationError is only printed; ``self.gr`` is not assigned in
        that case.
        """
        lang = """
document = series
@series_skip = /B/, /C/, /D/, /E/, /F/, /G/
series = "A" §"B" "C" "D" "E" "F" "G"
"""
        try:
            _result, _messages, _ast = compile_ebnf(lang)
            self.gr = grammar_provider(lang)()
        except CompilationError as compilation_error:
            print(compilation_error)

    def _errors_in(self, document):
        """Parse `document` with ``self.gr`` and return the collected errors."""
        syntax_tree = self.gr(document)
        return syntax_tree.collect_errors()

    def test_garbage_in_series(self):
        """Unexpected text between the items of the series."""
        syntax_tree = self.gr('ABCDEFG')
        assert not syntax_tree.error_flag

        found = self._errors_in('AB XYZ CDEFG')
        assert len(found) == 1
        assert found[0].code == Error.MANDATORY_CONTINUATION

        found = self._errors_in('AB XYZ CDE XYZ FG')
        assert len(found) == 2
        assert all(error.code == Error.MANDATORY_CONTINUATION for error in found)

        # fails to resume parsing
        found = self._errors_in('AB XYZ CDE XNZ FG')
        assert len(found) >= 1
        assert found[0].code == Error.MANDATORY_CONTINUATION

    def test_series_gap(self):
        """Items missing from, or replaced within, the series."""
        found = self._errors_in('ABDEFG')
        assert len(found) == 1
        assert found[0].code == Error.MANDATORY_CONTINUATION

        # two missing, one wrong element added
        found = self._errors_in('ABXEFG')
        assert len(found) == 2
        assert all(error.code == Error.MANDATORY_CONTINUATION for error in found)

        found = self._errors_in('AB_DE_G')
        assert len(found) == 2
        assert all(error.code == Error.MANDATORY_CONTINUATION for error in found)

    def test_series_permutation(self):
        """Two items of the series swapped."""
        found = self._errors_in('ABEDFG')
        # cannot really recover from permutation errors
        assert len(found) >= 1
if __name__ == "__main__":
from DHParser.testing import runner
......
......@@ -639,7 +639,6 @@ class TestReentryAfterError:
self.gr = grammar_provider(lang)()
def test_no_resume_rules(self):
# 1. no resume rules
gr = self.gr; gr.resume_rules = dict()
content = 'ALPHA acb BETA bac GAMMA cab .'
cst = gr(content)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment