Commit fbc08ddc authored by eckhart

- DHParser/testing.py: fix for lookahead parser error handling
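
  mandatory_violation() now receives an additional failed_on_lookahead flag
  and reports MANDATORY_CONTINUATION_AT_EOF only if the violating parser was
  a lookahead parser and the text is exhausted; otherwise a plain
  MANDATORY_CONTINUATION error is emitted. Series and AllOf pass the flag at
  their call sites. In DHParser/testing.py the bookkeeping sets for lookahead
  detection are replaced by a simple recursive has_lookahead() check, and
  lookahead_artifact() now decides on the error codes alone instead of
  inspecting the call stack of the parsing history. The remaining changes
  turn regex string literals into raw strings.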

parent e8025e8c
@@ -1512,6 +1512,7 @@ NO_MANDATORY = 1000
 def mandatory_violation(grammar: Grammar,
                         text_: StringView,
+                        failed_on_lookahead: bool,
                         expected: str,
                         err_msgs: MessagesType,
                         reloc: int) -> Tuple[Error, Node, StringView]:
@@ -1526,6 +1527,8 @@ def mandatory_violation(grammar: Grammar,
     :param grammar: the grammar
     :param text_: the point where the mandatory violation occurred. As usual
             the string view represents the remaining text from this point.
+    :param failed_on_lookahead: True if the violating parser was a
+            Lookahead-Parser.
     :param expected: the expected (but not found) text at this point.
     :param err_msgs: A list of pairs of regular expressions (or simple
             strings for that matter) and error messages that are chosen
@@ -1556,8 +1559,8 @@ def mandatory_violation(grammar: Grammar,
         grammar.tree__.add_error(err_node, error)
     else:
         msg = '%s expected, "%s" found!' % (expected, found)
-        error = Error(msg, location, Error.MANDATORY_CONTINUATION if text_
-                      else Error.MANDATORY_CONTINUATION_AT_EOF)
+        error = Error(msg, location, Error.MANDATORY_CONTINUATION_AT_EOF
+                      if (failed_on_lookahead and not text_) else Error.MANDATORY_CONTINUATION)
         grammar.tree__.add_error(err_node, error)
     return error, err_node, text_[i:]
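In effect, the condition is inverted: a mandatory violation now only counts as MANDATORY_CONTINUATION_AT_EOF when the failing parser was a lookahead and no input remains, whereas previously every violation at the end of the text received that code. A minimal sketch of the new decision logic (plain Python, not DHParser code; the string return values merely stand in for the numeric error codes):

# Sketch: mirror of the fixed error-code selection in mandatory_violation()
def classify(failed_on_lookahead: bool, rest: str) -> str:
    return ('MANDATORY_CONTINUATION_AT_EOF'
            if failed_on_lookahead and not rest
            else 'MANDATORY_CONTINUATION')

assert classify(True, '') == 'MANDATORY_CONTINUATION_AT_EOF'
assert classify(True, 'rest') == 'MANDATORY_CONTINUATION'
assert classify(False, '') == 'MANDATORY_CONTINUATION'  # EOF alone no longer suffices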
@@ -1637,7 +1640,8 @@ class Series(NaryParser):
                 else:
                     reloc = reentry_point(text_, self.skip) if self.skip else -1
                     error, node, text_ = mandatory_violation(
-                        self.grammar, text_, parser.repr, self.err_msgs, reloc)
+                        self.grammar, text_, isinstance(parser, Lookahead), parser.repr,
+                        self.err_msgs, reloc)
                     # check if parsing of the series can be resumed somewhere
                     if reloc >= 0:
                         nd, text_ = parser(text_)  # try current parser again
@@ -1859,8 +1863,9 @@ class AllOf(NaryParser):
                 else:
                     reloc = reentry_point(text_, self.skip) if self.skip else -1
                     expected = '< ' + ' '.join([parser.repr for parser in parsers]) + ' >'
+                    lookahead = any(isinstance(p, Lookahead) for p in parsers)
                     error, err_node, text_ = mandatory_violation(
-                        self.grammar, text_, expected, self.err_msgs, reloc)
+                        self.grammar, text_, lookahead, expected, self.err_msgs, reloc)
                     results += (err_node,)
                     if reloc < 0:
                         parsers = []
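Note the difference between the two call sites: Series knows exactly which parser of the sequence failed and can test it directly with isinstance(parser, Lookahead), while AllOf matches its parsers in arbitrary order, so the violation is attributed to a lookahead whenever any of the still-pending parsers is a Lookahead.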
...
@@ -151,7 +151,7 @@ def unit_from_config(config_str):
         section_match = RX_SECTION.match(cfg, pos)

-    if pos != len(cfg) and not re.match('\s+$', cfg[pos:]):
+    if pos != len(cfg) and not re.match(r'\s+$', cfg[pos:]):
         raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 1))
     return unit
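Most of the remaining hunks of this commit are of the same kind as this one: ordinary string literals containing regular-expression escapes such as \s or \w become raw strings. A small illustration of why this matters (plain Python, independent of DHParser):

import re

# Python leaves unknown escape sequences like '\s' in ordinary string
# literals untouched, so both spellings currently denote the same pattern --
# but that fallback has been deprecated since Python 3.6 and will
# eventually become an error:
assert '\\s+$' == r'\s+$'
assert re.match(r'\s+$', '   \n')  # matches text consisting only of whitespace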
@@ -322,39 +322,18 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
     parser = parser_factory()
     transform = transformer_factory()

-    # is_lookahead = set()  # type: Set[str]  # Dictionary of parser names
-    with_lookahead = set()  # type: Set[Optional[Parser]]
-    # lookahead_flag = False  # type: bool
-
-    def find_lookahead(p: Parser):
-        """Raises a StopIterationError if parser `p` is or contains
-        a Lookahead-parser."""
-        nonlocal is_lookahead, with_lookahead, lookahead_flag
-        if p in with_lookahead:
-            lookahead_flag = True
-        else:
-            if isinstance(p, Lookahead):
-                is_lookahead.add(p.tag_name)
-                with_lookahead.add(p)
-                lookahead_flag = True
-            else:
-                if any(child for child in (getattr(p, 'parsers', [])
-                       or [getattr(p, 'parser', None)]) if isinstance(child, Lookahead)):
-                    with_lookahead.add(p)
-                    lookahead_flag = True
-
-    def has_lookahead(parser_name: str):
-        """Returns `True`, if given parser is or contains a Lookahead-parser."""
-        nonlocal is_lookahead, with_lookahead, lookahead_flag, parser
-        p = parser[parser_name]
-        if p in with_lookahead:
-            return True
-        lookahead_flag = False
-        p.apply(find_lookahead)
-        if lookahead_flag:
-            with_lookahead.add(p)
-            return True
-        return False
+    def has_lookahead(parser_name: str) -> bool:
+        """Returns True if the parser or any of its descendant parsers is a
+        Lookahead parser."""
+        lookahead_found = False
+
+        def find_lookahead(p: Parser):
+            nonlocal lookahead_found
+            if not lookahead_found:
+                lookahead_found = isinstance(p, Lookahead)
+
+        parser[parser_name].apply(find_lookahead)
+        return lookahead_found

     def lookahead_artifact(parser, raw_errors):
         """
@@ -364,14 +343,12 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
         This is required for testing of parsers that put a lookahead
         operator at the end. See test_testing.TestLookahead.
         """
-        nonlocal is_lookahead
         return ((len(raw_errors) == 2  # case 1: superfluous data for lookahead
                  and raw_errors[-1].code == Error.PARSER_LOOKAHEAD_MATCH_ONLY
                  and raw_errors[-2].code == Error.PARSER_STOPPED_BEFORE_END)
                 # case 2: mandatory lookahead failure at end of text
                 or (len(raw_errors) == 1
-                    and raw_errors[-1].code == Error.MANDATORY_CONTINUATION_AT_EOF)
-                and any(tn in with_lookahead for tn in parser.history__[-1].call_stack))
+                    and raw_errors[-1].code == Error.MANDATORY_CONTINUATION_AT_EOF))

     for parser_name, tests in test_unit.items():
         assert parser_name, "Missing parser name in test %s!" % unit_name
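For illustration, these are the two shapes of error lists that lookahead_artifact() now accepts, using the Error(message, location, code) signature seen above; the messages and locations are invented for the example:

# case 1: the parser succeeded, but only stopped short of the end of the
# text because of a trailing lookahead
case_1 = [Error('parser stopped before end', 0, Error.PARSER_STOPPED_BEFORE_END),
          Error('lookahead match only', 0, Error.PARSER_LOOKAHEAD_MATCH_ONLY)]

# case 2: a mandatory lookahead failed exactly at the end of the text --
# the situation that the new failed_on_lookahead flag distinguishes
case_2 = [Error('continuation expected', 0, Error.MANDATORY_CONTINUATION_AT_EOF)]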
@@ -566,12 +543,12 @@ def grammar_suite(directory, parser_factory, transformer_factory,
 ########################################################################

-RX_DEFINITION_OR_SECTION = re.compile('(?:^|\n)[ \t]*(\w+(?=[ \t]*=)|#:.*(?=\n|$|#))')
+RX_DEFINITION_OR_SECTION = re.compile(r'(?:^|\n)[ \t]*(\w+(?=[ \t]*=)|#:.*(?=\n|$|#))')

 SymbolsDictType = Dict[str, List[str]]

 def extract_symbols(ebnf_text_or_file: str) -> SymbolsDictType:
-    """
+    r"""
     Extracts all defined symbols from an EBNF-grammar. This can be used to
     prepare grammar-tests. The symbols will be returned as lists of strings
     which are grouped by the sections to which they belong and returned as
@@ -606,7 +583,7 @@ def extract_symbols(ebnf_text_or_file: str) -> SymbolsDictType:
     to lists of symbols that appear under that section.
     """
     def trim_section_name(name: str) -> str:
-        return re.sub('[^\w-]', '_', name.replace('#:', '').strip())
+        return re.sub(r'[^\w-]', '_', name.replace('#:', '').strip())

     ebnf = load_if_file(ebnf_text_or_file)
     deflist = RX_DEFINITION_OR_SECTION.findall(ebnf)
...
@@ -58,7 +58,7 @@ class TestCompileFunctions:

 class TestCompilerGeneration:
-    trivial_lang = """
+    trivial_lang = r"""
         text = { word | WSPC } "." [/\s/]
         word = /\w+/
         WSPC = /\s+/
...
@@ -346,7 +346,7 @@ class TestBoundaryCases:
                 "not an error: " + str(messages)
             grammar_src = result
             grammar = compile_python_object(DHPARSER_IMPORTS + grammar_src,
-                                            'get_(?:\w+_)?grammar$')()
+                                            r'get_(?:\w+_)?grammar$')()
         else:
             assert False, "EBNF compiler should warn about unconnected rules."
@@ -405,7 +405,7 @@ class TestFlowControlOperators:
         """Tests whether failures to comply with the required operator '§'
         are correctly reported as such.
         """
-        lang1 = "nonsense == /\w+/~ # wrong_equal_sign"
+        lang1 = r"nonsense == /\w+/~ # wrong_equal_sign"
         lang2 = "nonsense = [^{}%]+ # someone forgot the '/'-delimiters for regular expressions"
         try:
             parser_class = grammar_provider(lang1)
@@ -477,6 +477,7 @@ class TestErrorCustomization:
         st = parser("ABCD"); assert not st.error_flag
         st = parser("A_CD"); assert not st.error_flag
         st = parser("AB_D"); assert st.error_flag
+        print(st.errors_sorted)
         assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
         assert st.errors_sorted[0].message == "a user defined error message"
         # transitivity of mandatory-operator
@@ -497,7 +498,7 @@ class TestErrorCustomization:
         assert st.errors_sorted[0].message == "a user defined error message"

     def test_multiple_error_messages(self):
-        lang = """
+        lang = r"""
             document = series | /.*/
             @series_error = '_', "the underscore is wrong in this place"
             @series_error = '*', "the asterix is wrong in this place"
@@ -573,7 +574,7 @@ class TestErrorCustomizationErrors:

 class TestCustomizedResumeParsing:
     def setup(self):
-        lang = """
+        lang = r"""
            @ alpha_resume = 'BETA', GAMMA_STR
            @ beta_resume = GAMMA_RE
            @ bac_resume = /GA\w+/
...
@@ -133,10 +133,10 @@ class TestFlowControl:
         self.t2 = "All word and not play makes Jack a dull boy END\n"

     def test_lookbehind(self):
-        ws = RegExp('\s*')
+        ws = RegExp(r'\s*')
         end = RegExp("END")
         doc_end = Lookbehind(RegExp('\\s*?\\n')) + end
-        word = RegExp('\w+')
+        word = RegExp(r'\w+')
         sequence = OneOrMore(NegativeLookahead(end) + word + ws)
         document = ws + sequence + doc_end + ws
@@ -149,11 +149,11 @@ class TestFlowControl:
     def test_lookbehind_indirect(self):
         class LookbehindTestGrammar(Grammar):
             parser_initialization__ = ["upon instantiation"]
-            ws = RegExp('\\s*')
+            ws = RegExp(r'\s*')
             end = RegExp('END')
             SUCC_LB = RegExp('\\s*?\\n')
             doc_end = Series(Lookbehind(SUCC_LB), end)
-            word = RegExp('\w+')
+            word = RegExp(r'\w+')
             sequence = OneOrMore(Series(NegativeLookahead(end), word, ws))
             document = Series(ws, sequence, doc_end, ws)
             root__ = document
@@ -176,7 +176,7 @@ class TestRegex:
             get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
         assert result
         assert not messages, str(messages)
-        parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
+        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
         node = parser('abc+def', parser.regex)
         assert not node.error_flag
         assert node.tag_name == "regex"
@@ -192,7 +192,7 @@ class TestRegex:
             get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
         assert result
         assert not messages, str(messages)
-        parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
+        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
         node = parser('abc+def', parser.regex)
         assert not node.error_flag
         assert node.tag_name == "regex"
@@ -207,7 +207,7 @@ class TestRegex:
             get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
         assert result
         assert not messages
-        parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
+        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
         node, rest = parser.regex('Alpha')
         assert node
         assert not node.error_flag
@@ -223,7 +223,7 @@ class TestRegex:
             get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
         assert result
         assert not messages
-        parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
+        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
         node, rest = parser.regex('Alpha')
         assert node.error_flag
@@ -244,7 +244,7 @@ class TestRegex:
             get_ebnf_transformer(), get_ebnf_compiler("TokenTest"))
         assert result
         assert not messages, str(messages)
-        parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
+        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
         result = parser(testdoc)
         # log_parsing_history(parser, "test.log")
         assert not result.error_flag
@@ -267,13 +267,13 @@ class TestGrammar:
         # checks whether pos values in the parsing result and in the
         # history record have been initialized
         with logging("LOGS"):
-            grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
+            grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
             grammar("no_file_name*")
         for record in grammar.history__:
             assert not record.node or record.node.pos >= 0

     def test_select_parsing(self):
-        grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
+        grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
         grammar("wort", "WORT")
         grammar("eine Zeile", "textzeile")
         grammar("kein Haupt", "haupt")
@@ -281,7 +281,7 @@ class TestGrammar:

     def test_grammar_subclassing(self):
         class Arithmetic(Grammar):
-            '''
+            r'''
             expression = term { ("+" | "-") term }
             term = factor { ("*" | "/") factor }
             factor = INTEGER | "(" expression ")"
@@ -413,14 +413,14 @@ class TestAllOfSomeOf:

 class TestPopRetrieve:
-    mini_language = """
+    mini_language = r"""
         document = { text | codeblock }
         codeblock = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
         delimiter = delimiter_sign  # never use delimiter between capture and pop except for retrival!
         delimiter_sign = /`+/
         text = /[^`]+/
         """
-    mini_lang2 = """
+    mini_lang2 = r"""
         @braces_filter=counterpart
         document = { text | codeblock }
         codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
@@ -429,7 +429,7 @@ class TestPopRetrieve:
         closing_braces = /\}+/
         text = /[^{}]+/
         """
-    mini_lang3 = """
+    mini_lang3 = r"""
         document = { text | env }
         env = (specialtag | opentag) text [closespecial | closetag]
         opentag = "<" name ">"
@@ -485,7 +485,7 @@ class TestPopRetrieve:
     def test_cache_neutrality(self):
         """Test that packrat-caching does not interfere with the variable-
         changing parsers: Capture and Retrieve."""
-        lang = """
+        lang = r"""
             text = opening closing
             opening = (unmarked_package | marked_package)
             closing = ::variable
@@ -758,7 +758,7 @@ class TestUnknownParserError:

 class TestEarlyTokenWhitespaceDrop:
     def setup(self):
-        self.lang = """
+        self.lang = r"""
            @ drop = token, whitespace
            expression = term { ("+" | "-") term}
            term = factor { ("*"|"/") factor}
...
@@ -206,7 +206,7 @@ class TestRootNode:
         assert error_str.find("A") < error_str.find("B")

     def test_error_reporting(self):
-        number = RE('\d+') | RE('\d+') + RE('\.') + RE('\d+')
+        number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
         result = str(Grammar(number)("3.1416"))
         assert result == '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> ', \
             str(result)
...