The name of the initial branch for new projects is now "main" instead of "master". Existing projects remain unchanged. More information: https://doku.lrz.de/display/PUBLIC/GitLab

Commit fbc08ddc authored by eckhart's avatar eckhart
Browse files

- DHParser/testing.py: fix for lookahead-parser error handling

parent e8025e8c
......@@ -1512,6 +1512,7 @@ NO_MANDATORY = 1000
def mandatory_violation(grammar: Grammar,
text_: StringView,
failed_on_lookahead: bool,
expected: str,
err_msgs: MessagesType,
reloc: int) -> Tuple[Error, Node, StringView]:
......@@ -1526,6 +1527,8 @@ def mandatory_violation(grammar: Grammar,
:param grammar: the grammar
:param text_: the point where the mandatory violation occurred. As usual the
string view represents the remaining text from this point.
:param failed_on_lookahead: True if the violating parser was a
Lookahead-Parser.
:param expected: the expected (but not found) text at this point.
:param err_msgs: A list of pairs of regular expressions (or simple
strings for that matter) and error messages that are chosen
......@@ -1556,8 +1559,8 @@ def mandatory_violation(grammar: Grammar,
grammar.tree__.add_error(err_node, error)
else:
msg = '%s expected, "%s" found!' % (expected, found)
error = Error(msg, location, Error.MANDATORY_CONTINUATION if text_
else Error.MANDATORY_CONTINUATION_AT_EOF)
error = Error(msg, location, Error.MANDATORY_CONTINUATION_AT_EOF
if (failed_on_lookahead and not text_) else Error.MANDATORY_CONTINUATION)
grammar.tree__.add_error(err_node, error)
return error, err_node, text_[i:]
......@@ -1637,7 +1640,8 @@ class Series(NaryParser):
else:
reloc = reentry_point(text_, self.skip) if self.skip else -1
error, node, text_ = mandatory_violation(
self.grammar, text_, parser.repr, self.err_msgs, reloc)
self.grammar, text_, isinstance(parser, Lookahead), parser.repr,
self.err_msgs, reloc)
# check if parsing of the series can be resumed somewhere
if reloc >= 0:
nd, text_ = parser(text_) # try current parser again
......@@ -1859,8 +1863,9 @@ class AllOf(NaryParser):
else:
reloc = reentry_point(text_, self.skip) if self.skip else -1
expected = '< ' + ' '.join([parser.repr for parser in parsers]) + ' >'
lookahead = any(isinstance(p, Lookahead) for p in parsers)
error, err_node, text_ = mandatory_violation(
self.grammar, text_, expected, self.err_msgs, reloc)
self.grammar, text_, lookahead, expected, self.err_msgs, reloc)
results += (err_node,)
if reloc < 0:
parsers = []
......
......@@ -151,7 +151,7 @@ def unit_from_config(config_str):
section_match = RX_SECTION.match(cfg, pos)
if pos != len(cfg) and not re.match('\s+$', cfg[pos:]):
if pos != len(cfg) and not re.match(r'\s+$', cfg[pos:]):
raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 1))
return unit
......@@ -322,39 +322,18 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
parser = parser_factory()
transform = transformer_factory()
# is_lookahead = set() # type: Set[str] # Dictionary of parser names
with_lookahead = set() # type: Set[Optional[Parser]]
# lookahead_flag = False # type: bool
def has_lookahead(parser_name: str) -> bool:
    """Returns True if the parser with the given name or any of its
    descendant parsers is a Lookahead parser.

    :param parser_name: the name under which the parser is registered
        in the enclosing grammar object `parser`.
    :return: True if a Lookahead-parser occurs anywhere in the
        parser's call tree, False otherwise.
    """
    lookahead_found = False

    def find_lookahead(p: Parser):
        # Visitor passed to `apply`: latches `lookahead_found` to True
        # on the first Lookahead-parser encountered and then becomes a
        # no-op (no way to abort `apply` early).
        nonlocal lookahead_found
        if not lookahead_found:
            lookahead_found = isinstance(p, Lookahead)

    # `parser` is the grammar object from the enclosing scope
    # (grammar_unit); indexing it by name yields the named sub-parser.
    parser[parser_name].apply(find_lookahead)
    return lookahead_found
def lookahead_artifact(parser, raw_errors):
    """
    Returns True if the given errors are merely the artifact of a
    parser that ends with a lookahead operator, rather than genuine
    test failures.

    This is required for testing of parsers that put a lookahead
    operator at the end. See test_testing.TestLookahead.

    :param parser: the grammar object (kept for interface
        compatibility; not inspected here).
    :param raw_errors: the list of errors produced by the test run.
    :return: True if the errors match one of the two known
        lookahead-artifact patterns.
    """
    # case 1: superfluous data for lookahead - the parser matched but
    # stopped before the end only because of a trailing lookahead
    return ((len(raw_errors) == 2
             and raw_errors[-1].code == Error.PARSER_LOOKAHEAD_MATCH_ONLY
             and raw_errors[-2].code == Error.PARSER_STOPPED_BEFORE_END)
            # case 2: mandatory lookahead failure at end of text
            or (len(raw_errors) == 1
                and raw_errors[-1].code == Error.MANDATORY_CONTINUATION_AT_EOF))
for parser_name, tests in test_unit.items():
assert parser_name, "Missing parser name in test %s!" % unit_name
......@@ -566,12 +543,12 @@ def grammar_suite(directory, parser_factory, transformer_factory,
########################################################################
RX_DEFINITION_OR_SECTION = re.compile('(?:^|\n)[ \t]*(\w+(?=[ \t]*=)|#:.*(?=\n|$|#))')
RX_DEFINITION_OR_SECTION = re.compile(r'(?:^|\n)[ \t]*(\w+(?=[ \t]*=)|#:.*(?=\n|$|#))')
SymbolsDictType = Dict[str, List[str]]
def extract_symbols(ebnf_text_or_file: str) -> SymbolsDictType:
"""
r"""
Extracts all defined symbols from an EBNF-grammar. This can be used to
prepare grammar-tests. The symbols will be returned as lists of strings
which are grouped by the sections to which they belong and returned as
......@@ -606,7 +583,7 @@ def extract_symbols(ebnf_text_or_file: str) -> SymbolsDictType:
to lists of symbols that appear under that section.
"""
def trim_section_name(name: str) -> str:
    """Turn a section heading from an EBNF-grammar comment (e.g.
    "#: my section") into a safe identifier: the "#:"-marker is
    removed, surrounding whitespace stripped, and every character
    that is not a word-character or hyphen replaced by "_"."""
    # raw string avoids the invalid-escape DeprecationWarning for \w
    return re.sub(r'[^\w-]', '_', name.replace('#:', '').strip())
ebnf = load_if_file(ebnf_text_or_file)
deflist = RX_DEFINITION_OR_SECTION.findall(ebnf)
......
......@@ -58,7 +58,7 @@ class TestCompileFunctions:
class TestCompilerGeneration:
trivial_lang = """
trivial_lang = r"""
text = { word | WSPC } "." [/\s/]
word = /\w+/
WSPC = /\s+/
......
......@@ -346,7 +346,7 @@ class TestBoundaryCases:
"not an error: " + str(messages)
grammar_src = result
grammar = compile_python_object(DHPARSER_IMPORTS + grammar_src,
'get_(?:\w+_)?grammar$')()
r'get_(?:\w+_)?grammar$')()
else:
assert False, "EBNF compiler should warn about unconnected rules."
......@@ -405,7 +405,7 @@ class TestFlowControlOperators:
"""Tests whether failures to comply with the required operator '§'
are correctly reported as such.
"""
lang1 = "nonsense == /\w+/~ # wrong_equal_sign"
lang1 = r"nonsense == /\w+/~ # wrong_equal_sign"
lang2 = "nonsense = [^{}%]+ # someone forgot the '/'-delimiters for regular expressions"
try:
parser_class = grammar_provider(lang1)
......@@ -477,6 +477,7 @@ class TestErrorCustomization:
st = parser("ABCD"); assert not st.error_flag
st = parser("A_CD"); assert not st.error_flag
st = parser("AB_D"); assert st.error_flag
print(st.errors_sorted)
assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
assert st.errors_sorted[0].message == "a user defined error message"
# transitivity of mandatory-operator
......@@ -497,7 +498,7 @@ class TestErrorCustomization:
assert st.errors_sorted[0].message == "a user defined error message"
def test_multiple_error_messages(self):
lang = """
lang = r"""
document = series | /.*/
@series_error = '_', "the underscore is wrong in this place"
@series_error = '*', "the asterix is wrong in this place"
......@@ -573,7 +574,7 @@ class TestErrorCustomizationErrors:
class TestCustomizedResumeParsing:
def setup(self):
lang = """
lang = r"""
@ alpha_resume = 'BETA', GAMMA_STR
@ beta_resume = GAMMA_RE
@ bac_resume = /GA\w+/
......
......@@ -133,10 +133,10 @@ class TestFlowControl:
self.t2 = "All word and not play makes Jack a dull boy END\n"
def test_lookbehind(self):
ws = RegExp('\s*')
ws = RegExp(r'\s*')
end = RegExp("END")
doc_end = Lookbehind(RegExp('\\s*?\\n')) + end
word = RegExp('\w+')
word = RegExp(r'\w+')
sequence = OneOrMore(NegativeLookahead(end) + word + ws)
document = ws + sequence + doc_end + ws
......@@ -149,11 +149,11 @@ class TestFlowControl:
def test_lookbehind_indirect(self):
class LookbehindTestGrammar(Grammar):
parser_initialization__ = ["upon instantiation"]
ws = RegExp('\\s*')
ws = RegExp(r'\s*')
end = RegExp('END')
SUCC_LB = RegExp('\\s*?\\n')
doc_end = Series(Lookbehind(SUCC_LB), end)
word = RegExp('\w+')
word = RegExp(r'\w+')
sequence = OneOrMore(Series(NegativeLookahead(end), word, ws))
document = Series(ws, sequence, doc_end, ws)
root__ = document
......@@ -176,7 +176,7 @@ class TestRegex:
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
node = parser('abc+def', parser.regex)
assert not node.error_flag
assert node.tag_name == "regex"
......@@ -192,7 +192,7 @@ class TestRegex:
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
node = parser('abc+def', parser.regex)
assert not node.error_flag
assert node.tag_name == "regex"
......@@ -207,7 +207,7 @@ class TestRegex:
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
node, rest = parser.regex('Alpha')
assert node
assert not node.error_flag
......@@ -223,7 +223,7 @@ class TestRegex:
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
node, rest = parser.regex('Alpha')
assert node.error_flag
......@@ -244,7 +244,7 @@ class TestRegex:
get_ebnf_transformer(), get_ebnf_compiler("TokenTest"))
assert result
assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
result = parser(testdoc)
# log_parsing_history(parser, "test.log")
assert not result.error_flag
......@@ -267,13 +267,13 @@ class TestGrammar:
# checks whether pos values in the parsing result and in the
# history record have been initialized
with logging("LOGS"):
grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
grammar("no_file_name*")
for record in grammar.history__:
assert not record.node or record.node.pos >= 0
def test_select_parsing(self):
grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
grammar("wort", "WORT")
grammar("eine Zeile", "textzeile")
grammar("kein Haupt", "haupt")
......@@ -281,7 +281,7 @@ class TestGrammar:
def test_grammar_subclassing(self):
class Arithmetic(Grammar):
'''
r'''
expression = term { ("+" | "-") term }
term = factor { ("*" | "/") factor }
factor = INTEGER | "(" expression ")"
......@@ -413,14 +413,14 @@ class TestAllOfSomeOf:
class TestPopRetrieve:
mini_language = """
mini_language = r"""
document = { text | codeblock }
codeblock = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
delimiter = delimiter_sign # never use delimiter between capture and pop except for retrival!
delimiter_sign = /`+/
text = /[^`]+/
"""
mini_lang2 = """
mini_lang2 = r"""
@braces_filter=counterpart
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
......@@ -429,7 +429,7 @@ class TestPopRetrieve:
closing_braces = /\}+/
text = /[^{}]+/
"""
mini_lang3 = """
mini_lang3 = r"""
document = { text | env }
env = (specialtag | opentag) text [closespecial | closetag]
opentag = "<" name ">"
......@@ -485,7 +485,7 @@ class TestPopRetrieve:
def test_cache_neutrality(self):
"""Test that packrat-caching does not interfere with the variable-
changing parsers: Capture and Retrieve."""
lang = """
lang = r"""
text = opening closing
opening = (unmarked_package | marked_package)
closing = ::variable
......@@ -758,7 +758,7 @@ class TestUnknownParserError:
class TestEarlyTokenWhitespaceDrop:
def setup(self):
self.lang = """
self.lang = r"""
@ drop = token, whitespace
expression = term { ("+" | "-") term}
term = factor { ("*"|"/") factor}
......
......@@ -206,7 +206,7 @@ class TestRootNode:
assert error_str.find("A") < error_str.find("B")
def test_error_reporting(self):
number = RE('\d+') | RE('\d+') + RE('\.') + RE('\d+')
number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
result = str(Grammar(number)("3.1416"))
assert result == '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> ', \
str(result)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment