The expiration time for new job artifacts in CI/CD pipelines is now 30 days (GitLab default). Previously generated artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit fbc08ddc authored by eckhart
Browse files

- DHParser/testing.py: Korrektur für Lookahead-parser-Fehlerbehandlung

parent e8025e8c
......@@ -1512,6 +1512,7 @@ NO_MANDATORY = 1000
def mandatory_violation(grammar: Grammar,
text_: StringView,
failed_on_lookahead: bool,
expected: str,
err_msgs: MessagesType,
reloc: int) -> Tuple[Error, Node, StringView]:
......@@ -1526,6 +1527,8 @@ def mandatory_violation(grammar: Grammar,
:param grammar: the grammar
:param text_: the point where the mandatory violation occurred. As usual the
string view represents the remaining text from this point.
:param failed_on_lookahead: True if the violating parser was a
Lookahead-Parser.
:param expected: the expected (but not found) text at this point.
:param err_msgs: A list of pairs of regular expressions (or simple
strings for that matter) and error messages that are chosen
......@@ -1556,8 +1559,8 @@ def mandatory_violation(grammar: Grammar,
grammar.tree__.add_error(err_node, error)
else:
msg = '%s expected, "%s" found!' % (expected, found)
error = Error(msg, location, Error.MANDATORY_CONTINUATION if text_
else Error.MANDATORY_CONTINUATION_AT_EOF)
error = Error(msg, location, Error.MANDATORY_CONTINUATION_AT_EOF
if (failed_on_lookahead and not text_) else Error.MANDATORY_CONTINUATION)
grammar.tree__.add_error(err_node, error)
return error, err_node, text_[i:]
......@@ -1637,7 +1640,8 @@ class Series(NaryParser):
else:
reloc = reentry_point(text_, self.skip) if self.skip else -1
error, node, text_ = mandatory_violation(
self.grammar, text_, parser.repr, self.err_msgs, reloc)
self.grammar, text_, isinstance(parser, Lookahead), parser.repr,
self.err_msgs, reloc)
# check if parsing of the series can be resumed somewhere
if reloc >= 0:
nd, text_ = parser(text_) # try current parser again
......@@ -1859,8 +1863,9 @@ class AllOf(NaryParser):
else:
reloc = reentry_point(text_, self.skip) if self.skip else -1
expected = '< ' + ' '.join([parser.repr for parser in parsers]) + ' >'
lookahead = any(isinstance(p, Lookahead) for p in parsers)
error, err_node, text_ = mandatory_violation(
self.grammar, text_, expected, self.err_msgs, reloc)
self.grammar, text_, lookahead, expected, self.err_msgs, reloc)
results += (err_node,)
if reloc < 0:
parsers = []
......
......@@ -151,7 +151,7 @@ def unit_from_config(config_str):
section_match = RX_SECTION.match(cfg, pos)
if pos != len(cfg) and not re.match('\s+$', cfg[pos:]):
if pos != len(cfg) and not re.match(r'\s+$', cfg[pos:]):
raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 1))
return unit
......@@ -322,39 +322,18 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
parser = parser_factory()
transform = transformer_factory()
# is_lookahead = set() # type: Set[str] # Dictionary of parser names
with_lookahead = set() # type: Set[Optional[Parser]]
# lookahead_flag = False # type: bool
def has_lookahead(parser_name: str) -> bool:
"""Returns True if the parser or any of its descendant parsers it a
Lookahead parser."""
lookahead_found = False
def find_lookahead(p: Parser):
"""Raises a StopIterationError if parser `p` is or contains
a Lookahead-parser."""
nonlocal is_lookahead, with_lookahead, lookahead_flag
if p in with_lookahead:
lookahead_flag = True
else:
if isinstance(p, Lookahead):
is_lookahead.add(p.tag_name)
with_lookahead.add(p)
lookahead_flag = True
else:
if any(child for child in (getattr(p, 'parsers', [])
or [getattr(p, 'parser', None)]) if isinstance(child, Lookahead)):
with_lookahead.add(p)
lookahead_flag = True
def has_lookahead(parser_name: str):
"""Returns `True`, if given parser is or contains a Lookahead-parser."""
nonlocal is_lookahead, with_lookahead, lookahead_flag, parser
p = parser[parser_name]
if p in with_lookahead:
return True
lookahead_flag = False
p.apply(find_lookahead)
if lookahead_flag:
with_lookahead.add(p)
return True
return False
def find_lookahead(p: Parser):
nonlocal lookahead_found
if not lookahead_found:
lookahead_found = isinstance(p, Lookahead)
parser[parser_name].apply(find_lookahead)
return lookahead_found
def lookahead_artifact(parser, raw_errors):
"""
......@@ -364,14 +343,12 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
This is required for testing of parsers that put a lookahead
operator at the end. See test_testing.TestLookahead.
"""
nonlocal is_lookahead
return ((len(raw_errors) == 2 # case 1: superfluous data for lookahead
and raw_errors[-1].code == Error.PARSER_LOOKAHEAD_MATCH_ONLY
and raw_errors[-2].code == Error.PARSER_STOPPED_BEFORE_END)
# case 2: mandatory lookahead failure at end of text
or (len(raw_errors) == 1
and raw_errors[-1].code == Error.MANDATORY_CONTINUATION_AT_EOF)
and any(tn in with_lookahead for tn in parser.history__[-1].call_stack))
and raw_errors[-1].code == Error.MANDATORY_CONTINUATION_AT_EOF))
for parser_name, tests in test_unit.items():
assert parser_name, "Missing parser name in test %s!" % unit_name
......@@ -566,12 +543,12 @@ def grammar_suite(directory, parser_factory, transformer_factory,
########################################################################
RX_DEFINITION_OR_SECTION = re.compile('(?:^|\n)[ \t]*(\w+(?=[ \t]*=)|#:.*(?=\n|$|#))')
RX_DEFINITION_OR_SECTION = re.compile(r'(?:^|\n)[ \t]*(\w+(?=[ \t]*=)|#:.*(?=\n|$|#))')
SymbolsDictType = Dict[str, List[str]]
def extract_symbols(ebnf_text_or_file: str) -> SymbolsDictType:
"""
r"""
Extracts all defined symbols from an EBNF-grammar. This can be used to
prepare grammar-tests. The symbols will be returned as lists of strings
which are grouped by the sections to which they belong and returned as
......@@ -606,7 +583,7 @@ def extract_symbols(ebnf_text_or_file: str) -> SymbolsDictType:
to lists of symbols that appear under that section.
"""
def trim_section_name(name: str) -> str:
return re.sub('[^\w-]', '_', name.replace('#:', '').strip())
return re.sub(r'[^\w-]', '_', name.replace('#:', '').strip())
ebnf = load_if_file(ebnf_text_or_file)
deflist = RX_DEFINITION_OR_SECTION.findall(ebnf)
......
......@@ -58,7 +58,7 @@ class TestCompileFunctions:
class TestCompilerGeneration:
trivial_lang = """
trivial_lang = r"""
text = { word | WSPC } "." [/\s/]
word = /\w+/
WSPC = /\s+/
......
......@@ -346,7 +346,7 @@ class TestBoundaryCases:
"not an error: " + str(messages)
grammar_src = result
grammar = compile_python_object(DHPARSER_IMPORTS + grammar_src,
'get_(?:\w+_)?grammar$')()
r'get_(?:\w+_)?grammar$')()
else:
assert False, "EBNF compiler should warn about unconnected rules."
......@@ -405,7 +405,7 @@ class TestFlowControlOperators:
"""Tests whether failures to comply with the required operator '§'
are correctly reported as such.
"""
lang1 = "nonsense == /\w+/~ # wrong_equal_sign"
lang1 = r"nonsense == /\w+/~ # wrong_equal_sign"
lang2 = "nonsense = [^{}%]+ # someone forgot the '/'-delimiters for regular expressions"
try:
parser_class = grammar_provider(lang1)
......@@ -477,6 +477,7 @@ class TestErrorCustomization:
st = parser("ABCD"); assert not st.error_flag
st = parser("A_CD"); assert not st.error_flag
st = parser("AB_D"); assert st.error_flag
print(st.errors_sorted)
assert st.errors_sorted[0].code == Error.MANDATORY_CONTINUATION
assert st.errors_sorted[0].message == "a user defined error message"
# transitivity of mandatory-operator
......@@ -497,7 +498,7 @@ class TestErrorCustomization:
assert st.errors_sorted[0].message == "a user defined error message"
def test_multiple_error_messages(self):
lang = """
lang = r"""
document = series | /.*/
@series_error = '_', "the underscore is wrong in this place"
@series_error = '*', "the asterix is wrong in this place"
......@@ -573,7 +574,7 @@ class TestErrorCustomizationErrors:
class TestCustomizedResumeParsing:
def setup(self):
lang = """
lang = r"""
@ alpha_resume = 'BETA', GAMMA_STR
@ beta_resume = GAMMA_RE
@ bac_resume = /GA\w+/
......
......@@ -133,10 +133,10 @@ class TestFlowControl:
self.t2 = "All word and not play makes Jack a dull boy END\n"
def test_lookbehind(self):
ws = RegExp('\s*')
ws = RegExp(r'\s*')
end = RegExp("END")
doc_end = Lookbehind(RegExp('\\s*?\\n')) + end
word = RegExp('\w+')
word = RegExp(r'\w+')
sequence = OneOrMore(NegativeLookahead(end) + word + ws)
document = ws + sequence + doc_end + ws
......@@ -149,11 +149,11 @@ class TestFlowControl:
def test_lookbehind_indirect(self):
class LookbehindTestGrammar(Grammar):
parser_initialization__ = ["upon instantiation"]
ws = RegExp('\\s*')
ws = RegExp(r'\s*')
end = RegExp('END')
SUCC_LB = RegExp('\\s*?\\n')
doc_end = Series(Lookbehind(SUCC_LB), end)
word = RegExp('\w+')
word = RegExp(r'\w+')
sequence = OneOrMore(Series(NegativeLookahead(end), word, ws))
document = Series(ws, sequence, doc_end, ws)
root__ = document
......@@ -176,7 +176,7 @@ class TestRegex:
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
node = parser('abc+def', parser.regex)
assert not node.error_flag
assert node.tag_name == "regex"
......@@ -192,7 +192,7 @@ class TestRegex:
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
node = parser('abc+def', parser.regex)
assert not node.error_flag
assert node.tag_name == "regex"
......@@ -207,7 +207,7 @@ class TestRegex:
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
node, rest = parser.regex('Alpha')
assert node
assert not node.error_flag
......@@ -223,7 +223,7 @@ class TestRegex:
get_ebnf_transformer(), get_ebnf_compiler('MultilineRegexTest'))
assert result
assert not messages
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
node, rest = parser.regex('Alpha')
assert node.error_flag
......@@ -244,7 +244,7 @@ class TestRegex:
get_ebnf_transformer(), get_ebnf_compiler("TokenTest"))
assert result
assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
result = parser(testdoc)
# log_parsing_history(parser, "test.log")
assert not result.error_flag
......@@ -267,13 +267,13 @@ class TestGrammar:
# checks whether pos values in the parsing result and in the
# history record have been initialized
with logging("LOGS"):
grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
grammar("no_file_name*")
for record in grammar.history__:
assert not record.node or record.node.pos >= 0
def test_select_parsing(self):
grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, r'\w+Grammar$')()
grammar("wort", "WORT")
grammar("eine Zeile", "textzeile")
grammar("kein Haupt", "haupt")
......@@ -281,7 +281,7 @@ class TestGrammar:
def test_grammar_subclassing(self):
class Arithmetic(Grammar):
'''
r'''
expression = term { ("+" | "-") term }
term = factor { ("*" | "/") factor }
factor = INTEGER | "(" expression ")"
......@@ -413,14 +413,14 @@ class TestAllOfSomeOf:
class TestPopRetrieve:
mini_language = """
mini_language = r"""
document = { text | codeblock }
codeblock = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
delimiter = delimiter_sign # never use delimiter between capture and pop except for retrival!
delimiter_sign = /`+/
text = /[^`]+/
"""
mini_lang2 = """
mini_lang2 = r"""
@braces_filter=counterpart
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
......@@ -429,7 +429,7 @@ class TestPopRetrieve:
closing_braces = /\}+/
text = /[^{}]+/
"""
mini_lang3 = """
mini_lang3 = r"""
document = { text | env }
env = (specialtag | opentag) text [closespecial | closetag]
opentag = "<" name ">"
......@@ -485,7 +485,7 @@ class TestPopRetrieve:
def test_cache_neutrality(self):
"""Test that packrat-caching does not interfere with the variable-
changing parsers: Capture and Retrieve."""
lang = """
lang = r"""
text = opening closing
opening = (unmarked_package | marked_package)
closing = ::variable
......@@ -758,7 +758,7 @@ class TestUnknownParserError:
class TestEarlyTokenWhitespaceDrop:
def setup(self):
self.lang = """
self.lang = r"""
@ drop = token, whitespace
expression = term { ("+" | "-") term}
term = factor { ("*"|"/") factor}
......
......@@ -206,7 +206,7 @@ class TestRootNode:
assert error_str.find("A") < error_str.find("B")
def test_error_reporting(self):
number = RE('\d+') | RE('\d+') + RE('\.') + RE('\d+')
number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
result = str(Grammar(number)("3.1416"))
assert result == '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> ', \
str(result)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment