05.11., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit ecd84a51 authored by eckhart's avatar eckhart

- support for resuming after parser failure directives in ebnf.py - bugfixes and tests!

parent 94edc4d8
......@@ -35,7 +35,7 @@ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace,
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, RootNode, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
GLOBALS, CONFIG_PRESET, get_config_value, typing
GLOBALS, CONFIG_PRESET, get_config_value, unrepr, typing
from DHParser.transform import TransformationFunc, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, remove_infix_operator
......@@ -570,6 +570,7 @@ class EBNFCompiler(Compiler):
definitions.append((self.RAW_WS_KEYWORD, "r'{whitespace}'".format(**self.directives)))
definitions.append((self.COMMENT_KEYWORD, "r'{comment}'".format(**self.directives)))
definitions.append((self.RESUME_RULES_KEYWORD, repr(self.directives['resume'])))
print(self.directives['resume'])
# prepare parser class header and docstring and
# add EBNF grammar to the doc string of the parser class
......@@ -736,6 +737,18 @@ class EBNFCompiler(Compiler):
self.tree.new_error(node, 'Directive "%s" must have one, but not %i values.'
% (key, len(node.children) - 1))
def extract_regex(nd: Node) -> str:
    """Return the regular expression text denoted by the argument node `nd`.

    Whitespace markers ('~') are stripped from the content of `nd`; if any
    were present, an error is added to the tree for the enclosing directive
    node. If the remaining value is delimited by matching single or double
    quotes, the delimiters are removed and the inner text is passed through
    `escape_re`. If it is delimited by slashes, the inner text is passed
    through `self._check_rx`. Any other value is returned unchanged.

    Uses `self`, `node` and `key` from the enclosing scope.
    """
    value = nd.content.strip("~")
    if value != nd.content:
        self.tree.new_error(node, "Whitespace marker '~' not allowed in definition "
                            "of %s regular expression." % key)
    # Slices rather than indices: an empty value must not raise an IndexError.
    delimiters = value[:1] + value[-1:]
    if delimiters in {'""', "''"}:
        value = escape_re(value[1:-1])
    elif delimiters == '//':
        value = self._check_rx(node, value[1:-1])
    return value
if key in {'comment', 'whitespace'}:
check_argnum()
if node.children[1].parser.name == "symbol":
......@@ -746,15 +759,7 @@ class EBNFCompiler(Compiler):
self.tree.new_error(node, 'Value "%s" not allowed for directive "%s".'
% (value, key))
else:
value = node.children[1].content.strip("~")
# cast(str, node.children[1].result).strip("~")
if value != node.children[1].content: # cast(str, node.children[1].result)
self.tree.new_error(node, "Whitespace marker '~' not allowed in definition "
"of %s regular expression." % key)
if value[0] + value[-1] in {'""', "''"}:
value = escape_re(value[1:-1])
elif value[0] + value[-1] == '//':
value = self._check_rx(node, value[1:-1])
value = extract_regex(node.children[1])
if key == 'whitespace' and not re.match(value, ''):
self.tree.new_error(node, "Implicit whitespace should always "
"match the empty string, /%s/ does not." % value)
......@@ -813,17 +818,19 @@ class EBNFCompiler(Compiler):
self.tree.new_error(node, 'Directive "%s" accepts only regular expressions or '
'plain strings as arguments, but no symbols without '
'quotation marks!' % key)
symbol = key[:-6]
symbol = key[:-7]
if symbol in self.directives['resume']:
self.tree.new_error(node, 'Reentry conditions for "%s" have already been defined'
' earlier!' % symbol)
else:
reentry_conditions = []
for child in node.children:
if child.parser.name == 'regex':
reentry_conditions.append("re.compile(r'')" % child.content)
for child in node.children[1:]:
if child.parser.name == 'regexp':
reentry_conditions.append(unrepr("re.compile(r'%s')" % extract_regex(child)))
else:
reentry_conditions.append(repr(child.content))
s = child.content.strip()
s = s.strip('"') if s[0] == '"' else s.strip("'")
reentry_conditions.append(s)
self.directives['resume'][symbol] = reentry_conditions
else:
......
......@@ -146,7 +146,7 @@ def reentry_point(rest: StringView, rules: ResumeList) -> int:
else:
m = rest.search(rule)
if m:
i = min(rest.index(m.startswith()), i)
i = min(rest.index(m.start()), i)
# in case no rule matched return -1
if i == upper_limit:
i = -1
......@@ -1372,7 +1372,7 @@ class Series(NaryOperator):
# i = max(1, text.index(match.regs[1][0])) if match else 1
i = 0
location = self.grammar.document_length__ - len(text_)
node = Node(self, text_[:i]).init_pos(location)
node = Node(None, text_[:i]).init_pos(location)
# self.grammar.tree__.add_error(
# node, Error("§ %s violation" % parser.repr, location, Error.MESSAGE))
# # node.errors.append(Error("§ %s violation" % parser.repr,
......
......@@ -48,6 +48,7 @@ __all__ = ('escape_re',
'escape_control_characters',
'is_filename',
'concurrent_ident',
'unrepr',
'lstrip_docstring',
'issubtype',
'isgenerictype',
......@@ -169,6 +170,29 @@ def concurrent_ident() -> str:
return multiprocessing.current_process().name + '_' + str(threading.get_ident())
class unrepr:
    """
    unrepr encapsulates a string representing a python function in such
    a way that the representation of the string yields the function call
    itself rather than a string representing the function call and delimited
    by quotation marks.

    Example:
        >>> "re.compile(r'abc+')"
        "re.compile(r'abc+')"
        >>> unrepr("re.compile(r'abc+')")
        re.compile(r'abc+')
    """
    def __init__(self, s: str) -> None:
        # the wrapped source text; returned verbatim by str() and repr()
        self.s = s

    def __str__(self) -> str:
        return self.s

    def __repr__(self) -> str:
        # No quotation marks are added, so that repr() of a container
        # holding unrepr objects yields the wrapped text as plain code.
        return self.s
#######################################################################
#
# type system support
......
......@@ -28,7 +28,7 @@ sys.path.extend(['../', './'])
from DHParser.toolkit import compile_python_object, re
from DHParser.preprocess import nil_preprocessor
from DHParser import compile_source
from DHParser.error import has_errors
from DHParser.error import has_errors, Error
from DHParser.syntaxtree import WHITESPACE_PTYPE
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransform, get_ebnf_compiler
from DHParser.dsl import CompilationError, compileDSL, DHPARSER_IMPORTS, grammar_provider
......@@ -448,6 +448,92 @@ class TestAllSome:
assert grammar('B').content == 'B'
class TestCuratedErrors:
    """
    Curated errors replace existing errors with alternative
    error codes and messages that are more helpful to the user.
    """
    def test_user_error_declaration(self):
        # The error directive follows the definition of "series" here;
        # compiling this grammar is expected to raise a CompilationError.
        lang = """
document = series | /.*/
series = "X" | head §"C" "D"
head = "A" "B"
@series_error = "a user defined error message"
"""
        try:
            grammar_provider(lang)()
        except CompilationError:
            pass
        else:
            raise AssertionError("Error definition after symbol definition should fail!")

    def test_curated_mandatory_continuation(self):
        lang = """
document = series | /.*/
@series_error = "a user defined error message"
series = "X" | head §"C" "D"
head = "A" "B"
"""
        # Uncomment to inspect the generated parser source:
        # from DHParser.dsl import compileDSL
        # from DHParser.preprocess import nil_preprocessor
        # from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
        # grammar_src = compileDSL(lang, nil_preprocessor, get_ebnf_grammar(),
        # get_ebnf_transformer(), get_ebnf_compiler("test", lang))
        # print(grammar_src)
        parser = grammar_provider(lang)()

        # inputs that must not produce an error
        st = parser("X")
        assert not st.error_flag
        st = parser("ABCD")
        assert not st.error_flag
        # deviation before the '§'-marker: no error is expected
        st = parser("A_CD")
        assert not st.error_flag

        # deviation right after the '§'-marker: user defined error expected
        st = parser("AB_D")
        assert st.error_flag
        assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION
        assert st.collect_errors()[0].message == "a user defined error message"

        # transitivity of mandatory-operator
        st = parser("ABC_")
        assert st.error_flag
        assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION
        assert st.collect_errors()[0].message == "a user defined error message"
class TestCustomizedResumeParsing:
    """
    Tests parsing with a grammar that declares reentry conditions
    via "@ <symbol>_resume" directives.
    """
    def setup(self):
        # Raw string literal, because the grammar contains the regular
        # expression escape "\w", which is not a valid string escape.
        lang = r"""
@ alpha_resume = 'BETA', 'GAMMA'
@ beta_resume = 'GAMMA'
@ bac_resume = /GA\w+/
document = alpha [beta] gamma "."
alpha = "ALPHA" abc
abc = §"a" "b" "c"
beta = "BETA" (bac | bca)
bac = "b" "a" §"c"
bca = "b" "c" §"a"
gamma = "GAMMA" §(cab | cba)
cab = "c" "a" §"b"
cba = "c" "b" §"a"
"""
        try:
            self.gr = grammar_provider(lang)()
        except CompilationError as ce:
            # Show the compilation errors, then re-raise: otherwise the tests
            # would fail later with a misleading AttributeError on self.gr.
            print(ce)
            raise

    def test_several_resume_rules_innermost_rule_matching(self):
        gr = self.gr

        # single failure
        content = 'ALPHA abc BETA bad GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.collect_errors()) == 1

        # multiple failures
        content = 'ALPHA acb BETA bad GAMMA cab .'
        cst = gr(content)
        # print(cst.as_sxpr())
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be exactly two error messages
        assert len(cst.collect_errors()) == 2
if __name__ == "__main__":
from DHParser.testing import runner
......
......@@ -71,50 +71,6 @@ class TestErrorSupport:
self.mini_suite(s, linebreaks(s), 1)
class TestCuratedErrors:
    """
    Curated errors replace existing errors with alternative
    error codes and messages that are more helpful to the user.
    """
    def test_user_error_declaration(self):
        # The error directive follows the definition of "series" here;
        # compiling this grammar is expected to raise a CompilationError.
        lang = """
document = series | /.*/
series = "X" | head §"C" "D"
head = "A" "B"
@series_error = "a user defined error message"
"""
        try:
            grammar_provider(lang)()
        except CompilationError:
            pass
        else:
            raise AssertionError("Error definition after symbol definition should fail!")

    def test_curated_mandatory_continuation(self):
        lang = """
document = series | /.*/
@series_error = "a user defined error message"
series = "X" | head §"C" "D"
head = "A" "B"
"""
        # Uncomment to inspect the generated parser source:
        # from DHParser.dsl import compileDSL
        # from DHParser.preprocess import nil_preprocessor
        # from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
        # grammar_src = compileDSL(lang, nil_preprocessor, get_ebnf_grammar(),
        # get_ebnf_transformer(), get_ebnf_compiler("test", lang))
        # print(grammar_src)
        parser = grammar_provider(lang)()

        # inputs that must not produce an error
        st = parser("X")
        assert not st.error_flag
        st = parser("ABCD")
        assert not st.error_flag
        # deviation before the '§'-marker: no error is expected
        st = parser("A_CD")
        assert not st.error_flag

        # deviation right after the '§'-marker: user defined error expected
        st = parser("AB_D")
        assert st.error_flag
        assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION
        assert st.collect_errors()[0].message == "a user defined error message"

        # transitivity of mandatory-operator
        st = parser("ABC_")
        assert st.error_flag
        assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION
        assert st.collect_errors()[0].message == "a user defined error message"
if __name__ == "__main__":
from DHParser.testing import runner
runner("", globals())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment