
Commit 797e5c68 authored by Eckhart Arnold

parser.py, ebnf.py: Allow Parsers for resume-skipping (refactorings pending)

parent 07cf20fd
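What the feature amounts to, as a plain-Python sketch (illustrative only, not DHParser's implementation): when a mandatory item (marked with the paragraph sign in the EBNF) fails, the parser does not abort but scans ahead for a position from which parsing can resume. Until now the search rules driving that scan could only be strings or regular expressions; this commit allows parsers themselves (such as ALPHA_RESUME in the test suite at the end of this diff) to serve as search rules. All names in the sketch are made up:

import re

def resume_position(text: str, pos: int, rule) -> int:
    """Find the next position at or after pos from which parsing may
    resume. rule may be a search string, a compiled regular expression,
    or -- the point of this commit -- a callable standing in for a parser."""
    if isinstance(rule, str):                      # plain search string
        i = text.find(rule, pos)
        return i if i >= 0 else len(text)
    if isinstance(rule, re.Pattern):               # regular-expression rule
        m = rule.search(text, pos)
        return m.start() if m else len(text)
    return rule(text, pos)                         # parser-like rule (new)

# e.g. skip ahead to the next GAMMA-keyword:
print(resume_position('BETA bad GAMMA cab .', 0, re.compile(r'(?=GA\w+)')))  # 9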
@@ -280,11 +280,8 @@ def grammar_provider(ebnf_src: str, branding="DSL", additional_code: str = '') -
grammar_src = compileDSL(ebnf_src, nil_preprocessor, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler(branding, ebnf_src))
log_name = get_config_value('compiled_EBNF_log')
if log_name:
if is_logging():
if log_name and is_logging():
append_log(log_name, grammar_src)
else:
print(grammar_src)
imports = DHPARSER_IMPORTS.format(dhparser_parentdir=relative_path('.', DHPARSER_PARENTDIR))
grammar_factory = compile_python_object('\n'.join([imports, additional_code, grammar_src]),
r'get_(?:\w+_)?grammar$')
@@ -2411,6 +2411,11 @@ def get_grammar() -> {NAME}Grammar:
resume_notices_on(grammar)
elif get_config_value('history_tracking'):
set_tracer(grammar, trace_history)
try:
if not grammar.__class__.python_src__:
grammar.__class__.python_src__ = get_grammar.python_src__
except AttributeError:
pass
return grammar
def parse_{NAME}(document, start_parser = "root_parser__", *, complete_match=True):
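The try-block added above copies the generated parser source from the factory function onto the grammar class, so that the source stays inspectable later (a test near the end of this diff reads parser.python_src__). A minimal self-contained imitation of the pattern, with made-up names; the except-clause covers factories that carry no python_src__ attribute:

class DemoGrammar:
    python_src__ = ''    # filled in lazily on first instantiation

def get_demo_grammar() -> DemoGrammar:
    grammar = DemoGrammar()
    try:
        if not grammar.__class__.python_src__:
            grammar.__class__.python_src__ = get_demo_grammar.python_src__
    except AttributeError:
        pass    # factory carries no source attribute; nothing to copy
    return grammar

get_demo_grammar.python_src__ = 'class DemoGrammar: ...'   # attached by the compiler
assert get_demo_grammar().python_src__ == 'class DemoGrammar: ...'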
@@ -2916,18 +2921,13 @@ class EBNFCompiler(Compiler):
return unrepr("re.compile(r'(?=%s)')" % escape_re(s))
elif nd.tag_name == 'procedure':
return unrepr(nd.content)
elif nd.tag_name != 'symbol':
self.tree.new_error(nd, 'Only regular expressions, string literals and external '
'procedures are allowed as search rules, but not: ' + nd.tag_name)
return ''
def gen_search_list(self, nodes: Sequence[Node]) -> List[Union[unrepr, str]]:
search_list = [] # type: List[Union[unrepr, str]]
for child in nodes:
rule = self.gen_search_rule(child)
search_list.append(rule if rule else unrepr(child.content.strip()))
return search_list
elif nd.tag_name == 'symbol':
return unrepr(nd.content.strip())
else:
return ''
# self.tree.new_error(nd, 'Only regular expressions, string literals and external '
# 'procedures are allowed as search rules, but not: ' + nd.tag_name)
# return unrepr('')
def directly_referred(self, symbol: str) -> FrozenSet[str]:
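The net effect of this rewrite of gen_search_rule: a bare symbol in a resume- or skip-directive is no longer rejected but passed through by name, to be resolved to the parser of that name once the Grammar object is instantiated; and since callers now map gen_search_rule directly over the argument nodes (see the hunk at -3603 below), the removed gen_search_list helper is no longer needed. A self-contained sketch of the new symbol branch, with Node and unrepr stubbed (DHParser's real classes differ in detail):

class unrepr:
    """Wraps a string so that repr() yields it unquoted, letting it be
    written into generated source as a bare expression (stub)."""
    def __init__(self, s: str):  self.s = s
    def __repr__(self):  return self.s

class Node:
    def __init__(self, tag_name: str, content: str):
        self.tag_name, self.content = tag_name, content

def gen_search_rule(nd: Node):
    if nd.tag_name == 'symbol':
        # new: pass the symbol through; it will be resolved to the parser
        # registered under this name when the grammar is instantiated
        return unrepr(nd.content.strip())
    return ''

print(repr(gen_search_rule(Node('symbol', 'ALPHA_RESUME'))))   # ALPHA_RESUME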
@@ -3058,7 +3058,6 @@ class EBNFCompiler(Compiler):
Creates the Python code for the parser after compilation of
the EBNF-Grammar
"""
def pp_rules(rule_name: str, ruleset: Dict[str, List]) -> Tuple[str, str]:
"""Pretty-print skip- and resume-rule and error-messages dictionaries
to avoid excessively long lines in the generated python source."""
@@ -3066,11 +3065,8 @@
indent = ",\n" + " " * (len(rule_name) + 8)
rule_repr = []
for k, v in ruleset.items():
if len(v) > 1:
delimiter = indent + ' ' * (len(k) + 5)
val = '(' + delimiter.join(str(it) for it in v) + ')'
else:
val = str((v[0],)) # turn single-element list into single-element tuple
delimiter = indent + ' ' * (len(k) + 5)
val = '[' + delimiter.join(str(it) for it in v) + ']'
rule_repr.append("'{key}': {value}".format(key=k, value=val))
rule_repr[0] = '{' + rule_repr[0]
rule_repr[-1] = rule_repr[-1] + '}'
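The essential point of this change to pp_rules: even a single search rule is now rendered as a one-element list rather than a tuple, because Grammar.__init__ (see the parser.py hunk at -1360 below) replaces Parser entries in these containers in place, which tuples would not permit. A standalone approximation of the revised helper; the lines elided from the hunk, including the return, are reconstructed and may differ from the original:

from typing import Dict, List, Tuple

def pp_rules(rule_name: str, ruleset: Dict[str, List]) -> Tuple[str, str]:
    """Pretty-print a skip- or resume-rule dictionary to avoid
    excessively long lines in the generated Python source."""
    indent = ",\n" + " " * (len(rule_name) + 8)
    rule_repr = []
    for k, v in ruleset.items():
        delimiter = indent + ' ' * (len(k) + 5)
        # always render a list, even for a single search rule
        val = '[' + delimiter.join(str(it) for it in v) + ']'
        rule_repr.append("'{key}': {value}".format(key=k, value=val))
    rule_repr[0] = '{' + rule_repr[0]
    rule_repr[-1] = rule_repr[-1] + '}'
    return rule_name, indent.join(rule_repr)

name, code = pp_rules('resume_rules__', {'alpha': ['rule_1', 'rule_2']})
print(name + ' = ' + code)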
@@ -3093,6 +3089,7 @@
# minimize the necessary number of forward declarations
self.optimize_definitions_order(definitions)
self.root_symbol = definitions[0][0] if definitions else ""
# provide for capturing of symbols that are variables, i.e. the
# value of which will be retrieved at some point during the parsing process
@@ -3141,6 +3138,7 @@
try:
nd = self.rules[rule.s][0].children[1]
refined = self.gen_search_rule(nd)
if not refined: refined = unrepr(rule.s)
except IndexError:
nd = self.tree # TODO: Allow arbitrary parsers, here
refined = '' # refined = rule
@@ -3159,7 +3157,7 @@
refined_rules.append(rule)
resume_rules[symbol] = refined_rules
if resume_rules:
definitions.append(pp_rules(self.RESUME_RULES_KEYWORD, resume_rules))
definitions.insert(0, pp_rules(self.RESUME_RULES_KEYWORD, resume_rules))
# prepare and add skip-rules
@@ -3180,7 +3178,7 @@
rules.append(search)
skip_rules[symbol] = rules
if skip_rules:
definitions.append(pp_rules(self.SKIP_RULES_KEYWORD, skip_rules))
definitions.insert(0, pp_rules(self.SKIP_RULES_KEYWORD, skip_rules))
for symbol in self.directives.skip.keys():
if symbol not in self.consumed_skip_rules:
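Why insert(0) instead of append() in the two hunks above: the definitions are later reversed into declarations (see the hunk at -3257 below), so entries inserted at the front come out last in the generated source, after every symbol they may refer to has been defined. This is also why the root_symbol assignment was moved up into the -3093 hunk: it must read definitions[0] before the rule dictionaries are pushed to the front. A toy illustration:

definitions = [('document', '...'), ('alpha', '...')]       # root symbol first
definitions.insert(0, ('resume_rules__', '{...}'))          # prepend rule dict
definitions.reverse()                                       # declaration order
print([name for name, _ in definitions])
# ['alpha', 'document', 'resume_rules__']  -- rule dict comes last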
@@ -3257,7 +3255,6 @@
# turn definitions into declarations in reverse order
self.root_symbol = definitions[0][0] if definitions else ""
definitions.reverse()
declarations += [symbol + ' = Forward()'
for symbol in sorted(list(self.forward))]
@@ -3314,7 +3311,7 @@
'Filter declared for uncaptured symbol "%s"' % symbol,
WARNING)
# set root_symbol parser and assemble python grammar definition
# assemble python grammar definition
if self.root_symbol:
if self.directives.reduction != CombinedParser.DEFAULT_OPTIMIZATION:
@@ -3323,6 +3320,8 @@
declarations.append('root__ = TreeReduction(' + self.root_symbol + opt)
else:
declarations.append('root__ = ' + self.root_symbol)
else:
declarations.append(f'root__ = RegExp(r"{NEVER_MATCH_PATTERN}")')
declarations.append('')
self.python_src = '\n '.join(declarations) \
+ GRAMMAR_FACTORY.format(NAME=self.grammar_name, ID=self.grammar_id)
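The new else-branch guarantees that even an EBNF source without any definitions yields a grammar with a root parser, namely one built on a regular expression that can never match. An illustration of such a pattern (not necessarily DHParser's NEVER_MATCH_PATTERN):

import re

# a negative lookahead on the empty pattern fails at every position
never_match = re.compile(r'(?!)')
assert never_match.search('any text whatsoever') is None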
@@ -3334,7 +3333,6 @@
def on_ZOMBIE__(self, node: Node) -> str:
result = ['Illegal node in AST generated from EBNF-Source!']
# print(self.tree.as_sxpr())
if node.children:
result.append(' Fragments found: ')
result.extend([str(self.compile(child)) for child in node.children])
@@ -3589,11 +3587,11 @@
node, 'Directive "%s" requires message string or a pair ' % key
+ '(regular expression or search string, message string) as argument!')
if len(node.children) == 2:
error_msgs.append(('', unrepr(node.children[1].content)))
error_msgs.append(('', unrepr(node[1].content)))
elif len(node.children) == 3:
rule = self.gen_search_rule(node.children[1])
error_msgs.append((rule if rule else unrepr(node.children[1].content),
unrepr(node.children[2].content)))
rule = self.gen_search_rule(node[1])
error_msgs.append((rule if rule else unrepr(node[1].content),
unrepr(node[2].content)))
else:
self.tree.new_error(node, 'Directive "%s" allows at most two parameters' % key)
self.directives.error[symbol] = error_msgs
@@ -3603,11 +3601,11 @@
# if symbol in self.rules:
# self.tree.new_error(node, 'Skip list for resuming in series for symbol "{}"'
# ' must be defined before the symbol!'.format(symbol))
self.directives.skip[symbol] = self.gen_search_list(node.children[1:])
self.directives.skip[symbol] = [self.gen_search_rule(nd) for nd in node[1:]]
elif key.endswith('_resume'):
symbol = key[:-7]
self.directives.resume[symbol] = self.gen_search_list(node.children[1:])
self.directives.resume[symbol] = [self.gen_search_rule(nd) for nd in node[1:]]
else:
if any(key.startswith(directive) for directive in ('skip', 'error', 'resume')):
@@ -967,6 +967,9 @@ class GrammarError(Exception):
for i, err_tuple in enumerate(self.errors))
RESERVED_PARSER_NAMES = ('root__', 'dwsp__', 'wsp__', 'comment__', 'root_parser__', 'ff_parser__')
class Grammar:
r"""
Class Grammar directs the parsing process and stores global state
@@ -1276,7 +1279,7 @@ class Grammar:
if cls.parser_initialization__[0] != "done":
cdict = cls.__dict__
for entry, parser in cdict.items():
if isinstance(parser, Parser) and sane_parser_name(entry):
if isinstance(parser, Parser) and entry not in RESERVED_PARSER_NAMES:
anonymous = True if cls.disposable__.match(entry) else False
assert anonymous or not parser.drop_content, entry
if isinstance(parser, Forward):
@@ -1360,10 +1363,24 @@ class Grammar:
self.static_analysis_caches__ = dict() # type: Dict[str, Dict]
self.root_parser__.apply(self._add_parser__)
resume_lists = []
self.resume_parsers__: List[Parser] = []
if hasattr(self, 'resume_rules__'):
resume_lists.extend(self.resume_rules__.values())
if hasattr(self, 'skip_rules__'):
resume_lists.extend(self.skip_rules__.values())
for l in resume_lists:
for i in range(len(l)):
if isinstance(l[i], Parser):
l[i] = self[l[i].pname]
self.resume_parsers__.append(l[i])
assert 'root_parser__' in self.__dict__
assert self.root_parser__ == self.__dict__['root_parser__']
self.ff_parser__ = self.root_parser__
self.root_parser__.apply(lambda ctx: ctx[-1].reset())
for p in self.resume_parsers__: p.apply(lambda ctx: ctx[-1].reset())
if (self.static_analysis_pending__
and (static_analysis
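This is the heart of the parser.py change: the rule containers from resume_rules__ and skip_rules__ may now hold Parser objects, which are swapped in place for the parser of the same name bound to this grammar instance and remembered in resume_parsers__, so that they take part in resets (see the hunk at -1532 below) and in symbol lookup. A detached sketch with stubbed classes; the dictionary lookup stands in for Grammar's self[...] access by parser name:

from typing import Dict, List, Union

class Parser:
    def __init__(self, pname: str):
        self.pname = pname

class DemoGrammar:
    """Stub imitating the rebinding loop in Grammar.__init__ above."""
    def __init__(self, bound: Dict[str, Parser],
                 resume_rules: Dict[str, List[Union[str, Parser]]]):
        self.bound = bound                  # parsers bound to this instance
        self.resume_parsers = []            # mirrors resume_parsers__
        for rules in resume_rules.values():
            for i in range(len(rules)):
                if isinstance(rules[i], Parser):
                    # replace the template parser in place -- hence lists,
                    # not tuples -- by this grammar's own instance
                    rules[i] = self.bound[rules[i].pname]
                    self.resume_parsers.append(rules[i])

template = Parser('ALPHA_RESUME')           # class-level placeholder
mine = Parser('ALPHA_RESUME')               # instance-bound parser
rules = {'alpha': [template]}
grammar = DemoGrammar({'ALPHA_RESUME': mine}, rules)
assert rules['alpha'][0] is mine and grammar.resume_parsers == [mine]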
@@ -1450,27 +1467,21 @@ class Grammar:
particular instance of Grammar.
"""
parser = context[-1]
if parser.pname:
# prevent overwriting instance variables or parsers of a different class
assert (parser.pname not in self.__dict__
or isinstance(self.__dict__[parser.pname], parser.__class__)), \
('Cannot add parser "%s" because a field with the same name '
'already exists in grammar object: %s!'
% (parser.pname, str(self.__dict__[parser.pname])))
setattr(self, parser.pname, parser)
# if isinstance(parser, MandatoryNary):
# for p in reversed(context):
# if p.pname:
# cast(MandatoryNary, parser).nearest_pname = p.pname
# break
# else:
# assert False, '???'
if parser.disposable:
parser.tag_name = parser.ptype
else:
parser.tag_name = parser.pname
self.all_parsers__.add(parser)
parser.grammar = self
if parser not in self.all_parsers__:
if parser.pname:
# prevent overwriting instance variables or parsers of a different class
assert (parser.pname not in self.__dict__
or isinstance(self.__dict__[parser.pname], parser.__class__)), \
('Cannot add parser "%s" because a field with the same name '
'already exists in grammar object: %s!'
% (parser.pname, str(self.__dict__[parser.pname])))
setattr(self, parser.pname, parser)
if parser.disposable:
parser.tag_name = parser.ptype
else:
parser.tag_name = parser.pname
self.all_parsers__.add(parser)
parser.grammar = self
def get_memoization_dict__(self, parser: Parser):
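The refactoring above wraps the whole body of _add_parser__ in the guard "if parser not in self.all_parsers__". Now that resume parsers are traversed with apply() in addition to the root parser (see the __init__ hunk above), the same parser can be visited more than once, so registration must be idempotent. A minimal stand-in for the guard:

class Registry:
    def __init__(self):
        self.all_parsers = set()

    def add(self, parser) -> bool:
        if parser not in self.all_parsers:   # skip already registered parsers
            self.all_parsers.add(parser)
            return True                      # newly registered
        return False                         # already known; nothing to do

registry = Registry()
p = object()
assert registry.add(p) and not registry.add(p)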
@@ -1532,6 +1543,7 @@ class Grammar:
if self._dirty_flag__:
self._reset__()
parser.apply(lambda ctx: ctx[-1].reset())
for p in self.resume_parsers__: p.apply(lambda ctx: ctx[-1].reset())
else:
self._dirty_flag__ = True
@@ -1741,6 +1753,8 @@ class Grammar:
symbol = parser
else:
self.root_parser__.apply(find_symbol_for_parser)
for resume_parser in self.resume_parsers__:
resume_parser.apply(find_symbol_for_parser)
if symbol is None:
raise AttributeError('Parser %s (%i) is not contained in Grammar!'
% (str(parser), id(parser)))
@@ -833,10 +833,9 @@ class TestCustomizedResumeParsing:
cba = "c" "b" §"a"
GAMMA_RE = /(?=GA\w+)/
"""
gr = grammar_provider(lang)()
def test_several_resume_rules_innermost_rule_matching(self):
gr = self.gr
gr = grammar_provider(self.lang)()
content = 'ALPHA abc BETA bad GAMMA cab .'
cst = gr(content)
assert cst.error_flag
@@ -903,12 +902,11 @@ class TestCustomizedResumeParsing_with_Parsers:
cab = "c" "a" §"b"
cba = "c" "b" §"a"
GAMMA_RE = /(?=GA\w+)/
ALPHA_RESUME = { !`BETA` !`GAMMA` /./ }
ALPHA_RESUME = { !`BETA` !`GAMMA` /./ }
"""
gr = grammar_provider(lang)()
def test_several_resume_rules_innermost_rule_matching(self):
gr = self.gr
gr = grammar_provider(self.lang)()
content = 'ALPHA abc BETA bad GAMMA cab .'
cst = gr(content)
assert cst.error_flag
@@ -930,8 +928,8 @@ class TestCustomizedResumeParsing_with_Parsers:
assert cst.error_flag
assert cst.content == content
assert cst.pick('alpha').content.startswith('ALPHA')
# because of resuming, there should be only on error message
assert len(cst.errors_sorted) == 1
# because of resuming, there should be only one error message
assert len(cst.errors_sorted) == 1, str(cst.errors_sorted)
class TestInSeriesResume:
@@ -1302,7 +1300,6 @@ class TestTreeOptimization:
parser = create_parser(lang.replace('none', 'flatten'))
assert parser('ABC').as_sxpr() == '(root (:Text "A") (:Text "B") (important "C"))'
parser = create_parser(lang.replace('none', 'merge_treetops'))
# print(parser.python_src__)
assert parser('ABC').as_sxpr() == '(root (:Text "A") (:Text "B") (important "C"))'
assert parser('ABD').as_sxpr() == '(root "ABD")'
parser = create_parser(lang.replace('none', 'merge_leaves'))