Commit 797e5c68 authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

parser.py, ebnf.py: Allow Parsers for resume-skipping (refactorings pending)

parent 07cf20fd
......@@ -280,11 +280,8 @@ def grammar_provider(ebnf_src: str, branding="DSL", additional_code: str = '') -
grammar_src = compileDSL(ebnf_src, nil_preprocessor, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler(branding, ebnf_src))
log_name = get_config_value('compiled_EBNF_log')
if log_name:
if is_logging():
if log_name and is_logging():
append_log(log_name, grammar_src)
else:
print(grammar_src)
imports = DHPARSER_IMPORTS.format(dhparser_parentdir=relative_path('.', DHPARSER_PARENTDIR))
grammar_factory = compile_python_object('\n'.join([imports, additional_code, grammar_src]),
r'get_(?:\w+_)?grammar$')
......
......@@ -2411,6 +2411,11 @@ def get_grammar() -> {NAME}Grammar:
resume_notices_on(grammar)
elif get_config_value('history_tracking'):
set_tracer(grammar, trace_history)
try:
if not grammar.__class__.python_src__:
grammar.__class__.python_src__ = get_grammar.python_src__
except AttributeError:
pass
return grammar
def parse_{NAME}(document, start_parser = "root_parser__", *, complete_match=True):
......@@ -2916,18 +2921,13 @@ class EBNFCompiler(Compiler):
return unrepr("re.compile(r'(?=%s)')" % escape_re(s))
elif nd.tag_name == 'procedure':
return unrepr(nd.content)
elif nd.tag_name != 'symbol':
self.tree.new_error(nd, 'Only regular expressions, string literals and external '
'procedures are allowed as search rules, but not: ' + nd.tag_name)
return ''
def gen_search_list(self, nodes: Sequence[Node]) -> List[Union[unrepr, str]]:
search_list = [] # type: List[Union[unrepr, str]]
for child in nodes:
rule = self.gen_search_rule(child)
search_list.append(rule if rule else unrepr(child.content.strip()))
return search_list
elif nd.tag_name == 'symbol':
return unrepr(nd.content.strip())
else:
return ''
# self.tree.new_error(nd, 'Only regular expressions, string literals and external '
# 'procedures are allowed as search rules, but not: ' + nd.tag_name)
# return unrepr('')
def directly_referred(self, symbol: str) -> FrozenSet[str]:
......@@ -3058,7 +3058,6 @@ class EBNFCompiler(Compiler):
Creates the Python code for the parser after compilation of
the EBNF-Grammar
"""
def pp_rules(rule_name: str, ruleset: Dict[str, List]) -> Tuple[str, str]:
"""Pretty-print skip- and resume-rule and error-messages dictionaries
to avoid excessively long lines in the generated python source."""
......@@ -3066,11 +3065,8 @@ class EBNFCompiler(Compiler):
indent = ",\n" + " " * (len(rule_name) + 8)
rule_repr = []
for k, v in ruleset.items():
if len(v) > 1:
delimiter = indent + ' ' * (len(k) + 5)
val = '(' + delimiter.join(str(it) for it in v) + ')'
else:
val = str((v[0],)) # turn single-element list into single-element tuple
delimiter = indent + ' ' * (len(k) + 5)
val = '[' + delimiter.join(str(it) for it in v) + ']'
rule_repr.append("'{key}': {value}".format(key=k, value=val))
rule_repr[0] = '{' + rule_repr[0]
rule_repr[-1] = rule_repr[-1] + '}'
......@@ -3093,6 +3089,7 @@ class EBNFCompiler(Compiler):
# minimize the necessary number of forward declarations
self.optimize_definitions_order(definitions)
self.root_symbol = definitions[0][0] if definitions else ""
# provide for capturing of symbols that are variables, i.e. the
# value of which will be retrieved at some point during the parsing process
......@@ -3141,6 +3138,7 @@ class EBNFCompiler(Compiler):
try:
nd = self.rules[rule.s][0].children[1]
refined = self.gen_search_rule(nd)
if not refined: refined = unrepr(rule.s)
except IndexError:
nd = self.tree # TODO: Allow arbitrary parsers, here
refined = '' # refined = rule
......@@ -3159,7 +3157,7 @@ class EBNFCompiler(Compiler):
refined_rules.append(rule)
resume_rules[symbol] = refined_rules
if resume_rules:
definitions.append(pp_rules(self.RESUME_RULES_KEYWORD, resume_rules))
definitions.insert(0, pp_rules(self.RESUME_RULES_KEYWORD, resume_rules))
# prepare and add skip-rules
......@@ -3180,7 +3178,7 @@ class EBNFCompiler(Compiler):
rules.append(search)
skip_rules[symbol] = rules
if skip_rules:
definitions.append(pp_rules(self.SKIP_RULES_KEYWORD, skip_rules))
definitions.insert(0, pp_rules(self.SKIP_RULES_KEYWORD, skip_rules))
for symbol in self.directives.skip.keys():
if symbol not in self.consumed_skip_rules:
......@@ -3257,7 +3255,6 @@ class EBNFCompiler(Compiler):
# turn definitions into declarations in reverse order
self.root_symbol = definitions[0][0] if definitions else ""
definitions.reverse()
declarations += [symbol + ' = Forward()'
for symbol in sorted(list(self.forward))]
......@@ -3314,7 +3311,7 @@ class EBNFCompiler(Compiler):
'Filter declared for uncaptured symbol "%s"' % symbol,
WARNING)
# set root_symbol parser and assemble python grammar definition
# assemble python grammar definition
if self.root_symbol:
if self.directives.reduction != CombinedParser.DEFAULT_OPTIMIZATION:
......@@ -3323,6 +3320,8 @@ class EBNFCompiler(Compiler):
declarations.append('root__ = TreeReduction(' + self.root_symbol + opt)
else:
declarations.append('root__ = ' + self.root_symbol)
else:
declarations.append(f'root__ = RegExp(r"{NEVER_MATCH_PATTERN}")')
declarations.append('')
self.python_src = '\n '.join(declarations) \
+ GRAMMAR_FACTORY.format(NAME=self.grammar_name, ID=self.grammar_id)
......@@ -3334,7 +3333,6 @@ class EBNFCompiler(Compiler):
def on_ZOMBIE__(self, node: Node) -> str:
result = ['Illegal node in AST generated from EBNF-Source!']
# print(self.tree.as_sxpr())
if node.children:
result.append(' Fragments found: ')
result.extend([str(self.compile(child)) for child in node.children])
......@@ -3589,11 +3587,11 @@ class EBNFCompiler(Compiler):
node, 'Directive "%s" requires message string or a a pair ' % key
+ '(regular expression or search string, message string) as argument!')
if len(node.children) == 2:
error_msgs.append(('', unrepr(node.children[1].content)))
error_msgs.append(('', unrepr(node[1].content)))
elif len(node.children) == 3:
rule = self.gen_search_rule(node.children[1])
error_msgs.append((rule if rule else unrepr(node.children[1].content),
unrepr(node.children[2].content)))
rule = self.gen_search_rule(node[1])
error_msgs.append((rule if rule else unrepr(node[1].content),
unrepr(node[2].content)))
else:
self.tree.new_error(node, 'Directive "%s" allows at most two parameters' % key)
self.directives.error[symbol] = error_msgs
......@@ -3603,11 +3601,11 @@ class EBNFCompiler(Compiler):
# if symbol in self.rules:
# self.tree.new_error(node, 'Skip list for resuming in series for symbol "{}"'
# ' must be defined before the symbol!'.format(symbol))
self.directives.skip[symbol] = self.gen_search_list(node.children[1:])
self.directives.skip[symbol] = [self.gen_search_rule(nd) for nd in node[1:]]
elif key.endswith('_resume'):
symbol = key[:-7]
self.directives.resume[symbol] = self.gen_search_list(node.children[1:])
self.directives.resume[symbol] = [self.gen_search_rule(nd) for nd in node[1:]]
else:
if any(key.startswith(directive) for directive in ('skip', 'error', 'resume')):
......
......@@ -967,6 +967,9 @@ class GrammarError(Exception):
for i, err_tuple in enumerate(self.errors))
RESERVED_PARSER_NAMES = ('root__', 'dwsp__', 'wsp__', 'comment__', 'root_parser__', 'ff_parser__')
class Grammar:
r"""
Class Grammar directs the parsing process and stores global state
......@@ -1276,7 +1279,7 @@ class Grammar:
if cls.parser_initialization__[0] != "done":
cdict = cls.__dict__
for entry, parser in cdict.items():
if isinstance(parser, Parser) and sane_parser_name(entry):
if isinstance(parser, Parser) and entry not in RESERVED_PARSER_NAMES:
anonymous = True if cls.disposable__.match(entry) else False
assert anonymous or not parser.drop_content, entry
if isinstance(parser, Forward):
......@@ -1360,10 +1363,24 @@ class Grammar:
self.static_analysis_caches__ = dict() # type: Dict[str, Dict]
self.root_parser__.apply(self._add_parser__)
resume_lists = []
self.resume_parsers__: List[Parser] = []
if hasattr(self, 'resume_rules__'):
resume_lists.extend(self.resume_rules__.values())
if hasattr(self, 'skip_rules__'):
resume_lists.extend(self.skip_rules__.values())
for l in resume_lists:
for i in range(len(l)):
if isinstance(l[i], Parser):
l[i] = self[l[i].pname]
self.resume_parsers__.append(l[i])
assert 'root_parser__' in self.__dict__
assert self.root_parser__ == self.__dict__['root_parser__']
self.ff_parser__ = self.root_parser__
self.root_parser__.apply(lambda ctx: ctx[-1].reset())
for p in self.resume_parsers__: p.apply(lambda ctx: ctx[-1].reset())
if (self.static_analysis_pending__
and (static_analysis
......@@ -1450,27 +1467,21 @@ class Grammar:
particular instance of Grammar.
"""
parser = context[-1]
if parser.pname:
# prevent overwriting instance variables or parsers of a different class
assert (parser.pname not in self.__dict__
or isinstance(self.__dict__[parser.pname], parser.__class__)), \
('Cannot add parser "%s" because a field with the same name '
'already exists in grammar object: %s!'
% (parser.pname, str(self.__dict__[parser.pname])))
setattr(self, parser.pname, parser)
# if isinstance(parser, MandatoryNary):
# for p in reversed(context):
# if p.pname:
# cast(MandatoryNary, parser).nearest_pname = p.pname
# break
# else:
# assert False, '???'
if parser.disposable:
parser.tag_name = parser.ptype
else:
parser.tag_name = parser.pname
self.all_parsers__.add(parser)
parser.grammar = self
if parser not in self.all_parsers__:
if parser.pname:
# prevent overwriting instance variables or parsers of a different class
assert (parser.pname not in self.__dict__
or isinstance(self.__dict__[parser.pname], parser.__class__)), \
('Cannot add parser "%s" because a field with the same name '
'already exists in grammar object: %s!'
% (parser.pname, str(self.__dict__[parser.pname])))
setattr(self, parser.pname, parser)
if parser.disposable:
parser.tag_name = parser.ptype
else:
parser.tag_name = parser.pname
self.all_parsers__.add(parser)
parser.grammar = self
def get_memoization_dict__(self, parser: Parser):
......@@ -1532,6 +1543,7 @@ class Grammar:
if self._dirty_flag__:
self._reset__()
parser.apply(lambda ctx: ctx[-1].reset())
for p in self.resume_parsers__: p.apply(lambda ctx: ctx[-1].reset())
else:
self._dirty_flag__ = True
......@@ -1741,6 +1753,8 @@ class Grammar:
symbol = parser
else:
self.root_parser__.apply(find_symbol_for_parser)
for resume_parser in self.resume_parsers__:
resume_parser.apply(find_symbol_for_parser)
if symbol is None:
raise AttributeError('Parser %s (%i) is not contained in Grammar!'
% (str(parser), id(parser)))
......
......@@ -833,10 +833,9 @@ class TestCustomizedResumeParsing:
cba = "c" "b" §"a"
GAMMA_RE = /(?=GA\w+)/
"""
gr = grammar_provider(lang)()
def test_several_resume_rules_innermost_rule_matching(self):
gr = self.gr
gr = grammar_provider(self.lang)()
content = 'ALPHA abc BETA bad GAMMA cab .'
cst = gr(content)
assert cst.error_flag
......@@ -903,12 +902,11 @@ class TestCustomizedResumeParsing_with_Parsers:
cab = "c" "a" §"b"
cba = "c" "b" §"a"
GAMMA_RE = /(?=GA\w+)/
ALPHA_RESUME = { !`BETA` !`GAMMA` /./ }
ALPHA_RESUME = { !`BETA` !`GAMMA` /./ }
"""
gr = grammar_provider(lang)()
def test_several_resume_rules_innermost_rule_matching(self):
gr = self.gr
gr = grammar_provider(self.lang)()
content = 'ALPHA abc BETA bad GAMMA cab .'
cst = gr(content)
assert cst.error_flag
......@@ -930,8 +928,8 @@ class TestCustomizedResumeParsing_with_Parsers:
assert cst.error_flag
assert cst.content == content
assert cst.pick('alpha').content.startswith('ALPHA')
# because of resuming, there should be only on error message
assert len(cst.errors_sorted) == 1
# because of resuming, there should be only one error message
assert len(cst.errors_sorted) == 1, str(cst.errors_sorted)
class TestInSeriesResume:
......@@ -1302,7 +1300,6 @@ class TestTreeOptimization:
parser = create_parser(lang.replace('none', 'flatten'))
assert parser('ABC').as_sxpr() == '(root (:Text "A") (:Text "B") (important "C"))'
parser = create_parser(lang.replace('none', 'merge_treetops'))
# print(parser.python_src__)
assert parser('ABC').as_sxpr() == '(root (:Text "A") (:Text "B") (important "C"))'
assert parser('ABD').as_sxpr() == '(root "ABD")'
parser = create_parser(lang.replace('none', 'merge_leaves'))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment