Commit 958459c3 authored by di68kap

Merge remote-tracking branch 'origin/development' into development

parents 6062648b 3eefd06d
......@@ -280,11 +280,8 @@ def grammar_provider(ebnf_src: str, branding="DSL", additional_code: str = '') -
grammar_src = compileDSL(ebnf_src, nil_preprocessor, get_ebnf_grammar(),
get_ebnf_transformer(), get_ebnf_compiler(branding, ebnf_src))
log_name = get_config_value('compiled_EBNF_log')
if log_name:
if is_logging():
if log_name and is_logging():
append_log(log_name, grammar_src)
else:
print(grammar_src)
imports = DHPARSER_IMPORTS.format(dhparser_parentdir=relative_path('.', DHPARSER_PARENTDIR))
grammar_factory = compile_python_object('\n'.join([imports, additional_code, grammar_src]),
r'get_(?:\w+_)?grammar$')
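The tightened condition makes the two requirements explicit: the compiled grammar source is appended to the configured log only if a log name is set and logging is active; otherwise it falls through to the else-branch and is printed. A minimal usage sketch, assuming the standard DHParser module layout (the EBNF source and log file name are made up):

from DHParser.configuration import set_config_value
from DHParser.dsl import grammar_provider

set_config_value('compiled_EBNF_log', 'compiled_grammar.log')
# Logging has not been started, so is_logging() is False and the
# generated parser source is printed rather than appended to the log.
word_parser = grammar_provider(r'word = /\w+/')()
result = word_parser('parsing')
assert not result.error_flag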
......
......@@ -2420,6 +2420,11 @@ def get_grammar() -> {NAME}Grammar:
resume_notices_on(grammar)
elif get_config_value('history_tracking'):
set_tracer(grammar, trace_history)
try:
if not grammar.__class__.python_src__:
grammar.__class__.python_src__ = get_grammar.python_src__
except AttributeError:
pass
return grammar
def parse_{NAME}(document, start_parser = "root_parser__", *, complete_match=True):
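The inserted try/except copies the generated parser source from the factory function onto the grammar class the first time the factory runs, so the source remains inspectable on instances; the except-branch keeps grammar classes without a python_src__ slot working. A self-contained sketch of the pattern, with all names hypothetical:

class DemoGrammar:
    python_src__ = ''  # class-level slot for the generated source

def get_demo_grammar() -> DemoGrammar:
    grammar = DemoGrammar()
    try:
        if not grammar.__class__.python_src__:
            # forward the source attached to the factory to the class
            grammar.__class__.python_src__ = get_demo_grammar.python_src__
    except AttributeError:
        pass  # no source attached to the factory
    return grammar

get_demo_grammar.python_src__ = '# generated parser source...'
assert get_demo_grammar().python_src__.startswith('# generated')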
......@@ -2925,18 +2930,13 @@ class EBNFCompiler(Compiler):
return unrepr("re.compile(r'(?=%s)')" % escape_re(s))
elif nd.tag_name == 'procedure':
return unrepr(nd.content)
elif nd.tag_name != 'symbol':
self.tree.new_error(nd, 'Only regular expressions, string literals and external '
'procedures are allowed as search rules, but not: ' + nd.tag_name)
return ''
def gen_search_list(self, nodes: Sequence[Node]) -> List[Union[unrepr, str]]:
search_list = [] # type: List[Union[unrepr, str]]
for child in nodes:
rule = self.gen_search_rule(child)
search_list.append(rule if rule else unrepr(child.content.strip()))
return search_list
elif nd.tag_name == 'symbol':
return unrepr(nd.content.strip())
else:
return ''
# self.tree.new_error(nd, 'Only regular expressions, string literals and external '
# 'procedures are allowed as search rules, but not: ' + nd.tag_name)
# return unrepr('')
def directly_referred(self, symbol: str) -> FrozenSet[str]:
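With the error branch disabled, gen_search_rule now passes plain symbols through as unrepr references that are resolved to the equally named parser later on (see the changes to class Grammar below). In EBNF this lets a resume directive point at a grammar symbol, as in this condensed, hypothetical variant of the new test grammar further down:

lang_with_symbol_rule = r'''@ literalws = right
    @ alpha_resume = ALPHA_RESUME    # a symbol, not just a regexp or literal
    document = alpha "."
    alpha = "ALPHA" abc
    abc = §"a" "b" "c"
    ALPHA_RESUME = { !`.` /./ }
    '''

Note that ALPHA_RESUME is referenced only by the directive, i.e. it is not reachable from the root parser; the new unconnected_parsers__ machinery below takes care of resetting such parsers.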
......@@ -3067,7 +3067,6 @@ class EBNFCompiler(Compiler):
Creates the Python code for the parser after compilation of
the EBNF-Grammar
"""
def pp_rules(rule_name: str, ruleset: Dict[str, List]) -> Tuple[str, str]:
"""Pretty-print skip- and resume-rule and error-messages dictionaries
to avoid excessively long lines in the generated python source."""
......@@ -3075,11 +3074,8 @@ class EBNFCompiler(Compiler):
indent = ",\n" + " " * (len(rule_name) + 8)
rule_repr = []
for k, v in ruleset.items():
if len(v) > 1:
delimiter = indent + ' ' * (len(k) + 5)
val = '(' + delimiter.join(str(it) for it in v) + ')'
else:
val = str((v[0],)) # turn single-element list into single-element tuple
delimiter = indent + ' ' * (len(k) + 5)
val = '[' + delimiter.join(str(it) for it in v) + ']'
rule_repr.append("'{key}': {value}".format(key=k, value=val))
rule_repr[0] = '{' + rule_repr[0]
rule_repr[-1] = rule_repr[-1] + '}'
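Every ruleset value is now rendered uniformly as a multi-line Python list, which removes the former single-element/tuple special case. A standalone re-creation of the simplified helper for illustration (the tail of the function is folded away in this diff, so the return statement here is an assumption):

from typing import Dict, List, Tuple

def pp_rules(rule_name: str, ruleset: Dict[str, List]) -> Tuple[str, str]:
    """Pretty-print a ruleset with one entry per line."""
    indent = ",\n" + " " * (len(rule_name) + 8)
    rule_repr = []
    for k, v in ruleset.items():
        delimiter = indent + ' ' * (len(k) + 5)
        val = '[' + delimiter.join(str(it) for it in v) + ']'
        rule_repr.append("'{key}': {value}".format(key=k, value=val))
    rule_repr[0] = '{' + rule_repr[0]
    rule_repr[-1] = rule_repr[-1] + '}'
    return rule_name, indent.join(rule_repr)

print('resume_rules__ = ' + pp_rules(
    'resume_rules__', {'alpha': ["re.compile(r'(?=BETA)')"]})[1])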
......@@ -3102,6 +3098,7 @@ class EBNFCompiler(Compiler):
# minimize the necessary number of forward declarations
self.optimize_definitions_order(definitions)
self.root_symbol = definitions[0][0] if definitions else ""
# provide for capturing of symbols that are variables, i.e. the
# value of which will be retrieved at some point during the parsing process
......@@ -3150,9 +3147,10 @@ class EBNFCompiler(Compiler):
try:
nd = self.rules[rule.s][0].children[1]
refined = self.gen_search_rule(nd)
if not refined: refined = unrepr(rule.s)
except IndexError:
nd = self.tree
refined = ""
nd = self.tree  # TODO: Allow arbitrary parsers here
refined = '' # refined = rule
except KeyError:
# rule represents a procedure name
nd = self.tree
......@@ -3168,7 +3166,7 @@ class EBNFCompiler(Compiler):
refined_rules.append(rule)
resume_rules[symbol] = refined_rules
if resume_rules:
definitions.append(pp_rules(self.RESUME_RULES_KEYWORD, resume_rules))
definitions.insert(0, pp_rules(self.RESUME_RULES_KEYWORD, resume_rules))
# prepare and add skip-rules
......@@ -3189,7 +3187,7 @@ class EBNFCompiler(Compiler):
rules.append(search)
skip_rules[symbol] = rules
if skip_rules:
definitions.append(pp_rules(self.SKIP_RULES_KEYWORD, skip_rules))
definitions.insert(0, pp_rules(self.SKIP_RULES_KEYWORD, skip_rules))
for symbol in self.directives.skip.keys():
if symbol not in self.consumed_skip_rules:
......@@ -3266,7 +3264,6 @@ class EBNFCompiler(Compiler):
# turn definitions into declarations in reverse order
self.root_symbol = definitions[0][0] if definitions else ""
definitions.reverse()
declarations += [symbol + ' = Forward()'
for symbol in sorted(list(self.forward))]
......@@ -3323,7 +3320,7 @@ class EBNFCompiler(Compiler):
'Filter declared for uncaptured symbol "%s"' % symbol,
WARNING)
# set root_symbol parser and assemble python grammar definition
# assemble python grammar definition
if self.root_symbol:
if self.directives.reduction != CombinedParser.DEFAULT_OPTIMIZATION:
......@@ -3332,6 +3329,8 @@ class EBNFCompiler(Compiler):
declarations.append('root__ = TreeReduction(' + self.root_symbol + opt)
else:
declarations.append('root__ = ' + self.root_symbol)
else:
declarations.append(f'root__ = RegExp(r"{NEVER_MATCH_PATTERN}")')
declarations.append('')
self.python_src = '\n '.join(declarations) \
+ GRAMMAR_FACTORY.format(NAME=self.grammar_name, ID=self.grammar_id)
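The new else-branch ensures that even a grammar without a resolvable root symbol compiles to valid Python; its root parser simply never matches, so the problem surfaces as a parsing failure instead of an error in the generated module. A quick check of the never-match idea (the concrete pattern shown is an assumption, not necessarily DHParser's):

import re

NEVER_MATCH_PATTERN = r'..(?<=^)'  # assumed: consumes two chars, then demands start-of-string
assert re.search(NEVER_MATCH_PATTERN, 'any document whatsoever') is None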
......@@ -3343,7 +3342,6 @@ class EBNFCompiler(Compiler):
def on_ZOMBIE__(self, node: Node) -> str:
result = ['Illegal node in AST generated from EBNF-Source!']
# print(self.tree.as_sxpr())
if node.children:
result.append(' Fragments found: ')
result.extend([str(self.compile(child)) for child in node.children])
......@@ -3598,11 +3596,11 @@ class EBNFCompiler(Compiler):
node, 'Directive "%s" requires message string or a pair ' % key
+ '(regular expression or search string, message string) as argument!')
if len(node.children) == 2:
error_msgs.append(('', unrepr(node.children[1].content)))
error_msgs.append(('', unrepr(node[1].content)))
elif len(node.children) == 3:
rule = self.gen_search_rule(node.children[1])
error_msgs.append((rule if rule else unrepr(node.children[1].content),
unrepr(node.children[2].content)))
rule = self.gen_search_rule(node[1])
error_msgs.append((rule if rule else unrepr(node[1].content),
unrepr(node[2].content)))
else:
self.tree.new_error(node, 'Directive "%s" allows at most two parameters' % key)
self.directives.error[symbol] = error_msgs
......@@ -3612,11 +3610,11 @@ class EBNFCompiler(Compiler):
# if symbol in self.rules:
# self.tree.new_error(node, 'Skip list for resuming in series for symbol "{}"'
# ' must be defined before the symbol!'.format(symbol))
self.directives.skip[symbol] = self.gen_search_list(node.children[1:])
self.directives.skip[symbol] = [self.gen_search_rule(nd) for nd in node[1:]]
elif key.endswith('_resume'):
symbol = key[:-7]
self.directives.resume[symbol] = self.gen_search_list(node.children[1:])
self.directives.resume[symbol] = [self.gen_search_rule(nd) for nd in node[1:]]
else:
if any(key.startswith(directive) for directive in ('skip', 'error', 'resume')):
......
......@@ -177,7 +177,7 @@ class ParserError(Exception):
return pe
PatternMatchType = Union[RxPatternType, str, Callable]
PatternMatchType = Union[RxPatternType, str, Callable, 'Parser']
ErrorMessagesType = List[Tuple[PatternMatchType, str]]
ResumeList = List[PatternMatchType] # list of strings or regular expressions
ReentryPointAlgorithm = Callable[[StringView, int, int], Tuple[int, int]]
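PatternMatchType now admits Parser objects alongside search strings, compiled regexes, and callables, so a single resume list may mix all four. An illustration of the non-Parser variants (the callable follows the ReentryPointAlgorithm convention of returning a position and a match length):

import re

def semicolon_search(rest, start: int, end: int):
    # ReentryPointAlgorithm-style: (reentry position, match length)
    pos = str(rest).find(';', start, end)
    return (pos, 1) if pos >= 0 else (-1, 0)

mixed_resume_list = ['RESUME',                 # plain search string
                     re.compile('(?=GAMMA)'),  # regular expression
                     semicolon_search]         # callable; Parser instances are new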
......@@ -191,7 +191,7 @@ ReentryPointAlgorithm = Callable[[StringView, int, int], Tuple[int, int]]
def reentry_point(rest: StringView,
rules: ResumeList,
comment_regex,
search_window: int = -1) -> int:
search_window: int = -1) -> Tuple[int, Node]:
"""
Finds the point where parsing should resume after a ParserError has been caught.
The algorithm makes sure that this reentry-point does not lie inside a comment.
......@@ -222,11 +222,13 @@ def reentry_point(rest: StringView,
reentry-point. A value smaller than zero means that the complete remaining
text will be searched. A value of zero effectively turns off resuming after
error.
:return: The integer index of the closest reentry point or -1 if no
:return: A tuple of integer index of the closest reentry point and a Node
capturing all text from ``rest`` up to this point or ``(-1, None)`` if no
reentry-point was found.
"""
upper_limit = len(rest) + 1
closest_match = upper_limit
skip_node = None
comments = None # type: Optional[Iterator]
if search_window < 0:
search_window = len(rest)
......@@ -298,19 +300,31 @@ def reentry_point(rest: StringView,
# find closest match
for rule in rules:
comments = rest.finditer(comment_regex)
if callable(rule):
search_func = algorithm_search
elif isinstance(rule, str):
search_func = str_search
if isinstance(rule, Parser):
_node, _text = cast(Parser, rule)(rest)
if _node:
pos = len(rest) - len(_text)
if pos < closest_match:
closest_match = pos
skip_node = _node
else:
search_func = rx_search
pos = entry_point(search_func, rule)
closest_match = min(pos, closest_match)
if callable(rule):
search_func = algorithm_search
elif isinstance(rule, str):
search_func = str_search
else:
search_func = rx_search
pos = entry_point(search_func, rule)
if pos < closest_match:
skip_node = None
closest_match = pos
# in case no rule matched return -1
if closest_match == upper_limit:
closest_match = -1
return closest_match
if skip_node is None:
skip_node = Node(ZOMBIE_TAG, rest[:max(closest_match, 0)])
return closest_match, skip_node
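Callers now get the skipped text back as a ready-made node: a Parser rule contributes the tree it matched, while string and regex rules yield a synthesized zombie node over everything in front of the reentry point. A hedged usage sketch (toy input; the import paths assume DHParser's current module layout):

import re
from DHParser.stringview import StringView
from DHParser.parse import reentry_point

rest = StringView('garbled input RESUME here')
pos, skip_node = reentry_point(rest, [re.compile('(?=RESUME)')],
                               re.compile('#[^\n]*'), search_window=-1)
if pos >= 0:
    # skip_node wraps the characters in front of the reentry point
    print('resume at', pos, '- skipped:', skip_node.content)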
########################################################################
......@@ -524,8 +538,8 @@ class Parser:
rules = tuple(grammar.resume_rules__.get(
self.pname or grammar.associated_symbol__(self).pname, []))
rest = pe.rest[len(pe.node):]
i = reentry_point(rest, rules, grammar.comment_rx__,
grammar.reentry_search_window__)
i, skip_node = reentry_point(rest, rules, grammar.comment_rx__,
grammar.reentry_search_window__)
if i >= 0 or self == grammar.start_parser__:
# either a reentry point was found or the
# error has fallen through to the first level
......@@ -540,9 +554,8 @@ class Parser:
zombie.result = rest[:i]
tail = tuple() # type: ChildrenType
else:
nd = Node(ZOMBIE_TAG, rest[:i]).with_pos(location)
# nd.attr['err'] = pe.error.message
tail = (nd,)
tail = (skip_node,)
rest = rest[i:]
if pe.first_throw:
node = pe.node
......@@ -954,6 +967,9 @@ class GrammarError(Exception):
for i, err_tuple in enumerate(self.errors))
RESERVED_PARSER_NAMES = ('root__', 'dwsp__', 'wsp__', 'comment__', 'root_parser__', 'ff_parser__')
class Grammar:
r"""
Class Grammar directs the parsing process and stores global state
......@@ -1094,6 +1110,10 @@ class Grammar:
was started (see method `__call__`) or `None` if no parsing process
is running.
unconnected_parsers__: A list of parsers that are not connected to the
root parser. This list is collected during instantiation from the
``resume_rules__``- and ``skip_rules__``-data.
_dirty_flag__: A flag indicating that the Grammar has been called at
least once so that the parsing-variables need to be reset
when it is called again.
......@@ -1263,7 +1283,7 @@ class Grammar:
if cls.parser_initialization__[0] != "done":
cdict = cls.__dict__
for entry, parser in cdict.items():
if isinstance(parser, Parser) and sane_parser_name(entry):
if isinstance(parser, Parser) and entry not in RESERVED_PARSER_NAMES:
anonymous = True if cls.disposable__.match(entry) else False
assert anonymous or not parser.drop_content, entry
if isinstance(parser, Forward):
......@@ -1347,10 +1367,25 @@ class Grammar:
self.static_analysis_caches__ = dict() # type: Dict[str, Dict]
self.root_parser__.apply(self._add_parser__)
root_connected = frozenset(self.all_parsers__)
assert 'root_parser__' in self.__dict__
assert self.root_parser__ == self.__dict__['root_parser__']
self.ff_parser__ = self.root_parser__
self.root_parser__.apply(lambda ctx: ctx[-1].reset())
self.unconnected_parsers__: List[Parser] = []
resume_lists = []
if hasattr(self, 'resume_rules__'):
resume_lists.extend(self.resume_rules__.values())
if hasattr(self, 'skip_rules__'):
resume_lists.extend(self.skip_rules__.values())
for l in resume_lists:
for i in range(len(l)):
if isinstance(l[i], Parser):
l[i] = self[l[i].pname]
if l[i] not in root_connected:
self.unconnected_parsers__.append(l[i])
for p in self.unconnected_parsers__: p.apply(lambda ctx: ctx[-1].reset())
if (self.static_analysis_pending__
and (static_analysis
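During instantiation, the resume- and skip-lists may still hold Parser objects taken from the class dictionary; the loop above replaces each of them with the instance bound to this grammar (self[l[i].pname]) and records those that the root parser cannot reach, since such parsers would otherwise never be reset between parsing runs. A toy model of the resolution step, with all names hypothetical:

class ToyParser:
    def __init__(self, pname: str):
        self.pname = pname

bound = {'ALPHA_RESUME': ToyParser('ALPHA_RESUME')}  # parsers owned by this grammar
rules = [ToyParser('ALPHA_RESUME'), '(?=BETA)']      # stale instance from the class dict
root_connected = frozenset()                         # nothing reachable from the root here

unconnected = []
for i, rule in enumerate(rules):
    if isinstance(rule, ToyParser):
        rules[i] = bound[rule.pname]                 # swap in the grammar-bound instance
        if rules[i] not in root_connected:
            unconnected.append(rules[i])             # remember for explicit resets

assert rules[0] is bound['ALPHA_RESUME'] and unconnected == [rules[0]]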
......@@ -1437,27 +1472,21 @@ class Grammar:
particular instance of Grammar.
"""
parser = context[-1]
if parser.pname:
# prevent overwriting instance variables or parsers of a different class
assert (parser.pname not in self.__dict__
or isinstance(self.__dict__[parser.pname], parser.__class__)), \
('Cannot add parser "%s" because a field with the same name '
'already exists in grammar object: %s!'
% (parser.pname, str(self.__dict__[parser.pname])))
setattr(self, parser.pname, parser)
# if isinstance(parser, MandatoryNary):
# for p in reversed(context):
# if p.pname:
# cast(MandatoryNary, parser).nearest_pname = p.pname
# break
# else:
# assert False, '???'
if parser.disposable:
parser.tag_name = parser.ptype
else:
parser.tag_name = parser.pname
self.all_parsers__.add(parser)
parser.grammar = self
if parser not in self.all_parsers__:
if parser.pname:
# prevent overwriting instance variables or parsers of a different class
assert (parser.pname not in self.__dict__
or isinstance(self.__dict__[parser.pname], parser.__class__)), \
('Cannot add parser "%s" because a field with the same name '
'already exists in grammar object: %s!'
% (parser.pname, str(self.__dict__[parser.pname])))
setattr(self, parser.pname, parser)
if parser.disposable:
parser.tag_name = parser.ptype
else:
parser.tag_name = parser.pname
self.all_parsers__.add(parser)
parser.grammar = self
def get_memoization_dict__(self, parser: Parser):
......@@ -1519,6 +1548,7 @@ class Grammar:
if self._dirty_flag__:
self._reset__()
parser.apply(lambda ctx: ctx[-1].reset())
for p in self.unconnected_parsers__: p.apply(lambda ctx: ctx[-1].reset())
else:
self._dirty_flag__ = True
......@@ -1728,6 +1758,8 @@ class Grammar:
symbol = parser
else:
self.root_parser__.apply(find_symbol_for_parser)
for resume_parser in self.unconnected_parsers__:
resume_parser.apply(find_symbol_for_parser)
if symbol is None:
raise AttributeError('Parser %s (%i) is not contained in Grammar!'
% (str(parser), id(parser)))
......@@ -2741,7 +2773,7 @@ class MandatoryNary(NaryParser):
return duplicate
@cython.returns(cython.int)
def get_reentry_point(self, text_: StringView) -> int:
def get_reentry_point(self, text_: StringView) -> Tuple[int, Node]:
"""Returns a reentry-point determined by the associated skip-list in
`self.grammar.skip_rules__`. If no reentry-point was found or the
skip-list ist empty, -1 is returned.
......@@ -2751,14 +2783,15 @@ class MandatoryNary(NaryParser):
if skip:
gr = self._grammar
return reentry_point(text_, skip, gr.comment_rx__, gr.reentry_search_window__)
return -1
return -1, Node(ZOMBIE_TAG, '')
@cython.locals(i=cython.int, location=cython.int)
@cython.locals(location=cython.int)
def mandatory_violation(self,
text_: StringView,
failed_on_lookahead: bool,
expected: str,
reloc: int) -> Tuple[Error, Node, StringView]:
reloc: int,
err_node: Node) -> Tuple[Error, StringView]:
"""
Chooses the right error message in case of a mandatory violation and
returns an error with this message, an error node, to which the error
......@@ -2772,18 +2805,18 @@ class MandatoryNary(NaryParser):
:param failed_on_lookahead: True if the violating parser was a
Lookahead-Parser.
:param expected: the expected (but not found) text at this point.
:param err_node: A zombie-node that captures the text from the
position where the error occurred to a suggested
reentry-position.
:param reloc: A position value that represents the reentry point for
parsing after the error occurred.
:return: a tuple of an error object, a zombie node at the position
where the mandatory violation occurred and to which the error
object is attached and a string view for the continuation the
parsing process
:return: a tuple of an error object and a string view for the
continuation of the parsing process
"""
grammar = self._grammar
i = reloc if reloc >= 0 else 0
location = grammar.document_length__ - len(text_)
err_node = Node(ZOMBIE_TAG, text_[:i], True).with_pos(location)
err_node.with_pos(location)
found = text_[:10].replace('\n', '\\n ') + '...'
sym = self.grammar.associated_symbol__(self).pname
err_msgs = self.grammar.error_messages__.get(sym, [])
......@@ -2814,7 +2847,7 @@ class MandatoryNary(NaryParser):
# signal error to tracer directly, because this error is not raised!
grammar.most_recent_error__ = ParserError(
self, err_node, text_, error, first_throw=False)
return error, err_node, text_[i:]
return error, text_[max(reloc, 0):]
def static_analysis(self) -> List['AnalysisError']:
errors = super().static_analysis()
......@@ -2888,9 +2921,9 @@ class Series(MandatoryNary):
if pos < mandatory:
return None, text
else:
reloc = self.get_reentry_point(text_)
error, node, text_ = self.mandatory_violation(
text_, isinstance(parser, Lookahead), parser.repr, reloc)
reloc, node = self.get_reentry_point(text_)
error, text_ = self.mandatory_violation(
text_, isinstance(parser, Lookahead), parser.repr, reloc, node)
# check if parsing of the series can be resumed somewhere
if reloc >= 0:
nd, text_ = parser(text_) # try current parser again
......@@ -3243,9 +3276,9 @@ class Interleave(MandatoryNary):
break
else:
return None, text
reloc = self.get_reentry_point(text_)
reloc, err_node = self.get_reentry_point(text_)
expected = ' ° '.join([parser.repr for parser in self.parsers])
error, err_node, text_ = self.mandatory_violation(text_, False, expected, reloc)
error, text_ = self.mandatory_violation(text_, False, expected, reloc, err_node)
results += (err_node,)
if reloc < 0:
break
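Both Series and Interleave now follow the same protocol: get_reentry_point delivers the reentry offset together with the node covering the skipped text, and mandatory_violation merely positions and reports it. A toy re-enactment of that hand-off (deliberately not the DHParser API):

from typing import Tuple

class ZombieNode:
    def __init__(self, text: str):
        self.text = text

def get_reentry_point(rest: str) -> Tuple[int, ZombieNode]:
    pos = rest.find(';')  # pretend ';' is the skip rule
    return (pos, ZombieNode(rest[:pos])) if pos >= 0 else (-1, ZombieNode(''))

def mandatory_violation(rest: str, reloc: int, err_node: ZombieNode) -> Tuple[str, str]:
    error = 'expected X, found: ' + rest[:5]  # the error refers to the captured text
    return error, rest[max(reloc, 0):]        # resume after the reentry point

reloc, node = get_reentry_point('garbage; rest of input')
error, rest = mandatory_violation('garbage; rest of input', reloc, node)
assert rest == '; rest of input' and node.text == 'garbage'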
......
......@@ -833,10 +833,9 @@ class TestCustomizedResumeParsing:
cba = "c" "b" §"a"
GAMMA_RE = /(?=GA\w+)/
"""
gr = grammar_provider(lang)()
def test_several_resume_rules_innermost_rule_matching(self):
gr = self.gr
gr = grammar_provider(self.lang)()
content = 'ALPHA abc BETA bad GAMMA cab .'
cst = gr(content)
assert cst.error_flag
......@@ -888,6 +887,50 @@ class TestCustomizedResumeParsing:
st = grammar(doc3)
assert st.children and st.children[-1].tag_name == 'word'
class TestCustomizedResumeParsing_with_Parsers:
lang = r"""@ literalws = right
@ alpha_resume = ALPHA_RESUME
@ beta_resume = GAMMA_RE
@ bac_resume = /(?=GA\w+)/
document = alpha [beta] gamma "."
alpha = "ALPHA" abc
abc = §"a" "b" "c"
beta = "BETA" (bac | bca)
bac = "b" "a" §"c"
bca = "b" "c" §"a"
gamma = "GAMMA" §(cab | cba)
cab = "c" "a" §"b"
cba = "c" "b" §"a"
GAMMA_RE = /(?=GA\w+)/
ALPHA_RESUME = { !`BETA` !`GAMMA` /./ }
"""
def test_several_resume_rules_innermost_rule_matching(self):
gr = grammar_provider(self.lang)()
content = 'ALPHA abc BETA bad GAMMA cab .'
cst = gr(content)
assert cst.error_flag
assert cst.content == content
assert cst.pick('alpha').content.startswith('ALPHA')
# because of resuming, there should be only one error message
assert len(cst.errors_sorted) == 1
content = 'ALPHA acb BETA bad GAMMA cab .'
cst = gr(content)
assert cst.error_flag
assert cst.content == content
assert cst.pick('alpha').content.startswith('ALPHA')
# because of resuming, there should be only two error messages
assert len(cst.errors_sorted) == 2
content = 'ALPHA acb GAMMA cab .'
cst = gr(content)
assert cst.error_flag
assert cst.content == content
assert cst.pick('alpha').content.startswith('ALPHA')
# because of resuming, there should be only one error message
assert len(cst.errors_sorted) == 1, str(cst.errors_sorted)
class TestInSeriesResume:
def setup(self):
......@@ -1257,7 +1300,6 @@ class TestTreeOptimization:
parser = create_parser(lang.replace('none', 'flatten'))
assert parser('ABC').as_sxpr() == '(root (:Text "A") (:Text "B") (important "C"))'
parser = create_parser(lang.replace('none', 'merge_treetops'))
# print(parser.python_src__)
assert parser('ABC').as_sxpr() == '(root (:Text "A") (:Text "B") (important "C"))'
assert parser('ABD').as_sxpr() == '(root "ABD")'
parser = create_parser(lang.replace('none', 'merge_leaves'))
......