Commit 46914209 authored by di68kap

- optimized dropping of nodes

parent f9fd165b
@@ -537,6 +537,11 @@ class EBNFCompiler(Compiler):
root_symbol: The name of the root symbol.
drop_flag: This flag is set temporarily when compiling the definition
of a parser that shall drop its content. If this flag is
set, all contained parsers will also drop their content
as an optimization.
directives: A record of all directives and their default values.
defined_directives: A set of all directives that have already been
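The effect of the new flag shows in the regenerated LaTeX grammar further down in this diff: previously only the outermost parser of a dropped symbol was wrapped in Drop(), now every nested anonymous parser is wrapped as well, presumably so that content can be discarded as soon as it has been parsed. For example (both lines appear verbatim in the LaTeX grammar diff below):

    # before:
    _WSPC = Drop(OneOrMore(Alternative(comment__, Drop(RegExp('\\s+')))))
    # after:
    _WSPC = Drop(OneOrMore(Drop(Alternative(comment__, Drop(RegExp('\\s+'))))))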
@@ -627,6 +632,7 @@ class EBNFCompiler(Compiler):
self.required_keywords = set() # type: Set[str]
self.deferred_tasks = [] # type: List[Callable]
self.root_symbol = "" # type: str
self.drop_flag = False # type: bool
self.directives = EBNFDirectives() # type: EBNFDirectives
self.defined_directives = set() # type: Set[str]
self.consumed_custom_errors = set() # type: Set[str]
@@ -1046,6 +1052,7 @@ class EBNFCompiler(Compiler):
try:
self.current_symbols = [node]
self.rules[rule] = self.current_symbols
self.drop_flag = rule in self.directives['drop'] and rule not in DROP_VALUES
defn = self.compile(node.children[1])
if rule in self.variables:
defn = 'Capture(%s)' % defn
@@ -1053,8 +1060,8 @@ class EBNFCompiler(Compiler):
elif defn.find("(") < 0:
# assume it's a synonym, like 'page = REGEX_PAGE_NR'
defn = 'Synonym(%s)' % defn
if rule in self.directives['drop'] and rule not in DROP_VALUES:
defn = 'Drop(%s)' % defn # TODO: Recursively drop all contained parsers for optimization
# if self.drop_flag:
# defn = 'Drop(%s)' % defn # TODO: Recursively drop all contained parsers for optimization
except TypeError as error:
from traceback import extract_tb
trace = str(extract_tb(error.__traceback__)[-1])
@@ -1062,6 +1069,8 @@ class EBNFCompiler(Compiler):
% (EBNFCompiler.AST_ERROR, str(error), trace, node.as_sxpr())
self.tree.new_error(node, errmsg)
rule, defn = rule + ':error', '"' + errmsg + '"'
finally:
self.drop_flag = False
return rule, defn
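A minimal grammar sketch that would exercise this code path (assuming the usual DHParser directive syntax '@ drop = ...'; the symbols are invented for illustration):

    @ drop = _WS
    doc  = word { word }
    word = /\w+/ _WS
    _WS  = /\s+/

Since _WS is a defined symbol rather than one of the generic values in DROP_VALUES, drop_flag is switched on while its definition is compiled, and the finally clause guarantees that the flag is cleared again even if compiling the definition raises an error.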
@@ -1203,10 +1212,13 @@ class EBNFCompiler(Compiler):
# remove drop clause for non dropping definitions of forms like "/\w+/~"
if (parser_class == "Series" and node.tag_name not in self.directives.drop
and DROP_REGEXP in self.directives.drop and self.context[-2].tag_name == "definition"
and all(arg.startswith('Drop(RegExp(') or arg in EBNFCompiler.COMMENT_OR_WHITESPACE
for arg in arguments)):
and all((arg.startswith('Drop(RegExp(') or arg.startswith('Drop(Token(')
or arg in EBNFCompiler.COMMENT_OR_WHITESPACE) for arg in arguments)):
arguments = [arg.replace('Drop(', '').replace('))', ')') for arg in arguments]
return parser_class + '(' + ', '.join(arguments) + ')'
if self.drop_flag:
return 'Drop(' + parser_class + '(' + ', '.join(arguments) + '))'
else:
return parser_class + '(' + ', '.join(arguments) + ')'
def on_expression(self, node) -> str:
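The argument clean-up in the Series special case above merely peels the Drop wrapper off the generated argument strings again; with made-up arguments:

    Drop(RegExp('\\w+'))  ->  RegExp('\\w+')
    Drop(Token('{'))      ->  Token('{')

so that a definition of the form /\w+/~ does not lose its content merely because regexps (DROP_REGEXP) are dropped globally.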
@@ -1348,8 +1360,8 @@ class EBNFCompiler(Compiler):
elif nd.tag_name == "expression":
if any(c.tag_name == TOKEN_PTYPE and nd.content == '§' for c in nd.children):
self.tree.new_error(node, "No mandatory items § allowed in SomeOf-operator!")
args = ', '.join(self.compile(child) for child in nd.children)
return "SomeOf(" + args + ")"
# args = ', '.join(self.compile(child) for child in nd.children)
return self.non_terminal(node, 'SomeOf') # "SomeOf(" + args + ")"
else:
self.tree.new_error(node, "Unordered sequence or alternative "
"requires at least two elements.")
......
@@ -26,7 +26,7 @@ from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Po
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc, Node, TransformationFunc, traverse, remove_children_if, \
reduce_single_child, replace_by_single_child, remove_whitespace, remove_empty, \
flatten, is_empty, collapse, replace_content, remove_brackets, \
flatten, is_empty, collapse, replace_content, remove_brackets, strip, \
is_one_of, traverse_locally, remove_tokens, remove_nodes, TOKEN_PTYPE, Error, \
access_thread_locals, recompile_grammar
from DHParser.log import start_logging
@@ -79,8 +79,8 @@ class LaTeXGrammar(Grammar):
LB = RegExp('\\s*?\\n|$')
NEW_LINE = Series(Drop(RegExp('[ \\t]*')), Option(comment__), Drop(RegExp('\\n')))
_GAP = Drop(Series(RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n'), dwsp__))
_WSPC = Drop(OneOrMore(Alternative(comment__, Drop(RegExp('\\s+')))))
_PARSEP = Drop(Series(ZeroOrMore(Series(whitespace__, comment__)), _GAP, Option(_WSPC)))
_WSPC = Drop(OneOrMore(Drop(Alternative(comment__, Drop(RegExp('\\s+'))))))
_PARSEP = Drop(Series(Drop(ZeroOrMore(Drop(Series(whitespace__, comment__)))), _GAP, Drop(Option(_WSPC))))
S = Series(Lookahead(Drop(RegExp('[% \\t\\n]'))), wsp__)
LFF = Series(NEW_LINE, Option(_WSPC))
LF = Series(NEW_LINE, ZeroOrMore(Series(comment__, whitespace__)))
@@ -259,7 +259,7 @@ LaTeX_AST_transformation_table = {
"multicolumn": [remove_tokens('{', '}')],
"hline": [remove_whitespace, reduce_single_child],
"sequence": [flatten],
"paragraph": [flatten],
"paragraph": [flatten, strip(is_one_of({'S'}))],
"text_element": replace_by_single_child,
"line_element": replace_by_single_child,
"inline_environment": replace_by_single_child,
......
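The strip transformation added to the "paragraph" entry removes leading and trailing child nodes tagged 'S' (the whitespace symbol S defined in the grammar above); sketched in DHParser's S-expression notation with a made-up tree:

    (paragraph (S " ") (text "some text") (S " "))
    ->  (paragraph (text "some text"))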
@@ -389,7 +389,7 @@ class TestGrammar:
assert not st.errors
def test_synonym(self):
lang = """
lang = r"""
doc = { word | number }
word = /\w+/ S
number = [VZ] /\d+/ S
......
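The raw-string prefix matters because the test grammar embeds regexes with backslash escapes; a minimal illustration, independent of the test itself:

    lang = "word = /\w+/ S"     # '\w' is an unrecognized escape: still passed through, but a DeprecationWarning since Python 3.6
    lang = r"word = /\w+/ S"    # the raw string hands the regex to the grammar compiler unchanged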