Commit 3a0790e5 authored by Eckhart Arnold


- transform.py: syntax-tree traversal now passes not only the current node but the complete context (i.e. list of all parent nodes) to the transformation function.
- similarly parser.Compiler now stores the context (i.e. list of all parent nodes) in self.context.
- more documentation for parser.Compiler, parser.Parser and parser.Grammar
parent 8c25efce
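In practical terms, the change in transform.py means that a transformation function no longer receives a bare node but the whole context, i.e. the list of all parent nodes ending with the node currently being processed. A minimal sketch of the new calling convention (the function and the parent-dependent check below are purely illustrative and not part of this commit):

    def my_transformation(context):
        # new convention: `context` is the list of all parent nodes,
        # ending with the node that is currently being transformed
        node = context[-1]
        parent = context[-2] if len(context) > 1 else None
        if parent is not None and len(parent.children) == 1:
            pass  # parent information is now available for context-sensitive decisions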
@@ -285,8 +285,6 @@ class EBNFCompilerError(Exception):
     pass
-#TODO: Add Capture and Retrieve Validation: A variable mustn't be captured twice before retrival?!? Is this possible at compile time?
 class EBNFCompiler(Compiler):
     """
     Generates a Parser from an abstract syntax tree of a grammar specified
@@ -304,12 +302,12 @@ class EBNFCompiler(Compiler):
     the prefix `gen_`.
     Attributes:
-        current_symbols - During compilation, a list containing the root
+        current_symbols: During compilation, a list containing the root
                 node of the currently compiled definition as first element
                 and then the nodes of the symbols that are referred to in
                 the currently compiled definition.
-        rules - Dictionary that maps rule names to a list of Nodes that
+        rules: Dictionary that maps rule names to a list of Nodes that
                 contain symbol-references in the definition of the rule.
                 The first item in the list is the node of the rule-
                 definition itself. Example:
@@ -319,29 +317,29 @@ class EBNFCompiler(Compiler):
             Now `[str(node) for node in self.rules['alternative']]`
             yields `['alternative = a | b', 'a', 'b']`
-        symbols - A mapping of symbol names to their first usage (not
+        symbols: A mapping of symbol names to their first usage (not
                 their definition!) in the EBNF source.
-        variables - A set of symbols names that are used with the
+        variables: A set of symbols names that are used with the
                 Pop or Retrieve operator. Because the values of these
                 symbols need to be captured they are called variables.
                 See `test_parser.TestPopRetrieve` for an example.
-        recursive - A set of symbols that are used recursively and
+        recursive: A set of symbols that are used recursively and
                 therefore require a `Forward`-operator.
-        definitions - A dictionary of definitions. Other than `rules`
+        definitions: A dictionary of definitions. Other than `rules`
                 this maps the symbols to their compiled definienda.
-        deferred_taks - A list of callables that is filled during
+        deferred_taks: A list of callables that is filled during
                 compilatation, but that will be executed only after
                 compilation has finished. Typically, it contains
                 sementatic checks that require information that
                 is only available upon completion of compilation.
-        root - The name of the root symbol.
+        root: The name of the root symbol.
-        directives - A dictionary of all directives and their default
+        directives: A dictionary of all directives and their default
                 values.
     """
     COMMENT_KEYWORD = "COMMENT__"
@@ -364,6 +362,7 @@ class EBNFCompiler(Compiler):
     def _reset(self):
+        super(EBNFCompiler, self)._reset()
         self._result = ''  # type: str
         self.rules = OrderedDict()  # type: OrderedDict[str, List[Node]]
         self.current_symbols = []  # type: List[Node]
@@ -372,7 +371,7 @@ class EBNFCompiler(Compiler):
         self.recursive = set()  # type: Set[str]
         self.definitions = {}  # type: Dict[str, str]
         self.deferred_tasks = []  # type: List[Callable]
-        self.root = ""  # type: str
+        self.root_symbol = ""  # type: str
         self.directives = {'whitespace': self.WHITESPACE['horizontal'],
                            'comment': '',
                            'literalws': ['right'],
@@ -444,7 +443,7 @@ class EBNFCompiler(Compiler):
                     " assert re.match('\w+\Z', grammar_name)", '']
         for name in self.rules:
             method_name = Compiler.method_name(name)
-            if name == self.root:
+            if name == self.root_symbol:
                 compiler += [' def ' + method_name + '(self, node):',
                              ' return node', '']
             else:
@@ -507,7 +506,7 @@ class EBNFCompiler(Compiler):
         # turn definitions into declarations in reverse order
-        self.root = definitions[0][0] if definitions else ""
+        self.root_symbol = definitions[0][0] if definitions else ""
         definitions.reverse()
         declarations += [symbol + ' = Forward()'
                          for symbol in sorted(list(self.recursive))]
@@ -523,7 +522,7 @@ class EBNFCompiler(Compiler):
         for symbol in self.symbols:
             if symbol not in defined_symbols:
                 self.symbols[symbol].add_error("Missing definition for symbol '%s'" % symbol)
-                root_node.error_flag = True
+                # root_node.error_flag = True
         # check for unconnected rules
@@ -536,16 +535,18 @@ class EBNFCompiler(Compiler):
                 for related in self.rules[symbol][1:]:
                     remove_connections(str(related))
-        remove_connections(self.root)
+        remove_connections(self.root_symbol)
         for leftover in defined_symbols:
             self.rules[leftover][0].add_error(('Rule "%s" is not connected to parser '
-                'root "%s" !') % (leftover, self.root) + ' (Use directive "@testing=True" '
+                'root "%s" !') % (leftover,
+                                  self.root_symbol) + ' (Use directive "@testing=True" '
                 'to supress this error message.)')
+            # root_node.error_flag = True
-        # set root parser and assemble python grammar definition
-        if self.root and 'root__' not in self.rules:
-            declarations.append('root__ = ' + self.root)
+        # set root_symbol parser and assemble python grammar definition
+        if self.root_symbol and 'root__' not in self.rules:
+            declarations.append('root__ = ' + self.root_symbol)
         declarations.append('')
         self._result = '\n '.join(declarations) \
                        + GRAMMAR_FACTORY.format(NAME=self.grammar_name)
@@ -555,7 +556,6 @@ class EBNFCompiler(Compiler):
     ## compilation methods
     def on_syntax(self, node: Node) -> str:
-        self._reset()
         definitions = []  # type: List[Tuple[str, str]]
         # drop the wrapping sequence node
...
@@ -307,6 +307,22 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
     this is not the same as an empty match `("", text)`. Any empty match
     can for example be returned by the `ZeroOrMore`-parser in case the
     contained parser is repeated zero times.
+    Attributes:
+        visted: Dictionary of places this parser has already been to
+                during the current parsing process and the results the
+                parser returned at the respective place. This dictionary
+                is used to implement memoizing.
+        recursion_counter: Mapping of places to how often the parser
+                has already been called recursively at this place. This
+                is needed to implement left recursion. The number of
+                calls becomes irrelevant once a resault has been memoized.
+        cycle_detection: The apply()-method uses this variable to make
+                sure that one and the same function will not be applied
+                (recursively) a second time, if it has already been
+                applied to this parser.
     """
     ApplyFunc = Callable[['Parser'], None]
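The `visted` and `recursion_counter` attributes documented above serve memoizing and left-recursion handling. The following self-contained sketch only illustrates the memoizing idea described in the docstring and is not Parser's actual implementation: results are cached per text position, so that a parser asked again at the same position can return the stored result instead of parsing again.

    class MemoizingParser:
        """Schematic stand-in for the position-based memoizing described above."""

        def __init__(self, parse_func):
            self.parse_func = parse_func
            self.visited = {}  # position -> cached parsing result

        def __call__(self, text, position=0):
            if position in self.visited:        # already been here: reuse the result
                return self.visited[position]
            result = self.parse_func(text, position)
            self.visited[position] = result     # memoize for later calls
            return result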
@@ -472,15 +488,72 @@ class Grammar:
     is not necessary to instantiate more than one Grammar object per
     thread.
-    Grammar objects contain a few special fields for implicit
+    Grammar classes contain a few special class fields for implicit
     whitespace and comments that should be overwritten, if the defaults
     (no comments, horizontal right aligned whitespace) don't fit:
-    COMMENT__ - regular expression string for matching comments
-    wspL__ - regular expression string for left aligned whitespace
-    wspR__ - regular expression string for right aligned whitespace
-    root__ - the root parser of the grammar
+    COMMENT__: regular expression string for matching comments
+    WSP__: regular expression for whitespace and comments
+    wspL__: regular expression string for left aligned whitespace,
+            which either equals WSP__ or is empty.
+    wspR__: regular expression string for right aligned whitespace,
+            which either equals WSP__ or is empty.
+    root__: The root parser of the grammar. Theoretically, all parsers of the
+            grammar should be reachable by the root parser. However, for testing
+            of yet incomplete grammars class Grammar does not assume that this
+            is the case.
+    parser_initializiation__: Before the parser class (!) has been initialized,
+            which happens upon the first time it is instantiated (see doctring for
+            method `_assign_parser_names()` for an explanation), this class
+            field contains a value other than "done". A value of "done" indicates
+            that the class has already been initialized.
+    Attributes:
+        all_parsers__: A set of all parsers connected to this grammar object
+        hostory_tracking: A flag indicating that the parsing history shall
+                be tracked
+        wsp_left_parser__: A parser for the default left-adjacent-whitespace
+                or the zombie-parser (see `syntaxtree.ZOMBIE_PARSER`) if the
+                default is empty. The default whitespace will be used by parsers
+                `Token` and, if no other parsers are passed to its constructor,
+                by parser `RE'.
+        wsp_right_parser__: The same for the default right-adjacent-whitespace.
+                Both wsp_left_parser__ and wsp_right_parser__ merely serve the
+                purpose to avoid having to specify the default whitespace
+                explicitly every time an `RE`-parser-object is created.
+        _dirty_flag__: A flag indicating that the Grammar has been called at
+                least once so that the parsing-variables need to be reset
+                when it is called again.
+        document__: the text that has most recently been parsed or that is
+                currently being parsed.
+        _reversed__: the same text in reverse order - needed by the `Lookbehind'-
+                parsers.
+        variables__: A mapping for variable names to a stack of their respective
+                string values - needed by the `Capture`-, `Retrieve`- and `Pop`-
+                parsers.
+        rollback__: A list of tuples (location, rollback-function) that are
+                deposited by the `Capture`- and `Pop`-parsers. If the parsing
+                process reaches a dead end then all rollback-functions up to
+                the point to which it retreats will be called and the state
+                of the variable stack restored accordingly.
+        call_stack__: A stack of all parsers that have been called. This
+                is required for recording the parser history (for debugging)
+                and, eventually, i.e. one day in the future, for tracing through
+                the parsing process.
+        history__: A list of parser-call-stacks. A parser-call-stack is
+                appended to the list each time a parser either matches, fails
+                or if a parser-error occurs.
+        moving_forward__: This flag indicates that the parsing process is currently
+                moving forward. This information is needed among other thins to
+                trigger the roolback of variables, which happens stepwise when the
+                parser is reatreating form a dead end, i.e. not moving forward.
+                (See `add_parser_guard` and its local function `guarded_call`)
+        left_recursion_encountered__: This flag indicates that left recursion has
+                been encountered and triggers the left-recursion algorithm.
     """
     root__ = None  # type: Union[Parser, None]
     # root__ must be overwritten with the root-parser by grammar subclass
     parser_initialization__ = "pending"  # type: str
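The enlarged docstring above describes both the class fields that grammar authors override and the instance attributes that are reset between runs. A hypothetical subclass may make this concrete; the import path, the `RE`-parser's constructor signature and the example inputs are assumptions for the sake of illustration and are not taken from this diff:

    from DHParser.parser import Grammar, RE   # import path assumed

    class WordGrammar(Grammar):
        COMMENT__ = r'#.*(?:\n|$)'   # override the default (no comments)
        word = RE(r'\w+')            # class-level parsers get their names assigned
        root__ = word                # every concrete grammar must set root__

    parse_word = WordGrammar()       # instantiation triggers _assign_parser_names()
    tree = parse_word("hello")       # first call sets _dirty_flag__
    tree = parse_word("world")       # subsequent calls run _reset__() first
    if tree.error_flag:
        print(tree.collect_errors())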
@@ -515,16 +588,15 @@ class Grammar:
         selected reference will be chosen. See PEP 520
         (www.python.org/dev/peps/pep-0520/) for an explanation of why.
         """
-        if cls.parser_initialization__ == "done":
-            return
-        cdict = cls.__dict__
-        for entry, parser in cdict.items():
-            if isinstance(parser, Parser) and sane_parser_name(entry):
-                if not parser.name:
-                    parser.name = entry
-                if (isinstance(parser, Forward) and (not parser.parser.name)):
-                    parser.parser.name = entry
-        cls.parser_initialization__ = "done"
+        if cls.parser_initialization__ != "done":
+            cdict = cls.__dict__
+            for entry, parser in cdict.items():
+                if isinstance(parser, Parser) and sane_parser_name(entry):
+                    if not parser.name:
+                        parser.name = entry
+                    if (isinstance(parser, Forward) and (not parser.parser.name)):
+                        parser.parser.name = entry
+            cls.parser_initialization__ = "done"
     def __init__(self, root: Parser=None) -> None:
@@ -535,8 +607,8 @@ class Grammar:
         # if not hasattr(self.__class__, 'wspR__'):
         #     self.wspR__ = ''
         self.all_parsers__ = set()  # type: Set[Parser]
-        self.dirty_flag__ = False
-        self.history_tracking__ = False
+        self._dirty_flag__ = False  # type: bool
+        self.history_tracking__ = False  # type: bool
         self._reset__()
         # prepare parsers in the class, first
@@ -631,12 +703,12 @@ class Grammar:
         # assert isinstance(document, str), type(document)
         if self.root__ is None:
             raise NotImplementedError()
-        if self.dirty_flag__:
+        if self._dirty_flag__:
             self._reset__()
             for parser in self.all_parsers__:
                 parser.reset()
         else:
-            self.dirty_flag__ = True
+            self._dirty_flag__ = True
         self.history_tracking__ = is_logging()
         self.document__ = document
         self.last_rb__loc__ = len(document) + 1  # rollback location
@@ -1574,14 +1646,24 @@ class Compiler:
     themselves. This should be done by invoking the `compile(node)`-
     method which will pick the right `on_XXX`-method. It is not
     recommended to call the `on_XXX`-methods directly.
-    """
+    Attributes:
+        context: A list of parent nodes that ends with the currently
+                compiled node.
+        grammar_name: The name of the grammar this compiler is related to
+        grammar_source: The source code of the grammar this compiler is
+                related to.
+        _dirty_flag: A flag indicating that the compiler has already been
+                called at least once and that therefore all compilation
+                variables must be reset when it is called again.
+    """
     def __init__(self, grammar_name="", grammar_source=""):
-        self.dirty_flag = False
+        self._reset()
+        self._dirty_flag = False
         self.set_grammar_name(grammar_name, grammar_source)
     def _reset(self):
-        pass
+        self.context = []  # type: List[Node]
     def __call__(self, node: Node) -> Any:
         """
@@ -1591,11 +1673,13 @@ class Compiler:
         (This very much depends on the kind and purpose of the
         implemented compiler.)
         """
-        if self.dirty_flag:
+        if self._dirty_flag:
             self._reset()
         else:
-            self.dirty_flag = True
-        return self.compile(node)
+            self._dirty_flag = True
+        result = self.compile(node)
+        self.propagate_error_flags(node)
+        return result
     def set_grammar_name(self, grammar_name="", grammar_source=""):
         """
@@ -1612,6 +1696,15 @@ class Compiler:
         self.grammar_name = grammar_name
         self.grammar_source = load_if_file(grammar_source)
+    @staticmethod
+    def propagate_error_flags(node: Node) -> None:
+        if not node.error_flag:
+            for child in node.children:
+                Compiler.propagate_error_flags(child)
+                if child.error_flag:
+                    node.error_flag = True
+                    return
     @staticmethod
     def method_name(node_name: str) -> str:
         """Returns the method name for `node_name`, e.g.
@@ -1641,8 +1734,15 @@ class Compiler:
             return None
         else:
             compiler = self.__getattribute__(self.method_name(elem))
+            self.context.append(node)
             result = compiler(node)
-            node.propagate_error_flags()
+            self.context.pop()
+            # # the following statement makes sure that the error_flag
+            # # is propagated early on. Otherwise it is redundant, because
+            # # the __call__ method globally propagates the node's error_flag
+            # # later anyway. So, maybe it could be removed here.
+            # for child in node.children:
+            #     node.error_flag = node.error_flag or child.error_flag
             return result
@@ -1704,4 +1804,3 @@ def compile_source(source: str,
     errors = syntax_tree.collect_errors() if syntax_tree.error_flag else []
     messages = error_messages(source_text, errors)
     return result, messages, syntax_tree
@@ -290,16 +290,6 @@ class Node:
         return self
-    def propagate_error_flags(self) -> None:
-        """Recursively propagates error flags set on child nodes to its
-        parents. This can be used if errors are added to descendant
-        nodes after syntaxtree construction, i.e. in the compile phase.
-        """
-        for child in self.children:
-            child.propagate_error_flags()
-            self.error_flag = self.error_flag or child.error_flag
     def collect_errors(self, clear_errors=False) -> List[Error]:
         """
         Returns all errors of this node or any child node in the form
...
@@ -96,11 +96,11 @@ def transformation_factory(t=None):
     Usage:
         @transformation_factory(AbtractSet[str])
-        def remove_tokens(node, tokens):
+        def remove_tokens(context, tokens):
             ...
     or, alternatively:
         @transformation_factory
-        def remove_tokens(node, tokens: AbstractSet[str]):
+        def remove_tokens(context, tokens: AbstractSet[str]):
             ...
     Example:
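As the adjusted usage examples indicate, a function decorated with `transformation_factory` can be called with only its extra parameters to obtain a one-argument transformation that is later invoked with the context. Roughly, and under the assumption that this dispatching behaviour is what the factory provides:

    # Calling the decorated remove_tokens with just the token set returns a
    # transformation that traverse() can later call as strip_parens(context);
    # such calls are typically placed directly into a processing table entry.
    strip_parens = remove_tokens({'(', ')'})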
@@ -198,11 +198,14 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
     table = expand_table(table)
     cache = {}  # type: Dict[str, List[Callable]]
-    def traverse_recursive(node):
+    def traverse_recursive(context):
+        node = context[-1]
         if node.children:
             for child in node.result:
-                traverse_recursive(child)  # depth first
+                context.append(child)
+                traverse_recursive(context)  # depth first
                 node.error_flag |= child.error_flag  # propagate error flag
+                context.pop()
         key = key_func(node)
         sequence = cache.get(key, None)
@@ -217,9 +220,9 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
             cache[key] = sequence
         for call in sequence:
-            call(node)
+            call(context)
-    traverse_recursive(root_node)
+    traverse_recursive([root_node])
 # ------------------------------------------------
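A usage sketch for the reworked `traverse()`: the processing table maps tag names to lists of transformations, and every transformation now receives the context list. The tag names, the `syntax_tree` variable and the `node.parser.name` attribute access are illustrative assumptions, not taken from this diff:

    def log_parent(context):
        # context-aware transformation: the parent is directly accessible
        node = context[-1]
        parent = context[-2] if len(context) > 1 else None
        if parent is not None:
            print(node.parser.name, "inside", parent.parser.name)

    example_table = {
        "term": [replace_by_single_child],
        "expression": [reduce_single_child, log_parent],
    }
    traverse(syntax_tree, example_table)   # transforms the tree in place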
@@ -253,23 +256,25 @@ def reduce_child(node):
 @transformation_factory(Callable)
-def replace_by_single_child(node, condition=TRUE_CONDITION):
+def replace_by_single_child(context, condition=TRUE_CONDITION):
     """Remove single branch node, replacing it by its immediate descendant
     if and only if the condision on the descendant is true.
     (In case the descendant's name is empty (i.e. anonymous) the
     name of this node's parser is kept.)
     """
+    node = context[-1]
     if len(node.children) == 1 and condition(node.children[0]):
         replace_child(node)
 @transformation_factory(Callable)
-def reduce_single_child(node, condition=TRUE_CONDITION):
+def reduce_single_child(context, condition=TRUE_CONDITION):
     """Reduce a single branch node, by transferring the result of its
     immediate descendant to this node, but keeping this node's parser entry.
     If the condition evaluates to false on the descendant, it will not
     be reduced.
     """
+    node = context[-1]
     if len(node.children) == 1 and condition(node.children[0]):
         reduce_child(node)
@@ -279,10 +284,11 @@ def is_named(node):
 @transformation_factory(Callable)
-def replace_or_reduce(node, condition=is_named):
+def replace_or_reduce(context, condition=is_named):
     """Replaces node by a single child, if condition is met on child,
     otherwise (i.e. if the child is anonymous) reduces the child.
     """
+    node = context[-1]
     if len(node.children) == 1 and condition(node.children[0]):
         replace_child(node)