Notice to GitKraken users: A vulnerability has been found in the SSH key generation of GitKraken versions 7.6.0 to 8.0.0 (https://www.gitkraken.com/blog/weak-ssh-key-fix). If you use GitKraken and have generated an SSH key using one of these versions, please remove it both from your local workstation and from your LRZ GitLab profile.

21.10.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 3a0790e5 authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- transform.py: syntax-tree traversal now passes not only the current node but...

- transform.py: syntax-tree traversal now passes not only the current node but the complete context (i.e. list of all parent nodes) to the transformation function.
- similarly parser.Compiler now stores the context (i.e. list of all parent nodes) in self.context.
- more documentation for parser.Compiler, parser.Parser and parser.Grammar
parent 8c25efce
......@@ -285,8 +285,6 @@ class EBNFCompilerError(Exception):
pass
#TODO: Add Capture and Retrieve Validation: A variable mustn't be captured twice before retrieval?!? Is this possible at compile time?
class EBNFCompiler(Compiler):
"""
Generates a Parser from an abstract syntax tree of a grammar specified
......@@ -304,12 +302,12 @@ class EBNFCompiler(Compiler):
the prefix `gen_`.
Attributes:
current_symbols - During compilation, a list containing the root
current_symbols: During compilation, a list containing the root
node of the currently compiled definition as first element
and then the nodes of the symbols that are referred to in
the currently compiled definition.
rules - Dictionary that maps rule names to a list of Nodes that
rules: Dictionary that maps rule names to a list of Nodes that
contain symbol-references in the definition of the rule.
The first item in the list is the node of the rule-
definition itself. Example:
......@@ -319,29 +317,29 @@ class EBNFCompiler(Compiler):
Now `[str(node) for node in self.rules['alternative']]`
yields `['alternative = a | b', 'a', 'b']`
symbols - A mapping of symbol names to their first usage (not
symbols: A mapping of symbol names to their first usage (not
their definition!) in the EBNF source.
variables - A set of symbols names that are used with the
variables: A set of symbols names that are used with the
Pop or Retrieve operator. Because the values of these
symbols need to be captured they are called variables.
See `test_parser.TestPopRetrieve` for an example.
recursive - A set of symbols that are used recursively and
recursive: A set of symbols that are used recursively and
therefore require a `Forward`-operator.
definitions - A dictionary of definitions. Other than `rules`
definitions: A dictionary of definitions. Other than `rules`
this maps the symbols to their compiled definienda.
deferred_taks - A list of callables that is filled during
deferred_tasks: A list of callables that is filled during
compilation, but that will be executed only after
compilation has finished. Typically, it contains
semantic checks that require information that
is only available upon completion of compilation.
root - The name of the root symbol.
root: The name of the root symbol.
directives - A dictionary of all directives and their default
directives: A dictionary of all directives and their default
values.
"""
COMMENT_KEYWORD = "COMMENT__"
......@@ -364,6 +362,7 @@ class EBNFCompiler(Compiler):
def _reset(self):
super(EBNFCompiler, self)._reset()
self._result = '' # type: str
self.rules = OrderedDict() # type: OrderedDict[str, List[Node]]
self.current_symbols = [] # type: List[Node]
......@@ -372,7 +371,7 @@ class EBNFCompiler(Compiler):
self.recursive = set() # type: Set[str]
self.definitions = {} # type: Dict[str, str]
self.deferred_tasks = [] # type: List[Callable]
self.root = "" # type: str
self.root_symbol = "" # type: str
self.directives = {'whitespace': self.WHITESPACE['horizontal'],
'comment': '',
'literalws': ['right'],
......@@ -444,7 +443,7 @@ class EBNFCompiler(Compiler):
" assert re.match('\w+\Z', grammar_name)", '']
for name in self.rules:
method_name = Compiler.method_name(name)
if name == self.root:
if name == self.root_symbol:
compiler += [' def ' + method_name + '(self, node):',
' return node', '']
else:
......@@ -507,7 +506,7 @@ class EBNFCompiler(Compiler):
# turn definitions into declarations in reverse order
self.root = definitions[0][0] if definitions else ""
self.root_symbol = definitions[0][0] if definitions else ""
definitions.reverse()
declarations += [symbol + ' = Forward()'
for symbol in sorted(list(self.recursive))]
......@@ -523,7 +522,7 @@ class EBNFCompiler(Compiler):
for symbol in self.symbols:
if symbol not in defined_symbols:
self.symbols[symbol].add_error("Missing definition for symbol '%s'" % symbol)
root_node.error_flag = True
# root_node.error_flag = True
# check for unconnected rules
......@@ -536,16 +535,18 @@ class EBNFCompiler(Compiler):
for related in self.rules[symbol][1:]:
remove_connections(str(related))
remove_connections(self.root)
remove_connections(self.root_symbol)
for leftover in defined_symbols:
self.rules[leftover][0].add_error(('Rule "%s" is not connected to parser '
'root "%s" !') % (leftover, self.root) + ' (Use directive "@testing=True" '
'root "%s" !') % (leftover,
self.root_symbol) + ' (Use directive "@testing=True" '
'to supress this error message.)')
# root_node.error_flag = True
# set root parser and assemble python grammar definition
# set root_symbol parser and assemble python grammar definition
if self.root and 'root__' not in self.rules:
declarations.append('root__ = ' + self.root)
if self.root_symbol and 'root__' not in self.rules:
declarations.append('root__ = ' + self.root_symbol)
declarations.append('')
self._result = '\n '.join(declarations) \
+ GRAMMAR_FACTORY.format(NAME=self.grammar_name)
......@@ -555,7 +556,6 @@ class EBNFCompiler(Compiler):
## compilation methods
def on_syntax(self, node: Node) -> str:
self._reset()
definitions = [] # type: List[Tuple[str, str]]
# drop the wrapping sequence node
......
......@@ -307,6 +307,22 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
this is not the same as an empty match `("", text)`. Any empty match
can for example be returned by the `ZeroOrMore`-parser in case the
contained parser is repeated zero times.
Attributes:
visited: Dictionary of places this parser has already been to
during the current parsing process and the results the
parser returned at the respective place. This dictionary
is used to implement memoizing.
recursion_counter: Mapping of places to how often the parser
has already been called recursively at this place. This
is needed to implement left recursion. The number of
calls becomes irrelevant once a result has been memoized.
cycle_detection: The apply()-method uses this variable to make
sure that one and the same function will not be applied
(recursively) a second time, if it has already been
applied to this parser.
"""
ApplyFunc = Callable[['Parser'], None]
......@@ -472,15 +488,72 @@ class Grammar:
is not necessary to instantiate more than one Grammar object per
thread.
Grammar objects contain a few special fields for implicit
Grammar classes contain a few special class fields for implicit
whitespace and comments that should be overwritten, if the defaults
(no comments, horizontal right aligned whitespace) don't fit:
COMMENT__ - regular expression string for matching comments
wspL__ - regular expression string for left aligned whitespace
wspR__ - regular expression string for right aligned whitespace
root__ - the root parser of the grammar
COMMENT__: regular expression string for matching comments
WSP__: regular expression for whitespace and comments
wspL__: regular expression string for left aligned whitespace,
which either equals WSP__ or is empty.
wspR__: regular expression string for right aligned whitespace,
which either equals WSP__ or is empty.
root__: The root parser of the grammar. Theoretically, all parsers of the
grammar should be reachable by the root parser. However, for testing
of yet incomplete grammars class Grammar does not assume that this
is the case.
parser_initialization__: Before the parser class (!) has been initialized,
which happens upon the first time it is instantiated (see docstring for
method `_assign_parser_names()` for an explanation), this class
field contains a value other than "done". A value of "done" indicates
that the class has already been initialized.
Attributes:
all_parsers__: A set of all parsers connected to this grammar object
history_tracking__: A flag indicating that the parsing history shall
be tracked
wsp_left_parser__: A parser for the default left-adjacent-whitespace
or the zombie-parser (see `syntaxtree.ZOMBIE_PARSER`) if the
default is empty. The default whitespace will be used by parsers
`Token` and, if no other parsers are passed to its constructor,
by parser `RE`.
wsp_right_parser__: The same for the default right-adjacent-whitespace.
Both wsp_left_parser__ and wsp_right_parser__ merely serve the
purpose to avoid having to specify the default whitespace
explicitly every time an `RE`-parser-object is created.
_dirty_flag__: A flag indicating that the Grammar has been called at
least once so that the parsing-variables need to be reset
when it is called again.
document__: the text that has most recently been parsed or that is
currently being parsed.
_reversed__: the same text in reverse order - needed by the `Lookbehind`-
parsers.
variables__: A mapping for variable names to a stack of their respective
string values - needed by the `Capture`-, `Retrieve`- and `Pop`-
parsers.
rollback__: A list of tuples (location, rollback-function) that are
deposited by the `Capture`- and `Pop`-parsers. If the parsing
process reaches a dead end then all rollback-functions up to
the point to which it retreats will be called and the state
of the variable stack restored accordingly.
call_stack__: A stack of all parsers that have been called. This
is required for recording the parser history (for debugging)
and, eventually, i.e. one day in the future, for tracing through
the parsing process.
history__: A list of parser-call-stacks. A parser-call-stack is
appended to the list each time a parser either matches, fails
or if a parser-error occurs.
moving_forward__: This flag indicates that the parsing process is currently
moving forward. This information is needed among other things to
trigger the rollback of variables, which happens stepwise when the
parser is retreating from a dead end, i.e. not moving forward.
(See `add_parser_guard` and its local function `guarded_call`)
left_recursion_encountered__: This flag indicates that left recursion has
been encountered and triggers the left-recursion algorithm.
"""
root__ = None # type: Union[Parser, None]
# root__ must be overwritten with the root-parser by grammar subclass
parser_initialization__ = "pending" # type: str
......@@ -515,16 +588,15 @@ class Grammar:
selected reference will be chosen. See PEP 520
(www.python.org/dev/peps/pep-0520/) for an explanation of why.
"""
if cls.parser_initialization__ == "done":
return
cdict = cls.__dict__
for entry, parser in cdict.items():
if isinstance(parser, Parser) and sane_parser_name(entry):
if not parser.name:
parser.name = entry
if (isinstance(parser, Forward) and (not parser.parser.name)):
parser.parser.name = entry
cls.parser_initialization__ = "done"
if cls.parser_initialization__ != "done":
cdict = cls.__dict__
for entry, parser in cdict.items():
if isinstance(parser, Parser) and sane_parser_name(entry):
if not parser.name:
parser.name = entry
if (isinstance(parser, Forward) and (not parser.parser.name)):
parser.parser.name = entry
cls.parser_initialization__ = "done"
def __init__(self, root: Parser=None) -> None:
......@@ -535,8 +607,8 @@ class Grammar:
# if not hasattr(self.__class__, 'wspR__'):
# self.wspR__ = ''
self.all_parsers__ = set() # type: Set[Parser]
self.dirty_flag__ = False
self.history_tracking__ = False
self._dirty_flag__ = False # type: bool
self.history_tracking__ = False # type: bool
self._reset__()
# prepare parsers in the class, first
......@@ -631,12 +703,12 @@ class Grammar:
# assert isinstance(document, str), type(document)
if self.root__ is None:
raise NotImplementedError()
if self.dirty_flag__:
if self._dirty_flag__:
self._reset__()
for parser in self.all_parsers__:
parser.reset()
else:
self.dirty_flag__ = True
self._dirty_flag__ = True
self.history_tracking__ = is_logging()
self.document__ = document
self.last_rb__loc__ = len(document) + 1 # rollback location
......@@ -1574,14 +1646,24 @@ class Compiler:
themselves. This should be done by invoking the `compile(node)`-
method which will pick the right `on_XXX`-method. It is not
recommended to call the `on_XXX`-methods directly.
"""
Attributes:
context: A list of parent nodes that ends with the currently
compiled node.
grammar_name: The name of the grammar this compiler is related to
grammar_source: The source code of the grammar this compiler is
related to.
_dirty_flag: A flag indicating that the compiler has already been
called at least once and that therefore all compilation
variables must be reset when it is called again.
"""
def __init__(self, grammar_name="", grammar_source=""):
self.dirty_flag = False
self._reset()
self._dirty_flag = False
self.set_grammar_name(grammar_name, grammar_source)
def _reset(self):
pass
self.context = [] # type: List[Node]
def __call__(self, node: Node) -> Any:
"""
......@@ -1591,11 +1673,13 @@ class Compiler:
(This very much depends on the kind and purpose of the
implemented compiler.)
"""
if self.dirty_flag:
if self._dirty_flag:
self._reset()
else:
self.dirty_flag = True
return self.compile(node)
self._dirty_flag = True
result = self.compile(node)
self.propagate_error_flags(node)
return result
def set_grammar_name(self, grammar_name="", grammar_source=""):
"""
......@@ -1612,6 +1696,15 @@ class Compiler:
self.grammar_name = grammar_name
self.grammar_source = load_if_file(grammar_source)
@staticmethod
def propagate_error_flags(node: Node) -> None:
    """Recursively copies error flags from descendant nodes up to ``node``.

    A subtree whose root is already flagged is left untouched, and the
    scan over children stops as soon as one flagged child has been found,
    because a single flagged child suffices to flag the parent.
    """
    if node.error_flag:
        return
    for child in node.children:
        Compiler.propagate_error_flags(child)
        if child.error_flag:
            node.error_flag = True
            return
@staticmethod
def method_name(node_name: str) -> str:
"""Returns the method name for `node_name`, e.g.
......@@ -1641,8 +1734,15 @@ class Compiler:
return None
else:
compiler = self.__getattribute__(self.method_name(elem))
self.context.append(node)
result = compiler(node)
node.propagate_error_flags()
self.context.pop()
# # the following statement makes sure that the error_flag
# # is propagated early on. Otherwise it is redundant, because
# # the __call__ method globally propagates the node's error_flag
# # later anyway. So, maybe it could be removed here.
# for child in node.children:
# node.error_flag = node.error_flag or child.error_flag
return result
......@@ -1704,4 +1804,3 @@ def compile_source(source: str,
errors = syntax_tree.collect_errors() if syntax_tree.error_flag else []
messages = error_messages(source_text, errors)
return result, messages, syntax_tree
......@@ -290,16 +290,6 @@ class Node:
return self
def propagate_error_flags(self) -> None:
    """Recursively propagates error flags set on child nodes to their
    parents. This can be used if errors are added to descendant
    nodes after syntax-tree construction, i.e. in the compile phase.
    """
    # Accumulate into a local first; the final state is identical to
    # updating self.error_flag child by child.
    flag = self.error_flag
    for descendant in self.children:
        descendant.propagate_error_flags()
        flag = flag or descendant.error_flag
    self.error_flag = flag
def collect_errors(self, clear_errors=False) -> List[Error]:
"""
Returns all errors of this node or any child node in the form
......
......@@ -96,11 +96,11 @@ def transformation_factory(t=None):
Usage:
@transformation_factory(AbstractSet[str])
def remove_tokens(node, tokens):
def remove_tokens(context, tokens):
...
or, alternatively:
@transformation_factory
def remove_tokens(node, tokens: AbstractSet[str]):
def remove_tokens(context, tokens: AbstractSet[str]):
...
Example:
......@@ -198,11 +198,14 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
table = expand_table(table)
cache = {} # type: Dict[str, List[Callable]]
def traverse_recursive(node):
def traverse_recursive(context):
node = context[-1]
if node.children:
for child in node.result:
traverse_recursive(child) # depth first
context.append(child)
traverse_recursive(context) # depth first
node.error_flag |= child.error_flag # propagate error flag
context.pop()
key = key_func(node)
sequence = cache.get(key, None)
......@@ -217,9 +220,9 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
cache[key] = sequence
for call in sequence:
call(node)
call(context)
traverse_recursive(root_node)
traverse_recursive([root_node])
# ------------------------------------------------
......@@ -253,23 +256,25 @@ def reduce_child(node):
@transformation_factory(Callable)
def replace_by_single_child(context, condition=TRUE_CONDITION):
    """Removes a single branch node, replacing it by its immediate
    descendant, if and only if the condition on the descendant is true.
    (In case the descendant's name is empty (i.e. anonymous) the
    name of this node's parser is kept.)
    """
    # The node currently visited is always the last element of the context.
    node = context[-1]
    if len(node.children) == 1 and condition(node.children[0]):
        replace_child(node)
@transformation_factory(Callable)
def reduce_single_child(context, condition=TRUE_CONDITION):
    """Reduces a single branch node by transferring the result of its
    immediate descendant to this node, but keeping this node's parser entry.
    If the condition evaluates to false on the descendant, it will not
    be reduced.
    """
    # The node currently visited is always the last element of the context.
    node = context[-1]
    if len(node.children) == 1 and condition(node.children[0]):
        reduce_child(node)
......@@ -279,10 +284,11 @@ def is_named(node):
@transformation_factory(Callable)
def replace_or_reduce(node, condition=is_named):
def replace_or_reduce(context, condition=is_named):
"""Replaces node by a single child, if condition is met on child,
otherwise (i.e. if the child is anonymous) reduces the child.
"""
node = context[-1]
if len(node.children) == 1 and condition(node.children[0]):
replace_child(node)
else:
......@@ -290,7 +296,7 @@ def replace_or_reduce(node, condition=is_named):
@transformation_factory
def replace_parser(node, name: str):
def replace_parser(context, name: str):
"""Replaces the parser of a Node with a mock parser with the given
name.
......@@ -298,12 +304,13 @@ def replace_parser(node, name: str):
name(str): "NAME:PTYPE" of the surrogate. The ptype is optional
node(Node): The node where the parser shall be replaced
"""
node = context[-1]
name, ptype = (name.split(':') + [''])[:2]
node.parser = MockParser(name, ptype)
@transformation_factory(Callable)
def flatten(node, condition=lambda node: not node.parser.name, recursive=True):
def flatten(context, condition=lambda node: not node.parser.name, recursive=True):
"""Flattens all children, that fulfil the given `condition`
(default: all unnamed children). Flattening means that wherever a
node has child nodes, the child nodes are inserted in place of the
......@@ -317,31 +324,36 @@ def flatten(node, condition=lambda node: not node.parser.name, recursive=True):
(1 (+ 2) (+ 3) -> (1 + 2 + 3)
(1 (+ (2 + (3)))) -> (1 + 2 + 3)
"""
node = context[-1]
if node.children:
new_result = []
for child in node.children:
if child.children and condition(child):
if recursive:
flatten(child, condition, recursive)
context.append(child)
flatten(context, condition, recursive)
context.pop()
new_result.extend(child.children)
else:
new_result.append(child)
node.result = tuple(new_result)
def collapse(context):
    """Collapses all sub-nodes of a node by replacing them with the
    string representation of the node.
    """
    node = context[-1]
    node.result = str(node)
@transformation_factory
def merge_children(node, tag_names: List[str]):
def merge_children(context, tag_names: List[str]):
"""Joins all children next to each other and with particular tag-
names into a single child node with mock parser with the name of
the first tag name in the list.
"""
node = context
result = []
name, ptype = ('', tag_names[0]) if tag_names[0][:1] == ':' else (tag_names[0], '')
if node.children:
......@@ -374,10 +386,11 @@ def merge_children(node, tag_names: List[str]):
@transformation_factory
def replace_content(context, func: Callable):  # Callable[[Node], ResultType]
    """Replaces the content of the node. ``func`` takes the node's
    current result as an argument and returns the mapped result.
    """
    node = context[-1]
    node.result = func(node.result)
......@@ -415,24 +428,27 @@ def has_content(node, regexp: str) -> bool:
@transformation_factory(Callable)
def apply_if(context, transformation: Callable, condition: Callable):
    """Applies a transformation only if a certain condition is met.

    The condition is tested on the node itself, but the transformation
    receives the full context, as all transformations do.
    """
    node = context[-1]
    if condition(node):
        transformation(context)
@transformation_factory(slice)
def keep_children(context, section: slice = slice(None)):
    """Keeps only those child-nodes which fall into a slice of the result field."""
    node = context[-1]
    if node.children:
        node.result = node.children[section]
@transformation_factory(Callable)
def remove_children_if(node, condition: Callable, section: slice = slice(None)):
def remove_children_if(context, condition: Callable, section: slice = slice(None)):
"""Removes all nodes from a slice of the result field if the function
`condition(child_node)` evaluates to `True`."""
node = context[-1]
if node.children:
c = node.children
N = len(c)
......@@ -451,23 +467,23 @@ remove_single_child = apply_if(keep_children(slice(0)), lambda nd: len(nd.childr
@transformation_factory
def remove_tokens(context, tokens: AbstractSet[str] = frozenset()):
    """Removes any among a particular set of tokens from the immediate
    descendants of a node. If ``tokens`` is the empty set, all tokens
    are removed.
    """
    remove_children_if(context, partial(is_token, tokens=tokens))
@transformation_factory
def remove_parser(context, tag_names: AbstractSet[str]):
    """Removes children by their tag name."""
    remove_children_if(context, partial(is_one_of, tag_name_set=tag_names))
@transformation_factory
def remove_content(context, regexp: str):
    """Removes children depending on their string value."""
    remove_children_if(context, partial(has_content, regexp=regexp))
########################################################################
......@@ -477,8 +493,9 @@ def remove_content(node, regexp: str):
########################################################################
@transformation_factory(Callable)
def assert_condition(node, condition: Callable, error_msg: str='') -> bool:
def assert_condition(context, condition: Callable, error_msg: str = '') -> bool:
"""Checks for `condition`; adds an error message if condition is not met."""
node = context[-1]
if not condition(node):
if error_msg:
node.add_error(error_msg % node.tag_name if error_msg.find("%s") > 0 else error_msg)
......@@ -493,7 +510,8 @@ assert_has_children = assert_condition(lambda nd: nd.children, 'Element "%s" has
@transformation_factory
def assert_content(context, regexp: str):
    """Adds an error to the node if its content does not match ``regexp``."""
    node = context[-1]
    if not has_content(node, regexp):
        node.add_error('Element "%s" violates %s on %s' %
                       (node.parser.name, str(regexp), str(node)))
......@@ -515,7 +533,8 @@ def assert_content(node, regexp: str):
@transformation_factory
def require(node, child_tags: AbstractSet[str]):
def require(context, child_tags: AbstractSet[str]):
node = context[-1]
for child in node.children:
if child.tag_name not in child_tags: