Currently, job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022, the default expiration time will be 30 days (the GitLab default). Artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit a100f00d authored by Eckhart Arnold
Browse files

- streamlining transform.py + more LaTeX tests

parent 4f6c3ae8
......@@ -81,7 +81,7 @@ from DHParser import logging, is_filename, load_if_file, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
last_value, counterpart, accumulate, PreprocessorFunc, \\
Node, TransformationFunc, \\
traverse, remove_children_if, join, \\
traverse, remove_children_if, join_children, \\
reduce_single_child, replace_by_single_child, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
......
......@@ -206,9 +206,10 @@ EBNF_transformation_table = {
"factor, flowmarker, retrieveop":
replace_by_single_child,
"group":
[remove_tokens('(', ')'), replace_by_single_child],
[remove_brackets, replace_by_single_child],
"oneormore, repetition, option":
[reduce_single_child, remove_brackets],
[reduce_single_child, remove_brackets,
forbid('repetition', 'option', 'oneormore'), assert_content(r'(?!§)')],
"symbol, literal, regexp":
reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE):
......@@ -220,18 +221,8 @@ EBNF_transformation_table = {
}
EBNF_validation_table = {
# Semantic validation on the AST. EXPERIMENTAL!
"repetition, option, oneormore":
[forbid('repetition', 'option', 'oneormore'),
assert_content(r'(?!§)')]
}
def EBNFTransformer(syntax_tree: Node):
    """Transforms the syntax tree of an EBNF grammar in a single traversal,
    using `EBNF_transformation_table` keyed by tag name.

    The formerly separate `EBNF_validation_table` pass has been merged into
    the transformation table, so only one `traverse` call is needed.
    """
    traverse(syntax_tree, EBNF_transformation_table, key_tag_name)
def get_ebnf_transformer() -> TransformationFunc:
......@@ -728,7 +719,7 @@ class EBNFCompiler(Compiler):
def on_literal(self, node) -> str:
    """Compiles a literal node into a `Token(...)` parser expression.

    Backslashes in the literal's string representation are doubled so that
    the literal survives being embedded in generated source code.
    """
    # NOTE(review): the trailing comment in the original had `', '.join`
    # mangled into `', '.join_children` by an over-eager global rename;
    # `str.join` is unrelated to the `join_children` transformation.
    return 'Token(' + str(node).replace('\\', r'\\') + ')'  # 'Token(' + ', '.join([node.result]) + ')' ?
def on_regexp(self, node: Node) -> str:
......
......@@ -270,7 +270,8 @@ class Node:
def errors(self) -> List[Error]:
return [Error(self.pos, err) for err in self._errors]
def add_error(self, error_str) -> 'Node':
def add_error(self, error_str: str) -> 'Node':
assert isinstance(error_str, str)
self._errors.append(error_str)
self.error_flag = True
return self
......
......@@ -151,7 +151,7 @@ def is_logging() -> bool:
# if i < 0:
# parameter_list = parameter_list[:i]
# name = f.__self__.__class__.__name__ if f.__name__ == '__init__' else f.__name__
# return "%s(%s)" % (name, ", ".join(repr(item) for item in parameter_list))
# return "%s(%s)" % (name, ", ".join_children(repr(item) for item in parameter_list))
def line_col(text: str, pos: int) -> Tuple[int, int]:
......
......@@ -43,7 +43,7 @@ __all__ = ('transformation_factory',
'reduce_single_child',
'replace_parser',
'collapse',
'join',
'join_children',
'replace_content',
'apply_if',
'is_whitespace',
......@@ -133,7 +133,7 @@ def transformation_factory(t=None):
# Provide for the case that transformation_factory has been
# written as plain decorator and not as a function call that
# returns the decorator proper.
func = t;
func = t
t = None
return decorator(func)
else:
......@@ -234,7 +234,7 @@ def replace_by_single_child(node):
if not node.result[0].parser.name:
node.result[0].parser.name = node.parser.name
node.parser = node.result[0].parser
node._errors.extend(node.result[0].errors)
node._errors.extend(node.result[0]._errors)
node.result = node.result[0].result
......@@ -243,7 +243,7 @@ def reduce_single_child(node):
immediate descendant to this node, but keeping this node's parser entry.
"""
if node.children and len(node.result) == 1:
node._errors.extend(node.result[0].errors)
node._errors.extend(node.result[0]._errors)
node.result = node.result[0].result
......@@ -295,14 +295,14 @@ def collapse(node):
@transformation_factory
def join(node, tag_names: List[str]):
def join_children(node, tag_names: List[str]):
"""Joins all children next to each other and with particular tag-
names into a single child node with mock parser 'parser_name'.
"""
result = []
name, ptype = (tag_names[0].split(':') + [''])[:2]
if node.children:
i = 0;
i = 0
L = len(node.children)
while i < L:
while i < L and not node.children[i].tag_name in tag_names:
......@@ -356,21 +356,17 @@ def is_token(node, tokens: AbstractSet[str] = frozenset()) -> bool:
return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.result in tokens)
@transformation_factory
def has_name(node, regexp: str) -> bool:
    """Returns True if the node's tag name matches the regular expression
    `regexp`.  Matching is anchored at the start of the tag name only
    (semantics of `re.match`)."""
    return bool(re.match(regexp, node.tag_name))
@transformation_factory
def has_content(node, regexp: str) -> bool:
    """Returns True if the node's string content (i.e. `str(node)`) matches
    the regular expression `regexp`.  Matching is anchored at the start of
    the content only (semantics of `re.match`)."""
    return bool(re.match(regexp, str(node)))
@transformation_factory
@transformation_factory(Callable)
def apply_if(node, transformation: Callable, condition: Callable):
"""Applies a transformation only if a certain condition is met.
"""
......@@ -378,47 +374,32 @@ def apply_if(node, transformation: Callable, condition: Callable):
transformation(node)
@transformation_factory(slice)
def keep_children(node, section: slice = slice(None)):
    """Keeps only the child-nodes that fall into the slice `section` of the
    result field; all other children are dropped.

    Parameters:
        node: the node whose children shall be pruned in place.
        section: a slice selecting the children to keep (default: all).
    """
    if node.children:
        node.result = node.children[section]
@transformation_factory(Callable)
def remove_children_if(node, condition: Callable, section: slice = slice(None)):
    """Removes all children within `section` of the result field for which
    `condition(child_node)` evaluates to `True`.

    Parameters:
        node: the node whose children shall be filtered in place.
        condition: callable receiving a child node; children for which it
            returns True are removed.
        section: a slice restricting which child positions are examined;
            children outside the slice are always kept (default: all).
    """
    if node.children:
        children = node.children
        rng = range(*section.indices(len(children)))
        # keep a child if it lies outside the section or fails the condition
        node.result = tuple(child for i, child in enumerate(children)
                            if i not in rng or not condition(child))
remove_whitespace = remove_children_if(is_whitespace) # partial(remove_children_if, condition=is_whitespace)
remove_empty = remove_children_if(is_empty)
remove_expendables = remove_children_if(is_expendable) # partial(remove_children_if, condition=is_expendable)
remove_first = keep_children(slice(1, None))
remove_last = keep_children(slice(None, -1))
remove_brackets = keep_children(slice(1, -1))
@transformation_factory(Callable)
def remove_first(node, condition=lambda node: True):
    """Drops the first child node if `condition` evaluates to True on it;
    otherwise leaves the node unchanged."""
    children = node.children
    if children and condition(children[0]):
        node.result = node.result[1:]
@transformation_factory(Callable)
def remove_last(node, condition=lambda node: True):
    """Drops the last child node if `condition` evaluates to True on it;
    otherwise leaves the node unchanged."""
    children = node.children
    if children and condition(children[-1]):
        node.result = node.result[:-1]
@transformation_factory
def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
"""Reomoves any among a particular set of tokens from the immediate
......@@ -428,24 +409,60 @@ def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
@transformation_factory
def remove_parser(node, regexp: str):
    """Removes all children whose tag name matches the regular
    expression `regexp`."""
    remove_children_if(node, partial(has_name, regexp=regexp))
@transformation_factory
def remove_content(node, regexp: str):
    """Removes all children whose string content matches the regular
    expression `regexp`."""
    remove_children_if(node, partial(has_content, regexp=regexp))
########################################################################
#
# AST semantic validation functions
# EXPERIMENTAL!
# AST semantic validation functions (EXPERIMENTAL!!!)
#
########################################################################
@transformation_factory(Callable)
def assert_condition(node, condition: Callable, error_msg: str = '') -> bool:
    """Checks `condition` on `node` and attaches an error message to the
    node if the condition is not met.

    Parameters:
        node: the node to be validated.
        condition: callable receiving the node and returning a truth value.
        error_msg: optional message; an embedded "%s" is substituted with
            the node's tag name.  When empty, a generic message naming the
            condition is used.
    """
    # NOTE(review): annotated `-> bool` but no value is ever returned;
    # callers appear to use this only for its side effect.
    if not condition(node):
        if error_msg:
            # Bug fix: test for containment instead of `.find("%s") > 0`,
            # which wrongly skipped substitution when "%s" stood at
            # position 0 of the template.
            if "%s" in error_msg:
                node.add_error(error_msg % node.tag_name)
            else:
                node.add_error(error_msg)
        else:
            # Every object has `__class__`, so '<unknown>' is in practice
            # unreachable; kept for defensive parity with the original.
            cond_name = condition.__name__ if hasattr(condition, '__name__') \
                else condition.__class__.__name__ if hasattr(condition, '__class__') \
                else '<unknown>'
            node.add_error("transform.assert_condition: Failed to meet condition " + cond_name)
assert_has_children = assert_condition(lambda nd: nd.children, 'Element "%s" has no children')
@transformation_factory
def assert_content(node, regexp: str):
    """Adds an error to `node` if its string content does not match the
    regular expression `regexp`."""
    if has_content(node, regexp):
        return
    node.add_error('Element "%s" violates %s on %s' %
                   (node.parser.name, str(regexp), str(node)))
#
# @transformation_factory
# def assert_name(node, regexp: str):
# if not has_name(node, regexp):
# node.add_error('Element name "%s" does not match %s' % (node.tag_name), str(regexp))
#
#
# @transformation_factory(Callable)
# def assert_children(node, condition: Callable=lambda node: True,
# error_msg: str='', section: slice=slice(None)):
# if node.children:
# for child in node.children:
# assert_condition(child, condition, error_msg)
#
@transformation_factory
def require(node, child_tags: AbstractSet[str]):
......@@ -461,11 +478,3 @@ def forbid(node, child_tags: AbstractSet[str]):
if child.tag_name in child_tags:
node.add_error('Element "%s" cannot be nested inside "%s".' %
(child.parser.name, node.parser.name))
@transformation_factory
def assert_content(node, regex: str):
content = str(node)
if not re.match(regex, content):
node.add_error('Element "%s" violates %s on %s' %
(node.parser.name, str(regex), content))
......@@ -112,7 +112,7 @@ def selftest() -> bool:
if errors:
print("Selftest FAILED :-(")
print("\n\n".join(errors))
print("\n\n".join_children(errors))
return False
print(generated_ebnf_parser)
print("\n\nSTAGE 2: Selfhosting-test: Trying to compile EBNF-Grammar with generated parser...\n")
......
......@@ -47,7 +47,7 @@ from DHParser.toolkit import logging
# transformer, compiler)
# print(result)
# if errors:
# print('\n\n'.join(errors))
# print('\n\n'.join_children(errors))
# sys.exit(1)
# else:
# # compile the grammar again using the result of the previous
......
......@@ -21,9 +21,9 @@ from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Altern
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc, \
Node, TransformationFunc, \
traverse, join, \
traverse, join_children, remove_whitespace, remove_parser, \
reduce_single_child, replace_by_single_child, remove_expendables, remove_empty, flatten, \
collapse, replace_content, remove_brackets
collapse, replace_content, remove_brackets, remove_first
#######################################################################
......@@ -324,35 +324,43 @@ LaTeX_AST_transformation_table = {
"blockenv": [],
"parblock": [],
"sequence":
flatten,
[flatten, remove_parser('PARSEP'), replace_by_single_child],
"enumerate, itemize":
[remove_brackets, remove_parser('PARSEP'), reduce_single_child],
"item":
[remove_first, remove_parser('PARSEP')],
"paragraph":
[flatten(lambda node: not node.parser.name or node.parser.name == "text"),
join('text', ':Whitespace')],
"inlineenv": [],
"beginenv": [],
"endenv": [],
"command": [],
"config": [],
"block": [remove_brackets, reduce_single_child],
join_children('text', ':Whitespace')],
"quotation, generic_bloc, generic_inline_env, inline_math":
[remove_brackets],
"inline_environment": [],
"begin_environment": [],
"end_environment": [],
# "command": [],
"generic_command": [],
"config, block": [remove_brackets, reduce_single_child],
"text":
[reduce_single_child, join('text', 'word_sequence', ':Whitespace')],
[reduce_single_child, join_children('text', 'word_sequence', ':Whitespace')],
"cfgtext": [flatten, reduce_single_child],
"word_sequence":
[collapse],
"blockcmd": [],
"CMDNAME":
[remove_expendables, reduce_single_child],
"NAME": [],
"NAME": [reduce_single_child],
"ESCAPED": [reduce_single_child],
"BRACKETS": [],
"TEXTCHUNK": [],
"WSPC, :Whitespace":
streamline_whitespace,
[], # streamline_whitespace, # whitespace will be removed anyway
"LF":
replace_content(lambda node: '\n'),
"PARSEP":
replace_content(lambda node: '\n\n'),
[], # replace_content(lambda node: '\n\n'),
"EOF": [],
":Token":
[], # [remove_whitespace, reduce_single_child], # Tokens will be removed anyway?
"*":
replace_by_single_child,
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.