10.12., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit a100f00d authored by Eckhart Arnold

- streamlining transform.py + more LaTeX tests

parent 4f6c3ae8
......@@ -81,7 +81,7 @@ from DHParser import logging, is_filename, load_if_file, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
last_value, counterpart, accumulate, PreprocessorFunc, \\
Node, TransformationFunc, \\
traverse, remove_children_if, join, \\
traverse, remove_children_if, join_children, \\
reduce_single_child, replace_by_single_child, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
......
......@@ -206,9 +206,10 @@ EBNF_transformation_table = {
"factor, flowmarker, retrieveop":
replace_by_single_child,
"group":
[remove_tokens('(', ')'), replace_by_single_child],
[remove_brackets, replace_by_single_child],
"oneormore, repetition, option":
[reduce_single_child, remove_brackets],
[reduce_single_child, remove_brackets,
forbid('repetition', 'option', 'oneormore'), assert_content(r'(?!§)')],
"symbol, literal, regexp":
reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE):
......@@ -220,18 +221,8 @@ EBNF_transformation_table = {
}
EBNF_validation_table = {
# Semantic validation on the AST. EXPERIMENTAL!
"repetition, option, oneormore":
[forbid('repetition', 'option', 'oneormore'),
assert_content(r'(?!§)')]
}
def EBNFTransformer(syntax_tree: Node):
for processing_table, key_func in [(EBNF_transformation_table, key_tag_name),
(EBNF_validation_table, key_tag_name)]:
traverse(syntax_tree, processing_table, key_func)
traverse(syntax_tree, EBNF_transformation_table, key_tag_name)
def get_ebnf_transformer() -> TransformationFunc:
......@@ -728,7 +719,7 @@ class EBNFCompiler(Compiler):
def on_literal(self, node) -> str:
return 'Token(' + str(node).replace('\\', r'\\') + ')' # return 'Token(' + ', '.join([node.result]) + ')' ?
return 'Token(' + str(node).replace('\\', r'\\') + ')' # return 'Token(' + ', '.join_children([node.result]) + ')' ?
def on_regexp(self, node: Node) -> str:
......
......@@ -270,7 +270,8 @@ class Node:
def errors(self) -> List[Error]:
return [Error(self.pos, err) for err in self._errors]
def add_error(self, error_str) -> 'Node':
def add_error(self, error_str: str) -> 'Node':
assert isinstance(error_str, str)
self._errors.append(error_str)
self.error_flag = True
return self
......
......@@ -151,7 +151,7 @@ def is_logging() -> bool:
# if i < 0:
# parameter_list = parameter_list[:i]
# name = f.__self__.__class__.__name__ if f.__name__ == '__init__' else f.__name__
# return "%s(%s)" % (name, ", ".join(repr(item) for item in parameter_list))
# return "%s(%s)" % (name, ", ".join_children(repr(item) for item in parameter_list))
def line_col(text: str, pos: int) -> Tuple[int, int]:
......
......@@ -43,7 +43,7 @@ __all__ = ('transformation_factory',
'reduce_single_child',
'replace_parser',
'collapse',
'join',
'join_children',
'replace_content',
'apply_if',
'is_whitespace',
......@@ -133,7 +133,7 @@ def transformation_factory(t=None):
# Provide for the case that transformation_factory has been
# written as plain decorator and not as a function call that
# returns the decorator proper.
func = t;
func = t
t = None
return decorator(func)
else:
......@@ -234,7 +234,7 @@ def replace_by_single_child(node):
if not node.result[0].parser.name:
node.result[0].parser.name = node.parser.name
node.parser = node.result[0].parser
node._errors.extend(node.result[0].errors)
node._errors.extend(node.result[0]._errors)
node.result = node.result[0].result
......@@ -243,7 +243,7 @@ def reduce_single_child(node):
immediate descendant to this node, but keeping this node's parser entry.
"""
if node.children and len(node.result) == 1:
node._errors.extend(node.result[0].errors)
node._errors.extend(node.result[0]._errors)
node.result = node.result[0].result
......@@ -295,14 +295,14 @@ def collapse(node):
@transformation_factory
def join(node, tag_names: List[str]):
def join_children(node, tag_names: List[str]):
"""Joins all children next to each other and with particular tag-
names into a single child node with mock parser 'parser_name'.
"""
result = []
name, ptype = (tag_names[0].split(':') + [''])[:2]
if node.children:
i = 0;
i = 0
L = len(node.children)
while i < L:
while i < L and not node.children[i].tag_name in tag_names:
......@@ -356,21 +356,17 @@ def is_token(node, tokens: AbstractSet[str] = frozenset()) -> bool:
return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.result in tokens)
@transformation_factory
def has_name(node, tag_names: AbstractSet[str]) -> bool:
"""Checks if node has any of a given set of `tag names`.
See property `Node.tagname`."""
return node.tag_name in tag_names
def has_name(node, regexp: str) -> bool:
"""Checks a node's tag name against a regular expression."""
return bool(re.match(regexp, node.tag_name))
@transformation_factory
def has_content(node, contents: AbstractSet[str]) -> bool:
"""Checks if the node's content (i.e. `str(node)`) matches any of
a given set of strings."""
return str(node) in contents
def has_content(node, regexp: str) -> bool:
"""Checks a node's content against a regular expression."""
return bool(re.match(regexp, str(node)))
@transformation_factory
@transformation_factory(Callable)
def apply_if(node, transformation: Callable, condition: Callable):
"""Applies a transformation only if a certain condition is met.
"""
......@@ -378,47 +374,32 @@ def apply_if(node, transformation: Callable, condition: Callable):
transformation(node)
@transformation_factory
def keep_children(node, section: slice = slice(None, None, None), condition=lambda node: True):
"""Keeps only the nodes which fall into a slice of the result field
and for which the function `condition(child_node)` evaluates to
`True`."""
@transformation_factory(slice)
def keep_children(node, section: slice = slice(None)):
"""Keeps only child-nodes which fall into a slice of the result field."""
if node.children:
node.result = tuple(c for c in node.children[section] if condition(c))
node.result = node.children[section]
@transformation_factory(Callable)
def remove_children_if(node, condition):
def remove_children_if(node, condition: Callable, section: slice = slice(None)):
"""Removes all nodes from a slice of the result field if the function
``condition(child_node)`` evaluates to ``True``."""
`condition(child_node)` evaluates to `True`."""
if node.children:
node.result = tuple(c for c in node.children if not condition(c))
c = node.children
N = len(c)
rng = range(*section.indices(N))
node.result = tuple(c[i] for i in range(N) if not i in rng or not condition(c[i]))
# Ready-made transformations obtained by calling the factories above with a
# fixed argument.
# NOTE(review): assumes that calling a transformation_factory-decorated
# function without a node yields a transformation awaiting the node -- confirm.
#
# Predicate-based variants: children for which the predicate holds are removed.
remove_whitespace = remove_children_if(is_whitespace) # partial(remove_children_if, condition=is_whitespace)
remove_empty = remove_children_if(is_empty)
remove_expendables = remove_children_if(is_expendable) # partial(remove_children_if, condition=is_expendable)
# Slice-based variants: the slice selects the children that are KEPT, so
# slice(1, None) skips the first child, slice(None, -1) skips the last one
# and slice(1, -1) skips both.
remove_first = keep_children(slice(1, None))
remove_last = keep_children(slice(None, -1))
remove_brackets = keep_children(slice(1, -1))
@transformation_factory(Callable)
def remove_first(node, condition=lambda node: True):
"""Removes the first child if the condition is met.
Otherwise does nothing."""
if node.children:
if condition(node.children[0]):
node.result = node.result[1:]
@transformation_factory(Callable)
def remove_last(node, condition=lambda node: True):
"""Removes the last child if the condition is met.
Otherwise does nothing."""
if node.children:
if condition(node.children[-1]):
node.result = node.result[:-1]
@transformation_factory
def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
"""Removes any among a particular set of tokens from the immediate
......@@ -428,24 +409,60 @@ def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
@transformation_factory
def remove_parser(node, tag_names: AbstractSet[str]):
def remove_parser(node, regexp: str):
"""Removes children by 'tag name'."""
remove_children_if(node, partial(has_name, tag_names=tag_names))
remove_children_if(node, partial(has_name, regexp=regexp))
@transformation_factory
def remove_content(node, contents: AbstractSet[str]):
def remove_content(node, regexp: str):
"""Removes children depending on their string value."""
remove_children_if(node, partial(has_content, contents=contents))
remove_children_if(node, partial(has_content, regexp=regexp))
########################################################################
#
# AST semantic validation functions
# EXPERIMENTAL!
# AST semantic validation functions (EXPERIMENTAL!!!)
#
########################################################################
@transformation_factory(Callable)
def assert_condition(node, condition: Callable, error_msg: str = '') -> bool:
    """Adds an error to `node` if `condition(node)` is not met.

    Args:
        node: The node to check. It must provide `tag_name` and
            `add_error()`.
        condition: A callable that receives the node and returns a truthy
            value if the node is acceptable.
        error_msg: Optional error message. If it contains "%s", the
            placeholder is filled in with the node's tag name. If empty,
            a generic message naming the condition is used.

    NOTE(review): the signature is annotated `-> bool`, but no value is
    returned explicitly; the outcome is reported only through
    `node.add_error()`.
    """
    if condition(node):
        return
    if error_msg:
        # `in` also detects a placeholder at the very start of the message,
        # which the former `find("%s") > 0` test missed.
        message = error_msg % node.tag_name if "%s" in error_msg else error_msg
    else:
        # Callables without a `__name__` (e.g. functools.partial objects)
        # are identified by their class name instead.
        cond_name = getattr(condition, '__name__', condition.__class__.__name__)
        message = "transform.assert_condition: Failed to meet condition " + cond_name
    node.add_error(message)


# Flags any node that has no children; "%s" is replaced by the node's tag name.
assert_has_children = assert_condition(lambda nd: nd.children, 'Element "%s" has no children')
@transformation_factory
def assert_content(node, regexp: str):
    """Adds an error to `node` unless `has_content(node, regexp)` holds.

    The error message names the node's parser, the regular expression and
    the node's string content.
    """
    if has_content(node, regexp):
        return
    violation = 'Element "%s" violates %s on %s' % (node.parser.name, str(regexp), str(node))
    node.add_error(violation)
#
# @transformation_factory
# def assert_name(node, regexp: str):
# if not has_name(node, regexp):
# node.add_error('Element name "%s" does not match %s' % (node.tag_name, str(regexp)))
#
#
# @transformation_factory(Callable)
# def assert_children(node, condition: Callable=lambda node: True,
# error_msg: str='', section: slice=slice(None)):
# if node.children:
# for child in node.children:
# assert_condition(child, condition, error_msg)
#
@transformation_factory
def require(node, child_tags: AbstractSet[str]):
......@@ -461,11 +478,3 @@ def forbid(node, child_tags: AbstractSet[str]):
if child.tag_name in child_tags:
node.add_error('Element "%s" cannot be nested inside "%s".' %
(child.parser.name, node.parser.name))
@transformation_factory
def assert_content(node, regex: str):
content = str(node)
if not re.match(regex, content):
node.add_error('Element "%s" violates %s on %s' %
(node.parser.name, str(regex), content))
......@@ -112,7 +112,7 @@ def selftest() -> bool:
if errors:
print("Selftest FAILED :-(")
print("\n\n".join(errors))
print("\n\n".join_children(errors))
return False
print(generated_ebnf_parser)
print("\n\nSTAGE 2: Selfhosting-test: Trying to compile EBNF-Grammar with generated parser...\n")
......
......@@ -47,7 +47,7 @@ from DHParser.toolkit import logging
# transformer, compiler)
# print(result)
# if errors:
# print('\n\n'.join(errors))
# print('\n\n'.join_children(errors))
# sys.exit(1)
# else:
# # compile the grammar again using the result of the previous
......
......@@ -21,9 +21,9 @@ from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Altern
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc, \
Node, TransformationFunc, \
traverse, join, \
traverse, join_children, remove_whitespace, remove_parser, \
reduce_single_child, replace_by_single_child, remove_expendables, remove_empty, flatten, \
collapse, replace_content, remove_brackets
collapse, replace_content, remove_brackets, remove_first
#######################################################################
......@@ -324,35 +324,43 @@ LaTeX_AST_transformation_table = {
"blockenv": [],
"parblock": [],
"sequence":
flatten,
[flatten, remove_parser('PARSEP'), replace_by_single_child],
"enumerate, itemize":
[remove_brackets, remove_parser('PARSEP'), reduce_single_child],
"item":
[remove_first, remove_parser('PARSEP')],
"paragraph":
[flatten(lambda node: not node.parser.name or node.parser.name == "text"),
join('text', ':Whitespace')],
"inlineenv": [],
"beginenv": [],
"endenv": [],
"command": [],
"config": [],
"block": [remove_brackets, reduce_single_child],
join_children('text', ':Whitespace')],
"quotation, generic_bloc, generic_inline_env, inline_math":
[remove_brackets],
"inline_environment": [],
"begin_environment": [],
"end_environment": [],
# "command": [],
"generic_command": [],
"config, block": [remove_brackets, reduce_single_child],
"text":
[reduce_single_child, join('text', 'word_sequence', ':Whitespace')],
[reduce_single_child, join_children('text', 'word_sequence', ':Whitespace')],
"cfgtext": [flatten, reduce_single_child],
"word_sequence":
[collapse],
"blockcmd": [],
"CMDNAME":
[remove_expendables, reduce_single_child],
"NAME": [],
"NAME": [reduce_single_child],
"ESCAPED": [reduce_single_child],
"BRACKETS": [],
"TEXTCHUNK": [],
"WSPC, :Whitespace":
streamline_whitespace,
[], # streamline_whitespace, # whitespace will be removed anyway
"LF":
replace_content(lambda node: '\n'),
"PARSEP":
replace_content(lambda node: '\n\n'),
[], # replace_content(lambda node: '\n\n'),
"EOF": [],
":Token":
[], # [remove_whitespace, reduce_single_child], # Tokens will be removed anyway?
"*":
replace_by_single_child,
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment