Commit a100f00d authored by Eckhart Arnold

- streamlining transform.py + more LaTeX tests

parent 4f6c3ae8
@@ -81,7 +81,7 @@ from DHParser import logging, is_filename, load_if_file, \
     ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
     last_value, counterpart, accumulate, PreprocessorFunc, \
     Node, TransformationFunc, \
-    traverse, remove_children_if, join, \
+    traverse, remove_children_if, join_children, \
     reduce_single_child, replace_by_single_child, remove_whitespace, \
     remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
     is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
...
@@ -206,9 +206,10 @@ EBNF_transformation_table = {
     "factor, flowmarker, retrieveop":
         replace_by_single_child,
     "group":
-        [remove_tokens('(', ')'), replace_by_single_child],
+        [remove_brackets, replace_by_single_child],
     "oneormore, repetition, option":
-        [reduce_single_child, remove_brackets],
+        [reduce_single_child, remove_brackets,
+         forbid('repetition', 'option', 'oneormore'), assert_content(r'(?!§)')],
     "symbol, literal, regexp":
         reduce_single_child,
     (TOKEN_PTYPE, WHITESPACE_PTYPE):
@@ -220,18 +221,8 @@ EBNF_transformation_table = {
 }


-EBNF_validation_table = {
-    # Semantic validation on the AST. EXPERIMENTAL!
-    "repetition, option, oneormore":
-        [forbid('repetition', 'option', 'oneormore'),
-         assert_content(r'(?!§)')]
-}
-
-
 def EBNFTransformer(syntax_tree: Node):
-    for processing_table, key_func in [(EBNF_transformation_table, key_tag_name),
-                                       (EBNF_validation_table, key_tag_name)]:
-        traverse(syntax_tree, processing_table, key_func)
+    traverse(syntax_tree, EBNF_transformation_table, key_tag_name)


 def get_ebnf_transformer() -> TransformationFunc:
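The separate `EBNF_validation_table` is gone: `forbid(...)` and `assert_content(...)` now sit directly in the transformation-table entry for `"oneormore, repetition, option"`, so `EBNFTransformer` needs only one `traverse` pass instead of looping over two tables. A minimal sketch of the underlying table-driven dispatch idea (plain Python, not DHParser's actual `traverse`), where each tag name maps to a list of callables applied in order:

```python
from typing import Callable, Dict, Sequence

def apply_transformations(tag_name: str,
                          node: object,
                          table: Dict[str, Sequence[Callable]]) -> None:
    """Apply every callable registered for tag_name, falling back to the '*' entry."""
    for transform in table.get(tag_name, table.get("*", ())):
        transform(node)

# Transformations and validations live in the same list and run in a single pass:
demo_table = {"oneormore": [lambda nd: print("reduce", nd),
                            lambda nd: print("validate", nd)]}
apply_transformations("oneormore", "<node>", demo_table)
```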
@@ -728,7 +719,7 @@ class EBNFCompiler(Compiler):

     def on_literal(self, node) -> str:
-        return 'Token(' + str(node).replace('\\', r'\\') + ')' # return 'Token(' + ', '.join([node.result]) + ')' ?
+        return 'Token(' + str(node).replace('\\', r'\\') + ')' # return 'Token(' + ', '.join_children([node.result]) + ')' ?

     def on_regexp(self, node: Node) -> str:
...
@@ -270,7 +270,8 @@ class Node:
     def errors(self) -> List[Error]:
         return [Error(self.pos, err) for err in self._errors]

-    def add_error(self, error_str) -> 'Node':
+    def add_error(self, error_str: str) -> 'Node':
+        assert isinstance(error_str, str)
         self._errors.append(error_str)
         self.error_flag = True
         return self
...
@@ -151,7 +151,7 @@ def is_logging() -> bool:
 #     if i < 0:
 #         parameter_list = parameter_list[:i]
 #     name = f.__self__.__class__.__name__ if f.__name__ == '__init__' else f.__name__
-#     return "%s(%s)" % (name, ", ".join(repr(item) for item in parameter_list))
+#     return "%s(%s)" % (name, ", ".join_children(repr(item) for item in parameter_list))


 def line_col(text: str, pos: int) -> Tuple[int, int]:
...
@@ -43,7 +43,7 @@ __all__ = ('transformation_factory',
            'reduce_single_child',
            'replace_parser',
            'collapse',
-           'join',
+           'join_children',
            'replace_content',
            'apply_if',
            'is_whitespace',
@@ -133,7 +133,7 @@ def transformation_factory(t=None):
         # Provide for the case that transformation_factory has been
         # written as plain decorator and not as a function call that
         # returns the decorator proper.
-        func = t;
+        func = t
         t = None
         return decorator(func)
     else:
@@ -234,7 +234,7 @@ def replace_by_single_child(node):
         if not node.result[0].parser.name:
             node.result[0].parser.name = node.parser.name
         node.parser = node.result[0].parser
-        node._errors.extend(node.result[0].errors)
+        node._errors.extend(node.result[0]._errors)
         node.result = node.result[0].result
@@ -243,7 +243,7 @@ def reduce_single_child(node):
     immediate descendant to this node, but keeping this node's parser entry.
     """
     if node.children and len(node.result) == 1:
-        node._errors.extend(node.result[0].errors)
+        node._errors.extend(node.result[0]._errors)
         node.result = node.result[0].result
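Switching from `node.result[0].errors` to `node.result[0]._errors` in `replace_by_single_child` and `reduce_single_child` is more than cosmetic: as the `Node` hunk above shows, `errors` is a property that wraps each stored message in an `Error(pos, err)` object, while `_errors` holds the plain strings that `add_error` appends, so extending `_errors` with the property's value would mix the two types. A toy illustration of that distinction (not the real `Node` class):

```python
class Error:
    def __init__(self, pos, msg):
        self.pos, self.msg = pos, msg

class ToyNode:
    def __init__(self):
        self._errors = []          # raw message strings, as appended by add_error()
        self.pos = 0
    @property
    def errors(self):
        return [Error(self.pos, err) for err in self._errors]

child = ToyNode()
child._errors.append("missing delimiter")
parent = ToyNode()
parent._errors.extend(child._errors)   # stays a homogeneous list of strings
assert all(isinstance(e, str) for e in parent._errors)
```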
@@ -295,14 +295,14 @@ def collapse(node):


 @transformation_factory
-def join(node, tag_names: List[str]):
+def join_children(node, tag_names: List[str]):
     """Joins all children next to each other and with particular tag-
     names into a single child node with mock parser 'parser_name'.
     """
     result = []
     name, ptype = (tag_names[0].split(':') + [''])[:2]
     if node.children:
-        i = 0;
+        i = 0
         L = len(node.children)
         while i < L:
             while i < L and not node.children[i].tag_name in tag_names:
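Renaming `join` to `join_children` avoids the clash with `str.join`, which presumably also explains the `.join(...)` to `.join_children(...)` edits inside comments and strings elsewhere in this commit. According to its docstring, the function merges runs of adjacent children whose tag names are in the given set into a single child. A rough standalone sketch of that grouping logic on plain `(tag, text)` pairs, not the actual Node-based implementation:

```python
from itertools import groupby

def join_adjacent(children, tag_names):
    """Merge maximal runs of adjacent items whose tag is in tag_names into one item."""
    result = []
    for joinable, run in groupby(children, key=lambda c: c[0] in tag_names):
        run = list(run)
        if joinable:
            result.append((run[0][0], ''.join(text for _, text in run)))
        else:
            result.extend(run)
    return result

print(join_adjacent([('text', 'foo'), (':Whitespace', ' '), ('text', 'bar'), ('word', 'baz')],
                    {'text', ':Whitespace'}))
# -> [('text', 'foo bar'), ('word', 'baz')]
```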
@@ -356,21 +356,17 @@ def is_token(node, tokens: AbstractSet[str] = frozenset()) -> bool:
     return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.result in tokens)


-@transformation_factory
-def has_name(node, tag_names: AbstractSet[str]) -> bool:
-    """Checks if node has any of a given set of `tag names`.
-    See property `Node.tagname`."""
-    return node.tag_name in tag_names
+def has_name(node, regexp: str) -> bool:
+    """Checks a node's tag name against a regular expression."""
+    return bool(re.match(regexp, node.tag_name))


-@transformation_factory
-def has_content(node, contents: AbstractSet[str]) -> bool:
-    """Checks if the node's content (i.e. `str(node)`) matches any of
-    a given set of strings."""
-    return str(node) in contents
+def has_content(node, regexp: str) -> bool:
+    """Checks a node's content against a regular expression."""
+    return bool(re.match(regexp, str(node)))


-@transformation_factory
+@transformation_factory(Callable)
 def apply_if(node, transformation: Callable, condition: Callable):
     """Applies a transformation only if a certain condition is met.
     """
@@ -378,47 +374,32 @@ def apply_if(node, transformation: Callable, condition: Callable):
         transformation(node)


-@transformation_factory
-def keep_children(node, section: slice = slice(None, None, None), condition=lambda node: True):
-    """Keeps only the nodes which fall into a slice of the result field
-    and for which the function `condition(child_node)` evaluates to
-    `True`."""
+@transformation_factory(slice)
+def keep_children(node, section: slice = slice(None)):
+    """Keeps only child-nodes which fall into a slice of the result field."""
     if node.children:
-        node.result = tuple(c for c in node.children[section] if condition(c))
+        node.result = node.children[section]


 @transformation_factory(Callable)
-def remove_children_if(node, condition):
+def remove_children_if(node, condition: Callable, section: slice = slice(None)):
     """Removes all nodes from a slice of the result field if the function
-    ``condition(child_node)`` evaluates to ``True``."""
+    `condition(child_node)` evaluates to `True`."""
     if node.children:
-        node.result = tuple(c for c in node.children if not condition(c))
+        c = node.children
+        N = len(c)
+        rng = range(*section.indices(N))
+        node.result = tuple(c[i] for i in range(N) if not i in rng or not condition(c[i]))


 remove_whitespace = remove_children_if(is_whitespace)  # partial(remove_children_if, condition=is_whitespace)
 remove_empty = remove_children_if(is_empty)
 remove_expendables = remove_children_if(is_expendable)  # partial(remove_children_if, condition=is_expendable)
+remove_first = keep_children(slice(1, None))
+remove_last = keep_children(slice(None, -1))
 remove_brackets = keep_children(slice(1, -1))
-
-
-@transformation_factory(Callable)
-def remove_first(node, condition=lambda node: True):
-    """Removes the first child if the condition is met.
-    Otherwise does nothing."""
-    if node.children:
-        if condition(node.children[0]):
-            node.result = node.result[1:]
-
-
-@transformation_factory(Callable)
-def remove_last(node, condition=lambda node: True):
-    """Removes the last child if the condition is met.
-    Otherwise does nothing."""
-    if node.children:
-        if condition(node.children[-1]):
-            node.result = node.result[:-1]


 @transformation_factory
 def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
     """Reomoves any among a particular set of tokens from the immediate
@@ -428,24 +409,60 @@ def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):

 @transformation_factory
-def remove_parser(node, tag_names: AbstractSet[str]):
+def remove_parser(node, regexp: str):
     """Removes children by 'tag name'."""
-    remove_children_if(node, partial(has_name, tag_names=tag_names))
+    remove_children_if(node, partial(has_name, regexp=regexp))


 @transformation_factory
-def remove_content(node, contents: AbstractSet[str]):
+def remove_content(node, regexp: str):
     """Removes children depending on their string value."""
-    remove_children_if(node, partial(has_content, contents=contents))
+    remove_children_if(node, partial(has_content, regexp=regexp))


 ########################################################################
 #
-# AST semantic validation functions
-# EXPERIMENTAL!
+# AST semantic validation functions (EXPERIMENTAL!!!)
 #
 ########################################################################


+@transformation_factory(Callable)
+def assert_condition(node, condition: Callable, error_msg: str='') -> bool:
+    """Checks for `condition`; adds an error message if condition is not met."""
+    if not condition(node):
+        if error_msg:
+            node.add_error(error_msg % node.tag_name if error_msg.find("%s") > 0 else error_msg)
+        else:
+            cond_name = condition.__name__ if hasattr(condition, '__name__') \
+                        else condition.__class__.__name__ if hasattr(condition, '__class__') \
+                        else '<unknown>'
+            node.add_error("transform.assert_condition: Failed to meet condition " + cond_name)
+
+
+assert_has_children = assert_condition(lambda nd: nd.children, 'Element "%s" has no children')
+
+
+@transformation_factory
+def assert_content(node, regexp: str):
+    if not has_content(node, regexp):
+        node.add_error('Element "%s" violates %s on %s' %
+                       (node.parser.name, str(regexp), str(node)))
+
+#
+# @transformation_factory
+# def assert_name(node, regexp: str):
+#     if not has_name(node, regexp):
+#         node.add_error('Element name "%s" does not match %s' % (node.tag_name), str(regexp))
+#
+#
+# @transformation_factory(Callable)
+# def assert_children(node, condition: Callable=lambda node: True,
+#                     error_msg: str='', section: slice=slice(None)):
+#     if node.children:
+#         for child in node.children:
+#             assert_condition(child, condition, error_msg)
+#


 @transformation_factory
 def require(node, child_tags: AbstractSet[str]):
@@ -461,11 +478,3 @@ def forbid(node, child_tags: AbstractSet[str]):
         if child.tag_name in child_tags:
             node.add_error('Element "%s" cannot be nested inside "%s".' %
                            (child.parser.name, node.parser.name))
-
-
-@transformation_factory
-def assert_content(node, regex: str):
-    content = str(node)
-    if not re.match(regex, content):
-        node.add_error('Element "%s" violates %s on %s' %
-                       (node.parser.name, str(regex), content))
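The old module-level `assert_content` at the end of transform.py is dropped in favour of the new validation helpers above. Note that `assert_condition` substitutes the node's tag name into `error_msg` only when the message contains a `'%s'` placeholder found past the first character (`find(...) > 0`). The decision logic in isolation:

```python
def format_error(error_msg: str, tag_name: str) -> str:
    # mirrors the decision in assert_condition above
    return error_msg % tag_name if error_msg.find("%s") > 0 else error_msg

print(format_error('Element "%s" has no children', 'item'))  # Element "item" has no children
print(format_error('malformed input', 'item'))               # no placeholder, message unchanged
```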
@@ -112,7 +112,7 @@ def selftest() -> bool:
     if errors:
         print("Selftest FAILED :-(")
-        print("\n\n".join(errors))
+        print("\n\n".join_children(errors))
         return False
     print(generated_ebnf_parser)
     print("\n\nSTAGE 2: Selfhosting-test: Trying to compile EBNF-Grammar with generated parser...\n")
...
@@ -47,7 +47,7 @@ from DHParser.toolkit import logging
 #                 transformer, compiler)
 #     print(result)
 #     if errors:
-#         print('\n\n'.join(errors))
+#         print('\n\n'.join_children(errors))
 #         sys.exit(1)
 #     else:
 #         # compile the grammar again using the result of the previous
...
@@ -21,9 +21,9 @@ from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Altern
     ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
     PreprocessorFunc, \
     Node, TransformationFunc, \
-    traverse, join, \
+    traverse, join_children, remove_whitespace, remove_parser, \
     reduce_single_child, replace_by_single_child, remove_expendables, remove_empty, flatten, \
-    collapse, replace_content, remove_brackets
+    collapse, replace_content, remove_brackets, remove_first


 #######################################################################
@@ -324,35 +324,43 @@ LaTeX_AST_transformation_table = {
     "blockenv": [],
     "parblock": [],
     "sequence":
-        flatten,
+        [flatten, remove_parser('PARSEP'), replace_by_single_child],
+    "enumerate, itemize":
+        [remove_brackets, remove_parser('PARSEP'), reduce_single_child],
+    "item":
+        [remove_first, remove_parser('PARSEP')],
     "paragraph":
         [flatten(lambda node: not node.parser.name or node.parser.name == "text"),
-         join('text', ':Whitespace')],
-    "inlineenv": [],
-    "beginenv": [],
-    "endenv": [],
-    "command": [],
-    "config": [],
-    "block": [remove_brackets, reduce_single_child],
+         join_children('text', ':Whitespace')],
+    "quotation, generic_bloc, generic_inline_env, inline_math":
+        [remove_brackets],
+    "inline_environment": [],
+    "begin_environment": [],
+    "end_environment": [],
+    # "command": [],
+    "generic_command": [],
+    "config, block": [remove_brackets, reduce_single_child],
     "text":
-        [reduce_single_child, join('text', 'word_sequence', ':Whitespace')],
+        [reduce_single_child, join_children('text', 'word_sequence', ':Whitespace')],
     "cfgtext": [flatten, reduce_single_child],
     "word_sequence":
         [collapse],
     "blockcmd": [],
     "CMDNAME":
         [remove_expendables, reduce_single_child],
-    "NAME": [],
+    "NAME": [reduce_single_child],
     "ESCAPED": [reduce_single_child],
     "BRACKETS": [],
     "TEXTCHUNK": [],
     "WSPC, :Whitespace":
-        streamline_whitespace,
+        [],  # streamline_whitespace,   # whitespace will be removed anyway
     "LF":
         replace_content(lambda node: '\n'),
     "PARSEP":
-        replace_content(lambda node: '\n\n'),
+        [],  # replace_content(lambda node: '\n\n'),
     "EOF": [],
+    ":Token":
+        [],  # [remove_whitespace, reduce_single_child],   # Tokens will be removed anyway?
     "*":
         replace_by_single_child,
 }
...
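In the transformation tables above, keys such as `"enumerate, itemize"` or `"config, block"` bundle several tag names under one entry and have to be expanded before lookup so that every individual tag name maps to its list of transformations. A rough sketch of that expansion step for string keys (an illustration of the table layout, not DHParser's own expansion code):

```python
def expand_table(table):
    """Split comma-separated keys like "enumerate, itemize" into one entry per tag name."""
    expanded = {}
    for key, transforms in table.items():
        for name in key.split(','):
            expanded[name.strip()] = transforms
    return expanded

print(expand_table({"enumerate, itemize": ['remove_brackets'], "item": ['remove_first']}))
# {'enumerate': ['remove_brackets'], 'itemize': ['remove_brackets'], 'item': ['remove_first']}
```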