11.3.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit d34bec8e authored by Eckhart Arnold's avatar Eckhart Arnold

- transform.py: refactored

parent 06d5fa5a
......@@ -28,6 +28,7 @@ from collections import OrderedDict
from functools import partial
import keyword
import os
from typing import Callable, Dict, List, Set, Tuple, Sequence, Union, Optional, Any, cast
from DHParser.compile import CompilerError, Compiler, compile_source, visitor_name
from DHParser.error import Error
......@@ -37,13 +38,12 @@ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace,
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
GLOBALS, get_config_value, unrepr, compile_python_object, typing
from DHParser.configuration import CONFIG_PRESET
GLOBALS, get_config_value, unrepr, compile_python_object
from DHParser.transform import TransformationFunc, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \
reduce_single_child, replace_by_single_child, remove_whitespace, remove_empty, \
remove_tokens, flatten, forbid, assert_content
from DHParser.versionnumber import __version__
from typing import Callable, Dict, List, Set, Tuple, Sequence, Union, Optional, Any, cast
__all__ = ('get_ebnf_preprocessor',
......@@ -93,13 +93,13 @@ from DHParser import logging, is_filename, load_if_file, \\
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \\
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, exchange_parser, remove_anonymous_tokens, \\
remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \\
collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, change_tag_name, remove_anonymous_tokens, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \\
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \\
flatten_anonymous_nodes, error_on, recompile_grammar, GLOBALS
error_on, recompile_grammar, GLOBALS
'''.format(dhparserdir=dhparserdir)
......@@ -254,7 +254,7 @@ def get_ebnf_grammar() -> EBNFGrammar:
EBNF_AST_transformation_table = {
# AST Transformations for EBNF-grammar
"<":
remove_expendables,
[remove_whitespace, remove_empty],
"syntax":
[], # otherwise '"*": replace_by_single_child' would be applied
"directive, definition":
......@@ -607,7 +607,7 @@ class EBNFCompiler(Compiler):
tt_name = self.grammar_name + '_AST_transformation_table'
transtable = [tt_name + ' = {',
' # AST Transformations for the ' + self.grammar_name + '-grammar']
transtable.append(' "<": flatten_anonymous_nodes,')
transtable.append(' "<": flatten,')
for name in self.rules:
transformations = '[]'
# rule = self.definitions[name]
......
......@@ -29,7 +29,6 @@ cpdef is_insignificant_whitespace(context: List[Node])
cpdef contains_only_whitespace(context: List[Node])
cpdef is_any_kind_of_whitespace(context: List[Node])
cpdef is_empty(context: List[Node])
cpdef is_expendable(context: List[Node])
# cpdef is_token(context: List[Node], tokens: AbstractSet[str] = ?)
# cpdef is_one_of(context: List[Node], tag_name_set: AbstractSet[str])
# cpdef not_one_of(context: List[Node], tag_name_set: AbstractSet[str])
......
......@@ -45,16 +45,15 @@ __all__ = ('TransformationDict',
'transformation_factory',
'key_tag_name',
'traverse',
'always',
'is_named',
'update_attr',
'flatten_anonymous_nodes',
'replace_by_single_child',
'reduce_single_child',
'replace_or_reduce',
'exchange_parser',
'change_tag_name',
'collapse',
'collapse_if',
# 'merge_children',
'replace_content',
'replace_content_by',
'normalize_whitespace',
......@@ -67,7 +66,6 @@ __all__ = ('TransformationDict',
'contains_only_whitespace',
'is_any_kind_of_whitespace',
'is_empty',
'is_expendable',
'is_token',
'is_one_of',
'not_one_of',
......@@ -90,9 +88,7 @@ __all__ = ('TransformationDict',
'remove_whitespace',
'remove_empty',
'remove_anonymous_empty',
'remove_anonymous_expendables',
'remove_anonymous_tokens',
'remove_expendables',
'remove_brackets',
'remove_infix_operator',
'remove_single_child',
......@@ -233,11 +229,11 @@ def transformation_factory(t1=None, t2=None, t3=None, t4=None, t5=None):
return decorator
# def key_parser_name(node: Node) -> str:
# return node.parser.name
def key_tag_name(node: Node) -> str:
    """
    Key function for transformation tables: yields the tag name of
    ``node``, which serves as the key for selecting its transformations.
    """
    tag = node.tag_name
    return tag
......@@ -245,7 +241,7 @@ def traverse(root_node: Node,
processing_table: ProcessingTableType,
key_func: KeyFunc = key_tag_name) -> None:
"""
Traverses the snytax tree starting with the given ``node`` depth
Traverses the syntax tree starting with the given ``node`` depth
first and applies the sequences of callback-functions registered
in the ``processing_table``-dictionary.
......@@ -387,6 +383,11 @@ def apply_unless(context: List[Node], transformation: Callable, condition: Calla
#######################################################################
def always(context: List[Node]) -> bool:
    """Returns ``True`` unconditionally, whatever the state of the context."""
    return True
def is_single_child(context: List[Node]) -> bool:
    """Tells whether the current node is the only child of its parent,
    i.e. returns ``True`` if it has no siblings."""
    parent = context[-2]
    return len(parent.children) == 1
......@@ -433,10 +434,11 @@ def is_empty(context: List[Node]) -> bool:
return not context[-1].result
def is_expendable(context: List[Node]) -> bool:
    """Tells whether the current node can be dispensed with: returns
    ``True`` if it is empty or consists of insignificant whitespace."""
    if is_empty(context):
        return True
    return is_insignificant_whitespace(context)
# DEPRECATED, because name is too ambiguous
# def is_expendable(context: List[Node]) -> bool:
# """Returns ``True`` if the current node either is a node containing
# whitespace or an empty node."""
# return is_empty(context) or is_insignificant_whitespace(context)
@transformation_factory(collections.abc.Set)
......@@ -545,74 +547,39 @@ def _reduce_child(node: Node, child: Node):
#
#######################################################################
# @transformation_factory(int, str, Callable)
# def replace_by_child(context: List[Node], criteria: CriteriaType=is_single_child):
# """
# Replaces a node by the first of its immediate descendants
# that meets the `criteria`. The criteria can either be the
# index of the child (counting from zero), or the tag name or
# a boolean-valued function on the context of the child.
# If no child matching the criteria is found, the node will
# not be replaced.
# With the default value for `criteria` the same semantics is
# the same that of `replace_by_single_child`.
# """
# child = _pick_child(context, criteria)
# if child:
# _replace_by(context[-1], child)
#
#
# @transformation_factory(int, str, Callable)
# def content_from_child(context: List[Node], criteria: CriteriaType = is_single_child):
# DEPRECATED
# def flatten_anonymous_nodes(context: List[Node]):
# """
# Reduces a node, by transferring the result of the first of its
# immediate descendants that meets the `criteria` to this node,
# but keeping this node's parser entry. The criteria can either
# be the index of the child (counting from zero), or the tag
# name or a boolean-valued function on the context of the child.
# If no child matching the criteria is found, the node will
# not be replaced.
# With the default value for `criteria` this has the same semantics
# as `content_from_single_child`.
# Flattens non-recursively all anonymous non-leaf children by adding
# their result to the result of the parent node. Empty anonymous children
# will be dropped altogether. If the parent node (i.e. `context[-1]) is
# anonymous itself and has only one child node, it will be replaced by
# its single child node.
# """
# child = _pick_child(context, criteria)
# if child:
# _reduce_child(context[-1], child)
def flatten_anonymous_nodes(context: List[Node]):
    """
    Flattens (non-recursively) the anonymous non-leaf children of the
    current node, i.e. ``context[-1]``, by splicing their children into
    the node's own result. Anonymous children with an empty result are
    dropped; named children are kept as they are.

    If exactly one child remains and the node itself is anonymous, the
    node takes over that child's tag name and result. If the node is
    named but the remaining child is anonymous, the node takes over the
    child's result only.
    """
    parent = context[-1]
    if not parent.children:
        return

    kept = []  # type: List[Node]
    for item in parent.children:
        if not item.is_anonymous():
            kept.append(item)
        elif item.children:
            # splice the grandchildren in place of the anonymous child
            kept.extend(item.children)
            update_attr(parent, item)
        elif item.result:
            kept.append(item)
        # anonymous children with an empty result are dropped

    if len(kept) == 1:
        only = kept[0]
        if parent.is_anonymous():
            parent.tag_name = only.tag_name
            parent.result = only.result
            update_attr(parent, only)
            return
        if only.is_anonymous():
            parent.result = only.result
            update_attr(parent, only)
            return

    parent.result = tuple(kept)
# node = context[-1]
# if node.children:
# new_result = [] # type: List[Node]
# for child in node.children:
# if child.is_anonymous():
# if child.children:
# new_result.extend(child.children)
# update_attr(node, child)
# elif child.result:
# new_result.append(child)
# else:
# new_result.append(child)
# if len(new_result) == 1:
# child = new_result[0]
# if node.is_anonymous():
# node.tag_name = child.tag_name
# node.result = child.result
# update_attr(node, child)
# return
# elif child.is_anonymous():
# node.result = child.result
# update_attr(node, child)
# return
# node.result = tuple(new_result)
def replace_by_single_child(context: List[Node]):
......@@ -647,24 +614,26 @@ def replace_or_reduce(context: List[Node], condition: Callable = is_named):
node = context[-1]
if len(node.children) == 1:
child = node.children[0]
if condition(context): # TODO: bug here?
if condition(context):
_replace_by(node, child)
else:
_reduce_child(node, child)
@transformation_factory
def exchange_parser(context: List[Node], name: str):
@transformation_factory(str)
def change_tag_name(context: List[Node], tag_name: str, restriction: Callable = always):
"""
Replaces the parser of a Node with a mock parser with the given
name.
Changes the tag name of a node.
Parameters:
restriction: A function of the context that returns False in cases
where the tag name shall not be exchanged
context: the context where the parser shall be replaced
name: "NAME:PTYPE" of the surrogate. The ptype is optional
tag_name: The new tag name.
"""
node = context[-1]
node.tag_name = name
if restriction(context):
node = context[-1]
node.tag_name = tag_name
@transformation_factory(collections.abc.Callable)
......@@ -875,6 +844,13 @@ def move_adjacent(context: List[Node], condition: Callable = is_insignificant_wh
parent.result = parent.children[:i] + before + (node,) + after + parent.children[i+1:]
def left_associative(context: List[Node]):
    """
    Rearranges a flat node into a left associative tree.
    """
    # NOTE(review): only the signature and the docstring are visible here;
    # no implementation follows in this view, so this appears to be a
    # placeholder -- confirm before relying on it in a transformation table.
#######################################################################
#
# destructive transformations:
......@@ -966,41 +942,11 @@ def remove_children_if(context: List[Node], condition: Callable):
node.result = tuple(c for c in node.children if not condition(context + [c]))
pass
# @transformation_factory(Callable)
# def remove_children(context: List[Node],
# condition: Callable = TRUE_CONDITION,
# section: slice = slice(None)):
# """Removes all nodes from a slice of the result field if the function
# `condition(child_node)` evaluates to `True`."""
# node = context[-1]
# if node.children:
# c = node.children
# N = len(c)
# rng = range(*section.indices(N))
# node.result = tuple(c[i] for i in range(N)
# if i not in rng or not condition(context + [c[i]]))
# # selection = []
# # for i in range(N):
# # context.append(c[i])
# # if not i in rng or not condition(context):
# # selection.append(c[i])
# # context.pop()
# # if len(selection) != c:
# # node.result = tuple(selection)
remove_whitespace = remove_children_if(is_insignificant_whitespace)
# partial(remove_children_if, condition=is_whitespace)
remove_empty = remove_children_if(is_empty)
remove_anonymous_empty = remove_children_if(lambda ctx: is_empty(ctx) and is_anonymous(ctx))
remove_expendables = remove_children_if(is_expendable)
# partial(remove_children_if, condition=is_expendable)
remove_anonymous_expendables = remove_children_if(lambda ctx: is_anonymous(ctx)
and is_expendable(ctx))
remove_anonymous_tokens = remove_children_if(lambda ctx: is_token(ctx) and is_anonymous(ctx))
# remove_first = apply_if(keep_children(slice(1, None)), lambda ctx: len(ctx[-1].children) > 1)
# remove_last = apply_if(keep_children(slice(None, -1)), lambda ctx: len(ctx[-1].children) > 1)
# remove_brackets = apply_if(keep_children(slice(1, -1)), lambda ctx: len(ctx[-1].children) >= 2)
remove_infix_operator = keep_children(slice(0, None, 2))
remove_single_child = apply_if(keep_children(slice(0)), lambda ctx: len(ctx[-1].children) == 1)
......
#!/usr/bin/python3
"""rename_project.py - rename a dhparser project properly
Copyright 2019 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.
"""
import os
import re
import shutil
import sys
def save_project(path: str) -> bool:
    """Copy the project directory `path` to a sibling directory `path + '_save'`.

    The path is normalised first, so a trailing separator (``proj/``)
    yields the backup ``proj_save`` next to the project rather than a
    directory ``_save`` inside the project itself.

    Returns:
        ``True`` if the backup copy was made, ``False`` if the backup
        location already exists (in which case nothing is copied).
    """
    backup = os.path.abspath(path) + '_save'
    if os.path.exists(backup):
        return False
    shutil.copytree(path, backup)
    return True
def check_projectdir(path: str) -> bool:
    """Verifies that `path` is a valid DHParser project directory.

    A directory whose base name is NAME counts as valid if it contains
    the regular files ``NAME.ebnf``, ``NAMECompiler.py`` and
    ``tst_NAME_grammar.py``. The first missing file is reported on
    stdout and ``False`` is returned; otherwise ``True``.
    """
    name = os.path.basename(path)
    required = (name + '.ebnf', name + 'Compiler.py', "tst_%s_grammar.py" % name)
    for candidate in (os.path.join(path, filename) for filename in required):
        if not os.path.isfile(candidate):
            print('Could not find ' + candidate)
            return False
    return True
def rename_projectdir(path: str, new: str) -> bool:
    """
    Renames the dhparser project in `path`. This implies renaming
    the directory itself, the test and compile script and the data types
    and variables that contain the project's name as part of their name.

    The old project name is the base name of the (normalised) `path`.
    Inside the compiler script and the grammar-test script every
    occurrence of the old name is replaced by `new`; the ``.ebnf`` file
    is renamed but its content is left untouched.

    All file operations use absolute paths, so the current working
    directory of the process is never changed, not even if one of the
    operations raises.

    Parameters:
        path:  the project directory (a trailing path separator is
               tolerated)
        new:   the new project name

    Returns:
        ``True`` once all renaming steps have been carried out.

    Raises:
        OSError: if one of the project files or the directory itself
            cannot be renamed, read or written.
    """
    project_dir = os.path.abspath(path)
    parent_dir, name = os.path.split(project_dir)
    templates = ('%s.ebnf', '%sCompiler.py', 'tst_%s_grammar.py')

    for template in templates:
        os.rename(os.path.join(project_dir, template % name),
                  os.path.join(project_dir, template % new))

    # only the two Python scripts refer to the project name in their content
    for template in templates[1:]:
        script = os.path.join(project_dir, template % new)
        with open(script, 'r', encoding='utf-8') as f:
            content = f.read()
        with open(script, 'w', encoding='utf-8') as f:
            f.write(content.replace(name, new))

    os.rename(project_dir, os.path.join(parent_dir, new))
    return True
if __name__ == "__main__":
    # Command line entry point: rename_project.py PROJECT_DIRECTORY NEW_NAME
    if len(sys.argv) == 3:
        # Normalise the path so that a trailing separator ("proj/") does not
        # yield an empty project name further down.
        projectdir = os.path.normpath(sys.argv[1])
        new_name = sys.argv[2]
        if not os.path.isdir(projectdir):
            print(projectdir + " is not a directory!")
            sys.exit(1)
        elif check_projectdir(projectdir):
            # the new name must consist of word characters only
            if re.fullmatch(r'\w+', new_name):
                # never rename without a backup copy of the old project
                if save_project(projectdir):
                    rename_projectdir(projectdir, new_name)
                else:
                    print('Could not save old project to '
                          + os.path.basename(projectdir) + '_save!')
                    sys.exit(1)
            else:
                print(new_name + " is not a valid project name!")
                sys.exit(1)
        else:
            print(projectdir + " does not seem to be a DHParser-project directory!")
            sys.exit(1)
    else:
        print('Usage: python rename_project.py PROJECT_DIRECTORY NEW_NAME')
......@@ -12,47 +12,31 @@
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace, token # drop anonymous whitespace
#######################################################################
#
#: Expressions
#
#######################################################################
expression = addition | subtraction | term
addition = (term "+" (addition|term)) | (subtraction "+" term)
subtraction = expression "-" term
#######################################################################
#
#: Terms
#: Structure and Components
#
#######################################################################
term = multiplication | division | factor
multiplication = factor ["*"] term
division = term "/" (multiplication | factor)
expression = term { (PLUS|MINUS) term}
term = factor { (DIV|[MUL]) factor}
factor = [sign] ( NUMBER | VARIABLE | group )
sign = POSITIVE | NEGATIVE
group = "(" expression ")"
#######################################################################
#
#: Factors
#: "Leaf"-Expressions
#
#######################################################################
factor = [sign] ( NUMBER | VARIABLE | group )
sign = PLUS | MINUS
group = "(" §expression ")"
PLUS = "+"
MINUS = "-"
MUL = "*"
DIV = "/"
POSITIVE = /[+]/ # no implicit whitespace after signs
NEGATIVE = /[-]/
#######################################################################
#
#: Tokens
#
#######################################################################
PLUS = /\+/
MINUS = /-/
NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
VARIABLE = /[A-Za-z]/~
......@@ -27,13 +27,13 @@ from DHParser import logging, is_filename, load_if_file, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, exchange_parser, remove_anonymous_tokens, \
remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, change_tag_name, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
error_on, recompile_grammar, flatten_anonymous_nodes, GLOBALS
error_on, recompile_grammar, GLOBALS
#######################################################################
......@@ -58,11 +58,8 @@ def get_preprocessor() -> PreprocessorFunc:
class ArithmeticGrammar(Grammar):
r"""Parser for an Arithmetic source file.
"""
addition = Forward()
expression = Forward()
multiplication = Forward()
term = Forward()
source_hash__ = "6707df7f53e835c1e97330f132324ce8"
source_hash__ = "9f06b2623e1d797c32efc3b864fec5bd"
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
......@@ -73,17 +70,17 @@ class ArithmeticGrammar(Grammar):
wsp__ = Whitespace(WSP_RE__)
VARIABLE = Series(RegExp('[A-Za-z]'), dwsp__)
NUMBER = Series(RegExp('(?:0|(?:[1-9]\\d*))(?:\\.\\d+)?'), dwsp__)
MINUS = RegExp('-')
PLUS = RegExp('\\+')
group = Series(Series(DropToken("("), dwsp__), expression, Series(DropToken(")"), dwsp__), mandatory=1)
sign = Alternative(PLUS, MINUS)
NEGATIVE = RegExp('[-]')
POSITIVE = RegExp('[+]')
DIV = Series(Token("/"), dwsp__)
MUL = Series(Token("*"), dwsp__)
MINUS = Series(Token("-"), dwsp__)
PLUS = Series(Token("+"), dwsp__)
group = Series(Series(DropToken("("), dwsp__), expression, Series(DropToken(")"), dwsp__))
sign = Alternative(POSITIVE, NEGATIVE)
factor = Series(Option(sign), Alternative(NUMBER, VARIABLE, group))
division = Series(term, Series(DropToken("/"), dwsp__), Alternative(multiplication, factor))
multiplication.set(Series(factor, Option(Series(DropToken("*"), dwsp__)), term))
term.set(Alternative(multiplication, division, factor))
subtraction = Series(expression, Series(DropToken("-"), dwsp__), term)
addition.set(Alternative(Series(term, Series(DropToken("+"), dwsp__), Alternative(addition, term)), Series(subtraction, Series(DropToken("+"), dwsp__), term)))
expression.set(Alternative(addition, subtraction, term))
term = Series(factor, ZeroOrMore(Series(Alternative(DIV, Option(MUL)), factor)))
expression.set(Series(term, ZeroOrMore(Series(Alternative(PLUS, MINUS), term))))
root__ = expression
def get_grammar() -> ArithmeticGrammar:
......@@ -104,11 +101,17 @@ def get_grammar() -> ArithmeticGrammar:
#
#######################################################################
Arithmetic_AST_transformation_table = {
# AST Transformations for the Arithmetic-grammar
# "<": flatten_anonymous_nodes,
"expression, term, sign, group, factor": [replace_by_single_child],
"<": flatten,
"expression": [],
"term": [reduce_single_child],
"factor": [reduce_single_child],
"group": [remove_tokens('(', ')'), replace_by_single_child],
"NUMBER": [],
"VARIABLE": [],
":Token": reduce_single_child,
"*": replace_by_single_child
}
......
......@@ -14,8 +14,6 @@ M3: "-2.71828"
M4: "-x"
M5: "(2 + x)"
M6: "-(a * b)"
M7: "4x"
M8: "-2x"
[fail:factor]
F1: "x4"
......@@ -28,6 +26,8 @@ M3: "5 / 2"
M4: "5 / 2x"
M5: "5 / -2x"
M6: "-3*2y"
M7: "4x"
M8: "-2x"
[fail:term]
F1: "2 + 4"
......
......@@ -24,7 +24,6 @@ except ModuleNotFoundError:
CONFIG_PRESET['ast_serialization'] = "S-expression"
CONFIG_PRESET['test_parallelization'] = True