05.11., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 366020e0 authored by di68kap

- DHParser/transform.py: some renaming + bugfix move_adjacent

parent de5b8bf8
......@@ -94,9 +94,9 @@ from DHParser import logging, is_filename, load_if_file, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \\
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \\
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, is_empty, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
......
......@@ -41,6 +41,7 @@ __all__ = ('WHITESPACE_PTYPE',
'ChildrenType',
'Node',
'FrozenNode',
'tree_sanity_check',
'RootNode',
'parse_sxpr',
'parse_xml',
......@@ -222,14 +223,15 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return True
def __eq__(self, other):
"""
Equality of nodes: Two nodes are considered as equal, if their tag
name is the same, if their results are equal and if their attributes
and attribute values are the same.
"""
return self.tag_name == other.tag_name and self.result == other.result \
and self.compare_attr(other)
# can lead to obscure mistakes, where default object comparison behaviour is expected
# def __eq__(self, other):
# """
# Equality of nodes: Two nodes are considered as equal, if their tag
# name is the same, if their results are equal and if their attributes
# and attribute values are the same.
# """
# return self.tag_name == other.tag_name and self.result == other.result \
# and self.compare_attr(other)
def __hash__(self):
......@@ -283,6 +285,21 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
raise ValueError('Leave node cannot contain other nodes')
def equals(self, other):
    """
    Structural equality of nodes: ``self`` and ``other`` are considered
    equal, if they carry the same tag name, if ``compare_attr`` reports
    matching attributes and if their results are equal. For nodes with
    children the latter means: same number of children and pairwise
    (recursive) equality of the children in order. For nodes without
    children the results are compared directly.
    """
    if self.tag_name != other.tag_name or not self.compare_attr(other):
        return False
    if not self.children:
        # leaf node: the result is compared as is
        return self.result == other.result
    if len(self.children) != len(other.children):
        return False
    return all(mine.equals(theirs)
               for mine, theirs in zip(self.children, other.children))
def get(self, index_or_tagname: Union[int, str],
surrogate: Union['Node', Iterator['Node']]) -> Union['Node', Iterator['Node']]:
"""Returns the child node with the given index if ``index_or_tagname``
......@@ -529,6 +546,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
"""Returns the opening string for the representation of `node`."""
txt = [left_bracket, node.tag_name]
# s += " '(pos %i)" % node.add_pos
# txt.append(str(id(node))) # for debugging
if node.attr_active():
txt.extend(' `(%s "%s")' % (k, v) for k, v in node.attr.items())
if src:
......@@ -748,6 +766,16 @@ class FrozenNode(Node):
PLACEHOLDER = Node('__PLACEHOLDER__', '')
def tree_sanity_check(tree: Node) -> bool:
    """Checks that no node object occurs more than once in ``tree``.

    All nodes yielded by ``tree.select()`` (root included) are collected
    in a set. A node that turns up a second time is tolerated only if it
    is a ``FrozenNode`` or carries the tag name ``'__PLACEHOLDER__'``.

    :param tree: the root of the tree to be checked
    :returns: ``False`` as soon as an untolerated repetition is found,
        ``True`` otherwise
    """
    seen = set()
    for node in tree.select(lambda nd: True, include_root=True):
        if node in seen:
            shareable = (isinstance(node, FrozenNode)
                         or node.tag_name == '__PLACEHOLDER__')
            if not shareable:
                return False
        seen.add(node)
    return True
class RootNode(Node):
"""TODO: Add Documentation!!!
......
......@@ -428,7 +428,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
if "cst" in tests and len(errata) == errflag:
compare = parse_tree(get(tests, "cst", test_name))
if compare:
if compare != cst:
if not compare.equals(cst):
errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
(test_name, parser_name, cst.as_sxpr()))
if verbose:
......@@ -438,7 +438,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
if "ast" in tests and len(errata) == errflag:
compare = parse_tree(get(tests, "ast", test_name))
if compare:
if compare != ast:
if not compare.equals(ast):
errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
'\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s'
% (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
......
......@@ -58,12 +58,14 @@ __all__ = ('TransformationDict',
'replace_content',
'replace_content_by',
'normalize_whitespace',
'move_whitespace',
'move_adjacent',
'apply_if',
'apply_unless',
'traverse_locally',
'is_anonymous',
'is_whitespace',
'is_insignificant_whitespace',
'contains_only_whitespace',
'is_any_kind_of_whitespace',
'is_empty',
'is_expendable',
'is_token',
......@@ -400,12 +402,31 @@ def is_anonymous(context: List[Node]) -> bool:
return context[-1].is_anonymous()
def is_whitespace(context: List[Node]) -> bool:
def is_insignificant_whitespace(context: List[Node]) -> bool:
    """Tells whether the last node of ``context`` has the tag name
    ``WHITESPACE_PTYPE``, which is the case for whitespace and for
    comments defined with the ``@comment``-directive."""
    current_node = context[-1]
    return current_node.tag_name == WHITESPACE_PTYPE
RX_WHITESPACE = re.compile(r'\s*')


def contains_only_whitespace(context: List[Node]) -> bool:
    r"""Returns ``True`` for nodes that contain only whitespace regardless
    of the tag_name, i.e. nodes the content of which matches the regular
    expression /\s*/ in its entirety, including empty nodes. Note, that
    this is not true for anonymous whitespace nodes that contain comments.

    :param context: the list of nodes ending with the node to be examined
    :returns: ``True`` if the content of ``context[-1]`` is empty or
        consists of whitespace characters only, ``False`` otherwise
    """
    # ``match()`` must not be used here: /\s*/ also matches the empty
    # string at position 0, so ``match()`` succeeds on any content.
    # ``fullmatch()`` requires the pattern to cover the whole content.
    return RX_WHITESPACE.fullmatch(context[-1].content) is not None
def is_any_kind_of_whitespace(context: List[Node]) -> bool:
    """Returns ``True`` for nodes that either contain only whitespace or
    are insignificant whitespace nodes, i.e. nodes with the ``tag_name``
    ``WHITESPACE_PTYPE``, including those that contain comment-text.

    :param context: the list of nodes ending with the node to be examined
    :returns: ``True`` if ``context[-1]`` has the tag name
        ``WHITESPACE_PTYPE`` or if its content is empty or consists of
        whitespace characters only, ``False`` otherwise
    """
    node = context[-1]
    # ``fullmatch()`` instead of ``match()``: the whitespace pattern also
    # matches the empty string at position 0, so ``match()`` would succeed
    # on any content. The comparison with ``None`` makes sure that a
    # ``bool`` is returned rather than a match object.
    return (node.tag_name == WHITESPACE_PTYPE
            or RX_WHITESPACE.fullmatch(node.content) is not None)
def is_empty(context: List[Node]) -> bool:
    """Tells whether the result of the last node of ``context`` is
    empty, i.e. whether it is falsy."""
    current_node = context[-1]
    return not current_node.result
......@@ -414,7 +435,7 @@ def is_empty(context: List[Node]) -> bool:
def is_expendable(context: List[Node]) -> bool:
"""Returns ``True`` if the current node either is a node containing
whitespace or an empty node."""
return is_empty(context) or is_whitespace(context)
return is_empty(context) or is_insignificant_whitespace(context)
@transformation_factory(collections.abc.Set)
......@@ -768,7 +789,7 @@ def normalize_whitespace(context):
"""
node = context[-1]
assert not node.children
if is_whitespace(context):
if is_insignificant_whitespace(context):
if node.result:
node.result = ' '
else:
......@@ -796,21 +817,22 @@ def merge_whitespace(context):
node.result = tuple(new_result)
def move_whitespace(context):
@transformation_factory(collections.abc.Callable)
def move_adjacent(context, condition: Callable = is_insignificant_whitespace):
"""
Moves adjacent whitespace nodes to the parent node.
Moves adjacent nodes that fulfill the given condition to the parent node.
"""
node = context[-1]
if len(context) <= 1 or not node.children:
return
parent = context[-2]
children = node.children
if children[0].tag_name == WHITESPACE_PTYPE:
if condition([children[0]]):
before = (children[0],)
children = children[1:]
else:
before = ()
if children and children[-1].tag_name == WHITESPACE_PTYPE:
if children and condition([children[-1]]):
after = (children[-1],)
children = children[:-1]
else:
......@@ -819,20 +841,20 @@ def move_whitespace(context):
if before or after:
node.result = children
for i, child in enumerate(parent.children):
if child == node:
if id(child) == id(node):
break
# merge adjacent whitespace
prevN = parent.children[i - 1] if i > 0 else None
nextN = parent.children[i + 1] if i < len(parent.children) - 1 else None
if before and prevN and prevN.tag_name == WHITESPACE_PTYPE:
if before and prevN and condition([prevN]):
prevN.result = prevN.result + before[0].result
before = ()
if after and nextN and nextN.tag_name == WHITESPACE_PTYPE:
if after and nextN and condition([nextN]):
nextN.result = after[0].result + nextN.result
after = ()
parent.result = parent.children[:i] + before + (node,) + after + parent.children[i + 1:]
parent.result = parent.children[:i] + before + (node,) + after + parent.children[i+1:]
#######################################################################
......@@ -949,7 +971,7 @@ def remove_children_if(context: List[Node], condition: Callable):
# # node.result = tuple(selection)
remove_whitespace = remove_children_if(is_whitespace)
remove_whitespace = remove_children_if(is_insignificant_whitespace)
# partial(remove_children_if, condition=is_whitespace)
remove_empty = remove_children_if(is_empty)
remove_anonymous_empty = remove_children_if(lambda ctx: is_empty(ctx) and is_anonymous(ctx))
......
......@@ -27,7 +27,7 @@ from DHParser import logging, is_filename, load_if_file, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, is_empty, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
......
......@@ -26,7 +26,7 @@ from DHParser import logging, is_filename, load_if_file, \
Node, TransformationFunc, TransformationDict, \
traverse, remove_children_if, is_anonymous, Whitespace, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
......
......@@ -26,7 +26,7 @@ from DHParser import is_filename, load_if_file, \
Node, TransformationDict, Whitespace, \
traverse, remove_children_if, is_anonymous, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \
is_empty, is_expendable, collapse, replace_content, remove_nodes, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
WHITESPACE_PTYPE, TOKEN_PTYPE, GLOBALS
......
......@@ -27,7 +27,7 @@ from DHParser import logging, is_filename, load_if_file, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, is_empty, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
......
......@@ -26,7 +26,7 @@ from DHParser import is_filename, load_if_file, \
Node, TransformationFunc, TransformationDict, Whitespace, \
traverse, remove_children_if, is_anonymous, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
......
......@@ -57,7 +57,7 @@ class LaTeXGrammar(Grammar):
paragraph = Forward()
tabular_config = Forward()
text_element = Forward()
source_hash__ = "e09808ecd485c07b3455c3a2bf4eada3"
source_hash__ = "dacb1f9ad5b1c18cdc29c7ddb7878959"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'%.*'
......
......@@ -25,7 +25,7 @@ from DHParser import is_filename, Grammar, Compiler, Lookbehind, \
reduce_single_child, replace_by_single_child, remove_whitespace, \
flatten, is_empty, collapse, replace_content, remove_brackets, \
is_one_of, rstrip, strip, remove_tokens, remove_nodes, peek, \
is_whitespace, TOKEN_PTYPE, GLOBALS
is_insignificant_whitespace, TOKEN_PTYPE, GLOBALS
from DHParser.log import logging
......
......@@ -27,7 +27,7 @@ from DHParser import logging, is_filename, load_if_file, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, is_empty, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
......
......@@ -28,7 +28,7 @@ from DHParser import logging, is_filename, load_if_file, \
Node, TransformationFunc, TransformationDict, Token, DropToken, DropWhitespace, \
traverse, remove_children_if, is_anonymous, GLOBALS, flatten_anonymous_nodes, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
......
......@@ -27,7 +27,7 @@ from DHParser import logging, is_filename, load_if_file, Grammar, Compiler, nil_
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, is_empty, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
......
......@@ -86,7 +86,7 @@ class TestNode:
tree.with_pos(0)
tree_copy = copy.deepcopy(tree)
assert tree == tree_copy
assert tree.equals(tree_copy)
assert tree.as_sxpr() == parse_sxpr('(a (b c) (d (e f) (h i)))').as_sxpr()
assert tree_copy.as_sxpr() == parse_sxpr('(a (b c) (d (e f) (h i)))').as_sxpr()
......@@ -96,11 +96,11 @@ class TestNode:
assert tree_copy.as_sxpr() == parse_sxpr('(a (b c) (d (e f) (h i)))').as_sxpr()
tree['d'].result = "x"
assert tree != tree_copy
assert tree_copy == parse_sxpr('(a (b c) (d (e f) (h i)))')
assert not tree.equals(tree_copy)
assert tree_copy.equals(parse_sxpr('(a (b c) (d (e f) (h i)))'))
# print(tree.as_sxpr())
# print(parse_sxpr('(a (b c) (d x))').as_sxpr())
assert tree == parse_sxpr('(a (b c) (d x))')
assert tree.equals(parse_sxpr('(a (b c) (d x))'))
# this also checks for errors equality...
assert parse_sxpr('(a (b c) (d x))').as_sxpr() != tree.as_sxpr()
......@@ -123,10 +123,10 @@ class TestNode:
assert found[0].result == 'x' and found[1].result == 'y'
def test_equality1(self):
assert self.unique_tree == self.unique_tree
assert self.recurr_tree != self.unique_tree
assert parse_sxpr('(a (b c))') != parse_sxpr('(a (b d))')
assert parse_sxpr('(a (b c))') == parse_sxpr('(a (b c))')
assert self.unique_tree.equals(self.unique_tree)
assert not self.recurr_tree.equals(self.unique_tree)
assert not parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b d))'))
assert parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b c))'))
def test_equality2(self):
ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
......@@ -137,14 +137,14 @@ class TestNode:
tree = parser("20 / 4 * 3")
traverse(tree, att)
compare_tree = parse_sxpr("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
assert tree == compare_tree, tree.as_sxpr()
assert tree.equals(compare_tree), tree.as_sxpr()
def test_copy(self):
cpy = copy.deepcopy(self.unique_tree)
assert cpy == self.unique_tree
assert cpy.equals(self.unique_tree)
assert cpy.result[0].result != "epsilon" # just to make sure...
cpy.result[0].result = "epsilon"
assert cpy != self.unique_tree
assert not cpy.equals(self.unique_tree)
def test_copy2(self):
# test if Node.__deepcopy__ goes sufficiently deep for ast-
......@@ -225,8 +225,8 @@ class TestNodeFind():
assert len(matches) == 2, len(matches)
assert str(matches[0]) == 'd', str(matches[0])
assert str(matches[1]) == 'F', str(matches[1])
assert matches[0] == parse_sxpr('(X (c d))')
assert matches[1] == parse_sxpr('(X F)')
assert matches[0].equals(parse_sxpr('(X (c d))'))
assert matches[1].equals(parse_sxpr('(X F)'))
# check default: root is included in search:
matchf2 = lambda node: match_tag_name(node, 'a')
assert list(tree.select(matchf2, include_root=True))
......@@ -234,16 +234,16 @@ class TestNodeFind():
def test_getitem(self):
tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
assert tree[0] == parse_sxpr('(b X)')
assert tree[2] == parse_sxpr('(e (X F))')
assert tree[0].equals(parse_sxpr('(b X)'))
assert tree[2].equals(parse_sxpr('(e (X F))'))
try:
node = tree[3]
assert False, "IndexError expected!"
except IndexError:
pass
matches = list(tree.select_by_tag('X', False))
assert matches[0] == parse_sxpr('(X (c d))')
assert matches[1] == parse_sxpr('(X F)')
assert matches[0].equals(parse_sxpr('(X (c d))'))
assert matches[1].equals(parse_sxpr('(X F)'))
def test_contains(self):
tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
......
......@@ -25,8 +25,8 @@ import sys
sys.path.extend(['../', './'])
from DHParser.syntaxtree import Node, parse_sxpr, flatten_sxpr, parse_xml, PLACEHOLDER, \
TOKEN_PTYPE
from DHParser.transform import traverse, reduce_single_child, remove_whitespace, \
tree_sanity_check, TOKEN_PTYPE
from DHParser.transform import traverse, reduce_single_child, remove_whitespace, move_adjacent, \
traverse_locally, collapse, collapse_if, lstrip, rstrip, remove_content, remove_tokens, \
transformation_factory, has_parent
from DHParser.toolkit import typing
......@@ -240,6 +240,32 @@ class TestComplexTransformations:
"<Stelle><Text>p.26</Text><HOCHGESTELLT>b</HOCHGESTELLT><Text>,18</Text></Stelle>"
class TestWhitespaceTransformations:
    """Tests for ``move_adjacent`` applied to the WORD nodes of a
    SENTENCE tree."""

    def test_move_adjacent(self):
        """Each WORD ends with a whitespace node. After the traversal the
        tree must pass ``tree_sanity_check`` and every item at an odd
        position of the sentence must be a ``:Whitespace`` node."""
        tree = parse_sxpr('(SENTENCE (WORD (LETTERS "To") (:Whitespace " ")) '
                          '(WORD (LETTERS "be") (:Whitespace " ")) '
                          '(WORD (LETTERS "or") (:Whitespace " ")) '
                          '(WORD (LETTERS "not") (:Whitespace " ")) '
                          '(WORD (LETTERS "to") (:Whitespace " "))'
                          '(WORD (LETTERS "be") (:Whitespace " ")))')
        traverse(tree, {'WORD': move_adjacent})
        assert tree_sanity_check(tree)
        for position, item in enumerate(tree):
            assert position % 2 == 0 or item.tag_name == ':Whitespace'

    def test_move_and_merge_adjacent(self):
        """All WORDs but the first also start with a whitespace node. The
        same conditions as in ``test_move_adjacent`` must hold after the
        traversal."""
        tree = parse_sxpr('(SENTENCE (WORD (LETTERS "To") (:Whitespace " ")) '
                          '(WORD (:Whitespace " ") (LETTERS "be") (:Whitespace " ")) '
                          '(WORD (:Whitespace " ") (LETTERS "or") (:Whitespace " ")) '
                          '(WORD (:Whitespace " ") (LETTERS "not") (:Whitespace " ")) '
                          '(WORD (:Whitespace " ") (LETTERS "to") (:Whitespace " "))'
                          '(WORD (:Whitespace " ") (LETTERS "be") (:Whitespace " ")))')
        traverse(tree, {'WORD': move_adjacent})
        assert tree_sanity_check(tree)
        for position, item in enumerate(tree):
            assert position % 2 == 0 or item.tag_name == ':Whitespace'
if __name__ == "__main__":
from DHParser.testing import runner
runner("", globals())
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment