Commit cd4aab6b authored by di68kap
Browse files

- transform.py: move_adjacent can now be configured not to merge nodes after moving

parent 2afa6b3b
...@@ -58,6 +58,7 @@ __all__ = ('TransformationDict', ...@@ -58,6 +58,7 @@ __all__ = ('TransformationDict',
'replace_content', 'replace_content',
'replace_content_by', 'replace_content_by',
'normalize_whitespace', 'normalize_whitespace',
'merge_adjacent',
'move_adjacent', 'move_adjacent',
'left_associative', 'left_associative',
'lean_left', 'lean_left',
...@@ -411,7 +412,7 @@ def is_insignificant_whitespace(context: List[Node]) -> bool: ...@@ -411,7 +412,7 @@ def is_insignificant_whitespace(context: List[Node]) -> bool:
return context[-1].tag_name == WHITESPACE_PTYPE return context[-1].tag_name == WHITESPACE_PTYPE
RX_WHITESPACE = re.compile(r'\s+') RX_WHITESPACE = re.compile(r'\s*$')
def contains_only_whitespace(context: List[Node]) -> bool: def contains_only_whitespace(context: List[Node]) -> bool:
...@@ -419,8 +420,7 @@ def contains_only_whitespace(context: List[Node]) -> bool: ...@@ -419,8 +420,7 @@ def contains_only_whitespace(context: List[Node]) -> bool:
of the tag_name, i.e. nodes the content of which matches the regular of the tag_name, i.e. nodes the content of which matches the regular
expression /\s*/, including empty nodes. Note, that this is not true expression /\s*/, including empty nodes. Note, that this is not true
for anonymous whitespace nodes that contain comments.""" for anonymous whitespace nodes that contain comments."""
content = context[-1].content return bool(RX_WHITESPACE.match(context[-1].content))
return bool(not content or RX_WHITESPACE.match(content))
def is_any_kind_of_whitespace(context: List[Node]) -> bool: def is_any_kind_of_whitespace(context: List[Node]) -> bool:
...@@ -820,9 +820,11 @@ def normalize_whitespace(context): ...@@ -820,9 +820,11 @@ def normalize_whitespace(context):
node.result = re.sub(r'\s+', ' ', node.result) node.result = re.sub(r'\s+', ' ', node.result)
def merge_whitespace(context): def merge_adjacent(context, condition: Callable):
""" """
Merges adjacent whitespace. UNTESTED! Merges adjacent nodes that fulfill the given `condition`. It is
assumed that `condition` is never true for leaf-nodes and non-leaf-nodes
alike. Otherwise a type-error might ensue.
""" """
node = context[-1] node = context[-1]
children = node.children children = node.children
...@@ -830,26 +832,42 @@ def merge_whitespace(context): ...@@ -830,26 +832,42 @@ def merge_whitespace(context):
i = 0 i = 0
L = len(children) L = len(children)
while i < L: while i < L:
if children[i].tag_name == WHITESPACE_PTYPE: if condition([children[i]]):
initial = () if children[i].children else ''
k = i k = i
while i < L and children[k].tag_name == WHITESPACE_PTYPE: while i < L and condition([children[k]]):
i += 1 i += 1
if i > k: if i > k:
children[k].result = sum(children[n].result for n in range(k, i + 1)) children[k].result = sum((children[n].result for n in range(k, i + 1)), initial)
new_result.append(children[k]) new_result.append(children[k])
i += 1 i += 1
node.result = tuple(new_result) node.result = tuple(new_result)
@transformation_factory(collections.abc.Callable) @transformation_factory(collections.abc.Callable)
def move_adjacent(context: List[Node], condition: Callable = is_insignificant_whitespace): def move_adjacent(context: List[Node], condition: Callable, merge: bool = True):
""" """
Moves adjacent nodes that fulfill the given condition to the parent node. Moves adjacent nodes that fulfill the given condition to the parent node.
If the `merge`-flag is set, a moved node will be merged with its
predecessor (or successor, respectively) in the parent node in case it
also fulfills the given `condition`.
""" """
def join_results(a: Node, b: Node, c: Node) -> bool: def merge_results(a: Node, b: Node, c: Node) -> bool:
"""Joins the results of node `a` and `b` and write them to the result """
of `c` type-safely, if possible. Return True, if join was possible Merges the results of node `a` and `b` and writes them to the result
and done, False otherwise.""" of `c` type-safely, if a and b are either both leaf-nodes (in which case
their result-strings are concatenated) or both non-leaf-nodes (in which
case the tuples of children are concatenated).
Returns `True` in case of a successful merge, `False` if only one node
was a leaf node and the merge could thus not be done.
Example:
>>> head, tail = Node('head', '123'), Node('tail', '456')
>>> merge_results(head, tail, head) # merge head and tail (in that order) into head
True
>>> str(head)
'123456'
"""
if a.children and b.children: if a.children and b.children:
c.result = cast(Tuple[Node, ...], a.result) + cast(Tuple[Node, ...], b.result) c.result = cast(Tuple[Node, ...], a.result) + cast(Tuple[Node, ...], b.result)
return True return True
...@@ -858,7 +876,6 @@ def move_adjacent(context: List[Node], condition: Callable = is_insignificant_wh ...@@ -858,7 +876,6 @@ def move_adjacent(context: List[Node], condition: Callable = is_insignificant_wh
return True return True
return False return False
node = context[-1] node = context[-1]
if len(context) <= 1 or not node.children: if len(context) <= 1 or not node.children:
return return
...@@ -881,19 +898,20 @@ def move_adjacent(context: List[Node], condition: Callable = is_insignificant_wh ...@@ -881,19 +898,20 @@ def move_adjacent(context: List[Node], condition: Callable = is_insignificant_wh
if id(child) == id(node): if id(child) == id(node):
break break
# merge adjacent whitespace # merge adjacent nodes that fulfil the condition
prevN = parent.children[i - 1] if i > 0 else None if merge:
nextN = parent.children[i + 1] if i < len(parent.children) - 1 else None prevN = parent.children[i - 1] if i > 0 else None
if before and prevN and condition([prevN]): nextN = parent.children[i + 1] if i < len(parent.children) - 1 else None
# prevN.result = prevN.result + before[0].result if before and prevN and condition([prevN]):
# before = () # prevN.result = prevN.result + before[0].result
if join_results(prevN, before[0], prevN): # before = ()
before = () if merge_results(prevN, before[0], prevN):
if after and nextN and condition([nextN]): before = ()
# nextN.result = after[0].result + nextN.result if after and nextN and condition([nextN]):
# after = () # nextN.result = after[0].result + nextN.result
if join_results(after[0], nextN, nextN): # after = ()
after = () if merge_results(after[0], nextN, nextN):
after = ()
parent.result = parent.children[:i] + before + (node,) + after + parent.children[i+1:] parent.result = parent.children[:i] + before + (node,) + after + parent.children[i+1:]
......
...@@ -46,7 +46,7 @@ class TestDHParserCommandLineTool: ...@@ -46,7 +46,7 @@ class TestDHParserCommandLineTool:
os.chdir(self.cwd) os.chdir(self.cwd)
def test_dhparser(self): def test_dhparser(self):
os.system(self.python + '../scripts/dhparser.py testdata/neu ' + self.nulldevice) os.system(self.python + '../DHParser/scripts/dhparser.py testdata/neu ' + self.nulldevice)
os.system(self.python + 'testdata/neu/tst_neu_grammar.py ' + self.nulldevice) os.system(self.python + 'testdata/neu/tst_neu_grammar.py ' + self.nulldevice)
os.system(self.python + 'testdata/neu/neuCompiler.py testdata/neu/example.dsl ' os.system(self.python + 'testdata/neu/neuCompiler.py testdata/neu/example.dsl '
'>testdata/neu/example.xml') '>testdata/neu/example.xml')
......
...@@ -28,7 +28,7 @@ from DHParser.syntaxtree import Node, parse_sxpr, flatten_sxpr, parse_xml, PLACE ...@@ -28,7 +28,7 @@ from DHParser.syntaxtree import Node, parse_sxpr, flatten_sxpr, parse_xml, PLACE
tree_sanity_check, TOKEN_PTYPE tree_sanity_check, TOKEN_PTYPE
from DHParser.transform import traverse, reduce_single_child, remove_whitespace, move_adjacent, \ from DHParser.transform import traverse, reduce_single_child, remove_whitespace, move_adjacent, \
traverse_locally, collapse, collapse_if, lstrip, rstrip, remove_content, remove_tokens, \ traverse_locally, collapse, collapse_if, lstrip, rstrip, remove_content, remove_tokens, \
transformation_factory, has_parent, contains_only_whitespace transformation_factory, has_parent, contains_only_whitespace, is_insignificant_whitespace
from DHParser.toolkit import typing from DHParser.toolkit import typing
from typing import AbstractSet, List, Sequence, Tuple from typing import AbstractSet, List, Sequence, Tuple
...@@ -41,7 +41,7 @@ class TestRemoval: ...@@ -41,7 +41,7 @@ class TestRemoval:
assert contains_only_whitespace([Node('test', '')]) assert contains_only_whitespace([Node('test', '')])
assert contains_only_whitespace([Node('test', '\n')]) assert contains_only_whitespace([Node('test', '\n')])
assert not contains_only_whitespace([Node('test', 'Katze')]) assert not contains_only_whitespace([Node('test', 'Katze')])
assert contains_only_whitespace([Node('test', ' tag ')]) assert not contains_only_whitespace([Node('test', ' tag ')])
def test_lstrip(self): def test_lstrip(self):
cst = parse_sxpr('(_Token (:Whitespace " ") (:Re test))') cst = parse_sxpr('(_Token (:Whitespace " ") (:Re test))')
...@@ -255,7 +255,7 @@ class TestWhitespaceTransformations: ...@@ -255,7 +255,7 @@ class TestWhitespaceTransformations:
'(WORD (LETTERS "not") (:Whitespace " ")) ' '(WORD (LETTERS "not") (:Whitespace " ")) '
'(WORD (LETTERS "to") (:Whitespace " "))' '(WORD (LETTERS "to") (:Whitespace " "))'
'(WORD (LETTERS "be") (:Whitespace " ")))') '(WORD (LETTERS "be") (:Whitespace " ")))')
transformations = { 'WORD': move_adjacent } transformations = {'WORD': move_adjacent(is_insignificant_whitespace)}
traverse(sentence, transformations) traverse(sentence, transformations)
assert tree_sanity_check(sentence) assert tree_sanity_check(sentence)
assert all(i % 2 == 0 or node.tag_name == ':Whitespace' for i, node in enumerate(sentence)) assert all(i % 2 == 0 or node.tag_name == ':Whitespace' for i, node in enumerate(sentence))
...@@ -267,7 +267,7 @@ class TestWhitespaceTransformations: ...@@ -267,7 +267,7 @@ class TestWhitespaceTransformations:
'(WORD (:Whitespace " ") (LETTERS "not") (:Whitespace " ")) ' '(WORD (:Whitespace " ") (LETTERS "not") (:Whitespace " ")) '
'(WORD (:Whitespace " ") (LETTERS "to") (:Whitespace " "))' '(WORD (:Whitespace " ") (LETTERS "to") (:Whitespace " "))'
'(WORD (:Whitespace " ") (LETTERS "be") (:Whitespace " ")))') '(WORD (:Whitespace " ") (LETTERS "be") (:Whitespace " ")))')
transformations = { 'WORD': move_adjacent } transformations = {'WORD': move_adjacent(is_insignificant_whitespace)}
traverse(sentence, transformations) traverse(sentence, transformations)
assert tree_sanity_check(sentence) assert tree_sanity_check(sentence)
assert all(i % 2 == 0 or node.tag_name == ':Whitespace' for i, node in enumerate(sentence)) assert all(i % 2 == 0 or node.tag_name == ':Whitespace' for i, node in enumerate(sentence))
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment