In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 9b3d3fcc authored by di68kap

- Transformation collapse_if hinzugefügt

parent 5220a7f7
......@@ -999,7 +999,7 @@ def parse_xml(xml: str) -> Node:
if not solitary:
while s and not s[:2] == "</":
s, leaf = parse_leaf_content(s)
if not leaf.match(RX_WHITESPACE_TAIL):
if leaf and (leaf.find('\n') < 0 or not leaf.match(RX_WHITESPACE_TAIL)):
result.append(Node(PlainText, leaf))
if s[:1] == "<" and s[:2] != "</":
s, child = parse_full_content(s)
......
......@@ -33,7 +33,8 @@ import fnmatch
from functools import partial, reduce, singledispatch
from DHParser.error import Error
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, MockParser, ZOMBIE_NODE
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, ParserBase, MockParser, \
ZOMBIE_NODE
from DHParser.toolkit import issubtype, isgenerictype, expand_table, smart_list, re, typing
from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
Tuple, List, Sequence, Union, Text, Generic
......@@ -53,6 +54,7 @@ __all__ = ('TransformationDict',
'replace_or_reduce',
'replace_parser',
'collapse',
'collapse_if',
'merge_children',
'replace_content',
'replace_content_by',
......@@ -65,6 +67,7 @@ __all__ = ('TransformationDict',
'is_expendable',
'is_token',
'is_one_of',
'not_one_of',
'matches_re',
'has_content',
'has_parent',
......@@ -433,6 +436,12 @@ def is_one_of(context: List[Node], tag_name_set: AbstractSet[str]) -> bool:
return context[-1].tag_name in tag_name_set
@transformation_factory(collections.abc.Set)
def not_one_of(context: List[Node], tag_name_set: AbstractSet[str]) -> bool:
    """Checks whether the tag name of the last node in `context` is absent
    from `tag_name_set`. Returns True if it is absent, False otherwise."""
    current_tag = context[-1].tag_name
    return not (current_tag in tag_name_set)
# @transformation_factory(collections.abc.Set)
# def matches_wildcard(context: List[Node], wildcards: AbstractSet[str]) -> bool:
# """Returns true, if the node's tag_name matches one of the glob patterns
......@@ -671,6 +680,39 @@ def collapse(context: List[Node]):
node.result = node.content
@transformation_factory(collections.abc.Callable)
def collapse_if(context: List[Node], condition: Callable, target_tag: ParserBase):
    """Merges runs of adjacent children that fulfil `condition` into single
    leaf nodes.

    The children of the last node in `context` are scanned from left to
    right. Consecutive children for which `condition` returns a true value
    are collected and replaced by one new leaf `Node(target_tag, text)`,
    where `text` is the concatenation of the string forms of their results.
    Children that do not fulfil the condition are kept unchanged and end
    the current run.

    A child that fulfils the condition and has children of its own is first
    collapsed recursively and then dissolved: its (already collapsed)
    children are processed in its place. The current run is closed once the
    last of these children has been processed.

    Note that `condition` is always called with a one-element context that
    contains just the node being examined, not the full path from the root.

    The node is changed in place; nothing is returned. A node without
    children is left untouched.

    :param context: the path of nodes leading to the node to be processed;
        only the last element is used.
    :param condition: a callable that receives a context (list of nodes)
        and returns a true value for nodes that may be merged.
    :param target_tag: the parser object passed as first argument to
        `Node` for every newly created merged leaf.
    """
    node = context[-1]
    if not node.children:
        # A leaf has nothing to merge. Without this guard the final
        # assignment would replace the leaf's content with an empty tuple.
        return

    package = []  # nodes of the run that is currently being collected
    result = []   # the new sequence of children of `node`

    def close_package():
        """Turns the pending run, if any, into one merged leaf node."""
        nonlocal package
        if package:
            s = "".join(str(nd.result) for nd in package)
            result.append(Node(target_tag, s))
            package = []

    for child in node.children:
        if condition([child]):
            if child.children:
                # Collapse the subtree first, then lift its children up
                # into the place of `child`.
                collapse_if([child], condition, target_tag)
                for c in child.children:
                    if condition([c]):
                        package.append(c)
                    else:
                        close_package()
                        result.append(c)
                # A run never extends beyond the end of a dissolved child.
                close_package()
            else:
                package.append(child)
        else:
            close_package()
            result.append(child)
    close_package()
    node.result = tuple(result)
# @transformation_factory
# def collect_leaves(context: List[Node], whitespace: str=''):
# """
......
......@@ -24,9 +24,9 @@ import sys
sys.path.extend(['../', './'])
from DHParser.syntaxtree import Node, parse_sxpr, ZOMBIE_NODE
from DHParser.syntaxtree import Node, parse_sxpr, parse_xml, ZOMBIE_NODE, MockParser, TOKEN_PTYPE
from DHParser.transform import traverse, reduce_single_child, remove_whitespace, \
traverse_locally, collapse, lstrip, rstrip, remove_content, remove_tokens, \
traverse_locally, collapse, collapse_if, lstrip, rstrip, remove_content, remove_tokens, \
transformation_factory
from DHParser.toolkit import typing
from typing import AbstractSet, List, Sequence, Tuple
......@@ -202,6 +202,16 @@ class TestConditionalTransformations:
# whitespace after "facitergula", but not after "bona" should have been removed
assert str(cst) == "faciterculasim.bona fide"
class TestComplexTransformations:
    """Tests for transformations that regroup several children at once."""

    def test_collapse_if(self):
        """Collapsing with an always-true condition must not lose any text."""
        xml = ("<EINZEILER><DEU_WORT>spectat</DEU_WORT><WS> </WS><DEU_WORT>ad</DEU_WORT>"
               "<WS> </WS><DEU_WORT>gravitatem</DEU_WORT><TEIL_SATZZEICHEN>,</TEIL_SATZZEICHEN>"
               "<WS> </WS><DEU_WORT>momentum</DEU_WORT></EINZEILER>")
        tree = parse_xml(xml)
        print(tree.as_xml())
        content_before = tree.content
        text_parser = MockParser('Text', TOKEN_PTYPE)
        collapse_if([tree], lambda ctx: True, text_parser)
        print(tree.as_xml())
        # Merging only regroups leaves, so the concatenated text of the
        # tree has to be the same before and after the transformation.
        assert tree.content == content_before
if __name__ == "__main__":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment