Commit 5f4fe4e5 authored by di68kap's avatar di68kap

- Gezielte Einfügung von Spatien (noch fehlerhaft)

parent 03a9b87b
......@@ -292,9 +292,14 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
# mpargs = {'name': self.parser.name, 'ptype': self.parser.ptype}
# name, ptype = (self._tag_name.split(':') + [''])[:2]
# parg = "MockParser({name}, {ptype})".format(name=name, ptype=ptype)
rarg = str(self) if not self.children else \
rarg = ("'%s'" % str(self)) if not self.children else \
"(" + ", ".join(child.__repr__() for child in self.children) + ")"
return "Node(%s, %s)" % (self.tag_name, rarg)
rep = ["Node('%s', %s)" % (self.tag_name, rarg)]
if self.has_attr():
rep.append('.with_attr(%s)' % repr(dict(self.attr)))
if self._pos >= 0:
rep.append('.with_pos(%i)' % self._pos)
return ''.join(rep)
def __len__(self):
return (sum(child.__len__() for child in self.children)
......@@ -539,6 +544,38 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return self.attr.get(attribute, default)
return default
def with_attr(self, *attr_dict, **attributes):
    """
    Merges attributes into the node's attribute dictionary and returns
    the node itself, so that calls can be chained.

    The attributes are passed either as one single dictionary or as
    keyword parameters, but never as both at the same time.

    :param attr_dict:  a dictionary of attribute keys and values
    :param attributes: alternatively, a sequence of keyword parameters
    :return: `self`

    Example:
    >>> node = Node('test', '').with_attr(animal = "frog", plant= "tree")
    >>> dict(node.attr)
    {'animal': 'frog', 'plant': 'tree'}
    >>> node.with_attr({'building': 'skyscraper'})
    Node('test', '').with_attr({'animal': 'frog', 'plant': 'tree', 'building': 'skyscraper'})
    """
    if not attr_dict:
        # keyword form: `self.attr` is only touched if there is something to add
        if attributes:
            self.attr.update(attributes)
        return self

    # dictionary form: exactly one positional dict and no keyword parameters
    assert not attributes, "Node.with_attr() can be called either exclusively with " \
        "keyword parameters, or a single non-keyword parameter and no keyword parameters!"
    assert len(attr_dict) == 1, "Node.with_attr() must not be called with more than one " \
        "non-keyword parameter."
    dictionary = attr_dict[0]
    assert isinstance(dictionary, dict), "The non-keyword parameter passed to " \
        "Node.with_attr() must be of type dict, not %s." % str(type(dictionary))
    if dictionary:  # do not update with an empty dictionary
        self.attr.update(dictionary)
    return self
def compare_attr(self, other: 'Node', ignore_order: bool = False) -> bool:
"""
Returns True, if `self` and `other` have the same attributes with the
......
......@@ -63,6 +63,7 @@ __all__ = ('TransformationDict',
'collapse_children_if',
'replace_content',
'replace_content_by',
'add_attributes',
'normalize_whitespace',
'merge_adjacent',
'merge_results',
......@@ -110,6 +111,8 @@ __all__ = ('TransformationDict',
'forbid',
'require',
'assert_content',
'delimit_children',
'insert_delimiter',
'add_error',
'error_on',
'assert_has_children',
......@@ -837,6 +840,15 @@ def replace_content_by(context: List[Node], content: str): # Callable[[Node], R
node.result = content
@transformation_factory
def add_attributes(context: List[Node], attributes: dict):  # Dict[str, str]
    """
    Adds the attributes in the dictionary to the XML-attributes of the last
    node in the given context.

    :param context: the list of nodes leading to the node that is to be
        changed; only its last element is modified
    :param attributes: the attribute names and values that will be merged
        into the node's attributes
    """
    # The attribute dictionary is accessed as `attr` everywhere else
    # (e.g. by `Node.with_attr()`), not as `attrs`.
    context[-1].attr.update(attributes)
def normalize_whitespace(context):
"""
Normalizes Whitespace inside a leaf node, i.e. any sequence of
......@@ -1210,12 +1222,19 @@ def remove_if(context: List[Node], condition: Callable):
#
#######################################################################
@transformation_factory(str)
def delimit_children(context: List[Node], delimiter_tag_name: str, delimiter: str):
"""Ensures that the children are delimited by `delimiter`. Adds a delimiting node
of type `delimiter_tag_name`, where this is nt the case."""
def delimit_children(context: List[Node],
delimiter_tag_name: str,
delimiter: str,
attributes: dict = {}): # Dict[str, str]
"""
Ensures that the children are delimited by `delimiter`. Adds a delimiting node
of type `delimiter_tag_name`, where this is not the case.
"""
node = context[-1]
children = node.children
assert children
cl = [children[0]]
for i in range(1, len(children)):
last = cl[-1]
......@@ -1224,11 +1243,47 @@ def delimit_children(context: List[Node], delimiter_tag_name: str, delimiter: st
and next.tag_name != delimiter_tag_name \
and not last.content.endswith(delimiter) \
and not next.content.startswith(delimiter):
cl.append(Node(delimiter_tag_name, delimiter, True).with_pos(last.pos + len(last)))
cl.append(Node(delimiter_tag_name, delimiter, True)\
.with_pos(last.pos + len(last))\
.with_attr(attributes))
# pos-value of new node will resemble the source-position as faithfully as possible
cl.append(next)
node.result = tuple(cl)
@transformation_factory(int)
def insert_delimiter(context: List[Node],
                     position: int,
                     delimiter_tag_name: str,
                     delimiter: str,
                     attributes: dict = {}):  # Dict[str, str]
    """
    Inserts a delimiter at a specific position within the children of the
    last node in the context.

    Nothing is inserted if the child directly before or directly after the
    insertion point already is a delimiter node (same tag name) or if its
    content already ends, respectively starts, with the delimiter.

    :param context: the list of nodes leading to the node that is to be
        changed; only its last element is modified
    :param position: the index among the children before which the delimiter
        is inserted. Negative values count from the end. Values outside
        of the range of children are clamped to the beginning or the end.
    :param delimiter_tag_name: the tag name of the inserted delimiter node
    :param delimiter: the content of the inserted delimiter node
    :param attributes: attributes that are added to the delimiter node
    """
    node = context[-1]
    children = node.children
    # `attributes` is only passed on to `Node.with_attr()` and never mutated
    # here, so the shared default dictionary is left untouched.
    nd = Node(delimiter_tag_name, delimiter, True).with_attr(attributes)
    text_pos = node.pos
    if children:
        if position < 0:
            # Clamp at 0: a value that is still negative after normalization
            # would otherwise be misread as a wrap-around slice index below.
            position = max(0, len(children) + position)
        head = children[:position]
        if head:
            prev = head[-1]
            # place the delimiter right behind the preceding child
            text_pos = prev.pos + len(prev)
            if prev.tag_name == delimiter_tag_name or prev.content.endswith(delimiter):
                return
        tail = children[position:]
        if tail:
            successor = tail[0]
            if successor.tag_name == delimiter_tag_name \
                    or successor.content.startswith(delimiter):
                return
        node.result = head + (nd.with_pos(text_pos),) + tail
    else:
        # without children, the only possible insertion point is position 0
        assert position == 0
        node.result = (nd.with_pos(text_pos),)
########################################################################
#
# AST semantic validation functions (EXPERIMENTAL!!!)
......
......@@ -205,6 +205,15 @@ class TestNode:
assert str(self.unique_tree) == "ceh"
assert str(self.recurr_tree) == "xey"
def test_repr(self):
    """
    Checks that `repr()` renders leaf nodes, branch nodes, attributes and
    positions in the expected constructor-like notation.
    """
    leaf = Node('test1', 'content1')
    assert repr(leaf) == "Node('test1', 'content1')"

    branch = Node('test2', (Node('child1', 'content1'), Node('child2', 'content2')))
    expected = "Node('test2', (Node('child1', 'content1'), Node('child2', 'content2')))"
    assert repr(branch) == expected

    attributed = Node('test', '').with_attr(attr='value')
    assert repr(attributed) == "Node('test', '').with_attr({'attr': 'value'})"

    # attributes are rendered before the position, whatever the call order
    positioned = Node('test', '').with_pos(0).with_attr(attr='value')
    assert repr(positioned) == "Node('test', '').with_attr({'attr': 'value'}).with_pos(0)"
def test_select_subnodes(self):
tags = [node.tag_name
for node in self.unique_tree.select_if(lambda nd: True, include_root=True)]
......@@ -530,7 +539,6 @@ class TestSegementExtraction:
segment = tree.milestone_segment(B, C)
assert segment.equals(parse_sxpr('(left (B "b") (C "c"))'))
class TestPositionAssignment:
def test_position_assignment(self):
tree = parse_sxpr('(A (B (C "D") (E "FF")) (G "HHH"))')
......
......@@ -31,7 +31,8 @@ from DHParser.syntaxtree import Node, parse_sxpr, parse_xml, PLACEHOLDER, \
from DHParser.transform import traverse, reduce_single_child, remove_whitespace, move_adjacent, \
traverse_locally, collapse, collapse_children_if, lstrip, rstrip, remove_content, \
remove_tokens, transformation_factory, has_parent, contains_only_whitespace, \
is_insignificant_whitespace, merge_adjacent, is_one_of, swap_attributes
is_insignificant_whitespace, merge_adjacent, is_one_of, swap_attributes, delimit_children, \
insert_delimiter
from typing import AbstractSet, List, Sequence, Tuple
......@@ -321,6 +322,35 @@ class TestAttributeHandling:
assert B.attr['x'] == 'x'
class TestConstructiveTransformations:
    """Tests for transformations that add delimiter nodes to a tree."""

    def test_add_delimiters(self):
        """
        `delimit_children` puts a delimiter between all children and does
        not add further delimiters when it is applied a second time.
        """
        tree = parse_sxpr('(A (B 1) (B 2) (B 3))').with_pos(0)
        table = {'A': delimit_children('c', ',')}

        traverse(tree, table)
        first_pass = tree.serialize()
        assert first_pass == '(A (B "1") (c ",") (B "2") (c ",") (B "3"))', first_pass

        # second run over the already delimited tree
        traverse(tree, table)
        second_pass = tree.serialize()
        assert second_pass == first_pass, second_pass

    def test_insert_nodes(self):
        """
        `insert_delimiter` inserts at the front, at the end (for a position
        beyond the last child) and at a position counted from the end.
        """
        tree = parse_sxpr('(A (B 1) (B 2) (X 3))').with_pos(0)
        # each step works on the tree as left behind by the previous step
        steps = (
            (0, 'c', '=>',
             '(A (c "=>") (B "1") (B "2") (X "3"))'),
            (1000, 'd', '<=',
             '(A (c "=>") (B "1") (B "2") (X "3") (d "<="))'),
            (-2, 'e', '|',
             '(A (c "=>") (B "1") (B "2") (e "|") (X "3") (d "<="))'),
        )
        for position, tag_name, delimiter, expected in steps:
            traverse(tree, {'A': insert_delimiter(position, tag_name, delimiter)})
            result = tree.serialize()
            assert result == expected, result
if __name__ == "__main__":
from DHParser.testing import runner
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment