Loading DHParser/syntaxtree.py +39 −2 Original line number Diff line number Diff line Loading @@ -292,9 +292,14 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil # mpargs = {'name': self.parser.name, 'ptype': self.parser.ptype} # name, ptype = (self._tag_name.split(':') + [''])[:2] # parg = "MockParser({name}, {ptype})".format(name=name, ptype=ptype) rarg = str(self) if not self.children else \ rarg = ("'%s'" % str(self)) if not self.children else \ "(" + ", ".join(child.__repr__() for child in self.children) + ")" return "Node(%s, %s)" % (self.tag_name, rarg) rep = ["Node('%s', %s)" % (self.tag_name, rarg)] if self.has_attr(): rep.append('.with_attr(%s)' % repr(dict(self.attr))) if self._pos >= 0: rep.append('.with_pos(%i)' % self._pos) return ''.join(rep) def __len__(self): return (sum(child.__len__() for child in self.children) Loading Loading @@ -539,6 +544,38 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil return self.attr.get(attribute, default) return default def with_attr(self, *attr_dict, **attributes): """ Adds the attributes which are passed to `with_attr()` either as an attribute dictionary or as keyword parameters to the node's attributes and returns `self`. :param attr_dict: a dictionary of attribute keys and values :param attributes: alternatively, a squences of keyword parameters :return: `self` Example: >>> node = Node('test', '').with_attr(animal = "frog", plant= "tree") >>> dict(node.attr) {'animal': 'frog', 'plant': 'tree'} >>> node.with_attr({'building': 'skyscraper'}) Node('test', '').with_attr({'animal': 'frog', 'plant': 'tree', 'building': 'skyscraper'}) """ if attr_dict: assert not attributes, "Node.with_attr() can be called either exclusively with " \ "keyword parameters, or a single non-keyword parameter and no keyword parameters!" assert len(attr_dict) == 1, "Node.with_attr() must not be called with more than one " \ "non-keyword parameter." 
dictionary = attr_dict[0] assert isinstance(dictionary, dict), "The non-keyword parameter passed to " \ "Node.with_attr() must be of type dict, not %s." % str(type(dictionary)) # assert all(isinstance(a, str) and isinstance(v, str) for a, v in attr_dict.items()) if dictionary: # do not update with an empty dictionary self.attr.update(dictionary) elif attributes: # assert all(isinstance(a, str) and isinstance(v, str) for a, v in attributes.items()) self.attr.update(attributes) return self def compare_attr(self, other: 'Node', ignore_order: bool = False) -> bool: """ Returns True, if `self` and `other` have the same attributes with the Loading DHParser/transform.py +59 −4 Original line number Diff line number Diff line Loading @@ -63,6 +63,7 @@ __all__ = ('TransformationDict', 'collapse_children_if', 'replace_content', 'replace_content_by', 'add_attributes', 'normalize_whitespace', 'merge_adjacent', 'merge_results', Loading Loading @@ -110,6 +111,8 @@ __all__ = ('TransformationDict', 'forbid', 'require', 'assert_content', 'delimit_children', 'insert_delimiter', 'add_error', 'error_on', 'assert_has_children', Loading Loading @@ -837,6 +840,15 @@ def replace_content_by(context: List[Node], content: str): # Callable[[Node], R node.result = content @transformation_factory def add_attributes(context: List[Node], attributes: dict): # Dict[str, str] """ Adds the attributes in the dictionary to the XML-Attributes of the last node in the given context. """ context[-1].attrs.update(attributes) def normalize_whitespace(context): """ Normalizes Whitespace inside a leaf node, i.e. any sequence of Loading Loading @@ -1210,12 +1222,19 @@ def remove_if(context: List[Node], condition: Callable): # ####################################################################### @transformation_factory(str) def delimit_children(context: List[Node], delimiter_tag_name: str, delimiter: str): """Ensures that the children are delimited by `delimiter`. 
Adds a delimiting node of type `delimiter_tag_name`, where this is nt the case.""" def delimit_children(context: List[Node], delimiter_tag_name: str, delimiter: str, attributes: dict = {}): # Dict[str, str] """ Ensures that the children are delimited by `delimiter`. Adds a delimiting node of type `delimiter_tag_name`, where this is not the case. """ node = context[-1] children = node.children assert children cl = [children[0]] for i in range(1, len(children)): last = cl[-1] Loading @@ -1224,11 +1243,47 @@ def delimit_children(context: List[Node], delimiter_tag_name: str, delimiter: st and next.tag_name != delimiter_tag_name \ and not last.content.endswith(delimiter) \ and not next.content.startswith(delimiter): cl.append(Node(delimiter_tag_name, delimiter, True).with_pos(last.pos + len(last))) cl.append(Node(delimiter_tag_name, delimiter, True)\ .with_pos(last.pos + len(last))\ .with_attr(attributes)) # pos-value of new node will resemble the source-position as faithful as possible cl.append(next) node.result = tuple(cl) @transformation_factory(int) def insert_delimiter(context: List[Node], position: int, delimiter_tag_name: str, delimiter: str, attributes: dict = {}): # Dict[str, str] """ Inserts a delimiter at a specific position within the children. 
""" node = context[-1] children = node.children nd = Node(delimiter_tag_name, delimiter, True).with_attr(attributes) text_pos = node.pos if children: if position < 0: position = len(children) + position head = children[:position] if head: prev = head[-1] text_pos = prev.pos + len(prev) if prev.tag_name == delimiter_tag_name or prev.content.endswith(delimiter): return tail = children[position:] if tail: next = tail[0] if next.tag_name == delimiter_tag_name or next.content.startswith(delimiter): return node.result = head + (nd.with_pos(text_pos),) + tail else: assert position == 0 node.result = (nd.with_pos(text_pos),) ######################################################################## # # AST semantic validation functions (EXPERIMENTAL!!!) Loading test/test_syntaxtree.py +9 −1 Original line number Diff line number Diff line Loading @@ -205,6 +205,15 @@ class TestNode: assert str(self.unique_tree) == "ceh" assert str(self.recurr_tree) == "xey" def test_repr(self): assert repr(Node('test1', 'content1')) == "Node('test1', 'content1')" assert repr(Node('test2', (Node('child1', 'content1'), Node('child2', 'content2')))) \ == "Node('test2', (Node('child1', 'content1'), Node('child2', 'content2')))" assert repr(Node('test', '').with_attr(attr='value')) \ == "Node('test', '').with_attr({'attr': 'value'})" assert repr(Node('test', '').with_pos(0).with_attr(attr='value')) \ == "Node('test', '').with_attr({'attr': 'value'}).with_pos(0)" def test_select_subnodes(self): tags = [node.tag_name for node in self.unique_tree.select_if(lambda nd: True, include_root=True)] Loading Loading @@ -530,7 +539,6 @@ class TestSegementExtraction: segment = tree.milestone_segment(B, C) assert segment.equals(parse_sxpr('(left (B "b") (C "c"))')) class TestPositionAssignment: def test_position_assignment(self): tree = parse_sxpr('(A (B (C "D") (E "FF")) (G "HHH"))') Loading test/test_transform.py +31 −1 Original line number Diff line number Diff line Loading @@ -31,7 +31,8 @@ from 
DHParser.syntaxtree import Node, parse_sxpr, parse_xml, PLACEHOLDER, \ from DHParser.transform import traverse, reduce_single_child, remove_whitespace, move_adjacent, \ traverse_locally, collapse, collapse_children_if, lstrip, rstrip, remove_content, \ remove_tokens, transformation_factory, has_parent, contains_only_whitespace, \ is_insignificant_whitespace, merge_adjacent, is_one_of, swap_attributes is_insignificant_whitespace, merge_adjacent, is_one_of, swap_attributes, delimit_children, \ insert_delimiter from typing import AbstractSet, List, Sequence, Tuple Loading Loading @@ -321,6 +322,35 @@ class TestAttributeHandling: assert B.attr['x'] == 'x' class TestConstructiveTransformations: def test_add_delimiters(self): tree = parse_sxpr('(A (B 1) (B 2) (B 3))').with_pos(0) trans_table = {'A': delimit_children('c', ',')} traverse(tree, trans_table) original_result = tree.serialize() assert original_result == '(A (B "1") (c ",") (B "2") (c ",") (B "3"))', original_result traverse(tree, trans_table) new_result = tree.serialize() assert new_result == original_result, new_result def test_insert_nodes(self): tree = parse_sxpr('(A (B 1) (B 2) (X 3))').with_pos(0) trans_table = {'A': insert_delimiter(0, 'c', '=>')} traverse(tree, trans_table) result1 = tree.serialize() assert result1 == '(A (c "=>") (B "1") (B "2") (X "3"))', result1 trans_table = {'A': insert_delimiter(1000, 'd', '<=')} traverse(tree, trans_table) result2 = tree.serialize() assert result2 == '(A (c "=>") (B "1") (B "2") (X "3") (d "<="))', result2 trans_table = {'A': insert_delimiter(-2, 'e', '|')} traverse(tree, trans_table) result3 = tree.serialize() assert result3 == '(A (c "=>") (B "1") (B "2") (e "|") (X "3") (d "<="))', result3 if __name__ == "__main__": from DHParser.testing import runner Loading Loading
def with_attr(self, *attr_dict, **attributes):
    """
    Merges the given attributes into the node's attribute dictionary and
    returns the node itself, which allows call chaining.

    The attributes are passed either as one positional dictionary or as
    keyword parameters, but never both at once.

    :param attr_dict:  a single dictionary of attribute keys and values
    :param attributes:  alternatively, a sequence of keyword parameters
    :return: `self`

    Example:
    >>> node = Node('test', '').with_attr(animal = "frog", plant= "tree")
    >>> dict(node.attr)
    {'animal': 'frog', 'plant': 'tree'}
    >>> node.with_attr({'building': 'skyscraper'})
    Node('test', '').with_attr({'animal': 'frog', 'plant': 'tree', 'building': 'skyscraper'})
    """
    # Keyword-only form (or no arguments at all).
    if not attr_dict:
        if attributes:
            self.attr.update(attributes)
        return self

    # Positional form: exactly one dict and no keyword parameters.
    assert not attributes, "Node.with_attr() can be called either exclusively with " \
        "keyword parameters, or a single non-keyword parameter and no keyword parameters!"
    assert len(attr_dict) == 1, "Node.with_attr() must not be called with more than one " \
        "non-keyword parameter."
    dictionary = attr_dict[0]
    assert isinstance(dictionary, dict), "The non-keyword parameter passed to " \
        "Node.with_attr() must be of type dict, not %s." % str(type(dictionary))
    if dictionary:
        # An empty dictionary is skipped, so `self.attr` is not touched at all.
        self.attr.update(dictionary)
    return self
@transformation_factory
def add_attributes(context: List[Node], attributes: dict):  # Dict[str, str]
    """
    Adds the attributes in the dictionary to the XML-Attributes of the last
    node in the given context.

    :param context: the path of nodes leading to the node to be changed;
        only `context[-1]` is modified
    :param attributes: attribute names and values to merge into the node
    """
    # The attribute dictionary of a node is `attr` (not `attrs`).
    context[-1].attr.update(attributes)


@transformation_factory(str)
def delimit_children(context: List[Node], delimiter_tag_name: str, delimiter: str,
                     attributes: dict = {}):  # Dict[str, str]
    """
    Ensures that the children are delimited by `delimiter`.
    Adds a delimiting node of type `delimiter_tag_name`, where this is
    not the case.

    :param context: the path of nodes; the children of `context[-1]` are
        delimited
    :param delimiter_tag_name: tag name of the delimiter nodes to be inserted
    :param delimiter: text content of the delimiter nodes
    :param attributes: attributes attached to every inserted delimiter node.
        The shared default dictionary is only read, never modified here.
    """
    node = context[-1]
    children = node.children
    assert children
    delimited = [children[0]]
    for following in children[1:]:
        preceding = delimited[-1]
        # NOTE(review): the diff view collapses the two lines that bind the
        # next child and open this condition. The first conjunct is inferred
        # by symmetry with the visible ones -- confirm against the full file.
        if (preceding.tag_name != delimiter_tag_name
                and following.tag_name != delimiter_tag_name
                and not preceding.content.endswith(delimiter)
                and not following.content.startswith(delimiter)):
            # pos-value of new node will resemble the source-position as
            # faithfully as possible
            delimited.append(Node(delimiter_tag_name, delimiter, True)
                             .with_pos(preceding.pos + len(preceding))
                             .with_attr(attributes))
        delimited.append(following)
    node.result = tuple(delimited)


@transformation_factory(int)
def insert_delimiter(context: List[Node], position: int, delimiter_tag_name: str,
                     delimiter: str, attributes: dict = {}):  # Dict[str, str]
    """
    Inserts a delimiter at a specific position within the children.

    Nothing is inserted if either neighbour of the insertion point already
    is a delimiter node or if its content already ends (predecessor) or
    starts (successor) with `delimiter`.

    :param context: the path of nodes; the delimiter is inserted among the
        children of `context[-1]`
    :param position: index before which the delimiter is inserted. Negative
        values count from the end and are clamped to 0. Values beyond the
        last child append the delimiter.
    :param delimiter_tag_name: tag name of the inserted delimiter node
    :param delimiter: text content of the inserted delimiter node
    :param attributes: attributes attached to the inserted node. The shared
        default dictionary is only read, never modified here.
    """
    node = context[-1]
    children = node.children
    new_node = Node(delimiter_tag_name, delimiter, True).with_attr(attributes)
    text_pos = node.pos

    if not children:
        # Without children only position 0 is accepted.
        assert position == 0
        node.result = (new_node.with_pos(text_pos),)
        return

    if position < 0:
        # Clamp, so that an index below -len(children) inserts at the front
        # instead of producing a shifted slice.
        position = max(0, len(children) + position)

    head = children[:position]
    tail = children[position:]
    if head:
        before = head[-1]
        text_pos = before.pos + len(before)
        if before.tag_name == delimiter_tag_name or before.content.endswith(delimiter):
            return
    if tail:
        after = tail[0]
        if after.tag_name == delimiter_tag_name or after.content.startswith(delimiter):
            return
    node.result = head + (new_node.with_pos(text_pos),) + tail
def test_repr(self):
    """
    Checks the constructor-call notation produced by `repr()` for a leaf
    node, a node with children, and nodes with attributes and position.
    """
    nested = Node('test2', (Node('child1', 'content1'), Node('child2', 'content2')))
    cases = [
        (Node('test1', 'content1'),
         "Node('test1', 'content1')"),
        (nested,
         "Node('test2', (Node('child1', 'content1'), Node('child2', 'content2')))"),
        (Node('test', '').with_attr(attr='value'),
         "Node('test', '').with_attr({'attr': 'value'})"),
        # attributes are rendered before the position, whatever the call order
        (Node('test', '').with_pos(0).with_attr(attr='value'),
         "Node('test', '').with_attr({'attr': 'value'}).with_pos(0)"),
    ]
    for node, expected in cases:
        assert repr(node) == expected
from DHParser.transform import traverse, reduce_single_child, remove_whitespace, move_adjacent, \
    traverse_locally, collapse, collapse_children_if, lstrip, rstrip, remove_content, \
    remove_tokens, transformation_factory, has_parent, contains_only_whitespace, \
    is_insignificant_whitespace, merge_adjacent, is_one_of, swap_attributes, delimit_children, \
    insert_delimiter
from typing import AbstractSet, List, Sequence, Tuple


class TestConstructiveTransformations:
    """Tests for transformations that add nodes to the tree."""

    def test_add_delimiters(self):
        """Delimiters go between all children; a second pass changes nothing."""
        tree = parse_sxpr('(A (B 1) (B 2) (B 3))').with_pos(0)
        table = {'A': delimit_children('c', ',')}

        traverse(tree, table)
        first_pass = tree.serialize()
        assert first_pass == '(A (B "1") (c ",") (B "2") (c ",") (B "3"))', first_pass

        traverse(tree, table)
        second_pass = tree.serialize()
        assert second_pass == first_pass, second_pass

    def test_insert_nodes(self):
        """Inserts at the front, beyond the end, and at a negative index."""
        tree = parse_sxpr('(A (B 1) (B 2) (X 3))').with_pos(0)
        # Each step works on the tree left behind by the previous one.
        steps = [
            (0, 'c', '=>',
             '(A (c "=>") (B "1") (B "2") (X "3"))'),
            (1000, 'd', '<=',
             '(A (c "=>") (B "1") (B "2") (X "3") (d "<="))'),
            (-2, 'e', '|',
             '(A (c "=>") (B "1") (B "2") (e "|") (X "3") (d "<="))'),
        ]
        for position, tag_name, delimiter, expected in steps:
            traverse(tree, {'A': insert_delimiter(position, tag_name, delimiter)})
            result = tree.serialize()
            assert result == expected, result