Commit adbbd2e6 authored by di68kap's avatar di68kap
Browse files

transform.py: has_attr() and attr_equals() added

parent f9c6395f
......@@ -407,7 +407,7 @@ class Parser:
# apply reentry-rule or catch error at root-parser
if i < 0: i = 0
try:
zombie = pe.node[ZOMBIE_TAG] # type: Optional[Node]
zombie = pe.node.pick_child(ZOMBIE_TAG) # type: Optional[Node]
except (KeyError, ValueError):
zombie = None
if zombie and not zombie.result:
......
......@@ -612,9 +612,9 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
# tree traversal and node selection #######################################
def __getitem__(self, key: Union[CriteriaType, int]) -> Union['Node', List['Node']]:
def __getitem__(self, key: Union[CriteriaType, int]) -> Union['Node', Sequence['Node']]:
"""
Returns the child node with the given index if ``index_or_tagname`` is
Returns the child node with the given index if ``key`` is
an integer or all child-nodes with the given tag name. Examples::
>>> tree = parse_sxpr('(a (b "X") (X (c "d")) (e (X "F")))')
......@@ -627,7 +627,10 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
key(str): A criterion (tag name(s), match function, node) or
an index of the child that shall be returned.
Returns:
Node: All nodes which have a given tag name.
Node: The node with the given index (always type Node),
all nodes which have a given tag name (type Node if there
exists only one or type Tuple[Node] if there are more than
one).
Raises:
KeyError: if no matching child was found.
IndexError: if key was an integer index that did not exist
......@@ -637,9 +640,9 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return self.children[key]
else:
mf = create_match_function(key)
for child in self.children:
if mf(child):
return child
items = tuple(child for child in self.children if mf(child))
if items:
return items if len(items) >= 2 else items[0]
raise IndexError('index out of range') if isinstance(key, int) \
else KeyError(str(key))
......@@ -658,16 +661,23 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
mf = create_match_function(key)
self.result = tuple(child for child in self.children if not mf(child))
def get(self, key: Union[CriteriaType, int],
        surrogate: Union['Node', Sequence['Node']]) -> Union['Node', Sequence['Node']]:
    """Returns the child node(s) selected by ``key`` or, if there is no
    match, the ``surrogate``. This mimics the behaviour of the get-method
    of Python's dictionaries.

    The lookup itself is delegated to ``self[key]``, i.e. ``key`` may be
    an integer index or a selection criterion such as a tag name.

    The type of the return value is always the same type as that of the
    surrogate: If the surrogate is a sequence, a sequence is returned
    (a single match is wrapped in a one-element tuple). If the surrogate
    is a Node, but there are several items matching key, then the first
    of these will be returned.

    Args:
        key: An index or a criterion selecting the child(ren).
        surrogate: The value returned if the lookup fails. Its type
            (single node or sequence) determines the shape of the result.
    Returns:
        The matching node(s), shaped like the surrogate, or the
        surrogate itself if nothing matched.
    """
    try:
        items = self[key]
    except (KeyError, IndexError):
        # KeyError: no child matches the criterion; IndexError: the integer
        # index does not exist. Both mean "nothing found", for which the
        # documented contract is to hand back the surrogate.
        return surrogate
    if isinstance(surrogate, Sequence):
        return items if isinstance(items, Sequence) else (items,)
    return items[0] if isinstance(items, Sequence) else items
......
......@@ -361,7 +361,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
for parent in syntax_tree.select_if(lambda node: any(child.tag_name == ZOMBIE_TAG
for child in node.children),
include_root=True, reverse=True):
zombie = parent[ZOMBIE_TAG]
zombie = parent.pick_child(ZOMBIE_TAG)
zombie.tag_name = '__TESTING_ARTIFACT__'
zombie.result = 'Artifact can be ignored. Be aware, though, that also the ' \
'tree structure may not be the same as in a non-testing ' \
......
......@@ -71,6 +71,9 @@ __all__ = ('typing',
'abbreviate_middle',
'escape_formatstr',
'as_identifier',
'as_list',
'first',
'last',
'linebreaks',
'line_col',
'text_pos',
......@@ -307,6 +310,30 @@ def as_identifier(s: str, replacement: str = "_") -> str:
return ''.join(ident)
def as_list(item_or_sequence) -> List[Any]:
    """Returns ``item_or_sequence`` as a list.

    Any iterable is expanded into a list of its elements (note that this
    includes strings, which are split into their characters). Every other
    object is wrapped in a list that contains it as its sole item.
    """
    if not isinstance(item_or_sequence, Iterable):
        return [item_or_sequence]
    return list(item_or_sequence)
def first(item_or_sequence: Union[Sequence, Any]) -> Any:
    """Returns the first item if the argument is a sequence; any other
    object is returned unchanged."""
    if not isinstance(item_or_sequence, Sequence):
        return item_or_sequence
    return item_or_sequence[0]
def last(item_or_sequence: Union[Sequence, Any]) -> Any:
    """Returns the last item if the argument is a sequence; any other
    object is returned unchanged."""
    if not isinstance(item_or_sequence, Sequence):
        return item_or_sequence
    return item_or_sequence[-1]
#######################################################################
#
# type system support
......
......@@ -575,17 +575,17 @@ def has_content(context: List[Node], regexp: str) -> bool:
return bool(re.match(regexp, context[-1].content))
# TODO: rename has_ancestor
@transformation_factory(collections.abc.Set)
def has_parent(context: List[Node], tag_name_set: AbstractSet[str], ancestry: int = 1) -> bool:
    """
    Checks whether a node with one of the given tag names appears among
    the nearest ancestors of the last node in the context.

    :param context: the list of nodes ending with the node in question;
        ``context[-2]`` is treated as its parent, ``context[-3]`` as its
        grandparent, and so on.
    :param tag_name_set: the tag names to look for.
    :param ancestry: determines how deep `has_parent` should dive into
        the ancestry. "1" means only the immediate parent will be
        considered, "2" means also the grandparents, and so on.
    :returns: True, if one of the considered ancestors has a tag name
        contained in ``tag_name_set``, False otherwise.
    """
    assert ancestry > 0
    # min(), not max(): the search must stop at whichever comes first, the
    # requested ancestry depth or the top of the context. With max() the
    # `ancestry` limit was ignored for long contexts and `context[-i]`
    # raised an IndexError when `ancestry` exceeded the context's depth.
    for i in range(2, min(ancestry + 2, len(context) + 1)):
        if context[-i].tag_name in tag_name_set:
            return True
    return False
......
......@@ -249,7 +249,7 @@ class TestNode:
tree = parse_sxpr('(A (B 1) (C 1) (B 2))')
assert 'B' in tree
assert 'X' not in tree
assert tree['B'].equals(Node('B', '1'))
assert tree.pick_child('B').equals(Node('B', '1'))
item_w_value_2 = lambda nd: nd.content == '2'
assert item_w_value_2 in tree
item_w_value_4 = lambda nd: nd.content == '4'
......@@ -534,7 +534,7 @@ class TestSerialization:
all_tags = {'XML', 'T', 'L'}
assert tree.as_xml(inline_tags=all_tags, omit_tags=all_tags) == "Hallo Welt!"
# tags with attributes will never be ommitted
tree['T'].attr['class'] = "kursiv"
tree.pick_child('T').attr['class'] = "kursiv"
assert tree.as_xml(inline_tags=all_tags, omit_tags=all_tags) == \
'<T class="kursiv">Hallo</T> Welt!'
......
......@@ -171,7 +171,7 @@ class TestConditionalTransformations:
context = [Node('A', 'alpha'),
Node('B', 'beta'),
Node('C', 'gamma')]
assert has_parent(context, {'A'})
assert has_parent(context, {'A'}, 2)
assert has_parent(context, {'B'})
assert not has_parent(context, {'C'})
......@@ -292,7 +292,7 @@ class TestWhitespaceTransformations:
transformations = {'SENTENCE': merge_adjacent(is_one_of('TEXT', 'L'), 'TEXT')}
traverse(sentence, transformations)
assert tree_sanity_check(sentence)
assert sentence['TEXT'].result == "Guten Tag"
assert sentence.pick_child('TEXT').result == "Guten Tag"
assert sentence[2].result == "Hallo Welt"
assert sentence[-1].tag_name == 'L'
assert 'T' in sentence
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment