2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

transform.py 63.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# transform.py - transformation functions for converting the
#                concrete into the abstract syntax tree
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences an Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.
18
19


20
21
22
"""
Module ``transform`` contains the functions for transforming the
concrete syntax tree (CST) into an abstract syntax tree (AST).
23

24
25
26
As these functions are very generic, they can in principle be
used for any kind of tree transformations, not necessarily only
for CST -> AST transformations.
27
28
"""

29

30
import collections.abc
31
from functools import partial, singledispatch, reduce
32
import inspect
33
import operator
di68kap's avatar
di68kap committed
34
from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
di68kap's avatar
di68kap committed
35
    Tuple, List, Sequence, Union, Text
36

di68kap's avatar
di68kap committed
37
from DHParser.error import ErrorCode, AST_TRANSFORM_CRASH, ERROR
38
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, LEAF_PTYPES, PLACEHOLDER, \
di68kap's avatar
di68kap committed
39
40
    RootNode, parse_sxpr, flatten_sxpr, TreeContext
from DHParser.toolkit import issubtype, isgenerictype, expand_table, smart_list, re, cython
di68kap's avatar
di68kap committed
41

42

43
44
45
46
__all__ = ('TransformationDict',
           'TransformationProc',
           'ConditionFunc',
           'KeyFunc',
Eckhart Arnold's avatar
Eckhart Arnold committed
47
           'TransformerCallable',
48
           'transformation_factory',
49
           'key_tag_name',
50
51
52
53
           'Filter',
           'BLOCK_LEAVES',
           'BLOCK_ANONYMOUS_LEAVES',
           'BLOCK_CHILDREN',
54
           'traverse',
Eckhart Arnold's avatar
Eckhart Arnold committed
55
           'always',
56
           'never',
57
           'neg',
58
59
           'any_of',
           'all_of',
60
           'is_named',
61
           'update_attr',
di68kap's avatar
di68kap committed
62
           'swap_attributes',
63
           'replace_by_single_child',
64
           'replace_by_children',
Eckhart Arnold's avatar
Eckhart Arnold committed
65
           'reduce_single_child',
66
           'replace_or_reduce',
Eckhart Arnold's avatar
Eckhart Arnold committed
67
           'change_tag_name',
68
           'replace_tag_names',
69
           'collapse',
70
           'collapse_children_if',
eckhart's avatar
eckhart committed
71
72
           'transform_content',
           'replace_content_with',
73
           'add_attributes',
74
           'normalize_whitespace',
75
           'merge_adjacent',
di68kap's avatar
di68kap committed
76
           'merge_leaves',
77
           'merge_connected',
di68kap's avatar
di68kap committed
78
           'merge_results',
79
           'move_adjacent',
80
           'left_associative',
81
           'lean_left',
82
           'apply_if',
eckhart's avatar
eckhart committed
83
           'apply_unless',
eckhart's avatar
eckhart committed
84
           'apply_ifelse',
85
           'traverse_locally',
86
           'is_anonymous',
87
           'is_anonymous_leaf',
88
           'contains_only_whitespace',
89
           # 'is_any_kind_of_whitespace',
90
91
           'is_empty',
           'is_token',
92
           'is_one_of',
93
           'not_one_of',
Eckhart Arnold's avatar
Eckhart Arnold committed
94
           'matches_re',
95
96
           'has_attr',
           'attr_equals',
97
           'has_content',
eckhart's avatar
eckhart committed
98
           'has_ancestor',
di68kap's avatar
di68kap committed
99
           'has_parent',
100
           'has_descendant',
eckhart's avatar
eckhart committed
101
           'has_child',
102
           'has_children',
di68kap's avatar
di68kap committed
103
           'has_sibling',
104
105
106
107
108
109
110
111
           'lstrip',
           'rstrip',
           'strip',
           'keep_children',
           'keep_children_if',
           'keep_tokens',
           'keep_nodes',
           'keep_content',
112
           'remove_children_if',
113
           'remove_children',
114
           'remove_content',
115
116
           # 'remove_first',
           # 'remove_last',
117
118
           'remove_whitespace',
           'remove_empty',
di68kap's avatar
di68kap committed
119
           'remove_anonymous_empty',
120
           'remove_anonymous_tokens',
121
           'remove_brackets',
122
           'remove_infix_operator',
123
           'remove_tokens',
124
           'remove_if',
125
126
127
           'flatten',
           'forbid',
           'require',
128
           'assert_content',
129
           'AT_THE_END',
130
           'node_maker',
131
           'delimit_children',
132
           'positions_of',
eckhart's avatar
eckhart committed
133
134
           'PositionType',
           'normalize_position_representation',
135
           'insert',
136
           'add_error',
137
           'error_on',
di68kap's avatar
di68kap committed
138
139
           'assert_has_children',
           'peek')
140
141


142
143
144
145
146
class Filter:
    def __call__(self, children: Tuple[Node]) -> Tuple[Node]:
        raise NotImplementedError


di68kap's avatar
di68kap committed
147
TransformationProc = Callable[[TreeContext], None]
148
149
TransformationDict = Dict[str, Union[Callable, Sequence[Callable]]]
TransformationCache = Dict[str, Tuple[Sequence[Filter], Sequence[Callable]]]
Eckhart Arnold's avatar
Eckhart Arnold committed
150
ProcessingTableType = Dict[str, Union[Sequence[Callable], TransformationDict]]
di68kap's avatar
di68kap committed
151
ConditionFunc = Callable  # Callable[[TreeContext], bool]
152
KeyFunc = Callable[[Node], str]
eckhart's avatar
eckhart committed
153
CriteriaType = Union[int, str, Callable]
154
TransformerCallable = Union[Callable[[RootNode], RootNode], partial]
155
156


157
def transformation_factory(t1=None, t2=None, t3=None, t4=None, t5=None):
158
159
    """
    Creates factory functions from transformation-functions that
160
    dispatch on the first parameter after the context parameter.
161
162

    Decorating a transformation-function that has more than merely the
Eckhart Arnold's avatar
Eckhart Arnold committed
163
    ``context``-parameter with ``transformation_factory`` creates a
164
    function with the same name, which returns a partial-function that
165
    takes just the context-parameter.
166

167
    Additionally, there is some syntactic sugar for
168
169
170
    transformation-functions that receive a collection as their second
    parameter and do not have any further parameters. In this case a
    list of parameters passed to the factory function will be converted
eckhart's avatar
eckhart committed
171
    into a single collection-parameter.
172

173
    The primary benefit is the readability of the transformation-tables.
174

175
176
    Usage::

eckhart's avatar
eckhart committed
177
        @transformation_factory(AbstractSet[str])
178
        def remove_tokens(context, tokens):
179
            ...
180
181
182

    or, alternatively::

183
        @transformation_factory
184
        def remove_tokens(context, tokens: AbstractSet[str]):
185
186
            ...

187
188
    Example::

189
        trans_table = { 'expression': remove_tokens('+', '-') }
190
191
192

    instead of::

193
        trans_table = { 'expression': partial(remove_tokens, tokens={'+', '-'}) }
194
195

    Parameters:
196
        t1:  type of the second argument of the transformation function,
197
198
            only necessary if the transformation functions' parameter list
            does not have type annotations.
199
200
    """

201
202
    def type_guard(t):
        """Raises an error if type `t` is a generic type or could be mistaken
di68kap's avatar
di68kap committed
203
        for the type of the canonical first parameter "TreeContext of
204
        transformation functions. Returns `t`."""
205
206
207
208
        # if isinstance(t, GenericMeta):
        #     raise TypeError("Generic Type %s not permitted\n in transformation_factory "
        #                     "decorator. Use the equivalent non-generic type instead!"
        #                     % str(t))
Eckhart Arnold's avatar
Eckhart Arnold committed
209
210
        if isinstance(t, str):          # ensure compatibility with python versions
            t = eval(t.replace('unicode', 'str'))  # with alternative type handling.
211
        if isgenerictype(t):
212
213
214
            raise TypeError("Generic Type %s not permitted\n in transformation_factory "
                            "decorator. Use the equivalent non-generic type instead!"
                            % str(t))
di68kap's avatar
di68kap committed
215
        if issubtype(TreeContext, t):
216
217
            raise TypeError("Sequence type %s not permitted\nin transformation_factory "
                            "decorator, because it could be mistaken for a base class "
di68kap's avatar
di68kap committed
218
                            "of TreeContext\nwhich is the type of the canonical first "
219
220
                            "argument of transformation functions. Try 'tuple' instead!"
                            % str(t))
221
222
        return t

223
    def decorator(f):
224
        nonlocal t1
225
226
227
228
        sig = inspect.signature(f)
        params = list(sig.parameters.values())[1:]
        if len(params) == 0:
            return f  # '@transformer' not needed w/o free parameters
229
        assert t1 or params[0].annotation != params[0].empty, \
230
            "No type information on second parameter found! Please, use type " \
eckhart's avatar
eckhart committed
231
            "annotation or provide the type information via transformer-decorator."
232
        f = singledispatch(f)
233
234
235
        p1type = params[0].annotation
        if t1 is None:
            t1 = type_guard(p1type)
236
        elif issubtype(p1type, type_guard(t1)):
237
            try:
238
                if len(params) == 1 and issubtype(p1type, Container) \
239
                        and not (issubtype(p1type, Text) or issubtype(p1type, ByteString)):
240
                    def gen_special(*args):
241
242
                        c = set(args) if issubtype(p1type, AbstractSet) else \
                            tuple(args) if issubtype(p1type, Sequence) else args
243
244
                        d = {params[0].name: c}
                        return partial(f, **d)
245
246
247
                    assert p1type.__args__ is not None, "Please use more specific container " \
                        "type, e.g. %s[Callable] instead of %s, in signature of function " \
                        "%s !" % (str(p1type), str(p1type), f.__name__)
248
249
250
                    for alt_t in p1type.__args__:
                        f.register(type_guard(alt_t), gen_special)
                    # f.register(type_guard(p1type.__args__[0]), gen_special)
251
252
253
254
255
            except AttributeError:
                pass  # Union Type does not allow subclassing, but is not needed here
        else:
            raise TypeError("Annotated type %s is not a subclass of decorated type %s !"
                            % (str(p1type), str(t1)))
256
257
258
259
260
261

        def gen_partial(*args, **kwargs):
            d = {p.name: arg for p, arg in zip(params, args)}
            d.update(kwargs)
            return partial(f, **d)

262
        for t in (t1, t2, t3, t4, t5):
263
            if t:
264
                f.register(type_guard(t), gen_partial)
265
266
            else:
                break
267
268
        return f

269
    if isinstance(t1, type(lambda: 1)):
270
271
272
        # Provide for the case that transformation_factory has been
        # written as plain decorator and not as a function call that
        # returns the decorator proper.
273
274
        func = t1
        t1 = None
275
276
277
278
279
        return decorator(func)
    else:
        return decorator


280
def key_tag_name(node: Node) -> str:
Eckhart Arnold's avatar
Eckhart Arnold committed
281
    """
eckhart's avatar
eckhart committed
282
283
    Returns the tag name of the node as key for selecting transformations
    from the transformation table in function `traverse`.
Eckhart Arnold's avatar
Eckhart Arnold committed
284
    """
285
286
287
    return node.tag_name


288
289
290
291
292
293
294
295
296
297
298
class BlockChildren(Filter):
    def __call__(self, children: Tuple[Node]) -> Tuple[Node]:
        return ()

class BlockLeaves(Filter):
    def __call__(self, children: Tuple[Node]) -> Tuple[Node]:
        return tuple(child for child in children if child._children)


class BlockAnonymousLeaves(Filter):
    def __call__(self, children: Tuple[Node]) -> Tuple[Node]:
di68kap's avatar
di68kap committed
299
300
301
302
303
304
        try:
            return tuple(child for child in children
                         if child._children or not child.tag_name[0] == ':')
        except IndexError:
            return tuple(child for child in children
                         if child._children or not child.anonymous)
305
306
307
308
309
310
311


BLOCK_CHILDREN = BlockChildren()
BLOCK_LEAVES = BlockLeaves()
BLOCK_ANONYMOUS_LEAVES = BlockAnonymousLeaves()


312
def traverse(root_node: Node,
Eckhart Arnold's avatar
Eckhart Arnold committed
313
             processing_table: ProcessingTableType,
314
             key_func: KeyFunc = key_tag_name) -> Node:
315
    """
Eckhart Arnold's avatar
Eckhart Arnold committed
316
    Traverses the syntax tree starting with the given ``node`` depth
317
    first and applies the sequences of callback-functions registered
318
    in the ``processing_table``-dictionary.
319
320
321
322
323
324
325
326
327

    The most important use case is the transformation of a concrete
    syntax tree into an abstract tree (AST). But it is also imaginable
    to employ tree-traversal for the semantic analysis of the AST.

    In order to assign sequences of callback-functions to nodes, a
    dictionary ("processing table") is used. The keys usually represent
    tag names, but any other key function is possible. There exist
    three special keys:
328

329
    - '<': always called (before any other processing function)
330
331
    - '*': called for those nodes for which no (other) processing
      function appears in the table
332
    - '>': always called (after any other processing function)
333
334
335
336
337

    Args:
        root_node (Node): The root-node of the syntax tree to be traversed
        processing_table (dict): node key -> sequence of functions that
            will be applied to matching nodes in order. This dictionary
338
339
            is interpreted as a ``compact_table``. See
            :func:`expand_table` or :func:`EBNFCompiler.EBNFTransTable`
340
        key_func (function): A mapping key_func(node) -> keystr. The default
341
            key_func yields node.tag_name.
342

343
344
    Example::

345
        table = { "term": [replace_by_single_child, flatten],
346
                  "factor, flowmarker, retrieveop": replace_by_single_child }
347
        traverse(node, table)
348

349
    """
350

351
352
353
    # Is this optimazation really needed?
    if '__cache__' in processing_table:
        # assume that processing table has already been expanded
eckhart's avatar
eckhart committed
354
        table = processing_table               # type: ProcessingTableType
eckhart's avatar
eckhart committed
355
        cache = cast(TransformationDict, processing_table['__cache__'])  # type: TransformationDict
356
    else:
357
358
        # normalize processing_table entries by turning single values
        # into lists with a single value
359
360
        table = {name: cast(Sequence[Callable], smart_list(call))
                 for name, call in list(processing_table.items())}
361
        table = expand_table(table)
362
        # substitute key for insiginificant whitespace
363
        assert '+' not in table, 'Symbol "+" in processing table is obsolete, use "<" instead'
364
365
        if '~' in table:
            if ':Whitespace' in table:
eckhart's avatar
eckhart committed
366
367
368
369
                raise AssertionError(
                    '"~" is a synonym for ":Whitespace" in the processing table. '
                    'To avoid confusion, choose either of the two, but do not use '
                    'both at the same time!')
370
371
372
373
            whitespace_transformation = table['~']
            del table['~']
            table[':Whitespace'] = whitespace_transformation
        # cache expanded table
eckhart's avatar
eckhart committed
374
375
        cache = cast(TransformationDict,
                     table.setdefault('__cache__', cast(TransformationDict, dict())))
376
377
        # change processing table in place, so its already expanded and cache filled next time
        processing_table.clear()
378
379
        processing_table.update(table)

380
381
382
383
384
385
386
387
388
389
390
391
    def split_filter(callables: Sequence[Callable]) -> Tuple[List[Filter], List[Callable]]:
        i = 0
        filter = []
        for callable in callables:
            if isinstance(callable, Filter):
                filter.append(callable)
                i += 1
            else:  break
        callables = list(callables[i:])
        assert not any(isinstance(callable, Filter) for callable in callables)
        return filter, callables

392
    def traverse_recursive(context):
eckhart's avatar
eckhart committed
393
        nonlocal cache
394
        node = context[-1]
395
396

        key = key_func(node)
397
        try:
398
            filters, sequence = cache[key]
399
        except KeyError:
400
401
402
403
404
405
            filters, pre = split_filter(table.get('<', []))
            assert BLOCK_CHILDREN not in filters
            more_filters, main = split_filter(table.get(key, table.get('*', [])))
            post = table.get('>', [])
            assert not any(isinstance(callable, Filter) for callable in post)
            sequence = pre + main + post
406
407
408
            all_filters = filters + more_filters
            if BLOCK_CHILDREN in all_filters:
                all_filters = [BLOCK_CHILDREN]
409
410
            cache[key] = (filters + more_filters, sequence)

di68kap's avatar
di68kap committed
411
        children = node._children
412
413
414
415
416
417
418
419
        for filter in filters:
            children = filter(children)
        if children:
            context.append(PLACEHOLDER)
            for child in children:
                context[-1] = child
                traverse_recursive(context)  # depth first
            context.pop()
420
421

        for call in sequence:
di68kap's avatar
di68kap committed
422
423
424
            try:
                call(context)
            except Exception as ae:
425
426
                raise AssertionError('An exception occurred when transforming "%s" with %s:\n%s'
                                     % (key, str(call), ae.__class__.__name__ + ': ' + str(ae)))
427

428
    traverse_recursive([root_node])
429
    return root_node
430
431
    # assert processing_table['__cache__']

432

433
434
435
436
437
438
#######################################################################
#
# specialized full tree transformations
#
#######################################################################

di68kap's avatar
di68kap committed
439
440
441
def merge_treetops(node: Node):
    """Recursively traverses the tree and "merges" nodes that contain
    only anonymous child nodes that are leaves. "mergeing" here means the
442
443
    node's result will be replaced by the merged content of the children.
    """
di68kap's avatar
di68kap committed
444
    if node._children:
445
        crunch = True
di68kap's avatar
di68kap committed
446
447
        for child in node._children:
            if child._children:
di68kap's avatar
di68kap committed
448
                merge_treetops(child)
449
450
451
452
453
454
455
                crunch = False
            elif not child.anonymous:
                crunch = False
        if crunch:
            node.result = node.content


456
#######################################################################
457
#
458
459
# meta transformations, i.e. transformations that call other
# transformations
460
#
461
#######################################################################
462
463


eckhart's avatar
eckhart committed
464
@transformation_factory(dict)
di68kap's avatar
di68kap committed
465
def traverse_locally(context: TreeContext,
eckhart's avatar
eckhart committed
466
467
                     processing_table: Dict,              # actually: ProcessingTableType
                     key_func: Callable = key_tag_name):  # actually: KeyFunc
468
469
    """
    Transforms the syntax tree starting from the last node in the context
470
471
472
473
474
475
476
    according to the given processing table. The purpose of this function is
    to apply certain transformations locally, i.e. only for those nodes that
    have the last node in the context as their parent node.
    """
    traverse(context[-1], processing_table, key_func)


di68kap's avatar
di68kap committed
477
478
479
480
481
482
483
484
485
486
487
def transformation_guard(value) -> None:
    if value is not None:
        raise AssertionError('Transformation a value instead of None!')


def condition_guard(value) -> bool:
    if value is None:
        raise AssertionError('Condition returned None instead of a bool!')
    return value


di68kap's avatar
di68kap committed
488
def apply_transformations(context: TreeContext, transformation: Union[Callable, Sequence[Callable]]):
489
    """Applies a single or a sequence of transformations to a context."""
490
    if callable(transformation):
di68kap's avatar
di68kap committed
491
        transformation_guard(transformation(context))
492
493
494
    else:
        assert isinstance(transformation, tuple)
        for trans in cast(tuple, transformation):
di68kap's avatar
di68kap committed
495
            transformation_guard(trans(context))
496
497


498
@transformation_factory(collections.abc.Callable, tuple)
di68kap's avatar
di68kap committed
499
def apply_if(context: TreeContext, transformation: Union[Callable, Tuple[Callable]], condition: Callable):
500
    """Applies a transformation only if a certain condition is met."""
di68kap's avatar
di68kap committed
501
    if condition_guard(condition(context)):
502
        apply_transformations(context, transformation)
503
504


eckhart's avatar
eckhart committed
505
@transformation_factory(collections.abc.Callable, tuple)
di68kap's avatar
di68kap committed
506
def apply_ifelse(context: TreeContext,
eckhart's avatar
eckhart committed
507
508
509
510
511
512
513
514
515
516
517
                 if_transformation: Union[Callable, Tuple[Callable]],
                 else_transformation: Union[Callable, Tuple[Callable]],
                 condition: Callable):
    """Applies a one particular transformation if a certain condition is met
    and another transformation otherwise."""
    if condition_guard(condition(context)):
        apply_transformations(context, if_transformation)
    else:
        apply_transformations(context, else_transformation)


518
@transformation_factory(collections.abc.Callable, tuple)
di68kap's avatar
di68kap committed
519
def apply_unless(context: TreeContext, transformation: Union[Callable, Tuple[Callable]], condition: Callable):
520
    """Applies a transformation if a certain condition is *not* met."""
di68kap's avatar
di68kap committed
521
    if not condition_guard(condition(context)):
522
        apply_transformations(context, transformation)
eckhart's avatar
eckhart committed
523
524


525
## boolean operators
526
527


di68kap's avatar
di68kap committed
528
def always(context: TreeContext) -> bool:
529
    """Always returns True, no matter what the state of the context is."""
Eckhart Arnold's avatar
Eckhart Arnold committed
530
531
532
    return True


di68kap's avatar
di68kap committed
533
def never(context: TreeContext) -> bool:
534
535
536
    """Always returns True, no matter what the state of the context is."""
    return False

537
@transformation_factory(collections.abc.Callable)
di68kap's avatar
di68kap committed
538
def neg(context: TreeContext, bool_func: collections.abc.Callable) -> bool:
539
540
541
542
    """Returns the inverted boolean result of `bool_func(context)`"""
    return not bool_func(context)


543
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
544
def any_of(context: TreeContext, bool_func_set: AbstractSet[collections.abc.Callable]) -> bool:
545
546
547
548
549
550
    """Returns True, if any of the bool functions in `bool_func_set` evaluate to True
    for the given context."""
    return any(bf(context) for bf in bool_func_set)


@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
551
def all_of(context: TreeContext, bool_func_set: AbstractSet[collections.abc.Callable]) -> bool:
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
    """Returns True, if all of the bool functions in `bool_func_set` evaluate to True
    for the given context."""
    return all(bf(context) for bf in bool_func_set)


#######################################################################
#
# conditionals that determine whether the context (or the last node in
# the context for that matter) fulfill a specific condition.
# ---------------------------------------------------------------------
#
# The context of a node is understood as a list of all parent nodes
# leading up to and including the node itself. If represented as list,
# the last element of the list is the node itself.
#
#######################################################################
568
569


di68kap's avatar
di68kap committed
570
def is_single_child(context: TreeContext) -> bool:
eckhart's avatar
eckhart committed
571
    """Returns ``True`` if the current node does not have any siblings."""
di68kap's avatar
di68kap committed
572
    return len(context[-2]._children) == 1
573
574


575
# TODO: ambiguous: named, tagging...
di68kap's avatar
di68kap committed
576
def is_named(context: TreeContext) -> bool:
eckhart's avatar
eckhart committed
577
    """Returns ``True`` if the current node's parser is a named parser."""
di68kap's avatar
di68kap committed
578
579
580
    # return not context[-1].anonymous
    tn = context[-1].tag_name
    return tn and tn[0] != ':'
581
582


di68kap's avatar
di68kap committed
583
def is_anonymous(context: TreeContext) -> bool:
584
    """Returns ``True`` if the current node is anonymous."""
di68kap's avatar
di68kap committed
585
586
587
    # return context[-1].anonymous
    tn = context[-1].tag_name
    return not tn or tn[0] == ':'
588
589


590
591
def is_anonymous_leaf(context: TreeContext) -> bool:
    """Returns `True` if context ends in an anonymous leaf-node"""
di68kap's avatar
di68kap committed
592
593
594
595
596
597
    # return not context[-1].children and context[-1].anonymous
    node = context[-1]
    if node._children:
        return False
    tn = node.tag_name
    return not tn or tn[0] == ':'
598
599


600
RX_WHITESPACE = re.compile(r'\s*$')
601
602


di68kap's avatar
di68kap committed
603
def contains_only_whitespace(context: TreeContext) -> bool:
604
    r"""Returns ``True`` for nodes that contain only whitespace regardless
605
606
607
    of the tag_name, i.e. nodes the content of which matches the regular
    expression /\s*/, including empty nodes. Note, that this is not true
    for anonymous whitespace nodes that contain comments."""
608
    return bool(RX_WHITESPACE.match(context[-1].content))
609
610


di68kap's avatar
di68kap committed
611
def is_empty(context: TreeContext) -> bool:
eckhart's avatar
eckhart committed
612
    """Returns ``True`` if the current node's content is empty."""
613
614
615
    return not context[-1].result


616
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
617
def is_token(context: TreeContext, tokens: AbstractSet[str] = frozenset()) -> bool:
618
    """
619
    Checks whether the last node in the context has the tag_name ":Text"
620
621
    and it's content matches one of the given tokens. Leading and trailing
    whitespace-tokens will be ignored. In case an empty set of tokens is passed,
eckhart's avatar
eckhart committed
622
    any token is a match.
623
624
    """
    node = context[-1]
625
    return node.tag_name == TOKEN_PTYPE and (not tokens or node.content in tokens)
626
627


628
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
629
def is_one_of(context: TreeContext, tag_name_set: AbstractSet[str]) -> bool:
630
631
632
633
    """Returns true, if the node's tag_name is one of the given tag names."""
    return context[-1].tag_name in tag_name_set


634
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
635
def not_one_of(context: TreeContext, tag_name_set: AbstractSet[str]) -> bool:
636
637
638
639
    """Returns true, if the node's tag_name is not one of the given tag names."""
    return context[-1].tag_name not in tag_name_set


Eckhart Arnold's avatar
Eckhart Arnold committed
640
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
641
def matches_re(context: TreeContext, patterns: AbstractSet[str]) -> bool:
642
643
    """
    Returns true, if the node's tag_name matches one of the regular
Eckhart Arnold's avatar
Eckhart Arnold committed
644
645
646
647
648
649
650
651
652
    expressions in `patterns`. For example, ':.*' matches all anonymous nodes.
    """
    tn = context[-1].tag_name
    for pattern in patterns:
        if re.match(pattern, tn):
            return True
    return False


653
@transformation_factory(str)
di68kap's avatar
di68kap committed
654
def has_attr(context: TreeContext, attr: str) -> bool:
655
656
657
658
659
660
661
662
663
    """
    Returns true, if the node has the attribute `attr`, no matter
    what its value is.
    """
    node = context[-1]
    return node.has_attr(attr)


@transformation_factory(str)
di68kap's avatar
di68kap committed
664
def attr_equals(context: TreeContext, attr: str, value: str) -> bool:
665
666
667
668
669
670
671
672
    """
    Returns true, if the node has the attribute `attr` and its value equals
    `value`.
    """
    node = context[-1]
    return node.has_attr(attr) and node.attr[attr] == value


eckhart's avatar
eckhart committed
673
@transformation_factory
di68kap's avatar
di68kap committed
674
def has_content(context: TreeContext, regexp: str) -> bool:
675
676
677
678
679
680
681
682
    """
    Checks a node's content against a regular expression.

    In contrast to ``re.match`` the regular expression must match the complete
    string and not just the beginning of the string to succeed!
    """
    if not regexp.endswith('$'):
        regexp += "$"
683
684
685
    return bool(re.match(regexp, context[-1].content))


686
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
687
def has_ancestor(context: TreeContext,
688
689
                 tag_name_set: AbstractSet[str],
                 generations: int =-1) -> bool:
690
691
    """
    Checks whether a node with one of the given tag names appears somewhere
692
    in the context before the last node in the context.
eckhart's avatar
eckhart committed
693

694
    :param generations: determines how deep `has_ancestor` should dive into
695
696
        the ancestry. "1" means only the immediate parents wil be considered,
        "2" means also the grandparents, ans so on.
697
        A value smaller or equal zero means all ancestors will be considered.
698
    """
699
700
701
    if generations <= 0:
        return any(nd.tag_name in tag_name_set for nd in context)
    for i in range(2, min(generations + 2, len(context) + 1)):
702
703
704
705
706
        if context[-i].tag_name in tag_name_set:
            return True
    return False


eckhart's avatar
eckhart committed
707
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
708
def has_parent(context: TreeContext, tag_name_set: AbstractSet[str]) -> bool:
eckhart's avatar
eckhart committed
709
710
711
712
713
    """Checks whether the immediate predecessor in the context has one of the
    given tags."""
    return has_ancestor(context, tag_name_set, 1)


714
715
716
717
718
def has_children(context: TreeContext) -> bool:
    """Checks whether last node in context has children."""
    return bool(context[-1]._children)


719
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
720
def has_descendant(context: TreeContext, tag_name_set: AbstractSet[str],
721
                   generations: int = -1) -> bool:
di68kap's avatar
di68kap committed
722
    assert generations != 0
di68kap's avatar
di68kap committed
723
    for child in context[-1]._children:
724
725
        if child.tag_name in tag_name_set:
            return True
di68kap's avatar
di68kap committed
726
        if (generations < 0 or generations > 1) \
727
                and has_descendant(context + [child], tag_name_set, generations - 1):
728
729
730
731
            return True
    return False


eckhart's avatar
eckhart committed
732
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
733
def has_child(context: TreeContext, tag_name_set: AbstractSet[str]) -> bool:
eckhart's avatar
eckhart committed
734
735
736
737
738
    """Checks whether at least one child (i.e. immediate descendant) has one of
    the given tags."""
    return has_descendant(context, tag_name_set, 1)


di68kap's avatar
di68kap committed
739
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
740
def has_sibling(context: TreeContext, tag_name_set: AbstractSet[str]):
741
742
    if len(context) >= 2:
        node = context[-1]
di68kap's avatar
di68kap committed
743
        for child in context[-2]._children:
744
745
            if child != node and child.tag_name in tag_name_set:
                return True
di68kap's avatar
di68kap committed
746
747
748
    return False


749
750
751
752
753
754
755
#######################################################################
#
# utility functions (private)
#
#######################################################################


756
def update_attr(dest: Node, src: Union[Node, Tuple[Node, ...]], root: RootNode):
757
    """
758
759
760
    Adds all attributes from `src` to `dest` and transfers all errors
    from `src` to `dest`. This is needed, in order to keep the attributes
    if the child node is going to be eliminated.
761
    """
762
763
    if isinstance(src, Node):
        src = (Node,)
764
    for s in src:
765
        # update attributes
766
        if s != dest and hasattr(s, '_xml_attr'):
767
            for k, v in s.attr.items():
768
769
770
771
772
                if k in dest.attr and v != dest.attr[k]:
                    raise ValueError('Conflicting attribute values %s and %s for key %s '
                                     'when reducing %s to %s ! Tree transformation stopped.'
                                     % (v, dest.attr[k], k, str(src), str(dest)))
                dest.attr[k] = v
773
774
        # transfer errors
        try:
eckhart's avatar
eckhart committed
775
776
777
778
779
            root.transfer_errors(s, dest)
            # ids = id(s)
            # if ids in root.error_nodes:
            #     root.error_nodes.setdefault(id(dest), []).extend(root.error_nodes[ids])
            #     del root.error_nodes[ids]
780
781
782
        except AttributeError:
            # root was not of type RootNode, probably a doc-test
            pass
783
784


785
786
787
def swap_attributes(node: Node, other: Node):
    """
    Exchanges the attributes between node and other. This might be
di68kap's avatar
di68kap committed
788
    needed when rearanging trees.
789
    """
790
791
    NA = node.has_attr()
    OA = other.has_attr()
792
    if NA or OA:
Eckhart Arnold's avatar
Eckhart Arnold committed
793
794
795
796
797
798
799
800
801
        save = node._xml_attr if NA else None
        if OA:
            node._xml_attr = other._xml_attr
        elif NA:
            node._xml_attr = None
        if NA:
            other._xml_attr = save
        elif OA:
            other._xml_attr = None
802
803


804
def _replace_by(node: Node, child: Node, root: RootNode):
805
806
807
    """
    Replaces node's contents by child's content including the tag name.
    """
di68kap's avatar
di68kap committed
808
809
810
811
812
813
    # if node.anonymous or not child.anonymous:
    #     node.tag_name = child.tag_name
    nd_tn = node.tag_name
    ch_tn = child.tag_name
    if not nd_tn or nd_tn[0] == ':' or (ch_tn and ch_tn[0] != ':'):
        node.tag_name = ch_tn
814
815
        # name, ptype = (node.tag_name.split(':') + [''])[:2]
        # child.parser = MockParser(name, ptype)
816
        # parser names must not be overwritten, else: child.parser.name = node.parser.name
di68kap's avatar
di68kap committed
817
    node._set_result(child._result)
818
    update_attr(node, (child,), root)
819
820


821
def _reduce_child(node: Node, child: Node, root: RootNode):
822
823
824
    """
    Sets node's results to the child's result, keeping node's tag_name.
    """
di68kap's avatar
di68kap committed
825
    node._set_result(child._result)
826
827
828
829
    update_attr(node, (child,), root)
    # update_attr(child, (node,), root)
    # if child.has_attr():
    #     node._xml_attr = child._xml_attr
830
831


832
833
834
835
836
837
838
839
840
841
842
843
#######################################################################
#
# rearranging transformations
#
# - tree may be rearranged (e.g.flattened)
# - nodes that are not leaves may be dropped
# - order is preserved
# - leave content is preserved (though not necessarily the leaves
#   themselves)
#
#######################################################################

844

di68kap's avatar
di68kap committed
845
def replace_by_single_child(context: TreeContext):
846
    """
847
848
    Removes single branch node, replacing it by its immediate descendant.
    Replacement only takes place, if the last node in the context has
849
850
851
    exactly one child. Attributes will be merged. In case one and the same
    attribute is defined for the child as well as the parent, the child's
    attribute value take precedence.
852
853
    """
    node = context[-1]
di68kap's avatar
di68kap committed
854
855
    if len(node._children) == 1:
        _replace_by(node, node._children[0], cast(RootNode, context[0]))
856
857


di68kap's avatar
di68kap committed
858
def replace_by_children(context: TreeContext):
859
860
    """
    Eliminates the last node in the context by replacing it with its children.
di68kap's avatar
di68kap committed
861
    The attributes of this node will be dropped. In case the last node is
di68kap's avatar
di68kap committed
862
    the root-note (i.e. len(context) == 1), it will only be eliminated, if
863
    there is but one child.
864
865

    WARNING: This should never be followed by move_adjacent() in the transformation list!!!
866
    """
di68kap's avatar
di68kap committed
867
868
869
    try:
        parent = context[-2]
    except IndexError:
di68kap's avatar
di68kap committed
870
        replace_by_single_child(context)
di68kap's avatar
di68kap committed
871
        return
872
    node = context[-1]
di68kap's avatar
di68kap committed
873
874
    if node._children:
        result = parent._children  # type: Tuple[Node, ...]
875
        i = result.index(node)
di68kap's avatar
di68kap committed
876
        parent._set_result(result[:i] + node._children + result[i + 1:])
877
878


di68kap's avatar
di68kap committed
879
def reduce_single_child(context: TreeContext):
880
    """
881
    Reduces a single branch node by transferring the result of its
882
    immediate descendant to this node, but keeping this node's parser entry.
883
    Reduction only takes place if the last node in the context has
884
885
886
    exactly one child. Attributes will be merged. In case one and the same
    attribute is defined for the child as well as the parent, the parent's
    attribute value take precedence.
887
888
    """
    node = context[-1]
di68kap's avatar
di68kap committed
889
890
    if len(node._children) == 1:
        _reduce_child(node, node._children[0], cast(RootNode, context[0]))
891
892


893
@transformation_factory(collections.abc.Callable)
di68kap's avatar
di68kap committed
894
def replace_or_reduce(context: TreeContext, condition: Callable = is_named):
895
896
    """
    Replaces node by a single child, if condition is met on child,
897
898
    otherwise (i.e. if the child is anonymous) reduces the child.
    """
899
    node = context[-1]
di68kap's avatar
di68kap committed
900
901
    if len(node._children) == 1:
        child = node._children[0]
Eckhart Arnold's avatar
Eckhart Arnold committed
902
        if condition(context):
903
            _replace_by(node, child, cast(RootNode, context[0]))
904
        else:
905
            _reduce_child(node, child, cast(RootNode, context[0]))
906
907


Eckhart Arnold's avatar
Eckhart Arnold committed
908
@transformation_factory(str)
di68kap's avatar
di68kap committed
909
def change_tag_name(context: TreeContext, tag_name: str, restriction: Callable = always):
910