transform.py 63.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# transform.py - transformation functions for converting the
#                concrete into the abstract syntax tree
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences an Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.
18
19


20
21
22
"""
Module ``transform`` contains the functions for transforming the
concrete syntax tree (CST) into an abstract syntax tree (AST).
23

24
25
26
As these functions are very generic, they can in principle be
used for any kind of tree transformations, not necessarily only
for CST -> AST transformations.
27
28
"""

29

30
import collections.abc
31
from functools import partial, singledispatch, reduce
32
import inspect
33
import operator
di68kap's avatar
di68kap committed
34
from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
di68kap's avatar
di68kap committed
35
    Tuple, List, Sequence, Union, Text
36

di68kap's avatar
di68kap committed
37
from DHParser.error import ErrorCode, AST_TRANSFORM_CRASH, ERROR
38
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, LEAF_PTYPES, PLACEHOLDER, \
di68kap's avatar
di68kap committed
39
40
    RootNode, parse_sxpr, flatten_sxpr, TreeContext
from DHParser.toolkit import issubtype, isgenerictype, expand_table, smart_list, re, cython
di68kap's avatar
di68kap committed
41

42

43
44
__all__ = ('TransformationDict',
           'TransformationProc',
45
           'TransformationFunc',
46
47
48
           'ConditionFunc',
           'KeyFunc',
           'transformation_factory',
49
           'key_tag_name',
50
51
52
53
           'Filter',
           'BLOCK_LEAVES',
           'BLOCK_ANONYMOUS_LEAVES',
           'BLOCK_CHILDREN',
54
           'traverse',
Eckhart Arnold's avatar
Eckhart Arnold committed
55
           'always',
56
           'never',
57
           'neg',
58
59
           'any_of',
           'all_of',
60
           'is_named',
61
           'update_attr',
di68kap's avatar
di68kap committed
62
           'swap_attributes',
63
           'replace_by_single_child',
64
           'replace_by_children',
Eckhart Arnold's avatar
Eckhart Arnold committed
65
           'reduce_single_child',
66
           'replace_or_reduce',
Eckhart Arnold's avatar
Eckhart Arnold committed
67
           'change_tag_name',
68
           'replace_tag_names',
69
           'collapse',
70
           'collapse_children_if',
eckhart's avatar
eckhart committed
71
72
           'transform_content',
           'replace_content_with',
73
           'add_attributes',
74
           'normalize_whitespace',
75
           'merge_adjacent',
di68kap's avatar
di68kap committed
76
           'merge_leaves',
77
           'merge_connected',
di68kap's avatar
di68kap committed
78
           'merge_results',
79
           'move_adjacent',
80
           'left_associative',
81
           'lean_left',
82
           'apply_if',
eckhart's avatar
eckhart committed
83
           'apply_unless',
eckhart's avatar
eckhart committed
84
           'apply_ifelse',
85
           'traverse_locally',
86
           'is_anonymous',
87
           'is_anonymous_leaf',
88
           'contains_only_whitespace',
89
           # 'is_any_kind_of_whitespace',
90
91
           'is_empty',
           'is_token',
92
           'is_one_of',
93
           'not_one_of',
Eckhart Arnold's avatar
Eckhart Arnold committed
94
           'matches_re',
95
96
           'has_attr',
           'attr_equals',
97
           'has_content',
eckhart's avatar
eckhart committed
98
           'has_ancestor',
di68kap's avatar
di68kap committed
99
           'has_parent',
100
           'has_descendant',
eckhart's avatar
eckhart committed
101
           'has_child',
102
           'has_children',
di68kap's avatar
di68kap committed
103
           'has_sibling',
104
105
106
107
108
109
110
111
           'lstrip',
           'rstrip',
           'strip',
           'keep_children',
           'keep_children_if',
           'keep_tokens',
           'keep_nodes',
           'keep_content',
112
           'remove_children_if',
113
           'remove_children',
114
           'remove_content',
115
116
           # 'remove_first',
           # 'remove_last',
117
118
           'remove_whitespace',
           'remove_empty',
di68kap's avatar
di68kap committed
119
           'remove_anonymous_empty',
120
           'remove_anonymous_tokens',
121
           'remove_brackets',
122
           'remove_infix_operator',
123
           'remove_tokens',
124
           'remove_if',
125
126
127
           'flatten',
           'forbid',
           'require',
128
           'assert_content',
129
           'AT_THE_END',
130
           'node_maker',
131
           'delimit_children',
132
           'positions_of',
eckhart's avatar
eckhart committed
133
134
           'PositionType',
           'normalize_position_representation',
135
           'insert',
136
           'add_error',
137
           'error_on',
di68kap's avatar
di68kap committed
138
139
           'assert_has_children',
           'peek')
140
141


142
143
144
145
146
class Filter:
    def __call__(self, children: Tuple[Node]) -> Tuple[Node]:
        raise NotImplementedError


di68kap's avatar
di68kap committed
147
TransformationProc = Callable[[TreeContext], None]
148
149
150
TransformationDict = Dict[str, Union[Callable, Sequence[Callable]]]
TransformationCache = Dict[str, Tuple[Sequence[Filter], Sequence[Callable]]]
TransformationFunc = Union[Callable[[TreeContext], Any], partial]
Eckhart Arnold's avatar
Eckhart Arnold committed
151
ProcessingTableType = Dict[str, Union[Sequence[Callable], TransformationDict]]
di68kap's avatar
di68kap committed
152
ConditionFunc = Callable  # Callable[[TreeContext], bool]
153
KeyFunc = Callable[[Node], str]
eckhart's avatar
eckhart committed
154
CriteriaType = Union[int, str, Callable]
155
156


157
def transformation_factory(t1=None, t2=None, t3=None, t4=None, t5=None):
158
159
    """
    Creates factory functions from transformation-functions that
160
    dispatch on the first parameter after the context parameter.
161
162

    Decorating a transformation-function that has more than merely the
Eckhart Arnold's avatar
Eckhart Arnold committed
163
    ``context``-parameter with ``transformation_factory`` creates a
164
    function with the same name, which returns a partial-function that
165
    takes just the context-parameter.
166

167
    Additionally, there is some syntactic sugar for
168
169
170
    transformation-functions that receive a collection as their second
    parameter and do not have any further parameters. In this case a
    list of parameters passed to the factory function will be converted
eckhart's avatar
eckhart committed
171
    into a single collection-parameter.
172

173
    The primary benefit is the readability of the transformation-tables.
174

175
176
    Usage::

eckhart's avatar
eckhart committed
177
        @transformation_factory(AbstractSet[str])
178
        def remove_tokens(context, tokens):
179
            ...
180
181
182

    or, alternatively::

183
        @transformation_factory
184
        def remove_tokens(context, tokens: AbstractSet[str]):
185
186
            ...

187
188
    Example::

189
        trans_table = { 'expression': remove_tokens('+', '-') }
190
191
192

    instead of::

193
        trans_table = { 'expression': partial(remove_tokens, tokens={'+', '-'}) }
194
195

    Parameters:
196
        t1:  type of the second argument of the transformation function,
197
198
            only necessary if the transformation functions' parameter list
            does not have type annotations.
199
200
    """

201
202
    def type_guard(t):
        """Raises an error if type `t` is a generic type or could be mistaken
di68kap's avatar
di68kap committed
203
        for the type of the canonical first parameter "TreeContext of
204
        transformation functions. Returns `t`."""
205
206
207
208
        # if isinstance(t, GenericMeta):
        #     raise TypeError("Generic Type %s not permitted\n in transformation_factory "
        #                     "decorator. Use the equivalent non-generic type instead!"
        #                     % str(t))
Eckhart Arnold's avatar
Eckhart Arnold committed
209
210
        if isinstance(t, str):          # ensure compatibility with python versions
            t = eval(t.replace('unicode', 'str'))  # with alternative type handling.
211
        if isgenerictype(t):
212
213
214
            raise TypeError("Generic Type %s not permitted\n in transformation_factory "
                            "decorator. Use the equivalent non-generic type instead!"
                            % str(t))
di68kap's avatar
di68kap committed
215
        if issubtype(TreeContext, t):
216
217
            raise TypeError("Sequence type %s not permitted\nin transformation_factory "
                            "decorator, because it could be mistaken for a base class "
di68kap's avatar
di68kap committed
218
                            "of TreeContext\nwhich is the type of the canonical first "
219
220
                            "argument of transformation functions. Try 'tuple' instead!"
                            % str(t))
221
222
        return t

223
    def decorator(f):
224
        nonlocal t1
225
226
227
228
        sig = inspect.signature(f)
        params = list(sig.parameters.values())[1:]
        if len(params) == 0:
            return f  # '@transformer' not needed w/o free parameters
229
        assert t1 or params[0].annotation != params[0].empty, \
230
            "No type information on second parameter found! Please, use type " \
eckhart's avatar
eckhart committed
231
            "annotation or provide the type information via transformer-decorator."
232
        f = singledispatch(f)
233
234
235
        p1type = params[0].annotation
        if t1 is None:
            t1 = type_guard(p1type)
236
        elif issubtype(p1type, type_guard(t1)):
237
            try:
238
                if len(params) == 1 and issubtype(p1type, Container) \
239
                        and not (issubtype(p1type, Text) or issubtype(p1type, ByteString)):
240
                    def gen_special(*args):
241
242
                        c = set(args) if issubtype(p1type, AbstractSet) else \
                            tuple(args) if issubtype(p1type, Sequence) else args
243
244
                        d = {params[0].name: c}
                        return partial(f, **d)
245
246
247
                    assert p1type.__args__ is not None, "Please use more specific container " \
                        "type, e.g. %s[Callable] instead of %s, in signature of function " \
                        "%s !" % (str(p1type), str(p1type), f.__name__)
248
249
250
                    for alt_t in p1type.__args__:
                        f.register(type_guard(alt_t), gen_special)
                    # f.register(type_guard(p1type.__args__[0]), gen_special)
251
252
253
254
255
            except AttributeError:
                pass  # Union Type does not allow subclassing, but is not needed here
        else:
            raise TypeError("Annotated type %s is not a subclass of decorated type %s !"
                            % (str(p1type), str(t1)))
256
257
258
259
260
261

        def gen_partial(*args, **kwargs):
            d = {p.name: arg for p, arg in zip(params, args)}
            d.update(kwargs)
            return partial(f, **d)

262
        for t in (t1, t2, t3, t4, t5):
263
            if t:
264
                f.register(type_guard(t), gen_partial)
265
266
            else:
                break
267
268
        return f

269
    if isinstance(t1, type(lambda: 1)):
270
271
272
        # Provide for the case that transformation_factory has been
        # written as plain decorator and not as a function call that
        # returns the decorator proper.
273
274
        func = t1
        t1 = None
275
276
277
278
279
        return decorator(func)
    else:
        return decorator


280
def key_tag_name(node: Node) -> str:
Eckhart Arnold's avatar
Eckhart Arnold committed
281
    """
eckhart's avatar
eckhart committed
282
283
    Returns the tag name of the node as key for selecting transformations
    from the transformation table in function `traverse`.
Eckhart Arnold's avatar
Eckhart Arnold committed
284
    """
285
286
287
    return node.tag_name


288
289
290
291
292
293
294
295
296
297
298
class BlockChildren(Filter):
    def __call__(self, children: Tuple[Node]) -> Tuple[Node]:
        return ()

class BlockLeaves(Filter):
    def __call__(self, children: Tuple[Node]) -> Tuple[Node]:
        return tuple(child for child in children if child._children)


class BlockAnonymousLeaves(Filter):
    def __call__(self, children: Tuple[Node]) -> Tuple[Node]:
di68kap's avatar
di68kap committed
299
300
301
302
303
304
        try:
            return tuple(child for child in children
                         if child._children or not child.tag_name[0] == ':')
        except IndexError:
            return tuple(child for child in children
                         if child._children or not child.anonymous)
305
306
307
308
309
310
311


BLOCK_CHILDREN = BlockChildren()
BLOCK_LEAVES = BlockLeaves()
BLOCK_ANONYMOUS_LEAVES = BlockAnonymousLeaves()


312
def traverse(root_node: Node,
Eckhart Arnold's avatar
Eckhart Arnold committed
313
             processing_table: ProcessingTableType,
eckhart's avatar
eckhart committed
314
             key_func: KeyFunc = key_tag_name) -> None:
315
    """
Eckhart Arnold's avatar
Eckhart Arnold committed
316
    Traverses the syntax tree starting with the given ``node`` depth
317
    first and applies the sequences of callback-functions registered
318
    in the ``processing_table``-dictionary.
319
320
321
322
323
324
325
326
327

    The most important use case is the transformation of a concrete
    syntax tree into an abstract tree (AST). But it is also imaginable
    to employ tree-traversal for the semantic analysis of the AST.

    In order to assign sequences of callback-functions to nodes, a
    dictionary ("processing table") is used. The keys usually represent
    tag names, but any other key function is possible. There exist
    three special keys:
328

329
    - '<': always called (before any other processing function)
330
331
    - '*': called for those nodes for which no (other) processing
      function appears in the table
332
    - '>': always called (after any other processing function)
333
334
335
336
337

    Args:
        root_node (Node): The root-node of the syntax tree to be traversed
        processing_table (dict): node key -> sequence of functions that
            will be applied to matching nodes in order. This dictionary
338
339
            is interpreted as a ``compact_table``. See
            :func:`expand_table` or :func:`EBNFCompiler.EBNFTransTable`
340
        key_func (function): A mapping key_func(node) -> keystr. The default
341
            key_func yields node.tag_name.
342

343
344
    Example::

345
        table = { "term": [replace_by_single_child, flatten],
346
                  "factor, flowmarker, retrieveop": replace_by_single_child }
347
        traverse(node, table)
348

349
    """
350

351
352
353
    # Is this optimazation really needed?
    if '__cache__' in processing_table:
        # assume that processing table has already been expanded
eckhart's avatar
eckhart committed
354
        table = processing_table               # type: ProcessingTableType
eckhart's avatar
eckhart committed
355
        cache = cast(TransformationDict, processing_table['__cache__'])  # type: TransformationDict
356
    else:
357
358
        # normalize processing_table entries by turning single values
        # into lists with a single value
359
360
        table = {name: cast(Sequence[Callable], smart_list(call))
                 for name, call in list(processing_table.items())}
361
        table = expand_table(table)
362
        # substitute key for insiginificant whitespace
363
        assert '+' not in table, 'Symbol "+" in processing table is obsolete, use "<" instead'
364
365
        if '~' in table:
            if ':Whitespace' in table:
eckhart's avatar
eckhart committed
366
367
368
369
                raise AssertionError(
                    '"~" is a synonym for ":Whitespace" in the processing table. '
                    'To avoid confusion, choose either of the two, but do not use '
                    'both at the same time!')
370
371
372
373
            whitespace_transformation = table['~']
            del table['~']
            table[':Whitespace'] = whitespace_transformation
        # cache expanded table
eckhart's avatar
eckhart committed
374
375
        cache = cast(TransformationDict,
                     table.setdefault('__cache__', cast(TransformationDict, dict())))
376
377
        # change processing table in place, so its already expanded and cache filled next time
        processing_table.clear()
378
379
        processing_table.update(table)

380
381
382
383
384
385
386
387
388
389
390
391
    def split_filter(callables: Sequence[Callable]) -> Tuple[List[Filter], List[Callable]]:
        i = 0
        filter = []
        for callable in callables:
            if isinstance(callable, Filter):
                filter.append(callable)
                i += 1
            else:  break
        callables = list(callables[i:])
        assert not any(isinstance(callable, Filter) for callable in callables)
        return filter, callables

392
    def traverse_recursive(context):
eckhart's avatar
eckhart committed
393
        nonlocal cache
394
        node = context[-1]
395
396

        key = key_func(node)
397
        try:
398
            filters, sequence = cache[key]
399
        except KeyError:
400
401
402
403
404
405
            filters, pre = split_filter(table.get('<', []))
            assert BLOCK_CHILDREN not in filters
            more_filters, main = split_filter(table.get(key, table.get('*', [])))
            post = table.get('>', [])
            assert not any(isinstance(callable, Filter) for callable in post)
            sequence = pre + main + post
406
407
408
            all_filters = filters + more_filters
            if BLOCK_CHILDREN in all_filters:
                all_filters = [BLOCK_CHILDREN]
409
410
            cache[key] = (filters + more_filters, sequence)

di68kap's avatar
di68kap committed
411
        children = node._children
412
413
414
415
416
417
418
419
        for filter in filters:
            children = filter(children)
        if children:
            context.append(PLACEHOLDER)
            for child in children:
                context[-1] = child
                traverse_recursive(context)  # depth first
            context.pop()
420
421

        for call in sequence:
di68kap's avatar
di68kap committed
422
423
424
            try:
                call(context)
            except Exception as ae:
425
426
                raise AssertionError('An exception occurred when transforming "%s" with %s:\n%s'
                                     % (key, str(call), ae.__class__.__name__ + ': ' + str(ae)))
427

428
    traverse_recursive([root_node])
429
430
    # assert processing_table['__cache__']

431

432
433
434
435
436
437
#######################################################################
#
# specialized full tree transformations
#
#######################################################################

di68kap's avatar
di68kap committed
438
439
440
def merge_treetops(node: Node):
    """Recursively traverses the tree and "merges" nodes that contain
    only anonymous child nodes that are leaves. "mergeing" here means the
441
442
    node's result will be replaced by the merged content of the children.
    """
di68kap's avatar
di68kap committed
443
    if node._children:
444
        crunch = True
di68kap's avatar
di68kap committed
445
446
        for child in node._children:
            if child._children:
di68kap's avatar
di68kap committed
447
                merge_treetops(child)
448
449
450
451
452
453
454
                crunch = False
            elif not child.anonymous:
                crunch = False
        if crunch:
            node.result = node.content


455
#######################################################################
456
#
457
458
# meta transformations, i.e. transformations that call other
# transformations
459
#
460
#######################################################################
461
462


eckhart's avatar
eckhart committed
463
@transformation_factory(dict)
di68kap's avatar
di68kap committed
464
def traverse_locally(context: TreeContext,
eckhart's avatar
eckhart committed
465
466
                     processing_table: Dict,              # actually: ProcessingTableType
                     key_func: Callable = key_tag_name):  # actually: KeyFunc
467
468
    """
    Transforms the syntax tree starting from the last node in the context
469
470
471
472
473
474
475
    according to the given processing table. The purpose of this function is
    to apply certain transformations locally, i.e. only for those nodes that
    have the last node in the context as their parent node.
    """
    traverse(context[-1], processing_table, key_func)


di68kap's avatar
di68kap committed
476
477
478
479
480
481
482
483
484
485
486
def transformation_guard(value) -> None:
    if value is not None:
        raise AssertionError('Transformation a value instead of None!')


def condition_guard(value) -> bool:
    if value is None:
        raise AssertionError('Condition returned None instead of a bool!')
    return value


di68kap's avatar
di68kap committed
487
def apply_transformations(context: TreeContext, transformation: Union[Callable, Sequence[Callable]]):
488
    """Applies a single or a sequence of transformations to a context."""
489
    if callable(transformation):
di68kap's avatar
di68kap committed
490
        transformation_guard(transformation(context))
491
492
493
    else:
        assert isinstance(transformation, tuple)
        for trans in cast(tuple, transformation):
di68kap's avatar
di68kap committed
494
            transformation_guard(trans(context))
495
496


497
@transformation_factory(collections.abc.Callable, tuple)
di68kap's avatar
di68kap committed
498
def apply_if(context: TreeContext, transformation: Union[Callable, Tuple[Callable]], condition: Callable):
499
    """Applies a transformation only if a certain condition is met."""
di68kap's avatar
di68kap committed
500
    if condition_guard(condition(context)):
501
        apply_transformations(context, transformation)
502
503


eckhart's avatar
eckhart committed
504
@transformation_factory(collections.abc.Callable, tuple)
di68kap's avatar
di68kap committed
505
def apply_ifelse(context: TreeContext,
eckhart's avatar
eckhart committed
506
507
508
509
510
511
512
513
514
515
516
                 if_transformation: Union[Callable, Tuple[Callable]],
                 else_transformation: Union[Callable, Tuple[Callable]],
                 condition: Callable):
    """Applies a one particular transformation if a certain condition is met
    and another transformation otherwise."""
    if condition_guard(condition(context)):
        apply_transformations(context, if_transformation)
    else:
        apply_transformations(context, else_transformation)


517
@transformation_factory(collections.abc.Callable, tuple)
di68kap's avatar
di68kap committed
518
def apply_unless(context: TreeContext, transformation: Union[Callable, Tuple[Callable]], condition: Callable):
519
    """Applies a transformation if a certain condition is *not* met."""
di68kap's avatar
di68kap committed
520
    if not condition_guard(condition(context)):
521
        apply_transformations(context, transformation)
eckhart's avatar
eckhart committed
522
523


524
## boolean operators
525
526


di68kap's avatar
di68kap committed
527
def always(context: TreeContext) -> bool:
528
    """Always returns True, no matter what the state of the context is."""
Eckhart Arnold's avatar
Eckhart Arnold committed
529
530
531
    return True


di68kap's avatar
di68kap committed
532
def never(context: TreeContext) -> bool:
533
534
535
    """Always returns True, no matter what the state of the context is."""
    return False

536
@transformation_factory(collections.abc.Callable)
di68kap's avatar
di68kap committed
537
def neg(context: TreeContext, bool_func: collections.abc.Callable) -> bool:
538
539
540
541
    """Returns the inverted boolean result of `bool_func(context)`"""
    return not bool_func(context)


542
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
543
def any_of(context: TreeContext, bool_func_set: AbstractSet[collections.abc.Callable]) -> bool:
544
545
546
547
548
549
    """Returns True, if any of the bool functions in `bool_func_set` evaluate to True
    for the given context."""
    return any(bf(context) for bf in bool_func_set)


@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
550
def all_of(context: TreeContext, bool_func_set: AbstractSet[collections.abc.Callable]) -> bool:
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
    """Returns True, if all of the bool functions in `bool_func_set` evaluate to True
    for the given context."""
    return all(bf(context) for bf in bool_func_set)


#######################################################################
#
# conditionals that determine whether the context (or the last node in
# the context for that matter) fulfill a specific condition.
# ---------------------------------------------------------------------
#
# The context of a node is understood as a list of all parent nodes
# leading up to and including the node itself. If represented as list,
# the last element of the list is the node itself.
#
#######################################################################
567
568


di68kap's avatar
di68kap committed
569
def is_single_child(context: TreeContext) -> bool:
eckhart's avatar
eckhart committed
570
    """Returns ``True`` if the current node does not have any siblings."""
di68kap's avatar
di68kap committed
571
    return len(context[-2]._children) == 1
572
573


574
# TODO: ambiguous: named, tagging...
di68kap's avatar
di68kap committed
575
def is_named(context: TreeContext) -> bool:
eckhart's avatar
eckhart committed
576
    """Returns ``True`` if the current node's parser is a named parser."""
di68kap's avatar
di68kap committed
577
578
579
    # return not context[-1].anonymous
    tn = context[-1].tag_name
    return tn and tn[0] != ':'
580
581


di68kap's avatar
di68kap committed
582
def is_anonymous(context: TreeContext) -> bool:
583
    """Returns ``True`` if the current node is anonymous."""
di68kap's avatar
di68kap committed
584
585
586
    # return context[-1].anonymous
    tn = context[-1].tag_name
    return not tn or tn[0] == ':'
587
588


589
590
def is_anonymous_leaf(context: TreeContext) -> bool:
    """Returns `True` if context ends in an anonymous leaf-node"""
di68kap's avatar
di68kap committed
591
592
593
594
595
596
    # return not context[-1].children and context[-1].anonymous
    node = context[-1]
    if node._children:
        return False
    tn = node.tag_name
    return not tn or tn[0] == ':'
597
598


599
RX_WHITESPACE = re.compile(r'\s*$')
600
601


di68kap's avatar
di68kap committed
602
def contains_only_whitespace(context: TreeContext) -> bool:
603
    r"""Returns ``True`` for nodes that contain only whitespace regardless
604
605
606
    of the tag_name, i.e. nodes the content of which matches the regular
    expression /\s*/, including empty nodes. Note, that this is not true
    for anonymous whitespace nodes that contain comments."""
607
    return bool(RX_WHITESPACE.match(context[-1].content))
608
609


di68kap's avatar
di68kap committed
610
def is_empty(context: TreeContext) -> bool:
eckhart's avatar
eckhart committed
611
    """Returns ``True`` if the current node's content is empty."""
612
613
614
    return not context[-1].result


615
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
616
def is_token(context: TreeContext, tokens: AbstractSet[str] = frozenset()) -> bool:
617
    """
618
    Checks whether the last node in the context has the tag_name ":Text"
619
620
    and it's content matches one of the given tokens. Leading and trailing
    whitespace-tokens will be ignored. In case an empty set of tokens is passed,
eckhart's avatar
eckhart committed
621
    any token is a match.
622
623
    """
    node = context[-1]
624
    return node.tag_name == TOKEN_PTYPE and (not tokens or node.content in tokens)
625
626


627
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
628
def is_one_of(context: TreeContext, tag_name_set: AbstractSet[str]) -> bool:
629
630
631
632
    """Returns true, if the node's tag_name is one of the given tag names."""
    return context[-1].tag_name in tag_name_set


633
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
634
def not_one_of(context: TreeContext, tag_name_set: AbstractSet[str]) -> bool:
635
636
637
638
    """Returns true, if the node's tag_name is not one of the given tag names."""
    return context[-1].tag_name not in tag_name_set


Eckhart Arnold's avatar
Eckhart Arnold committed
639
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
640
def matches_re(context: TreeContext, patterns: AbstractSet[str]) -> bool:
641
642
    """
    Returns true, if the node's tag_name matches one of the regular
Eckhart Arnold's avatar
Eckhart Arnold committed
643
644
645
646
647
648
649
650
651
    expressions in `patterns`. For example, ':.*' matches all anonymous nodes.
    """
    tn = context[-1].tag_name
    for pattern in patterns:
        if re.match(pattern, tn):
            return True
    return False


652
@transformation_factory(str)
di68kap's avatar
di68kap committed
653
def has_attr(context: TreeContext, attr: str) -> bool:
654
655
656
657
658
659
660
661
662
    """
    Returns true, if the node has the attribute `attr`, no matter
    what its value is.
    """
    node = context[-1]
    return node.has_attr(attr)


@transformation_factory(str)
di68kap's avatar
di68kap committed
663
def attr_equals(context: TreeContext, attr: str, value: str) -> bool:
664
665
666
667
668
669
670
671
    """
    Returns true, if the node has the attribute `attr` and its value equals
    `value`.
    """
    node = context[-1]
    return node.has_attr(attr) and node.attr[attr] == value


eckhart's avatar
eckhart committed
672
@transformation_factory
di68kap's avatar
di68kap committed
673
def has_content(context: TreeContext, regexp: str) -> bool:
674
675
676
677
678
679
680
681
    """
    Checks a node's content against a regular expression.

    In contrast to ``re.match`` the regular expression must match the complete
    string and not just the beginning of the string to succeed!
    """
    if not regexp.endswith('$'):
        regexp += "$"
682
683
684
    return bool(re.match(regexp, context[-1].content))


685
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
686
def has_ancestor(context: TreeContext,
687
688
                 tag_name_set: AbstractSet[str],
                 generations: int =-1) -> bool:
689
690
    """
    Checks whether a node with one of the given tag names appears somewhere
691
    in the context before the last node in the context.
eckhart's avatar
eckhart committed
692

693
    :param generations: determines how deep `has_ancestor` should dive into
694
695
        the ancestry. "1" means only the immediate parents wil be considered,
        "2" means also the grandparents, ans so on.
696
        A value smaller or equal zero means all ancestors will be considered.
697
    """
698
699
700
    if generations <= 0:
        return any(nd.tag_name in tag_name_set for nd in context)
    for i in range(2, min(generations + 2, len(context) + 1)):
701
702
703
704
705
        if context[-i].tag_name in tag_name_set:
            return True
    return False


eckhart's avatar
eckhart committed
706
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
707
def has_parent(context: TreeContext, tag_name_set: AbstractSet[str]) -> bool:
eckhart's avatar
eckhart committed
708
709
710
711
712
    """Checks whether the immediate predecessor in the context has one of the
    given tags."""
    return has_ancestor(context, tag_name_set, 1)


713
714
715
716
717
def has_children(context: TreeContext) -> bool:
    """Checks whether last node in context has children."""
    return bool(context[-1]._children)


718
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
719
def has_descendant(context: TreeContext, tag_name_set: AbstractSet[str],
720
                   generations: int = -1) -> bool:
di68kap's avatar
di68kap committed
721
    assert generations != 0
di68kap's avatar
di68kap committed
722
    for child in context[-1]._children:
723
724
        if child.tag_name in tag_name_set:
            return True
di68kap's avatar
di68kap committed
725
        if (generations < 0 or generations > 1) \
726
                and has_descendant(context + [child], tag_name_set, generations - 1):
727
728
729
730
            return True
    return False


eckhart's avatar
eckhart committed
731
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
732
def has_child(context: TreeContext, tag_name_set: AbstractSet[str]) -> bool:
eckhart's avatar
eckhart committed
733
734
735
736
737
    """Checks whether at least one child (i.e. immediate descendant) has one of
    the given tags."""
    return has_descendant(context, tag_name_set, 1)


di68kap's avatar
di68kap committed
738
@transformation_factory(collections.abc.Set)
di68kap's avatar
di68kap committed
739
def has_sibling(context: TreeContext, tag_name_set: AbstractSet[str]):
740
741
    if len(context) >= 2:
        node = context[-1]
di68kap's avatar
di68kap committed
742
        for child in context[-2]._children:
743
744
            if child != node and child.tag_name in tag_name_set:
                return True
di68kap's avatar
di68kap committed
745
746
747
    return False


748
749
750
751
752
753
754
#######################################################################
#
# utility functions (private)
#
#######################################################################


755
def update_attr(dest: Node, src: Union[Node, Tuple[Node, ...]], root: RootNode):
756
    """
757
758
759
    Adds all attributes from `src` to `dest` and transfers all errors
    from `src` to `dest`. This is needed, in order to keep the attributes
    if the child node is going to be eliminated.
760
    """
761
762
    if isinstance(src, Node):
        src = (Node,)
763
    for s in src:
764
        # update attributes
765
        if s != dest and hasattr(s, '_xml_attr'):
766
            for k, v in s.attr.items():
767
768
769
770
771
                if k in dest.attr and v != dest.attr[k]:
                    raise ValueError('Conflicting attribute values %s and %s for key %s '
                                     'when reducing %s to %s ! Tree transformation stopped.'
                                     % (v, dest.attr[k], k, str(src), str(dest)))
                dest.attr[k] = v
772
773
        # transfer errors
        try:
eckhart's avatar
eckhart committed
774
775
776
777
778
            root.transfer_errors(s, dest)
            # ids = id(s)
            # if ids in root.error_nodes:
            #     root.error_nodes.setdefault(id(dest), []).extend(root.error_nodes[ids])
            #     del root.error_nodes[ids]
779
780
781
        except AttributeError:
            # root was not of type RootNode, probably a doc-test
            pass
782
783


784
785
786
def swap_attributes(node: Node, other: Node):
    """
    Exchanges the attributes between node and other. This might be
787
    needed when rearangeing trees.
788
    """
789
790
    NA = node.has_attr()
    OA = other.has_attr()
791
    if NA or OA:
Eckhart Arnold's avatar
Eckhart Arnold committed
792
793
794
795
796
797
798
799
800
        save = node._xml_attr if NA else None
        if OA:
            node._xml_attr = other._xml_attr
        elif NA:
            node._xml_attr = None
        if NA:
            other._xml_attr = save
        elif OA:
            other._xml_attr = None
801
802


803
def _replace_by(node: Node, child: Node, root: RootNode):
804
805
806
    """
    Replaces node's contents by child's content including the tag name.
    """
di68kap's avatar
di68kap committed
807
808
809
810
811
812
    # if node.anonymous or not child.anonymous:
    #     node.tag_name = child.tag_name
    nd_tn = node.tag_name
    ch_tn = child.tag_name
    if not nd_tn or nd_tn[0] == ':' or (ch_tn and ch_tn[0] != ':'):
        node.tag_name = ch_tn
813
814
        # name, ptype = (node.tag_name.split(':') + [''])[:2]
        # child.parser = MockParser(name, ptype)
815
        # parser names must not be overwritten, else: child.parser.name = node.parser.name
di68kap's avatar
di68kap committed
816
    node._set_result(child._result)
817
    update_attr(node, (child,), root)
818
819


820
def _reduce_child(node: Node, child: Node, root: RootNode):
821
822
823
    """
    Sets node's results to the child's result, keeping node's tag_name.
    """
di68kap's avatar
di68kap committed
824
    node._set_result(child._result)
825
826
827
828
    update_attr(node, (child,), root)
    # update_attr(child, (node,), root)
    # if child.has_attr():
    #     node._xml_attr = child._xml_attr
829
830


831
832
833
834
835
836
837
838
839
840
841
842
#######################################################################
#
# rearranging transformations
#
# - tree may be rearranged (e.g.flattened)
# - nodes that are not leaves may be dropped
# - order is preserved
# - leave content is preserved (though not necessarily the leaves
#   themselves)
#
#######################################################################

843

di68kap's avatar
di68kap committed
844
def replace_by_single_child(context: TreeContext):
845
    """
846
847
    Removes single branch node, replacing it by its immediate descendant.
    Replacement only takes place, if the last node in the context has
848
849
850
    exactly one child. Attributes will be merged. In case one and the same
    attribute is defined for the child as well as the parent, the child's
    attribute value take precedence.
851
852
    """
    node = context[-1]
di68kap's avatar
di68kap committed
853
854
    if len(node._children) == 1:
        _replace_by(node, node._children[0], cast(RootNode, context[0]))
855
856


di68kap's avatar
di68kap committed
857
def replace_by_children(context: TreeContext):
858
859
    """
    Eliminates the last node in the context by replacing it with its children.
di68kap's avatar
di68kap committed
860
    The attributes of this node will be dropped. In case the last node is
di68kap's avatar
di68kap committed
861
    the root-note (i.e. len(context) == 1), it will only be eliminated, if
862
    there is but one child.
863
864

    WARNING: This should never be followed by move_adjacent() in the transformation list!!!
865
    """
di68kap's avatar
di68kap committed
866
867
868
    try:
        parent = context[-2]
    except IndexError:
di68kap's avatar
di68kap committed
869
        replace_by_single_child(context)
di68kap's avatar
di68kap committed
870
        return
871
    node = context[-1]
di68kap's avatar
di68kap committed
872
873
    if node._children:
        result = parent._children  # type: Tuple[Node, ...]
874
        i = result.index(node)
di68kap's avatar
di68kap committed
875
        parent._set_result(result[:i] + node._children + result[i + 1:])
876
877


di68kap's avatar
di68kap committed
878
def reduce_single_child(context: TreeContext):
879
    """
880
    Reduces a single branch node by transferring the result of its
881
    immediate descendant to this node, but keeping this node's parser entry.
882
    Reduction only takes place if the last node in the context has
883
884
885
    exactly one child. Attributes will be merged. In case one and the same
    attribute is defined for the child as well as the parent, the parent's
    attribute value take precedence.
886
887
    """
    node = context[-1]
di68kap's avatar
di68kap committed
888
889
    if len(node._children) == 1:
        _reduce_child(node, node._children[0], cast(RootNode, context[0]))
890
891


892
@transformation_factory(collections.abc.Callable)
di68kap's avatar
di68kap committed
893
def replace_or_reduce(context: TreeContext, condition: Callable = is_named):
894
895
    """
    Replaces node by a single child, if condition is met on child,
896
897
    otherwise (i.e. if the child is anonymous) reduces the child.
    """
898
    node = context[-1]
di68kap's avatar
di68kap committed
899
900
    if len(node._children) == 1:
        child = node._children[0]