parse.py 113 KB
Newer Older
2001
                results += (node,)
eckhart's avatar
eckhart committed
2002
        # assert len(results) <= len(self.parsers) \
2003
        #        or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE_TAG])
eckhart's avatar
eckhart committed
2004
        ret_node = self._return_values(results)  # type: Node
2005
        if error and reloc < 0:
2006
            raise ParserError(ret_node.with_pos(self.grammar.document_length__ - len(text_)),
2007
                              text, error, first_throw=True)
eckhart's avatar
eckhart committed
2008
        return ret_node, text_
2009
2010
2011

    def __repr__(self):
        """
        Returns the `repr`-strings of the contained parsers, separated by
        blanks. If a mandatory position has been set, the marker '§' is
        inserted in front of the first mandatory parser.
        """
        split = self.mandatory
        parts = [parser.repr for parser in self.parsers[:split]]
        if split != NO_MANDATORY:
            parts.append('§')
        parts.extend(parser.repr for parser in self.parsers[split:])
        return " ".join(parts)

    # The following operator definitions add syntactical sugar, so one can write:
    # `RE('\d+') + Optional(RE('\.\d+'))` instead of
    # `Series(RE('\d+'), Optional(RE('\.\d+')))`
2017
2018

    @staticmethod
    def combined_mandatory(left: 'Series', right: 'Series'):
        """
        Determines the index of the first mandatory element of the
        sequence that results from joining `left` and `right`.

        An operand that is not a `Series` counts as one single parser
        without a mandatory marker. A marker in `left` takes precedence;
        a marker in `right` is shifted by the number of parsers in `left`.
        Returns `NO_MANDATORY` if neither operand carries a marker.
        """
        if isinstance(left, Series):
            left_mandatory, left_length = left.mandatory, len(left.parsers)
        else:
            left_mandatory, left_length = NO_MANDATORY, 1
        if left_mandatory != NO_MANDATORY:
            return left_mandatory

        if isinstance(right, Series):
            right_mandatory = right.mandatory
        else:
            right_mandatory = NO_MANDATORY
        if right_mandatory == NO_MANDATORY:
            return NO_MANDATORY
        return right_mandatory + left_length
2032
2033
2034

    def __add__(self, other: Parser) -> 'Series':
        """
        Implements `self + other`: returns a new `Series` made up of the
        parsers of this series followed by `other` (or by the parsers of
        `other`, if `other` is a `Series` itself).
        """
        if isinstance(other, Series):
            appended = cast('Series', other).parsers  # type: Tuple[Parser, ...]
        else:
            appended = cast(Tuple[Parser, ...], (other,))
        joined = self.parsers + appended
        return Series(*joined, mandatory=self.combined_mandatory(self, other))

    def __radd__(self, other: Parser) -> 'Series':
        """
        Implements `other + self`: returns a new `Series` in which `other`
        (or the parsers of `other`, if `other` is a `Series` itself)
        precedes the parsers of this series.
        """
        if isinstance(other, Series):
            prepended = cast('Series', other).parsers  # type: Tuple[Parser, ...]
        else:
            prepended = cast(Tuple[Parser, ...], (other,))
        joined = prepended + self.parsers
        return Series(*joined, mandatory=self.combined_mandatory(other, self))

    def __iadd__(self, other: Parser) -> 'Series':
        """
        Implements `self += other`: appends `other` (or the parsers of
        `other`, if `other` is a `Series` itself) to this series in place
        and updates the position of the first mandatory element.

        Returns this (modified) series object.
        """
        other_parsers = cast('Series', other).parsers if isinstance(other, Series) \
            else cast(Tuple[Parser, ...], (other,))  # type: Tuple[Parser, ...]
        # The combined mandatory position must be determined BEFORE the
        # parsers are appended: `combined_mandatory` uses `len(self.parsers)`
        # as offset for a mandatory marker inherited from `other`. After the
        # extension that length would already include the appended parsers
        # and the marker would end up too far to the right.
        mandatory = self.combined_mandatory(self, other)
        self.parsers += other_parsers
        self.mandatory = mandatory
        return self


eckhart's avatar
eckhart committed
2053
class Alternative(NaryParser):
    r"""
    Ordered choice: the contained parsers are tried one after another
    and the result of the first parser that matches is returned.

    This parser represents the EBNF-operator "|" with the qualification
    that both the symmetry and the ambiguity of the EBNF-or-operator
    are broken by selecting the first match.::

        # the order of the sub-expression matters!
        >>> number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
        >>> str(Grammar(number)("3.1416"))
        '3 <<< Error on ".1416" | Parser stopped before end! Terminating parser. >>> '

        # the most selective expression should be put first:
        >>> number = RE(r'\d+') + RE(r'\.') + RE(r'\d+') | RE(r'\d+')
        >>> Grammar(number)("3.1416").content
        '3.1416'

    EBNF-Notation: ``... | ...``

    EBNF-Example:  ``sentence = /\d+\.\d+/ | /\d+/``
    """

    def __init__(self, *parsers: Parser) -> None:
        super(Alternative, self).__init__(*parsers)
        assert len(self.parsers) >= 1
        # An optional parser anywhere but in the last position is rejected.
        # Could this be checked at compile time?
        leading = self.parsers[:-1]
        assert not any(isinstance(p, Option) for p in leading), \
            "Parser-specification Error: only the last alternative may be optional!"

    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """Returns the result of the first matching alternative, or
        `(None, text)` if none of the alternatives matches."""
        for alternative in self.parsers:
            node, rest = alternative(text)
            if node is None:
                continue
            return self._return_value(node), rest
        return None, text

    def __repr__(self):
        alternatives = [parser.repr for parser in self.parsers]
        return '(' + ' | '.join(alternatives) + ')'

    def reset(self):
        super(Alternative, self).reset()
        return self

    # The following operator definitions add syntactical sugar, so one can write:
    # `RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')` instead of:
    # `Alternative(Series(RE('\d+'), RE('\.'), RE('\d+')), RE('\d+'))`

    def __or__(self, other: Parser) -> 'Alternative':
        """`self | other`: new Alternative with `other` (or its alternatives,
        if `other` is an Alternative) appended."""
        if isinstance(other, Alternative):
            appended = cast('Alternative', other).parsers  # type: Tuple[Parser, ...]
        else:
            appended = cast(Tuple[Parser, ...], (other,))
        return Alternative(*(self.parsers + appended))

    def __ror__(self, other: Parser) -> 'Alternative':
        """`other | self`: new Alternative with `other` (or its alternatives,
        if `other` is an Alternative) in front."""
        if isinstance(other, Alternative):
            prepended = cast('Alternative', other).parsers  # type: Tuple[Parser, ...]
        else:
            prepended = cast(Tuple[Parser, ...], (other,))
        return Alternative(*(prepended + self.parsers))

    def __ior__(self, other: Parser) -> 'Alternative':
        """`self |= other`: appends `other` (or its alternatives, if `other`
        is an Alternative) in place and returns this object."""
        if isinstance(other, Alternative):
            appended = cast('Alternative', other).parsers  # type: Tuple[Parser, ...]
        else:
            appended = cast(Tuple[Parser, ...], (other,))
        self.parsers += appended
        return self


eckhart's avatar
eckhart committed
2122
class AllOf(MandatoryElementsParser):
    """
    Matches if all elements of a list of parsers match. Each parser must
    match exactly once. Other than in a sequence, the order in which
    the parsers match is arbitrary, however.

    Example::

        >>> prefixes = AllOf(TKN("A"), TKN("B"))
        >>> Grammar(prefixes)('A B').content
        'A B'
        >>> Grammar(prefixes)('B A').content
        'B A'

    EBNF-Notation: ``<... ...>``    (sequence of parsers enclosed by angular brackets)

    EBNF-Example:  ``set = <letter letter_or_digit>``
    """

    def __init__(self, *parsers: Parser,
                 mandatory: int = NO_MANDATORY,
                 err_msgs: MessagesType = [],
                 skip: ResumeList = []) -> None:
        """
        Accepts either several parsers or one single `Series`. In the
        latter case the parsers of the series are used, and `mandatory`,
        `err_msgs` and `skip` are taken over from the series wherever they
        have not been passed explicitly.
        """
        if len(parsers) == 1:
            assert isinstance(parsers[0], Series), \
                "AllOf should be initialized either with a series or with more than one parser!"
            series = cast(Series, parsers[0])  # type: Series
            # fill in whatever has not been specified explicitly
            mandatory = series.mandatory if mandatory == NO_MANDATORY else mandatory
            err_msgs = err_msgs or series.err_msgs
            skip = skip or series.skip

            # explicitly passed values must not contradict those of the series
            assert series.mandatory == NO_MANDATORY or mandatory == series.mandatory, \
                "If AllOf is initialized with a series, parameter 'mandatory' must be the same!"
            assert not series.err_msgs or err_msgs == series.err_msgs, \
                "If AllOf is initialized with a series, 'err_msg' must empty or the same!"
            assert not series.skip or skip == series.skip, \
                "If AllOf is initialized with a series, 'skip' must empty or the same!"

            parsers = series.parsers

        super(AllOf, self).__init__(*parsers, mandatory=mandatory, err_msgs=err_msgs, skip=skip)
        self.num_parsers = len(self.parsers)  # type: int

    def __deepcopy__(self, memo):
        copied_parsers = copy.deepcopy(self.parsers, memo)
        duplicate = self.__class__(*copied_parsers, mandatory=self.mandatory,
                                   err_msgs=self.err_msgs, skip=self.skip)
        duplicate.pname = self.pname
        copy_parser_attrs(self, duplicate)
        return duplicate

    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """
        Repeatedly tries the parsers that have not matched yet on the
        remaining text, until every parser has matched once or none of the
        remaining parsers matches any more.
        """
        collected = ()  # type: Tuple[Node, ...]
        rest = text  # type: StringView
        pending = list(self.parsers)  # type: List[Parser]
        error = None  # type: Optional[Error]
        while pending:
            for index, candidate in enumerate(pending):
                node, remainder = candidate(rest)
                if node is None:
                    continue
                if node._result or not node.tag_name.startswith(':'):
                    # anonymous empty nodes are dropped, everything else is kept
                    collected += (node,)
                    rest = remainder
                del pending[index]
                break
            else:
                # none of the pending parsers matched
                matched_count = self.num_parsers - len(pending)
                if matched_count < self.mandatory:
                    return None, text
                reloc = self.get_reentry_point(rest)
                expected = '< ' + ' '.join([parser.repr for parser in pending]) + ' >'
                lookahead = any([isinstance(p, Lookahead) for p in pending])
                error, err_node, rest = self.mandatory_violation(
                    rest, lookahead, expected, reloc)
                collected += (err_node,)
                if reloc < 0:
                    pending = []
        assert len(collected) <= len(self.parsers) \
            or len(self.parsers) >= len([p for p in collected if p.tag_name != ZOMBIE_TAG])
        nd = self._return_values(collected)  # type: Node
        if error and reloc < 0:
            raise ParserError(nd.with_pos(self.grammar.document_length__ - len(text)),
                              text, error, first_throw=True)
        return nd, rest

    def __repr__(self):
        elements = [parser.repr for parser in self.parsers]
        return '< ' + ' '.join(elements) + ' >'
2213
2214


eckhart's avatar
eckhart committed
2215
class SomeOf(NaryParser):
    """
    Matches if at least one element of a list of parsers matches. No
    parser may match more than once. Other than in a sequence, the order
    in which the parsers match is arbitrary, however.

    Example::

        >>> prefixes = SomeOf(TKN("A"), TKN("B"))
        >>> Grammar(prefixes)('A B').content
        'A B'
        >>> Grammar(prefixes)('B A').content
        'B A'
        >>> Grammar(prefixes)('B').content
        'B'

    EBNF-Notation: ``<... ...>``    (sequence of parsers enclosed by angular brackets)

    EBNF-Example:  ``set = <letter letter_or_digit>``
    """

    def __init__(self, *parsers: Parser) -> None:
        """Accepts either several parsers or one single `Alternative`,
        the parsers of which are then used."""
        if len(parsers) == 1:
            sole_argument = parsers[0]
            assert isinstance(sole_argument, Alternative), \
                "Parser-specification Error: No single arguments other than a Alternative " \
                "allowed as arguments for SomeOf-Parser !"
            parsers = cast(Alternative, sole_argument).parsers
        super(SomeOf, self).__init__(*parsers)

    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """
        Repeatedly tries the parsers that have not matched yet on the
        remaining text and stops as soon as none of them matches any more.
        Returns `(None, text)` if no result node has been collected.
        """
        collected = ()  # type: Tuple[Node, ...]
        rest = text  # type: StringView
        pending = list(self.parsers)  # type: List[Parser]
        while pending:
            for index, candidate in enumerate(pending):
                node, remainder = candidate(rest)
                if node is None:
                    continue
                if node._result or not node.tag_name.startswith(':'):
                    # anonymous empty nodes are dropped, everything else is kept
                    collected += (node,)
                    rest = remainder
                del pending[index]
                break
            else:
                # no pending parser matched: leave the outer loop
                pending = []
        assert len(collected) <= len(self.parsers)
        if not collected:
            return None, text
        return self._return_values(collected), rest

    def __repr__(self):
        elements = [parser.repr for parser in self.parsers]
        return '< ' + ' | '.join(elements) + ' >'
2269
2270


eckhart's avatar
eckhart committed
2271
def Unordered(parser: NaryParser) -> NaryParser:
    """
    Converts an ordered parser into its unordered counterpart: a `Series`
    is wrapped in an `AllOf`-parser, an `Alternative` in a `SomeOf`-parser.

    Raises an AssertionError for any other kind of parser.
    """
    if isinstance(parser, Series):
        return AllOf(parser)
    if isinstance(parser, Alternative):
        return SomeOf(parser)
    raise AssertionError("Unordered can take only Series or Alternative as parser.")


########################################################################
#
# Flow control parsers
#
########################################################################

eckhart's avatar
eckhart committed
2290
class FlowParser(UnaryParser):
    """
    Common base class of the flow parsers, i.e. the look-ahead and
    look-behind parsers and their negative variants.
    """
    def sign(self, bool_value) -> bool:
        """Returns `bool_value` unchanged. Subclasses that express a
        negative condition override this method to return the inverted
        value."""
        return bool_value


eckhart's avatar
eckhart committed
2299
2300
2301
2302
def Required(parser: Parser) -> Parser:
    """
    Returns a one-element `Series` that contains `parser` and has its
    `mandatory`-position set to 0, i.e. to the only element of the series.
    """
    return Series(parser, mandatory=0)


eckhart's avatar
eckhart committed
2303
# class Required(FlowParser):
#     """OBSOLETE. Use mandatory-parameter of Series-parser instead!
#     """
#     RX_ARGUMENT = re.compile(r'\s(\S)')
#
#     def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
#         node, text_ = self.parser(text)
#         if not node:
#             m = text.search(Required.RX_ARGUMENT)  # re.search(r'\s(\S)', text)
#             i = max(1, text.index(m.regs[1][0])) if m else 1
#             node = Node(self, text[:i])
#             text_ = text[i:]
#             self.grammar.tree__.new_error(node,
#                                           '%s expected; "%s" found!' % (str(self.parser),
#                                           text[:10]), code=Error.MANDATORY_CONTINUATION)
#         return node, text_
#
#     def __repr__(self):
#         return '§' + self.parser.repr


eckhart's avatar
eckhart committed
2324
class Lookahead(FlowParser):
    """
    Succeeds if the contained parser matches the following text (as
    judged by `sign()`), but never consumes any of that text.
    """
    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """Returns an empty node together with the unchanged `text` on
        success and `(None, text)` otherwise."""
        node, _ = self.parser(text)
        if not self.sign(node is not None):
            return None, text
        if self.anonymous:
            return EMPTY_NODE, text
        return Node(self.tag_name, ''), text

    def __repr__(self):
        return '&' + self.parser.repr


class NegativeLookahead(Lookahead):
    """
    Matches, if the contained parser would *not* match for the following
    text. Like `Lookahead`, it does not consume any text.
    """
    def __repr__(self):
        """Returns the representation of the contained parser, prefixed
        with '!'."""
        return '!' + self.parser.repr

    def sign(self, bool_value) -> bool:
        """Returns the negation of `bool_value`, which inverts the
        match-condition evaluated by `Lookahead._parse`."""
        return not bool_value


eckhart's avatar
eckhart committed
2354
class Lookbehind(FlowParser):
    """
    Matches, if the contained parser would match backwards, i.e. on the
    reversed text that precedes the current position. No text is consumed.

    The contained parser must be a `RegExp`- or a `Token`-parser, possibly
    wrapped in one or more `Synonym`-parsers.

    EXPERIMENTAL

    Attributes:
        regexp: the regular expression of the contained parser if it is
            a `RegExp`-parser, `None` otherwise.
        text: the text of the contained parser if it is a `Token`-parser,
            the empty string otherwise.
    """
    def __init__(self, parser: Parser) -> None:
        p = parser
        while isinstance(p, Synonym):
            p = p.parser
        assert isinstance(p, RegExp) or isinstance(p, Token)
        self.regexp = None
        self.text = ''  # type: str
        if isinstance(p, RegExp):
            self.regexp = cast(RegExp, p).regexp
        else:  # p is of type Token
            self.text = cast(Token, p).text
        super(Lookbehind, self).__init__(parser)

    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """
        Checks the reversed text before the current position against the
        regular expression or the token text of the contained parser.
        Returns an empty node and the unchanged `text` if the check, as
        judged by `sign()`, succeeds, and `(None, text)` otherwise.
        """
        # skip the reversed remainder of the document, so that
        # `backwards_text` starts right before the current position
        backwards_text = self.grammar.reversed__[text.__len__():]
        if self.regexp is None:  # assert self.text is not None
            # Compare exactly as many characters as the token text is long.
            # (Slicing by the length of the remaining document would compare
            # a slice of unrelated size.)
            # NOTE(review): the token text is compared as is, not reversed -
            # confirm that this is the intended convention.
            does_match = backwards_text[:len(self.text)] == self.text
        else:  # assert self.regexp is not None
            does_match = backwards_text.match(self.regexp)
        if self.sign(does_match):
            if self.drop_content:
                return EMPTY_NODE, text
            return Node(self.tag_name, ''), text
        return None, text

    def __repr__(self):
        return '-&' + self.parser.repr


class NegativeLookbehind(Lookbehind):
    """
    Matches, if the contained parser would *not* match backwards. The
    same restrictions regarding the contained parser apply as for
    `Lookbehind`.
    """
    def __repr__(self):
        """Returns the representation of the contained parser, prefixed
        with '-!'."""
        return '-!' + self.parser.repr

    def sign(self, bool_value) -> bool:
        """Returns the negated truth value of `bool_value`. The explicit
        `bool()`-conversion allows for non-boolean arguments such as the
        result of a regular expression match."""
        return not bool(bool_value)


########################################################################
#
# Capture and Retrieve parsers (for passing variables in the parser)
#
########################################################################


eckhart's avatar
eckhart committed
2409
class Capture(UnaryParser):
    """
    Applies the contained parser and, in case of a match, saves the
    content of the result in a variable. A variable is a stack of values
    stored under this parser's name (`pname`) in the grammar's
    `variables__`. Therefore, the capture-parser must be named.
    """
    def __init__(self, parser: Parser) -> None:
        assert not parser.drop_content, \
            "Cannot capture content of returned by parser, the content of which will be dropped!"
        super(Capture, self).__init__(parser)

    def _rollback(self):
        """Undoes one capture by popping the latest value from the stack."""
        return self.grammar.variables__[self.pname].pop()

    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        node, rest = self.parser(text)
        if node is None:
            return None, text
        assert self.pname, """Tried to apply an unnamed capture-parser!"""
        assert not self.parser.drop_content, \
            "Cannot capture content of returned by parser, the content of which " \
            "will be dropped!"
        self.grammar.variables__[self.pname].append(node.content)
        # register the undo-action under the location where the match started
        location = self.grammar.document_length__ - text.__len__()
        self.grammar.push_rollback__(location, self._rollback)
        # caching will be blocked by parser guard (see way above),
        # because it would prevent recapturing of rolled back captures
        return self._return_value(node), rest

    def __repr__(self):
        return self.parser.repr


# Type of the filter functions that can be passed to `Retrieve` and `Pop`:
# a filter receives the stack of captured strings of a variable and returns
# the string that is to be matched against the following text.
RetrieveFilter = Callable[[List[str]], str]


def last_value(stack: List[str]) -> str:
    """Returns the topmost (i.e. most recently captured) value of the
    capture stack. This is the default filter when retrieving captured
    substrings. Raises an IndexError if the stack is empty."""
    return stack[-1]


def counterpart(stack: List[str]) -> str:
    """Returns the topmost value of the capture stack with every opening
    bracket replaced by the matching closing bracket, i.e. if "[" was
    captured, "]" will be retrieved. Raises an IndexError if the stack
    is empty."""
    closing_brackets = str.maketrans("([{<", ")]}>")
    return stack[-1].translate(closing_brackets)


def accumulate(stack: List[str]) -> str:
    """Returns the concatenation of all values on the stack, from the
    oldest to the most recent one. Raises an IndexError if the stack
    is empty.
    By the way: I cannot remember any reasonable use case for this!?"""
    if len(stack) > 1:
        return "".join(stack)
    # a single value is returned as is; an empty stack provokes an IndexError
    return stack[-1]


class Retrieve(Parser):
    """
    Matches if the following text starts with the value of a particular
    variable. As a variable in this context means a stack of values,
    the value picked by the filter (by default: the last value) will be
    compared with the following text. It will not be removed from the
    stack! (This is the difference between the `Retrieve` and the `Pop`
    parser.)
    The constructor parameter `symbol` determines which variable is
    used.

    Attributes:
        symbol: The parser that has stored the value to be retrieved, in
            other words: "the observed parser"
        filter: a procedure through which the retrieved stack of values
            is channeled (constructor parameter `rfilter`). In the
            simplest case it merely returns the last string stored by the
            observed parser. This can be (mis-)used to execute any kind
            of semantic action.
    """

    def __init__(self, symbol: Parser, rfilter: RetrieveFilter = None) -> None:
        super(Retrieve, self).__init__()
        self.symbol = symbol
        # fall back to `last_value` if no filter has been passed
        self.filter = rfilter or last_value

    def __deepcopy__(self, memo):
        duplicate = self.__class__(self.symbol, self.filter)
        copy_parser_attrs(self, duplicate)
        return duplicate

    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        # the following indirection allows the call() method to be called
        # from subclass without triggering the parser guard a second time
        return self.retrieve_and_match(text)

    def __repr__(self):
        return ':' + self.symbol.repr

    def retrieve_and_match(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """
        Fetches the stack of the observed symbol, passes it through the
        filter function and checks whether the following text starts with
        the resulting value.

        Returns a node with the value (or `EMPTY_NODE`, if the content is
        to be dropped) plus the text after the value in case of a match,
        and `(None, text)` otherwise. If the variable is unknown or the
        filter runs into an exhausted stack, an error is added to the tree
        and an empty error-node is returned with the unchanged text.
        """
        try:
            stack = self.grammar.variables__[self.symbol.pname]
            value = self.filter(stack)
        except (KeyError, IndexError):
            location = self.grammar.document_length__ - text.__len__()
            node = Node(self.tag_name, '').with_pos(location)
            message = dsl_error_msg(self, "'%s' undefined or exhausted." % self.symbol.pname)
            self.grammar.tree__.new_error(node, message)
            return node, text
        if not text.startswith(value):
            return None, text
        rest = text[len(value):]
        if self.drop_content:
            return EMPTY_NODE, rest
        return Node(self.tag_name, value), rest


class Pop(Retrieve):
    """
    Matches if the following text starts with the value of a particular
    variable. As a variable in this context means a stack of values,
    the last value will be compared with the following text. Other
    than the `Retrieve`-parser, the `Pop`-parser removes the value
    from the stack in case of a match.

    The constructor parameter `symbol` determines which variable is
    used.
    """
    def __init__(self, symbol: Parser, rfilter: RetrieveFilter = None) -> None:
        super(Pop, self).__init__(symbol, rfilter)

    def reset(self):
        super(Pop, self).reset()
        # values that have been popped from the variable's stack by this
        # parser; they are kept so that a pop can be rolled back
        self.values = []

    def __deepcopy__(self, memo):
        duplicate = self.__class__(self.symbol, self.filter)
        copy_parser_attrs(self, duplicate)
        duplicate.values = list(self.values)
        return duplicate

    def _rollback(self):
        """Undoes one pop by pushing the most recently popped value back
        onto the variable's stack."""
        stack = self.grammar.variables__[self.symbol.pname]
        return stack.append(self.values.pop())

    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        node, rest = self.retrieve_and_match(text)
        if node is None or id(node) in self.grammar.tree__.error_nodes:
            # no match or an error-node: the stack remains untouched
            return node, rest
        popped = self.grammar.variables__[self.symbol.pname].pop()
        self.values.append(popped)
        location = self.grammar.document_length__ - text.__len__()
        self.grammar.push_rollback__(location, self._rollback)
        return node, rest

    def __repr__(self):
        return '::' + self.symbol.repr


2564
2565
2566
2567
2568
########################################################################
#
# Aliasing parser classes
#
########################################################################
2569
2570


eckhart's avatar
eckhart committed
2571
class Synonym(UnaryParser):
    r"""
    Simply calls another parser and, if that parser matches and the
    synonym is not anonymous, relabels the result with the synonym's
    own tag name.

    This parser is needed to support synonyms in EBNF, e.g.::

        jahr       = JAHRESZAHL
        JAHRESZAHL = /\d\d\d\d/

    Otherwise the first line could not be represented by any parser
    class, in which case it would be unclear whether the parser
    RegExp('\d\d\d\d') carries the name 'JAHRESZAHL' or 'jahr'.
    """
    def __init__(self, parser: Parser) -> None:
        assert not parser.drop_content
        super(Synonym, self).__init__(parser)

    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        # circumvent Parser.__call__ as an optimization (dangerous?)
        node, rest = self.parser._parse(text)
        if node is None:
            return node, rest
        if self.drop_content:
            return EMPTY_NODE, rest
        if self.anonymous:
            # an anonymous synonym passes the node on unchanged
            return node, rest
        if node == EMPTY_NODE:
            # EMPTY_NODE is not renamed; a fresh empty node is returned instead
            return Node(self.tag_name, ''), rest
        node.tag_name = self.tag_name
        return node, rest

    def __repr__(self):
        return self.pname or self.parser.repr
2607
2608
2609
2610
2611
2612


class Forward(Parser):
    r"""
    Forward allows to declare a parser before it is actually defined.
    Forward declarations are needed for parsers that are recursively
    nested, e.g.::

        class Arithmetic(Grammar):
            '''
            expression =  term  { ("+" | "-") term }
            term       =  factor  { ("*" | "/") factor }
            factor     =  INTEGER | "("  expression  ")"
            INTEGER    =  /\d+/~
            '''
            expression = Forward()
            INTEGER    = RE('\\d+')
            factor     = INTEGER | TKN("(") + expression + TKN(")")
            term       = factor + ZeroOrMore((TKN("*") | TKN("/")) + factor)
            expression.set(term + ZeroOrMore((TKN("+") | TKN("-")) + term))
            root__     = expression

    Attributes:
        parser: the parser to which all calls are delegated; `None` as
            long as `set()` has not been called.
        cycle_reached: flag that is True while `repr()` or `str()` of
            this object is being computed. It is used to cut short
            recursive descriptions of cyclic parser structures.
    """

    def __init__(self):
        super(Forward, self).__init__()
        self.parser = None  # type: Optional[Parser]
        self.cycle_reached = False

    def __deepcopy__(self, memo):
        duplicate = self.__class__()
        # duplicate.pname = self.pname  # Forward-Parsers should never have a name!
        duplicate.anonymous = self.anonymous
        duplicate.tag_name = self.tag_name
        # The duplicate is registered in `memo` before the referred parser
        # is copied, so that a reference back to this object from within
        # the referred parser resolves to the duplicate.
        memo[id(self)] = duplicate
        parser = copy.deepcopy(self.parser, memo)
        duplicate.parser = parser
        if parser is not None:
            duplicate.drop_content = parser.drop_content
        return duplicate

    def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """
        Overrides Parser.__call__, because Forward is not an independent
        parser but merely redirects the call to another parser. Other than
        parser `Synonym`, which might be a meaningful marker for the syntax
        tree, parser Forward should never appear in the syntax tree.
        """
        return self.parser(text)

    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        # for the exceptional case in class Synonym where the ._parse method is called directly
        return self.parser(text)

    def set_proxy(self, proxy: Optional[ParseFunc]):
        """`set_proxy` has no effects on Forward-objects!"""
        return

    def __cycle_guard(self, func, alt_return):
        """
        Returns the value of `func()` or `alt_return` if a cycle has
        been reached (which can happen if `func` calls methods of
        child parsers).
        """
        if self.cycle_reached:
            return alt_return
        self.cycle_reached = True
        try:
            return func()
        finally:
            # Always clear the flag, even if `func` raises an exception.
            # Otherwise all later calls would return `alt_return`.
            self.cycle_reached = False

    def __repr__(self):
        return self.__cycle_guard(lambda: repr(self.parser), '...')

    def __str__(self):
        return self.__cycle_guard(lambda: str(self.parser), '...')

    @property
    def repr(self) -> str:
        """Returns the name of the referred parser if it has a name, or
        repr(self) if it has no name or if no parser has been set yet."""
        if self.parser is not None and self.parser.pname:
            return self.parser.pname
        return self.__repr__()

    def set(self, parser: Parser):
        """
        Sets the parser to which the calls to this Forward-object
        shall be delegated. The `drop_content`-flag of that parser
        is taken over.
        """
        self.parser = parser
        self.drop_content = parser.drop_content

    def sub_parsers(self) -> Tuple[Parser, ...]:
        """Returns a tuple containing the referred parser, or an empty
        tuple if no parser has been set yet."""
        if self.parser is not None:
            return (self.parser,)
        return tuple()