parse.py 140 KB
Newer Older
3001
3002
3003
3004
3005
3006
3007
3008
3009
        has a tag name, this overrides the tag name of the retrieved symbol's
        parser."""
        if self.anonymous or not self.tag_name:
            if self.parser.pname:
                return self.parser.tag_name
            # self.parser is a Forward-Parser, so pick the name of its encapsulated parser
            return cast(Forward, self.parser).parser.tag_name
        return self.tag_name

3010
    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
3011
        # auto-capture on first use if symbol was not captured before
3012
3013
        # ("or"-clause needed, because Forward parsers do not have a pname)
        if len(self.grammar.variables__[self.symbol_pname]) == 0:
di68kap's avatar
di68kap committed
3014
3015
3016
            node, text_ = self.parser(text)   # auto-capture value
            if node is None:
                return None, text_
3017
3018
        node, text_ = self.retrieve_and_match(text)
        return node, text_
3019
3020

    def __repr__(self):
3021
        return ':' + self.parser.repr
3022
3023
3024

    def retrieve_and_match(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """
        Fetches the value stack of the symbol and hands it, together with
        the following text, to the match function that was passed to the
        class' constructor.

        Returns a tuple `(node, rest)`:

        - `(None, text)` if the match function yields `None`, or if the
          lookup raises KeyError/IndexError and the name of the match
          function starts with 'optional_'.
        - a node with empty content that carries an UNDEFINED_RETRIEVE
          error, plus the unchanged text, if the lookup fails otherwise.
        - `EMPTY_NODE` (if `self.drop_content` is set) or a node containing
          the matched value, plus the text following the value.
        """
        try:
            stack = self.grammar.variables__[self.symbol_pname]
            value = self.match(text, stack)
        except (KeyError, IndexError):
            tag = self.get_tag_name()
            if self.match.__name__.startswith('optional_'):
                # a None match is returned if the parser is optional and
                # there was no value to retrieve
                return None, text
            error_node = Node(tag, '')
            self.grammar.tree__.new_error(
                error_node,
                dsl_error_msg(self, "'%s' undefined or exhausted." % self.symbol_pname),
                UNDEFINED_RETRIEVE)
            return error_node, text

        if value is None:
            return None, text
        if self.drop_content:
            return EMPTY_NODE, text[len(value):]
        result = Node(self.get_tag_name(), value)
        return result, text[len(value):]
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062


class Pop(Retrieve):
    """
    Matches if the following text starts with the value of a particular
    variable. Since a variable in this context is a stack of values,
    it is the last value that is compared with the following text.
    Unlike the `Retrieve`-parser, the `Pop`-parser removes that value
    from the stack in case of a match.

    The constructor parameter `symbol` determines which variable is
    used.
    """

    def __init__(self, symbol: Parser, match_func: MatchVariableFunc = None) -> None:
        super(Pop, self).__init__(symbol, match_func)

    def reset(self):
        """Resets the parser and clears the list of popped values."""
        super(Pop, self).reset()
        self.values = []

    def _rollback(self):
        """Moves the most recently popped value back onto the stack of
        the variable."""
        stack = self.grammar.variables__[self.symbol_pname]
        stack.append(self.values.pop())

    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """
        Retrieves and matches the variable's value like `Retrieve`. If a
        node is returned that is not registered as an error node, the
        value is popped from the variable's stack, remembered in
        `self.values` and a rollback for the current location is
        registered with the grammar.
        """
        node, rest = self.retrieve_and_match(text)
        if node is None or id(node) in self.grammar.tree__.error_nodes:
            return node, rest
        stack = self.grammar.variables__[self.symbol_pname]
        self.values.append(stack.pop())
        location = self.grammar.document_length__ - len(text)
        self.grammar.push_rollback__(location, self._rollback)
        return node, rest

    def __repr__(self):
        """Returns '::' (or ':?' if the match function's name starts with
        'optional_') followed by the symbol's repr and, if the stack is
        not empty, its topmost value in double quotes."""
        stack = self.grammar.variables__.get(self.symbol_pname, [])
        is_optional = self.match.__name__.startswith('optional_')
        pieces = [':?' if is_optional else '::', self.parser.repr]
        if stack:
            pieces.append(' "%s"' % stack[-1])
        return ''.join(pieces)
3093
3094


3095
3096
3097
3098
3099
########################################################################
#
# Aliasing parser classes
#
########################################################################
3100
3101


eckhart's avatar
eckhart committed
3102
class Synonym(UnaryParser):
    r"""
    Calls another parser and, if that parser matches, passes its result
    on under the synonym's own tag name.

    This parser is needed to support synonyms in EBNF, e.g.::

        jahr       = JAHRESZAHL
        JAHRESZAHL = /\d\d\d\d/

    Without it, the first line could not be represented by any parser
    class, and it would be unclear whether the parser
    RegExp('\d\d\d\d') carries the name 'JAHRESZAHL' or 'jahr'.
    """

    def __init__(self, parser: Parser) -> None:
        assert not parser.drop_content
        super(Synonym, self).__init__(parser)

    def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """
        Runs the wrapped parser and re-labels its result.

        A non-match and the result of an anonymous synonym are passed
        through unchanged. If `self.drop_content` is set, `EMPTY_NODE` is
        returned for a match. Otherwise `EMPTY_NODE` is replaced by an
        empty node with the synonym's tag name, a node whose tag name
        starts with ':' is renamed in place, and any other node is
        wrapped in a new node carrying the synonym's tag name.
        """
        node, rest = self.parser(text)
        if node is None:
            return node, rest
        if self.drop_content:
            return EMPTY_NODE, rest
        if self.anonymous:
            return node, rest
        if node is EMPTY_NODE:
            return Node(self.tag_name, ''), rest
        if node.tag_name.startswith(':'):
            # eliminate anonymous child-node on the fly
            node.tag_name = self.tag_name
            return node, rest
        return Node(self.tag_name, (node,)), rest

    def __str__(self):
        """Returns 'name = parser' for a named synonym, otherwise just
        the repr of the wrapped parser."""
        if self.pname:
            return self.pname + ' = ' + self.parser.repr
        return self.pname + self.parser.repr

    def __repr__(self):
        """Returns the synonym's name or, if it has none, the repr of
        the wrapped parser."""
        name = self.pname
        return name if name else self.parser.repr
3140
3141


eckhart's avatar
eckhart committed
3142
class Forward(UnaryParser):
    r"""
    Forward allows to declare a parser before it is actually defined.
    Forward declarations are needed for parsers that are recursively
    nested, e.g.::

        class Arithmetic(Grammar):
            '''
            expression =  term  { ("+" | "-") term }
            term       =  factor  { ("*" | "/") factor }
            factor     =  INTEGER | "("  expression  ")"
            INTEGER    =  /\d+/~
            '''
            expression = Forward()
            INTEGER    = RE('\\d+')
            factor     = INTEGER | TKN("(") + expression + TKN(")")
            term       = factor + ZeroOrMore((TKN("*") | TKN("/")) + factor)
            expression.set(term + ZeroOrMore((TKN("+") | TKN("-")) + term))
            root__     = expression
    """

    def __init__(self):
        super(Forward, self).__init__(PARSER_PLACEHOLDER)
        # guards __repr__/__str__ against endless recursion, see __cycle_guard()
        self.cycle_reached = False  # type: bool
        # value that __call__ writes back to grammar.memoization__
        self.memoization = True  # type: bool

    def reset(self):
        """Resets the parser and clears the recursion bookkeeping."""
        super(Forward, self).reset()
        # maps a location to a pair (depth, oracle), see __call__()
        self.recursion = dict()  # type: Dict[int, Tuple[int, int]]

    def __deepcopy__(self, memo):
        """Returns a deep copy of the Forward-parser. The duplicate is
        registered in `memo` before the encapsulated parser is copied,
        so that references back to this parser resolve to the duplicate."""
        duplicate = self.__class__()
        memo[id(self)] = duplicate
        copy_parser_base_attrs(self, duplicate)
        parser = copy.deepcopy(self.parser, memo)
        duplicate.parser = parser
        duplicate.pname = self.pname        # Forward-Parsers should not have a name!
        duplicate.anonymous = self.anonymous
        duplicate.tag_name = self.tag_name  # Forward-Parser should not have a tag name!
        duplicate.drop_content = parser.drop_content
        return duplicate

    def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """
        Overrides Parser.__call__, because Forward is not an independent parser
        but merely redirects the call to another parser. Unlike parser
        `Synonym`, which might be a meaningful marker for the syntax tree,
        parser Forward should never appear in the syntax tree.

        `self.recursion` stores a pair `(depth, oracle)` per location. A
        non-negative oracle means that a call of this parser at the same
        location is already in progress, i.e. the call is re-entrant.
        """
        # TODO: For indirect recursion, recursion counters should not only
        #       depend on location, but on location and call stack depth
        location = self.grammar.document_length__ - text._len
        depth, oracle = self.recursion.get(location, (-1, -1))
        if oracle >= 0:
            # re-entrant call at this location
            if depth >= oracle:
                # recursion limit reached: fail here and raise the limit
                self.recursion[location] = (0, oracle + 1)
                node, _text = None, text
            else:
                # descend one level further, then restore the depth while
                # keeping an oracle that may have been raised in between
                self.recursion[location] = (depth + 1, oracle)
                node, _text = self.parser(text)
                oracle = self.recursion[location][1]
                self.recursion[location] = (depth, oracle)
            # NOTE(review): a second re-entrant call stores the already
            # disabled flag (False) in self.memoization, so the outermost
            # call may leave memoization switched off -- confirm intent.
            self.memoization = self.grammar.memoization__
            self.grammar.memoization__ = False
            return node, _text
        else:
            # outermost call at this location
            self.recursion[location] = (0, 0)
            longest = None, text
            length = 0
            while True:
                node, text_ = self.parser(text)
                depth, oracle = self.recursion[location]
                if oracle == 0:
                    # oracle untouched: no re-entrant call hit its limit
                    longest = node, text_
                    break
                elif node is None:
                    break
                else:
                    # parse again as long as the result keeps growing
                    node_len = len(node)
                    if node_len <= length:
                        break
                    length = node_len
                    longest = node, text_
            self.recursion[location] = (-1, -1)
            self.grammar.memoization__ = self.memoization
            return longest

    def set_proxy(self, proxy: Optional[ParseFunc]):
        """`set_proxy` has no effects on Forward-objects!"""
        return

    def __cycle_guard(self, func, alt_return):
        """
        Returns the value of `func()` or `alt_return` if a cycle has
        been reached (which can happen if `func` calls methods of
        child parsers).

        The guard flag is cleared even if `func()` raises an exception.
        Otherwise a single failure would make every later call return
        `alt_return`.
        """
        if self.cycle_reached:
            return alt_return
        self.cycle_reached = True
        try:
            return func()
        finally:
            self.cycle_reached = False

    def __repr__(self):
        return self.__cycle_guard(lambda: repr(self.parser), '...')

    def __str__(self):
        return self.__cycle_guard(lambda: str(self.parser), '...')

    @property
    def repr(self) -> str:
        """Returns the name of the encapsulated parser if it has a name
        or repr(self) if not."""
        return self.parser.pname if self.parser.pname else self.__repr__()

    def set(self, parser: Parser):
        """
        Sets the parser to which the calls to this Forward-object
        shall be delegated. The `drop_content`-flag is taken over from
        that parser.
        """
        self.parser = parser
        self.drop_content = parser.drop_content

    def sub_parsers(self) -> Tuple[Parser, ...]:
        """Returns the encapsulated parser as a one-element tuple, or an
        empty tuple as long as no parser has been set.

        Note: Sub-Parsers are not passed through by Forward-Parser.
        TODO: Should this be changed?"""
        if is_parser_placeholder(self.parser):
            return tuple()
        return (self.parser,)