Commit 524501e5 authored by eckhart's avatar eckhart
Browse files

- some more optimizations

parent 0c1fbc00
...@@ -11,6 +11,8 @@ cdef class Parser: ...@@ -11,6 +11,8 @@ cdef class Parser:
cdef object recursion_counter cdef object recursion_counter
cdef object cycle_detection cdef object cycle_detection
cpdef _return_node(self, node)
cpdef _return_node_from_results(self, results)
cpdef _parse(self, text) cpdef _parse(self, text)
cpdef reset(self) cpdef reset(self)
cpdef _apply(self, func, flip) cpdef _apply(self, func, flip)
......
...@@ -55,8 +55,8 @@ __all__ = ('Parser', ...@@ -55,8 +55,8 @@ __all__ = ('Parser',
'Whitespace', 'Whitespace',
'DropWhitespace', 'DropWhitespace',
'mixin_comment', 'mixin_comment',
'UnaryOperator', 'UnaryParser',
'NaryOperator', 'NaryParser',
'Synonym', 'Synonym',
'Option', 'Option',
'ZeroOrMore', 'ZeroOrMore',
...@@ -380,6 +380,30 @@ class Parser: ...@@ -380,6 +380,30 @@ class Parser:
""" """
return Alternative(self, other) return Alternative(self, other)
def _return_node(self, node: Node) -> Node:
    """Turn the result of a contained parser into this parser's result node.

    Optimized stand-in for the plain ``Node(self.tag_name, node)``:

    * If ``node`` is truthy and its ``_result`` is non-empty, it is wrapped
      in a node tagged ``self.tag_name`` when this parser has a ``pname``;
      otherwise ``node`` itself is handed back unwrapped.
    * In every other case an empty node tagged ``self.tag_name`` is
      returned when ``pname`` is set, and ``EMPTY_NODE`` otherwise, so that
      no new node object is created for an unnamed empty result.

    NOTE(review): a child node whose ``_result`` is empty is discarded
    here regardless of whether the child itself is named -- confirm that
    this is intended.
    """
    if not (node and node._result):
        # nothing worth keeping: empty named node or the EMPTY_NODE constant
        return Node(self.tag_name, ()) if self.pname else EMPTY_NODE
    if self.pname:
        return Node(self.tag_name, node)
    # unnamed parser: pass the child through instead of wrapping it
    return node
@cython.locals(N=cython.int)
def _return_node_from_results(self, results: Tuple[Node, ...]) -> Node:
    """Build this parser's result node from a tuple of child nodes.

    Optimized stand-in for the plain ``Node(self.tag_name, results)``:

    * more than one child: a node tagged ``self.tag_name`` holding
      ``results``;
    * exactly one child: delegated to ``self._return_node(results[0])``;
    * no children: an empty node tagged ``self.tag_name`` if ``pname`` is
      set, otherwise ``EMPTY_NODE`` (no node object is created).
    """
    N = len(results)
    if N == 1:
        return self._return_node(results[0])
    if N > 1:
        return Node(self.tag_name, results)
    # N == 0: named parsers still yield a (tagged) empty node
    return Node(self.tag_name, ()) if self.pname else EMPTY_NODE
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]: def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
"""Applies the parser to the given `text` and returns a node with """Applies the parser to the given `text` and returns a node with
the results or None as well as the text at the position right behind the results or None as well as the text at the position right behind
...@@ -1237,19 +1261,19 @@ class DropWhitespace(Whitespace): ...@@ -1237,19 +1261,19 @@ class DropWhitespace(Whitespace):
######################################################################## ########################################################################
class UnaryOperator(Parser): class UnaryParser(Parser):
""" """
Base class of all unary parser operators, i.e. parser that contains Base class of all unary parsers, i.e. parser that contains
one and only one other parser, like the optional parser for example. one and only one other parser, like the optional parser for example.
The UnaryOperator base class supplies __deepcopy__ and apply The UnaryParser base class supplies __deepcopy__ and apply
methods for unary parser operators. The __deepcopy__ method needs methods for unary parsers. The __deepcopy__ method needs
to be overwritten, however, if the constructor of a derived class to be overwritten, however, if the constructor of a derived class
has additional parameters. has additional parameters.
""" """
def __init__(self, parser: Parser) -> None: def __init__(self, parser: Parser) -> None:
super(UnaryOperator, self).__init__() super(UnaryParser, self).__init__()
assert isinstance(parser, Parser), str(parser) assert isinstance(parser, Parser), str(parser)
self.parser = parser # type: Parser self.parser = parser # type: Parser
...@@ -1261,26 +1285,26 @@ class UnaryOperator(Parser): ...@@ -1261,26 +1285,26 @@ class UnaryOperator(Parser):
return duplicate return duplicate
def _apply(self, func: ApplyFunc, flip: FlagFunc) -> bool: def _apply(self, func: ApplyFunc, flip: FlagFunc) -> bool:
if super(UnaryOperator, self)._apply(func, flip): if super(UnaryParser, self)._apply(func, flip):
self.parser._apply(func, flip) self.parser._apply(func, flip)
return True return True
return False return False
class NaryOperator(Parser): class NaryParser(Parser):
""" """
Base class of all Nnary parser operators, i.e. parser that Base class of all Nnary parsers, i.e. parser that
contains one or more other parsers, like the alternative contains one or more other parsers, like the alternative
parser for example. parser for example.
The NnaryOperator base class supplies __deepcopy__ and apply methods The NaryParser base class supplies __deepcopy__ and apply methods
for unary parser operators. The __deepcopy__ method needs to be for n-ary parsers. The __deepcopy__ method needs to be
overwritten, however, if the constructor of a derived class has overwritten, however, if the constructor of a derived class has
additional parameters. additional parameters.
""" """
def __init__(self, *parsers: Parser) -> None: def __init__(self, *parsers: Parser) -> None:
super(NaryOperator, self).__init__() super(NaryParser, self).__init__()
assert all([isinstance(parser, Parser) for parser in parsers]), str(parsers) assert all([isinstance(parser, Parser) for parser in parsers]), str(parsers)
self.parsers = parsers # type: Tuple[Parser, ...] self.parsers = parsers # type: Tuple[Parser, ...]
...@@ -1292,14 +1316,14 @@ class NaryOperator(Parser): ...@@ -1292,14 +1316,14 @@ class NaryOperator(Parser):
return duplicate return duplicate
def _apply(self, func: ApplyFunc, flip: FlagFunc) -> bool: def _apply(self, func: ApplyFunc, flip: FlagFunc) -> bool:
if super(NaryOperator, self)._apply(func, flip): if super(NaryParser, self)._apply(func, flip):
for parser in self.parsers: for parser in self.parsers:
parser._apply(func, flip) parser._apply(func, flip)
return True return True
return False return False
class Option(UnaryOperator): class Option(UnaryParser):
r""" r"""
Parser ``Option`` always matches, even if its child-parser Parser ``Option`` always matches, even if its child-parser
did not match. did not match.
...@@ -1318,7 +1342,7 @@ class Option(UnaryOperator): ...@@ -1318,7 +1342,7 @@ class Option(UnaryOperator):
>>> Grammar(number)('3.14159').content >>> Grammar(number)('3.14159').content
'3.14159' '3.14159'
>>> Grammar(number)('3.14159').structure >>> Grammar(number)('3.14159').structure
'(:Series (:RegExp "3") (:Option (:RegExp ".14159")))' '(:Series (:RegExp "3") (:RegExp ".14159"))'
>>> Grammar(number)('-1').content >>> Grammar(number)('-1').content
'-1' '-1'
...@@ -1335,13 +1359,7 @@ class Option(UnaryOperator): ...@@ -1335,13 +1359,7 @@ class Option(UnaryOperator):
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]: def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
node, text = self.parser(text) node, text = self.parser(text)
if node and (node._result or self.parser.pname): return self._return_node(node), text
return Node(self.tag_name, node), text
if self.pname:
return Node(self.tag_name, ()), text
else:
# avoid creation of a node object for empty nodes
return EMPTY_NODE, text
def __repr__(self): def __repr__(self):
return '[' + (self.parser.repr[1:-1] if isinstance(self.parser, Alternative) return '[' + (self.parser.repr[1:-1] if isinstance(self.parser, Alternative)
...@@ -1382,7 +1400,7 @@ class ZeroOrMore(Option): ...@@ -1382,7 +1400,7 @@ class ZeroOrMore(Option):
infinite_loop_error = Error(dsl_error_msg(self, 'Infinite Loop encountered.'), infinite_loop_error = Error(dsl_error_msg(self, 'Infinite Loop encountered.'),
node.pos) node.pos)
results += (node,) results += (node,)
node = Node(self.tag_name, results) node = self._return_node_from_results(results) # type: Node
if infinite_loop_error: if infinite_loop_error:
self.grammar.tree__.add_error(node, infinite_loop_error) self.grammar.tree__.add_error(node, infinite_loop_error)
return node, text return node, text
...@@ -1392,7 +1410,7 @@ class ZeroOrMore(Option): ...@@ -1392,7 +1410,7 @@ class ZeroOrMore(Option):
and not self.parser.pname else self.parser.repr) + '}' and not self.parser.pname else self.parser.repr) + '}'
class OneOrMore(UnaryOperator): class OneOrMore(UnaryParser):
r""" r"""
`OneOrMore` applies a parser repeatedly as long as this parser `OneOrMore` applies a parser repeatedly as long as this parser
matches. Other than `ZeroOrMore` which always matches, at least matches. Other than `ZeroOrMore` which always matches, at least
...@@ -1434,7 +1452,7 @@ class OneOrMore(UnaryOperator): ...@@ -1434,7 +1452,7 @@ class OneOrMore(UnaryOperator):
results += (node,) results += (node,)
if results == (): if results == ():
return None, text return None, text
node = Node(self.tag_name, results) node = self._return_node_from_results(results) # type: Node
if infinite_loop_error: if infinite_loop_error:
self.grammar.tree__.add_error(node, infinite_loop_error) self.grammar.tree__.add_error(node, infinite_loop_error)
return node, text_ return node, text_
...@@ -1476,7 +1494,7 @@ def mandatory_violation(grammar: Grammar, ...@@ -1476,7 +1494,7 @@ def mandatory_violation(grammar: Grammar,
return error, err_node, text_[i:] return error, err_node, text_[i:]
class Series(NaryOperator): class Series(NaryParser):
r""" r"""
Matches if each of a series of parsers matches exactly in the order of Matches if each of a series of parsers matches exactly in the order of
the series. the series.
...@@ -1565,7 +1583,7 @@ class Series(NaryOperator): ...@@ -1565,7 +1583,7 @@ class Series(NaryOperator):
results += (node,) results += (node,)
# assert len(results) <= len(self.parsers) \ # assert len(results) <= len(self.parsers) \
# or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE_TAG]) # or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE_TAG])
node = Node(self.tag_name, results) node = self._return_node_from_results(results) # type: Node
if error: if error:
raise ParserError(node, text, first_throw=True) raise ParserError(node, text, first_throw=True)
return node, text_ return node, text_
...@@ -1613,7 +1631,7 @@ class Series(NaryOperator): ...@@ -1613,7 +1631,7 @@ class Series(NaryOperator):
return self return self
class Alternative(NaryOperator): class Alternative(NaryParser):
r""" r"""
Matches if one of several alternatives matches. Returns Matches if one of several alternatives matches. Returns
the first match. the first match.
...@@ -1679,7 +1697,7 @@ class Alternative(NaryOperator): ...@@ -1679,7 +1697,7 @@ class Alternative(NaryOperator):
return self return self
class AllOf(NaryOperator): class AllOf(NaryParser):
""" """
Matches if all elements of a list of parsers match. Each parser must Matches if all elements of a list of parsers match. Each parser must
match exactly once. Other than in a sequence, the order in which match exactly once. Other than in a sequence, the order in which
...@@ -1777,7 +1795,7 @@ class AllOf(NaryOperator): ...@@ -1777,7 +1795,7 @@ class AllOf(NaryOperator):
parsers = [] parsers = []
assert len(results) <= len(self.parsers) \ assert len(results) <= len(self.parsers) \
or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE_TAG]) or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE_TAG])
node = Node(self.tag_name, results) node = self._return_node_from_results(results) # type: Node
if error: if error:
raise ParserError(node, text, first_throw=True) raise ParserError(node, text, first_throw=True)
return node, text_ return node, text_
...@@ -1786,7 +1804,7 @@ class AllOf(NaryOperator): ...@@ -1786,7 +1804,7 @@ class AllOf(NaryOperator):
return '< ' + ' '.join(parser.repr for parser in self.parsers) + ' >' return '< ' + ' '.join(parser.repr for parser in self.parsers) + ' >'
class SomeOf(NaryOperator): class SomeOf(NaryParser):
""" """
Matches if at least one element of a list of parsers match. No parser Matches if at least one element of a list of parsers match. No parser
must match more than once . Other than in a sequence, the order in which must match more than once . Other than in a sequence, the order in which
...@@ -1833,7 +1851,7 @@ class SomeOf(NaryOperator): ...@@ -1833,7 +1851,7 @@ class SomeOf(NaryOperator):
parsers = [] parsers = []
assert len(results) <= len(self.parsers) assert len(results) <= len(self.parsers)
if results: if results:
return Node(self.tag_name, results), text_ return self._return_node_from_results(results), text_
else: else:
return None, text return None, text
...@@ -1841,7 +1859,7 @@ class SomeOf(NaryOperator): ...@@ -1841,7 +1859,7 @@ class SomeOf(NaryOperator):
return '< ' + ' | '.join(parser.repr for parser in self.parsers) + ' >' return '< ' + ' | '.join(parser.repr for parser in self.parsers) + ' >'
def Unordered(parser: NaryOperator) -> NaryOperator: def Unordered(parser: NaryParser) -> NaryParser:
""" """
Returns an AllOf- or SomeOf-parser depending on whether `parser` Returns an AllOf- or SomeOf-parser depending on whether `parser`
is a Series (AllOf) or an Alternative (SomeOf). is a Series (AllOf) or an Alternative (SomeOf).
...@@ -1856,13 +1874,13 @@ def Unordered(parser: NaryOperator) -> NaryOperator: ...@@ -1856,13 +1874,13 @@ def Unordered(parser: NaryOperator) -> NaryOperator:
######################################################################## ########################################################################
# #
# Flow control operators # Flow control parsers
# #
######################################################################## ########################################################################
class FlowOperator(UnaryOperator): class FlowParser(UnaryParser):
""" """
Base class for all flow operator parsers like Lookahead and Lookbehind. Base class for all flow parsers like Lookahead and Lookbehind.
""" """
def sign(self, bool_value) -> bool: def sign(self, bool_value) -> bool:
"""Returns the value. Can be overridden to return the inverted bool.""" """Returns the value. Can be overridden to return the inverted bool."""
...@@ -1873,7 +1891,7 @@ def Required(parser: Parser) -> Parser: ...@@ -1873,7 +1891,7 @@ def Required(parser: Parser) -> Parser:
return Series(parser, mandatory=0) return Series(parser, mandatory=0)
# class Required(FlowOperator): # class Required(FlowParser):
# """OBSOLETE. Use mandatory-parameter of Series-parser instead! # """OBSOLETE. Use mandatory-parameter of Series-parser instead!
# """ # """
# RX_ARGUMENT = re.compile(r'\s(\S)') # RX_ARGUMENT = re.compile(r'\s(\S)')
...@@ -1894,7 +1912,7 @@ def Required(parser: Parser) -> Parser: ...@@ -1894,7 +1912,7 @@ def Required(parser: Parser) -> Parser:
# return '§' + self.parser.repr # return '§' + self.parser.repr
class Lookahead(FlowOperator): class Lookahead(FlowParser):
""" """
Matches, if the contained parser would match for the following text, Matches, if the contained parser would match for the following text,
but does not consume any text. but does not consume any text.
...@@ -1922,7 +1940,7 @@ class NegativeLookahead(Lookahead): ...@@ -1922,7 +1940,7 @@ class NegativeLookahead(Lookahead):
return not bool_value return not bool_value
class Lookbehind(FlowOperator): class Lookbehind(FlowParser):
""" """
Matches, if the contained parser would match backwards. Requires Matches, if the contained parser would match backwards. Requires
the contained parser to be a RegExp, _RE, PlainText or _Token parser. the contained parser to be a RegExp, _RE, PlainText or _Token parser.
...@@ -1968,12 +1986,12 @@ class NegativeLookbehind(Lookbehind): ...@@ -1968,12 +1986,12 @@ class NegativeLookbehind(Lookbehind):
######################################################################## ########################################################################
# #
# Capture and Retrieve operators (for passing variables in the parser) # Capture and Retrieve parsers (for passing variables in the parser)
# #
######################################################################## ########################################################################
class Capture(UnaryOperator): class Capture(UnaryParser):
""" """
Applies the contained parser and, in case of a match, saves the result Applies the contained parser and, in case of a match, saves the result
in a variable. A variable is a stack of values associated with the in a variable. A variable is a stack of values associated with the
...@@ -1991,7 +2009,7 @@ class Capture(UnaryOperator): ...@@ -1991,7 +2009,7 @@ class Capture(UnaryOperator):
self.grammar.push_rollback__(location, self._rollback) # lambda: stack.pop()) self.grammar.push_rollback__(location, self._rollback) # lambda: stack.pop())
# caching will be blocked by parser guard (see way above), # caching will be blocked by parser guard (see way above),
# because it would prevent recapturing of rolled back captures # because it would prevent recapturing of rolled back captures
return Node(self.tag_name, node), text_ return self._return_node(node), text_
else: else:
return None, text return None, text
...@@ -2121,7 +2139,7 @@ class Pop(Retrieve): ...@@ -2121,7 +2139,7 @@ class Pop(Retrieve):
######################################################################## ########################################################################
class Synonym(UnaryOperator): class Synonym(UnaryParser):
r""" r"""
Simply calls another parser and encapsulates the result in Simply calls another parser and encapsulates the result in
another node if that parser matches. another node if that parser matches.
......
# Arithmetic-grammar
#######################################################################
#
# EBNF-Directives
#
#######################################################################
@ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*/ # comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive
@ drop = whitespace # drop anonymous whitespace and tokens
#######################################################################
#
# Structure and Components
#
#######################################################################
expression = term { ("+" | "-") term}
term = factor { ("*" | "/") factor}
factor = [/-/] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
group = "(" expression ")"
#######################################################################
#
# Regular Expressions
#
#######################################################################
NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
VARIABLE = /[A-Za-z]/~
#!/usr/bin/python
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
import collections
from functools import partial
import os
import sys
sys.path.append(r'/home/eckhart/Entwicklung/DHParser')
try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, DropWhitespace, \
Lookbehind, Lookahead, Alternative, Pop, Token, DropToken, Synonym, AllOf, SomeOf, \
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
error_on, recompile_grammar, GLOBALS
#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def ArithmeticPreprocessor(text):
    """Return `text` unchanged together with an identity mapping function.

    The result is a 2-tuple ``(text, mapping)`` where ``mapping(i)`` returns
    ``i`` for any ``i``.
    (Presumably the mapping translates positions in the preprocessed text
    back to positions in the original source -- confirm against the
    preprocessor interface.)
    """
    def identity_mapping(position):
        return position

    return text, identity_mapping
def get_preprocessor() -> PreprocessorFunc:
    """Return the preprocessor function for Arithmetic sources.

    This is the ``ArithmeticPreprocessor`` function object itself; it is
    not called here.
    """
    return ArithmeticPreprocessor
#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################
class ArithmeticGrammar(Grammar):
    r"""Parser for an Arithmetic source file.

    The class attributes below spell out the Arithmetic grammar as parser
    objects:

        expression = term  { ("+" | "-") term }
        term       = factor  { ("*" | "/") factor }
        factor     = [/-/] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
        group      = "(" expression ")"

    The root parser is ``expression``.
    """
    # `expression` is referenced by `group` before its definition is
    # complete, so it starts out as a Forward placeholder that is filled in
    # by `expression.set(...)` further down.
    expression = Forward()
    # NOTE(review): presumably a hash of the grammar source, used to detect
    # changes of the grammar -- confirm against the generator.
    source_hash__ = "d03e397fb4cabd6f20f3ae7c9add4ad5"
    parser_initialization__ = ["upon instantiation"]
    resume_rules__ = {}
    # Regular expressions for comments ('#' up to the end of the line) and
    # for whitespace; both are combined by `mixin_comment`.
    COMMENT__ = r'#.*'
    WHITESPACE__ = r'\s*'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    # Two whitespace parsers built from the same regular expression.
    dwsp__ = DropWhitespace(WSP_RE__)
    wsp__ = Whitespace(WSP_RE__)
    # VARIABLE = /[A-Za-z]/~
    VARIABLE = Series(RegExp('[A-Za-z]'), dwsp__)
    # NUMBER = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
    NUMBER = Series(RegExp('(?:0|(?:[1-9]\\d*))(?:\\.\\d+)?'), dwsp__)
    # group = "(" expression ")"
    group = Series(Series(Token("("), dwsp__), expression, Series(Token(")"), dwsp__))
    # factor = [/-/] ( NUMBER | VARIABLE | group ) { VARIABLE | group }
    factor = Series(Option(RegExp('-')), Alternative(NUMBER, VARIABLE, group), ZeroOrMore(Alternative(VARIABLE, group)))
    # term = factor { ("*" | "/") factor }
    term = Series(factor, ZeroOrMore(Series(Alternative(Series(Token("*"), dwsp__), Series(Token("/"), dwsp__)), factor)))
    # expression = term { ("+" | "-") term }
    expression.set(Series(term, ZeroOrMore(Series(Alternative(Series(Token("+"), dwsp__), Series(Token("-"), dwsp__)), term))))
    root__ = expression
def get_grammar() -> ArithmeticGrammar:
    """Return the ArithmeticGrammar object stored on ``GLOBALS``.

    The object is created on the first call, i.e. when the attribute
    ``Arithmetic_00000001_grammar_singleton`` does not yet exist on
    ``GLOBALS``, and is reused by later calls.

    NOTE(review): the nesting of the statements inside the ``except``
    branch was reconstructed from flattened text -- confirm against the
    generator's output.
    """
    global GLOBALS
    try:
        grammar = GLOBALS.Arithmetic_00000001_grammar_singleton
    except AttributeError:
        # first use: instantiate the grammar and remember it on GLOBALS
        GLOBALS.Arithmetic_00000001_grammar_singleton = ArithmeticGrammar()
        # hand on the `python_src__` attribute if one has been attached
        # to this function object
        if hasattr(get_grammar, 'python_src__'):
            GLOBALS.Arithmetic_00000001_grammar_singleton.python_src__ = get_grammar.python_src__
        grammar = GLOBALS.Arithmetic_00000001_grammar_singleton
    return grammar
#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
# Table that is passed (as a copy) to `traverse` as its `processing_table`
# by ArithmeticTransform(). Each key is associated with either a single
# transformation function or a list of transformation functions; an empty
# list means that no transformation is registered for that key.
# NOTE(review): the keys "<" and "*" do not correspond to grammar symbols;
# presumably they are special keys interpreted by `traverse` -- confirm
# against the documentation of DHParser's transformation module.
Arithmetic_AST_transformation_table = {
    # AST Transformations for the Arithmetic-grammar
    "<": remove_empty,
    "expression": [],
    "term": [],
    "factor": [replace_or_reduce],
    "NUMBER": [],
    "VARIABLE": [],
    ":Token": reduce_single_child,
    "*": replace_by_single_child
}
def ArithmeticTransform() -> TransformationFunc:
    """Return a new AST transformation callable for the Arithmetic grammar.

    The returned object is ``functools.partial(traverse, ...)`` with the
    keyword argument ``processing_table`` pre-bound to a shallow copy
    (``dict.copy``) of ``Arithmetic_AST_transformation_table``. Adding or
    removing keys of the module-level table afterwards therefore does not
    affect a transformer that has already been created.

    The return annotation is ``TransformationFunc`` (not
    ``TransformationDict``): what is returned is a callable, and
    ``get_transformer`` passes the very same object on under that type.
    """
    table = Arithmetic_AST_transformation_table.copy()
    return partial(traverse, processing_table=table)
def get_transformer() -> TransformationFunc:
    """Return the Arithmetic transformer stored on ``GLOBALS``.

    If ``GLOBALS`` has no attribute
    ``Arithmetic_00000001_transformer_singleton`` yet, a transformer is
    created with ``ArithmeticTransform()``, stored under that attribute and
    returned; later calls return the stored object.
    """
    try:
        return GLOBALS.Arithmetic_00000001_transformer_singleton
    except AttributeError:
        # first use: create the transformer and remember it on GLOBALS
        GLOBALS.Arithmetic_00000001_transformer_singleton = ArithmeticTransform()
        return GLOBALS.Arithmetic_00000001_transformer_singleton
#######################################################################
#