04.07., 9:00 - 11:00: Due to updates, GitLab will be unavailable for a few minutes between 09:00 and 11:00.

Commit b4d5d7ba authored by eckhart

- more cython optimizations

parent 3ee27952
......@@ -2,3 +2,5 @@
#cython: language_level=3
#cython: c_string_type=unicode
#cython: c_string_encoding=utf-8
# cpdef visitor_name(node_name)
......@@ -47,7 +47,7 @@ from DHParser.toolkit import typing, sane_parser_name, load_if_file
from typing import Any, Optional, Tuple, List, Callable
__all__ = ('CompilerError', 'Compiler', 'compile_source')
__all__ = ('CompilerError', 'Compiler', 'compile_source', 'visitor_name')
class CompilerError(Exception):
......@@ -60,6 +60,17 @@ class CompilerError(Exception):
pass
def visitor_name(node_name: str) -> str:
    """Return the name of the visitor method that handles nodes
    with the given tag name, e.g.::

    >>> visitor_name('expression')
    'on_expression'
    """
    # assert re.match(r'\w+$', node_name)  # disabled, presumably for speed — confirm
    prefix = 'on_'
    return prefix + node_name
class Compiler:
"""
Class Compiler is the abstract base class for compilers. Compiler
......@@ -112,30 +123,6 @@ class Compiler:
result = self.compile(root)
return result
# @staticmethod
# def propagate_error_flags(node: Node, lazy: bool = True) -> None:
# # See test_parser.TestCompilerClass.test_propagate_error()..
# """Propagates error flags from children to parent nodes to make sure
# that the parent's error flag is always greater or equal the maximum
# of the children's error flags."""
# if not lazy or node.error_flag < Error.HIGHEST:
# for child in node.children:
# Compiler.propagate_error_flags(child)
# node.error_flag = max(node.error_flag, child.error_flag)
# if lazy and node.error_flag >= Error.HIGHEST:
# return
# NOTE(review): pre-change version removed by this commit; the module-level
# `visitor_name` function (added above) replaces it — presumably so that
# cython-compiled code avoids the staticmethod lookup; confirm against the
# commit message ("more cython optimizations").
@staticmethod
def method_name(node_name: str) -> str:
"""
Returns the method name for `node_name`, e.g.::
>>> Compiler.method_name('expression')
'on_expression'
"""
# builds the visitor-method name by prefixing 'on_' to the node's tag name
assert re.match(r'\w+$', node_name)
return 'on_' + node_name
def compile_children(self, node: Node) -> StrictResultType:
"""Compiles all children of the given node and returns the tuple
of the compiled children or the node's (potentially empty) result
......@@ -171,29 +158,17 @@ class Compiler:
elem = node.tag_name
if elem.startswith(':'):
elem = elem[1:]
if not sane_parser_name(elem):
self.tree.new_error(node, "Reserved name '%s' not allowed as parser "
"name! " % elem + "(Any name starting with "
"'_' or '__' or ending with '__' is reserved.)")
return None
else:
try:
compiler = self.__getattribute__(self.method_name(elem))
except AttributeError:
compiler = self.fallback_compiler
self.context.append(node)
result = compiler(node)
self.context.pop()
if result is None:
raise CompilerError('Method on_%s returned `None` instead of a '
'valid compilation result!' % elem)
# # the following statement makes sure that the error_flag
# # is propagated early on. Otherwise it is redundant, because
# # the __call__ method globally propagates the node's error_flag
# # later anyway. So, maybe it could be removed here.
# for child in node.children:
# node.error_flag = node.error_flag or child.error_flag
return result
try:
compiler = self.__getattribute__(visitor_name(elem))
except AttributeError:
compiler = self.fallback_compiler
self.context.append(node)
result = compiler(node)
self.context.pop()
if result is None:
raise CompilerError('Method on_%s returned `None` instead of a '
'valid compilation result!' % elem)
return result
def compile_source(source: str,
......
......@@ -29,7 +29,7 @@ from functools import partial
import keyword
import os
from DHParser.compile import CompilerError, Compiler, compile_source
from DHParser.compile import CompilerError, Compiler, compile_source, visitor_name
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
......@@ -124,7 +124,7 @@ class EBNFGrammar(Grammar):
"""
expression = Forward()
source_hash__ = "82a7c668f86b83f86515078e6c9093ed"
parser_initialization__ = "upon instantiation"
parser_initialization__ = ["upon instantiation"]
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
......@@ -591,7 +591,7 @@ class EBNFCompiler(Compiler):
' super()._reset()',
' # initialize your variables here, not in the constructor!']
for name in self.rules:
method_name = Compiler.method_name(name)
method_name = visitor_name(name)
if name == self.root_symbol:
compiler += [' def ' + method_name + '(self, node):',
' return self.fallback_compiler(node)', '']
......@@ -781,7 +781,7 @@ class EBNFCompiler(Compiler):
'r"""Parser for ' + article + self.grammar_name
+ ' source file'
+ ('. Grammar:' if self.grammar_source and show_source else '.')]
definitions.append(('parser_initialization__', '"upon instantiation"'))
definitions.append(('parser_initialization__', '["upon instantiation"]'))
if self.grammar_source:
definitions.append(('source_hash__',
'"%s"' % md5(self.grammar_source, __version__)))
......
......@@ -13,26 +13,27 @@ cdef class Parser:
# cpdef _parse(self, text)
# cdef class Grammar:
# cdef public set all_parsers__
# cdef public object start_parser__
# cdef bint _dirty_flag__
# cdef public bint history_tracking__
# cdef public bint memoization__
# cdef public bint left_recursion_handling__
# cdef public object root__
# cdef public object tree__
# cdef public object document__
# cdef public object _reversed__
# cdef public int document_length__
# cdef public list document_lbreaks__
# cdef public object variables__
# cdef public list rollback__
# cdef public int last_rb__loc__
# cdef public list call_stack__
# cdef public list history__
# cdef public bint moving_forward__
# cdef public set recursion_locations__
# Cython extension-type declaration (.pxd) for the Grammar class,
# uncommented by this commit so that Grammar is compiled as a cdef class.
cdef class Grammar:
# explicit __dict__ keeps instances open for dynamically added attributes
# (parsers are added to the grammar object on the fly in __getitem__) —
# presumably; confirm against DHParser/parse.py
cdef dict __dict__
cdef public set all_parsers__
cdef public object start_parser__
cdef bint _dirty_flag__
cdef public bint history_tracking__
cdef public bint memoization__
cdef public bint left_recursion_handling__
# root_parser__ is intentionally left undeclared (see warning below)
# cdef public object root_parser__ # do not uncomment this!!!
cdef public object tree__
cdef public object document__
cdef public object _reversed__
cdef public int document_length__
cdef public list document_lbreaks__
cdef public object variables__
cdef public list rollback__
cdef public int last_rb__loc__
cdef public list call_stack__
cdef public list history__
cdef public bint moving_forward__
cdef public set recursion_locations__
# PreprocessorToken needs no extra C-level attributes beyond Parser's
cdef class PreprocessorToken(Parser):
pass
......
......@@ -39,7 +39,7 @@ from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, RootNode, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE, ResultType
from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing
from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing, cython
from typing import Callable, cast, List, Tuple, Set, Dict, DefaultDict, Union, Optional, Any
......@@ -247,9 +247,10 @@ class Parser:
the `reset()`-method of the parent class must be called from the
`reset()`-method of the derived class."""
self.visited = dict() # type: Dict[int, Tuple[Optional[Node], StringView]]
self.recursion_counter = defaultdict(lambda: 0) # type: DefaultDict[int, int]
self.recursion_counter = defaultdict(int) # type: DefaultDict[int, int]
self.cycle_detection = set() # type: Set[ApplyFunc]
@cython.locals(location=cython.int, gap=cython.int, i=cython.int)
def __call__(self: 'Parser', text: StringView) -> Tuple[Optional[Node], StringView]:
"""Applies the parser to the given text. This is a wrapper method that adds
the business intelligence that is common to all parsers. The actual parsing is
......@@ -702,7 +703,7 @@ class Grammar:
python_src__ = '' # type: str
root__ = ZOMBIE_PARSER # type: Parser
# root__ must be overwritten with the root-parser by grammar subclass
parser_initialization__ = "pending" # type: list[str]
parser_initialization__ = ["pending"] # type: list[str]
resume_rules__ = dict() # type: Dict[str, ResumeList]
# some default values
# COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
......@@ -733,7 +734,7 @@ class Grammar:
selected reference will be chosen. See PEP 520
(www.python.org/dev/peps/pep-0520/) for an explanation of why.
"""
if cls.parser_initialization__ != "done":
if cls.parser_initialization__[0] != "done":
cdict = cls.__dict__
for entry, parser in cdict.items():
if isinstance(parser, Parser) and sane_parser_name(entry):
......@@ -742,7 +743,7 @@ class Grammar:
cast(Forward, parser).parser.pname = entry
else: # if not parser.pname:
parser.pname = entry
cls.parser_initialization__ = "done"
cls.parser_initialization__[0] = "done"
def __init__(self, root: Parser = None) -> None:
......@@ -761,20 +762,22 @@ class Grammar:
# parsers not connected to the root object will be copied later
# on demand (see Grammar.__getitem__()). Usually, the need to
# do so only arises during testing.
self.root__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__)
self.root__.apply(self._add_parser__)
self.root_parser__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__)
self.root_parser__.apply(self._add_parser__)
assert 'root_parser__' in self.__dict__
assert self.root_parser__ == self.__dict__['root_parser__']
def __getitem__(self, key):
try:
return self.__dict__[key]
except KeyError:
parser_template = getattr(self, key, None)
parser_template = getattr(self.__class__, key, None)
if parser_template:
# add parser to grammar object on the fly...
parser = copy.deepcopy(parser_template)
parser.apply(self._add_parser__)
# assert self[key] == parser
assert self[key] == parser
return self[key]
raise UnknownParserError('Unknown parser "%s" !' % key)
......@@ -832,7 +835,7 @@ class Grammar:
def __call__(self,
document: str,
start_parser: Union[str, Parser] = "root__",
start_parser: Union[str, Parser] = "root_parser__",
track_history: bool = False) -> RootNode:
"""
Parses a document with with parser-combinators.
......@@ -857,8 +860,6 @@ class Grammar:
return predecessors[-1].pos + len(predecessors[-1]) if predecessors else 0
# assert isinstance(document, str), type(document)
if self.root__ is None:
raise NotImplementedError()
if self._dirty_flag__:
self._reset__()
for parser in self.all_parsers__:
......@@ -901,7 +902,7 @@ class Grammar:
# in a test case this is not necessarily an error.
last_record = self.history__[-2] if len(self.history__) > 1 else None # type: Optional[HistoryRecord]
if last_record and parser != self.root__ \
if last_record and parser != self.root_parser__ \
and last_record.status == HistoryRecord.MATCH \
and last_record.node.pos \
+ len(last_record.node) >= len(self.document__) \
......@@ -1353,6 +1354,7 @@ class ZeroOrMore(Option):
EBNF-Example: ``sentence = { /\w+,?/ } "."``
"""
@cython.locals(n=cython.int)
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
results = () # type: Tuple[Node, ...]
n = len(text) + 1 # type: int
......@@ -1523,6 +1525,7 @@ class Series(NaryOperator):
duplicate.tag_name = self.tag_name
return duplicate
@cython.locals(pos=cython.int, reloc=cython.int)
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
results = () # type: Tuple[Node, ...]
text_ = text # type: StringView
......@@ -1546,8 +1549,8 @@ class Series(NaryOperator):
results += (node,)
break
results += (node,)
assert len(results) <= len(self.parsers) \
or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE])
# assert len(results) <= len(self.parsers) \
# or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE])
node = Node(self.tag_name, results)
if error:
raise ParserError(node, text, first_throw=True)
......
......@@ -154,7 +154,7 @@ class StringView: # collections.abc.Sized
else:
return StringView(str(other) + str(self))
@cython.locals(start=cython.int, end=cython.int)
@cython.locals(start=cython.int, stop=cython.int)
def __getitem__(self, index: Union[slice, int]) -> 'StringView':
# assert isinstance(index, slice), "As of now, StringView only allows slicing."
# assert index.step is None or index.step == 1, \
......
......@@ -193,7 +193,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
duplicate.errors = copy.deepcopy(self.errors) if self.errors else []
duplicate._pos = self._pos
duplicate._len = self._len
if hasattr(self, '_xml_attr'):
if self.attr_active():
duplicate._xml_attr = copy.deepcopy(self._xml_attr)
return duplicate
......@@ -398,12 +398,28 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return self
def attr_active(self) -> bool:
    """Tell whether XML attributes have ever been attached to this node,
    i.e. whether ``_xml_attr`` exists and is not ``None``.
    """
    try:
        # AttributeError means attributes were never touched at all
        return self._xml_attr is not None
    except AttributeError:
        return False
@property
def attr(self):
"""
Returns a dictionary of XML-attr attached to the node.
"""
# NOTE(review): the next line is the pre-change check that this commit
# removes; it is shown here only because the diff interleaves old and
# new lines. The try/except below replaces it — presumably because
# hasattr() does not work for cdef-class attributes under cython; confirm.
if not hasattr(self, '_xml_attr'):
try:
# lazily create the attribute dict on first access
if self._xml_attr is None: # cython compatibility
self._xml_attr = OrderedDict()
except AttributeError:
self._xml_attr = OrderedDict()
return self._xml_attr
......@@ -495,7 +511,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
"""Returns the opening string for the representation of `node`."""
txt = [left_bracket, node.tag_name]
# s += " '(pos %i)" % node.add_pos
if hasattr(node, '_xml_attr'):
if node.attr_active():
txt.extend(' `(%s "%s")' % (k, v) for k, v in node.attr.items())
if src:
line, col = line_col(lbreaks, node.pos)
......@@ -548,9 +564,9 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
if node.tag_name in omit_tags:
return ''
txt = ['<', node.tag_name]
has_reserved_attrs = hasattr(node, '_xml_attr') \
has_reserved_attrs = node.attr_active() \
and any(r in node.attr for r in {'err', 'line', 'col'})
if hasattr(node, '_xml_attr'):
if node.attr_active():
txt.extend(' %s="%s"' % (k, v) for k, v in node.attr.items())
if src and not has_reserved_attrs:
txt.append(' line="%i" col="%i"' % line_col(line_breaks, node.pos))
......@@ -584,7 +600,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
printed on several lines to avoid unwanted gaps in the output.
"""
return node.tag_name in inline_tags \
or (hasattr(node, '_xml_attr')
or (node.attr_active()
and node.attr.get('xml:space', 'default') == 'preserve')
line_breaks = linebreaks(src) if src else []
......@@ -713,7 +729,7 @@ class RootNode(Node):
duplicate.errors = copy.deepcopy(self.errors) if self.errors else []
duplicate._pos = self._pos
duplicate._len = self._len
if hasattr(self, '_xml_attr'):
if self.attr_active():
duplicate._xml_attr = copy.deepcopy(self._xml_attr)
duplicate.all_errors = copy.deepcopy(self.all_errors)
duplicate.error_flag = self.error_flag
......@@ -741,7 +757,7 @@ class RootNode(Node):
self._len = node._len
self._pos = node._pos
self.tag_name = node.tag_name
if hasattr(node, '_xml_attr'):
if node.attr_active():
self._xml_attr = node._xml_attr
self._content = node._content
return self
......
......@@ -43,8 +43,19 @@ except ImportError:
from typing import Any, Iterable, Sequence, Set, Union, Dict, Hashable # , cast
try:
import cython
cython_optimized = cython.compiled # type: bool
except ImportError:
# import DHParser.Shadow as cython
cython_optimized = False # type: bool
import DHParser.shadow_cython as cython
__all__ = ('escape_re',
__all__ = ('typing',
'cython',
'cython_optimized',
'escape_re',
'escape_control_characters',
'is_filename',
'concurrent_ident',
......
......@@ -3,10 +3,12 @@
# rm DHParser/*.c
# rm DHParser/*.so
# rm DHParser/parse.c
# rm DHParser/parse.cpython*.so
rm DHParser/syntaxtree.c
rm DHParser/syntaxtree.cpython*.so
rm DHParser/parse.c
rm DHParser/parse.cpython*.so
# rm DHParser/syntaxtree.c
# rm DHParser/syntaxtree.cpython*.so
# rm DHParser/transform.c
# rm DHParser/transform.cpython*.so
# CFLAGS="-O3 -march=native -mtune=native"
python3 setup.py build_ext --inplace
......
......@@ -60,7 +60,7 @@ class ArithmeticGrammar(Grammar):
expression = Forward()
variable = Forward()
source_hash__ = "120070baa84f5a2bd1bbb900627078fc"
parser_initialization__ = "upon instantiation"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
WHITESPACE__ = r'\s*'
......
......@@ -58,7 +58,7 @@ class BibTeXGrammar(Grammar):
"""
text = Forward()
source_hash__ = "e402951b290cb0fce63ba0cbca3f23e9"
parser_initialization__ = "upon instantiation"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'(?i)%.*(?:\n|$)'
WHITESPACE__ = r'\s*'
......
......@@ -58,7 +58,7 @@ class EBNFGrammar(Grammar):
"""
expression = Forward()
source_hash__ = "5e9e65a057bec7da29989dba47f40394"
parser_initialization__ = "upon instantiation"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
......
......@@ -58,7 +58,7 @@ class LaTeXGrammar(Grammar):
tabular_config = Forward()
text_element = Forward()
source_hash__ = "e09808ecd485c07b3455c3a2bf4eada3"
parser_initialization__ = "upon instantiation"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'%.*'
WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
......
......@@ -68,7 +68,7 @@ class XMLGrammar(Grammar):
ignoreSectContents = Forward()
markupdecl = Forward()
source_hash__ = "1c64c8f613952c5ab8e851da15f65ec3"
parser_initialization__ = "upon instantiation"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
WHITESPACE__ = r'\s*'
......
......@@ -60,7 +60,7 @@ class XMLSnippetGrammar(Grammar):
Name = Forward()
element = Forward()
source_hash__ = "2efb839574bee3f63b5b9d1ea5c96386"
parser_initialization__ = "upon instantiation"
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
WHITESPACE__ = r'\s*'
......
......@@ -19,7 +19,7 @@ setup(
'DHParser/syntaxtree.py',
'DHParser/parse.py',
'DHParser/transform.py',
# 'DHParser/compile.py',
'DHParser/compile.py',
# 'DHParser/ebnf.py'
],
nthreads=0, annotate=False),
......
......@@ -145,7 +145,7 @@ class TestFlowControl:
def test_lookbehind_indirect(self):
class LookbehindTestGrammar(Grammar):
parser_initialization__ = "upon instantiation"
parser_initialization__ = ["upon instantiation"]
ws = RegExp('\\s*')
end = RegExp('END')
SUCC_LB = RegExp('\\s*?\\n')
......
......@@ -292,6 +292,7 @@ class TestSerialization:
assert tree.as_xml() == "<A>\n <B>C</B>\n <D>E</D>\n</A>", xml
tree.attr['xml:space'] = 'preserve'
print(tree.attr)
xml = tree.as_xml()
assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment