
Commit b4d5d7ba authored by eckhart

- more cython optimizations

parent 3ee27952
@@ -2,3 +2,5 @@
 #cython: language_level=3
 #cython: c_string_type=unicode
 #cython: c_string_encoding=utf-8
+
+# cpdef visitor_name(node_name)
@@ -47,7 +47,7 @@ from DHParser.toolkit import typing, sane_parser_name, load_if_file
 from typing import Any, Optional, Tuple, List, Callable

-__all__ = ('CompilerError', 'Compiler', 'compile_source')
+__all__ = ('CompilerError', 'Compiler', 'compile_source', 'visitor_name')


 class CompilerError(Exception):
@@ -60,6 +60,17 @@ class CompilerError(Exception):
     pass


+def visitor_name(node_name: str) -> str:
+    """
+    Returns the method name for `node_name`, e.g.::
+
+    >>> visitor_name('expression')
+    'on_expression'
+    """
+    # assert re.match(r'\w+$', node_name)
+    return 'on_' + node_name
+
+
 class Compiler:
     """
     Class Compiler is the abstract base class for compilers. Compiler
@@ -112,30 +123,6 @@ class Compiler:
         result = self.compile(root)
         return result

-    # @staticmethod
-    # def propagate_error_flags(node: Node, lazy: bool = True) -> None:
-    #     # See test_parser.TestCompilerClass.test_propagate_error()..
-    #     """Propagates error flags from children to parent nodes to make sure
-    #     that the parent's error flag is always greater or equal the maximum
-    #     of the children's error flags."""
-    #     if not lazy or node.error_flag < Error.HIGHEST:
-    #         for child in node.children:
-    #             Compiler.propagate_error_flags(child)
-    #             node.error_flag = max(node.error_flag, child.error_flag)
-    #         if lazy and node.error_flag >= Error.HIGHEST:
-    #             return
-
-    @staticmethod
-    def method_name(node_name: str) -> str:
-        """
-        Returns the method name for `node_name`, e.g.::
-
-        >>> Compiler.method_name('expression')
-        'on_expression'
-        """
-        assert re.match(r'\w+$', node_name)
-        return 'on_' + node_name
-
     def compile_children(self, node: Node) -> StrictResultType:
         """Compiles all children of the given node and returns the tuple
         of the compiled children or the node's (potentially empty) result
@@ -171,29 +158,17 @@ class Compiler:
         elem = node.tag_name
         if elem.startswith(':'):
             elem = elem[1:]
-        if not sane_parser_name(elem):
-            self.tree.new_error(node, "Reserved name '%s' not allowed as parser "
-                                "name! " % elem + "(Any name starting with "
-                                "'_' or '__' or ending with '__' is reserved.)")
-            return None
-        else:
-            try:
-                compiler = self.__getattribute__(self.method_name(elem))
-            except AttributeError:
-                compiler = self.fallback_compiler
-            self.context.append(node)
-            result = compiler(node)
-            self.context.pop()
-            if result is None:
-                raise CompilerError('Method on_%s returned `None` instead of a '
-                                    'valid compilation result!' % elem)
-            # # the following statement makes sure that the error_flag
-            # # is propagated early on. Otherwise it is redundant, because
-            # # the __call__ method globally propagates the node's error_flag
-            # # later anyway. So, maybe it could be removed here.
-            # for child in node.children:
-            #     node.error_flag = node.error_flag or child.error_flag
-            return result
+        try:
+            compiler = self.__getattribute__(visitor_name(elem))
+        except AttributeError:
+            compiler = self.fallback_compiler
+        self.context.append(node)
+        result = compiler(node)
+        self.context.pop()
+        if result is None:
+            raise CompilerError('Method on_%s returned `None` instead of a '
+                                'valid compilation result!' % elem)
+        return result
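Note: the net effect of the hunk above is that compile() now dispatches through the module-level visitor_name() function instead of the former static method, and no longer performs the reserved-name check here. For readers unfamiliar with the pattern: a Compiler subclass merely defines on_<tag_name> methods, which are looked up by reflection. A hypothetical sketch (ExprCompiler and its tag names are not part of the diff):

class ExprCompiler(Compiler):
    def on_expression(self, node):
        # chosen for nodes with tag_name 'expression', because
        # visitor_name('expression') == 'on_expression'
        return self.compile_children(node)
    # nodes without a matching on_<name> method are handled by
    # self.fallback_compiler via the AttributeError branch above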
def compile_source(source: str,
......
@@ -29,7 +29,7 @@ from functools import partial
 import keyword
 import os

-from DHParser.compile import CompilerError, Compiler, compile_source
+from DHParser.compile import CompilerError, Compiler, compile_source, visitor_name
 from DHParser.error import Error
 from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
     NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
@@ -124,7 +124,7 @@ class EBNFGrammar(Grammar):
     """
     expression = Forward()
     source_hash__ = "82a7c668f86b83f86515078e6c9093ed"
-    parser_initialization__ = "upon instantiation"
+    parser_initialization__ = ["upon instantiation"]
     COMMENT__ = r'#.*(?:\n|$)'
     WHITESPACE__ = r'\s*'
     WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
@@ -591,7 +591,7 @@ class EBNFCompiler(Compiler):
                     '        super()._reset()',
                     '        # initialize your variables here, not in the constructor!']
         for name in self.rules:
-            method_name = Compiler.method_name(name)
+            method_name = visitor_name(name)
             if name == self.root_symbol:
                 compiler += ['    def ' + method_name + '(self, node):',
                              '        return self.fallback_compiler(node)', '']
@@ -781,7 +781,7 @@ class EBNFCompiler(Compiler):
                     'r"""Parser for ' + article + self.grammar_name
                     + ' source file'
                     + ('. Grammar:' if self.grammar_source and show_source else '.')]
-        definitions.append(('parser_initialization__', '"upon instantiation"'))
+        definitions.append(('parser_initialization__', '["upon instantiation"]'))
         if self.grammar_source:
             definitions.append(('source_hash__',
                                 '"%s"' % md5(self.grammar_source, __version__)))
......
@@ -13,26 +13,27 @@ cdef class Parser:
 # cpdef _parse(self, text)

-# cdef class Grammar:
-#     cdef public set all_parsers__
-#     cdef public object start_parser__
-#     cdef bint _dirty_flag__
-#     cdef public bint history_tracking__
-#     cdef public bint memoization__
-#     cdef public bint left_recursion_handling__
-#     cdef public object root__
-#     cdef public object tree__
-#     cdef public object document__
-#     cdef public object _reversed__
-#     cdef public int document_length__
-#     cdef public list document_lbreaks__
-#     cdef public object variables__
-#     cdef public list rollback__
-#     cdef public int last_rb__loc__
-#     cdef public list call_stack__
-#     cdef public list history__
-#     cdef public bint moving_forward__
-#     cdef public set recursion_locations__
+cdef class Grammar:
+    cdef dict __dict__
+    cdef public set all_parsers__
+    cdef public object start_parser__
+    cdef bint _dirty_flag__
+    cdef public bint history_tracking__
+    cdef public bint memoization__
+    cdef public bint left_recursion_handling__
+    # cdef public object root_parser__  # do not uncomment this!!!
+    cdef public object tree__
+    cdef public object document__
+    cdef public object _reversed__
+    cdef public int document_length__
+    cdef public list document_lbreaks__
+    cdef public object variables__
+    cdef public list rollback__
+    cdef public int last_rb__loc__
+    cdef public list call_stack__
+    cdef public list history__
+    cdef public bint moving_forward__
+    cdef public set recursion_locations__

 cdef class PreprocessorToken(Parser):
     pass
......
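Note: the crucial new line in the hunk above is `cdef dict __dict__`. A compiled extension type normally has no instance dictionary, so dynamic attributes fail; declaring __dict__ explicitly restores them, which Grammar needs in order to attach copied parsers on the fly (see Grammar.__getitem__ below). A minimal illustration, independent of this diff:

cdef class Example:
    cdef dict __dict__       # re-enables dynamic attributes on a cdef class
    cdef public int counter  # statically typed, declared attribute

# after compilation: ex = Example(); ex.anything = 42 now works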
@@ -39,7 +39,7 @@ from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
 from DHParser.stringview import StringView, EMPTY_STRING_VIEW
 from DHParser.syntaxtree import Node, RootNode, WHITESPACE_PTYPE, \
     TOKEN_PTYPE, ZOMBIE, ResultType
-from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing
+from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing, cython
 from typing import Callable, cast, List, Tuple, Set, Dict, DefaultDict, Union, Optional, Any
@@ -247,9 +247,10 @@ class Parser:
         the `reset()`-method of the parent class must be called from the
         `reset()`-method of the derived class."""
         self.visited = dict()  # type: Dict[int, Tuple[Optional[Node], StringView]]
-        self.recursion_counter = defaultdict(lambda: 0)  # type: DefaultDict[int, int]
+        self.recursion_counter = defaultdict(int)  # type: DefaultDict[int, int]
         self.cycle_detection = set()  # type: Set[ApplyFunc]

+    @cython.locals(location=cython.int, gap=cython.int, i=cython.int)
     def __call__(self: 'Parser', text: StringView) -> Tuple[Optional[Node], StringView]:
         """Applies the parser to the given text. This is a wrapper method that adds
         the business intelligence that is common to all parsers. The actual parsing is
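Note: two independent micro-optimizations meet in the hunk above. defaultdict(int) behaves exactly like defaultdict(lambda: 0), since int() returns 0, but avoids a Python-level lambda call on every missing key; and @cython.locals declares C integer types for the named locals when the module is compiled, while remaining a no-op in pure Python. The first point can be checked directly:

from collections import defaultdict

d1 = defaultdict(int)            # int() -> 0
d2 = defaultdict(lambda: 0)
assert d1['missing'] == d2['missing'] == 0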
@@ -702,7 +703,7 @@ class Grammar:
     python_src__ = ''  # type: str
     root__ = ZOMBIE_PARSER  # type: Parser
     # root__ must be overwritten with the root-parser by grammar subclass
-    parser_initialization__ = "pending"  # type: list[str]
+    parser_initialization__ = ["pending"]  # type: list[str]
     resume_rules__ = dict()  # type: Dict[str, ResumeList]
     # some default values
     # COMMENT__ = r''  # type: str  # r'#.*(?:\n|$)'
@@ -733,7 +734,7 @@ class Grammar:
         selected reference will be chosen. See PEP 520
         (www.python.org/dev/peps/pep-0520/) for an explanation of why.
         """
-        if cls.parser_initialization__ != "done":
+        if cls.parser_initialization__[0] != "done":
             cdict = cls.__dict__
             for entry, parser in cdict.items():
                 if isinstance(parser, Parser) and sane_parser_name(entry):
@@ -742,7 +743,7 @@ class Grammar:
                     cast(Forward, parser).parser.pname = entry
                 else:  # if not parser.pname:
                     parser.pname = entry
-            cls.parser_initialization__ = "done"
+            cls.parser_initialization__[0] = "done"

     def __init__(self, root: Parser = None) -> None:
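Note: replacing the string flag by a one-element list is more than cosmetics: cls.parser_initialization__[0] = "done" mutates the existing list object in place, whereas rebinding the class attribute is presumably unreliable (or creates a shadowing attribute) once Grammar is compiled as a cdef class. A pure-Python sketch of the pattern (GrammarBase is hypothetical):

class GrammarBase:
    parser_initialization__ = ["pending"]   # shared, mutable flag

    @classmethod
    def _initialize(cls):
        if cls.parser_initialization__[0] != "done":
            ...  # wire up the parsers exactly once
            cls.parser_initialization__[0] = "done"  # in place, no rebinding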
@@ -761,20 +762,22 @@
         # parsers not connected to the root object will be copied later
         # on demand (see Grammar.__getitem__()). Usually, the need to
         # do so only arises during testing.
-        self.root__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__)
-        self.root__.apply(self._add_parser__)
+        self.root_parser__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__)
+        self.root_parser__.apply(self._add_parser__)
+        assert 'root_parser__' in self.__dict__
+        assert self.root_parser__ == self.__dict__['root_parser__']

     def __getitem__(self, key):
         try:
             return self.__dict__[key]
         except KeyError:
-            parser_template = getattr(self, key, None)
+            parser_template = getattr(self.__class__, key, None)
             if parser_template:
                 # add parser to grammar object on the fly...
                 parser = copy.deepcopy(parser_template)
                 parser.apply(self._add_parser__)
-                # assert self[key] == parser
+                assert self[key] == parser
                 return self[key]
             raise UnknownParserError('Unknown parser "%s" !' % key)
@@ -832,7 +835,7 @@ class Grammar:
     def __call__(self,
                  document: str,
-                 start_parser: Union[str, Parser] = "root__",
+                 start_parser: Union[str, Parser] = "root_parser__",
                  track_history: bool = False) -> RootNode:
         """
         Parses a document with parser-combinators.
@@ -857,8 +860,6 @@ class Grammar:
             return predecessors[-1].pos + len(predecessors[-1]) if predecessors else 0

         # assert isinstance(document, str), type(document)
-        if self.root__ is None:
-            raise NotImplementedError()
         if self._dirty_flag__:
             self._reset__()
             for parser in self.all_parsers__:
@@ -901,7 +902,7 @@ class Grammar:
                 # in a test case this is not necessarily an error.
                 last_record = self.history__[-2] if len(self.history__) > 1 else None  # type: Optional[HistoryRecord]
-                if last_record and parser != self.root__ \
+                if last_record and parser != self.root_parser__ \
                         and last_record.status == HistoryRecord.MATCH \
                         and last_record.node.pos \
                         + len(last_record.node) >= len(self.document__) \
@@ -1353,6 +1354,7 @@ class ZeroOrMore(Option):
     EBNF-Example:  ``sentence = { /\w+,?/ } "."``
     """

+    @cython.locals(n=cython.int)
     def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
         results = ()  # type: Tuple[Node, ...]
         n = len(text) + 1  # type: int
@@ -1523,6 +1525,7 @@ class Series(NaryOperator):
         duplicate.tag_name = self.tag_name
         return duplicate

+    @cython.locals(pos=cython.int, reloc=cython.int)
     def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
         results = ()  # type: Tuple[Node, ...]
         text_ = text  # type: StringView
@@ -1546,8 +1549,8 @@ class Series(NaryOperator):
                     results += (node,)
                     break
             results += (node,)
-        assert len(results) <= len(self.parsers) \
-            or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE])
+        # assert len(results) <= len(self.parsers) \
+        #     or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE])
         node = Node(self.tag_name, results)
         if error:
             raise ParserError(node, text, first_throw=True)
......
@@ -154,7 +154,7 @@ class StringView:  # collections.abc.Sized
         else:
             return StringView(str(other) + str(self))

-    @cython.locals(start=cython.int, end=cython.int)
+    @cython.locals(start=cython.int, stop=cython.int)
     def __getitem__(self, index: Union[slice, int]) -> 'StringView':
         # assert isinstance(index, slice), "As of now, StringView only allows slicing."
         # assert index.step is None or index.step == 1, \
......
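Note: the renaming of end to stop in the @cython.locals declaration presumably brings the declaration in line with the variable actually used in __getitem__ (slice objects expose .start and .stop); a declared name only helps if it matches a local that occurs in the function body. Usage sketch (clip is a hypothetical function, assuming cython is importable or shadowed as in toolkit.py):

@cython.locals(start=cython.int, stop=cython.int)
def clip(start, stop, length):
    # compiled: start/stop become C ints; pure Python: unchanged semantics
    return max(0, start), min(stop, length)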
@@ -193,7 +193,7 @@ class Node:  # (collections.abc.Sized): Base class omitted for cython-compatibility
         duplicate.errors = copy.deepcopy(self.errors) if self.errors else []
         duplicate._pos = self._pos
         duplicate._len = self._len
-        if hasattr(self, '_xml_attr'):
+        if self.attr_active():
             duplicate._xml_attr = copy.deepcopy(self._xml_attr)
         return duplicate
@@ -398,12 +398,28 @@ class Node:  # (collections.abc.Sized): Base class omitted for cython-compatibility
         return self

+    def attr_active(self) -> bool:
+        """
+        Returns True if XML attributes of this node have ever been set
+        or queried, even if unsuccessfully.
+        """
+        try:
+            if self._xml_attr is not None:
+                return True
+        except AttributeError:
+            pass
+        return False
+
     @property
     def attr(self):
         """
         Returns a dictionary of XML attributes attached to the node.
         """
-        if not hasattr(self, '_xml_attr'):
+        try:
+            if self._xml_attr is None:  # cython compatibility
+                self._xml_attr = OrderedDict()
+        except AttributeError:
             self._xml_attr = OrderedDict()
         return self._xml_attr
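Note: hasattr() had to go because, once Node is compiled, the declared _xml_attr field always exists (preinitialized to None), so hasattr() can no longer mean "attributes were never touched". attr_active() covers both worlds: AttributeError in pure Python, None when compiled. A pure-Python sketch of the observable behavior (MiniNode is hypothetical):

from collections import OrderedDict

class MiniNode:
    def attr_active(self):
        try:
            return self._xml_attr is not None  # compiled: None until first use
        except AttributeError:                 # pure Python: not set at all
            return False

    @property
    def attr(self):
        if not self.attr_active():
            self._xml_attr = OrderedDict()
        return self._xml_attr

n = MiniNode()
assert not n.attr_active()  # untouched nodes serialize without attributes
n.attr['id'] = '1'
assert n.attr_active()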
@@ -495,7 +511,7 @@ class Node:  # (collections.abc.Sized): Base class omitted for cython-compatibility
         """Returns the opening string for the representation of `node`."""
         txt = [left_bracket, node.tag_name]
         # s += " '(pos %i)" % node.add_pos
-        if hasattr(node, '_xml_attr'):
+        if node.attr_active():
             txt.extend(' `(%s "%s")' % (k, v) for k, v in node.attr.items())
         if src:
             line, col = line_col(lbreaks, node.pos)
@@ -548,9 +564,9 @@ class Node:  # (collections.abc.Sized): Base class omitted for cython-compatibility
         if node.tag_name in omit_tags:
             return ''
         txt = ['<', node.tag_name]
-        has_reserved_attrs = hasattr(node, '_xml_attr') \
+        has_reserved_attrs = node.attr_active() \
             and any(r in node.attr for r in {'err', 'line', 'col'})
-        if hasattr(node, '_xml_attr'):
+        if node.attr_active():
             txt.extend(' %s="%s"' % (k, v) for k, v in node.attr.items())
         if src and not has_reserved_attrs:
             txt.append(' line="%i" col="%i"' % line_col(line_breaks, node.pos))
@@ -584,7 +600,7 @@ class Node:  # (collections.abc.Sized): Base class omitted for cython-compatibility
         printed on several lines to avoid unwanted gaps in the output.
         """
         return node.tag_name in inline_tags \
-            or (hasattr(node, '_xml_attr')
+            or (node.attr_active()
                 and node.attr.get('xml:space', 'default') == 'preserve')

     line_breaks = linebreaks(src) if src else []
@@ -713,7 +729,7 @@ class RootNode(Node):
         duplicate.errors = copy.deepcopy(self.errors) if self.errors else []
         duplicate._pos = self._pos
         duplicate._len = self._len
-        if hasattr(self, '_xml_attr'):
+        if self.attr_active():
             duplicate._xml_attr = copy.deepcopy(self._xml_attr)
         duplicate.all_errors = copy.deepcopy(self.all_errors)
         duplicate.error_flag = self.error_flag
@@ -741,7 +757,7 @@ class RootNode(Node):
         self._len = node._len
         self._pos = node._pos
         self.tag_name = node.tag_name
-        if hasattr(node, '_xml_attr'):
+        if node.attr_active():
             self._xml_attr = node._xml_attr
         self._content = node._content
         return self
......
@@ -43,8 +43,19 @@ except ImportError:
     from typing import Any, Iterable, Sequence, Set, Union, Dict, Hashable  # , cast

 try:
     import cython
+    cython_optimized = cython.compiled  # type: bool
 except ImportError:
     # import DHParser.Shadow as cython
+    cython_optimized = False  # type: bool
+    import DHParser.shadow_cython as cython

-__all__ = ('escape_re',
+__all__ = ('typing',
+           'cython',
+           'cython_optimized',
+           'escape_re',
            'escape_control_characters',
            'is_filename',
            'concurrent_ident',
......
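Note: the new fallback import means DHParser now ships its own stand-in module when the cython package is absent. DHParser/shadow_cython.py itself is not part of this diff; something along the following lines would cover the features used in this commit (an assumption, not the actual module):

# shadow_cython.py -- assumed minimal stand-in, not the real module
compiled = False      # mirrors cython.compiled in pure-Python mode

int = int             # type markers degrade to ordinary Python types
float = float

def locals(**_declarations):
    """Swallow C type declarations; return the function unchanged."""
    def decorator(func):
        return func
    return decorator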
@@ -3,10 +3,12 @@
 # rm DHParser/*.c
 # rm DHParser/*.so
-# rm DHParser/parse.c
-# rm DHParser/parse.cpython*.so
-rm DHParser/syntaxtree.c
-rm DHParser/syntaxtree.cpython*.so
+rm DHParser/parse.c
+rm DHParser/parse.cpython*.so
+# rm DHParser/syntaxtree.c
+# rm DHParser/syntaxtree.cpython*.so
+# rm DHParser/transform.c
+# rm DHParser/transform.cpython*.so
 # CFLAGS="-O3 -march=native -mtune=native"
 python3 setup.py build_ext --inplace
......
@@ -60,7 +60,7 @@ class ArithmeticGrammar(Grammar):
     expression = Forward()
     variable = Forward()
     source_hash__ = "120070baa84f5a2bd1bbb900627078fc"
-    parser_initialization__ = "upon instantiation"
+    parser_initialization__ = ["upon instantiation"]
     resume_rules__ = {}
     COMMENT__ = r''
     WHITESPACE__ = r'\s*'
......
@@ -58,7 +58,7 @@ class BibTeXGrammar(Grammar):
     """
     text = Forward()
     source_hash__ = "e402951b290cb0fce63ba0cbca3f23e9"
-    parser_initialization__ = "upon instantiation"
+    parser_initialization__ = ["upon instantiation"]
     resume_rules__ = {}
     COMMENT__ = r'(?i)%.*(?:\n|$)'
     WHITESPACE__ = r'\s*'
......
@@ -58,7 +58,7 @@ class EBNFGrammar(Grammar):
     """
     expression = Forward()
     source_hash__ = "5e9e65a057bec7da29989dba47f40394"
-    parser_initialization__ = "upon instantiation"
+    parser_initialization__ = ["upon instantiation"]
     resume_rules__ = {}
     COMMENT__ = r'#.*(?:\n|$)'
     WHITESPACE__ = r'\s*'
......
@@ -58,7 +58,7 @@ class LaTeXGrammar(Grammar):
     tabular_config = Forward()
     text_element = Forward()
     source_hash__ = "e09808ecd485c07b3455c3a2bf4eada3"
-    parser_initialization__ = "upon instantiation"
+    parser_initialization__ = ["upon instantiation"]
     resume_rules__ = {}
     COMMENT__ = r'%.*'
     WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
......
@@ -68,7 +68,7 @@ class XMLGrammar(Grammar):
     ignoreSectContents = Forward()
     markupdecl = Forward()
     source_hash__ = "1c64c8f613952c5ab8e851da15f65ec3"
-    parser_initialization__ = "upon instantiation"
+    parser_initialization__ = ["upon instantiation"]
     resume_rules__ = {}
     COMMENT__ = r''
     WHITESPACE__ = r'\s*'
......
@@ -60,7 +60,7 @@ class XMLSnippetGrammar(Grammar):
     Name = Forward()
     element = Forward()
     source_hash__ = "2efb839574bee3f63b5b9d1ea5c96386"
-    parser_initialization__ = "upon instantiation"
+    parser_initialization__ = ["upon instantiation"]
     resume_rules__ = {}
     COMMENT__ = r''
     WHITESPACE__ = r'\s*'
......
@@ -19,7 +19,7 @@ setup(
         'DHParser/syntaxtree.py',
         'DHParser/parse.py',
         'DHParser/transform.py',
-        # 'DHParser/compile.py',
+        'DHParser/compile.py',
         # 'DHParser/ebnf.py'
     ],
     nthreads=0, annotate=False),
......
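Note: uncommenting 'DHParser/compile.py' adds the module to the list handed to cythonize(), so compile.py is now built as an extension module too. For context, the surrounding setup() call presumably looks roughly like this (reconstructed; only the listed file names appear in the diff):

from setuptools import setup
from Cython.Build import cythonize

setup(
    # ... package metadata elided ...
    ext_modules=cythonize(
        ['DHParser/syntaxtree.py',
         'DHParser/parse.py',
         'DHParser/transform.py',
         'DHParser/compile.py',   # newly compiled as of this commit
         # 'DHParser/ebnf.py'
        ],
        nthreads=0, annotate=False),
)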
@@ -145,7 +145,7 @@ class TestFlowControl:
     def test_lookbehind_indirect(self):
         class LookbehindTestGrammar(Grammar):
-            parser_initialization__ = "upon instantiation"
+            parser_initialization__ = ["upon instantiation"]
             ws = RegExp('\\s*')
             end = RegExp('END')
             SUCC_LB = RegExp('\\s*?\\n')
......
@@ -292,6 +292,7 @@ class TestSerialization:
         assert tree.as_xml() == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml
         tree.attr['xml:space'] = 'preserve'
+        print(tree.attr)
         xml = tree.as_xml()
         assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml
......