Commit 56362aa1 authored by eckhart

full cython compatibility restored!

parent 5d87459c
......@@ -4,3 +4,15 @@
#cython: c_string_encoding=utf-8
# cpdef visitor_name(node_name)
cdef class Compiler:
cdef public object tree
cdef public list context
cdef public bint _None_check
cdef public bint _dirty_flag
cdef public bint _debug
cdef public set _debug_already_compiled
cdef public list finalizers
# cpdef fallback_compiler(self, node)
cpdef compile(self, node)
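
These declarations live in a .pxd file that augments the pure-Python compile.py module: when the module is compiled with Cython, the cdef/cpdef declarations turn the listed attributes into typed C-level slots, while the unmodified .py file keeps working under plain CPython. A minimal sketch of the pattern, with hypothetical names (mymodule, Counter):

# mymodule.py -- ordinary Python, importable without Cython
class Counter:
    def __init__(self):
        self.count = 0       # becomes a typed C slot when compiled

    def tick(self):          # becomes a cpdef method when compiled
        self.count += 1
        return self.count

# mymodule.pxd -- picked up automatically by `cythonize mymodule.py`
#   cdef class Counter:
#       cdef public int count
#       cpdef tick(self)
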
......@@ -35,8 +35,9 @@ compiler object.
"""
import copy
import functools
import os
from typing import Any, Optional, Tuple, List, cast
from typing import Any, Optional, Tuple, List, Union, Callable, cast
from DHParser.configuration import get_config_value
from DHParser.preprocess import with_source_mapping, PreprocessorFunc, SourceMapFunc
......@@ -123,7 +124,6 @@ class Compiler:
def reset(self):
# self.source = ''
self.finalizers = [] # type: List[Tuple[Callable, Tuple]]
self.tree = ROOTNODE_PLACEHOLDER # type: RootNode
self.context = [] # type: List[Node]
self._None_check = True # type: bool
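
The finalizers list declared in the .pxd hunk above presumably collects (callable, argument-tuple) pairs that are invoked once the tree walk has finished. A hedged sketch of that protocol (the callback is made up):

finalizers = []                     # stand-in for Compiler.finalizers

def cleanup(tag):                   # hypothetical finalizer callback
    print('finished compiling', tag)

finalizers.append((cleanup, ('root',)))
# after the tree walk, presumably:
for callback, args in finalizers:
    callback(*args)
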
......@@ -239,11 +239,15 @@ def logfile_basename(filename_or_text, function_or_class_or_instance) -> str:
return name[:i] + '_out' if i >= 0 else name
GrammarCallable = Union[Grammar, Callable[[str], Node], functools.partial]
CompilerCallable = Union[Compiler, Callable[[Node], Any], functools.partial]
def compile_source(source: str,
preprocessor: Optional[PreprocessorFunc], # str -> str
parser: Grammar, # str -> Node (concrete syntax tree (CST))
parser: GrammarCallable, # str -> Node (concrete syntax tree (CST))
transformer: TransformationFunc, # Node (CST) -> Node (abstract ST (AST))
compiler: Compiler, # Node (AST), Source -> Any
compiler: CompilerCallable, # Node (AST), Source -> Any
preserve_ast: bool = False) -> Tuple[Optional[Any], List[Error], Optional[Node]]:
"""
Compiles a source in four stages:
......
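
The four stages named in the docstring are preprocessing, parsing, AST transformation and compilation. A hedged usage sketch; the get_*-factory functions are assumptions here, modelled on the boilerplate DHParser generates for a DSL project:

from DHParser.compile import compile_source

source_text = '2 + 3 * 4'                # some input in the DSL
result, errors, ast = compile_source(
    source_text,
    get_preprocessor(),   # str -> str, may also be None
    get_grammar(),        # str -> Node (concrete syntax tree)
    get_transformer(),    # transforms the CST into an AST in place
    get_compiler(),       # Node (AST) -> Any
    preserve_ast=True)    # keep the AST as the third element of the tuple
for err in errors:
    print(err)
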
......@@ -2,3 +2,39 @@
#cython: language_level=3
#cython: c_string_type=unicode
#cython: c_string_encoding=utf-8
cdef class EBNFDirectives:
cdef public str whitespace
cdef public str comment
cdef public set literalws
cdef public set tokens
cdef public dict filter
cdef public dict error
cdef public dict skip
cdef public dict resume
cdef public set drop
cdef public object _super_ws
# cdef class EBNFCompiler:
# cdef public int grammar_id
# cdef str _result
# cdef public set re_flags
# cdef public object rules
# cdef public list current_symbols
# cdef public dict symbols
# cdef public set variables
# cdef public set recursive
# cdef public dict definitions
# cdef public set required_keywords
# cdef public list deferred_tasks
# cdef public str root_symbol
# cdef public object directives
# cdef public set defined_directives
# cdef public set consumed_custom_errors
# cdef public set consumed_skip_rules
# cpdef _check_rx(self, node, rx)
# cpdef non_terminal(self, node, parser_class, custom_args)
# cpdef _error_customization(self, node)
......@@ -589,7 +589,7 @@ class EBNFCompiler(Compiler):
def __init__(self, grammar_name="DSL", grammar_source=""):
self.grammar_id = 0
self.grammar_id = 0 # type: int
super(EBNFCompiler, self).__init__() # calls the reset()-method
self.set_grammar_name(grammar_name, grammar_source)
......@@ -1092,7 +1092,7 @@ class EBNFCompiler(Compiler):
self.tree.new_error(node, 'Directive "literalws" allows only `left`, `right`, '
'`both` or `none`, not `%s`' % ", ".join(values))
wsp = {'left', 'right'} if 'both' in values \
else {} if 'none' in values else values
else set() if 'none' in values else values
self.directives.literalws = wsp
elif key in {'tokens', 'preprocessor_tokens'}:
......
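
The fix from `{}` to `set()` matters because `{}` is an empty dict literal, not an empty set, so directives.literalws would have received a value of the wrong type whenever `none` was specified:

empty = {}                        # dict -- Python has no empty-set literal
assert isinstance(empty, dict)
assert isinstance(set(), set)
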
......@@ -3,8 +3,12 @@
#cython: c_string_type=unicode
#cython: c_string_encoding=utf-8
# cpdef copy_parser_attrs(src, duplicate)
cdef class Parser:
cdef public str pname
cdef public bint anonymous
cdef public bint drop_content
cdef public str tag_name
cdef _grammar
cdef object visited
......@@ -13,19 +17,28 @@ cdef class Parser:
cpdef _parse(self, text)
cpdef reset(self)
# def __call__(self, text)
# def __add__(self, other)
# def __or__(self, other)
cpdef _parse(self, text)
cpdef _apply(self, func, flip)
# cpdef push_rollback__(self, int location, func)
# cpdef rollback_to__(self, int location)
# cpdef line_col__(self, text)
cpdef apply(self, func)
# cpdef mixin_comment(whitespace, str)
# cpdef mixin_noempty(whitespace)
cdef class Grammar:
cdef dict __dict__
cdef public set all_parsers__
cdef public object comment_rx__
cdef public object start_parser__
cdef bint _dirty_flag__
cdef public bint history_tracking__
cdef public bint memoization__
cdef public bint left_recursion_handling__
cdef public bint flatten_tree__
cdef public int left_recursion_depth__
cdef public int max_parser_dropouts__
# cdef public object root_parser__ # do not uncomment this!!!
cdef public object tree__
cdef public object document__
......@@ -44,9 +57,6 @@ cdef class Grammar:
cdef class PreprocessorToken(Parser):
pass
cdef class ZombieParser(Parser):
pass
cdef class Token(Parser):
cdef public str text
cdef public int len
......
......@@ -123,6 +123,7 @@ class ParserError(Exception):
ResumeList = List[RxPatternType] # list of regular expressions
@cython.locals(upper_limit=cython.int, closest_match=cython.int, pos=cython.int)
def reentry_point(rest: StringView, rules: ResumeList, comment_regex) -> int:
"""
Finds the point where parsing should resume after a ParserError has been caught.
......@@ -145,6 +146,7 @@ def reentry_point(rest: StringView, rules: ResumeList, comment_regex) -> int:
closest_match = upper_limit
comments = None # type: Optional[Iterator]
@cython.locals(a=cython.int, b=cython.int)
def next_comment() -> Tuple[int, int]:
nonlocal rest, comments
if comments:
......@@ -160,6 +162,7 @@ def reentry_point(rest: StringView, rules: ResumeList, comment_regex) -> int:
# nonlocal rest
# return rest.find(s, start), len(s)
@cython.locals(start=cython.int, end=cython.int)
def rx_search(rx, start: int = 0) -> Tuple[int, int]:
nonlocal rest
m = rest.search(rx, start)
......@@ -168,6 +171,7 @@ def reentry_point(rest: StringView, rules: ResumeList, comment_regex) -> int:
return rest.index(start), end - start
return -1, 0
@cython.locals(a=cython.int, b=cython.int, k=cython.int, length=cython.int)
def entry_point(search_func, search_rule) -> int:
a, b = next_comment()
k, length = search_func(search_rule)
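
The @cython.locals(...) decorators added in this hunk declare C integer types for the local variables, so that Cython compiles these helpers into tight loops; under plain CPython the decorators are harmless no-ops, supplied by DHParser.shadow_cython when Cython is absent (see the stringview hunk below). A small self-contained sketch of the pattern, with a made-up function:

try:
    import cython                              # real typing decorators
except ImportError:
    import DHParser.shadow_cython as cython    # no-op stand-ins

@cython.locals(i=cython.int, total=cython.int)
def triangle(n: int) -> int:
    total = 0
    for i in range(n + 1):    # i and total become C ints when compiled
        total += i
    return total
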
......@@ -886,8 +890,8 @@ class Grammar:
resume_rules__ = dict() # type: Dict[str, ResumeList]
anonymous__ = re.compile(r'_') # type: RxPatternType
# some default values
# COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
# WSP_RE__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
WSP_RE__ = mixin_comment(whitespace=r'[\t ]*', comment=COMMENT__) # type: str
static_analysis_pending__ = [True] # type: List[bool]
......@@ -934,12 +938,15 @@ class Grammar:
def __init__(self, root: Parser = None) -> None:
self.all_parsers__ = set() # type: Set[Parser]
# add compiled regular expression for comments, if it does not already exist
if not hasattr(self, 'comment_rx__'):
self.comment_rx__ = re.compile(self.COMMENT__) \
if hasattr(self, 'COMMENT__') and self.COMMENT__ else RX_NEVER_MATCH
if not hasattr(self, 'comment_rx__') or self.comment_rx__ is None:
if hasattr(self.__class__, 'COMMENT__') and self.__class__.COMMENT__:
self.comment_rx__ = re.compile(self.__class__.COMMENT__)
else:
self.comment_rx__ = RX_NEVER_MATCH
else:
assert ((self.COMMENT__ and self.COMMENT__ == self.comment_rx__.pattern)
or (not self.COMMENT__ and self.comment_rx__ == RX_NEVER_MATCH))
assert ((self.__class__.COMMENT__ and
self.__class__.COMMENT__ == self.comment_rx__.pattern)
or (not self.__class__.COMMENT__ and self.comment_rx__ == RX_NEVER_MATCH))
self.start_parser__ = None # type: Optional[Parser]
self._dirty_flag__ = False # type: bool
self.history_tracking__ = False # type: bool
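
Reading COMMENT__ through self.__class__ rather than self makes explicit that the default is a class variable which grammar subclasses override, and the tightened assertion rules out an instance attribute silently shadowing it. In plain Python the distinction looks like this (Base and MyGrammar are made-up stand-ins):

class Base:
    COMMENT__ = r''                 # class-level default, as above

class MyGrammar(Base):
    COMMENT__ = r'#.*(?:\n|$)'      # subclass override

g = MyGrammar()
assert g.__class__.COMMENT__ == MyGrammar.COMMENT__
g.COMMENT__ = r';.*'                # an instance attribute would shadow it
assert g.COMMENT__ != g.__class__.COMMENT__
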
......@@ -1777,6 +1784,7 @@ MessagesType = List[Tuple[Union[str, Any], str]]
NO_MANDATORY = 1000
@cython.locals(i=cython.int, location=cython.int)
def mandatory_violation(grammar: Grammar,
text_: StringView,
failed_on_lookahead: bool,
......
......@@ -93,7 +93,7 @@ def create_project(path: str):
os.chdir(path)
if os.path.exists(TEST_DIRNAME):
if not os.path.isdir(TEST_DIRNAME):
print('Cannot overwrite existing file "grammar_tests"')
print('Cannot overwrite existing file "%s"' % TEST_DIRNAME)
sys.exit(1)
else:
os.mkdir(TEST_DIRNAME)
......
......@@ -43,7 +43,7 @@ except ImportError:
import DHParser.shadow_cython as cython
__all__ = ('StringView', 'EMPTY_STRING_VIEW', 'cython_optimized')
__all__ = ('StringView', 'EMPTY_STRING_VIEW')
def first_char(text, begin: int, end: int, chars) -> int:
......
......@@ -8,25 +8,39 @@ cdef class Node:
cdef public int _pos
cdef public object _result
cdef public tuple children
cdef public int _len
cdef public str tag_name
cdef object _xml_attr
cpdef get(self, index_or_tagname, surrogate)
cpdef is_anonymous(self)
# cpdef equals(self, other, ignore_attr_order)
# cpdef get(self, index_or_tagname, surrogate)
# cpdef anonymous(self)
# cpdef result(self)
cpdef __set_result(self, result)
cpdef _content(self)
# cpdef pos(self)
cpdef with_pos(self, pos)
# cpdef has_attr(self, attr)
# cpdef attr(self)
# cpdef get_attr(self, attribute, default)
# cpdef compare_attr(self, other)
# cpdef _tree_repr(self, tab, open_fn, close_fn, data_fn, density, inline, inline_fn)
# cpdef as_sxpr(self, src, indentation, compact)
# cpdef as_xml(self, src, indentation, inline_tags, omit_tags, empty_tags)
# cpdef index(self, what, start, stop)
# cpdef select_if(self, match_function, include_root, reverse)
# cpdef select(self, tag_names, include_root)
# cpdef pick(self, criterion, reverse)
# cpdef tree_size(self)
# cpdef locate(self, location)
# cpdef find_parend(self, node)
# cpdef select_context_if(self, match_function, include_root, reverse)
# cpdef select_context(self, tag_names, include_root)
# cpdef pick_content(self, criterion, reverse)
# cpdef locate_content(self, location)
# cpdef _reconstruct_context_recursive(self, node)
# cpdef reconstruct_context(self, node)
# cpdef milestone_segment(self, begin, end)
# cpdef _tree_repr(self, tab, open_fn, close_fn, data_fn, density, inline, inline_fn)
# cpdef as_sxpr(self, src, indentation, compact)
# cpdef as_xml(self, src, indentation, inline_tags, omit_tags, empty_tags)
cpdef to_json_obj(self)
# cpdef serialize(self, how)
cdef class FrozenNode(Node):
......
......@@ -208,7 +208,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
is prefixed with a colon ":". A node, the tag name of which
starts with a colon ":" or the tag name of which is the
empty string is considered as "anonymous". See
`Node.is_anonymous()`
`Node.anonymous()`-property
result (str or tuple): The result of the parser which
generated this node, which can be either a string or a
......@@ -261,7 +261,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
that generated the node and the parser's result.
"""
self._pos = -1 # type: int
# Assignment to self.result initializes the attr _result, children and _len
# Assignment to self.result initializes the attr _result and children
# The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes
if leafhint:
self._result = result # type: StrictResultType # cast(StrictResultType, result)
......@@ -597,6 +597,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return False
raise ValueError('Leaf-node cannot contain other nodes')
@cython.locals(start=cython.int, stop=cython.int, i = cython.int)
def index(self, what: CriteriaType, start: int = 0, stop: int = sys.maxsize) -> int:
"""
Returns the first index of the child that fulfills the criterion
......@@ -690,6 +691,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
except StopIteration:
return None
@cython.locals(location=cython.int, end=cython.int)
def locate(self, location: int) -> Optional['Node']:
"""
Returns the leaf-Node that covers the given `location`, where
......@@ -753,6 +755,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
except StopIteration:
return None
@cython.locals(location=cython.int, end=cython.int)
def locate_context(self, location: int) -> Optional[List['Node']]:
"""
Like `Node.locate()`, only that the entire context (i.e. chain of descendants)
......@@ -1038,9 +1041,9 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
if node.has_attr():
txt.extend(' %s="%s"' % (k, v) for k, v in node.attr.items())
if src and not has_reserved_attrs:
txt.append(' line="%i" col="%i"' % line_col(line_breaks, node.pos))
if src == '' and not (node.has_attr() and '_pos' in node.attr) and node.pos >= 0:
txt.append(' _pos="%i"' % node.pos)
txt.append(' line="%i" col="%i"' % line_col(line_breaks, node._pos))
if src == '' and not (node.has_attr() and '_pos' in node.attr) and node._pos >= 0:
txt.append(' _pos="%i"' % node._pos)
if root and id(node) in root.error_nodes and not has_reserved_attrs:
txt.append(' err="%s"' % ''.join(str(err).replace('"', "'")
for err in root.get_errors(node)))
......@@ -1117,12 +1120,13 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
# serialization meta-method ###
@cython.locals(vsize=cython.int, i=cython.int, threshold=cython.int)
def serialize(self: 'Node', how: str = 'default') -> str:
"""
Serializes the tree starting with `node` either as S-expression, XML, JSON,
or in compact form. Possible values for `how` are 'S-expression',
'XML', 'JSON', 'compact' accordingly, or 'AST', 'CST', 'default' in which case
the value of respective configuration variable determines the
or in compact form. Possible values for `how` are 'S-expression', 'XML',
'JSON', 'compact' and 'smart' accordingly, or 'AST', 'CST', 'default' in
which case the value of respective configuration variable determines the
serialization format. (See module `configuration.py`.)
"""
switch = how.lower()
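
A hedged usage sketch of the serialization dispatch, assuming parse_sxpr from the same module to build a small tree:

from DHParser.syntaxtree import parse_sxpr

tree = parse_sxpr('(sentence (word "hello") (blank " ") (word "world"))')
print(tree.serialize('XML'))            # explicit format
print(tree.serialize('S-expression'))
print(tree.serialize())                 # 'default': read from the configuration
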
......@@ -1156,7 +1160,14 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
if sxpr.find('\n') >= 0:
sxpr = re.sub(r'\n(\s*)\(', r'\n\1', sxpr)
sxpr = re.sub(r'\n\s*\)', r'', sxpr)
sxpr = re.sub(r'\)[ \t]*\n', r'\n', sxpr)
# sxpr = re.sub(r'(?<=\n[^`]*)\)[ \t]*\n', r'\n', sxpr)
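# line-by-line post-processing instead of the single regex above: lines that
# contain a backtick-quoted literal only collapse doubled brackets ('))' -> ')'),
# all other lines drop their closing brackets entirely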
s = sxpr.split('\n')
for i in range(len(s)):
if '`' in s[i]:
s[i] = s[i].replace('))', ')')
else:
s[i] = s[i].replace(')', '')
sxpr = '\n'.join(s)
sxpr = re.sub(r'^\(', r'', sxpr)
return sxpr
else:
......@@ -1553,8 +1564,11 @@ def parse_xml(xml: Union[str, StringView], ignore_pos: bool = False) -> Node:
beginning after the end of the attr.
"""
attributes = OrderedDict() # type: OrderedDict[str, str]
eot = s.find('>')
restart = 0
for match in s.finditer(re.compile(r'\s*(?P<attr>\w+)\s*=\s*"(?P<value>.*?)"\s*')):
if s.index(match.start()) >= eot:
break
d = match.groupdict()
attributes[d['attr']] = d['value']
restart = s.index(match.end())
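
The new eot guard stops the attribute scan at the tag's closing '>', so attribute-like text inside the element's content is no longer misread as an attribute. A self-contained illustration with plain re instead of StringView (the input is made up):

import re

RX_ATTR = re.compile(r'\s*(?P<attr>\w+)\s*=\s*"(?P<value>.*?)"\s*')
s = 'id="r1">width="5" is the label text</rect>'   # rest after '<rect'
eot = s.find('>')
attrs = {m.group('attr'): m.group('value')
         for m in RX_ATTR.finditer(s) if m.start() < eot}
assert attrs == {'id': 'r1'}    # width="5" in the body is ignored
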
......@@ -1613,7 +1627,7 @@ def parse_xml(xml: Union[str, StringView], ignore_pos: bool = False) -> Node:
s, child = parse_full_content(s)
res.append(child)
s, closing_tagname = parse_closing_tag(s)
assert tagname == closing_tagname
assert tagname == closing_tagname, tagname + ' != ' + closing_tagname
if len(res) == 1 and res[0].tag_name == TOKEN_PTYPE:
result = res[0].result
else:
......@@ -1630,7 +1644,7 @@ def parse_xml(xml: Union[str, StringView], ignore_pos: bool = False) -> Node:
match_header = xml.search(re.compile(r'<(?!\?)'))
start = xml.index(match_header.start()) if match_header else 0
_, tree = parse_full_content(xml[start:])
assert _.match(RX_WHITESPACE_TAIL)
assert _.match(RX_WHITESPACE_TAIL), _
return tree
......
......@@ -677,7 +677,7 @@ def create_test_templates(symbols_or_ebnf: Union[str, SymbolsDictType],
the grammar's symbols under that section or an EBNF-grammar
or file name of an EBNF-grammar from which the symbols shall
be extracted.
path: the path to the grammar-test directory (usually 'grammar_tests').
path: the path to the grammar-test directory (usually 'test_grammar').
If the last element of the path does not exist, the directory
will be created.
fmt: the test-file-format. At the moment only '.ini' is supported
......
......@@ -2,3 +2,6 @@
#cython: language_level=3
#cython: c_string_type=unicode
#cython: c_string_encoding=utf-8
cpdef issubtype(subtype, base_type)
cpdef isgenerictype(t)
\ No newline at end of file
......@@ -25,7 +25,7 @@ except ModuleNotFoundError:
def recompile_grammar(grammar_src, force):
grammar_tests_dir = os.path.join(scriptpath, 'grammar_tests')
grammar_tests_dir = os.path.join(scriptpath, 'test_grammar')
if not os.path.exists(grammar_tests_dir) \
or not any(os.path.isfile(os.path.join(grammar_tests_dir, entry))
for entry in os.listdir(grammar_tests_dir)):
......@@ -45,7 +45,7 @@ def recompile_grammar(grammar_src, force):
def run_grammar_tests(glob_pattern):
DHParser.log.start_logging(LOGGING)
error_report = testing.grammar_suite(
os.path.join(scriptpath, 'grammar_tests'),
os.path.join(scriptpath, 'test_grammar'),
get_grammar, get_transformer,
fn_patterns=[glob_pattern], report='REPORT', verbose=True)
return error_report
......