10.12., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 7c0b34d6 authored by Eckhart Arnold

- cython stringview treatment now settled on stringview.pxd solution.

parent 25cfadc1
This diff is collapsed.
......@@ -20,17 +20,16 @@ compilation of domain specific languages based on an EBNF-grammar.
"""
import os
from typing import Any, cast, List, Tuple, Union, Iterator, Iterable
from DHParser.ebnf import EBNFCompiler, grammar_changed, \
get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object, \
re, typing
from DHParser.error import Error, is_error, has_errors, only_errors
from DHParser.parser import Grammar, Compiler, compile_source, nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc
from DHParser.error import Error, is_error, has_errors, only_errors
from typing import Any, cast, Tuple, Union, Iterator, Iterable
from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object, \
re
__all__ = ('GrammarError',
'CompilationError',
......@@ -170,7 +169,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
# read grammar
grammar_src = load_if_file(grammar_representation)
if is_python_code(grammar_src):
parser_py, messages, AST = grammar_src, [], None
parser_py, messages, AST = grammar_src, [], None # type: str, List[Error], Node
else:
with logging(False):
parser_py, messages, AST = compile_source(grammar_src, None,
......@@ -371,7 +370,7 @@ def run_compiler(text_or_file: str, compiler_suite: str) -> Any:
return compileDSL(text_or_file, preprocessor(), parser(), ast(), compiler())
def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> Iterable[Error]:
"""
Compiles a source file with a given compiler and writes the
result to a file.
......@@ -416,7 +415,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
cfactory = get_ebnf_compiler
compiler1 = cfactory()
compiler1.set_grammar_name(compiler_name, source_file)
result, messages, ast = compile_source(source, sfactory(), pfactory(), tfactory(), compiler1)
result, messages, AST = compile_source(source, sfactory(), pfactory(), tfactory(), compiler1)
if has_errors(messages):
return messages
......@@ -522,7 +521,7 @@ def recompile_grammar(ebnf_filename, force=False) -> bool:
base, ext = os.path.splitext(ebnf_filename)
compiler_name = base + 'Compiler.py'
error_file_name = base + '_ebnf_ERRORS.txt'
messages = [] # type: Iterable[str]
messages = [] # type: Iterable[Error]
if (not os.path.exists(compiler_name) or force or
grammar_changed(compiler_name, ebnf_filename)):
# print("recompiling parser for: " + ebnf_filename)
......
......@@ -44,7 +44,7 @@ class Error:
MANDATORY_CONTINUATION = 1001
def __init__(self, message: str, level: int = ERROR, code: Hashable = 0,
pos: int = -1, line: int = -1, column: int = -1):
pos: int = -1, line: int = -1, column: int = -1) -> None:
self.message = message
assert level >= 0
self.level = level or Error.ERROR
......
......@@ -60,14 +60,14 @@ import abc
import copy
import os
from functools import partial
from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union
from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name, load_if_file, \
re, typing
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, TOKEN_PTYPE, \
ZOMBIE_PARSER
from DHParser.error import Error, is_error, has_errors, linebreaks, line_col
from typing import Any, Callable, cast, Dict, Iterator, List, Set, Tuple, Union, Optional
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_PARSER
from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name, \
load_if_file, re
__all__ = ('PreprocessorFunc',
'HistoryRecord',
......@@ -690,7 +690,6 @@ class Grammar:
parser.parser._name = entry
cls.parser_initialization__ = "done"
def __init__(self, root: Parser=None) -> None:
# if not hasattr(self.__class__, 'parser_initialization__'):
# self.__class__.parser_initialization__ = "pending"
......@@ -1603,7 +1602,7 @@ class Lookbehind(FlowOperator):
while isinstance(p, Synonym):
p = p.parser
assert isinstance(p, RegExp), str(type(p))
self.regexp = p.main.regexp if isinstance(p, RE) else p.regexp
self.regexp = cast(RE, p).main.regexp if isinstance(p, RE) else p.regexp
super(Lookbehind, self).__init__(parser, name)
def __call__(self, text: StringView) -> Tuple[Node, StringView]:
......@@ -1941,10 +1940,10 @@ class Compiler:
def compile_source(source: str,
preprocessor: PreprocessorFunc, # str -> str
parser: Grammar, # str -> Node (concrete syntax tree (CST))
preprocessor: PreprocessorFunc, # str -> str
parser: Grammar, # str -> Node (concrete syntax tree (CST))
transformer: TransformationFunc, # Node -> Node (abstract syntax tree (AST))
compiler: Compiler) -> Tuple[Any, List[str], Node]: # Node (AST) -> Any
compiler: Compiler) -> Tuple[Any, List[Error], Node]: # Node (AST) -> Any
"""
Compiles a source in four stages:
1. Scanning (if needed)
......
import cython
# type hints for Cython python -> C compiler
cdef int first_char(text, int begin, int end)
cdef int last_char(text, int begin, int end)
cdef inline int pack_index(int index, int len)
@cython.locals(cbegin=cython.int, cend=cython.int)
cpdef real_indices(begin, end, int len)
......@@ -26,30 +26,28 @@ does not work for unicode strings. Hence, the StringView class.
import collections
from typing import Optional, Iterable, Tuple
try:
import cython
except ImportError:
# from DHParser import foreign_cython as cython
pass
# needs to be imported because otherwise cython hiccups on
# cpdef-functions with cython parameter annotations, like
# cpdef real_indices(begin, end, len: cython.int):
# ^
from DHParser import foreign_cython as cython
__all__ = ('StringView', 'EMPTY_STRING_VIEW')
@cython.cfunc
@cython.inline
def pack_index(index: cython.int, len: cython.int) -> cython.int:
def first_char(text, begin, end) -> int:
    """Skip leading blanks in ``text[begin:end]``.

    Advances ``begin`` past every space, newline or tab character and
    returns the resulting index. If the range contains only such
    characters, the scan stops at ``end``; if ``begin >= end`` on entry,
    ``begin`` is returned unchanged.
    """
    # The parameter itself is advanced (no extra local) so that the
    # signature keeps matching the declaration in stringview.pxd.
    while begin < end:
        if text[begin] not in ' \n\t':
            break
        begin += 1
    return begin
def last_char(text, begin, end) -> int:
    """Skip trailing blanks in ``text[begin:end]``.

    Moves ``end`` backwards past every space, newline or tab character
    at the tail of the range and returns the new *exclusive* end index,
    so that ``text[begin:last_char(text, begin, end)]`` carries no
    trailing blanks. If the range holds only such characters, ``begin``
    is returned; if ``end <= begin`` on entry, ``end`` is returned
    unchanged.
    """
    # `end` is an exclusive bound: the last character that belongs to the
    # range is text[end - 1]. Testing text[end] would inspect the character
    # *after* the range (never stripping anything) and would raise an
    # IndexError whenever end == len(text).
    while end > begin and text[end - 1] in ' \n\t':
        end -= 1
    return end
def pack_index(index, len) -> int:
    """Map a possibly negative or out-of-range index into ``0 .. len``.

    A negative ``index`` is first counted from the end (``index + len``).
    The result is then clamped: values still below zero become ``0``,
    values above ``len`` become ``len``.

    The parameter name ``len`` shadows the builtin; it is kept because it
    is part of the function's interface.
    """
    if index < 0:
        index += len  # negative indices count from the end
    if index < 0:
        return 0
    if index > len:
        return len
    return index
@cython.ccall
@cython.locals(cbegin=cython.int, cend=cython.int)
def real_indices(begin, end, len: cython.int):
def real_indices(begin, end, len) -> Tuple[int, int]:
    """Turn slice-style bounds into a concrete ``(begin, end)`` index pair.

    ``None`` for ``begin`` stands for ``0`` and ``None`` for ``end``
    stands for ``len``. Both bounds are then passed through
    ``pack_index`` together with ``len`` and returned as a tuple.
    """
    # The local names `cbegin` and `cend` are kept as they are: the
    # accompanying .pxd declaration refers to them via cython.locals.
    if begin is None:
        cbegin = 0
    else:
        cbegin = begin
    if end is None:
        cend = len
    else:
        cend = end
    return pack_index(cbegin, len), pack_index(cend, len)
......@@ -63,7 +61,6 @@ class StringView(collections.abc.Sized):
copying, i.e. slices are just a view on a section of the sliced
string.
"""
cython.declare(begin=cython.int, end=cython.int, len=cython.int, fullstring_flag=cython.bint)
__slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag']
def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
......@@ -108,14 +105,11 @@ class StringView(collections.abc.Sized):
else:
return StringView(str(other) + str(self))
@cython.locals(start=cython.int, stop=cython.int)
def __getitem__(self, index):
# assert isinstance(index, slice), "As of now, StringView only allows slicing."
# assert index.step is None or index.step == 1, \
# "Step sizes other than 1 are not yet supported by StringView"
# start, stop = real_indices(index.start, index.stop, self.len)
start = pack_index(0 if index.start is None else index.start, self.len)
stop = pack_index(self.len if index.stop is None else index.stop, self.len)
start, stop = real_indices(index.start, index.stop, self.len)
return StringView(self.text, self.begin + start, self.begin + stop)
def count(self, sub, start=None, end=None) -> int:
......@@ -176,21 +170,14 @@ class StringView(collections.abc.Sized):
def search(self, regex):
return regex.search(self.text, pos=self.begin, endpos=self.end)
@cython.locals(begin=cython.int, end=cython.int)
def strip(self):
if self.fullstring_flag:
return self.text.strip()
else:
begin = self.begin
end = self.end
while begin < end and self.text[begin] in ' \n\t':
begin += 1
while end > begin and self.text[end] in ' \n\t':
end -= 1
begin = first_char(self.text, self.begin, self.end)
end = last_char(self.text, self.begin, self.end)
return self.text[begin:end]
# return str(self).strip() # PERFORMANCE WARNING: This creates a copy of the string
@cython.locals(i=cython.int, k=cython.int, l=cython.int)
def split(self, sep=None):
if self.fullstring_flag:
return self.text.split(sep)
......@@ -205,7 +192,6 @@ class StringView(collections.abc.Sized):
i = self.find(sep, k)
pieces.append(self.text[self.begin + k : self.end])
return pieces
# return str(self).split(sep, maxsplit) # PERFORMANCE WARNING: This creates a copy of the string
EMPTY_STRING_VIEW = StringView('')
......@@ -20,13 +20,11 @@ import collections.abc
import copy
import os
from functools import partial
from typing import Any, Callable, cast, Iterator, List, Union, Tuple, Hashable
from DHParser.toolkit import is_logging, log_dir, identity, re, typing
from DHParser.error import Error, linebreaks, line_col
from DHParser.stringview import StringView
from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
Iterator, Iterable, List, NamedTuple, Sequence, Union, Text, Tuple, Hashable
from DHParser.toolkit import is_logging, log_dir, identity, re
__all__ = ('ParserBase',
'WHITESPACE_PTYPE',
......@@ -203,7 +201,7 @@ class Node(collections.abc.Sized):
"""
# self._result = '' # type: StrictResultType
# self.children = () # type: ChildrenType
self.error_flag = 0 # type: bool
self.error_flag = 0 # type: int
self._errors = [] # type: List[Error]
self.result = result
self._len = len(self._result) if not self.children else \
......
......@@ -269,7 +269,7 @@ def compile_python_object(python_src, catch_obj_regex=""):
# def smart_list(arg: Union[str, Iterable[T]]) -> Union[Sequence[str], Sequence[T]]:
def smart_list(arg: Union[str, Iterable, Any]) -> Sequence:
def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]:
"""Returns the argument as list, depending on its type and content.
If the argument is a string, it will be interpreted as a list of
......
......@@ -188,7 +188,7 @@ def selftest() -> bool:
if errors:
print("Selftest FAILED :-(")
print("\n\n".join(errors))
print("\n\n".join(str(err) for err in errors))
return False
print(generated_ebnf_parser)
print("\n\nSTAGE 2: Selfhosting-test: Trying to compile EBNF-Grammar with generated parser...\n")
......@@ -234,7 +234,7 @@ if __name__ == "__main__":
_errors = compile_on_disk(sys.argv[1],
sys.argv[2] if len(sys.argv) > 2 else "")
if _errors:
print('\n\n'.join(_errors))
print('\n\n'.join(str(err) for err in _errors))
sys.exit(1)
else:
create_project(sys.argv[1])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment