diff --git a/DHParser/__init__.py b/DHParser/__init__.py index 385fbe1c032911147dfca932b7b51c25afbc8628..7d556da9e44eaeb4d59ba9f81425673398296ee2 100644 --- a/DHParser/__init__.py +++ b/DHParser/__init__.py @@ -18,6 +18,7 @@ implied. See the License for the specific language governing permissions and limitations under the License. """ +from .base import * from .dsl import * from .ebnf import * from .parser import * @@ -30,4 +31,4 @@ from .versionnumber import __version__ __author__ = "Eckhart Arnold " __copyright__ = "http://www.apache.org/licenses/LICENSE-2.0" -# __all__ = ['toolkit', 'syntaxtree', 'parser', 'transform', 'ebnf', 'dsl', 'testing', 'versionnumber'] # flat namespace +# __all__ = ['toolkit', 'base', 'syntaxtree', 'parser', 'transform', 'ebnf', 'dsl', 'testing', 'versionnumber'] # flat namespace diff --git a/DHParser/base.py b/DHParser/base.py new file mode 100644 index 0000000000000000000000000000000000000000..8fb72c80bdd649f025f5f589be2bbbb05f5ca200 --- /dev/null +++ b/DHParser/base.py @@ -0,0 +1,300 @@ +"""base.py - various base classes that are used across several other + the DHParser-modules. + +Copyright 2016 by Eckhart Arnold (arnold@badw.de) + Bavarian Academy of Sciences an Humanities (badw.de) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied. See the License for the specific language governing +permissions and limitations under the License. +""" +import collections +from typing import Hashable, Iterable, Iterator, Optional, Tuple + + +__all__ = ('ParserBase', + 'WHITESPACE_PTYPE', + 'TOKEN_PTYPE', + 'MockParser', + 'ZombieParser', + 'ZOMBIE_PARSER', + 'Error', + 'is_error', + 'is_warning', + 'has_errors', + 'only_errors', + 'StringView', + 'EMPTY_STRING_VIEW') + + +####################################################################### +# +# parser base and mock parsers +# +####################################################################### + + +class ParserBase: + """ + ParserBase is the base class for all real and mock parser classes. + It is defined here, because Node objects require a parser object + for instantiation. + """ + def __init__(self, name=''): # , pbases=frozenset()): + self.name = name # type: str + self._ptype = ':' + self.__class__.__name__ # type: str + + def __repr__(self): + return self.name + self.ptype + + def __str__(self): + return self.name + (' = ' if self.name else '') + repr(self) + + @property + def ptype(self) -> str: + return self._ptype + + @property + def repr(self) -> str: + return self.name if self.name else repr(self) + + +WHITESPACE_PTYPE = ':Whitespace' +TOKEN_PTYPE = ':Token' + + +class MockParser(ParserBase): + """ + MockParser objects can be used to reconstruct syntax trees from a + serialized form like S-expressions or XML. Mock objects can mimic + different parser types by assigning them a ptype on initialization. + + Mock objects should not be used for anything other than + syntax tree (re-)construction. In all other cases where a parser + object substitute is needed, chose the singleton ZOMBIE_PARSER. + """ + def __init__(self, name='', ptype=''): # , pbases=frozenset()): + assert not ptype or ptype[0] == ':' + super(MockParser, self).__init__(name) + self.name = name + self._ptype = ptype or ':' + self.__class__.__name__ + + +class ZombieParser(MockParser): + """ + Serves as a substitute for a Parser instance. + + ``ZombieParser`` is the class of the singelton object + ``ZOMBIE_PARSER``. The ``ZOMBIE_PARSER`` has a name and can be + called, but it never matches. It serves as a substitute where only + these (or one of these properties) is needed, but no real Parser- + object is instantiated. + """ + alive = False + + def __init__(self): + super(ZombieParser, self).__init__("__ZOMBIE__") + assert not self.__class__.alive, "There can be only one!" + assert self.__class__ == ZombieParser, "No derivatives, please!" + self.__class__.alive = True + + def __copy__(self): + return self + + def __deepcopy__(self, memo): + return self + + def __call__(self, text): + """Better call Saul ;-)""" + return None, text + + +ZOMBIE_PARSER = ZombieParser() + + +####################################################################### +# +# error reporting +# +####################################################################### + + +class Error: + __slots__ = ['message', 'level', 'code', 'pos', 'line', 'column'] + + WARNING = 1 + ERROR = 1000 + HIGHEST = ERROR + + def __init__(self, message: str, level: int=ERROR, code: Hashable=0): + self.message = message + assert level >= 0 + self.level = level or Error.ERROR + self.code = code + self.pos = -1 + self.line = -1 + self.column = -1 + + def __str__(self): + prefix = '' + if self.line > 0: + prefix = "line: %3i, column: %2i, " % (self.line, self.column) + return prefix + "%s: %s" % (self.level_str, self.message) + + @property + def level_str(self): + return "Warning" if is_warning(self.level) else "Error" + + +def is_warning(level: int) -> bool: + return level < Error.ERROR + + +def is_error(level: int) -> bool: + return level >= Error.ERROR + + +def has_errors(messages: Iterable[Error], level: int=Error.ERROR) -> bool: + """ + Returns True, if at least one entry in `messages` has at + least the given error `level`. + """ + for err_obj in messages: + if err_obj.level >= level: + return True + return False + + +def only_errors(messages: Iterable[Error], level: int=Error.ERROR) -> Iterator[Error]: + """ + Returns an Iterator that yields only those messages that have + at least the given error level. + """ + return (err for err in messages if err.level >= level) + + +####################################################################### +# +# string view +# +####################################################################### + + +class StringView(collections.abc.Sized): + """"A rudimentary StringView class, just enough for the use cases + in parser.py. + + Slicing Python-strings always yields copies of a segment of the original + string. See: https://mail.python.org/pipermail/python-dev/2008-May/079699.html + However, this becomes costly (in terms of space and as a consequence also + time) when parsing longer documents. Unfortunately, Python's `memoryview` + does not work for unicode strings. Hence, the StringView class. + """ + + __slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag'] + + def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None: + self.text = text # type: str + self.begin = 0 # type: int + self.end = 0 # type: int + self.begin, self.end = StringView.real_indices(begin, end, len(text)) + self.len = max(self.end - self.begin, 0) + self.fullstring_flag = (self.begin == 0 and self.len == len(self.text)) + + @staticmethod + def real_indices(begin, end, len): + def pack(index, len): + index = index if index >= 0 else index + len + return 0 if index < 0 else len if index > len else index + if begin is None: begin = 0 + if end is None: end = len + return pack(begin, len), pack(end, len) + + def __bool__(self): + return bool(self.text) and self.end > self.begin + + def __len__(self): + return self.len + + def __str__(self): + if self.fullstring_flag: # optimization: avoid slicing/copying + return self.text + return self.text[self.begin:self.end] + + def __getitem__(self, index): + # assert isinstance(index, slice), "As of now, StringView only allows slicing." + # assert index.step is None or index.step == 1, \ + # "Step sizes other than 1 are not yet supported by StringView" + start, stop = StringView.real_indices(index.start, index.stop, self.len) + return StringView(self.text, self.begin + start, self.begin + stop) + + def __eq__(self, other): + return str(self) == str(other) # PERFORMANCE WARNING: This creates copies of the strings + + def count(self, sub, start=None, end=None) -> int: + if self.fullstring_flag: + return self.text.count(sub, start, end) + elif start is None and end is None: + return self.text.count(sub, self.begin, self.end) + else: + start, end = StringView.real_indices(start, end, self.len) + return self.text.count(sub, self.begin + start, self.begin + end) + + def find(self, sub, start=None, end=None) -> int: + if self.fullstring_flag: + return self.text.find(sub, start, end) + elif start is None and end is None: + return self.text.find(sub, self.begin, self.end) - self.begin + else: + start, end = StringView.real_indices(start, end, self.len) + return self.text.find(sub, self.begin + start, self.begin + end) - self.begin + + def rfind(self, sub, start=None, end=None) -> int: + if self.fullstring_flag: + return self.text.rfind(sub, start, end) + if start is None and end is None: + return self.text.rfind(sub, self.begin, self.end) - self.begin + else: + start, end = StringView.real_indices(start, end, self.len) + return self.text.rfind(sub, self.begin + start, self.begin + end) - self.begin + + def startswith(self, prefix: str, start:int = 0, end:Optional[int] = None) -> bool: + start += self.begin + end = self.end if end is None else self.begin + end + return self.text.startswith(prefix, start, end) + + def match(self, regex): + return regex.match(self.text, pos=self.begin, endpos=self.end) + + def index(self, absolute_index: int) -> int: + """ + Converts an index for a string watched by a StringView object + to an index relative to the string view object, e.g.: + >>> sv = StringView('xxIxx')[2:3] + >>> match = sv.match(re.compile('I')) + >>> match.end() + 3 + >>> sv.index(match.end()) + 1 + """ + return absolute_index - self.begin + + def indices(self, absolute_indices: Iterable[int]) -> Tuple[int, ...]: + """Converts indices for a string watched by a StringView object + to indices relative to the string view object. See also: `sv_index()` + """ + return tuple(index - self.begin for index in absolute_indices) + + def search(self, regex): + return regex.search(self.text, pos=self.begin, endpos=self.end) + + +EMPTY_STRING_VIEW = StringView('') \ No newline at end of file diff --git a/DHParser/dsl.py b/DHParser/dsl.py index f7376eeeb49fe8d47eaf6ab1af90e98e068e5fd9..8e74262cb806b98c636aa9eb1d1d72465a72095d 100644 --- a/DHParser/dsl.py +++ b/DHParser/dsl.py @@ -15,7 +15,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -Module ``DSLsupport`` contains various functions to support the +Module ``dsl`` contains various functions to support the compilation of domain specific languages based on an EBNF-grammar. """ @@ -35,7 +35,8 @@ from DHParser.ebnf import EBNFCompiler, grammar_changed, \ PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object from DHParser.parser import Grammar, Compiler, compile_source, nil_preprocessor, PreprocessorFunc -from DHParser.syntaxtree import Error, is_error, has_errors, only_errors, Node, TransformationFunc +from DHParser.syntaxtree import Node, TransformationFunc +from DHParser.base import Error, is_error, has_errors, only_errors __all__ = ('GrammarError', 'CompilationError', diff --git a/DHParser/ebnf.py b/DHParser/ebnf.py index 10a40d18e1527e9c8eaacd1a93b9565dc08d1eda..775f75a6b6d88f52964d5f340472ea3d3dd33417 100644 --- a/DHParser/ebnf.py +++ b/DHParser/ebnf.py @@ -33,7 +33,8 @@ from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name from DHParser.parser import Grammar, mixin_comment, nil_preprocessor, Forward, RE, NegativeLookahead, \ Alternative, Series, Option, Required, OneOrMore, ZeroOrMore, Token, Compiler, \ PreprocessorFunc -from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, Error, Node, TransformationFunc +from DHParser.syntaxtree import Node, TransformationFunc +from DHParser.base import WHITESPACE_PTYPE, TOKEN_PTYPE, Error from DHParser.transform import TransformationDict, traverse, remove_brackets, \ reduce_single_child, replace_by_single_child, remove_expendables, \ remove_tokens, flatten, forbid, assert_content, remove_infix_operator diff --git a/DHParser/parser.py b/DHParser/parser.py index 227bfcaaa33509bc4d669c8dd36579054de1f9f6..19269fda471b890d5069a44593e09f4348752152 100644 --- a/DHParser/parser.py +++ b/DHParser/parser.py @@ -75,10 +75,10 @@ except ImportError: from .typing34 import Any, Callable, cast, Dict, Iterator, List, Set, Tuple, Union, Optional from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name -from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, ParserBase, \ - Error, is_error, has_errors, Node, TransformationFunc -from DHParser.toolkit import StringView, EMPTY_STRING_VIEW, sv_match, sv_index, sv_search, \ - load_if_file, error_messages, line_col +from DHParser.syntaxtree import Node, TransformationFunc +from DHParser.base import ParserBase, WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Error, is_error, has_errors, \ + StringView, EMPTY_STRING_VIEW +from DHParser.toolkit import load_if_file, error_messages, line_col __all__ = ('PreprocessorFunc', 'HistoryRecord', @@ -1066,9 +1066,9 @@ class RegExp(Parser): return RegExp(regexp, self.name) def __call__(self, text: StringView) -> Tuple[Node, StringView]: - match = text[0:1] != BEGIN_TOKEN and sv_match(self.regexp, text) # ESC starts a preprocessor token. + match = text[0:1] != BEGIN_TOKEN and text.match(self.regexp) # ESC starts a preprocessor token. if match: - end = sv_index(match.end(), text) + end = text.index(match.end()) return Node(self, text[:end]), text[end:] return None, text @@ -1521,8 +1521,8 @@ class Required(FlowOperator): def __call__(self, text: StringView) -> Tuple[Node, StringView]: node, text_ = self.parser(text) if not node: - m = sv_search(Required.RX_ARGUMENT, text) # re.search(r'\s(\S)', text) - i = max(1, sv_index(m.regs[1][0], text)) if m else 1 + m = text.search(Required.RX_ARGUMENT) # re.search(r'\s(\S)', text) + i = max(1, text.index(m.regs[1][0])) if m else 1 node = Node(self, text[:i]) text_ = text[i:] # assert False, "*"+text[:i]+"*" @@ -1585,7 +1585,7 @@ class Lookbehind(FlowOperator): def __call__(self, text: StringView) -> Tuple[Node, StringView]: backwards_text = self.grammar.reversed__[len(text):] # self.grammar.document__[-len(text) - 1::-1] - if self.sign(sv_match(self.regexp, backwards_text)): + if self.sign(backwards_text.match(self.regexp)): return Node(self, ''), text else: return None, text diff --git a/DHParser/syntaxtree.py b/DHParser/syntaxtree.py index 7a9c6d8dff7bdded475ad6517fce902cabdeca5f..2a56bf01cf9f343b03d7386bad042335eda90a1f 100644 --- a/DHParser/syntaxtree.py +++ b/DHParser/syntaxtree.py @@ -32,155 +32,14 @@ except ImportError: from .typing34 import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \ Iterator, Iterable, List, NamedTuple, Sequence, Union, Text, Tuple, Hashable -from DHParser.toolkit import is_logging, log_dir, StringView, linebreaks, line_col, identity - -__all__ = ('WHITESPACE_PTYPE', - 'MockParser', - 'TOKEN_PTYPE', - 'ZOMBIE_PARSER', - 'ParserBase', - 'Error', - 'is_warning', - 'is_error', - 'has_errors', - 'Node', +from DHParser.toolkit import is_logging, log_dir, linebreaks, line_col, identity +from DHParser.base import MockParser, ZOMBIE_PARSER, Error, StringView + +__all__ = ('Node', 'mock_syntax_tree', 'TransformationFunc') -class ParserBase: - """ - ParserBase is the base class for all real and mock parser classes. - It is defined here, because Node objects require a parser object - for instantiation. - """ - def __init__(self, name=''): # , pbases=frozenset()): - self.name = name # type: str - self._ptype = ':' + self.__class__.__name__ # type: str - - def __repr__(self): - return self.name + self.ptype - - def __str__(self): - return self.name + (' = ' if self.name else '') + repr(self) - - @property - def ptype(self) -> str: - return self._ptype - - @property - def repr(self) -> str: - return self.name if self.name else repr(self) - - -WHITESPACE_PTYPE = ':Whitespace' -TOKEN_PTYPE = ':Token' - - -class MockParser(ParserBase): - """ - MockParser objects can be used to reconstruct syntax trees from a - serialized form like S-expressions or XML. Mock objects can mimic - different parser types by assigning them a ptype on initialization. - - Mock objects should not be used for anything other than - syntax tree (re-)construction. In all other cases where a parser - object substitute is needed, chose the singleton ZOMBIE_PARSER. - """ - def __init__(self, name='', ptype=''): # , pbases=frozenset()): - assert not ptype or ptype[0] == ':' - super(MockParser, self).__init__(name) - self.name = name - self._ptype = ptype or ':' + self.__class__.__name__ - - -class ZombieParser(MockParser): - """ - Serves as a substitute for a Parser instance. - - ``ZombieParser`` is the class of the singelton object - ``ZOMBIE_PARSER``. The ``ZOMBIE_PARSER`` has a name and can be - called, but it never matches. It serves as a substitute where only - these (or one of these properties) is needed, but no real Parser- - object is instantiated. - """ - alive = False - - def __init__(self): - super(ZombieParser, self).__init__("__ZOMBIE__") - assert not self.__class__.alive, "There can be only one!" - assert self.__class__ == ZombieParser, "No derivatives, please!" - self.__class__.alive = True - - def __copy__(self): - return self - - def __deepcopy__(self, memo): - return self - - def __call__(self, text): - """Better call Saul ;-)""" - return None, text - - -ZOMBIE_PARSER = ZombieParser() - - -class Error: - __slots__ = ['message', 'level', 'code', 'pos', 'line', 'column'] - - WARNING = 1 - ERROR = 1000 - HIGHEST = ERROR - - def __init__(self, message: str, level: int=ERROR, code: Hashable=0): - self.message = message - assert level >= 0 - self.level = level or Error.ERROR - self.code = code - self.pos = -1 - self.line = -1 - self.column = -1 - - def __str__(self): - prefix = '' - if self.line > 0: - prefix = "line: %3i, column: %2i, " % (self.line, self.column) - return prefix + "%s: %s" % (self.level_str, self.message) - - @property - def level_str(self): - return "Warning" if is_warning(self.level) else "Error" - - -def is_warning(level: int) -> bool: - return level < Error.ERROR - - -def is_error(level: int) -> bool: - return level >= Error.ERROR - - -def has_errors(messages: Iterable[Error], level: int=Error.ERROR) -> bool: - """ - Returns True, if at least one entry in `messages` has at - least the given error `level`. - """ - for err_obj in messages: - if err_obj.level >= level: - return True - return False - - -def only_errors(messages: Iterable[Error], level: int=Error.ERROR) -> Iterator[Error]: - """ - Returns an Iterator that yields only those messages that have - at least the given error level. - """ - return (err for err in messages if err.level >= level) - - - ChildrenType = Tuple['Node', ...] StrictResultType = Union[ChildrenType, StringView, str] ResultType = Union[ChildrenType, 'Node', StringView, str, None] @@ -347,7 +206,7 @@ class Node(collections.abc.Sized): return self._errors.copy() - def add_error(self, message: str, level: int=Error.ERROR, code: Hashable=0) -> 'Node': + def add_error(self, message: str, level: int= Error.ERROR, code: Hashable=0) -> 'Node': self._errors.append(Error(message, level, code)) self.error_flag = max(self.error_flag, self._errors[-1].level) return self diff --git a/DHParser/testing.py b/DHParser/testing.py index ce37f719fabe2029a8d097d62c9039f15a375c03..a197943a52ee17ef75022ad36a794725886f0940 100644 --- a/DHParser/testing.py +++ b/DHParser/testing.py @@ -28,7 +28,8 @@ except ImportError: import re from DHParser.toolkit import is_logging, clear_logs, error_messages -from DHParser.syntaxtree import is_error, mock_syntax_tree, flatten_sxpr +from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr +from DHParser.base import is_error __all__ = ('unit_from_configfile', 'unit_from_json', diff --git a/DHParser/toolkit.py b/DHParser/toolkit.py index f21be32560d8c845cb2e8b8638e0acc369ddb0b5..8bcc8f92f89147b6c958fd31cd476af97625dd91 100644 --- a/DHParser/toolkit.py +++ b/DHParser/toolkit.py @@ -32,12 +32,13 @@ already exists. import bisect import codecs -import collections import contextlib import functools import hashlib import os +from DHParser.base import StringView + try: import regex as re except ImportError: @@ -53,10 +54,6 @@ __all__ = ('logging', 'is_logging', 'log_dir', 'logfile_basename', - 'StringView', - 'sv_match', - 'sv_index', - 'sv_search', 'linebreaks', 'line_col', 'error_messages', @@ -154,124 +151,6 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}): os.rmdir(log_dirname) -class StringView(collections.abc.Sized): - """"A rudimentary StringView class, just enough for the use cases - in parser.py. - - Slicing Python-strings always yields copies of a segment of the original - string. See: https://mail.python.org/pipermail/python-dev/2008-May/079699.html - However, this becomes costly (in terms of space and as a consequence also - time) when parsing longer documents. Unfortunately, Python's `memoryview` - does not work for unicode strings. Hence, the StringView class. - """ - - __slots__ = ['text', 'begin', 'end', 'len', 'fullstring_flag'] - - def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None: - self.text = text # type: str - self.begin = 0 # type: int - self.end = 0 # type: int - self.begin, self.end = StringView.real_indices(begin, end, len(text)) - self.len = max(self.end - self.begin, 0) - self.fullstring_flag = (self.begin == 0 and self.len == len(self.text)) - - @staticmethod - def real_indices(begin, end, len): - def pack(index, len): - index = index if index >= 0 else index + len - return 0 if index < 0 else len if index > len else index - if begin is None: begin = 0 - if end is None: end = len - return pack(begin, len), pack(end, len) - - def __bool__(self): - return bool(self.text) and self.end > self.begin - - def __len__(self): - return self.len - - def __str__(self): - if self.fullstring_flag: # optimization: avoid slicing/copying - return self.text - return self.text[self.begin:self.end] - - def __getitem__(self, index): - # assert isinstance(index, slice), "As of now, StringView only allows slicing." - # assert index.step is None or index.step == 1, \ - # "Step sizes other than 1 are not yet supported by StringView" - start, stop = StringView.real_indices(index.start, index.stop, self.len) - return StringView(self.text, self.begin + start, self.begin + stop) - - def __eq__(self, other): - return str(self) == str(other) # PERFORMANCE WARNING: This creates copies of the strings - - def count(self, sub, start=None, end=None) -> int: - if self.fullstring_flag: - return self.text.count(sub, start, end) - elif start is None and end is None: - return self.text.count(sub, self.begin, self.end) - else: - start, end = StringView.real_indices(start, end, self.len) - return self.text.count(sub, self.begin + start, self.begin + end) - - def find(self, sub, start=None, end=None) -> int: - if self.fullstring_flag: - return self.text.find(sub, start, end) - elif start is None and end is None: - return self.text.find(sub, self.begin, self.end) - self.begin - else: - start, end = StringView.real_indices(start, end, self.len) - return self.text.find(sub, self.begin + start, self.begin + end) - self.begin - - def rfind(self, sub, start=None, end=None) -> int: - if self.fullstring_flag: - return self.text.rfind(sub, start, end) - if start is None and end is None: - return self.text.rfind(sub, self.begin, self.end) - self.begin - else: - start, end = StringView.real_indices(start, end, self.len) - return self.text.rfind(sub, self.begin + start, self.begin + end) - self.begin - - def startswith(self, prefix: str, start:int = 0, end:Optional[int] = None) -> bool: - start += self.begin - end = self.end if end is None else self.begin + end - return self.text.startswith(prefix, start, end) - - - -def sv_match(regex, sv: StringView): - return regex.match(sv.text, pos=sv.begin, endpos=sv.end) - - -def sv_index(absolute_index: int, sv: StringView) -> int: - """ - Converts the an index into string watched by a StringView object - to an index relativ to the string view object, e.g.: - >>> sv = StringView('xxIxx')[2:3] - >>> match = sv_match(re.compile('I'), sv) - >>> match.end() - 3 - >>> sv_index(match.end(), sv) - 1 - """ - return absolute_index - sv.begin - - -def sv_indices(absolute_indices: Iterable[int], sv: StringView) -> Tuple[int, ...]: - """Converts indices into a string watched by a StringView object - to an index relativ to the string view object. See also: `sv_index()` - """ - return tuple(index - sv.begin for index in absolute_indices) - - -def sv_search(regex, sv: StringView): - return regex.search(sv.text, pos=sv.begin, endpos=sv.end) - - - -EMPTY_STRING_VIEW = StringView('') - - def linebreaks(text: Union[StringView, str]): lb = [-1] i = text.find('\n', 0) @@ -295,14 +174,14 @@ def line_col(text: Union[StringView, str], pos: int) -> Tuple[int, int]: @line_col.register(list) -def _line_col(linebreaks: List[int], pos: int) -> Tuple[int, int]: +def _line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]: """Returns the position within a text as (line, column)-tuple based on a list of all line breaks, including -1 and EOF. """ - if pos < 0 or pos > linebreaks[-1]: # one character behind EOF is still an allowed position! - raise ValueError('Position %i outside text of length %s !' % (pos, linebreaks[-1])) - line = bisect.bisect_left(linebreaks, pos) - column = pos - linebreaks[line-1] + if pos < 0 or pos > lbreaks[-1]: # one character behind EOF is still an allowed position! + raise ValueError('Position %i outside text of length %s !' % (pos, lbreaks[-1])) + line = bisect.bisect_left(lbreaks, pos) + column = pos - lbreaks[line - 1] return line, column diff --git a/DHParser/transform.py b/DHParser/transform.py index 13e1b1cd7309ad71fc3c39683e1fdd7ad1b2dbac..318cf7fcf5a033c140976573dd75e14411bc4559 100644 --- a/DHParser/transform.py +++ b/DHParser/transform.py @@ -20,7 +20,8 @@ permissions and limitations under the License. import inspect from functools import partial, reduce, singledispatch -from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, MockParser, Node +from DHParser.syntaxtree import Node +from DHParser.base import WHITESPACE_PTYPE, TOKEN_PTYPE, MockParser try: import regex as re diff --git a/test/test_base.py b/test/test_base.py new file mode 100644 index 0000000000000000000000000000000000000000..685f18de9126115207b62cb0f660bbd17fe52cb9 --- /dev/null +++ b/test/test_base.py @@ -0,0 +1,130 @@ +#!/usr/bin/python3 + +"""test_base.py - tests of the base-module of DHParser + + +Author: Eckhart Arnold + +Copyright 2017 Bavarian Academy of Sciences and Humanities + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +try: + import regex as re +except ImportError: + import re + +from DHParser import StringView, EMPTY_STRING_VIEW + + +class TestStringView: + def test_real_indices(self): + assert StringView.real_indices(3, 5, 10) == (3, 5) + assert StringView.real_indices(None, None, 10) == (0, 10) + assert StringView.real_indices(-2, -1, 10) == (8, 9) + assert StringView.real_indices(-3, 11, 10) == (7, 10) + assert StringView.real_indices(-5, -12, 10) == (5, 0) + assert StringView.real_indices(-12, -5, 10) == (0, 5) + assert StringView.real_indices(7, 6, 10) == (7, 6) + assert StringView.real_indices(None, 0, 10) == (0, 0) + + def test_creation(self): + s = "0123456789" + assert str(StringView(s)) == s + assert str(StringView(s, 3, 4)) == '3' + assert str(StringView(s, -4)) == '6789' + + def test_equality(self): + s = "0123456789" + assert StringView(s) == s + assert StringView(s, 3, 4) == '3' + assert StringView(s, -4) == '6789' + + def test_slicing(self): + s = " 0123456789 " + sv = StringView(s, 1, -1) + assert sv == '0123456789' + assert sv[3:4] == '3' + assert sv[-3:-1] == '78' + assert sv[4:3] == '' + assert sv[:4] == '0123' + assert sv[4:] == '456789' + assert sv[-2:] == '89' + assert sv[:-5] == '01234' + assert isinstance(sv[3:5], StringView) + + def test_len(self): + s = " 0123456789 " + sv = StringView(s, 1, -1) + assert len(sv) == 10 + assert sv.len == 10 + assert len(sv[5:5]) == 0 + assert len(sv[7:4]) == 0 + assert len(sv[-12:-2]) == 8 + assert len(sv[-12:12]) == 10 + + def test_bool(self): + assert not StringView('') + assert StringView('x') + s = " 0123456789 " + sv = StringView(s, 1, -1) + assert not sv[5:4] + assert sv[4:5], str(sv[4:5]) + assert not sv[3:3] + assert not sv[12:13] + assert sv[0:20] + + def test_sv_match(self): + s = " 0123456789 " + sv = StringView(s, 1, -1) + assert sv.match(re.compile(r'\d')) + assert sv.match(re.compile(r'\d+')) + assert not sv.match(re.compile(r' ')) + assert sv[4:].match(re.compile(r'45')) + + def test_sv_search(self): + s = " 0123456789 " + sv = StringView(s, 1, -1) + assert sv.search(re.compile(r'5')) + assert not sv.search(re.compile(r' ')) + assert sv[5:].search(re.compile(r'5')) + assert not sv[:9].search(re.compile(r'9')) + + def test_find(self): + s = " 0123456789 " + sv = StringView(s, 1, -1) + assert sv.find('5') == 5 + assert sv.find(' ') < 0 + assert sv.find('0', 1) < 0 + assert sv.find('9', 0, 8) < 0 + assert sv.find('45', 1, 8) == 4 + + def test_startswith(self): + s = " 0123456789 " + sv = StringView(s, 1, -1) + assert sv.startswith('012') + assert sv.startswith('123', 1) + assert not sv.startswith('123', 1, 3) + + def test_EMPTY_STRING_VIEW(self): + assert len(EMPTY_STRING_VIEW) == 0 + assert EMPTY_STRING_VIEW.find('x') < 0 + assert not EMPTY_STRING_VIEW.match(re.compile(r'x')) + assert EMPTY_STRING_VIEW.match(re.compile(r'.*')) + assert len(EMPTY_STRING_VIEW[0:1]) == 0 + + +if __name__ == "__main__": + from DHParser.testing import runner + runner("", globals()) diff --git a/test/test_dsl.py b/test/test_dsl.py index bfdb2d9bda6c53208c9ff098266ededc299726a1..3fb1e71d8769a4213ad972b955fad0d1ded2c9c9 100644 --- a/test/test_dsl.py +++ b/test/test_dsl.py @@ -25,7 +25,7 @@ import sys sys.path.extend(['../', './']) from DHParser.parser import Grammar, Compiler -from DHParser.syntaxtree import is_error +from DHParser.base import is_error from DHParser.dsl import compile_on_disk, run_compiler, compileEBNF, grammar_provider, \ load_compiler_suite diff --git a/test/test_ebnf.py b/test/test_ebnf.py index 6a38615e0311614a6d68d85492a14ed46671197b..f02986f37c2f63fd61b3f570ae67ea2e77a6e2e2 100644 --- a/test/test_ebnf.py +++ b/test/test_ebnf.py @@ -30,8 +30,8 @@ from multiprocessing import Pool sys.path.extend(['../', './']) from DHParser.toolkit import compile_python_object -from DHParser.syntaxtree import has_errors -from DHParser.parser import compile_source, WHITESPACE_PTYPE, nil_preprocessor +from DHParser.parser import compile_source, nil_preprocessor +from DHParser.base import WHITESPACE_PTYPE, has_errors from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransform, get_ebnf_compiler from DHParser.dsl import CompilationError, compileDSL, DHPARSER_IMPORTS, grammar_provider diff --git a/test/test_parser.py b/test/test_parser.py index 3fd97429a580246a63ae6af5d5132baa14d143ca..02c7b8a331bcf2d2b13117e4ae1b892797fa6cca 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -24,7 +24,8 @@ from functools import partial sys.path.extend(['../', './']) -from DHParser.toolkit import is_logging, logging, StringView, compile_python_object +from DHParser.toolkit import is_logging, logging, compile_python_object +from DHParser.base import StringView from DHParser.parser import compile_source, Retrieve, Grammar, Forward, Token, ZeroOrMore, RE, \ RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler diff --git a/test/test_syntaxtree.py b/test/test_syntaxtree.py index 344dc133d558fc9774671602373047dfed05df05..60cbd88ba25494f3ec86d57d0e43b7570ce647de 100644 --- a/test/test_syntaxtree.py +++ b/test/test_syntaxtree.py @@ -23,7 +23,8 @@ import copy import sys sys.path.extend(['../', './']) -from DHParser.syntaxtree import Node, TOKEN_PTYPE, mock_syntax_tree +from DHParser.syntaxtree import Node, mock_syntax_tree +from DHParser.base import TOKEN_PTYPE from DHParser.transform import traverse, reduce_single_child, \ replace_by_single_child, flatten, remove_expendables from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler diff --git a/test/test_testing.py b/test/test_testing.py index 7fea7dc1b16451d71470cc6fec59c88b23596322..1161a10c2be10a876e17110d1cef3a20d202b50f 100644 --- a/test/test_testing.py +++ b/test/test_testing.py @@ -26,7 +26,8 @@ from functools import partial sys.path.extend(['../', './']) -from DHParser.syntaxtree import TOKEN_PTYPE, mock_syntax_tree, flatten_sxpr +from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr +from DHParser.base import TOKEN_PTYPE from DHParser.transform import traverse, remove_expendables, \ replace_by_single_child, reduce_single_child, flatten from DHParser.dsl import grammar_provider diff --git a/test/test_toolkit.py b/test/test_toolkit.py index 2c196f65e6a36e235e6dca6bdad221f17fcae8b6..99353e41fc0d3ffa042fd95f3c83b746b810b21a 100644 --- a/test/test_toolkit.py +++ b/test/test_toolkit.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -"""test_tookkit.py - tests of the toolkit-module of DHParser +"""test_toolkit.py - tests of the toolkit-module of DHParser Author: Eckhart Arnold @@ -30,105 +30,7 @@ except ImportError: sys.path.extend(['../', './']) -from DHParser.toolkit import load_if_file, logging, log_dir, is_logging, StringView, \ - sv_match, sv_search, EMPTY_STRING_VIEW, linebreaks, line_col, error_messages - - -class TestStringView: - def test_real_indices(self): - assert StringView.real_indices(3, 5, 10) == (3, 5) - assert StringView.real_indices(None, None, 10) == (0, 10) - assert StringView.real_indices(-2, -1, 10) == (8, 9) - assert StringView.real_indices(-3, 11, 10) == (7, 10) - assert StringView.real_indices(-5, -12, 10) == (5, 0) - assert StringView.real_indices(-12, -5, 10) == (0, 5) - assert StringView.real_indices(7, 6, 10) == (7, 6) - assert StringView.real_indices(None, 0, 10) == (0, 0) - - def test_creation(self): - s = "0123456789" - assert str(StringView(s)) == s - assert str(StringView(s, 3, 4)) == '3' - assert str(StringView(s, -4)) == '6789' - - def test_equality(self): - s = "0123456789" - assert StringView(s) == s - assert StringView(s, 3, 4) == '3' - assert StringView(s, -4) == '6789' - - def test_slicing(self): - s = " 0123456789 " - sv = StringView(s, 1, -1) - assert sv == '0123456789' - assert sv[3:4] == '3' - assert sv[-3:-1] == '78' - assert sv[4:3] == '' - assert sv[:4] == '0123' - assert sv[4:] == '456789' - assert sv[-2:] == '89' - assert sv[:-5] == '01234' - assert isinstance(sv[3:5], StringView) - - def test_len(self): - s = " 0123456789 " - sv = StringView(s, 1, -1) - assert len(sv) == 10 - assert sv.len == 10 - assert len(sv[5:5]) == 0 - assert len(sv[7:4]) == 0 - assert len(sv[-12:-2]) == 8 - assert len(sv[-12:12]) == 10 - - def test_bool(self): - assert not StringView('') - assert StringView('x') - s = " 0123456789 " - sv = StringView(s, 1, -1) - assert not sv[5:4] - assert sv[4:5], str(sv[4:5]) - assert not sv[3:3] - assert not sv[12:13] - assert sv[0:20] - - def test_sv_match(self): - s = " 0123456789 " - sv = StringView(s, 1, -1) - assert sv_match(re.compile(r'\d'), sv) - assert sv_match(re.compile(r'\d+'), sv) - assert not sv_match(re.compile(r' '), sv) - assert sv_match(re.compile(r'45'), sv[4:]) - - def test_sv_search(self): - s = " 0123456789 " - sv = StringView(s, 1, -1) - assert sv_search(re.compile(r'5'), sv) - assert not sv_search(re.compile(r' '), sv) - assert sv_search(re.compile(r'5'), sv[5:]) - assert not sv_search(re.compile(r'9'), sv[:9]) - - def test_find(self): - s = " 0123456789 " - sv = StringView(s, 1, -1) - assert sv.find('5') == 5 - assert sv.find(' ') < 0 - assert sv.find('0', 1) < 0 - assert sv.find('9', 0, 8) < 0 - assert sv.find('45', 1, 8) == 4 - - def test_startswith(self): - s = " 0123456789 " - sv = StringView(s, 1, -1) - assert sv.startswith('012') - assert sv.startswith('123', 1) - assert not sv.startswith('123', 1, 3) - - def test_EMPTY_STRING_VIEW(self): - assert len(EMPTY_STRING_VIEW) == 0 - assert EMPTY_STRING_VIEW.find('x') < 0 - assert not sv_match(re.compile(r'x'), EMPTY_STRING_VIEW) - assert sv_match(re.compile(r'.*'), EMPTY_STRING_VIEW) - assert len(EMPTY_STRING_VIEW[0:1]) == 0 +from DHParser.toolkit import load_if_file, logging, log_dir, is_logging, linebreaks, line_col class TestErrorSupport: