Commit a471a1e0 authored by di68kap's avatar di68kap
Browse files

- parsers.py: string slicing replaced by toolkit.StringView

parent 481891e3
...@@ -77,7 +77,8 @@ except ImportError: ...@@ -77,7 +77,8 @@ except ImportError:
from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name
from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, ParserBase, \ from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, ParserBase, \
Node, TransformationFunc Node, TransformationFunc
from DHParser.toolkit import TextView, load_if_file, error_messages, line_col from DHParser.toolkit import StringView, EMPTY_STRING_VIEW, sv_match, sv_index, sv_search, \
load_if_file, error_messages, line_col
__all__ = ('PreprocessorFunc', __all__ = ('PreprocessorFunc',
'HistoryRecord', 'HistoryRecord',
...@@ -161,7 +162,7 @@ class HistoryRecord: ...@@ -161,7 +162,7 @@ class HistoryRecord:
# type: List['Parser'] # type: List['Parser']
self.node = node # type: Node self.node = node # type: Node
self.remaining = remaining # type: int self.remaining = remaining # type: int
document = call_stack[-1].grammar.document__ if call_stack else '' document = call_stack[-1].grammar.document__.text if call_stack else ''
self.line_col = line_col(document, len(document) - remaining) # type: Tuple[int, int] self.line_col = line_col(document, len(document) - remaining) # type: Tuple[int, int]
def __str__(self): def __str__(self):
...@@ -229,11 +230,13 @@ def add_parser_guard(parser_func): ...@@ -229,11 +230,13 @@ def add_parser_guard(parser_func):
that takes care of memoizing, left recursion and optionally tracing that takes care of memoizing, left recursion and optionally tracing
(aka "history tracking") of parser calls. Returns the wrapped call. (aka "history tracking") of parser calls. Returns the wrapped call.
""" """
def guarded_call(parser: 'Parser', text: str) -> Tuple[Node, str]: def guarded_call(parser: 'Parser', text: StringView) -> Tuple[Node, StringView]:
assert isinstance(text, StringView)
def memoized(parser, location): def memoized(parser, location):
node = parser.visited[location] node = parser.visited[location]
rlen = location - (0 if node is None else node.len) rlen = location - (0 if node is None else node.len)
rest = TextView(grammar.document__, -rlen) if rlen else '' rest = grammar.document__[-rlen:] if rlen else EMPTY_STRING_VIEW
return node, rest return node, rest
# NOTE: An older and simpler implementation of memoization # NOTE: An older and simpler implementation of memoization
# relied on `parser.visited[location] == node, rest`. Although, # relied on `parser.visited[location] == node, rest`. Although,
...@@ -267,6 +270,7 @@ def add_parser_guard(parser_func): ...@@ -267,6 +270,7 @@ def add_parser_guard(parser_func):
# run original __call__ method # run original __call__ method
node, rest = parser_func(parser, text) node, rest = parser_func(parser, text)
assert isinstance(rest, StringView)
if node is None: if node is None:
# retrieve an earlier match result (from left recursion) if it exists # retrieve an earlier match result (from left recursion) if it exists
...@@ -302,7 +306,7 @@ def add_parser_guard(parser_func): ...@@ -302,7 +306,7 @@ def add_parser_guard(parser_func):
node = Node(None, text[:min(10, max(1, text.find("\n")))] + " ...") node = Node(None, text[:min(10, max(1, text.find("\n")))] + " ...")
node.add_error("maximum recursion depth of parser reached; " node.add_error("maximum recursion depth of parser reached; "
"potentially due to too many errors!") "potentially due to too many errors!")
rest = '' rest = EMPTY_STRING_VIEW
return node, rest return node, rest
...@@ -409,7 +413,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass): ...@@ -409,7 +413,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
self.cycle_detection = set() # type: Set[Callable] self.cycle_detection = set() # type: Set[Callable]
return self return self
def __call__(self, text: TextView) -> Tuple[Node, TextView]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
"""Applies the parser to the given `text` and returns a node with """Applies the parser to the given `text` and returns a node with
the results or None as well as the text at the position right behind the results or None as well as the text at the position right behind
the matching string.""" the matching string."""
...@@ -724,8 +728,8 @@ class Grammar: ...@@ -724,8 +728,8 @@ class Grammar:
def _reset__(self): def _reset__(self):
self.document__ = "" # type: str self.document__ = EMPTY_STRING_VIEW # type: StringView
self._reversed__ = "" # type: str self._reversed__ = EMPTY_STRING_VIEW # type: StringView
# variables stored and recalled by Capture and Retrieve parsers # variables stored and recalled by Capture and Retrieve parsers
self.variables__ = dict() # type: Dict[str, List[str]] self.variables__ = dict() # type: Dict[str, List[str]]
self.rollback__ = [] # type: List[Tuple[int, Callable]] self.rollback__ = [] # type: List[Tuple[int, Callable]]
...@@ -742,7 +746,7 @@ class Grammar: ...@@ -742,7 +746,7 @@ class Grammar:
@property @property
def reversed__(self) -> str: def reversed__(self) -> str:
if not self._reversed__: if not self._reversed__:
self._reversed__ = self.document__[::-1] self._reversed__ = StringView(self.document__.text[::-1])
return self._reversed__ return self._reversed__
...@@ -784,13 +788,13 @@ class Grammar: ...@@ -784,13 +788,13 @@ class Grammar:
else: else:
self._dirty_flag__ = True self._dirty_flag__ = True
self.history_tracking__ = is_logging() self.history_tracking__ = is_logging()
self.document__ = document self.document__ = StringView(document)
self.last_rb__loc__ = len(document) + 1 # rollback location self.last_rb__loc__ = len(self.document__) + 1 # rollback location
parser = self[start_parser] if isinstance(start_parser, str) else start_parser parser = self[start_parser] if isinstance(start_parser, str) else start_parser
assert parser.grammar == self, "Cannot run parsers from a different grammar object!" \ assert parser.grammar == self, "Cannot run parsers from a different grammar object!" \
" %s vs. %s" % (str(self), str(parser.grammar)) " %s vs. %s" % (str(self), str(parser.grammar))
stitches = [] # type: List[Node] stitches = [] # type: List[Node]
rest = document rest = self.document__
if not rest: if not rest:
result, ignore = parser(rest) result, ignore = parser(rest)
if result is None: if result is None:
...@@ -883,7 +887,7 @@ class Grammar: ...@@ -883,7 +887,7 @@ class Grammar:
document. document.
""" """
def prepare_line(record): def prepare_line(record):
excerpt = self.document__.__getitem__(record.extent)[:25].replace('\n', '\\n') excerpt = self.document__.text.__getitem__(record.extent)[:25].replace('\n', '\\n')
excerpt = "'%s'" % excerpt if len(excerpt) < 25 else "'%s...'" % excerpt excerpt = "'%s'" % excerpt if len(excerpt) < 25 else "'%s...'" % excerpt
return record.stack, record.status, excerpt return record.stack, record.status, excerpt
...@@ -985,7 +989,7 @@ class PreprocessorToken(Parser): ...@@ -985,7 +989,7 @@ class PreprocessorToken(Parser):
assert RX_PREPROCESSOR_TOKEN.match(token) assert RX_PREPROCESSOR_TOKEN.match(token)
super(PreprocessorToken, self).__init__(token) super(PreprocessorToken, self).__init__(token)
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
if text[0:1] == BEGIN_TOKEN: if text[0:1] == BEGIN_TOKEN:
end = text.find(END_TOKEN, 1) end = text.find(END_TOKEN, 1)
if end < 0: if end < 0:
...@@ -1040,10 +1044,10 @@ class RegExp(Parser): ...@@ -1040,10 +1044,10 @@ class RegExp(Parser):
regexp = self.regexp.pattern regexp = self.regexp.pattern
return RegExp(regexp, self.name) return RegExp(regexp, self.name)
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
match = text[0:1] != BEGIN_TOKEN and self.regexp.match(text) # ESC starts a preprocessor token. match = text[0:1] != BEGIN_TOKEN and sv_match(self.regexp, text) # ESC starts a preprocessor token.
if match: if match:
end = match.end() end = sv_index(match.end(), text)
return Node(self, text[:end]), text[end:] return Node(self, text[:end]), text[end:]
return None, text return None, text
...@@ -1114,9 +1118,9 @@ class RE(Parser): ...@@ -1114,9 +1118,9 @@ class RE(Parser):
regexp = self.main.regexp.pattern regexp = self.main.regexp.pattern
return self.__class__(regexp, self.wL, self.wR, self.name) return self.__class__(regexp, self.wL, self.wR, self.name)
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
# assert self.main.regexp.pattern != "@" # assert self.main.regexp.pattern != "@"
t = text # type: str t = text # type: StringView
wL, t = self.wspLeft(t) wL, t = self.wspLeft(t)
main, t = self.main(t) main, t = self.main(t)
if main: if main:
...@@ -1264,7 +1268,7 @@ class Optional(UnaryOperator): ...@@ -1264,7 +1268,7 @@ class Optional(UnaryOperator):
"Nesting options with required elements is contradictory: " \ "Nesting options with required elements is contradictory: " \
"%s(%s)" % (str(name), str(parser.name)) "%s(%s)" % (str(name), str(parser.name))
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
node, text = self.parser(text) node, text = self.parser(text)
if node: if node:
return Node(self, node), text return Node(self, node), text
...@@ -1289,7 +1293,7 @@ class ZeroOrMore(Optional): ...@@ -1289,7 +1293,7 @@ class ZeroOrMore(Optional):
EBNF-Notation: `{ ... }` EBNF-Notation: `{ ... }`
EBNF-Example: `sentence = { /\w+,?/ } "."` EBNF-Example: `sentence = { /\w+,?/ } "."`
""" """
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
results = () # type: Tuple[Node, ...] results = () # type: Tuple[Node, ...]
n = len(text) + 1 n = len(text) + 1
while text and len(text) < n: while text and len(text) < n:
...@@ -1314,9 +1318,9 @@ class OneOrMore(UnaryOperator): ...@@ -1314,9 +1318,9 @@ class OneOrMore(UnaryOperator):
"Use ZeroOrMore instead of nesting OneOrMore and Optional: " \ "Use ZeroOrMore instead of nesting OneOrMore and Optional: " \
"%s(%s)" % (str(name), str(parser.name)) "%s(%s)" % (str(name), str(parser.name))
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
results = () # type: Tuple[Node, ...] results = () # type: Tuple[Node, ...]
text_ = text # type: str text_ = text # type: StringView
n = len(text) + 1 n = len(text) + 1
while text_ and len(text_) < n: while text_ and len(text_) < n:
n = len(text_) n = len(text_)
...@@ -1340,9 +1344,9 @@ class Series(NaryOperator): ...@@ -1340,9 +1344,9 @@ class Series(NaryOperator):
super(Series, self).__init__(*parsers, name=name) super(Series, self).__init__(*parsers, name=name)
assert len(self.parsers) >= 1 assert len(self.parsers) >= 1
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
results = () # type: Tuple[Node, ...] results = () # type: Tuple[Node, ...]
text_ = text # type: str text_ = text # type: StringView
for parser in self.parsers: for parser in self.parsers:
node, text_ = parser(text_) node, text_ = parser(text_)
if not node: if not node:
...@@ -1400,7 +1404,7 @@ class Alternative(NaryOperator): ...@@ -1400,7 +1404,7 @@ class Alternative(NaryOperator):
assert all(not isinstance(p, Optional) for p in self.parsers[:-1]) assert all(not isinstance(p, Optional) for p in self.parsers[:-1])
self.been_here = dict() # type: Dict[int, int] self.been_here = dict() # type: Dict[int, int]
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
for parser in self.parsers: for parser in self.parsers:
node, text_ = parser(text) node, text_ = parser(text)
if node: if node:
...@@ -1447,11 +1451,13 @@ class FlowOperator(UnaryOperator): ...@@ -1447,11 +1451,13 @@ class FlowOperator(UnaryOperator):
class Required(FlowOperator): class Required(FlowOperator):
# Add constructor that checks for logical errors, like `Required(Optional(...))` constructs ? # Add constructor that checks for logical errors, like `Required(Optional(...))` constructs ?
def __call__(self, text: str) -> Tuple[Node, str]: RX_ARGUMENT = re.compile(r'\s(\S)')
def __call__(self, text: StringView) -> Tuple[Node, StringView]:
node, text_ = self.parser(text) node, text_ = self.parser(text)
if not node: if not node:
m = re.search(r'\s(\S)', text) m = sv_search(Required.RX_ARGUMENT, text) # re.search(r'\s(\S)', text)
i = max(1, m.regs[1][0]) if m else 1 i = max(1, sv_index(m.regs[1][0], text)) if m else 1
node = Node(self, text[:i]) node = Node(self, text[:i])
text_ = text[i:] text_ = text[i:]
# assert False, "*"+text[:i]+"*" # assert False, "*"+text[:i]+"*"
...@@ -1467,7 +1473,7 @@ class Lookahead(FlowOperator): ...@@ -1467,7 +1473,7 @@ class Lookahead(FlowOperator):
def __init__(self, parser: Parser, name: str = '') -> None: def __init__(self, parser: Parser, name: str = '') -> None:
super(Lookahead, self).__init__(parser, name) super(Lookahead, self).__init__(parser, name)
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
node, text_ = self.parser(text) node, text_ = self.parser(text)
if self.sign(node is not None): if self.sign(node is not None):
return Node(self, ''), text return Node(self, ''), text
...@@ -1512,9 +1518,9 @@ class Lookbehind(FlowOperator): ...@@ -1512,9 +1518,9 @@ class Lookbehind(FlowOperator):
self.regexp = p.main.regexp if isinstance(p, RE) else p.regexp self.regexp = p.main.regexp if isinstance(p, RE) else p.regexp
super(Lookbehind, self).__init__(parser, name) super(Lookbehind, self).__init__(parser, name)
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
backwards_text = self.grammar.reversed__[len(text):] # self.grammar.document__[-len(text) - 1::-1] backwards_text = self.grammar.reversed__[len(text):] # self.grammar.document__[-len(text) - 1::-1]
if self.sign(self.regexp.match(backwards_text)): if self.sign(sv_match(self.regexp, backwards_text)):
return Node(self, ''), text return Node(self, ''), text
else: else:
return None, text return None, text
...@@ -1548,7 +1554,7 @@ class Capture(UnaryOperator): ...@@ -1548,7 +1554,7 @@ class Capture(UnaryOperator):
def __init__(self, parser: Parser, name: str = '') -> None: def __init__(self, parser: Parser, name: str = '') -> None:
super(Capture, self).__init__(parser, name) super(Capture, self).__init__(parser, name)
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
node, text_ = self.parser(text) node, text_ = self.parser(text)
if node: if node:
stack = self.grammar.variables__.setdefault(self.name, []) stack = self.grammar.variables__.setdefault(self.name, [])
...@@ -1590,13 +1596,13 @@ class Retrieve(Parser): ...@@ -1590,13 +1596,13 @@ class Retrieve(Parser):
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
return self.__class__(self.symbol, self.filter, self.name) return self.__class__(self.symbol, self.filter, self.name)
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
return self.call(text) # allow call method to be called from subclass circumventing the parser guard return self.call(text) # allow call method to be called from subclass circumventing the parser guard
def __repr__(self): def __repr__(self):
return ':' + self.symbol.repr return ':' + self.symbol.repr
def call(self, text: str) -> Tuple[Node, str]: def call(self, text: StringView) -> Tuple[Node, StringView]:
try: try:
stack = self.grammar.variables__[self.symbol.name] stack = self.grammar.variables__[self.symbol.name]
value = self.filter(stack) value = self.filter(stack)
...@@ -1612,7 +1618,7 @@ class Retrieve(Parser): ...@@ -1612,7 +1618,7 @@ class Retrieve(Parser):
class Pop(Retrieve): class Pop(Retrieve):
"""STILL EXPERIMENTAL!!!""" """STILL EXPERIMENTAL!!!"""
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
nd, txt = super(Pop, self).call(text) # call() instead of __call__() to avoid parser guard nd, txt = super(Pop, self).call(text) # call() instead of __call__() to avoid parser guard
if nd and not nd.error_flag: if nd and not nd.error_flag:
stack = self.grammar.variables__[self.symbol.name] stack = self.grammar.variables__[self.symbol.name]
...@@ -1644,7 +1650,7 @@ class Synonym(UnaryOperator): ...@@ -1644,7 +1650,7 @@ class Synonym(UnaryOperator):
class, in which case it would be unclear whether the parser class, in which case it would be unclear whether the parser
RE('\d\d\d\d') carries the name 'JAHRESZAHL' or 'jahr'. RE('\d\d\d\d') carries the name 'JAHRESZAHL' or 'jahr'.
""" """
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
node, text = self.parser(text) node, text = self.parser(text)
if node: if node:
return Node(self, node), text return Node(self, node), text
...@@ -1684,7 +1690,7 @@ class Forward(Parser): ...@@ -1684,7 +1690,7 @@ class Forward(Parser):
duplicate.set(parser) duplicate.set(parser)
return duplicate return duplicate
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: StringView) -> Tuple[Node, StringView]:
return self.parser(text) return self.parser(text)
def __repr__(self): def __repr__(self):
......
...@@ -31,7 +31,7 @@ except ImportError: ...@@ -31,7 +31,7 @@ except ImportError:
from .typing34 import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \ from .typing34 import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
Iterator, List, NamedTuple, Sequence, Union, Text, Tuple Iterator, List, NamedTuple, Sequence, Union, Text, Tuple
from DHParser.toolkit import is_logging, log_dir, TextView, line_col, identity from DHParser.toolkit import is_logging, log_dir, StringView, line_col, identity
__all__ = ('WHITESPACE_PTYPE', __all__ = ('WHITESPACE_PTYPE',
'MockParser', 'MockParser',
...@@ -129,8 +129,8 @@ ZOMBIE_PARSER = ZombieParser() ...@@ -129,8 +129,8 @@ ZOMBIE_PARSER = ZombieParser()
Error = NamedTuple('Error', [('pos', int), ('msg', str)]) Error = NamedTuple('Error', [('pos', int), ('msg', str)])
ChildrenType = Tuple['Node', ...] ChildrenType = Tuple['Node', ...]
StrictResultType = Union[ChildrenType, TextView, str] StrictResultType = Union[ChildrenType, StringView, str]
ResultType = Union[ChildrenType, 'Node', TextView, str, None] ResultType = Union[ChildrenType, 'Node', StringView, str, None]
def flatten_sxpr(sxpr: str) -> str: def flatten_sxpr(sxpr: str) -> str:
...@@ -189,6 +189,7 @@ class Node: ...@@ -189,6 +189,7 @@ class Node:
__slots__ = ['_result', 'children', '_errors', '_len', '_pos', 'parser', 'error_flag'] __slots__ = ['_result', 'children', '_errors', '_len', '_pos', 'parser', 'error_flag']
def __init__(self, parser, result: ResultType) -> None: def __init__(self, parser, result: ResultType) -> None:
"""Initializes the ``Node``-object with the ``Parser``-Instance """Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result. that generated the node and the parser's result.
...@@ -251,7 +252,7 @@ class Node: ...@@ -251,7 +252,7 @@ class Node:
# or isinstance(result, Node) # or isinstance(result, Node)
# or isinstance(result, str)), str(result) # or isinstance(result, str)), str(result)
self._result = (result,) if isinstance(result, Node) else str(result) \ self._result = (result,) if isinstance(result, Node) else str(result) \
if isinstance(result, TextView) else result or '' # type: StrictResultType if isinstance(result, StringView) else result or '' # type: StrictResultType
self.children = cast(ChildrenType, self._result) \ self.children = cast(ChildrenType, self._result) \
if isinstance(self._result, tuple) else cast(ChildrenType, ()) # type: ChildrenType if isinstance(self._result, tuple) else cast(ChildrenType, ()) # type: ChildrenType
self.error_flag = any(r.error_flag for r in self.children) # type: bool self.error_flag = any(r.error_flag for r in self.children) # type: bool
......
...@@ -43,14 +43,18 @@ except ImportError: ...@@ -43,14 +43,18 @@ except ImportError:
import sys import sys
try: try:
from typing import Any, List, Tuple, Optional from typing import Any, List, Tuple, Collection, Union, Optional
except ImportError: except ImportError:
from .typing34 import Any, List, Tuple, Optional from .typing34 import Any, List, Tuple, Collection, Union, Optional
__all__ = ('logging', __all__ = ('logging',
'is_logging', 'is_logging',
'log_dir', 'log_dir',
'logfile_basename', 'logfile_basename',
'StringView',
'sv_match',
'sv_index',
'sv_search',
# 'supress_warnings', # 'supress_warnings',
# 'warnings', # 'warnings',
# 'repr_call', # 'repr_call',
...@@ -150,22 +154,93 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}): ...@@ -150,22 +154,93 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}):
os.rmdir(log_dirname) os.rmdir(log_dirname)
class StringView:
    """A rudimentary string view, just enough for the use cases in
    parsers.py.

    Slicing Python strings always yields copies of a segment of the original
    string. See: https://mail.python.org/pipermail/python-dev/2008-May/079699.html
    However, this becomes costly (in terms of space and, as a consequence,
    also time) when parsing longer documents. Unfortunately, Python's
    `memoryview` does not work for unicode strings. Hence, the StringView
    class.

    A view stores a reference to the complete string (`text`) together with
    the absolute bounds `begin` and `end` of the visible segment. Slicing a
    view yields another view onto the same string; no characters are copied
    until `str()` is called on the view.
    """

    __slots__ = ['text', 'begin', 'end', 'len']

    def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
        """Creates a view onto `text[begin:end]`.

        `begin` and `end` follow slice conventions: `None` stands for the
        respective end of the string, negative values count from the end
        of the string, and out-of-range values are clamped.
        """
        self.text = text  # type: str
        self.begin, self.end = StringView.real_indices(begin, end, len(text))
        # Normalize inverted bounds to an empty view at `begin`, so that
        # every consumer of `begin`/`end` sees a well-formed range.
        if self.end < self.begin:
            self.end = self.begin
        self.len = self.end - self.begin

    @staticmethod
    def real_indices(begin, end, len):
        """Converts slice-style indices into a pair of real indices.

        Returns the tuple `(begin, end)` where `None` has been replaced by
        `0` or `len` respectively, negative indices have been shifted by
        `len`, and both values have been clamped to the range `0..len`.
        (The parameter name `len` shadows the builtin inside this method;
        it is kept so that existing callers are not affected.)
        """
        def clamp(index, length):
            if index < 0:
                index += length
            return min(max(index, 0), length)

        if begin is None:
            begin = 0
        if end is None:
            end = len
        return clamp(begin, len), clamp(end, len)

    def __bool__(self):
        """A view is true if and only if it covers at least one character."""
        return self.end > self.begin

    def __len__(self):
        return self.len

    def __str__(self):
        """Returns a copy of the viewed segment as an ordinary string."""
        return self.text[self.begin:self.end]

    def __getitem__(self, index):
        """Returns a new view for the slice `index`, which is interpreted
        relative to this view. Only slices with step size 1 are supported.
        """
        assert isinstance(index, slice), "As of now, StringView only allows slicing."
        assert index.step is None or index.step == 1, \
            "Step sizes other than 1 are not yet supported by StringView"
        start, stop = StringView.real_indices(index.start, index.stop, self.len)
        return StringView(self.text, self.begin + start, self.begin + stop)

    def __eq__(self, other):
        # PERFORMANCE WARNING: This creates copies of the strings
        return str(self) == str(other)

    def find(self, sub, start=None, end=None) -> int:
        """Returns the lowest index, relative to the view, at which `sub`
        is found within the slice `[start:end]` of the view, or -1 if
        `sub` does not occur there (same contract as `str.find`).
        """
        start, end = StringView.real_indices(start, end, self.len)
        position = self.text.find(sub, self.begin + start, self.begin + end)
        # Only a hit is translated to a relative index; the "not found"
        # marker -1 must be passed through unchanged.
        return position - self.begin if position >= 0 else -1

    def startswith(self, prefix: str, start: int = 0, end: Optional[int] = None) -> bool:
        """Returns True if the slice `[start:end]` of the view starts with
        `prefix`. `start` and `end` are relative to the view and are
        clamped to it, so text outside the view is never taken into account.
        """
        start, end = StringView.real_indices(start, end, self.len)
        return self.text.startswith(prefix, self.begin + start, self.begin + end)
def sv_match(regex, sv: 'StringView'):
    """Applies `regex.match` to the segment of text covered by the string
    view `sv` without copying that segment.

    The match is run on the complete underlying string, restricted to the
    view by means of the `pos` and `endpos` arguments. Positions reported
    by the returned match object therefore refer to `sv.text`, not to the
    view.
    """
    first, last = sv.begin, sv.end
    return regex.match(sv.text, pos=first, endpos=last)
def sv_index(absolute_index: Union[int, Collection], sv: 'StringView') -> Union[int, tuple]:
    """
    Converts an index into the string watched by a StringView object
    to an index relative to the string view object, e.g.:

    >>> sv = StringView('xxIxx')[2:3]
    >>> match = sv_match(re.compile('I'), sv)
    >>> match.end()
    3
    >>> sv_index(match.end(), sv)
    1

    If a collection of indices is passed instead of a single index, a
    tuple with the converted indices is returned.
    """
    offset = sv.begin
    try:
        return absolute_index - offset
    except TypeError:
        # not a number: treat the argument as a collection of indices
        return tuple(position - offset for position in absolute_index)
def sv_search(regex, sv: StringView):
return regex.search(sv.text,