Commit faa86362 authored by eckhart's avatar eckhart
Browse files

- added some cleanup for pylint and mypy messages

parent fda0225c
...@@ -14,14 +14,18 @@ testdata/*.pdf ...@@ -14,14 +14,18 @@ testdata/*.pdf
*~ *~
*.old *.old
DEBUG* DEBUG*
LOGS/ LOGS
external_resources/ external_resources
tmp/* tmp/*
test/tmp* test/tmp*
build/ build
dist/ dist
MANIFEST MANIFEST
playground/* playground/*
DevScripts/DHParser.py DevScripts/DHParser.py
DHParser/cstringview.c DHParser/cstringview.c
*.so *.so
.mypy_cache
.vscode
DHParser.egg-info
.noseids
...@@ -1294,7 +1294,7 @@ class Option(UnaryOperator): ...@@ -1294,7 +1294,7 @@ class Option(UnaryOperator):
class ZeroOrMore(Option): class ZeroOrMore(Option):
""" r"""
`ZeroOrMore` applies a parser repeatedly as long as this parser `ZeroOrMore` applies a parser repeatedly as long as this parser
matches. Like `Option` the `ZeroOrMore` parser always matches. In matches. Like `Option` the `ZeroOrMore` parser always matches. In
case of zero repetitions, the empty match `((), text)` is returned. case of zero repetitions, the empty match `((), text)` is returned.
......
This diff is collapsed.
...@@ -7,7 +7,8 @@ cdef int first_char(text, int begin, int end) ...@@ -7,7 +7,8 @@ cdef int first_char(text, int begin, int end)
cdef int last_char(text, int begin, int end) cdef int last_char(text, int begin, int end)
cdef inline int pack_index(int index, int len) cdef int pack_index(int index, int length)
@cython.locals(cbegin=cython.int, cend=cython.int) @cython.locals(cbegin=cython.int, cend=cython.int)
cpdef real_indices(begin, end, int len) cpdef real_indices(begin, end, int length)
...@@ -27,33 +27,55 @@ does not work for unicode strings. Hence, the StringView class. ...@@ -27,33 +27,55 @@ does not work for unicode strings. Hence, the StringView class.
import collections import collections
from DHParser.toolkit import typing from DHParser.toolkit import typing
from typing import Optional, Iterable, Tuple from typing import Optional, Union, Iterable, Tuple
__all__ = ('StringView', 'EMPTY_STRING_VIEW') __all__ = ('StringView', 'EMPTY_STRING_VIEW')
def first_char(text, begin: int, end: int) -> int:
    """Return the index of the first non-whitespace character of `text`
    within the half-open range [begin, end).

    Only blank, newline and tab count as whitespace. If the range holds
    nothing but whitespace (or is empty), `end` is returned.
    """
    while begin < end and text[begin] in ' \n\t':
        begin += 1
    return begin
def last_char(text, begin: int, end: int) -> int:
    """Return the index just past the last non-whitespace character of
    `text` within the half-open range [begin, end).

    Only blank, newline and tab count as whitespace. The result can be
    used directly as the (exclusive) stop index of a slice. If the range
    holds nothing but whitespace (or is empty), `begin` is returned.
    """
    # `end` is exclusive, so the last character of the range is
    # text[end - 1]; indexing text[end] would look one character too far
    # and raise an IndexError when end == len(text).
    while end > begin and text[end - 1] in ' \n\t':
        end -= 1
    return end
def pack_index(index: int, length: int) -> int:
    """Transform `index` into a non-negative index counting from the
    beginning of the string, capped at the boundaries [0, length].

    Examples:
        >>> pack_index(-1, 5)
        4
        >>> pack_index(6, 5)
        5
        >>> pack_index(-7, 5)
        0
    """
    # assert length >= 0
    if index < 0:
        index += length  # negative indices count from the end
    return 0 if index < 0 else length if index > length else index
def real_indices(begin: Optional[int],
                 end: Optional[int],
                 length) -> Tuple[int, int]:   # "length: int" fails with cython!?
    """Return the tuple of real (i.e. non-negative) indices for the slice
    indices `begin`, `end`, assuming a string of size `length`.

    A `begin` of None stands for 0, an `end` of None for `length`. Both
    values are then normalized with `pack_index`.
    """
    cbegin = 0 if begin is None else begin
    cend = length if end is None else end
    return pack_index(cbegin, length), pack_index(cend, length)
class StringView(collections.abc.Sized): class StringView(collections.abc.Sized):
...@@ -91,20 +113,22 @@ class StringView(collections.abc.Sized): ...@@ -91,20 +113,22 @@ class StringView(collections.abc.Sized):
return self.text return self.text
def __eq__(self, other):
    """Compare by content: True if `other` has the same length and the
    same string representation as this object.
    """
    # PERFORMANCE WARNING: This creates copies of the strings
    # (the cheap length comparison runs first and short-circuits).
    return len(other) == len(self) and str(self) == str(other)
def __hash__(self):
    """Return the hash of the string representation of this object."""
    # PERFORMANCE WARNING: This creates a copy of the string-slice
    return hash(str(self))
def __add__(self, other):
    """Concatenate `self + other`.

    Returns a plain `str` if `other` is a `str`; otherwise a new
    `StringView` built from both string representations.
    """
    if isinstance(other, str):
        return str(self) + other
    else:
        return StringView(str(self) + str(other))
def __radd__(self, other):
    """Concatenate `other + self` (reflected addition).

    Returns a plain `str` if `other` is a `str`; otherwise a new
    `StringView` built from both string representations.
    """
    if isinstance(other, str):
        return other + str(self)
    else:
        return StringView(str(other) + str(self))
...@@ -115,7 +139,11 @@ class StringView(collections.abc.Sized): ...@@ -115,7 +139,11 @@ class StringView(collections.abc.Sized):
start, stop = real_indices(index.start, index.stop, self.len) start, stop = real_indices(index.start, index.stop, self.len)
return StringView(self.text, self.begin + start, self.begin + stop) return StringView(self.text, self.begin + start, self.begin + stop)
def count(self, sub, start=None, end=None) -> int: def count(self, sub: str, start=None, end=None) -> int:
"""Returns the number of non-overlapping occurrences of substring
`sub` in StringView S[start:end]. Optional arguments start and end
are interpreted as in slice notation.
"""
if self.fullstring_flag: if self.fullstring_flag:
return self.text.count(sub, start, end) return self.text.count(sub, start, end)
elif start is None and end is None: elif start is None and end is None:
...@@ -124,7 +152,12 @@ class StringView(collections.abc.Sized): ...@@ -124,7 +152,12 @@ class StringView(collections.abc.Sized):
start, end = real_indices(start, end, self.len) start, end = real_indices(start, end, self.len)
return self.text.count(sub, self.begin + start, self.begin + end) return self.text.count(sub, self.begin + start, self.begin + end)
def find(self, sub, start=None, end=None) -> int: def find(self, sub: str, start=None, end=None) -> int:
"""Returns the lowest index in S where substring `sub` is found,
such that `sub` is contained within S[start:end]. Optional
arguments `start` and `end` are interpreted as in slice notation.
Returns -1 on failure.
"""
if self.fullstring_flag: if self.fullstring_flag:
return self.text.find(sub, start, end) return self.text.find(sub, start, end)
elif start is None and end is None: elif start is None and end is None:
...@@ -133,7 +166,12 @@ class StringView(collections.abc.Sized): ...@@ -133,7 +166,12 @@ class StringView(collections.abc.Sized):
start, end = real_indices(start, end, self.len) start, end = real_indices(start, end, self.len)
return self.text.find(sub, self.begin + start, self.begin + end) - self.begin return self.text.find(sub, self.begin + start, self.begin + end) - self.begin
def rfind(self, sub, start=None, end=None) -> int: def rfind(self, sub: str, start=None, end=None) -> int:
"""Returns the highest index in S where substring `sub` is found,
such that `sub` is contained within S[start:end]. Optional
arguments `start` and `end` are interpreted as in slice notation.
Returns -1 on failure.
"""
if self.fullstring_flag: if self.fullstring_flag:
return self.text.rfind(sub, start, end) return self.text.rfind(sub, start, end)
if start is None and end is None: if start is None and end is None:
...@@ -142,12 +180,23 @@ class StringView(collections.abc.Sized): ...@@ -142,12 +180,23 @@ class StringView(collections.abc.Sized):
start, end = real_indices(start, end, self.len) start, end = real_indices(start, end, self.len)
return self.text.rfind(sub, self.begin + start, self.begin + end) - self.begin return self.text.rfind(sub, self.begin + start, self.begin + end) - self.begin
def startswith(self,
               prefix: Union[str, Tuple[str, ...]],
               start: int = 0,
               end: Optional[int] = None) -> bool:
    """Return True if S starts with the specified prefix, False otherwise.
    With optional `start`, test S beginning at that position.
    With optional `end`, stop comparing S at that position.
    `prefix` can also be a tuple of strings to try.
    """
    # Translate the view-relative positions into positions within the
    # underlying string before delegating to str.startswith.
    start += self.begin
    end = self.end if end is None else self.begin + end
    return self.text.startswith(prefix, start, end)
def match(self, regex):
    """Execute `regex.match` on the StringView object and return the
    result, which is either a match-object or None.

    The search is restricted to the view by passing `self.begin` and
    `self.end` as `pos` and `endpos` to the compiled pattern `regex`.
    """
    return regex.match(self.text, pos=self.begin, endpos=self.end)
def index(self, absolute_index: int) -> int: def index(self, absolute_index: int) -> int:
...@@ -171,9 +220,15 @@ class StringView(collections.abc.Sized): ...@@ -171,9 +220,15 @@ class StringView(collections.abc.Sized):
return tuple(index - self.begin for index in absolute_indices) return tuple(index - self.begin for index in absolute_indices)
def search(self, regex):
    """Execute `regex.search` on the StringView object and return the
    result, which is either a match-object or None.

    The search is restricted to the view by passing `self.begin` and
    `self.end` as `pos` and `endpos` to the compiled pattern `regex`.
    """
    return regex.search(self.text, pos=self.begin, endpos=self.end)
def strip(self): def strip(self):
"""Returns a copy of the StringView `self` with leading and trailing
whitespace removed.
"""
if self.fullstring_flag: if self.fullstring_flag:
return self.text.strip() return self.text.strip()
else: else:
...@@ -182,6 +237,11 @@ class StringView(collections.abc.Sized): ...@@ -182,6 +237,11 @@ class StringView(collections.abc.Sized):
return self.text[begin:end] return self.text[begin:end]
def split(self, sep=None): def split(self, sep=None):
"""Returns a list of the words in `self`, using `sep` as the
delimiter string. If `sep` is not specified or is None, any
whitespace string is a separator and empty strings are
removed from the result.
"""
if self.fullstring_flag: if self.fullstring_flag:
return self.text.split(sep) return self.text.split(sep)
else: else:
......
...@@ -66,23 +66,37 @@ class ParserBase: ...@@ -66,23 +66,37 @@ class ParserBase:
@property
def name(self):
    """Returns the name of the parser or the empty string '' for unnamed
    parsers."""
    return self._name
@property
def ptype(self) -> str:
    """Returns the type of the parser. By default this is the parser's
    class name preceded by a colon, e.g. ':ZeroOrMore'."""
    return self._ptype
@property
def repr(self) -> str:
    """Returns the parser's name if it has a name and `repr(self)`
    otherwise. Note that for named parsers this is not the same as
    `repr(parser)`, which is described as returning the combined name
    and ptype, e.g. 'term:OneOrMore'."""
    return self.name if self.name else repr(self)
def reset(self):
    """Resets any parser variables. (Should be overridden.)"""
    pass
def grammar(self):
    """Returns the Grammar object to which the parser belongs. If not
    yet connected to any Grammar object, None is returned.

    This base implementation always returns None.
    """
    return None
def apply(self, func: Callable) -> bool:
    """Applies the function `func` to the parser. Returns False if
    - for whatever reason - the function has not been applied, True
    otherwise.

    This base implementation never applies `func` and always returns
    False.
    """
    return False
...@@ -159,7 +173,7 @@ def flatten_sxpr(sxpr: str) -> str: ...@@ -159,7 +173,7 @@ def flatten_sxpr(sxpr: str) -> str:
>>> flatten_sxpr('(a\\n (b\\n c\\n )\\n)\\n') >>> flatten_sxpr('(a\\n (b\\n c\\n )\\n)\\n')
'(a (b c))' '(a (b c))'
""" """
return re.sub('\s(?=\))', '', re.sub('\s+', ' ', sxpr)).strip() return re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', sxpr)).strip()
class Node(collections.abc.Sized): class Node(collections.abc.Sized):
...@@ -209,8 +223,9 @@ class Node(collections.abc.Sized): ...@@ -209,8 +223,9 @@ class Node(collections.abc.Sized):
__slots__ = ['_result', 'children', '_errors', '_len', '_pos', 'parser', 'error_flag'] __slots__ = ['_result', 'children', '_errors', '_len', '_pos', 'parser', 'error_flag']
def __init__(self, parser, result: ResultType, leafhint: bool=False) -> None: def __init__(self, parser, result: ResultType, leafhint: bool = False) -> None:
"""Initializes the ``Node``-object with the ``Parser``-Instance """
Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result. that generated the node and the parser's result.
""" """
self.error_flag = 0 # type: int self.error_flag = 0 # type: int
...@@ -275,11 +290,22 @@ class Node(collections.abc.Sized): ...@@ -275,11 +290,22 @@ class Node(collections.abc.Sized):
@property   # this needs to be a (dynamic) property, in case self.parser gets updated
def tag_name(self) -> str:
    """
    Returns the tag name of Node, i.e. the name for XML or
    S-expression representation. By default the tag name is the
    name of the node's parser or, if the node's parser is unnamed, the
    node's parser's `ptype`.
    """
    return self.parser.name or self.parser.ptype
@property
def result(self) -> StrictResultType:
    """
    Returns the result from the parser that created the node.
    Error messages are not included in the result. Use `self.content()`
    if the result plus any error messages is needed.
    """
    return self._result
@result.setter @result.setter
......
...@@ -33,7 +33,9 @@ already exists. ...@@ -33,7 +33,9 @@ already exists.
import codecs import codecs
import contextlib import contextlib
import hashlib import hashlib
import io
import os import os
import parser
try: try:
import regex as re import regex as re
...@@ -47,7 +49,7 @@ except ImportError: ...@@ -47,7 +49,7 @@ except ImportError:
import DHParser.foreign_typing as typing import DHParser.foreign_typing as typing
sys.modules['typing'] = typing # make it possible to import from typing sys.modules['typing'] = typing # make it possible to import from typing
from typing import Any, Iterable, Sequence, Set, Union from typing import Any, Iterable, Sequence, Set, Union, cast
__all__ = ('logging', __all__ = ('logging',
'is_logging', 'is_logging',
...@@ -114,13 +116,14 @@ def log_dir() -> str: ...@@ -114,13 +116,14 @@ def log_dir() -> str:
def logging(dirname="LOGS"): def logging(dirname="LOGS"):
"""Context manager. Log files within this context will be stored in """Context manager. Log files within this context will be stored in
directory ``dirname``. Logging is turned off if name is empty. directory ``dirname``. Logging is turned off if name is empty.
Args: Args:
dirname: the name for the log directory or the empty string to dirname: the name for the log directory or the empty string to
turn logging off turn logging off
""" """
global LOGGING global LOGGING
if dirname and not isinstance(dirname, str): dirname = "LOGS" # be fail tolerant here... if dirname and not isinstance(dirname, str):
dirname = "LOGS" # be fail tolerant here...
try: try:
save = LOGGING save = LOGGING
except NameError: except NameError:
...@@ -139,7 +142,7 @@ def is_logging() -> bool: ...@@ -139,7 +142,7 @@ def is_logging() -> bool:
return False return False
def clear_logs(logfile_types={'.cst', '.ast', '.log'}): def clear_logs(logfile_types=frozenset(['.cst', '.ast', '.log'])):
"""Removes all logs from the log-directory and removes the """Removes all logs from the log-directory and removes the
log-directory if it is empty. log-directory if it is empty.
""" """
...@@ -156,21 +159,21 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}): ...@@ -156,21 +159,21 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}):
os.rmdir(log_dirname) os.rmdir(log_dirname)
def escape_re(strg: str) -> str:
    """Returns `strg` with all regular expression special characters
    escaped by a preceding backslash.
    """
    # assert isinstance(strg, str)
    # The backslash must stay first in this list: it is escaped before
    # any other character, so the backslashes inserted for the later
    # characters are not escaped a second time.
    re_chars = r"\.^$*+?{}[]()#<>=|!"
    for esc_ch in re_chars:
        strg = strg.replace(esc_ch, '\\' + esc_ch)
    return strg
def is_filename(strg: str) -> bool:
    """Tries to guess whether string ``strg`` is a file name.

    A string counts as a file name if it contains no line break, neither
    starts nor ends with a blank, and contains none of the characters
    ``*?"<>|``.
    """
    return strg.find('\n') < 0 and strg[:1] != " " and strg[-1:] != " " \
        and all(strg.find(ch) < 0 for ch in '*?"<>|')
def logfile_basename(filename_or_text, function_or_class_or_instance) -> str: def logfile_basename(filename_or_text, function_or_class_or_instance) -> str:
...@@ -181,11 +184,11 @@ def logfile_basename(filename_or_text, function_or_class_or_instance) -> str: ...@@ -181,11 +184,11 @@ def logfile_basename(filename_or_text, function_or_class_or_instance) -> str:
return os.path.basename(os.path.splitext(filename_or_text)[0]) return os.path.basename(os.path.splitext(filename_or_text)[0])
else: else:
try: try:
s = function_or_class_or_instance.__qualname.__ name = function_or_class_or_instance.__qualname.__
except AttributeError: except AttributeError:
s = function_or_class_or_instance.__class__.__name__ name = function_or_class_or_instance.__class__.__name__
i = s.find('.') i = name.find('.')
return s[:i] + '_out' if i >= 0 else s return name[:i] + '_out' if i >= 0 else name
####################################################################### #######################################################################
...@@ -223,14 +226,15 @@ def is_python_code(text_or_file: str) -> bool: ...@@ -223,14 +226,15 @@ def is_python_code(text_or_file: str) -> bool:
if is_filename(text_or_file): if is_filename(text_or_file):
return text_or_file[-3:].lower() == '.py' return text_or_file[-3:].lower() == '.py'
try: try:
compile(text_or_file, '<string>', 'exec') parser.suite(text_or_file)
# compile(text_or_file, '<string>', 'exec')
return True return True
except (SyntaxError, ValueError, OverflowError): except (SyntaxError, ValueError, OverflowError):
pass pass
return False return False
def has_fenced_code(text_or_file: str, info_strings = ('ebnf', 'test')) -> bool: def has_fenced_code(text_or_file: str, info_strings=('ebnf', 'test')) -> bool:
"""Checks whether `text_or_file` contains fenced code blocks, which are """Checks whether `text_or_file` contains fenced code blocks, which are
marked by one of the given info strings. marked by one of the given info strings.
See http://spec.commonmark.org/0.28/#fenced-code-blocks for more See http://spec.commonmark.org/0.28/#fenced-code-blocks for more
...@@ -245,17 +249,20 @@ def has_fenced_code(text_or_file: str, info_strings = ('ebnf', 'test')) -> bool: ...@@ -245,17 +249,20 @@ def has_fenced_code(text_or_file: str, info_strings = ('ebnf', 'test')) -> bool:
if markdown.find('\n~~~') < 0 and markdown.find('\n```') < 0: if markdown.find('\n~~~') < 0 and markdown.find('\n```') < 0:
return False return False
if isinstance(info_strings, str): info_strings = (info_strings,) if isinstance(info_strings, str):
FENCE_TMPL = '\n(?:(?:``[`]*[ ]*(?:%s)(?=[ .\-:\n])[^`\n]*\n)|(?:~~[~]*[ ]*(?:%s)(?=[ .\-:\n])[\n]*\n))' info_strings = (info_strings,)
LABEL_RE = '|'.join('(?:%s)' % s for s in info_strings) fence_tmpl = '\n(?:(?:``[`]*[ ]*(?:%s)(?=[ .\-:\n])[^`\n]*\n)' + \
RX_FENCE = re.compile(FENCE_TMPL % (LABEL_RE, LABEL_RE), flags=re.IGNORECASE) '|(?:~~[~]*[ ]*(?:%s)(?=[ .\-:\n])[\n]*\n))'
label_re = '|'.join('(?:%s)' % matched_string for matched_string in info_strings)
rx_fence = re.compile(fence_tmpl % (label_re, label_re), flags=re.IGNORECASE)
for m in RX_FENCE.finditer(markdown): for match in rx_fence.finditer(markdown):
s = re.match('(?:\n`+)|(?:\n~+)', m.group(0)).group(0) matched_string = re.match('(?:\n`+)|(?:\n~+)', match.group(0)).group(0)
if markdown.find(s, m.end()) >= 0: if markdown.find(matched_string, match.end()) >= 0:
return True return True
else: