Commit faa86362 authored by eckhart's avatar eckhart

- added some cleanup for pylint and mypy messages

parent fda0225c
......@@ -14,14 +14,18 @@ testdata/*.pdf
*~
*.old
DEBUG*
LOGS/
external_resources/
LOGS
external_resources
tmp/*
test/tmp*
build/
dist/
build
dist
MANIFEST
playground/*
DevScripts/DHParser.py
DHParser/cstringview.c
*.so
.mypy_cache
.vscode
DHParser.egg-info
.noseids
......@@ -1294,7 +1294,7 @@ class Option(UnaryOperator):
class ZeroOrMore(Option):
"""
r"""
`ZeroOrMore` applies a parser repeatedly as long as this parser
matches. Like `Option` the `ZeroOrMore` parser always matches. In
case of zero repetitions, the empty match `((), text)` is returned.
......
This diff is collapsed.
......@@ -7,7 +7,8 @@ cdef int first_char(text, int begin, int end)
cdef int last_char(text, int begin, int end)
cdef inline int pack_index(int index, int len)
cdef int pack_index(int index, int length)
@cython.locals(cbegin=cython.int, cend=cython.int)
cpdef real_indices(begin, end, int len)
cpdef real_indices(begin, end, int length)
......@@ -27,33 +27,55 @@ does not work for unicode strings. Hence, the StringView class.
import collections
from DHParser.toolkit import typing
from typing import Optional, Iterable, Tuple
from typing import Optional, Union, Iterable, Tuple
__all__ = ('StringView', 'EMPTY_STRING_VIEW')
def first_char(text, begin: int, end: int) -> int:
    """Returns the index of the first non-whitespace character in string
    `text` within the bounds [begin, end].

    Only ' ', '\\n' and '\\t' count as whitespace here. If the whole
    range is whitespace, `end` is returned.
    """
    while begin < end and text[begin] in ' \n\t':
        begin += 1
    return begin
def last_char(text, begin: int, end: int) -> int:
    """Returns the index of the last non-whitespace character in string
    `text` within the bounds [begin, end].

    Only ' ', '\\n' and '\\t' count as whitespace here. If the whole
    range is whitespace, `begin` is returned.
    (Fixed docstring: it previously said "first", copied from `first_char`.)
    """
    while end > begin and text[end] in ' \n\t':
        end -= 1
    return end
def pack_index(index: int, length: int) -> int:
    """Transforms `index` into a positive index counting from the beginning
    of the string, capping it at the boundaries [0, length].

    Examples:
        >>> pack_index(-1, 5)
        4
        >>> pack_index(6, 5)
        5
        >>> pack_index(-7, 5)
        0
    """
    # assert length >= 0
    index = index if index >= 0 else index + length
    return 0 if index < 0 else length if index > length else index
def real_indices(begin: Optional[int],
                 end: Optional[int],
                 length) -> Tuple[int, int]:  # "length: int" fails with cython!?
    """Returns the tuple of real (i.e. positive) indices from the slice
    indices `begin`, `end`, assuming a string of size `length`.

    `None` for `begin` or `end` means the start or the end of the
    string, respectively, mirroring Python slice semantics.
    """
    cbegin = 0 if begin is None else begin
    cend = length if end is None else end
    return pack_index(cbegin, length), pack_index(cend, length)
class StringView(collections.abc.Sized):
......@@ -91,20 +113,22 @@ class StringView(collections.abc.Sized):
return self.text
def __eq__(self, other):
    """Value equality: True iff both views have the same length and the
    same character content."""
    # PERFORMANCE WARNING: This creates copies of the strings
    return len(other) == len(self) and str(self) == str(other)
def __hash__(self):
    """Hash of the viewed substring, consistent with `__eq__`."""
    # PERFORMANCE WARNING: This creates a copy of the string-slice
    return hash(str(self))
def __add__(self, other):
    """Concatenation: returns a plain `str` when `other` is a str,
    otherwise a new StringView over the concatenated text."""
    if isinstance(other, str):
        return str(self) + other
    else:
        return StringView(str(self) + str(other))
def __radd__(self, other):
    """Reflected concatenation: returns a plain `str` when `other` is a
    str, otherwise a new StringView over the concatenated text."""
    if isinstance(other, str):
        return other + str(self)
    else:
        return StringView(str(other) + str(self))
......@@ -115,7 +139,11 @@ class StringView(collections.abc.Sized):
start, stop = real_indices(index.start, index.stop, self.len)
return StringView(self.text, self.begin + start, self.begin + stop)
def count(self, sub, start=None, end=None) -> int:
def count(self, sub: str, start=None, end=None) -> int:
"""Returns the number of non-overlapping occurrences of substring
`sub` in StringView S[start:end]. Optional arguments start and end
are interpreted as in slice notation.
"""
if self.fullstring_flag:
return self.text.count(sub, start, end)
elif start is None and end is None:
......@@ -124,7 +152,12 @@ class StringView(collections.abc.Sized):
start, end = real_indices(start, end, self.len)
return self.text.count(sub, self.begin + start, self.begin + end)
def find(self, sub, start=None, end=None) -> int:
def find(self, sub: str, start=None, end=None) -> int:
"""Returns the lowest index in S where substring `sub` is found,
such that `sub` is contained within S[start:end]. Optional
arguments `start` and `end` are interpreted as in slice notation.
Returns -1 on failure.
"""
if self.fullstring_flag:
return self.text.find(sub, start, end)
elif start is None and end is None:
......@@ -133,7 +166,12 @@ class StringView(collections.abc.Sized):
start, end = real_indices(start, end, self.len)
return self.text.find(sub, self.begin + start, self.begin + end) - self.begin
def rfind(self, sub, start=None, end=None) -> int:
def rfind(self, sub: str, start=None, end=None) -> int:
"""Returns the highest index in S where substring `sub` is found,
such that `sub` is contained within S[start:end]. Optional
arguments `start` and `end` are interpreted as in slice notation.
Returns -1 on failure.
"""
if self.fullstring_flag:
return self.text.rfind(sub, start, end)
if start is None and end is None:
......@@ -142,12 +180,23 @@ class StringView(collections.abc.Sized):
start, end = real_indices(start, end, self.len)
return self.text.rfind(sub, self.begin + start, self.begin + end) - self.begin
def startswith(self, prefix: str, start: int = 0, end: Optional[int] = None) -> bool:
def startswith(self,
prefix: Union[str, Tuple[str, ...]],
start: int = 0,
end: Optional[int] = None) -> bool:
"""Return True if S starts with the specified prefix, False otherwise.
With optional `start`, test S beginning at that position.
With optional `end`, stop comparing S at that position.
prefix can also be a tuple of strings to try.
"""
start += self.begin
end = self.end if end is None else self.begin + end
return self.text.startswith(prefix, start, end)
def match(self, regex):
    """Executes `regex.match` on the StringView object and returns the
    result, which is either a match-object or None.

    The match runs on the underlying string, restricted to the view's
    bounds via the `pos`/`endpos` arguments, so no copy is made.
    """
    return regex.match(self.text, pos=self.begin, endpos=self.end)
def index(self, absolute_index: int) -> int:
......@@ -171,9 +220,15 @@ class StringView(collections.abc.Sized):
return tuple(index - self.begin for index in absolute_indices)
def search(self, regex):
    """Executes regex.search on the StringView object and returns the
    result, which is either a match-object or None.

    The search runs on the underlying string, restricted to the view's
    bounds via the `pos`/`endpos` arguments, so no copy is made.
    """
    return regex.search(self.text, pos=self.begin, endpos=self.end)
def strip(self):
"""Returns a copy of the StringView `self` with leading and trailing
whitespace removed.
"""
if self.fullstring_flag:
return self.text.strip()
else:
......@@ -182,6 +237,11 @@ class StringView(collections.abc.Sized):
return self.text[begin:end]
def split(self, sep=None):
"""Returns a list of the words in `self`, using `sep` as the
delimiter string. If `sep` is not specified or is None, any
whitespace string is a separator and empty strings are
removed from the result.
"""
if self.fullstring_flag:
return self.text.split(sep)
else:
......
......@@ -66,23 +66,37 @@ class ParserBase:
@property
def name(self) -> str:
    """Returns the name of the parser or the empty string '' for unnamed
    parsers."""
    return self._name
@property
def ptype(self) -> str:
    """Returns the type of the parser. By default this is the parser's
    class name preceded by a colon, e.g. ':ZeroOrMore'."""
    # _ptype is presumably set during parser construction — TODO confirm
    return self._ptype
@property
def repr(self) -> str:
    """Returns the parser's name if it has a name and the parser's
    `ptype` otherwise. Note that for named parsers this is not the
    same as `repr(parsers)` which always returns the combined name
    and ptype, e.g. 'term:OneOrMore'."""
    return self.name if self.name else repr(self)
def reset(self):
    """Resets any parser variables. (Should be overridden.)"""
    # intentionally a no-op in the base class
    pass
def grammar(self):
    """Returns the Grammar object to which the parser belongs. If not
    yet connected to any Grammar object, None is returned.

    The base-class implementation always returns None; subclasses that
    are attached to a Grammar override this.
    """
    return None
def apply(self, func: Callable) -> bool:
    """Applies the function `func` to the parser. Returns False, if
    - for whatever reason - the function has not been applied, True
    otherwise."""
    # base class never applies anything; subclasses override
    return False
......@@ -159,7 +173,7 @@ def flatten_sxpr(sxpr: str) -> str:
>>> flatten_sxpr('(a\\n (b\\n c\\n )\\n)\\n')
'(a (b c))'
"""
return re.sub('\s(?=\))', '', re.sub('\s+', ' ', sxpr)).strip()
return re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', sxpr)).strip()
class Node(collections.abc.Sized):
......@@ -209,8 +223,9 @@ class Node(collections.abc.Sized):
__slots__ = ['_result', 'children', '_errors', '_len', '_pos', 'parser', 'error_flag']
def __init__(self, parser, result: ResultType, leafhint: bool=False) -> None:
"""Initializes the ``Node``-object with the ``Parser``-Instance
def __init__(self, parser, result: ResultType, leafhint: bool = False) -> None:
"""
Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result.
"""
self.error_flag = 0 # type: int
......@@ -275,11 +290,22 @@ class Node(collections.abc.Sized):
@property  # this needs to be a (dynamic) property, in case self.parser gets updated
def tag_name(self) -> str:
    """
    Returns the tag name of Node, i.e. the name for XML or
    S-expression representation. By default the tag name is the
    name of the node's parser or, if the node's parser is unnamed, the
    node's parser's `ptype`.
    """
    return self.parser.name or self.parser.ptype
@property
def result(self) -> StrictResultType:
    """
    Returns the result from the parser that created the node.
    Error messages are not included in the result. Use `self.content()`
    if the result plus any error messages is needed.
    """
    # plain attribute read; normalization presumably happens in the
    # result setter — TODO confirm against the setter below
    return self._result
@result.setter
......
......@@ -33,7 +33,9 @@ already exists.
import codecs
import contextlib
import hashlib
import io
import os
import parser
try:
import regex as re
......@@ -47,7 +49,7 @@ except ImportError:
import DHParser.foreign_typing as typing
sys.modules['typing'] = typing # make it possible to import from typing
from typing import Any, Iterable, Sequence, Set, Union
from typing import Any, Iterable, Sequence, Set, Union, cast
__all__ = ('logging',
'is_logging',
......@@ -114,13 +116,14 @@ def log_dir() -> str:
def logging(dirname="LOGS"):
"""Context manager. Log files within this context will be stored in
directory ``dirname``. Logging is turned off if name is empty.
Args:
dirname: the name for the log directory or the empty string to
turn logging off
"""
global LOGGING
if dirname and not isinstance(dirname, str): dirname = "LOGS" # be fail tolerant here...
if dirname and not isinstance(dirname, str):
dirname = "LOGS" # be fail tolerant here...
try:
save = LOGGING
except NameError:
......@@ -139,7 +142,7 @@ def is_logging() -> bool:
return False
def clear_logs(logfile_types={'.cst', '.ast', '.log'}):
def clear_logs(logfile_types=frozenset(['.cst', '.ast', '.log'])):
"""Removes all logs from the log-directory and removes the
log-directory if it is empty.
"""
......@@ -156,21 +159,21 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}):
os.rmdir(log_dirname)
def escape_re(strg: str) -> str:
    """Returns `strg` with all regular expression special characters escaped.
    """
    # assert isinstance(strg, str)
    # NOTE: the backslash is escaped first, so already-present backslashes
    # are doubled before the other metacharacters are processed.
    re_chars = r"\.^$*+?{}[]()#<>=|!"
    for esc_ch in re_chars:
        strg = strg.replace(esc_ch, '\\' + esc_ch)
    return strg
def is_filename(strg: str) -> bool:
    """Tries to guess whether string ``strg`` is a file name.

    Heuristic: no newline, no leading or trailing blank, and none of the
    characters that are illegal in file names on common platforms.
    """
    return strg.find('\n') < 0 and strg[:1] != " " and strg[-1:] != " " \
        and all(strg.find(ch) < 0 for ch in '*?"<>|')
    # and strg.find('*') < 0 and strg.find('?') < 0
def logfile_basename(filename_or_text, function_or_class_or_instance) -> str:
......@@ -181,11 +184,11 @@ def logfile_basename(filename_or_text, function_or_class_or_instance) -> str:
return os.path.basename(os.path.splitext(filename_or_text)[0])
else:
try:
s = function_or_class_or_instance.__qualname.__
name = function_or_class_or_instance.__qualname.__
except AttributeError:
s = function_or_class_or_instance.__class__.__name__
i = s.find('.')
return s[:i] + '_out' if i >= 0 else s
name = function_or_class_or_instance.__class__.__name__
i = name.find('.')
return name[:i] + '_out' if i >= 0 else name
#######################################################################
......@@ -223,14 +226,15 @@ def is_python_code(text_or_file: str) -> bool:
if is_filename(text_or_file):
return text_or_file[-3:].lower() == '.py'
try:
compile(text_or_file, '<string>', 'exec')
parser.suite(text_or_file)
# compile(text_or_file, '<string>', 'exec')
return True
except (SyntaxError, ValueError, OverflowError):
pass
return False
def has_fenced_code(text_or_file: str, info_strings = ('ebnf', 'test')) -> bool:
def has_fenced_code(text_or_file: str, info_strings=('ebnf', 'test')) -> bool:
"""Checks whether `text_or_file` contains fenced code blocks, which are
marked by one of the given info strings.
See http://spec.commonmark.org/0.28/#fenced-code-blocks for more
......@@ -245,17 +249,20 @@ def has_fenced_code(text_or_file: str, info_strings = ('ebnf', 'test')) -> bool:
if markdown.find('\n~~~') < 0 and markdown.find('\n```') < 0:
return False
if isinstance(info_strings, str): info_strings = (info_strings,)
FENCE_TMPL = '\n(?:(?:``[`]*[ ]*(?:%s)(?=[ .\-:\n])[^`\n]*\n)|(?:~~[~]*[ ]*(?:%s)(?=[ .\-:\n])[\n]*\n))'
LABEL_RE = '|'.join('(?:%s)' % s for s in info_strings)
RX_FENCE = re.compile(FENCE_TMPL % (LABEL_RE, LABEL_RE), flags=re.IGNORECASE)
if isinstance(info_strings, str):
info_strings = (info_strings,)
fence_tmpl = '\n(?:(?:``[`]*[ ]*(?:%s)(?=[ .\-:\n])[^`\n]*\n)' + \
'|(?:~~[~]*[ ]*(?:%s)(?=[ .\-:\n])[\n]*\n))'
label_re = '|'.join('(?:%s)' % matched_string for matched_string in info_strings)
rx_fence = re.compile(fence_tmpl % (label_re, label_re), flags=re.IGNORECASE)
for m in RX_FENCE.finditer(markdown):
s = re.match('(?:\n`+)|(?:\n~+)', m.group(0)).group(0)
if markdown.find(s, m.end()) >= 0:
for match in rx_fence.finditer(markdown):
matched_string = re.match('(?:\n`+)|(?:\n~+)', match.group(0)).group(0)
if markdown.find(matched_string, match.end()) >= 0:
return True
else:
return False
break
return False
def md5(*txt):
......@@ -279,8 +286,8 @@ def compile_python_object(python_src, catch_obj_regex=""):
namespace = {}
exec(code, namespace) # safety risk?
if catch_obj_regex:
matches = [key for key in namespace.keys() if catch_obj_regex.match(key)]
if len(matches) == 0:
matches = [key for key in namespace if catch_obj_regex.match(key)]
if len(matches) < 1:
raise ValueError("No object matching /%s/ defined in source code." %
catch_obj_regex.pattern)
elif len(matches) > 1:
......@@ -301,7 +308,7 @@ def compile_python_object(python_src, catch_obj_regex=""):
# def smart_list(arg: Union[str, Iterable[T]]) -> Union[Sequence[str], Sequence[T]]:
def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]:
"""Returns the argument as list, depending on its type and content.
If the argument is a string, it will be interpreted as a list of
comma separated values, trying ';', ',', ' ' as possible delimiters
in this order, e.g.
......@@ -324,7 +331,7 @@ def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]:
>>> smart_list(i for i in {1,2,3})
[1, 2, 3]
Finally, if none of the above is true, the argument will be
Finally, if none of the above is true, the argument will be
wrapped in a list and returned, e.g.
>>> smart_list(125)
[125]
......@@ -377,6 +384,7 @@ def sane_parser_name(name) -> bool:
def identity(anything: Any) -> Any:
    """Returns its argument unchanged (functional-programming helper)."""
    return anything
......@@ -389,9 +397,10 @@ def identity(anything: Any) -> Any:
try:
if sys.stdout.encoding.upper() != "UTF-8":
# make sure that `print()` does not raise an error on
# make sure that `print()` does not raise an error on
# non-ASCII characters:
sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
sys.stdout = cast(io.TextIOWrapper, codecs.getwriter("utf-8")(cast(
io.BytesIO, cast(io.TextIOWrapper, sys.stdout).detach())))
except AttributeError:
# somebody has already taken care of this !?
pass
#!/bin/sh
python3 setup.py build_ext --inplace
......@@ -139,11 +139,11 @@ else:
'''
def create_project(path,
ebnf_tmpl=EBNF_TEMPLATE,
readme_tmpl=README_TEMPLATE,
grammar_test_tmpl=GRAMMAR_TEST_TEMPLATE):
def create_project(path: str):
"""Creates a new DHParser-project in the given `path`.
"""
def create_file(name, content):
"""Create a file with `name` and write `content` to file."""
if not os.path.exists(name):
print('Creating file "%s".' % name)
with open(name, 'w') as f:
......@@ -177,13 +177,16 @@ def create_project(path,
def selftest() -> bool:
"""Run a simple self-test of DHParser.
"""
print("DHParser selftest...")
print("\nSTAGE I: Trying to compile EBNF-Grammar:\n")
builtin_ebnf_parser = get_ebnf_grammar()
ebnf_src = builtin_ebnf_parser.__doc__[builtin_ebnf_parser.__doc__.find('#'):]
ebnf_transformer = get_ebnf_transformer()
ebnf_compiler = get_ebnf_compiler('EBNF')
generated_ebnf_parser, errors, ast = compile_source(ebnf_src, None,
generated_ebnf_parser, errors, _ = compile_source(
ebnf_src, None,
builtin_ebnf_parser, ebnf_transformer, ebnf_compiler)
if errors:
......@@ -191,7 +194,8 @@ def selftest() -> bool:
print("\n\n".join(str(err) for err in errors))
return False
print(generated_ebnf_parser)
print("\n\nSTAGE 2: Selfhosting-test: Trying to compile EBNF-Grammar with generated parser...\n")
print("\n\nSTAGE 2: Selfhosting-test: "
"Trying to compile EBNF-Grammar with generated parser...\n")
selfhosted_ebnf_parser = compileDSL(ebnf_src, None, generated_ebnf_parser,
ebnf_transformer, ebnf_compiler)
ebnf_compiler.gen_transformer_skeleton()
......@@ -201,22 +205,27 @@ def selftest() -> bool:
def cpu_profile(func, repetitions=1):
    """Profile the function `func`.

    Runs `func` up to `repetitions` times under cProfile, stopping early
    as soon as it returns a falsy value, prints the 40 most expensive
    entries of the profile, and returns the last result of `func`.
    """
    import cProfile
    import pstats
    success = True  # fix: defined even when repetitions < 1 (was UnboundLocalError)
    profile = cProfile.Profile()
    profile.enable()
    for _ in range(repetitions):
        success = func()
        if not success:
            break
    profile.disable()
    # after your program ends
    stats = pstats.Stats(profile)
    stats.strip_dirs()
    stats.sort_stats('time').print_stats(40)
    return success
def mem_profile(func, dummy=0):
def mem_profile(func):
"""Profile memory usage of `func`.
"""
import tracemalloc
tracemalloc.start()
success = func()
......@@ -228,7 +237,10 @@ def mem_profile(func, dummy=0):
return success
if __name__ == "__main__":
def main():
"""Creates a project (if a project name has been passed as command line
parameter) or runs a quick self-test.
"""
if len(sys.argv) > 1:
if os.path.exists(sys.argv[1]) and os.path.isfile(sys.argv[1]):
_errors = compile_on_disk(sys.argv[1],
......@@ -245,3 +257,5 @@ if __name__ == "__main__":
if not cpu_profile(selftest, 1):
sys.exit(1)
if __name__ == "__main__":
main()
#!/usr/bin/python
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
from functools import partial
import os
import sys
try:
import regex as re
except ImportError:
import re
sys.path.extend(['../../', '../', './'])
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, TRUE_CONDITION, \
traverse, remove_children_if, merge_children, is_anonymous, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, remove_parser, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
WHITESPACE_PTYPE, TOKEN_PTYPE
#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#