Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit faa86362 authored by eckhart
Browse files

- added some cleanup for pylint and mypy messages

parent fda0225c
......@@ -14,14 +14,18 @@ testdata/*.pdf
*~
*.old
DEBUG*
LOGS/
external_resources/
LOGS
external_resources
tmp/*
test/tmp*
build/
dist/
build
dist
MANIFEST
playground/*
DevScripts/DHParser.py
DHParser/cstringview.c
*.so
.mypy_cache
.vscode
DHParser.egg-info
.noseids
......@@ -1294,7 +1294,7 @@ class Option(UnaryOperator):
class ZeroOrMore(Option):
"""
r"""
`ZeroOrMore` applies a parser repeatedly as long as this parser
matches. Like `Option` the `ZeroOrMore` parser always matches. In
case of zero repetitions, the empty match `((), text)` is returned.
......
This diff is collapsed.
......@@ -7,7 +7,8 @@ cdef int first_char(text, int begin, int end)
cdef int last_char(text, int begin, int end)
cdef inline int pack_index(int index, int len)
cdef int pack_index(int index, int length)
@cython.locals(cbegin=cython.int, cend=cython.int)
cpdef real_indices(begin, end, int len)
cpdef real_indices(begin, end, int length)
......@@ -27,33 +27,55 @@ does not work for unicode strings. Hence, the StringView class.
import collections
from DHParser.toolkit import typing
from typing import Optional, Iterable, Tuple
from typing import Optional, Union, Iterable, Tuple
__all__ = ('StringView', 'EMPTY_STRING_VIEW')
def first_char(text, begin: int, end: int) -> int:
    """Returns the index of the first non-whitespace character in string
    `text` within the half-open bounds [begin, end). If all characters
    in that range are whitespace, `end` is returned.
    """
    # only ' ', '\n' and '\t' count as whitespace here
    while begin < end and text[begin] in ' \n\t':
        begin += 1
    return begin
def last_char(text, begin: int, end: int) -> int:
    """Returns the index of the last non-whitespace character in string
    `text` within the bounds [begin, end], or `begin` if there is none.

    (The original docstring said "first" — a copy-paste error from
    `first_char`; the loop scans backwards from `end`.)
    """
    # only ' ', '\n' and '\t' count as whitespace here
    while end > begin and text[end] in ' \n\t':
        end -= 1
    return end
def pack_index(index: int, length: int) -> int:
    """Transforms `index` into a positive index counting from the beginning
    of the string, capping it at the boundaries [0, length].

    Examples:
        >>> pack_index(-1, 5)
        4
        >>> pack_index(6, 5)
        5
        >>> pack_index(-7, 5)
        0
    """
    # assert length >= 0
    index = index if index >= 0 else index + length
    return 0 if index < 0 else length if index > length else index
def real_indices(begin: Optional[int],
                 end: Optional[int],
                 length) -> Tuple[int, int]:  # "length: int" fails with cython!?
    """Returns the tuple of real (i.e. positive) indices from the slice
    indices `begin`, `end`, assuming a string of size `length`.
    `None` stands for an open bound (0 and `length`, respectively),
    mirroring Python slice semantics.
    """
    cbegin = 0 if begin is None else begin
    cend = length if end is None else end
    return pack_index(cbegin, length), pack_index(cend, length)
class StringView(collections.abc.Sized):
......@@ -91,20 +113,22 @@ class StringView(collections.abc.Sized):
return self.text
def __eq__(self, other):
    """Equality by length and string content (works for str and
    StringView operands alike)."""
    # PERFORMANCE WARNING: This creates copies of the strings
    return len(other) == len(self) and str(self) == str(other)
def __hash__(self):
    """Hash of the viewed string slice, consistent with `__eq__`."""
    # PERFORMANCE WARNING: This creates a copy of the string-slice
    return hash(str(self))
def __add__(self, other):
    """Concatenation: returns a plain str when `other` is a str,
    otherwise a new StringView over the concatenated text."""
    if isinstance(other, str):
        return str(self) + other
    else:
        return StringView(str(self) + str(other))
def __radd__(self, other):
    """Right-hand concatenation: returns a plain str when `other` is a
    str, otherwise a new StringView over the concatenated text."""
    if isinstance(other, str):
        return other + str(self)
    else:
        return StringView(str(other) + str(self))
......@@ -115,7 +139,11 @@ class StringView(collections.abc.Sized):
start, stop = real_indices(index.start, index.stop, self.len)
return StringView(self.text, self.begin + start, self.begin + stop)
def count(self, sub, start=None, end=None) -> int:
def count(self, sub: str, start=None, end=None) -> int:
"""Returns the number of non-overlapping occurrences of substring
`sub` in StringView S[start:end]. Optional arguments start and end
are interpreted as in slice notation.
"""
if self.fullstring_flag:
return self.text.count(sub, start, end)
elif start is None and end is None:
......@@ -124,7 +152,12 @@ class StringView(collections.abc.Sized):
start, end = real_indices(start, end, self.len)
return self.text.count(sub, self.begin + start, self.begin + end)
def find(self, sub, start=None, end=None) -> int:
def find(self, sub: str, start=None, end=None) -> int:
"""Returns the lowest index in S where substring `sub` is found,
such that `sub` is contained within S[start:end]. Optional
arguments `start` and `end` are interpreted as in slice notation.
Returns -1 on failure.
"""
if self.fullstring_flag:
return self.text.find(sub, start, end)
elif start is None and end is None:
......@@ -133,7 +166,12 @@ class StringView(collections.abc.Sized):
start, end = real_indices(start, end, self.len)
return self.text.find(sub, self.begin + start, self.begin + end) - self.begin
def rfind(self, sub, start=None, end=None) -> int:
def rfind(self, sub: str, start=None, end=None) -> int:
"""Returns the highest index in S where substring `sub` is found,
such that `sub` is contained within S[start:end]. Optional
arguments `start` and `end` are interpreted as in slice notation.
Returns -1 on failure.
"""
if self.fullstring_flag:
return self.text.rfind(sub, start, end)
if start is None and end is None:
......@@ -142,12 +180,23 @@ class StringView(collections.abc.Sized):
start, end = real_indices(start, end, self.len)
return self.text.rfind(sub, self.begin + start, self.begin + end) - self.begin
def startswith(self,
               prefix: Union[str, Tuple[str, ...]],
               start: int = 0,
               end: Optional[int] = None) -> bool:
    """Return True if S starts with the specified prefix, False otherwise.
    With optional `start`, test S beginning at that position.
    With optional `end`, stop comparing S at that position.
    prefix can also be a tuple of strings to try.
    """
    # translate view-relative positions into positions in the full text
    start += self.begin
    end = self.end if end is None else self.begin + end
    return self.text.startswith(prefix, start, end)
def match(self, regex):
    """Runs `regex.match` against the text underlying this StringView,
    restricted to the view's boundaries, and returns the resulting
    match object or None."""
    return regex.match(self.text, pos=self.begin, endpos=self.end)
def index(self, absolute_index: int) -> int:
......@@ -171,9 +220,15 @@ class StringView(collections.abc.Sized):
return tuple(index - self.begin for index in absolute_indices)
def search(self, regex):
    """Runs `regex.search` against the text underlying this StringView,
    restricted to the view's boundaries, and returns the resulting
    match object or None."""
    return regex.search(self.text, pos=self.begin, endpos=self.end)
def strip(self):
"""Returns a copy of the StringView `self` with leading and trailing
whitespace removed.
"""
if self.fullstring_flag:
return self.text.strip()
else:
......@@ -182,6 +237,11 @@ class StringView(collections.abc.Sized):
return self.text[begin:end]
def split(self, sep=None):
"""Returns a list of the words in `self`, using `sep` as the
delimiter string. If `sep` is not specified or is None, any
whitespace string is a separator and empty strings are
removed from the result.
"""
if self.fullstring_flag:
return self.text.split(sep)
else:
......
......@@ -66,23 +66,37 @@ class ParserBase:
@property
def name(self) -> str:
    """Returns the name of the parser or the empty string '' for unnamed
    parsers."""
    return self._name
@property
def ptype(self) -> str:
    """Returns the type of the parser. By default this is the parser's
    class name preceded by a colon, e.g. ':ZeroOrMore'.
    (See also `repr`, which combines name and ptype.)"""
    return self._ptype
@property
def repr(self) -> str:
    """Returns the parser's name if it has a name and the parser's
    `ptype` otherwise. Note that for named parsers this is not the
    same as `repr(parsers)` which always returns the combined name
    and ptype, e.g. 'term:OneOrMore'."""
    # NOTE(review): falls back to repr(self) — presumably __repr__ yields
    # the ptype for unnamed parsers; confirm against __repr__'s definition.
    return self.name if self.name else repr(self)
def reset(self) -> None:
    """Resets any parser variables. (Should be overridden by subclasses
    that keep state between runs; the base implementation does nothing.)"""
    pass
def grammar(self):
    """Returns the Grammar object to which the parser belongs. If not
    yet connected to any Grammar object, None is returned.

    (The base implementation always returns None; connected parsers
    override this.)
    """
    return None
def apply(self, func: Callable) -> bool:
    """Applies the function `func` to the parser. Returns False, if
    - for whatever reason - the function has not been applied, True
    otherwise."""
    # base implementation: never applies anything
    return False
......@@ -159,7 +173,7 @@ def flatten_sxpr(sxpr: str) -> str:
>>> flatten_sxpr('(a\\n (b\\n c\\n )\\n)\\n')
'(a (b c))'
"""
return re.sub('\s(?=\))', '', re.sub('\s+', ' ', sxpr)).strip()
return re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', sxpr)).strip()
class Node(collections.abc.Sized):
......@@ -209,8 +223,9 @@ class Node(collections.abc.Sized):
__slots__ = ['_result', 'children', '_errors', '_len', '_pos', 'parser', 'error_flag']
def __init__(self, parser, result: ResultType, leafhint: bool=False) -> None:
"""Initializes the ``Node``-object with the ``Parser``-Instance
def __init__(self, parser, result: ResultType, leafhint: bool = False) -> None:
"""
Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result.
"""
self.error_flag = 0 # type: int
......@@ -275,11 +290,22 @@ class Node(collections.abc.Sized):
@property  # this needs to be a (dynamic) property, in case self.parser gets updated
def tag_name(self) -> str:
    """
    Returns the tag name of Node, i.e. the name for XML or
    S-expression representation. By default the tag name is the
    name of the node's parser or, if the node's parser is unnamed, the
    node's parser's `ptype`.
    """
    return self.parser.name or self.parser.ptype
@property
def result(self) -> StrictResultType:
    """
    Returns the result from the parser that created the node.
    Error messages are not included in the result. Use `self.content()`
    if the result plus any error messages is needed.
    """
    # NOTE(review): _result is presumably set by __init__ from the
    # parser's result (see __init__'s docstring) — confirm.
    return self._result
@result.setter
......
......@@ -33,7 +33,9 @@ already exists.
import codecs
import contextlib
import hashlib
import io
import os
import parser
try:
import regex as re
......@@ -47,7 +49,7 @@ except ImportError:
import DHParser.foreign_typing as typing
sys.modules['typing'] = typing # make it possible to import from typing
from typing import Any, Iterable, Sequence, Set, Union
from typing import Any, Iterable, Sequence, Set, Union, cast
__all__ = ('logging',
'is_logging',
......@@ -114,13 +116,14 @@ def log_dir() -> str:
def logging(dirname="LOGS"):
"""Context manager. Log files within this context will be stored in
directory ``dirname``. Logging is turned off if name is empty.
Args:
dirname: the name for the log directory or the empty string to
turn logging of
"""
global LOGGING
if dirname and not isinstance(dirname, str): dirname = "LOGS" # be fail tolerant here...
if dirname and not isinstance(dirname, str):
dirname = "LOGS" # be fail tolerant here...
try:
save = LOGGING
except NameError:
......@@ -139,7 +142,7 @@ def is_logging() -> bool:
return False
def clear_logs(logfile_types={'.cst', '.ast', '.log'}):
def clear_logs(logfile_types=frozenset(['.cst', '.ast', '.log'])):
"""Removes all logs from the log-directory and removes the
log-directory if it is empty.
"""
......@@ -156,21 +159,21 @@ def clear_logs(logfile_types={'.cst', '.ast', '.log'}):
os.rmdir(log_dirname)
def escape_re(strg: str) -> str:
    """Returns `strg` with all regular expression special characters escaped.
    """
    # assert isinstance(strg, str)
    re_chars = r"\.^$*+?{}[]()#<>=|!"
    # the backslash is listed first, so previously-added escapes are
    # not escaped again
    for esc_ch in re_chars:
        strg = strg.replace(esc_ch, '\\' + esc_ch)
    return strg
def is_filename(strg: str) -> bool:
    """Tries to guess whether string `strg` is a file name: no newlines,
    no leading or trailing blank, none of the characters *?"<>| that are
    illegal in file names."""
    return strg.find('\n') < 0 and strg[:1] != " " and strg[-1:] != " " \
        and all(strg.find(ch) < 0 for ch in '*?"<>|')
def logfile_basename(filename_or_text, function_or_class_or_instance) -> str:
    """Returns a suitable logfile-basename: the stem of `filename_or_text`
    if it looks like a file name, otherwise a name derived from
    `function_or_class_or_instance` (its qualified name, or its class
    name as fallback), truncated at the first dot and suffixed with
    '_out'.
    """
    if is_filename(filename_or_text):
        return os.path.basename(os.path.splitext(filename_or_text)[0])
    else:
        try:
            # BUG FIX: was `__qualname.__` (a typo that always raised
            # AttributeError), so the qualname path was dead code
            name = function_or_class_or_instance.__qualname__
        except AttributeError:
            name = function_or_class_or_instance.__class__.__name__
        i = name.find('.')
        return name[:i] + '_out' if i >= 0 else name
#######################################################################
......@@ -223,14 +226,15 @@ def is_python_code(text_or_file: str) -> bool:
if is_filename(text_or_file):
return text_or_file[-3:].lower() == '.py'
try:
compile(text_or_file, '<string>', 'exec')
parser.suite(text_or_file)
# compile(text_or_file, '<string>', 'exec')
return True
except (SyntaxError, ValueError, OverflowError):
pass
return False
def has_fenced_code(text_or_file: str, info_strings = ('ebnf', 'test')) -> bool:
def has_fenced_code(text_or_file: str, info_strings=('ebnf', 'test')) -> bool:
"""Checks whether `text_or_file` contains fenced code blocks, which are
marked by one of the given info strings.
See http://spec.commonmark.org/0.28/#fenced-code-blocks for more
......@@ -245,17 +249,20 @@ def has_fenced_code(text_or_file: str, info_strings = ('ebnf', 'test')) -> bool:
if markdown.find('\n~~~') < 0 and markdown.find('\n```') < 0:
return False
if isinstance(info_strings, str): info_strings = (info_strings,)
FENCE_TMPL = '\n(?:(?:``[`]*[ ]*(?:%s)(?=[ .\-:\n])[^`\n]*\n)|(?:~~[~]*[ ]*(?:%s)(?=[ .\-:\n])[\n]*\n))'
LABEL_RE = '|'.join('(?:%s)' % s for s in info_strings)
RX_FENCE = re.compile(FENCE_TMPL % (LABEL_RE, LABEL_RE), flags=re.IGNORECASE)
if isinstance(info_strings, str):
info_strings = (info_strings,)
fence_tmpl = '\n(?:(?:``[`]*[ ]*(?:%s)(?=[ .\-:\n])[^`\n]*\n)' + \
'|(?:~~[~]*[ ]*(?:%s)(?=[ .\-:\n])[\n]*\n))'
label_re = '|'.join('(?:%s)' % matched_string for matched_string in info_strings)
rx_fence = re.compile(fence_tmpl % (label_re, label_re), flags=re.IGNORECASE)
for m in RX_FENCE.finditer(markdown):
s = re.match('(?:\n`+)|(?:\n~+)', m.group(0)).group(0)
if markdown.find(s, m.end()) >= 0:
for match in rx_fence.finditer(markdown):
matched_string = re.match('(?:\n`+)|(?:\n~+)', match.group(0)).group(0)
if markdown.find(matched_string, match.end()) >= 0:
return True
else:
return False
break
return False
def md5(*txt):
......@@ -279,8 +286,8 @@ def compile_python_object(python_src, catch_obj_regex=""):
namespace = {}
exec(code, namespace) # safety risk?
if catch_obj_regex:
matches = [key for key in namespace.keys() if catch_obj_regex.match(key)]
if len(matches) == 0:
matches = [key for key in namespace if catch_obj_regex.match(key)]
if len(matches) < 1:
raise ValueError("No object matching /%s/ defined in source code." %
catch_obj_regex.pattern)
elif len(matches) > 1:
......@@ -301,7 +308,7 @@ def compile_python_object(python_src, catch_obj_regex=""):
# def smart_list(arg: Union[str, Iterable[T]]) -> Union[Sequence[str], Sequence[T]]:
def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]:
"""Returns the argument as list, depending on its type and content.
If the argument is a string, it will be interpreted as a list of
comma separated values, trying ';', ',', ' ' as possible delimiters
in this order, e.g.
......@@ -324,7 +331,7 @@ def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]:
>>> smart_list(i for i in {1,2,3})
[1, 2, 3]
Finally, if none of the above is true, the argument will be
Finally, if none of the above is true, the argument will be
wrapped in a list and returned, e.g.
>>> smart_list(125)
[125]
......@@ -377,6 +384,7 @@ def sane_parser_name(name) -> bool:
def identity(anything: Any) -> Any:
    """Returns its argument unchanged (helper for functional
    programming style)."""
    return anything
......@@ -389,9 +397,10 @@ def identity(anything: Any) -> Any:
try:
if sys.stdout.encoding.upper() != "UTF-8":
# make sure that `print()` does not raise an error on
# make sure that `print()` does not raise an error on
# non-ASCII characters:
sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
sys.stdout = cast(io.TextIOWrapper, codecs.getwriter("utf-8")(cast(
io.BytesIO, cast(io.TextIOWrapper, sys.stdout).detach())))
except AttributeError:
# somebody has already taken care of this !?
pass
#!/bin/sh
python3 setup.py build_ext --inplace
......@@ -139,11 +139,11 @@ else:
'''
def create_project(path,
ebnf_tmpl=EBNF_TEMPLATE,
readme_tmpl=README_TEMPLATE,
grammar_test_tmpl=GRAMMAR_TEST_TEMPLATE):
def create_project(path: str):
"""Creates the a new DHParser-project in the given `path`.
"""
def create_file(name, content):
"""Create a file with `name` and write `content` to file."""
if not os.path.exists(name):
print('Creating file "%s".' % name)
with open(name, 'w') as f:
......@@ -177,13 +177,16 @@ def create_project(path,
def selftest() -> bool:
"""Run a simple self-text of DHParser.
"""
print("DHParser selftest...")
print("\nSTAGE I: Trying to compile EBNF-Grammar:\n")
builtin_ebnf_parser = get_ebnf_grammar()
ebnf_src = builtin_ebnf_parser.__doc__[builtin_ebnf_parser.__doc__.find('#'):]
ebnf_transformer = get_ebnf_transformer()
ebnf_compiler = get_ebnf_compiler('EBNF')
generated_ebnf_parser, errors, ast = compile_source(ebnf_src, None,
generated_ebnf_parser, errors, _ = compile_source(
ebnf_src, None,
builtin_ebnf_parser, ebnf_transformer, ebnf_compiler)
if errors:
......@@ -191,7 +194,8 @@ def selftest() -> bool:
print("\n\n".join(str(err) for err in errors))
return False
print(generated_ebnf_parser)
print("\n\nSTAGE 2: Selfhosting-test: Trying to compile EBNF-Grammar with generated parser...\n")
print("\n\nSTAGE 2: Selfhosting-test: "
"Trying to compile EBNF-Grammar with generated parser...\n")
selfhosted_ebnf_parser = compileDSL(ebnf_src, None, generated_ebnf_parser,
ebnf_transformer, ebnf_compiler)
ebnf_compiler.gen_transformer_skeleton()
......@@ -201,22 +205,27 @@ def selftest() -> bool:
def cpu_profile(func, repetitions=1):
    """Profiles the function `func` with cProfile, printing the 40
    most time-consuming entries. Profiling stops early as soon as
    `func` returns a falsy value. Returns the last value returned
    by `func` (True if `repetitions` < 1).
    """
    import cProfile
    import pstats
    profile = cProfile.Profile()
    success = True  # BUG FIX: was unbound when repetitions < 1
    profile.enable()
    for _ in range(repetitions):
        success = func()
        if not success:
            break
    profile.disable()
    # after your program ends
    stats = pstats.Stats(profile)
    stats.strip_dirs()
    stats.sort_stats('time').print_stats(40)
    return success
def mem_profile(func, dummy=0):
def mem_profile(func):
"""Profile memory usage of `func`.
"""
import tracemalloc
tracemalloc.start()
success = func()
......@@ -228,7 +237,10 @@ def mem_profile(func, dummy=0):
return success
if __name__ == "__main__":
def main():
"""Creates a project (if a project name has been passed as command line
parameter) or runs a quick self-test.
"""
if len(sys.argv) > 1:
if os.path.exists(sys.argv[1]) and os.path.isfile(sys.argv[1]):
_errors = compile_on_disk(sys.argv[1],
......@@ -245,3 +257,5 @@ if __name__ == "__main__":
if not cpu_profile(selftest, 1):
sys.exit(1)
if __name__ == "__main__":
main()
#!/usr/bin/python
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
from functools import partial
import os
import sys
try:
import regex as re
except ImportError:
import re
sys.path.extend(['../../', '../', './'])
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, \