
Commit 36592546 authored by di68kap


- Error messages and warnings are now better separated from one another; minor improvements to the MLW grammar
parent 1a7a00cb
@@ -51,10 +51,12 @@ __all__ = ('CompilerError', 'Compiler', 'compile_source')
class CompilerError(Exception):
- """Exception raised when an error of the compiler itself is detected.
+ """
+ Exception raised when an error of the compiler itself is detected.
Compiler errors are not to be confused with errors in the source
code to be compiled, which do not raise Exceptions but are merely
- reported as an error."""
+ reported as an error.
+ """
pass
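To illustrate the distinction this docstring draws, here is a minimal self-contained sketch (the dict-shaped node and the helper names are hypothetical, not DHParser's API): a defect in the compiler itself raises an exception, while a defect in the compiled source is merely recorded.

class DemoCompilerError(Exception):
    """Raised for defects in the compiler itself."""

def compile_node(node, errors):
    # hypothetical compile step showing the two error channels
    if node is None:
        # a bug in the compiler's own logic -> raise an exception
        raise DemoCompilerError("compile_node() must not be called with None")
    if not node.get("valid", True):
        # an error in the *source* -> record it, do not raise
        errors.append("error in source node: " + node.get("name", "?"))

errors = []
compile_node({"name": "root", "valid": False}, errors)
assert errors == ["error in source node: root"]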
@@ -61,6 +61,7 @@ class ParserBase:
It is defined here, because Node objects require a parser object
for instantiation.
"""
__slots__ = 'name', 'ptype'
def __init__(self,): # , pbases=frozenset()):
@@ -76,18 +77,6 @@ class ParserBase:
def __call__(self, text: StringView) -> Tuple[Optional['Node'], StringView]:
return None, text
- # @property
- # def name(self):
- #     """Returns the name of the parser or the empty string '' for unnamed
- #     parsers."""
- #     return self._name
- #
- # @property
- # def ptype(self) -> str:
- #     """Returns the type of the parser. By default this is the parser's
- #     class name preceded by a colon, e.g. ':ZeroOrMore'."""
- #     return self._ptype
@property
def repr(self) -> str:
"""Returns the parser's name if it has a name and repr()"""
@@ -123,6 +112,7 @@ class MockParser(ParserBase):
syntax tree (re-)construction. In all other cases where a parser
object substitute is needed, choose the singleton ZOMBIE_PARSER.
"""
__slots__ = ()
def __init__(self, name='', ptype=''): # , pbases=frozenset()):
@@ -143,6 +133,7 @@ class ZombieParser(MockParser):
these (or one of these properties) is needed, but no real Parser-
object is instantiated.
"""
alive = False
__slots__ = ()
@@ -181,22 +172,26 @@ ResultType = Union[ChildrenType, 'Node', StringView, str, None]
def flatten_sxpr(sxpr: str) -> str:
"""Returns S-expression ``sxpr`` as a one-liner without unnecessary
"""
Returns S-expression ``sxpr`` as a one-liner without unnecessary
whitespace.
Example:
>>> flatten_sxpr('(a\\n (b\\n c\\n )\\n)\\n')
'(a (b c))'
"""
return re.sub(r'\s(?=\))', '', re.sub(r'\s+', ' ', sxpr)).strip()
def flatten_xml(xml: str) -> str:
"""Returns an XML-tree as a one liner without unnecessary whitespace,
"""
Returns an XML-tree as a one liner without unnecessary whitespace,
i.e. only whitespace within leaf-nodes is preserved.
A more precise alternative to `flatten_xml` is to use Node.as_xml()
and passing a set containing the top level tag to parameter `inline_tags`.
"""
# works only with regex
# return re.sub(r'\s+(?=<\w)', '', re.sub(r'(?<=</\w+>)\s+', '', xml))
def tag_only(m):
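A side note on the commented-out one-liner above: `(?<=</\w+>)` is a variable-width lookbehind, which the standard `re` module rejects; only the third-party `regex` module accepts it, hence the "works only with regex" remark. A simplified sketch of the same idea that needs only `re` (not the module's actual implementation): whitespace sitting strictly between a closing '>' and an opening '<' separates tags and can be dropped, while whitespace inside leaf content survives.

import re

def flatten_xml_sketch(xml: str) -> str:
    # drop whitespace between tags; text inside leaf nodes is untouched
    return re.sub(r'>\s+<', '><', xml)

assert flatten_xml_sketch('<a>\n  <b>c d</b>\n</a>') == '<a><b>c d</b></a>'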
@@ -366,12 +361,6 @@ class Node(collections.abc.Sized):
return True
return False
raise ValueError('Leaf node cannot contain other nodes')
- # generator = self.select_by_tag(tag_name, False)
- # try:
- #     generator.__next__()
- #     return True
- # except StopIteration:
- #     return False
def get(self, index_or_tagname: Union[int, str],
@@ -767,6 +756,7 @@ class RootNode(Node):
error_flag (int): the highest warning or error level of all errors
that occurred.
"""
def __init__(self, node: Optional[Node] = None) -> 'RootNode':
super().__init__(ZOMBIE_PARSER, '')
self.all_errors = []
@@ -779,7 +769,8 @@ class RootNode(Node):
self.empty_tags = set()
def swallow(self, node: Node) -> 'RootNode':
"""Put `self` in the place of `node` by copying all its data.
"""
Put `self` in the place of `node` by copying all its data.
Returns self.
This is done by the parse.Grammar object after
@@ -800,7 +791,9 @@ class RootNode(Node):
return self
def add_error(self, node: Node, error: Error) -> 'RootNode':
"""Adds an Error object to the tree, locating it at a specific node."""
"""
Adds an Error object to the tree, locating it at a specific node.
"""
self.all_errors.append(error)
self.error_flag = max(self.error_flag, error.code)
node.errors.append(error)
@@ -822,15 +815,18 @@ class RootNode(Node):
return self
def collect_errors(self) -> List[Error]:
"""Returns the list of errors, ordered bv their position.
"""
Returns the list of errors, ordered bv their position.
"""
self.all_errors.sort(key=lambda e: e.pos)
return self.all_errors
def customized_XML(self):
"""Returns a customized XML representation of the tree.
"""
Returns a customized XML representation of the tree.
See the docstring of `Node.as_xml()` for an explanation of the
customizations."""
customizations.
"""
return self.as_xml(inline_tags = self.inline_tags,
omit_tags=self.omit_tags,
empty_tags=self.empty_tags)
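The error bookkeeping changed in this hunk is simple enough to restate in a few self-contained lines (a sketch with a stand-in error type, not the classes from this module): add_error appends to a flat list and raises error_flag to the highest code seen so far; collect_errors just sorts that list by position.

class DemoError:
    def __init__(self, message, pos, code):
        self.message, self.pos, self.code = message, pos, code

all_errors, error_flag = [], 0
for err in (DemoError("late warning", pos=30, code=100),
            DemoError("early error", pos=2, code=1000)):
    all_errors.append(err)
    error_flag = max(error_flag, err.code)   # highest level wins

all_errors.sort(key=lambda e: e.pos)         # what collect_errors() does
assert error_flag == 1000
assert [e.message for e in all_errors] == ["early error", "late warning"]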
@@ -851,13 +847,15 @@ def parse_sxpr(sxpr: str) -> Node:
>>> parse_sxpr("(a (b c))").as_sxpr()
'(a\\n (b\\n "c"\\n )\\n)'
"""
sxpr = StringView(sxpr).strip()
mock_parsers = dict()
def next_block(s: StringView):
"""Generator that yields all characters until the next closing bracket
that does not match an opening bracket matched earlier within the same
package."""
package.
"""
s = s.strip()
try:
while s[0] != ')':
@@ -947,13 +945,15 @@ def parse_xml(xml: str) -> Node:
"""
Generates a tree of nodes from a (Pseudo-)XML-source.
"""
xml = StringView(xml)
PlainText = MockParser('', TOKEN_PTYPE)
mock_parsers = {TOKEN_PTYPE: PlainText}
def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]:
"""Parses a sqeuence of XML-Attributes. Returns the string-slice
beginning after the end of the attr."""
beginning after the end of the attr.
"""
attributes = OrderedDict()
restart = 0
for match in s.finditer(re.compile(r'\s*(?P<attr>\w+)\s*=\s*"(?P<value>.*)"\s*')):
@@ -966,7 +966,8 @@ def parse_xml(xml: str) -> Node:
"""Parses an opening tag. Returns the string segment following the
the opening tag, the tag name, a dictionary of attr and
a flag indicating whether the tag is actually a solitary tag as
indicated by a slash at the end, i.e. <br/>."""
indicated by a slash at the end, i.e. <br/>.
"""
match = s.match(re.compile(r'<\s*(?P<tagname>[\w:]+)\s*'))
assert match
tagname = match.groupdict()['tagname']
@@ -978,7 +979,8 @@ def parse_xml(xml: str) -> Node:
def parse_closing_tag(s: StringView) -> Tuple[StringView, str]:
"""Parses a closing tag and returns the string segment, just after
the closing tag."""
the closing tag.
"""
match = s.match(re.compile(r'</\s*(?P<tagname>[\w:]+)>'))
assert match
tagname = match.groupdict()['tagname']
@@ -986,7 +988,8 @@ def parse_xml(xml: str) -> Node:
def parse_leaf_content(s: StringView) -> Tuple[StringView, str]:
"""Parses a piece of the content of a tag, just until the next opening,
closing or solitary tag is reached."""
closing or solitary tag is reached.
"""
i = 0
while s[i] != "<" or s[max(0, i-1)] == "\\":
i = s.find("<", i)
@@ -42,6 +42,7 @@ except ImportError:
from typing import Any, Iterable, Sequence, Set, Union, Dict, cast
__all__ = ('escape_re',
'escape_control_characters',
'is_filename',
@@ -68,6 +69,7 @@ def escape_re(strg: str) -> str:
"""
Returns the string with all regular expression special characters escaped.
"""
# assert isinstance(strg, str)
re_chars = r"\.^$*+?{}[]()#<>=|!"
for esc_ch in re_chars:
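The loop body is cut off by the hunk, but the idea is plain character-by-character escaping. A self-contained version plus a usage check (note that the standard library's re.escape covers the same need):

def escape_re_sketch(strg: str) -> str:
    # escape the backslash first, then every other special character
    for esc_ch in r"\.^$*+?{}[]()#<>=|!":
        strg = strg.replace(esc_ch, '\\' + esc_ch)
    return strg

assert escape_re_sketch("1+1=2?") == r"1\+1\=2\?"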
@@ -79,6 +81,7 @@ def escape_control_characters(strg: str) -> str:
"""
Replace all control characters (e.g. \n \t) in a string by their backslashed representation.
"""
return repr(strg).replace('\\\\', '\\')[1:-1]
@@ -86,6 +89,7 @@ def lstrip_docstring(docstring: str) -> str:
"""
Strips leading whitespace from a docstring.
"""
lines = docstring.replace('\t', ' ').split('\n')
indent = 255  # start value: larger than any realistic indentation
for line in lines[1:]:
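The indent computation is cut off mid-loop; its effect is to find the smallest indentation among all non-empty lines after the first and strip that much from every line. A compact stand-in (a sketch, not this module's exact code):

def lstrip_docstring_sketch(docstring: str) -> str:
    lines = docstring.split('\n')
    indents = [len(ln) - len(ln.lstrip()) for ln in lines[1:] if ln.strip()]
    indent = min(indents, default=0)   # common indentation of the body
    return '\n'.join([lines[0].lstrip()] + [ln[indent:] for ln in lines[1:]])

assert lstrip_docstring_sketch("Title\n    body\n      more") == "Title\nbody\n  more"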
@@ -98,7 +102,10 @@ def lstrip_docstring(docstring: str) -> str:
def is_filename(strg: str) -> bool:
"""Tries to guess whether string ``s`` is a file name."""
"""
Tries to guess whether string ``strg`` is a file name.
"""
return strg.find('\n') < 0 and strg[:1] != " " and strg[-1:] != " " \
and all(strg.find(ch) < 0 for ch in '*?"<>|')
# and strg.select('*') < 0 and strg.select('?') < 0
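Since the whole body is visible above, its behavior can be pinned down with a few doctest-style checks:

assert is_filename("grammar.ebnf")               # single line, no forbidden chars
assert not is_filename("line one\nline two")     # multi-line -> treated as text
assert not is_filename("what?.txt")              # '?' is a forbidden wildcard
assert not is_filename(" padded.txt")            # leading blank disqualifies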
@@ -112,16 +119,6 @@ def is_filename(strg: str) -> bool:
def issubtype(sub_type, base_type):
- # if sys.version_info.major <= 3 and sys.version_info.minor <= 6:
- #     return issubclass(sub_type, base_type)
- # try:
- #     base_type = base_type.__origin__
- # except AttributeError:
- #     pass
- # try:
- #     sub_type = sub_type.__origin__
- # except AttributeError:
- #     pass
def origin(t):
try:
ot = t.__origin__
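The helper that begins here unwraps typing generics via their __origin__ attribute, because issubclass() rejects parameterized types. A self-contained illustration (behavior shown is for Python 3.7+, where __origin__ is the plain runtime class):

from typing import List

def origin_sketch(t):
    # typing generics carry the runtime class in __origin__;
    # plain classes pass through unchanged
    return getattr(t, '__origin__', t)

assert origin_sketch(List[int]) is list   # Python >= 3.7
assert origin_sketch(int) is int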
@@ -143,11 +140,13 @@ def isgenerictype(t):
def load_if_file(text_or_file) -> str:
"""Reads and returns content of a text-file if parameter
"""
Reads and returns content of a text-file if parameter
`text_or_file` is a file name (i.e. a single line string),
otherwise (i.e. if `text_or_file` is a multi-line string)
`text_or_file` is returned.
"""
if is_filename(text_or_file):
try:
with open(text_or_file, encoding="utf-8") as f:
......@@ -164,9 +163,11 @@ def load_if_file(text_or_file) -> str:
def is_python_code(text_or_file: str) -> bool:
"""Checks whether 'text_or_file' is python code or the name of a file that
"""
Checks whether 'text_or_file' is python code or the name of a file that
contains python code.
"""
if is_filename(text_or_file):
return text_or_file[-3:].lower() == '.py'
try:
@@ -179,11 +180,13 @@ def is_python_code(text_or_file: str) -> bool:
def has_fenced_code(text_or_file: str, info_strings=('ebnf', 'test')) -> bool:
"""Checks whether `text_or_file` contains fenced code blocks, which are
"""
Checks whether `text_or_file` contains fenced code blocks, which are
marked by one of the given info strings.
See http://spec.commonmark.org/0.28/#fenced-code-blocks for more
information on fenced code blocks in common mark documents.
"""
if is_filename(text_or_file):
with open(text_or_file, 'r', encoding='utf-8') as f:
markdown = f.read()
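A usage sketch based on the docstring (the multi-line input is treated as text rather than as a file name, see is_filename above):

doc = "Intro text\n\n```ebnf\nexpr = term { '+' term }\n```\n"
# a fenced block with info string 'ebnf' should be detected:
assert has_fenced_code(doc)
assert not has_fenced_code("no fences\nanywhere here")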
@@ -210,9 +213,11 @@ def has_fenced_code(text_or_file: str, info_strings=('ebnf', 'test')) -> bool:
def md5(*txt):
"""Returns the md5-checksum for `txt`. This can be used to test if
"""
Returns the md5-checksum for `txt`. This can be used to test if
some piece of text, for example a grammar source file, has changed.
"""
md5_hash = hashlib.md5()
for t in txt:
md5_hash.update(t.encode('utf8'))
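The same checksum computed directly with hashlib, which is all the helper above wraps (assuming it finishes with hexdigest(), which the cut-off hunk does not show):

import hashlib

md5_hash = hashlib.md5()
for t in ("grammar source", "more text"):
    md5_hash.update(t.encode('utf8'))
checksum = md5_hash.hexdigest()          # e.g. compare against a stored value
assert len(checksum) == 32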
@@ -220,10 +225,12 @@ def md5(*txt):
def compile_python_object(python_src, catch_obj_regex=""):
"""Compiles the python source code and returns the (first) object
"""
Compiles the python source code and returns the (first) object
the name of which is matched by ``catch_obj_regex``. If catch_obj
is the empty string, the namespace dictionary will be returned.
"""
if isinstance(catch_obj_regex, str):
catch_obj_regex = re.compile(catch_obj_regex)
code = compile(python_src, '<string>', 'exec')
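The pattern is easy to restate without the helper (a sketch of the documented behavior, not the function's actual body): exec the compiled source into a fresh namespace, then pick out the first name matching the regex.

import re

namespace = {}
code = compile("class MyGrammar:\n    pass\n", '<string>', 'exec')
exec(code, namespace)
matches = [v for k, v in namespace.items() if re.match(r'\w+Grammar$', k)]
grammar_class = matches[0]               # first object whose name matches
assert grammar_class.__name__ == 'MyGrammar'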
@@ -251,7 +258,8 @@ def compile_python_object(python_src, catch_obj_regex=""):
# def smart_list(arg: Union[str, Iterable[T]]) -> Union[Sequence[str], Sequence[T]]:
def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]:
"""Returns the argument as list, depending on its type and content.
"""
Returns the argument as list, depending on its type and content.
If the argument is a string, it will be interpreted as a list of
comma separated values, trying ';', ',', ' ' as possible delimiters
@@ -280,6 +288,7 @@ def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]:
>>> smart_list(125)
[125]
"""
if isinstance(arg, str):
for delimiter in (';', ','):
lst = arg.split(delimiter)
@@ -295,13 +304,15 @@ def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]:
def expand_table(compact_table: Dict) -> Dict:
"""Expands a table by separating keywords that are tuples or strings
"""
Expands a table by separating keywords that are tuples or strings
containing comma separated words into single keyword entries with
the same values. Returns the expanded table.
Example:
>>> expand_table({"a, b": 1, ('d','e','f'):5, "c":3})
{'a': 1, 'b': 1, 'd': 5, 'e': 5, 'f': 5, 'c': 3}
"""
expanded_table = {} # type: Dict
keys = list(compact_table.keys())
for key in keys:
@@ -322,9 +333,11 @@ def expand_table(compact_table: Dict) -> Dict:
def sane_parser_name(name) -> bool:
"""Checks whether given name is an acceptable parser name. Parser names
"""
Checks whether given name is an acceptable parser name. Parser names
must not be preceded or succeeded by a double underscore '__'!
"""
return name and name[:2] != '__' and name[-2:] != '__'
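The one-line body above makes the contract easy to check:

assert sane_parser_name("expression")
assert not sane_parser_name("__expression__")   # dunder names are reserved
assert not sane_parser_name("")                 # empty names are rejected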
@@ -116,7 +116,8 @@ CriteriaType = Union[int, str, Callable]
def transformation_factory(t1=None, t2=None, t3=None, t4=None, t5=None):
"""Creates factory functions from transformation-functions that
"""
Creates factory functions from transformation-functions that
dispatch on the first parameter after the context parameter.
Decorating a transformation-function that has more than merely the
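The docstring is cut off by the hunk, but the gist of such a factory can be sketched in a few lines (a deliberately simplified stand-in: the real decorator also dispatches on the type of the first parameter, which is not reproduced here). A transformation f(context, *params) becomes a factory whose call f(*params) yields a ready-to-use one-argument rule:

def factory_sketch(transformation):
    # f(context, *params)  ->  f(*params) returns g(context)
    def make(*params):
        return lambda context: transformation(context, *params)
    return make

@factory_sketch
def remove_items(context, items):
    # toy transformation on a list-shaped "context" (illustrative only)
    context[:] = [item for item in context if item not in items]

rule = remove_items({'(', ')'})    # pre-bound one-argument rule
ctx = ['(', 'a', ')']
rule(ctx)
assert ctx == ['a']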
@@ -276,6 +277,7 @@ def traverse(root_node: Node,
traverse(node, table)
"""
# Is this optimization really needed?
if '__cache__' in processing_table:
# assume that processing table has already been expanded
@@ -293,12 +295,6 @@ def traverse(root_node: Node,
processing_table.clear()
processing_table.update(table)
- # assert '__cache__' in processing_table
- # # Code without optimization
- # table = {name: smart_list(call) for name, call in list(processing_table.items())}
- # table = expand_table(table)
- # cache = {}  # type: Dict[str, List[Callable]]
def traverse_recursive(context):
nonlocal cache
node = context[-1]
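How such a table-driven traversal is used can be sketched independently of the hunk (a toy dict-based tree, not DHParser's Node class; the sketch processes children before the node itself, as is typical for bottom-up tree transformations):

def traverse_sketch(node, table):
    for child in node.get("children", []):
        traverse_sketch(child, table)               # children first
    for transform in table.get(node["tag"], []):
        transform(node)                             # then the node itself

visited = []
tree = {"tag": "root", "children": [{"tag": "leaf"}]}
traverse_sketch(tree, {"leaf": [lambda nd: visited.append(nd["tag"])],
                       "root": [lambda nd: visited.append(nd["tag"])]})
assert visited == ["leaf", "root"]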
@@ -341,7 +337,8 @@ def traverse(root_node: Node,
def traverse_locally(context: List[Node],
processing_table: Dict, # actually: ProcessingTableType
key_func: Callable=key_tag_name): # actually: KeyFunc
"""Transforms the syntax tree starting from the last node in the context
"""
Transforms the syntax tree starting from the last node in the context
according to the given processing table. The purpose of this function is
to apply certain transformations locally, i.e. only for those nodes that
have the last node in the context as their parent node.
@@ -351,14 +348,18 @@ def traverse_locally(context: List[Node],
@transformation_factory(collections.abc.Callable)
def apply_if(context: List[Node], transformation: Callable, condition: Callable):
"""Applies a transformation only if a certain condition is met."""
"""
Applies a transformation only if a certain condition is met.
"""
if condition(context):
transformation(context)
@transformation_factory(collections.abc.Callable)
def apply_unless(context: List[Node], transformation: Callable, condition: Callable):
"""Applies a transformation if a certain condition is *not* met."""
"""
Applies a transformation if a certain condition is *not* met.
"""
if not condition(context):
transformation(context)
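Combined with the factory pattern sketched earlier, these two helpers let a processing table express conditional rules. A self-contained imitation (list-shaped contexts again, purely illustrative):

def apply_if_sketch(transformation, condition):
    # pre-bound variant: returns a one-argument rule for use in a table
    def rule(context):
        if condition(context):
            transformation(context)
    return rule

drop_all = lambda context: context.clear()
only_if_empty_tail = apply_if_sketch(drop_all, lambda c: c[-1] == "")

ctx = ["a", ""]
only_if_empty_tail(ctx)
assert ctx == []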
@@ -410,24 +411,12 @@ def is_expendable(context: List[Node]) -> bool:
@transformation_factory(collections.abc.Set)
def is_token(context: List[Node], tokens: AbstractSet[str] = frozenset()) -> bool:
"""Checks whether the last node in the context has `ptype == TOKEN_PTYPE`
"""
Checks whether the last node in the context has `ptype == TOKEN_PTYPE`
and it's content matches one of the given tokens. Leading and trailing
whitespace-tokens will be ignored. In case an empty set of tokens is passed,
any token is a match.
"""
- # def stripped(nd: Node) -> str:
- #     """Removes leading and trailing whitespace-nodes from content."""
- #     # assert node.parser.ptype == TOKEN_PTYPE
- #     if nd.children:
- #         i, k = 0, len(nd.children)
- #         while i < len(nd.children) and nd.children[i].parser.ptype == WHITESPACE_PTYPE:
- #             i += 1
- #         while k > 0 and nd.children[k - 1].parser.ptype == WHITESPACE_PTYPE:
- #             k -= 1
- #         return "".join(child.content for child in node.children[i:k])
- #     return nd.content
- # node = context[-1]
- # return node.parser.ptype == TOKEN_PTYPE and (not tokens or stripped(node) in tokens)
node = context[-1]
return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.content in tokens)
@@ -444,20 +433,10 @@ def not_one_of(context: List[Node], tag_name_set: AbstractSet[str]) -> bool:
return context[-1].tag_name not in tag_name_set
- # @transformation_factory(collections.abc.Set)
- # def matches_wildcard(context: List[Node], wildcards: AbstractSet[str]) -> bool:
- #     """Returns true, if the node's tag_name matches one of the glob patterns
- #     in `wildcards`. For example, ':*' matches all anonymous nodes. """
- #     tn = context[-1].tag_name
- #     for pattern in wildcards:
- #         if fnmatch.fnmatch(tn, pattern):
- #             return True
- #     return False
@transformation_factory(collections.abc.Set)
def matches_re(context: List[Node], patterns: AbstractSet[str]) -> bool:
"""Retruns true, if the node's tag_name matches one of the regular
"""
Returns true, if the node's tag_name matches one of the regular
expressions in `patterns`. For example, ':.*' matches all anonymous nodes.
"""
tn = context[-1].tag_name
@@ -482,8 +461,10 @@ def has_content(context: List[Node], regexp: str) -> bool:
@transformation_factory(collections.abc.Set)
def has_parent(context: List[Node], tag_name_set: AbstractSet[str]) -> bool:
"""Checks whether a node with one of the given tag names appears somewhere
in the context before the last node in the context."""
"""
Checks whether a node with one of the given tag names appears somewhere
in the context before the last node in the context.
"""
for i in range(2, len(context)):
if context[-i].tag_name in tag_name_set:
return True
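The loop above walks the context from the next-to-last entry backwards, i.e. over the node's ancestors but never over the node itself. The same logic, restated on plain tag-name lists:

def has_parent_sketch(context_tags, tag_name_set):
    # context_tags[-1] is the current node; everything before it is an ancestor
    return any(tag in tag_name_set for tag in context_tags[:-1])

assert has_parent_sketch(['document', 'list', 'item'], {'list'})
assert not has_parent_sketch(['item'], {'item'})   # the node itself never counts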
@@ -502,14 +483,12 @@ def _replace_by(node: Node, child: Node):
child.parser = MockParser(node.parser.name, child.parser.ptype)
# parser names must not be overwritten, else: child.parser.name = node.parser.name
node.parser = child.parser
- # node.errors.extend(child.errors)
node.result = child.result
if hasattr(child, '_xml_attr'):
node.attr.update(child.attr)
def _reduce_child(node: Node, child: Node):
- # node.errors.extend(child.errors)
node.result = child.result
if hasattr(child, '_xml_attr'):
node.attr.update(child.attr)
@@ -635,6 +614,7 @@ def flatten(context: List[Node], condition: Callable=is_anonymous, recursive: bo
(1 (+ 2) (+ 3)) -> (1 + 2 + 3)
(1 (+ (2 + (3)))) -> (1 + 2 + 3)
"""
node = context[-1]
if node.children:
new_result = [] # type: List[Node]
@@ -652,15 +632,18 @@ def flatten(context: List[Node], condition: Callable=is_anonymous, recursive: bo
def collapse(context: List[Node]):
"""Collapses all sub-nodes of a node by replacing them with the
string representation of the node. USE WITH CARE!"""
"""
Collapses all sub-nodes of a node by replacing them with the
string representation of the node. USE WITH CARE!
"""
node = context[-1]
node.result = node.content
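What collapse does can be pictured on a small example: all structure below the node is flattened into its plain string content, which is irreversible, hence the warning. A toy illustration with dicts (not the Node class):

node = {"tag": "place",
        "children": [("abbreviation", "p."), ("page", "26")]}
# collapse: children are replaced by their concatenated string content
node = {"tag": "place", "content": "".join(t for _, t in node["children"])}
assert node == {"tag": "place", "content": "p.26"}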
@transformation_factory(collections.abc.Callable)
def collapse_if(context: List[Node], condition: Callable, target_tag: ParserBase):
"""(Recursively) merges the content of all adjacent child nodes that
"""
(Recursively) merges the content of all adjacent child nodes that
fulfil the given `condition` into a single leaf node with parser
`target_tag`. Nodes that do not fulfil the condition will be preserved.
@@ -673,6 +656,7 @@ def collapse_if(context: List[Node], condition: Callable, target_tag: ParserBase
See `test_transform.TestComplexTransformations` for examples.
"""
node = context[-1]
package = []
result = []
@@ -706,7 +690,8 @@ def collapse_if(context: List[Node], condition: Callable, target_tag: ParserBase
@transformation_factory(collections.abc.Callable)
def replace_content(context: List[Node], func: Callable): # Callable[[Node], ResultType]
"""Replaces the content of the node. ``func`` takes the node's result
"""
Replaces the content of the node. ``func`` takes the node's result
as an argument an returns the mapped result.
"""
node = context[-1]
@@ -715,7 +700,8 @@ def replace_content(context: List[Node], func: Callable): # Callable[[Node], Re
@transformation_factory # (str)
def replace_content_by(context: List[Node], content: str): # Callable[[Node], ResultType]
"""Replaces the content of the node with the given text content.
"""
Replaces the content of the node with the given text content.
"""
node = context[-1]
node.result = content
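A pre-bound sketch of the same idea, usable like the factory-generated rules above (dict-shaped nodes, illustrative only):

def replace_content_by_sketch(content):
    def rule(node):
        node["result"] = content      # drop whatever was there before
    return rule

nd = {"tag": "marker", "result": ["old", "children"]}
replace_content_by_sketch("")(nd)
assert nd["result"] == ""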