05.11., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 1cf95083 authored by Eckhart Arnold's avatar Eckhart Arnold

slight corrections

parent c11ae730
......@@ -49,6 +49,7 @@ https://bitbucket.org/apalala/grako
"""
import abc
import copy
import os
from functools import partial
......@@ -57,11 +58,11 @@ try:
import regex as re
except ImportError:
import re
from typing import Any, Callable, Dict, Iterator, List, Set, Tuple, Union
from typing import Any, Callable, Collection, Dict, Iterator, List, Set, Tuple, Union
from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name
from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Node, \
TransformationFunc
from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, ParserBase, \
Node, TransformationFunc
from DHParser.toolkit import load_if_file, error_messages
__all__ = ['ScannerFunc',
......@@ -169,16 +170,16 @@ def add_parser_guard(parser_func):
return None, text
parser.recursion_counter[location] += 1
grammar = parser.grammar
grammar = parser.grammar # grammar may be 'None' for unconnected parsers!
if grammar.history_tracking:
if grammar and grammar.history_tracking:
grammar.call_stack.append(parser)
grammar.moving_forward = True
# run original __call__ method
node, rest = parser_func(parser, text)
if grammar.history_tracking:
if grammar and grammar.history_tracking:
# don't track returning parsers except in case an error has occurred
if grammar.moving_forward or (node and node._errors):
grammar.moving_forward = False
......@@ -191,7 +192,8 @@ def add_parser_guard(parser_func):
# in case of a recursive call saves the result of the first
# (or left-most) call that matches
parser.visited[location] = (node, rest)
grammar.last_node = node # store last node for Lookbehind operator
if grammar:
grammar.last_node = node # store last node for Lookbehind operator
elif location in parser.visited:
            # if parser did not match but a saved result exists, assume
# left recursion and use the saved result
......@@ -211,7 +213,7 @@ def add_parser_guard(parser_func):
return guarded_call
class ParserMetaClass(type):
class ParserMetaClass(abc.ABCMeta):
def __init__(cls, name, bases, attrs):
# The following condition is necessary for classes that don't override
# the __call__() method, because in these cases the non-overridden
......@@ -222,22 +224,18 @@ class ParserMetaClass(type):
super(ParserMetaClass, cls).__init__(name, bases, attrs)
class Parser(metaclass=ParserMetaClass):
class Parser(ParserBase, metaclass=ParserMetaClass):
ApplyFunc = Callable[['Parser'], None]
def __init__(self, name: str = '') -> None:
# assert isinstance(name, str), str(name)
self.name = name # type: str
super(Parser, self).__init__(name)
self._grammar = None # type: 'Grammar'
self.reset()
def __deepcopy__(self, memo):
return self.__class__(self.name)
@property
def ptype(self) -> str:
return ':' + self.__class__.__name__
def reset(self):
self.visited = dict() # type: Dict[int, Tuple[Node, str]]
self.recursion_counter = dict() # type: Dict[int, int]
......@@ -284,6 +282,7 @@ class Parser(metaclass=ParserMetaClass):
class Grammar:
root__ = None # type: Union[Parser, None]
# root__ must be overwritten with the root-parser by grammar subclass
parser_initialization__ = "pending" # type: str
@classmethod
def _assign_parser_names(cls):
......@@ -316,27 +315,27 @@ class Grammar:
parser.parser.name = entry
cls.parser_initialization__ = "done"
def __init__(self, root=None):
if not hasattr(self.__class__, 'parser_initialization__'):
self.__class__.parser_initialization__ = "pending"
def __init__(self, root: Parser=None) -> None:
# if not hasattr(self.__class__, 'parser_initialization__'):
# self.__class__.parser_initialization__ = "pending"
if not hasattr(self.__class__, 'wspL__'):
self.wspL__ = ''
if not hasattr(self.__class__, 'wspR__'):
self.wspR__ = ''
self.all_parsers = set()
self.all_parsers = set() # type: Set[Parser]
self.dirty_flag = False
self.history_tracking = False
self._reset()
self._assign_parser_names()
self.root__ = root if root else copy.deepcopy(self.__class__.root__)
if self.wspL__:
self.wsp_left_parser__ = Whitespace(self.wspL__)
self.wsp_left_parser__ = Whitespace(self.wspL__) # type: ParserBase
self.wsp_left_parser__.grammar = self
self.all_parsers.add(self.wsp_left_parser__) # don't you forget about me...
else:
self.wsp_left_parser__ = ZOMBIE_PARSER
if self.wspR__:
self.wsp_right_parser__ = Whitespace(self.wspR__)
self.wsp_right_parser__ = Whitespace(self.wspR__) # type: ParserBase
self.wsp_right_parser__.grammar = self
self.all_parsers.add(self.wsp_right_parser__) # don't you forget about me...
else:
......@@ -359,7 +358,7 @@ class Grammar:
# also needed for call stack tracing
self.moving_forward = True
def _add_parser(self, parser: Parser):
def _add_parser(self, parser: Parser) -> None:
"""Adds the copy of the classes parser object to this
particular instance of Grammar.
"""
......@@ -368,7 +367,7 @@ class Grammar:
self.all_parsers.add(parser)
parser.grammar = self
def __call__(self, document: str, start_parser="root__"):
def __call__(self, document: str, start_parser="root__") -> Node:
"""Parses a document with with parser-combinators.
Args:
......@@ -390,7 +389,8 @@ class Grammar:
self.dirty_flag = True
self.history_tracking = is_logging()
self.document = document
parser = self[start_parser]
parser = self[start_parser] if isinstance(start_parser, str) else start_parser
assert parser.grammar == self, "Cannot run parsers from a differen grammar object!"
stitches = [] # type: List[Node]
rest = document
if not rest:
......@@ -430,7 +430,7 @@ class Grammar:
result.pos = 0 # calculate all positions
return result
def log_parsing_history(self, log_file_name=''):
def log_parsing_history(self, log_file_name: str='') -> None:
"""Writes a log of the parsing history of the most recently parsed
document.
"""
......@@ -463,14 +463,14 @@ class Grammar:
write_log(errors_only, log_file_name + '_errors')
def dsl_error_msg(parser, error_str) -> str:
"""Returns an error messsage for errors in the parser configuration,
def dsl_error_msg(parser: Parser, error_str: str) -> str:
"""Returns an error message for errors in the parser configuration,
e.g. errors that result in infinite loops.
Args:
parser (Parser: The parser where the error was noticed. Note
parser (Parser): The parser where the error was noticed. Note
that this is not necessarily the parser that caused the
error but only where the error became apparaent.
        error but only where the error became apparent.
error_str (str): A short string describing the error.
Returns:
str: An error message including the call stack if history
......@@ -832,10 +832,15 @@ class Alternative(NaryOperator):
that both the symmetry and the ambiguity of the EBNF-or-operator
are broken by selecting the first match.
# the order of the sub-expression matters:
# the order of the sub-expression matters!
>>> number = RE('\d+') | RE('\d+') + RE('\.') + RE('\d+')
>>> str(Grammar(number)('3.1416'))
'3'
# the most selective expression should be put first:
>>> number = RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')
>>> str(Grammar(number)('3.1416'))
'3.1416'
"""
def __init__(self, *parsers: Parser, name: str = '') -> None:
......@@ -1196,4 +1201,3 @@ def compile_source(source: str,
messages = error_messages(source_text, errors)
return result, messages, syntax_tree
......@@ -17,6 +17,7 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
import abc
import copy
import inspect
import itertools
......@@ -35,9 +36,11 @@ from DHParser.toolkit import log_dir, expand_table, line_col, smart_list
__all__ = ['WHITESPACE_PTYPE',
'TOKEN_PTYPE',
'ZOMBIE_PARSER',
'ParserBase',
'Error',
'Node',
'TransformationFunc',
'transformation_factory',
'key_parser_name',
'key_tag_name',
'traverse',
......@@ -60,22 +63,39 @@ __all__ = ['WHITESPACE_PTYPE',
'assert_content']
class MockParser:
class ParserBase:
"""
ParserBase is the base class for all real and mock parser classes.
It is defined here, because Node objects require a parser object
for instantiation.
"""
def __init__(self, name=''): # , pbases=frozenset()):
self.name = name # type: str
self._ptype = ':' + self.__class__.__name__ # type: str
def __str__(self):
return self.name or self.ptype
@property
def ptype(self) -> str:
return self._ptype
class MockParser(ParserBase):
"""
MockParser objects can be used to reconstruct syntax trees from a
serialized form like S-expressions or XML. Mock objects are needed,
because Node objects require a parser object for instantiation.
Mock objects have just enough properties to serve that purpose.
serialized form like S-expressions or XML. Mock objects can mimic
different parser types by assigning them a ptype on initialization.
Mock objects should not be used for anything other than
syntax tree (re-)construction. In all other cases where a parser
object substitute is needed, chose the singleton ZOMBIE_PARSER.
"""
def __init__(self, name='', ptype='', pbases=frozenset()):
def __init__(self, name='', ptype=''): # , pbases=frozenset()):
assert not ptype or ptype[0] == ':'
super(MockParser, self).__init__(name)
self.name = name
self.ptype = ptype or ':' + self.__class__.__name__
# self.pbases = pbases or {cls.__name__ for cls in inspect.getmro(self.__class__)}
self._ptype = ptype or ':' + self.__class__.__name__
def __str__(self):
return self.name or self.ptype
......@@ -119,8 +139,8 @@ ZOMBIE_PARSER = ZombieParser()
# msg: str
Error = NamedTuple('Error', [('pos', int), ('msg', str)])
ResultType = Union[Tuple['Node', ...], str]
SloppyResultType = Union[Tuple['Node', ...], 'Node', str, None]
StrictResultType = Union[Tuple['Node', ...], str]
ResultType = Union[Tuple['Node', ...], 'Node', str, None]
class Node:
......@@ -164,11 +184,11 @@ class Node:
AST-transformation.
"""
def __init__(self, parser, result: SloppyResultType) -> None:
def __init__(self, parser, result: ResultType) -> None:
"""Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result.
"""
self._result = '' # type: ResultType
self._result = '' # type: StrictResultType
self._errors = [] # type: List[str]
self._children = () # type: Tuple['Node', ...]
self._len = len(self.result) if not self.children else \
......@@ -183,7 +203,7 @@ class Node:
def __str__(self):
if self.children:
return "".join(str(child) for child in self.children)
return str(self.result)
return str(self.result) if self.parser.name != "__ZOMBIE__" else ''
def __eq__(self, other):
# return str(self.parser) == str(other.parser) and self.result == other.result
......@@ -204,11 +224,11 @@ class Node:
# ONLY FOR DEBUGGING: return self.parser.name + ':' + self.parser.ptype
@property
def result(self) -> ResultType:
def result(self) -> StrictResultType:
return self._result
@result.setter
def result(self, result: SloppyResultType):
def result(self, result: ResultType):
# # made obsolete by static type checking with mypy is done
# assert ((isinstance(result, tuple) and all(isinstance(child, Node) for child in result))
# or isinstance(result, Node)
......@@ -451,33 +471,35 @@ TransformationFunc = Union[Callable[[Node], Any], partial]
def transformation_factory(t=None):
"""Creates factory functions transformer-functions with more than
one parameter like ``remove_tokens(node, tokens)``. Decorating this
function with ``transformation_factory`` creates a function factory with
the same name, but without the ``node`` paramter, e.g.
``remove_tokens(tokens)`` which returns a transformerfunction with
only one parameter (i.e. ``node``), which can be used in processing
dictionaries, thus avoiding explicit lamba- or partial-functions
in the table.
Additionally it converts a list of parameters into a
collection, if the decorated function has exaclty two arguments and
the second argument is of type Collection.
Main benefit is reability of processing tables.
Example:
trans_table = { 'expression': remove_tokens('+', '-') }
rather than:
trans_table = { 'expression': partial(remove_tokens, tokens={'+', '-'}) }
"""Creates factory functions from transformation-functions that
dispatch on the first parameter after the node parameter.
Decorating a transformation-function that has more than merely the
``node``-parameter with ``transformation_factory`` creates a
function with the same name, which returns a partial-function that
takes just the node-parameter.
    Additionally, there is some syntactic sugar for
transformation-functions that receive a collection as their second
parameter and do not have any further parameters. In this case a
list of parameters passed to the factory function will be converted
into a collection.
Main benefit is readability of processing tables.
Usage:
        @transformation_factory(AbstractSet[str])
def remove_tokens(node, tokens):
...
or, alternatively:
or, alternatively:
@transformation_factory
def remove_tokens(node, tokens: AbstractSet[str]):
...
Example:
trans_table = { 'expression': remove_tokens('+', '-') }
instead of:
trans_table = { 'expression': partial(remove_tokens, tokens={'+', '-'}) }
"""
def decorator(f):
......@@ -509,9 +531,10 @@ def transformation_factory(t=None):
return f
if isinstance(t, type(lambda: 1)):
# assume transformation_factory has been used as decorator w/o parameters
func = t;
t = None
# Provide for the case that transformation_factory has been
# written as plain decorator and not as a function call that
# returns the decorator proper.
func = t; t = None
return decorator(func)
else:
return decorator
......@@ -731,7 +754,7 @@ def remove_enclosing_delimiters(node):
node.result = node.result[1:-1]
def map_content(node, func):
def map_content(node, func: Callable[[Node], ResultType]):
"""Replaces the content of the node. ``func`` takes the node
    as an argument and returns the mapped result.
"""
......
......@@ -32,7 +32,6 @@ already exists.
import collections
import contextlib
import functools
import hashlib
import os
try:
......@@ -48,6 +47,8 @@ __all__ = ['logging',
'logfile_basename',
'line_col',
'error_messages',
'compact_sexpr',
'quick_report',
'escape_re',
'is_filename',
'load_if_file',
......@@ -162,6 +163,18 @@ def compact_sexpr(s) -> str:
return re.sub('\s(?=\))', '', re.sub('\s+', ' ', s)).strip()
# def quick_report(parsing_result) -> str:
# """Returns short report (compact s-expression + errors messages)
# of the parsing results by either a call to a grammar or to a parser
# directly."""
# err = ''
# if isinstance(parsing_result, collections.Collection):
# result = parsing_result[0]
# err = ('\nUnmatched sequence: ' + parsing_result[1]) if parsing_result[1] else ''
# sexpr = compact_sexpr(result.as_sexpr())
# return sexpr + err
def escape_re(s) -> str:
"""Returns `s` with all regular expression special characters escaped.
"""
......
......@@ -18,4 +18,4 @@ permissions and limitations under the License.
import os
__version__ = '0.6.0' + '_dev' + str(os.stat(__file__).st_mtime)
__version__ = '0.7.0' + '_dev' + str(os.stat(__file__).st_mtime)
File mode changed from 100644 to 100755
......@@ -283,13 +283,21 @@
</genus>
</:Optional>
<ABS>
<:RE>
<:RegExp>
;
<:OneOrMore>
<ZW>
<:RegExp>
</:RegExp>
</:RE>
</:RegExp>
</ZW>
<ZW>
<:RegExp>
</:RegExp>
<:Whitespace> </:Whitespace>
</ZW>
</:OneOrMore>
</ABS>
<:Optional>
<GrammatikVarianten>
......@@ -584,7 +592,7 @@
<:Sequence>
<:RE>
<:RegExp>
Catal. thes. Germ. 28,11 (post 851) -um III.
Catal.: thes. Germ.; 28,11 (post 851) "-um III."
</:RegExp>
</:RE>
......@@ -602,7 +610,7 @@
<:Sequence>
<:RE>
<:RegExp>
Form. Sangall. 39 p. 421,16 "munuscula ... direximus, hoc est palliolum ... ,
Form.: Sangall.; 39 p. 421,16 "munuscula ... direximus, hoc est palliolum ... ,
</:RegExp>
</:RE>
......@@ -628,7 +636,7 @@
<:Sequence>
<:RE>
<:RegExp>
Catal. thes. Germ. 18,7 "-eterculi viginti quatuor".
Catal.: thes. Germ.; 18,7 "-eterculi viginti quatuor".
</:RegExp>
</:RE>
......@@ -646,7 +654,7 @@
<:Sequence>
<:RE>
<:RegExp>
Libri confrat. I app. A 6 p. 137,30 "pulpitum ... -a cocco imaginata
Libri: confrat. I; app. A 6 p. 137,30 "pulpitum ... -a cocco imaginata
</:RegExp>
</:RE>
......@@ -672,7 +680,7 @@
<:Sequence>
<:RE>
<:RegExp>
Catal. thes. Germ. 76,15 -rulae II. 40,5 VI vizregule. 129a,5 -sterculas
Catal.: thes. Germ.; 76,15 "-rulae II."; 40,5 VI "vizregule."; 129a,5 "-sterculas
</:RegExp>
</:RE>
......@@ -680,7 +688,7 @@
<:Sequence>
<:RE>
<:RegExp>
II. 24,8 -itella X. 114,8 VIII fezdregle. 6,24 fasciutercule
II."; 24,8 "-itella X."; 114,8 VIII "fezdregle."; 6,24 "fasciutercule
</:RegExp>
</:RE>
......@@ -688,7 +696,7 @@
<:Sequence>
<:RE>
<:RegExp>
VII. 92,6 fascercule tres. 21,20 IIII festregele.
VII."; 92,6 "fascercule tres." 21,20 IIII "festregele."
</:RegExp>
</:RE>
......@@ -805,7 +813,7 @@
<:Sequence>
<:RE>
<:RegExp>
Transl. Libor. I 32 raptis feminarum -is (fa[s]citergiis var. l.).
Transl.: Libor. I; 32 "raptis feminarum -is (fa[s]citergiis var. l.)."
</:RegExp>
</:RE>
......@@ -823,7 +831,7 @@
<:Sequence>
<:RE>
<:RegExp>
II 20 nuditatem membrorum illius (puellae) tegere festinarunt fideles
Transl.: Libor. II; 20 "nuditatem membrorum illius (puellae) tegere festinarunt fideles
</:RegExp>
</:RE>
......@@ -831,7 +839,7 @@
<:Sequence>
<:RE>
<:RegExp>
clerici et laici inprimis cum eorum -cula, dein vestibus solitis.
clerici et laici inprimis cum eorum -cula, dein vestibus solitis."
</:RegExp>
......
......@@ -19,6 +19,9 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
sys.path.extend(['../','../../'])
from DHParser import testing
from MLWCompiler import get_grammar, get_transformer
......
#!/usr/bin/python
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
import os
import sys
from functools import partial
try:
import regex as re
except ImportError:
import re
from DHParser.toolkit import logging, is_filename
from DHParser.parsers import Grammar, Compiler, Required, Token, \
Optional, OneOrMore, Sequence, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source, \
ScannerFunc
from DHParser.syntaxtree import traverse, no_transformation, TransformationFunc
#######################################################################
#
# SCANNER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def LyrikScanner(text):
return text
def get_scanner() -> ScannerFunc:
return LyrikScanner
#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################
class LyrikGrammar(Grammar):
r"""Parser for a Lyrik source file, with this grammar:
gedicht = bibliographisches { LEERZEILE }+ [serie] §titel §text /\s*/ §ENDE
bibliographisches = autor §"," [NZ] werk §"," [NZ] ort §"," [NZ] jahr §"."
autor = namenfolge [verknüpfung]
werk = wortfolge ["." §untertitel] [verknüpfung]
untertitel = wortfolge [verknüpfung]
ort = wortfolge [verknüpfung]
jahr = JAHRESZAHL
wortfolge = { WORT }+
namenfolge = { NAME }+
verknüpfung = "<" ziel ">"
ziel = ZEICHENFOLGE
serie = !(titel vers NZ vers) { NZ zeile }+ { LEERZEILE }+
titel = { NZ zeile}+ { LEERZEILE }+
zeile = { ZEICHENFOLGE }+
text = { strophe {LEERZEILE} }+
strophe = { NZ vers }+
vers = { ZEICHENFOLGE }+
WORT = /\w+/~
NAME = /\w+\.?/~
ZEICHENFOLGE = /[^ \n<>]+/~
NZ = /\n/~
LEERZEILE = /\n[ \t]*(?=\n)/~
JAHRESZAHL = /\d\d\d\d/~
ENDE = !/./
"""
source_hash__ = "7a99fa77a7d2b81976293d54696eb4f3"
parser_initialization__ = "upon instatiation"
COMMENT__ = r''
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'')
wspL__ = ''
wspR__ = WSP__
ENDE = NegativeLookahead(RE('.', wR=''))
JAHRESZAHL = RE('\\d\\d\\d\\d')
LEERZEILE = RE('\\n[ \\t]*(?=\\n)')
NZ = RE('\\n')
ZEICHENFOLGE = RE('[^ \\n<>]+')
NAME = RE('\\w+\\.?')
WORT = RE('\\w+')
vers = OneOrMore(ZEICHENFOLGE)
strophe = OneOrMore(Sequence(NZ, vers))
text = OneOrMore(Sequence(strophe, ZeroOrMore(LEERZEILE)))
zeile = OneOrMore(ZEICHENFOLGE)
titel = Sequence(OneOrMore(Sequence(NZ, zeile)), OneOrMore(LEERZEILE))
serie = Sequence(NegativeLookahead(Sequence(titel, vers, NZ, vers)), OneOrMore(Sequence(NZ, zeile)), OneOrMore(LEERZEILE))
ziel = ZEICHENFOLGE
verknüpfung = Sequence(Token("<"), ziel, Token(">"))
namenfolge = OneOrMore(NAME)
wor