11.3.2021, 9:00 - 11:00: Due to updates, GitLab may be unavailable for a few minutes between 09:00 and 11:00.

Commit cc02687e authored by Eckhart Arnold's avatar Eckhart Arnold

- bug fixes

parent a21adc6a
......@@ -225,8 +225,9 @@ def compile_source(source: str,
log_ST(syntax_tree, log_file_name + '.cst')
log_parsing_history(parser, log_file_name)
assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
str(syntax_tree) # TODO: Only valid if neither tokens nor whitespace are dropped early!
# assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
# str(syntax_tree) # Only valid if neither tokens nor whitespace are dropped early
# only compile if there were no syntax errors, for otherwise it is
# likely that error list gets littered with compile error messages
result = None
......
......@@ -46,6 +46,7 @@ from typing import Callable, cast, List, Tuple, Set, Dict, DefaultDict, Union, O
__all__ = ('Parser',
'UnknownParserError',
'Grammar',
'EMPTY_NODE',
'PreprocessorToken',
'Token',
'DropToken',
......@@ -866,7 +867,7 @@ class Grammar:
if not rest:
result, _ = parser(rest)
if result is None:
result = Node(ZOMBIE_TAG, '').init_pos(0)
result = Node(ZOMBIE_TAG, '').with_pos(0)
self.tree__.new_error(result,
'Parser "%s" did not match empty document.' % str(parser),
Error.PARSER_DID_NOT_MATCH)
......@@ -905,7 +906,7 @@ class Grammar:
if len(stitches) < MAX_DROPOUTS
else " too often! Terminating parser.")
error_code = Error.PARSER_STOPPED_BEFORE_END
stitches.append(Node(ZOMBIE_TAG, skip).init_pos(tail_pos(stitches)))
stitches.append(Node(ZOMBIE_TAG, skip).with_pos(tail_pos(stitches)))
self.tree__.new_error(stitches[-1], error_msg, error_code)
if self.history_tracking__:
# # some parsers may have matched and left history records with nodes != None.
......@@ -914,7 +915,7 @@ class Grammar:
# # to zero. Therefore, their pos properties need to be initialized here
# for record in self.history__:
# if record.node and record.node._pos < 0:
# record.node.init_pos(0)
# record.node.with_pos(0)
record = HistoryRecord(self.call_stack__.copy(), stitches[-1], rest,
self.line_col__(rest))
self.history__.append(record)
......@@ -924,7 +925,7 @@ class Grammar:
if rest:
stitches.append(Node(ZOMBIE_TAG, rest))
#try:
result = Node(ZOMBIE_TAG, tuple(stitches)).init_pos(0)
result = Node(ZOMBIE_TAG, tuple(stitches)).with_pos(0)
# except AssertionError as error:
# # some debugging output
# print(Node(ZOMBIE_TAG, tuple(stitches)).as_sxpr())
......@@ -938,7 +939,7 @@ class Grammar:
# add another child node at the end to ensure that the position
# of the error will be the end of the text. Otherwise, the error
# message above ("...after end of parsing") would appear illogical.
error_node = Node(ZOMBIE_TAG, '').init_pos(tail_pos(result.children))
error_node = Node(ZOMBIE_TAG, '').with_pos(tail_pos(result.children))
self.tree__.new_error(error_node, error_msg, error_code)
result.result = result.children + (error_node,)
else:
......@@ -1251,17 +1252,12 @@ class MetaParser(Parser):
# Node(self.tag_name, node) # unoptimized code
assert node is None or isinstance(node, Node)
if node:
if node._result:
return Node(self.tag_name, node) if self.pname else node
elif self.pname:
nd1 = Node(self.tag_name, ()) # type: Node
# nd1.errors = node.errors
return nd1
# elif node.errors:
# nd2 = Node(self.tag_name, ()) # type: Node
# nd2.errors = node.errors
# return nd2
elif self.pname:
if self.pname:
if node.tag_name[0] == ':': # faster than node.is_anonymous()
return Node(self.tag_name, node._result)
return Node(self.tag_name, node)
return node
if self.pname:
return Node(self.tag_name, ()) # type: Node
return EMPTY_NODE # avoid creation of a node object for anonymous empty nodes
......@@ -1486,7 +1482,7 @@ def mandatory_violation(grammar: Grammar,
reloc: int) -> Tuple[Error, Node, StringView]:
i = reloc if reloc >= 0 else 0
location = grammar.document_length__ - len(text_)
err_node = Node(ZOMBIE_TAG, text_[:i]).init_pos(location)
err_node = Node(ZOMBIE_TAG, text_[:i]).with_pos(location)
found = text_[:10].replace('\n', '\\n ')
for search, message in err_msgs:
rxs = not isinstance(search, str)
......
......@@ -15,7 +15,7 @@ cdef class Node:
cpdef get(self, index_or_tagname, surrogate)
cpdef is_anonymous(self)
cpdef init_pos(self, pos)
cpdef with_pos(self, pos)
cpdef attr_active(self)
# cpdef compare_attr(self, other)
# cpdef _tree_repr(self, tab, open_fn, close_fn, data_fn, density, inline, inline_fn)
......@@ -28,7 +28,7 @@ cdef class Node:
cdef class FrozenNode(Node):
cpdef init_pos(self, pos)
cpdef with_pos(self, pos)
cdef class RootNode(Node):
......
......@@ -175,7 +175,6 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
self._len = -1 # type: int # lazy evaluation
else:
self.result = result
# assert tag_name is not None
self.tag_name = tag_name # type: str
def __deepcopy__(self, memo):
......@@ -298,7 +297,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def is_anonymous(self):
return self.tag_name[0] == ':'
return not self.tag_name or self.tag_name[0] == ':'
@property
......@@ -368,30 +367,34 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def pos(self) -> int:
"""Returns the position of the Node's content in the source text."""
if self._pos < 0:
raise AssertionError("Position value not initialized!")
raise AssertionError("Position value not initialized! Use Node.with_pos()")
return self._pos
def init_pos(self, pos: int) -> 'Node':
def with_pos(self, pos: int) -> 'Node':
"""
(Re-)initialize position value. Usually, the parser guard
Initialize position value. Usually, the parser guard
(`parsers.add_parser_guard()`) takes care of assigning the
position in the document to newly created nodes. However,
where Nodes are created outside the reach of the parser
when Nodes are created outside the reach of the parser
guard, their document-position must be assigned manually.
This function recursively reassigns the position values
of the child nodes, too.
Position values of the child nodes are assigned recursively, too.
Returns the node itself for convenience.
"""
# condition self.pos == pos cannot be assumed when tokens or whitespace
# are dropped early!
# assert self._pos < 0 or self.pos == pos, ("pos mismatch %i != %i at Node: %s"
# % (self._pos, pos, repr(self)))
self._pos = pos
# recursively adjust pos-values of all children
offset = self.pos
for child in self.children:
child.init_pos(offset)
offset = child.pos + len(child)
if pos != self._pos >= 0:
raise AssertionError("Position value cannot be reassigned to a different value!")
if self._pos < 0:
self._pos = pos
# recursively adjust pos-values of all children
offset = self.pos
for child in self.children:
if child._pos < 0:
child.with_pos(offset)
offset = child.pos + len(child)
return self
......@@ -736,7 +739,7 @@ class FrozenNode(Node):
# if errors:
# raise AssertionError('Cannot assign error list to frozen node')
def init_pos(self, pos: int) -> 'Node':
def with_pos(self, pos: int) -> 'Node':
pass
......
......@@ -401,7 +401,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name))
except UnknownParserError as upe:
cst = RootNode()
cst = cst.new_error(Node(ZOMBIE_TAG, "").init_pos(0), str(upe))
cst = cst.new_error(Node(ZOMBIE_TAG, "").with_pos(0), str(upe))
clean_test_name = str(test_name).replace('*', '')
# log_ST(cst, "match_%s_%s.cst" % (parser_name, clean_test_name))
tests.setdefault('__cst__', {})[test_name] = cst
......@@ -450,7 +450,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
try:
cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name))
except UnknownParserError as upe:
node = Node(ZOMBIE_TAG, "").init_pos(0)
node = Node(ZOMBIE_TAG, "").with_pos(0)
cst = RootNode(node).new_error(node, str(upe))
errata.append('Unknown parser "{}" in fail test "{}"!'.format(parser_name, test_name))
tests.setdefault('__err__', {})[test_name] = errata[-1]
......
......@@ -146,9 +146,9 @@ Lyrik_AST_transformation_table = {
"jahr":
[reduce_single_child, remove_whitespace, reduce_single_child],
"wortfolge":
[flatten(is_one_of('WORT'), recursive=False), peek, rstrip, collapse],
[flatten(is_one_of('WORT'), recursive=False), rstrip, collapse],
"namenfolge":
[flatten(is_one_of('NAME'), recursive=False), peek, rstrip, collapse],
[flatten(is_one_of('NAME'), recursive=False), rstrip, collapse],
"verknüpfung":
[flatten, remove_tokens('<', '>'), remove_whitespace, reduce_single_child],
"ziel":
......
@ whitespace = horizontal
@ drop = whitespace
@ drop = whitespace, token
gedicht = bibliographisches { LEERZEILE }+ [serie] §titel text /\s*/ ENDE
......
#!/usr/bin/python
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
import collections
from functools import partial
import os
import sys
sys.path.append(r'/home/eckhart/Entwicklung/DHParser')
try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, DropWhitespace, \
Lookbehind, Lookahead, Alternative, Pop, Token, DropToken, Synonym, AllOf, SomeOf, \
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, is_empty, \
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
error_on, recompile_grammar, GLOBALS
#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def Lyrik_explicit_whitespacePreprocessor(text):
    """Identity preprocessor: return *text* unchanged together with an
    identity source-position mapping (position in the preprocessed text
    maps to the same position in the original text)."""
    identity_mapper = lambda position: position
    return text, identity_mapper
def get_preprocessor() -> PreprocessorFunc:
    """Return the preprocessor for Lyrik_explicit_whitespace sources
    (the identity preprocessor defined above)."""
    return Lyrik_explicit_whitespacePreprocessor
#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################
class Lyrik_explicit_whitespaceGrammar(Grammar):
    r"""Parser for a Lyrik_explicit_whitespace source file.
    """
    # hash of the EBNF source this parser class was generated from
    source_hash__ = "824c3970f8997489b9a0faa53f2dff51"
    parser_initialization__ = ["upon instantiation"]
    resume_rules__ = {}
    COMMENT__ = r''  # the grammar defines no comment syntax
    WHITESPACE__ = r'[\t ]*'  # horizontal whitespace only (see "@ whitespace = horizontal")
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    dwsp__ = DropWhitespace(WSP_RE__)  # whitespace dropped from the tree early ("@ drop = whitespace")
    wsp__ = Whitespace(WSP_RE__)
    # --- terminal symbols ---
    L = Series(RegExp('[ \\t]+'), dwsp__)  # obligatory horizontal whitespace
    ENDE = NegativeLookahead(RegExp('.'))  # matches only at end of document
    JAHRESZAHL = RegExp('\\d\\d\\d\\d')  # four-digit year
    LEERZEILE = Series(RegExp('\\n[ \\t]*(?=\\n)'), dwsp__)  # blank line (lookahead keeps trailing newline)
    NZ = RegExp('\\n')  # newline
    ZEICHENFOLGE = RegExp('[^ \\n<>]+')  # any character run without space/newline/angle brackets
    NAME = RegExp('\\w+\\.?')  # word, optionally abbreviated with a dot
    WORT = RegExp('\\w+')
    # --- structural symbols (bottom-up order, as emitted by the generator) ---
    vers = OneOrMore(Series(ZEICHENFOLGE, Option(L)))
    strophe = OneOrMore(Series(NZ, vers))
    text = OneOrMore(Series(strophe, ZeroOrMore(LEERZEILE)))
    zeile = OneOrMore(Series(ZEICHENFOLGE, Option(L)))
    titel = OneOrMore(Series(NZ, Option(L), zeile, OneOrMore(LEERZEILE)))
    serie = Series(NegativeLookahead(Series(titel, vers, NZ, vers)), OneOrMore(Series(NZ, zeile)), OneOrMore(LEERZEILE))
    ziel = Series(ZEICHENFOLGE, dwsp__)
    verknüpfung = Series(Series(DropToken("<"), dwsp__), ziel, Series(DropToken(">"), dwsp__))
    namenfolge = OneOrMore(Series(NAME, Option(L)))
    wortfolge = OneOrMore(Series(WORT, Option(L)))
    jahr = Series(JAHRESZAHL, dwsp__)
    ort = Series(wortfolge, Option(verknüpfung))
    untertitel = Series(wortfolge, Option(verknüpfung))
    werk = Series(wortfolge, Option(Series(Series(DropToken("."), dwsp__), untertitel, mandatory=1)), Option(verknüpfung))
    autor = Series(namenfolge, Option(verknüpfung))
    # mandatory=1 means: once the author has matched, the rest must follow (error otherwise)
    bibliographisches = Series(autor, Series(DropToken(","), dwsp__), Option(Series(NZ, dwsp__)), werk, Series(DropToken(","), dwsp__), Option(Series(NZ, dwsp__)), ort, Series(DropToken(","), dwsp__), Option(Series(NZ, dwsp__)), jahr, Series(DropToken("."), dwsp__), mandatory=1)
    gedicht = Series(bibliographisches, OneOrMore(LEERZEILE), Option(serie), titel, text, RegExp('\\s*'), ENDE, mandatory=3)
    root__ = gedicht  # start symbol of the grammar
def get_grammar() -> Lyrik_explicit_whitespaceGrammar:
    """Return the grammar singleton stored in GLOBALS, creating it on
    first use. Returns the grammar object for convenience."""
    global GLOBALS
    if not hasattr(GLOBALS, 'Lyrik_explicit_whitespace_00000002_grammar_singleton'):
        singleton = Lyrik_explicit_whitespaceGrammar()
        # carry over the generated python source, if the caller attached it
        if hasattr(get_grammar, 'python_src__'):
            singleton.python_src__ = get_grammar.python_src__
        GLOBALS.Lyrik_explicit_whitespace_00000002_grammar_singleton = singleton
    return GLOBALS.Lyrik_explicit_whitespace_00000002_grammar_singleton
#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
# Maps node tag names to lists of transformations that are applied (in order)
# when turning the concrete syntax tree into the abstract syntax tree.
Lyrik_explicit_whitespace_AST_transformation_table = {
    # AST Transformations for the Lyrik_explicit_whitespace-grammar
    # "<" entry: presumably applied to every node before the tag-specific
    # rules (DHParser convention) — TODO confirm against traverse() docs
    "<": remove_empty,
    "gedicht": [],
    "bibliographisches": [],
    "autor": [],
    "werk": [],
    "untertitel": [],
    "ort": [],
    "jahr": [],
    "wortfolge": [],
    "namenfolge": [],
    "verknüpfung": [],
    "ziel": [],
    "serie": [],
    "titel": [],
    "zeile": [],
    "text": [],
    "strophe": [],
    "vers": [],
    "WORT": [],
    "NAME": [],
    "ZEICHENFOLGE": [],
    "NZ": [],
    "LEERZEILE": [],
    "JAHRESZAHL": [],
    "ENDE": [],
    "L": [],
    ":Token": reduce_single_child,
    # "*": fallback rule for all tags not listed above
    "*": replace_by_single_child
}
def Lyrik_explicit_whitespaceTransform() -> TransformationDict:
    """Create a fresh AST-transformation callable bound to a private copy
    of the transformation table (so later edits to the module-level table
    do not affect an already created transformer)."""
    table_copy = Lyrik_explicit_whitespace_AST_transformation_table.copy()
    return partial(traverse, processing_table=table_copy)
def get_transformer() -> TransformationFunc:
    """Return the AST-transformer singleton stored in GLOBALS, creating
    it on first use."""
    if not hasattr(GLOBALS, 'Lyrik_explicit_whitespace_00000002_transformer_singleton'):
        GLOBALS.Lyrik_explicit_whitespace_00000002_transformer_singleton = \
            Lyrik_explicit_whitespaceTransform()
    return GLOBALS.Lyrik_explicit_whitespace_00000002_transformer_singleton
#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
class Lyrik_explicit_whitespaceCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a Lyrik_explicit_whitespace source file.
    """

    def __init__(self):
        super(Lyrik_explicit_whitespaceCompiler, self).__init__()

    def _reset(self):
        # Reset per-compilation-run state; called by the framework before
        # each run (see base class), so state must not live in __init__.
        super()._reset()
        # initialize your variables here, not in the constructor!

    def on_gedicht(self, node):
        # Visitor for the root symbol 'gedicht'; delegates to the generic
        # fallback, which recursively compiles all child nodes.
        return self.fallback_compiler(node)

    # Visitor stubs for the remaining grammar symbols, generated as
    # scaffolding. Uncomment and edit a stub to customize compilation of
    # that node type; unhandled tags fall back to fallback_compiler.
    # def on_bibliographisches(self, node):
    #     return node

    # def on_autor(self, node):
    #     return node

    # def on_werk(self, node):
    #     return node

    # def on_untertitel(self, node):
    #     return node

    # def on_ort(self, node):
    #     return node

    # def on_jahr(self, node):
    #     return node

    # def on_wortfolge(self, node):
    #     return node

    # def on_namenfolge(self, node):
    #     return node

    # def on_verknüpfung(self, node):
    #     return node

    # def on_ziel(self, node):
    #     return node

    # def on_serie(self, node):
    #     return node

    # def on_titel(self, node):
    #     return node

    # def on_zeile(self, node):
    #     return node

    # def on_text(self, node):
    #     return node

    # def on_strophe(self, node):
    #     return node

    # def on_vers(self, node):
    #     return node

    # def on_WORT(self, node):
    #     return node

    # def on_NAME(self, node):
    #     return node

    # def on_ZEICHENFOLGE(self, node):
    #     return node

    # def on_NZ(self, node):
    #     return node

    # def on_LEERZEILE(self, node):
    #     return node

    # def on_JAHRESZAHL(self, node):
    #     return node

    # def on_ENDE(self, node):
    #     return node

    # def on_L(self, node):
    #     return node
def get_compiler() -> Lyrik_explicit_whitespaceCompiler:
    """Return the compiler singleton stored in GLOBALS, creating it on
    first use."""
    if not hasattr(GLOBALS, 'Lyrik_explicit_whitespace_00000002_compiler_singleton'):
        GLOBALS.Lyrik_explicit_whitespace_00000002_compiler_singleton = \
            Lyrik_explicit_whitespaceCompiler()
    return GLOBALS.Lyrik_explicit_whitespace_00000002_compiler_singleton
#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################
def compile_src(source, log_dir=''):
    """Compiles ``source`` and returns (result, errors, ast).

    :param source: the source to compile; passed straight through to
        ``compile_source`` (which, per the file's imports of
        ``is_filename``/``load_if_file``, presumably accepts a file name
        as well as source text — confirm against compile_source docs).
    :param log_dir: directory for parser/compiler logs; '' disables logging.
    :return: the (result, errors, ast) tuple produced by ``compile_source``.
    """
    with logging(log_dir):
        compiler = get_compiler()
        # NOTE: removed unused local `cname` (compiler class name was
        # computed but never used).
        result_tuple = compile_source(source, get_preprocessor(),
                                      get_grammar(),
                                      get_transformer(), compiler)
        return result_tuple
if __name__ == "__main__":
    # recompile grammar if needed
    grammar_path = os.path.abspath(__file__).replace('Compiler.py', '.ebnf')
    if os.path.exists(grammar_path):
        if not recompile_grammar(grammar_path, force=False,
                notify=lambda:print('recompiling ' + grammar_path)):
            # Recompilation failed: print the generated error log and abort.
            error_file = os.path.basename(__file__).replace('Compiler.py', '_ebnf_ERRORS.txt')
            with open(error_file, encoding="utf-8") as f:
                print(f.read())
            sys.exit(1)
    else:
        print('Could not check whether grammar requires recompiling, '
              'because grammar was not found at: ' + grammar_path)
    if len(sys.argv) > 1:
        # compile file
        # '-d' / '--debug' as first argument switches logging on ('LOGS' dir)
        file_name, log_dir = sys.argv[1], ''
        if file_name in ['-d', '--debug'] and len(sys.argv) > 2:
            file_name, log_dir = sys.argv[2], 'LOGS'
        result, errors, ast = compile_src(file_name, log_dir)
        if errors:
            # report errors with a path relative to the working directory
            cwd = os.getcwd()
            rel_path = file_name[len(cwd):] if file_name.startswith(cwd) else file_name
            for error in errors:
                print(rel_path + ':' + str(error))
            sys.exit(1)
        else:
            # a Node result is serialized as XML; anything else is printed as-is
            print(result.as_xml() if isinstance(result, Node) else result)
    else:
        print("Usage: Lyrik_explicit_whitespaceCompiler.py [FILENAME]")
......@@ -27,12 +27,13 @@ sys.path.extend(['../', './'])
from DHParser.toolkit import compile_python_object
from DHParser.log import logging, is_logging, log_ST, log_parsing_history
from DHParser.error import Error
from DHParser.parse import Retrieve, Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
from DHParser.parse import Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, \
UnknownParserError
UnknownParserError, MetaParser, EMPTY_NODE
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider, DHPARSER_IMPORTS
from DHParser.syntaxtree import Node
class TestParserClass:
......@@ -785,6 +786,51 @@ class TestEarlyTokenWhitespaceDrop:
assert next(cst.select(lambda node: node.content == 'X'))
class TestMetaParser:
    def test_meta_parser(self):
        # Exercises MetaParser._return_value() for the combinations of
        # (named vs. anonymous parser) x (named / anonymous / empty child node).
        mp = MetaParser()
        mp.pname = "named"
        mp.tag_name = mp.pname
        # named parser + named, non-empty child: child is wrapped
        nd = mp._return_value(Node('tagged', 'non-empty'))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert len(nd.children) == 1
        assert nd.children[0].tag_name == 'tagged'
        assert nd.children[0].result == "non-empty"
        # named parser + named, empty child: still wrapped
        nd = mp._return_value(Node('tagged', ''))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert len(nd.children) == 1
        assert nd.children[0].tag_name == 'tagged'
        assert not nd.children[0].result
        # named parser + anonymous child: the child's result is absorbed
        # directly (no extra nesting level)
        nd = mp._return_value(Node(':anonymous', 'content'))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert not nd.children
        assert nd.result == 'content'
        nd = mp._return_value(Node(':anonymous', ''))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert not nd.children
        assert not nd.content
        # anonymous parser: nodes pass through unchanged
        mp.pname = ''
        mp.tag_name = ':unnamed'
        nd = mp._return_value(Node('tagged', 'non-empty'))
        assert nd.tag_name == 'tagged', nd.as_sxpr()
        assert len(nd.children) == 0
        assert nd.content == 'non-empty'
        nd = mp._return_value(Node('tagged', ''))
        assert nd.tag_name == 'tagged', nd.as_sxpr()
        assert len(nd.children) == 0
        assert not nd.content
        nd = mp._return_value(Node(':anonymous', 'content'))
        assert nd.tag_name == ':anonymous', nd.as_sxpr()
        assert not nd.children
        assert nd.result == 'content'
        nd = mp._return_value(Node('', ''))
        assert nd.tag_name == '', nd.as_sxpr()
        assert not nd.children
        assert not nd.content
        # None maps to the shared EMPTY_NODE singleton (avoids creating a
        # node object for anonymous empty results)
        assert mp._return_value(None) == EMPTY_NODE