Commit cc02687e authored by Eckhart Arnold

- bug fixes

parent a21adc6a
......@@ -225,8 +225,9 @@ def compile_source(source: str,
log_ST(syntax_tree, log_file_name + '.cst')
log_parsing_history(parser, log_file_name)
assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
str(syntax_tree) # TODO: Only valid if neither tokens nor whitespace are dropped early!
# assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
# str(syntax_tree) # Only valid if neither tokens nor whitespace are dropped early
# only compile if there were no syntax errors, for otherwise it is
# likely that error list gets littered with compile error messages
result = None
......
......@@ -46,6 +46,7 @@ from typing import Callable, cast, List, Tuple, Set, Dict, DefaultDict, Union, O
__all__ = ('Parser',
'UnknownParserError',
'Grammar',
'EMPTY_NODE',
'PreprocessorToken',
'Token',
'DropToken',
......@@ -866,7 +867,7 @@ class Grammar:
if not rest:
result, _ = parser(rest)
if result is None:
result = Node(ZOMBIE_TAG, '').init_pos(0)
result = Node(ZOMBIE_TAG, '').with_pos(0)
self.tree__.new_error(result,
'Parser "%s" did not match empty document.' % str(parser),
Error.PARSER_DID_NOT_MATCH)
......@@ -905,7 +906,7 @@ class Grammar:
if len(stitches) < MAX_DROPOUTS
else " too often! Terminating parser.")
error_code = Error.PARSER_STOPPED_BEFORE_END
stitches.append(Node(ZOMBIE_TAG, skip).init_pos(tail_pos(stitches)))
stitches.append(Node(ZOMBIE_TAG, skip).with_pos(tail_pos(stitches)))
self.tree__.new_error(stitches[-1], error_msg, error_code)
if self.history_tracking__:
# # some parsers may have matched and left history records with nodes != None.
......@@ -914,7 +915,7 @@ class Grammar:
# # to zero. Therefore, their pos properties need to be initialized here
# for record in self.history__:
# if record.node and record.node._pos < 0:
# record.node.init_pos(0)
# record.node.with_pos(0)
record = HistoryRecord(self.call_stack__.copy(), stitches[-1], rest,
self.line_col__(rest))
self.history__.append(record)
......@@ -924,7 +925,7 @@ class Grammar:
if rest:
stitches.append(Node(ZOMBIE_TAG, rest))
#try:
result = Node(ZOMBIE_TAG, tuple(stitches)).init_pos(0)
result = Node(ZOMBIE_TAG, tuple(stitches)).with_pos(0)
# except AssertionError as error:
# # some debugging output
# print(Node(ZOMBIE_TAG, tuple(stitches)).as_sxpr())
......@@ -938,7 +939,7 @@ class Grammar:
# add another child node at the end to ensure that the position
# of the error will be the end of the text. Otherwise, the error
# message above ("...after end of parsing") would appear illogical.
error_node = Node(ZOMBIE_TAG, '').init_pos(tail_pos(result.children))
error_node = Node(ZOMBIE_TAG, '').with_pos(tail_pos(result.children))
self.tree__.new_error(error_node, error_msg, error_code)
result.result = result.children + (error_node,)
else:
......@@ -1251,17 +1252,12 @@ class MetaParser(Parser):
# Node(self.tag_name, node) # unoptimized code
assert node is None or isinstance(node, Node)
if node:
if node._result:
return Node(self.tag_name, node) if self.pname else node
elif self.pname:
nd1 = Node(self.tag_name, ()) # type: Node
# nd1.errors = node.errors
return nd1
# elif node.errors:
# nd2 = Node(self.tag_name, ()) # type: Node
# nd2.errors = node.errors
# return nd2
elif self.pname:
if self.pname:
if node.tag_name[0] == ':': # faster than node.is_anonymous()
return Node(self.tag_name, node._result)
return Node(self.tag_name, node)
return node
if self.pname:
return Node(self.tag_name, ()) # type: Node
return EMPTY_NODE # avoid creation of a node object for anonymous empty nodes
......@@ -1486,7 +1482,7 @@ def mandatory_violation(grammar: Grammar,
reloc: int) -> Tuple[Error, Node, StringView]:
i = reloc if reloc >= 0 else 0
location = grammar.document_length__ - len(text_)
err_node = Node(ZOMBIE_TAG, text_[:i]).init_pos(location)
err_node = Node(ZOMBIE_TAG, text_[:i]).with_pos(location)
found = text_[:10].replace('\n', '\\n ')
for search, message in err_msgs:
rxs = not isinstance(search, str)
......
......@@ -15,7 +15,7 @@ cdef class Node:
cpdef get(self, index_or_tagname, surrogate)
cpdef is_anonymous(self)
cpdef init_pos(self, pos)
cpdef with_pos(self, pos)
cpdef attr_active(self)
# cpdef compare_attr(self, other)
# cpdef _tree_repr(self, tab, open_fn, close_fn, data_fn, density, inline, inline_fn)
......@@ -28,7 +28,7 @@ cdef class Node:
cdef class FrozenNode(Node):
cpdef init_pos(self, pos)
cpdef with_pos(self, pos)
cdef class RootNode(Node):
......
......@@ -175,7 +175,6 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
self._len = -1 # type: int # lazy evaluation
else:
self.result = result
# assert tag_name is not None
self.tag_name = tag_name # type: str
def __deepcopy__(self, memo):
......@@ -298,7 +297,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def is_anonymous(self):
return self.tag_name[0] == ':'
return not self.tag_name or self.tag_name[0] == ':'
@property
......@@ -368,30 +367,34 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def pos(self) -> int:
"""Returns the position of the Node's content in the source text."""
if self._pos < 0:
raise AssertionError("Position value not initialized!")
raise AssertionError("Position value not initialized! Use Node.with_pos()")
return self._pos
def init_pos(self, pos: int) -> 'Node':
def with_pos(self, pos: int) -> 'Node':
"""
(Re-)initialize position value. Usually, the parser guard
Initialize position value. Usually, the parser guard
(`parsers.add_parser_guard()`) takes care of assigning the
position in the document to newly created nodes. However,
where Nodes are created outside the reach of the parser
when Nodes are created outside the reach of the parser
guard, their document-position must be assigned manually.
This function recursively reassigns the position values
of the child nodes, too.
Position values of the child nodes are assigned recursively, too.
Returns the node itself for convenience.
"""
# condition self.pos == pos cannot be assumed when tokens or whitespace
# are dropped early!
# assert self._pos < 0 or self.pos == pos, ("pos mismatch %i != %i at Node: %s"
# % (self._pos, pos, repr(self)))
self._pos = pos
# recursively adjust pos-values of all children
offset = self.pos
for child in self.children:
child.init_pos(offset)
offset = child.pos + len(child)
if pos != self._pos >= 0:
raise AssertionError("Position value cannot be reassigned to a different value!")
if self._pos < 0:
self._pos = pos
# recursively adjust pos-values of all children
offset = self.pos
for child in self.children:
if child._pos < 0:
child.with_pos(offset)
offset = child.pos + len(child)
return self
......@@ -736,7 +739,7 @@ class FrozenNode(Node):
# if errors:
# raise AssertionError('Cannot assign error list to frozen node')
def init_pos(self, pos: int) -> 'Node':
def with_pos(self, pos: int) -> 'Node':
pass
......
......@@ -401,7 +401,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name))
except UnknownParserError as upe:
cst = RootNode()
cst = cst.new_error(Node(ZOMBIE_TAG, "").init_pos(0), str(upe))
cst = cst.new_error(Node(ZOMBIE_TAG, "").with_pos(0), str(upe))
clean_test_name = str(test_name).replace('*', '')
# log_ST(cst, "match_%s_%s.cst" % (parser_name, clean_test_name))
tests.setdefault('__cst__', {})[test_name] = cst
......@@ -450,7 +450,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
try:
cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name))
except UnknownParserError as upe:
node = Node(ZOMBIE_TAG, "").init_pos(0)
node = Node(ZOMBIE_TAG, "").with_pos(0)
cst = RootNode(node).new_error(node, str(upe))
errata.append('Unknown parser "{}" in fail test "{}"!'.format(parser_name, test_name))
tests.setdefault('__err__', {})[test_name] = errata[-1]
......
......@@ -146,9 +146,9 @@ Lyrik_AST_transformation_table = {
"jahr":
[reduce_single_child, remove_whitespace, reduce_single_child],
"wortfolge":
[flatten(is_one_of('WORT'), recursive=False), peek, rstrip, collapse],
[flatten(is_one_of('WORT'), recursive=False), rstrip, collapse],
"namenfolge":
[flatten(is_one_of('NAME'), recursive=False), peek, rstrip, collapse],
[flatten(is_one_of('NAME'), recursive=False), rstrip, collapse],
"verknüpfung":
[flatten, remove_tokens('<', '>'), remove_whitespace, reduce_single_child],
"ziel":
......
@ whitespace = horizontal
@ drop = whitespace
@ drop = whitespace, token
gedicht = bibliographisches { LEERZEILE }+ [serie] §titel text /\s*/ ENDE
......
This diff is collapsed.
......@@ -27,12 +27,13 @@ sys.path.extend(['../', './'])
from DHParser.toolkit import compile_python_object
from DHParser.log import logging, is_logging, log_ST, log_parsing_history
from DHParser.error import Error
from DHParser.parse import Retrieve, Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
from DHParser.parse import Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, \
UnknownParserError
UnknownParserError, MetaParser, EMPTY_NODE
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider, DHPARSER_IMPORTS
from DHParser.syntaxtree import Node
class TestParserClass:
......@@ -785,6 +786,51 @@ class TestEarlyTokenWhitespaceDrop:
assert next(cst.select(lambda node: node.content == 'X'))
class TestMetaParser:
    """Exercises ``MetaParser._return_value()`` for named and unnamed parsers."""

    def test_meta_parser(self):
        """Check tag name, children and content of the nodes handed back by
        ``_return_value()`` for named/anonymous and empty/non-empty input
        nodes, first with a named parser and then with an unnamed one.
        """
        parser = MetaParser()
        wrap = parser._return_value

        # --- named parser ---------------------------------------------
        parser.pname = "named"
        parser.tag_name = parser.pname

        # named, non-empty input node: expected as the single child
        outcome = wrap(Node('tagged', 'non-empty'))
        assert outcome.tag_name == 'named', outcome.as_sxpr()
        assert len(outcome.children) == 1
        only_child = outcome.children[0]
        assert only_child.tag_name == 'tagged'
        assert only_child.result == "non-empty"

        # named, empty input node: still expected as the single child
        outcome = wrap(Node('tagged', ''))
        assert outcome.tag_name == 'named', outcome.as_sxpr()
        assert len(outcome.children) == 1
        only_child = outcome.children[0]
        assert only_child.tag_name == 'tagged'
        assert not only_child.result

        # anonymous, non-empty input node: its result shows up directly
        outcome = wrap(Node(':anonymous', 'content'))
        assert outcome.tag_name == 'named', outcome.as_sxpr()
        assert not outcome.children
        assert outcome.result == 'content'

        # anonymous, empty input node: childless node without content
        outcome = wrap(Node(':anonymous', ''))
        assert outcome.tag_name == 'named', outcome.as_sxpr()
        assert not outcome.children
        assert not outcome.content

        # --- unnamed parser -------------------------------------------
        parser.pname = ''
        parser.tag_name = ':unnamed'

        # in every case below the outcome carries the input node's tag name
        outcome = wrap(Node('tagged', 'non-empty'))
        assert outcome.tag_name == 'tagged', outcome.as_sxpr()
        assert len(outcome.children) == 0
        assert outcome.content == 'non-empty'

        outcome = wrap(Node('tagged', ''))
        assert outcome.tag_name == 'tagged', outcome.as_sxpr()
        assert len(outcome.children) == 0
        assert not outcome.content

        outcome = wrap(Node(':anonymous', 'content'))
        assert outcome.tag_name == ':anonymous', outcome.as_sxpr()
        assert not outcome.children
        assert outcome.result == 'content'

        outcome = wrap(Node('', ''))
        assert outcome.tag_name == '', outcome.as_sxpr()
        assert not outcome.children
        assert not outcome.content

        # no node at all compares equal to the EMPTY_NODE placeholder
        assert wrap(None) == EMPTY_NODE
if __name__ == "__main__":
from DHParser.testing import runner
with logging(False):
......
......@@ -83,7 +83,7 @@ class TestNode:
def test_deepcopy(self):
tree = RootNode(parse_sxpr('(a (b c) (d (e f) (h i)))'))
tree.init_pos(0)
tree.with_pos(0)
tree_copy = copy.deepcopy(tree)
assert tree == tree_copy
......@@ -179,7 +179,7 @@ class TestNode:
assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
nd = Node(ZOMBIE_TAG, (nd1, nd2))
assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
nd.init_pos(0)
nd.with_pos(0)
assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos
......@@ -192,7 +192,7 @@ class TestNode:
class TestRootNode:
def test_error_handling(self):
tree = parse_sxpr('(A (B D) (C E))')
tree.init_pos(0)
tree.with_pos(0)
root = RootNode()
root.new_error(tree.children[1], "error C")
root.new_error(tree.children[0], "error B")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment