Commit 037936a1 authored by di68kap's avatar di68kap

- Major Refactoring: First parameter of Node() now tag-name-string instead of Parser-object!

parent fcb7b9c8
......@@ -1075,7 +1075,7 @@ class EBNFCompiler(Compiler):
def on_term(self, node) -> str:
filtered_result, custom_args = self._error_customization(node)
mock_node = Node(node.parser, filtered_result)
mock_node = Node(node.tag_name, filtered_result)
return self.non_terminal(mock_node, 'Series', custom_args)
......@@ -1099,7 +1099,7 @@ class EBNFCompiler(Compiler):
elif len(node.children) > 2:
# shift = (Node(node.parser, node.result[1].result),)
# node.result[1].result = shift + node.result[2:]
node.children[1].result = (Node(node.children[1].parser, node.children[1].result),) \
node.children[1].result = (Node(node.children[1].tag_name, node.children[1].result),) \
+ node.children[2:]
node.children[1].tag_name = node.tag_name
node.result = (node.children[0], node.children[1])
......@@ -1156,7 +1156,7 @@ class EBNFCompiler(Compiler):
nd = node.children[0]
if nd.tag_name == "term":
filtered_result, custom_args = self._error_customization(nd)
mock_node = Node(nd.parser, filtered_result)
mock_node = Node(nd.tag_name, filtered_result)
return self.non_terminal(mock_node, 'AllOf', custom_args)
elif nd.tag_name == "expression":
if any(c.tag_name == TOKEN_PTYPE and nd.content == '§' for c in nd.children):
......
......@@ -290,12 +290,12 @@ class Parser(ParserBase):
node = error.node
node.result += (nd,)
else:
node = Node(self, (Node(None, text[:gap]), error.node, nd))
node = Node(self.tag_name, (Node(None, text[:gap]), error.node, nd))
elif error.first_throw:
raise ParserError(error.node, error.rest, first_throw=False)
else:
result = (Node(None, text[:gap]), error.node) if gap else error.node # type: ResultType
raise ParserError(Node(self, result), text, first_throw=False)
raise ParserError(Node(self.tag_name, result), text, first_throw=False)
if grammar.left_recursion_handling__:
self.recursion_counter[location] -= 1
......@@ -758,6 +758,7 @@ class Grammar:
'already exists in grammar object: %s!'
% (parser.name, str(self.__dict__[parser.name])))
setattr(self, parser.name, parser)
parser.tag_name = parser.name or parser.ptype
self.all_parsers__.add(parser)
parser.grammar = self
......@@ -884,7 +885,7 @@ class Grammar:
# add another child node at the end to ensure that the position
# of the error will be the end of the text. Otherwise, the error
# message above ("...after end of parsing") would appear illogical.
error_node = Node(ZOMBIE_PARSER, '').init_pos(tail_pos(result.children))
error_node = Node(ZOMBIE, '').init_pos(tail_pos(result.children))
self.tree__.new_error(error_node, error_msg, error_code)
result.result = result.children + (error_node,)
else:
......@@ -983,21 +984,21 @@ class PreprocessorToken(Parser):
if text[0:1] == BEGIN_TOKEN:
end = text.find(END_TOKEN, 1)
if end < 0:
node = Node(self, '')
node = Node(self.tag_name, '')
self.grammar.tree__.new_error(
node,
'END_TOKEN delimiter missing from preprocessor token. '
'(Most likely due to a preprocessor bug!)') # type: Node
return node, text[1:]
elif end == 0:
node = Node(self, '')
node = Node(self.tag_name, '')
self.grammar.tree__.new_error(
node,
'Preprocessor-token cannot have zero length. '
'(Most likely due to a preprocessor bug!)')
return node, text[2:]
elif text.find(BEGIN_TOKEN, 1, end) >= 0:
node = Node(self, text[len(self.name) + 1:end])
node = Node(self.tag_name, text[len(self.name) + 1:end])
self.grammar.tree__.new_error(
node,
'Preprocessor-tokens must not be nested or contain '
......@@ -1005,7 +1006,7 @@ class PreprocessorToken(Parser):
'(Most likely due to a preprocessor bug!)')
return node, text[end:]
if text[1:len(self.name) + 1] == self.name:
return Node(self, text[len(self.name) + 2:end]), text[end + 1:]
return Node(self.tag_name, text[len(self.name) + 2:end]), text[end + 1:]
return None, text
......@@ -1034,7 +1035,7 @@ class Token(Parser):
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
if text.startswith(self.text):
return Node(self, self.text, True), text[self.len:]
return Node(self.tag_name, self.text, True), text[self.len:]
return None, text
def __repr__(self):
......@@ -1087,7 +1088,7 @@ class RegExp(Parser):
if i >= 0:
capture = capture[:i]
end = i
return Node(self, capture, True), text[end:]
return Node(self.tag_name, capture, True), text[end:]
return None, text
def __repr__(self):
......@@ -1237,8 +1238,8 @@ class Option(UnaryOperator):
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
node, text = self.parser(text)
if node:
return Node(self, node), text
return Node(self, ()), text
return Node(self.tag_name, node), text
return Node(self.tag_name, ()), text
def __repr__(self):
return '[' + (self.parser.repr[1:-1] if isinstance(self.parser, Alternative)
......@@ -1278,7 +1279,7 @@ class ZeroOrMore(Option):
infinite_loop_error = Error(dsl_error_msg(self, 'Infinite Loop encountered.'),
node.pos)
results += (node,)
node = Node(self, results)
node = Node(self.tag_name, results)
if infinite_loop_error:
self.grammar.tree__.add_error(node, infinite_loop_error)
return node, text
......@@ -1330,7 +1331,7 @@ class OneOrMore(UnaryOperator):
results += (node,)
if results == ():
return None, text
node = Node(self, results)
node = Node(self.tag_name, results)
if infinite_loop_error:
self.grammar.tree__.add_error(node, infinite_loop_error)
return node, text_
......@@ -1459,7 +1460,7 @@ class Series(NaryOperator):
results += (node,)
assert len(results) <= len(self.parsers) \
or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE])
node = Node(self, results)
node = Node(self.tag_name, results)
if error:
raise ParserError(node, text, first_throw=True)
return node, text_
......@@ -1542,7 +1543,7 @@ class Alternative(NaryOperator):
for parser in self.parsers:
node, text_ = parser(text)
if node:
return Node(self, node), text_
return Node(self.tag_name, node), text_
return None, text
def __repr__(self):
......@@ -1670,7 +1671,7 @@ class AllOf(NaryOperator):
parsers = []
assert len(results) <= len(self.parsers) \
or len(self.parsers) >= len([p for p in results if p.tag_name != ZOMBIE])
node = Node(self, results)
node = Node(self.tag_name, results)
if error:
raise ParserError(node, text, first_throw=True)
return node, text_
......@@ -1725,7 +1726,7 @@ class SomeOf(NaryOperator):
parsers = []
assert len(results) <= len(self.parsers)
if results:
return Node(self, results), text_
return Node(self.tag_name, results), text_
else:
return None, text
......@@ -1794,7 +1795,7 @@ class Lookahead(FlowOperator):
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
node, _ = self.parser(text)
if self.sign(node is not None):
return Node(self, ''), text
return Node(self.tag_name, ''), text
else:
return None, text
......@@ -1840,7 +1841,7 @@ class Lookbehind(FlowOperator):
does_match = backwards_text[:len(self.text)] == self.text
else: # assert self.regexp is not None
does_match = backwards_text.match(self.regexp)
return (Node(self, ''), text) if self.sign(does_match) else (None, text)
return (Node(self.tag_name, ''), text) if self.sign(does_match) else (None, text)
def __repr__(self):
return '-&' + self.parser.repr
......@@ -1881,7 +1882,7 @@ class Capture(UnaryOperator):
self.grammar.push_rollback__(location, lambda: stack.pop())
# caching will be blocked by parser guard (see way above),
# because it would prevent recapturing of rolled back captures
return Node(self, node), text_
return Node(self.tag_name, node), text_
else:
return None, text
......@@ -1954,12 +1955,12 @@ class Retrieve(Parser):
stack = self.grammar.variables__[self.symbol.name]
value = self.filter(stack)
except (KeyError, IndexError):
node = Node(self, '')
node = Node(self.tag_name, '')
self.grammar.tree__.new_error(
node, dsl_error_msg(self, "'%s' undefined or exhausted." % self.symbol.name))
return node, text
if text.startswith(value):
return Node(self, value), text[len(value):]
return Node(self.tag_name, value), text[len(value):]
else:
return None, text
......@@ -2014,7 +2015,7 @@ class Synonym(UnaryOperator):
def _parse(self, text: StringView) -> Tuple[Optional[Node], StringView]:
node, text = self.parser(text)
if node:
return Node(self, node), text
return Node(self.tag_name, node), text
return None, text
def __repr__(self):
......
......@@ -68,11 +68,12 @@ class ParserBase:
for instantiation.
"""
__slots__ = 'name', 'ptype'
__slots__ = 'name', 'ptype', 'tag_name'
def __init__(self,): # , pbases=frozenset()):
self.name = '' # type: str
self.ptype = ':' + self.__class__.__name__ # type: str
self.tag_name = self.ptype # type: str
def __repr__(self):
return self.name + self.ptype
......@@ -269,7 +270,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
__slots__ = '_result', 'children', '_len', '_pos', '_tag_name', 'errors', '_xml_attr', '_content'
def __init__(self, parser, result: ResultType, leafhint: bool = False) -> None:
def __init__(self, name: Optional[str], result: ResultType, leafhint: bool = False) -> None:
"""
Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result.
......@@ -285,16 +286,18 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
self._len = -1 # type: int # lazy evaluation
else:
self.result = result
if parser is None:
self._tag_name = ZOMBIE
else:
self._tag_name = parser.name or parser.ptype
assert name is None or isinstance(name, str) # TODO: Delete this line
self._tag_name = name if name else ZOMBIE
# if parser is None:
# self._tag_name = ZOMBIE
# else:
# self._tag_name = parser.name or parser.ptype
def __deepcopy__(self, memo):
if self.children:
duplicate = self.__class__(self.parser, copy.deepcopy(self.children), False)
duplicate = self.__class__(self._tag_name, copy.deepcopy(self.children), False)
else:
duplicate = self.__class__(self.parser, self.result, True)
duplicate = self.__class__(self._tag_name, self.result, True)
duplicate.errors = copy.deepcopy(self.errors) if self.errors else []
duplicate._pos = self._pos
duplicate._len = self._len
......@@ -312,11 +315,11 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def __repr__(self):
# mpargs = {'name': self.parser.name, 'ptype': self.parser.ptype}
name, ptype = (self._tag_name.split(':') + [''])[:2]
parg = "MockParser({name}, {ptype})".format(name=name, ptype=ptype)
# name, ptype = (self._tag_name.split(':') + [''])[:2]
# parg = "MockParser({name}, {ptype})".format(name=name, ptype=ptype)
rarg = str(self) if not self.children else \
"(" + ", ".join(repr(child) for child in self.children) + ")"
return "Node(%s, %s)" % (parg, rarg)
return "Node(%s, %s)" % (self._tag_name, rarg)
def __len__(self):
......@@ -804,7 +807,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return sum(child.tree_size() for child in self.children) + 1
ZOMBIE_NODE = Node(ZOMBIE_PARSER, '')
ZOMBIE_NODE = Node(ZOMBIE, '')
class RootNode(Node):
......@@ -819,7 +822,7 @@ class RootNode(Node):
"""
def __init__(self, node: Optional[Node] = None):
super().__init__(ZOMBIE_PARSER, '')
super().__init__(ZOMBIE, '')
self.all_errors = [] # type: List[Error]
self.error_flag = 0
if node is not None:
......@@ -938,7 +941,7 @@ def parse_sxpr(sxpr: Union[str, StringView]) -> Node:
"""
sxpr = StringView(sxpr).strip() if isinstance(sxpr, str) else sxpr.strip()
mock_parsers = dict() # type: Dict[StringView, MockParser]
# mock_parsers = dict() # type: Dict[StringView, MockParser]
def next_block(s: StringView):
"""Generator that yields all characters until the next closing bracket
......@@ -1020,7 +1023,7 @@ def parse_sxpr(sxpr: Union[str, StringView]) -> Node:
lines.append(str(sxpr[:end]))
sxpr = sxpr[end:]
result = "\n".join(lines)
node = Node(mock_parsers.setdefault(tagname, MockParser(name, ':' + class_name)), result)
node = Node(name or ':' + class_name, result)
if attributes:
node.attr.update(attributes)
return node
......@@ -1037,8 +1040,8 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
"""
xml = StringView(str(xml))
PlainText = MockParser('', TOKEN_PTYPE)
mock_parsers = {TOKEN_PTYPE: PlainText}
# PlainText = MockParser('', TOKEN_PTYPE)
# mock_parsers = {TOKEN_PTYPE: PlainText}
def parse_attributes(s: StringView) -> Tuple[StringView, OrderedDict]:
"""Parses a sqeuence of XML-Attributes. Returns the string-slice
......@@ -1096,7 +1099,7 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
while s and not s[:2] == "</":
s, leaf = parse_leaf_content(s)
if leaf and (leaf.find('\n') < 0 or not leaf.match(RX_WHITESPACE_TAIL)):
res.append(Node(PlainText, leaf))
res.append(Node(TOKEN_PTYPE, leaf))
if s[:1] == "<" and s[:2] != "</":
s, child = parse_full_content(s)
res.append(child)
......@@ -1106,7 +1109,7 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
result = res[0].result
else:
result = tuple(res)
return s, Node(mock_parsers.setdefault(tagname, MockParser(name, ":" + class_name)), result)
return s, Node(name or ':' + class_name, result)
match_header = xml.search(re.compile(r'<(?!\?)'))
start = xml.index(match_header.start()) if match_header else 0
......
......@@ -39,7 +39,7 @@ import threading
from DHParser.error import Error, is_error, adjust_error_locations
from DHParser.log import is_logging, clear_logs, log_parsing_history
from DHParser.parse import UnknownParserError, Parser, Lookahead
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, ZOMBIE_PARSER
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, ZOMBIE
from DHParser.toolkit import re, typing
from typing import Tuple
......@@ -384,7 +384,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name))
except UnknownParserError as upe:
cst = RootNode()
cst = cst.new_error(Node(ZOMBIE_PARSER, "").init_pos(0), str(upe))
cst = cst.new_error(Node(ZOMBIE, "").init_pos(0), str(upe))
clean_test_name = str(test_name).replace('*', '')
# log_ST(cst, "match_%s_%s.cst" % (parser_name, clean_test_name))
tests.setdefault('__cst__', {})[test_name] = cst
......@@ -433,7 +433,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
try:
cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name))
except UnknownParserError as upe:
node = Node(ZOMBIE_PARSER, "").init_pos(0)
node = Node(ZOMBIE, "").init_pos(0)
cst = RootNode(node).new_error(node, str(upe))
errata.append('Unknown parser "{}" in fail test "{}"!'.format(parser_name, test_name))
tests.setdefault('__err__', {})[test_name] = errata[-1]
......
......@@ -648,7 +648,7 @@ def collapse(context: List[Node]):
@transformation_factory(collections.abc.Callable)
def collapse_if(context: List[Node], condition: Callable, target_tag: ParserBase):
def collapse_if(context: List[Node], condition: Callable, target_tag: str):
"""
(Recursively) merges the content of all adjacent child nodes that
fulfil the given `condition` into a single leaf node with parser
......@@ -656,14 +656,15 @@ def collapse_if(context: List[Node], condition: Callable, target_tag: ParserBase
>>> sxpr = '(place (abbreviation "p.") (page "26") (superscript "b") (mark ",") (page "18"))'
>>> tree = parse_sxpr(sxpr)
>>> text = MockParser('text')
>>> collapse_if([tree], not_one_of({'superscript', 'subscript'}), text)
>>> collapse_if([tree], not_one_of({'superscript', 'subscript'}), 'text')
>>> print(flatten_sxpr(tree.as_sxpr()))
(place (text "p.26") (superscript "b") (text ",18"))
See `test_transform.TestComplexTransformations` for examples.
"""
assert isinstance(target_tag, str) # TODO: Delete this when safe
node = context[-1]
package = [] # type: List[Node]
result = [] # type: List[Node]
......
......@@ -183,7 +183,7 @@ class TestNode:
assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos
def test_xml_sanitizer(self):
node = Node(MockParser('tag'), '<&>')
node = Node('tag', '<&>')
assert node.as_xml() == '<tag>&lt;&amp;&gt;</tag>'
......
......@@ -158,9 +158,9 @@ class TestConditionalTransformations:
"""Tests conditional transformations."""
def test_has_parent(self):
context = [Node(MockParser('A'), 'alpha'),
Node(MockParser('B'), 'beta'),
Node(MockParser('C'), 'gamma')]
context = [Node('A', 'alpha'),
Node('B', 'beta'),
Node('C', 'gamma')]
assert has_parent(context, {'A'})
assert has_parent(context, {'B'})
assert not has_parent(context, {'C'})
......@@ -213,7 +213,7 @@ class TestConditionalTransformations:
class TestComplexTransformations:
def setup(self):
self.Text = MockParser('Text', TOKEN_PTYPE)
self.Text = 'Text' # TOKEN_PTYPE
def test_collapse_if_plain(self):
xml = "<EINZEILER><DEU_WORT>spectat</DEU_WORT><WS> </WS><DEU_WORT>ad</DEU_WORT>" +\
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment