Commit 603b99bb authored by Eckhart Arnold

drop empty anonymous nodes while parsing already

parent d004ab8c
......@@ -38,7 +38,7 @@ import copy
import re
from DHParser.preprocess import strip_tokens, with_source_mapping, PreprocessorFunc
from DHParser.syntaxtree import Node, RootNode, ZOMBIE_ROOTNODE, StrictResultType
from DHParser.syntaxtree import Node, RootNode, ZOMBIE_TAG, StrictResultType
from DHParser.transform import TransformationFunc
from DHParser.parse import Grammar
from DHParser.error import adjust_error_locations, is_error, Error
......@@ -71,6 +71,9 @@ def visitor_name(node_name: str) -> str:
return 'on_' + node_name
# Module-level RootNode instance that Compiler._reset() assigns to
# ``Compiler.tree`` as its initial value.
ROOTNODE_PLACEHOLDER = RootNode()
class Compiler:
"""
Class Compiler is the abstract base class for compilers. Compiler
......@@ -104,7 +107,7 @@ class Compiler:
self._reset()
def _reset(self):
self.tree = ZOMBIE_ROOTNODE # type: RootNode
self.tree = ROOTNODE_PLACEHOLDER # type: RootNode
self.context = [] # type: List[Node]
self._dirty_flag = False
......@@ -116,6 +119,7 @@ class Compiler:
(This very much depends on the kind and purpose of the
implemented compiler.)
"""
assert root.tag_name != ZOMBIE_TAG
if self._dirty_flag:
self._reset()
self._dirty_flag = True
......
This diff is collapsed.
......@@ -34,14 +34,14 @@ from typing import Callable, cast, Iterator, List, AbstractSet, Set, Union, Tupl
__all__ = ('WHITESPACE_PTYPE',
'TOKEN_PTYPE',
'ZOMBIE',
'ZOMBIE_NODE',
'ZOMBIE_TAG',
'PLACEHOLDER',
'ResultType',
'StrictResultType',
'ChildrenType',
'Node',
'FrozenNode',
'RootNode',
'ZOMBIE_ROOTNODE',
'parse_sxpr',
'parse_xml',
'flatten_sxpr',
......@@ -58,7 +58,7 @@ __all__ = ('WHITESPACE_PTYPE',
WHITESPACE_PTYPE = ':Whitespace'
TOKEN_PTYPE = ':Token'
ZOMBIE = "__ZOMBIE__"
ZOMBIE_TAG = "__ZOMBIE__"
#######################################################################
#
......@@ -162,7 +162,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
__slots__ = '_result', 'children', '_len', '_pos', 'tag_name', 'errors', '_xml_attr', '_content'
def __init__(self, tag_name: Optional[str], result: ResultType, leafhint: bool = False) -> None:
def __init__(self, tag_name: str, result: ResultType, leafhint: bool = False) -> None:
"""
Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result.
......@@ -178,12 +178,8 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
self._len = -1 # type: int # lazy evaluation
else:
self.result = result
assert tag_name is None or isinstance(tag_name, str) # TODO: Delete this line
self.tag_name = tag_name if tag_name else ZOMBIE
# if parser is None:
# self._tag_name = ZOMBIE
# else:
# self._tag_name = parser.name or parser.ptype
# assert tag_name is not None
self.tag_name = tag_name # type: str
def __deepcopy__(self, memo):
if self.children:
......@@ -693,7 +689,29 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return sum(child.tree_size() for child in self.children) + 1
ZOMBIE_NODE = Node(ZOMBIE, '')
class FrozenNode(Node):
    """
    A leaf node whose result cannot be re-assigned after construction.

    Only string results (``str`` or ``StringView``) are accepted; the
    result is stored as a plain ``str``. Assigning to ``result`` later
    raises a ``TypeError``.
    """

    def __init__(self, tag_name: str, result: ResultType) -> None:
        """
        Create a frozen leaf node.

        :param tag_name: the tag name of the node
        :param result: the string content of the node
        :raises TypeError: if ``result`` is neither ``str`` nor ``StringView``
        """
        if not isinstance(result, (str, StringView)):
            raise TypeError('FrozenNode only accepts string as results. '
                            '(Only leaf-nodes can be frozen nodes.)')
        # NOTE(review): leafhint=True presumably makes Node.__init__ store the
        # result directly instead of going through the ``result`` setter,
        # which always raises in this class - confirm against Node.__init__.
        super(FrozenNode, self).__init__(tag_name, str(result), True)

    @property
    def result(self) -> StrictResultType:
        """Return the (immutable) result of the node."""
        return self._result

    @result.setter
    def result(self, result: ResultType):
        """Reject any attempt to re-assign the result."""
        raise TypeError('FrozenNode does not allow re-assignment of results.')

    def init_pos(self, pos: int) -> 'Node':
        """
        Ignore the position, but return the node itself.

        No position is stored on a frozen node. ``self`` is returned (rather
        than ``None``) so that the declared return type holds and call chains
        such as ``node.init_pos(0)`` keep yielding a node.
        """
        return self
# Shared dummy node with an empty result. Tree-walking code appends it to a
# context list as a slot that is then overwritten with each child node.
PLACEHOLDER = Node('__PLACEHOLDER__', '')
class RootNode(Node):
......@@ -708,7 +726,7 @@ class RootNode(Node):
"""
def __init__(self, node: Optional[Node] = None):
super().__init__(ZOMBIE, '')
super().__init__(ZOMBIE_TAG, '')
self.all_errors = [] # type: List[Error]
self.error_flag = 0
if node is not None:
......@@ -804,8 +822,6 @@ class RootNode(Node):
empty_tags=self.empty_tags)
ZOMBIE_ROOTNODE = RootNode()
#######################################################################
#
# S-expression- and XML-parsers
......
......@@ -40,7 +40,7 @@ import sys
from DHParser.error import Error, is_error, adjust_error_locations
from DHParser.log import is_logging, clear_logs, log_parsing_history
from DHParser.parse import UnknownParserError, Parser, Lookahead
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, ZOMBIE
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, flatten_sxpr, ZOMBIE_TAG
from DHParser.toolkit import re, typing
from typing import Tuple
......@@ -401,7 +401,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name))
except UnknownParserError as upe:
cst = RootNode()
cst = cst.new_error(Node(ZOMBIE, "").init_pos(0), str(upe))
cst = cst.new_error(Node(ZOMBIE_TAG, "").init_pos(0), str(upe))
clean_test_name = str(test_name).replace('*', '')
# log_ST(cst, "match_%s_%s.cst" % (parser_name, clean_test_name))
tests.setdefault('__cst__', {})[test_name] = cst
......@@ -450,7 +450,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
try:
cst = parser(test_code, parser_name, track_history=has_lookahead(parser_name))
except UnknownParserError as upe:
node = Node(ZOMBIE, "").init_pos(0)
node = Node(ZOMBIE_TAG, "").init_pos(0)
cst = RootNode(node).new_error(node, str(upe))
errata.append('Unknown parser "{}" in fail test "{}"!'.format(parser_name, test_name))
tests.setdefault('__err__', {})[test_name] = errata[-1]
......
......@@ -32,7 +32,7 @@ import inspect
from functools import partial, singledispatch
from DHParser.error import Error, ErrorCode
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_NODE, RootNode, parse_sxpr, flatten_sxpr
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, PLACEHOLDER, RootNode, parse_sxpr, flatten_sxpr
from DHParser.toolkit import issubtype, isgenerictype, expand_table, smart_list, re, typing
from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
Tuple, List, Sequence, Union, Text, Generic
......@@ -309,7 +309,7 @@ def traverse(root_node: Node,
nonlocal cache
node = context[-1]
if node.children:
context.append(ZOMBIE_NODE)
context.append(PLACEHOLDER)
for child in node.children:
context[-1] = child
traverse_recursive(context) # depth first
......@@ -624,7 +624,7 @@ def flatten(context: List[Node], condition: Callable = is_anonymous, recursive:
node = context[-1]
if node.children:
new_result = [] # type: List[Node]
context.append(ZOMBIE_NODE)
context.append(PLACEHOLDER)
for child in node.children:
context[-1] = child
if child.children and condition(context):
......
......@@ -55,6 +55,13 @@ def fail_on_error(src, result):
sys.exit(1)
def count_nodes(tree, condition=lambda n: True):
    """
    Count the nodes that ``tree.select`` yields for ``condition``.

    ``tree.select`` is called with ``include_root=True``; with the default
    condition every yielded node is counted.
    """
    return sum(1 for _ in tree.select(condition, include_root=True))
def tst_func():
with DHParser.log.logging(LOGGING):
if not os.path.exists('REPORT'):
......@@ -68,6 +75,9 @@ def tst_func():
print('\n\nParsing document: "%s"' % file)
result = parser(doc)
print("Number of CST-nodes: " + str(count_nodes(result)))
# print("Number of empty nodes: " + str(count_nodes(result,
# lambda n: not bool(n.result))))
if DHParser.log.is_logging():
print('Saving CST')
with open('REPORT/' + file[:-4] + '.cst', 'w', encoding='utf-8') as f:
......@@ -79,6 +89,7 @@ def tst_func():
fail_on_error(doc, result)
transformer(result)
fail_on_error(doc, result)
print("Number of AST-nodes: " + str(count_nodes(result)))
if DHParser.log.is_logging():
print('Saving AST')
with open('LOGS/' + file[:-4] + '.ast', 'w', encoding='utf-8') as f:
......
......@@ -32,7 +32,7 @@ from DHParser import logging, is_filename, load_if_file, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, ZOMBIE_NODE
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, PLACEHOLDER
#######################################################################
......@@ -574,7 +574,7 @@ class XMLCompiler(Compiler):
node.attr.update(attributes)
preserve_whitespace |= attributes.get('xml:space', '') == 'preserve'
node.tag_name = tag_name
content = self.compile_children(node.get('content', ZOMBIE_NODE))
content = self.compile_children(node.get('content', PLACEHOLDER))
if len(content) == 1:
if content[0].tag_name == "CharData":
# reduce single CharData children
......
......@@ -7,11 +7,13 @@ import doctest
import multiprocessing
import os
import platform
#import subprocess
#import sys
import sys
import time
import threading
scriptdir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scriptdir, '../'))
lock = threading.Lock()
......@@ -33,8 +35,6 @@ def run_doctests(module):
if __name__ == "__main__":
scriptdir = os.path.dirname(os.path.realpath(__file__))
if platform.system() != "Windows":
interpreters = ['pypy3 ', 'python3 ']
else:
......
......@@ -23,7 +23,8 @@ import copy
import sys
sys.path.extend(['../', './'])
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
flatten_xml, ZOMBIE_TAG
from DHParser.transform import traverse, reduce_single_child, \
replace_by_single_child, flatten, remove_expendables
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
......@@ -170,11 +171,11 @@ class TestNode:
def test_len_and_pos(self):
"""Test len-property of Node."""
nd1 = Node(None, "123")
nd1 = Node(ZOMBIE_TAG, "123")
assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
nd2 = Node(None, "456")
nd2 = Node(ZOMBIE_TAG, "456")
assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
nd = Node(None, (nd1, nd2))
nd = Node(ZOMBIE_TAG, (nd1, nd2))
assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
nd.init_pos(0)
assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
......
......@@ -24,7 +24,7 @@ import sys
sys.path.extend(['../', './'])
from DHParser.syntaxtree import Node, parse_sxpr, flatten_sxpr, parse_xml, ZOMBIE_NODE, \
from DHParser.syntaxtree import Node, parse_sxpr, flatten_sxpr, parse_xml, PLACEHOLDER, \
TOKEN_PTYPE
from DHParser.transform import traverse, reduce_single_child, remove_whitespace, \
traverse_locally, collapse, collapse_if, lstrip, rstrip, remove_content, remove_tokens, \
......@@ -140,7 +140,7 @@ class TestTransformationFactory:
nonlocal save
save = parameters
transformation = parameterized_transformation('a', 'b', 'c')
transformation([ZOMBIE_NODE])
transformation([PLACEHOLDER])
assert save == {'a', 'b', 'c'}
def test_parameter_set_expansion2(self):
......@@ -150,7 +150,7 @@ class TestTransformationFactory:
nonlocal save
save = parameters
transformation = parameterized_transformation('a', 'b', 'c')
transformation([ZOMBIE_NODE])
transformation([PLACEHOLDER])
assert save == ('a', 'b', 'c'), str(save)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment