2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 5eb732ac authored by eckhart's avatar eckhart
Browse files

- Refactoring: Elimination of ParserBase. Still one Error!

parent f414983c
......@@ -87,7 +87,7 @@ try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, MockParser, \\
from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
......
......@@ -52,12 +52,9 @@ import collections
import contextlib
import html
import os
import threading
from DHParser.error import line_col
from DHParser.stringview import StringView
from DHParser.syntaxtree import Node
from DHParser.parse import ParserBase
from DHParser.toolkit import is_filename, escape_control_characters, GLOBALS, typing
from typing import List, Tuple, Union
......@@ -221,17 +218,15 @@ class HistoryRecord:
'\n</style>\n</head>\n<body>\n')
HTML_LEAD_OUT = '\n</body>\n</html>\n'
def __init__(self, call_stack: List['ParserBase'], node: Node, text: StringView) -> None:
def __init__(self, call_stack: List[str],
node: Node,
text: StringView,
line_col: Tuple[int, int]) -> None:
# copy call stack, dropping uninformative Forward-Parsers
self.call_stack = [p for p in call_stack if p.ptype != ":Forward"] # type: List['ParserBase']
self.call_stack = [tn for tn in call_stack if tn != ":Forward"] # type: List[str]
self.node = node # type: Node
self.text = text # type: StringView
self.line_col = (1, 1) # type: Tuple[int, int]
if call_stack:
grammar = call_stack[-1].grammar
document = grammar.document__
lbreaks = grammar.document_lbreaks__
self.line_col = line_col(lbreaks, len(document) - len(text))
self.line_col = line_col # type: Tuple[int, int]
def __str__(self):
return '%4i, %2i: %s; %s; "%s"' % self.as_tuple()
......@@ -287,8 +282,7 @@ class HistoryRecord:
@property
def stack(self) -> str:
return "->".join((p.repr if p.ptype in {':RegExp', ':Token'} else p.name or p.ptype)
for p in self.call_stack)
return "->".join(self.call_stack)
@property
def status(self) -> str:
......
......@@ -33,7 +33,7 @@ for an example.
from collections import defaultdict, OrderedDict
import copy
from DHParser.error import Error, linebreaks
from DHParser.error import Error, linebreaks, line_col
from DHParser.log import is_logging, HistoryRecord
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
......@@ -80,7 +80,7 @@ __all__ = ('Parser',
########################################################################
#
# Grammar and parsing infrastructure
# Parser base class
#
########################################################################
......@@ -147,51 +147,7 @@ ApplyFunc = Callable[['Parser'], None]
FlagFunc = Callable[[ApplyFunc, Set[ApplyFunc]], bool]
class ParserBase:
    """
    Common root of all real and mock parser classes.

    The class is defined in this module, because Node objects require
    a parser object for their instantiation.

    Attributes:
        name:      the parser's name; the empty string after construction
        ptype:     a colon followed by the name of the parser's class
        tag_name:  initialized with the same value as ``ptype``
    """
    __slots__ = 'name', 'ptype', 'tag_name'

    def __init__(self):
        class_tag = ':' + self.__class__.__name__
        self.name = ''  # type: str
        self.ptype = class_tag  # type: str
        self.tag_name = class_tag  # type: str

    def __repr__(self):
        """Returns the name immediately followed by the ptype."""
        return ''.join((self.name, self.ptype))

    def __str__(self):
        """Returns ``repr(self)``, prefixed by "<name> = " if the parser
        has a name."""
        representation = repr(self)
        if self.name:
            return self.name + ' = ' + representation
        return representation

    def __call__(self, text: 'StringView') -> Tuple[Optional['Node'], 'StringView']:
        """Never matches: yields no node and hands back `text` unchanged."""
        return None, text

    @property
    def repr(self) -> str:
        """Returns the parser's name if it has one, its ``__repr__()``
        otherwise."""
        return self.name or self.__repr__()

    def reset(self):
        """Resets any parser variables. (Should be overridden.)
        The base implementation does nothing."""

    @property
    def grammar(self) -> 'Grammar':
        """Accessor for the Grammar object to which the parser belongs.
        This base implementation always raises NotImplementedError."""
        raise NotImplementedError

    def apply(self, func: Callable):
        """Hook for applying the function `func` to the parser and its
        descendant parsers. The base implementation does nothing."""
class Parser(ParserBase):
class Parser:
"""
(Abstract) Base class for Parser combinator parsers. Any parser
object that is actually used for parsing (i.e. no mock parsers)
......@@ -251,22 +207,14 @@ class Parser(ParserBase):
def __init__(self) -> None:
# assert isinstance(name, str), str(name)
super().__init__()
self.name = '' # type: str
self.ptype = ':' + self.__class__.__name__ # type: str
self.tag_name = self.ptype # type: str
self._grammar = ZOMBIE_GRAMMAR # type: Grammar
self.reset()
# # add "aspect oriented" wrapper around parser calls
# # for memoizing, left recursion and tracing
# if not isinstance(self, Forward): # should Forward-Parser not be guarded? Not sure...
# guarded_parser_call = add_parser_guard(self.__class__.__call__)
# # The following check is necessary for classes that don't override
# # the __call__() method, because in these cases the non-overridden
# # __call__()-method would be substituted a second time!
# if self.__class__.__call__.__code__ != guarded_parser_call.__code__:
# self.__class__.__call__ = guarded_parser_call
def __deepcopy__(self, memo):
"""Deepcopy method of the parser. Upon instantiation of a Grammar-
""" Deepcopy method of the parser. Upon instantiation of a Grammar-
object, parsers will be deep-copied to the Grammar object. If a
derived parser-class changes the signature of the constructor,
`__deepcopy__`-method must be replaced (i.e. overridden without
......@@ -275,8 +223,20 @@ class Parser(ParserBase):
duplicate = self.__class__()
duplicate.name = self.name
duplicate.ptype = self.ptype
duplicate.tag_name = self.tag_name
return duplicate
def __repr__(self):
return self.name + self.ptype
def __str__(self):
return self.name + (' = ' if self.name else '') + repr(self)
@property
def repr(self) -> str:
"""Returns the parser's name if it has a name and self.__repr___() otherwise."""
return self.name if self.name else self.__repr__()
def reset(self):
"""Initializes or resets any parser variables. If overridden,
the `reset()`-method of the parent class must be called from the
......@@ -311,7 +271,8 @@ class Parser(ParserBase):
self.recursion_counter[location] += 1
if grammar.history_tracking__:
grammar.call_stack__.append(self)
grammar.call_stack__.append(self.repr if self.tag_name in (':RegExp', ':Token')
else self.tag_name)
grammar.moving_forward__ = True
try:
......@@ -375,7 +336,8 @@ class Parser(ParserBase):
# don't track returning parsers except in case an error has occurred
# remaining = len(rest)
if (grammar.moving_forward__ or (node and node.errors)):
record = HistoryRecord(grammar.call_stack__, node, text)
record = HistoryRecord(grammar.call_stack__, node, text,
grammar.line_col__(text))
grammar.history__.append(record)
# print(record.stack, record.status, rest[:20].replace('\n', '|'))
grammar.moving_forward__ = False
......@@ -479,6 +441,56 @@ class Parser(ParserBase):
self._apply(func, positive_flip)
class ZombieParser(Parser):
    """
    Stand-in for a Parser instance.

    ``ZombieParser`` is the class of the singleton object
    ``ZOMBIE_PARSER``. The ``ZOMBIE_PARSER`` carries a name, a ptype and
    a tag name, but it must neither be called nor be assigned to a
    grammar: both attempts raise an AssertionError. Copying or
    deep-copying it yields the very same object.
    """
    alive = False
    __slots__ = ()

    def __init__(self):
        # The super class constructor is deliberately not called.
        cls = self.__class__
        # `alive` is flipped to True below, so a second instantiation
        # trips the first assertion.
        assert not cls.alive, "There can be only one!"
        assert cls == ZombieParser, "No derivatives, please!"
        self.name = ZOMBIE
        self.ptype = ':' + cls.__name__
        self.tag_name = ZOMBIE
        cls.alive = True
        self.reset()

    def __copy__(self):
        """Returns the object itself instead of a copy."""
        return self

    def __deepcopy__(self, memo):
        """Returns the object itself instead of a deep copy."""
        return self

    def __call__(self, text):
        """Always raises an AssertionError."""
        raise AssertionError("Better call Saul ;-)")

    def _grammar_assigned_notifier(self):
        """Always raises an AssertionError."""
        raise AssertionError("No zombies allowed in any grammar!")

    def apply(self, func: ApplyFunc):
        """Ignores `func` and returns a fixed string."""
        return "Eaten alive..."
ZOMBIE_PARSER = ZombieParser()
########################################################################
#
# Grammar class, central administration of all parsers of a grammar
#
########################################################################
def mixin_comment(whitespace: str, comment: str) -> str:
"""
Returns a regular expression that merges comment and whitespace
......@@ -648,8 +660,8 @@ class Grammar:
location to which the parser backtracks. This is done by
calling method :func:`rollback_to__(location)`.
call_stack__: A stack of all parsers that have been called. This
is required for recording the parser history (for debugging)
call_stack__: A stack of the tag names of all parsers that have been called.
This is required for recording the parser history (for debugging)
and, eventually, i.e. one day in the future, for tracing through
the parsing process.
......@@ -769,7 +781,7 @@ class Grammar:
self.rollback__ = [] # type: List[Tuple[int, Callable]]
self.last_rb__loc__ = -1 # type: int
# support for call stack tracing
self.call_stack__ = [] # type: List[ParserBase]
self.call_stack__ = [] # type: List[str]
# snapshots of call stacks
self.history__ = [] # type: List[HistoryRecord]
# also needed for call stack tracing
......@@ -907,7 +919,8 @@ class Grammar:
# for record in self.history__:
# if record.node and record.node._pos < 0:
# record.node.init_pos(0)
record = HistoryRecord(self.call_stack__.copy(), stitches[-1], rest)
record = HistoryRecord(self.call_stack__.copy(), stitches[-1], rest,
self.line_col__(rest))
self.history__.append(record)
# stop history tracking when parser returned too early
self.history_tracking__ = False
......@@ -969,6 +982,15 @@ class Grammar:
else (len(self.document__) + 1)
def line_col__(self, text):
    """
    Returns the line and column where text is located in the document.

    The position is derived solely from the length of `text`: the offset
    passed to `line_col()` is `self.document_length__ - len(text)`. It is
    not checked whether text is actually a substring of the currently
    parsed document.
    """
    lbreaks = self.document_lbreaks__
    offset = self.document_length__ - len(text)
    return line_col(lbreaks, offset)
def dsl_error_msg(parser: Parser, error_str: str) -> str:
"""
Returns an error message for errors in the parser configuration,
......@@ -2147,59 +2169,3 @@ class Forward(Parser):
self.parser._apply(func, flip)
return True
return False
class MockParser(ParserBase):
    """
    MockParser objects can be used to reconstruct syntax trees from a
    serialized form like S-expressions or XML. Mock objects can mimic
    different parser types by assigning them a `ptype` on initialization.

    Mock objects should not be used for anything other than
    syntax tree (re-)construction. In all other cases where a parser
    object substitute is needed, choose the singleton ZOMBIE_PARSER.
    """
    __slots__ = ()

    def __init__(self, name='', ptype=''):
        # A non-empty ptype must begin with a colon.
        assert not (ptype and ptype[0] != ':')
        super().__init__()
        self.name = name
        # An empty ptype keeps the value assigned by the base class constructor.
        if ptype:
            self.ptype = ptype
class ZombieParser(MockParser):
    """
    Stand-in for a Parser instance.

    ``ZombieParser`` is the class of the singleton object
    ``ZOMBIE_PARSER``. The ``ZOMBIE_PARSER`` has a name and can be
    called, but it never matches. It serves as a substitute where only
    these (or one of these properties) is needed, but no real Parser-
    object is instantiated.
    """
    alive = False
    __slots__ = ()

    def __init__(self):
        super().__init__()
        cls = self.__class__
        # `alive` is flipped to True below, so a second instantiation
        # trips the first assertion.
        assert not cls.alive, "There can be only one!"
        assert cls == ZombieParser, "No derivatives, please!"
        self.name = ZOMBIE
        cls.alive = True

    def __copy__(self):
        """Returns the object itself instead of a copy."""
        return self

    def __deepcopy__(self, memo):
        """Returns the object itself instead of a deep copy."""
        return self

    def __call__(self, text):
        """Better call Saul ;-)"""
        return None, text
ZOMBIE_PARSER = ZombieParser()
\ No newline at end of file
......@@ -349,7 +349,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
or (len(raw_errors) == 1
and raw_errors[-1].code == Error.MANDATORY_CONTINUATION_AT_EOF)
and any(isinstance(p, Lookahead)
for p in parser.history__[-1].call_stack))
for p in parser.history__[-1].call_stack)) # TODO: Refactor this clause!!!
for parser_name, tests in test_unit.items():
assert parser_name, "Missing parser name in test %s!" % unit_name
......
......@@ -33,7 +33,6 @@ from functools import partial, singledispatch
from DHParser.error import Error, ErrorCode
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_NODE, RootNode, parse_sxpr, flatten_sxpr
from DHParser.parse import ParserBase, MockParser
from DHParser.toolkit import issubtype, isgenerictype, expand_table, smart_list, re, typing
from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
Tuple, List, Sequence, Union, Text, Generic
......
......@@ -33,7 +33,6 @@ from DHParser import logging, is_filename, load_if_file, \
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, ZOMBIE_NODE
from DHParser.parse import MockParser
#######################################################################
......@@ -395,7 +394,7 @@ class XMLCompiler(Compiler):
node.attr.update(attributes)
node.result = ''
self.tree.empty_tags.add('?xml')
node.parser = self.get_parser('?xml')
node.tag_name = '?xml' # node.parser = self.get_parser('?xml')
return node
# def on_VersionInfo(self, node):
......@@ -597,7 +596,7 @@ class XMLCompiler(Compiler):
attributes = self.extract_attributes(node.children)
if attributes:
node.attr.update(attributes)
node.parser = self.get_parser(node['Name'].content)
node.tag_name = node['Name'].content # node.parser = self.get_parser(node['Name'].content)
node.result = ''
self.tree.empty_tags.add(node.tag_name)
return node
......
......@@ -33,7 +33,6 @@ from DHParser import logging, is_filename, load_if_file, Grammar, Compiler, nil_
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \
error_on, recompile_grammar, GLOBALS
from DHParser.parse import MockParser
#######################################################################
......
......@@ -25,7 +25,7 @@ from functools import partial
sys.path.extend(['../', './'])
from DHParser.toolkit import compile_python_object
from DHParser.log import logging, is_logging, log_ST
from DHParser.log import logging, is_logging, log_ST, log_parsing_history
from DHParser.error import Error
from DHParser.parse import Retrieve, Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, \
......@@ -73,7 +73,7 @@ class TestInfiLoopsAndRecursion:
assert snippet == str(syntax_tree)
if is_logging():
log_ST(syntax_tree, "test_LeftRecursion_direct.cst")
parser.log_parsing_history__("test_LeftRecursion_direct")
log_parsing_history(parser, "test_LeftRecursion_direct")
def test_direct_left_recursion2(self):
minilang = """
......@@ -110,7 +110,7 @@ class TestInfiLoopsAndRecursion:
assert snippet == str(syntax_tree)
if is_logging():
log_ST(syntax_tree, "test_LeftRecursion_indirect.cst")
parser.log_parsing_history__("test_LeftRecursion_indirect")
log_parsing_history(parser, "test_LeftRecursion_indirect")
def test_inifinite_loops(self):
minilang = """not_forever = { // } \n"""
......@@ -243,7 +243,7 @@ class TestRegex:
assert not messages, str(messages)
parser = compile_python_object(DHPARSER_IMPORTS + result, '\w+Grammar$')()
result = parser(testdoc)
# parser.log_parsing_history("test.log")
# log_parsing_history(parser, "test.log")
assert not result.error_flag
......
......@@ -24,7 +24,6 @@ import sys
sys.path.extend(['../', './'])
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml
from DHParser.parse import MockParser
from DHParser.transform import traverse, reduce_single_child, \
replace_by_single_child, flatten, remove_expendables
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
......
......@@ -253,7 +253,7 @@ class TestLookahead:
"""
EBNF = r"""
document = { category | entry } { LF }
category = {LF } sequence_of_letters { /:/ sequence_of_letters } /:/ §&(LF sequence_of_letters)
category = { LF } sequence_of_letters { /:/ sequence_of_letters } /:/ §&(LF sequence_of_letters)
entry = { LF } sequence_of_letters !/:/
sequence_of_letters = /[A-Za-z0-9 ]+/
LF = / *\n/
......
......@@ -26,7 +26,6 @@ sys.path.extend(['../', './'])
from DHParser.syntaxtree import Node, parse_sxpr, flatten_sxpr, parse_xml, ZOMBIE_NODE, \
TOKEN_PTYPE
from DHParser.parse import MockParser
from DHParser.transform import traverse, reduce_single_child, remove_whitespace, \
traverse_locally, collapse, collapse_if, lstrip, rstrip, remove_content, remove_tokens, \
transformation_factory, has_parent
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment