
Commit ff32c89f authored by Eckhart Arnold


- virtualized Parser.__class__.__name__ by introducing Parser.ptype (which by default is set to Parser.__class__.__name__)
- removed all isinstance(node, ...) statements from ebnf.EBNFCompiler, because these do not work with the mock trees that are needed for testing.
parent a289b064
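The second bullet is the crux of the change: mock syntax trees carry MockParser stand-ins, so type checks against real parser classes fail on them. A minimal self-contained sketch of the problem and of the ptype-based fix (stand-in classes, not DHParser's actual API):

```python
# A compiler pass that asks isinstance(node.parser, Sequence) breaks on a
# mock tree, because the mock's parser is a MockParser, not a Sequence.
# Comparing ptype strings works for real and mock parsers alike.

class Sequence:
    ptype = 'Sequence'

class MockParser:
    def __init__(self, ptype):
        self.ptype = ptype

real, mock = Sequence(), MockParser('Sequence')
assert isinstance(real, Sequence) and not isinstance(mock, Sequence)
assert real.ptype == mock.ptype == 'Sequence'   # ptype check passes for both
```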
@@ -221,14 +221,15 @@ EBNF_transformation_table = {
 EBNF_validation_table = {
     # Semantic validation on the AST
     "repetition, option, oneormore":
-        [partial(forbid, child_tags=['repetition', 'option', 'oneormore']),
+        [partial(forbid, child_names=['repetition', 'option', 'oneormore']),
         partial(assert_content, regex=r'(?!§)')],
 }
 
 def EBNFTransformer(syntax_tree):
-    for processing_table in [EBNF_transformation_table, EBNF_validation_table]:
-        traverse(syntax_tree, processing_table)
+    for processing_table, key_func in [(EBNF_transformation_table, lambda node: node.parser.name),
+                                       (EBNF_validation_table, lambda node: node.tag_name)]:
+        traverse(syntax_tree, processing_table, key_func)
 
 def get_ebnf_transformer():
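An aside on the validation rule above: r'(?!§)' is a negative lookahead, so assert_content flags any repetition/option/oneormore node whose content begins with '§' (apparently the grammar's marker for mandatory items):

```python
import re

# the lookahead matches (zero-width) unless the content starts with '§'
assert re.match(r'(?!§)', 'symbol rest')        # ordinary content passes
assert not re.match(r'(?!§)', '§symbol rest')   # a leading '§' is rejected
```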
@@ -560,7 +561,6 @@ class EBNFCompiler(CompilerBase):
         return self.non_terminal(node, 'Sequence')
 
     def on_factor(self, node):
-        assert isinstance(node.parser, Sequence), node.as_sexpr()  # these assert statements can be removed
         assert node.children
         assert len(node.result) >= 2, node.as_sexpr()
         prefix = node.result[0].result
@@ -208,6 +208,8 @@ class Parser(metaclass=ParserMetaClass):
     def __init__(self, name=None):
         assert name is None or isinstance(name, str), str(name)
         self.name = name or ''
+        self.ptype = self.__class__.__name__
+        # self.pbases = {cls.__name__ for cls in inspect.getmro(self.__class__)}
         self._grammar = None  # center for global variables etc.
         self.reset()
@@ -224,7 +226,7 @@ class Parser(metaclass=ParserMetaClass):
         return None, text    # default behaviour: don't match
 
     def __str__(self):
-        return self.name or self.__class__.__name__
+        return self.name or self.ptype
 
     @property
     def grammar(self):
@@ -531,6 +533,9 @@ class RegExp(Parser):
             return Node(self, text[:end]), text[end:]
         return None, text
 
+    def __str__(self):
+        return self.name or self.ptype + ' /%s/' % self.regexp.pattern
+
 class RE(Parser):
     """Regular Expressions with optional leading or trailing whitespace.
@@ -587,11 +592,11 @@ class RE(Parser):
             return Node(self, result), t
         return None, text
 
-    # def __str__(self):
-    #     if self.name == TOKEN_KEYWORD:
-    #         return 'Token "%s"' % self.main.regexp.pattern.replace('\\', '')
-    #     return self.name or ('RE ' + ('~' if self.wL else '')
-    #                          + '/%s/' % self.main.regexp.pattern + ('~' if self.wR else ''))
+    def __str__(self):
+        if self.name == TOKEN_KEYWORD:
+            return 'Token "%s"' % self.main.regexp.pattern.replace('\\', '')
+        return self.name or ('RE ' + ('~' if self.wL else '')
+                             + '/%s/' % self.main.regexp.pattern + ('~' if self.wR else ''))
 
     def _grammar_assigned_notifier(self):
         if self.grammar:
@@ -618,7 +623,9 @@ def Token(token, wL=None, wR=None, name=None):
     to the TOKEN_KEYWORD, making it easy to identify tokens in the
     abstract syntax tree transformation and compilation stage.
     """
-    return RE(escape_re(token), wL, wR, name or TOKEN_KEYWORD)
+    parser = RE(escape_re(token), wL, wR, name or TOKEN_KEYWORD)
+    parser.ptype = "Token"
+    return parser
 
 def mixin_comment(whitespace, comment):
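The manual ptype assignment is what makes the new scheme work for factory functions: Token() returns an RE instance, so the class name alone cannot mark the result as a token. A sketch with stand-in classes (not the real RE):

```python
class RE:
    def __init__(self, pattern, name=''):
        self.name = name
        self.ptype = self.__class__.__name__   # default, as in Parser.__init__

def Token(token, name=''):
    parser = RE(token, name)
    parser.ptype = "Token"    # re-label: type(parser).__name__ still says 'RE'
    return parser

t = Token('{', name='lbrace')
assert type(t).__name__ == 'RE' and t.ptype == 'Token'
```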
@@ -918,7 +925,7 @@ class Forward(Parser):
         Parser.__init__(self)
         self.parser = None
         self.name = ''
-        # self.cycle_reached = False
+        self.cycle_reached = False
 
     def __deepcopy__(self, memo):
         duplicate = self.__class__()
@@ -930,16 +937,14 @@ class Forward(Parser):
     def __call__(self, text):
         return self.parser(text)
 
-    # def __str__(self):
-    #     if self.cycle_reached:
-    #         if self.parser and self.parser.name:
-    #             return str(self.parser.name)
-    #         return "..."
-    #     else:
-    #         self.cycle_reached = True
-    #         s = str(self.parser)
-    #         self.cycle_reached = False
-    #         return s
+    def __str__(self):
+        if self.cycle_reached:
+            return "..."
+        else:
+            self.cycle_reached = True
+            s = str(self.parser)
+            self.cycle_reached = False
+            return s
 
     def set(self, parser):
         assert isinstance(parser, Parser)
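Re-enabling __str__ makes the cycle_reached flag necessary again: Forward objects close grammar cycles, and a naive __str__ would recurse forever. A self-contained sketch of the guard (simplified stand-ins, not the real Forward/Parser classes):

```python
class Forward:
    def __init__(self):
        self.parser = None
        self.cycle_reached = False

    def __str__(self):
        if self.cycle_reached:        # we are already inside str(self)
            return "..."
        self.cycle_reached = True
        s = str(self.parser)
        self.cycle_reached = False
        return s

class Pair:  # stand-in for a binary combinator like Series
    def __init__(self, left, right):
        self.left, self.right = left, right
    def __str__(self):
        return '(%s %s)' % (self.left, self.right)

expr = Forward()
expr.parser = Pair('atom', expr)   # expr := atom expr, i.e. self-referential
print(expr)                        # prints '(atom ...)' instead of recursing
```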
@@ -40,7 +40,7 @@ __all__ = ['WHITESPACE_KEYWORD',
            'no_operation',
            'replace_by_single_child',
            'reduce_single_child',
-           'change_parser',
+           'replace_parser',
            'is_whitespace',
            'is_empty',
            'is_expendable',
@@ -67,12 +67,13 @@ class MockParser:
     syntax tree (re-)construction. In all other cases where a parser
     object substitute is needed, choose the singleton ZOMBIE_PARSER.
     """
-    def __init__(self, name='', class_name=''):
+    def __init__(self, name='', ptype='', pbases=frozenset()):
         self.name = name
-        self.class_name = class_name or self.__class__.__name__
+        self.ptype = ptype or self.__class__.__name__
+        # self.pbases = pbases or {cls.__name__ for cls in inspect.getmro(self.__class__)}
 
     def __str__(self):
-        return self.name or self.class_name
+        return self.name or self.ptype
 
 class ZombieParser(MockParser):
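This is what enables the mock trees mentioned in the commit message. A hypothetical test snippet, assuming MockParser is importable from DHParser.syntaxtree alongside Node (the node names here are illustrative, not taken from the commit):

```python
from DHParser.syntaxtree import Node, MockParser

# a hand-built tree for testing: no grammar and no parsing run required
mock_tree = Node(MockParser('factor', ptype='Sequence'),
                 (Node(MockParser('symbol', ptype='RegExp'), 'expr'),))
assert mock_tree.tag_name == 'factor'
assert mock_tree.parser.ptype == 'Sequence'   # string check, no isinstance()
```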
@@ -88,7 +89,7 @@ class ZombieParser(MockParser):
     alive = False
 
     def __init__(self):
-        super(ZombieParser, self).__init__("ZOMBIE")
+        super(ZombieParser, self).__init__("__ZOMBIE__")
        assert not self.__class__.alive, "There can be only one!"
        assert self.__class__ == ZombieParser, "No derivatives, please!"
        self.__class__.alive = True
@@ -126,7 +127,12 @@ class Node:
         tuple of child nodes.
     children (tuple): The tuple of child nodes or an empty tuple
         if there are no child nodes. READ ONLY!
-    parser (Parser): The parser which generated this node.
+    parser (Parser): The parser which generated this node.
+        WARNING: In case you use mock syntax trees for testing or
+        parser replacement during the AST-transformation: DO NOT
+        rely on this being a real parser object in any phase after
+        parsing (i.e. AST-transformation and compiling), for
+        example by calling ``isinstance(node.parser, ...)``.
     errors (list): A list of parser- or compiler-errors:
         tuple(position, string) attached to this node
     len (int): The full length of the node's string result if the
@@ -173,7 +179,7 @@ class Node:
         return self.tag_name == other.tag_name and self.result == other.result
 
     def __hash__(self):
-        return hash(str(self.parser))
+        return hash(self.tag_name)
 
     def __deepcopy__(self, memodict={}):
         result = copy.deepcopy(self.result)
@@ -183,8 +189,8 @@ class Node:
     @property
     def tag_name(self):
-        return str(self.parser)
-        # ONLY FOR DEBUGGING: return self.parser.name + ':' + self.parser.__class__.__name__
+        return self.parser.name or self.parser.ptype
+        # ONLY FOR DEBUGGING: return self.parser.name + ':' + self.parser.ptype
 
     @property
     def result(self):
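Hashing on tag_name rather than on str(self.parser) means two nodes with the same tag and result are interchangeable regardless of which parser object produced them, which is exactly what comparing a real parse result against a mock tree requires. A sketch of the effect, under the same import assumption as above:

```python
from DHParser.syntaxtree import Node, MockParser

a = Node(MockParser('term'), 'content')
b = Node(MockParser('term'), 'content')
assert a == b                  # __eq__ compares tag_name and result
assert hash(a) == hash(b)      # __hash__ now uses tag_name only
```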
@@ -204,7 +210,7 @@ class Node:
     @property
     def len(self):
-        # DEBUGGING: print(str(self.parser), str(self.pos), str(self._len), str(self)[:10].replace('\n','.'))
+        # DEBUGGING: print(self.tag_name, str(self.pos), str(self._len), str(self)[:10].replace('\n','.'))
         return self._len
 
     @property
@@ -475,7 +481,7 @@ WHITESPACE_KEYWORD = 'WSP__'
 TOKEN_KEYWORD = 'TOKEN__'
 
-def traverse(root_node, processing_table):
+def traverse(root_node, processing_table, key_func=lambda node: node.parser.name):
     """Traverses the syntax tree starting with the given ``node`` depth
     first and applies the sequences of callback functions registered
     in the ``calltable``-dictionary.
@@ -485,10 +491,12 @@ def traverse(root_node, processing_table):
     Args:
         root_node (Node): The root-node of the syntax tree to be traversed
-        processing_table (dict): parser.name -> sequence of functions that
+        processing_table (dict): node key -> sequence of functions that
             will be applied to matching nodes in order. This dictionary
             is interpreted as a ``compact_table``. See
             ``toolkit.expand_table`` or ``EBNFCompiler.EBNFTransTable``
+        key_func (function): A mapping key_func(node) -> keystr. The default
+            key_func yields node.parser.name.
 
     Example:
         table = { "term": [replace_by_single_child, flatten],
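A self-contained sketch of the key_func dispatch (stand-in classes; the compact-table expansion and the '*'/'?'/'~' wildcards of the real traverse are omitted):

```python
from functools import partial

class StubNode:
    def __init__(self, tag, children=()):
        self.tag_name = tag
        self.children = children

def traverse(node, table, key_func=lambda node: node.tag_name):
    for child in node.children:            # depth first: children before parent
        traverse(child, table, key_func)
    for func in table.get(key_func(node), []):
        func(node)

def report(node, label):
    print(label, node.tag_name)

tree = StubNode('term', (StubNode('option'), StubNode('factor')))
traverse(tree, {'option': [partial(report, label='validating:')]},
         key_func=lambda node: node.tag_name)   # prints: validating: option
```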
@@ -506,7 +514,7 @@ def traverse(root_node, processing_table):
             traverse_recursive(child)
             node.error_flag |= child.error_flag  # propagate error flag
         sequence = table.get('*', []) + \
-                   table.get(node.parser.name, table.get('?', [])) + \
+                   table.get(key_func(node), table.get('?', [])) + \
                    table.get('~', [])
         # '*' always called (before any other processing function)
         # '?' called for those nodes for which no (other) processing function is in the table
@@ -554,11 +562,11 @@ def reduce_single_child(node):
         node.result = node.result[0].result
 
-def change_parser(node, new_parser_name):
-    """Changes the parser of a Node to a mock parser with the given
-    name.
+def replace_parser(node, parser_name, parser_type=''):
+    """Replaces the parser of a Node by a mock parser with the given
+    name and pseudo-type.
     """
-    node.parser = MockParser(new_parser_name)
+    node.parser = MockParser(parser_name, parser_type)
 
 # ------------------------------------------------
@@ -652,22 +660,22 @@ def remove_enclosing_delimiters(node):
 ########################################################################
 
-def require(node, child_tags):
+def require(node, child_names):
     for child in node.children:
-        if child.tag_name not in child_tags:
+        if child.parser.name not in child_names:
             node.add_error('Element "%s" is not allowed inside "%s".' %
-                           (child.tag_name, node.tag_name))
+                           (child.parser.name, node.parser.name))
 
-def forbid(node, child_tags):
+def forbid(node, child_names):
     for child in node.children:
-        if child.tag_name in child_tags:
+        if child.parser.name in child_names:
             node.add_error('Element "%s" cannot be nested inside "%s".' %
-                           (child.tag_name, node.tag_name))
+                           (child.parser.name, node.parser.name))
 
 def assert_content(node, regex):
     content = str(node)
     if not re.match(regex, content):
         node.add_error('Element "%s" violates %s on %s' %
-                       (node.tag_name, str(regex), content))
+                       (node.parser.name, str(regex), content))
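All three validators record problems via add_error instead of raising, so validation of the whole tree always runs to completion. A self-contained sketch of forbid's behaviour (stub classes, not DHParser's real Node):

```python
class StubParser:
    def __init__(self, name):
        self.name = name

class StubNode:
    def __init__(self, name, children=()):
        self.parser = StubParser(name)
        self.children = children
        self.errors = []

    def add_error(self, msg):
        self.errors.append(msg)

def forbid(node, child_names):   # restates the function above
    for child in node.children:
        if child.parser.name in child_names:
            node.add_error('Element "%s" cannot be nested inside "%s".'
                           % (child.parser.name, node.parser.name))

tree = StubNode('option', children=(StubNode('option'),))
forbid(tree, ['repetition', 'option', 'oneormore'])
assert tree.errors   # the nested 'option' is recorded as an error, not raised
```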
@@ -22,7 +22,7 @@ from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \
 from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
     remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
     no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
-    WHITESPACE_KEYWORD, TOKEN_KEYWORD, change_parser
+    WHITESPACE_KEYWORD, TOKEN_KEYWORD, replace_parser
@@ -148,7 +148,7 @@ class MLWGrammar(GrammarBase):
     DATEI_ENDE = !/./
     NIEMALS = /(?!.)/
     """
-    source_hash__ = "ce9155e0248ac27756283d067342182e"
+    source_hash__ = "5e5f53f5ef36706df8dc1ec0ecd73859"
     parser_initialization__ = "upon instantiation"
     COMMENT__ = r'#.*(?:\n|$)'
     WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'#.*(?:\n|$)')
@@ -283,7 +283,7 @@ MLW_AST_transformation_table = {
         [partial(remove_tokens, tokens={'AUTORIN', 'AUTOR'})],
     "WORT, WORT_KLEIN, WORT_GROSS, GROSSSCHRIFT":
         [remove_expendables, reduce_single_child],
-    "LEER, TRENNER, ZSPRUNG": partial(change_parser, new_parser_name=WHITESPACE_KEYWORD),
+    "LEER, TRENNER, ZSPRUNG": partial(replace_parser, parser_name=WHITESPACE_KEYWORD),
     "DATEI_ENDE": no_operation,
     "NIEMALS": no_operation,
     (TOKEN_KEYWORD, WHITESPACE_KEYWORD):