Commit 08eef33e authored by Eckhart Arnold

- added separate ptype parameter to Parser class (to avoid magic)

parent ff434def
......@@ -211,7 +211,7 @@ EBNF_transformation_table = {
[remove_expendables, reduce_single_child],
"list_":
[flatten, partial(remove_tokens, tokens={','})],
"":
"*":
[remove_expendables, replace_by_single_child]
}
......
......@@ -205,21 +205,17 @@ class ParserMetaClass(type):
class Parser(metaclass=ParserMetaClass):
def __init__(self, name=''):
def __init__(self, name='', ptype=''):
assert isinstance(name, str), str(name)
i = name.find(':')
if i >= 0:
self.name = name[:i]
self.ptype = name[i:]
else:
self.name = name
self.ptype = ':' + self.__class__.__name__
assert not ptype or ptype[0] == ':'
self.name = name
self.ptype = ptype or ':' + self.__class__.__name__
# self.pbases = {cls.__name__ for cls in inspect.getmro(self.__class__)}
self._grammar = None # center for global variables etc.
self.reset()
def __deepcopy__(self, memo):
return self.__class__(self.name + self.ptype)
return self.__class__(self.name, self.ptype)
def reset(self):
self.visited = dict()
......@@ -478,11 +474,11 @@ class ScannerToken(Parser):
indented block. Otherwise indented block are difficult to handle
with parsing expression grammars.
"""
def __init__(self, scanner_token):
def __init__(self, scanner_token, ptype=''):
assert isinstance(scanner_token, str) and scanner_token and \
scanner_token.isupper()
assert RX_SCANNER_TOKEN.match(scanner_token)
super(ScannerToken, self).__init__(scanner_token)
super(ScannerToken, self).__init__(scanner_token, ptype)
def __call__(self, text):
if text[0:1] == BEGIN_SCANNER_TOKEN:
......@@ -519,8 +515,8 @@ class RegExp(Parser):
other parsers delegate part of the parsing job to other parsers,
but do not match text directly.
"""
def __init__(self, regexp, name=''):
super(RegExp, self).__init__(name)
def __init__(self, regexp, name='', ptype=''):
super(RegExp, self).__init__(name, ptype)
self.regexp = re.compile(regexp) if isinstance(regexp, str) else regexp
def __deepcopy__(self, memo):
......@@ -529,7 +525,7 @@ class RegExp(Parser):
regexp = copy.deepcopy(self.regexp, memo)
except TypeError:
regexp = self.regexp.pattern
return RegExp(regexp, self.name + self.ptype)
return RegExp(regexp, self.name, self.ptype)
def __call__(self, text):
match = text[0:1] != BEGIN_SCANNER_TOKEN and self.regexp.match(text) # ESC starts a scanner token.
......@@ -561,7 +557,7 @@ class RE(Parser):
respective parameters in the constructor are set to ``None`` the
default whitespace expression from the Grammar object will be used.
"""
def __init__(self, regexp, wL=None, wR=None, name=''):
def __init__(self, regexp, wL=None, wR=None, name='', ptype=''):
"""Constructor for class RE.
Args:
......@@ -577,7 +573,7 @@ class RE(Parser):
See above.
name: The optional name of the parser.
"""
super(RE, self).__init__(name)
super(RE, self).__init__(name, ptype)
self.wL = wL
self.wR = wR
self.wspLeft = Whitespace(wL) if wL else ZOMBIE_PARSER
......@@ -589,7 +585,7 @@ class RE(Parser):
regexp = copy.deepcopy(self.main.regexp, memo)
except TypeError:
regexp = self.main.regexp.pattern
return self.__class__(regexp, self.wL, self.wR, self.name + self.ptype)
return self.__class__(regexp, self.wL, self.wR, self.name, self.ptype)
def __call__(self, text):
# assert self.main.regexp.pattern != "@"
......@@ -634,9 +630,7 @@ def Token(token, wL=None, wR=None, name=''):
identify tokens in the abstract syntax tree transformation and
compilation stage.
"""
parser = RE(escape_re(token), wL, wR, name)
parser.ptype = TOKEN_PTYPE
return parser
return RE(escape_re(token), wL, wR, name, TOKEN_PTYPE)
def mixin_comment(whitespace, comment):
......@@ -660,14 +654,14 @@ def mixin_comment(whitespace, comment):
class UnaryOperator(Parser):
def __init__(self, parser, name=''):
super(UnaryOperator, self).__init__(name)
def __init__(self, parser, name='', ptype=''):
super(UnaryOperator, self).__init__(name, ptype)
assert isinstance(parser, Parser)
self.parser = parser
def __deepcopy__(self, memo):
parser = copy.deepcopy(self.parser, memo)
return self.__class__(parser, self.name + self.ptype)
return self.__class__(parser, self.name, self.ptype)
def apply(self, func):
if super(UnaryOperator, self).apply(func):
......@@ -675,14 +669,14 @@ class UnaryOperator(Parser):
class NaryOperator(Parser):
def __init__(self, *parsers, name=''):
super(NaryOperator, self).__init__(name)
def __init__(self, *parsers, name='', ptype=''):
super(NaryOperator, self).__init__(name, ptype)
assert all([isinstance(parser, Parser) for parser in parsers]), str(parsers)
self.parsers = parsers
def __deepcopy__(self, memo):
parsers = copy.deepcopy(self.parsers, memo)
return self.__class__(*parsers, name=self.name + self.ptype)
return self.__class__(*parsers, name=self.name, ptype=self.ptype)
def apply(self, func):
if super(NaryOperator, self).apply(func):
......@@ -691,8 +685,8 @@ class NaryOperator(Parser):
class Optional(UnaryOperator):
def __init__(self, parser, name=''):
super(Optional, self).__init__(parser, name)
def __init__(self, parser, name='', ptype=''):
super(Optional, self).__init__(parser, name, ptype)
assert isinstance(parser, Parser)
assert not isinstance(parser, Optional), \
"Nesting options would be redundant: %s(%s)" % \
......@@ -724,8 +718,8 @@ class ZeroOrMore(Optional):
class OneOrMore(UnaryOperator):
def __init__(self, parser, name=''):
super(OneOrMore, self).__init__(parser, name)
def __init__(self, parser, name='', ptype=''):
super(OneOrMore, self).__init__(parser, name, ptype)
assert not isinstance(parser, Optional), \
"Use ZeroOrMore instead of nesting OneOrMore and Optional: " \
"%s(%s)" % (str(name), str(parser.name))
......@@ -748,8 +742,8 @@ class OneOrMore(UnaryOperator):
class Sequence(NaryOperator):
def __init__(self, *parsers, name=''):
super(Sequence, self).__init__(*parsers, name=name)
def __init__(self, *parsers, name='', ptype=''):
super(Sequence, self).__init__(*parsers, name=name, ptype=ptype)
assert len(self.parsers) >= 1
def __call__(self, text):
......@@ -768,8 +762,8 @@ class Sequence(NaryOperator):
class Alternative(NaryOperator):
def __init__(self, *parsers, name=''):
super(Alternative, self).__init__(*parsers, name=name)
def __init__(self, *parsers, name='', ptype=''):
super(Alternative, self).__init__(*parsers, name=name, ptype=ptype)
assert len(self.parsers) >= 1
assert all(not isinstance(p, Optional) for p in self.parsers)
......@@ -789,8 +783,8 @@ class Alternative(NaryOperator):
class FlowOperator(UnaryOperator):
def __init__(self, parser, name=''):
super(FlowOperator, self).__init__(parser, name)
def __init__(self, parser, name='', ptype=''):
super(FlowOperator, self).__init__(parser, name, ptype)
class Required(FlowOperator):
......@@ -809,8 +803,8 @@ class Required(FlowOperator):
class Lookahead(FlowOperator):
def __init__(self, parser, name=''):
super(Lookahead, self).__init__(parser, name)
def __init__(self, parser, name='', ptype=''):
super(Lookahead, self).__init__(parser, name, ptype)
def __call__(self, text):
node, text_ = self.parser(text)
......@@ -841,8 +835,8 @@ def iter_right_branch(node):
class Lookbehind(FlowOperator):
def __init__(self, parser, name=''):
super(Lookbehind, self).__init__(parser, name)
def __init__(self, parser, name='', ptype=''):
super(Lookbehind, self).__init__(parser, name, ptype)
print("WARNING: Lookbehind Operator is experimental!")
def __call__(self, text):
......@@ -881,8 +875,8 @@ class NegativeLookbehind(Lookbehind):
class Capture(UnaryOperator):
def __init__(self, parser, name=''):
super(Capture, self).__init__(parser, name)
def __init__(self, parser, name='', ptype=''):
super(Capture, self).__init__(parser, name, ptype)
print("WARNING: Capture operator is experimental")
def __call__(self, text):
......@@ -896,16 +890,16 @@ class Capture(UnaryOperator):
class Retrieve(Parser):
def __init__(self, symbol, counterpart=None, name=''):
def __init__(self, symbol, counterpart=None, name='', ptype=''):
if not name:
name = symbol.name
super(Retrieve, self).__init__(name)
super(Retrieve, self).__init__(name, ptype)
self.symbol = symbol
self.counterpart = counterpart if counterpart else lambda value: value
print("WARNING: Retrieve operator is experimental")
def __deepcopy__(self, memo):
return self.__class__(self.symbol, self.counterpart, self.name + self.ptype)
return self.__class__(self.symbol, self.counterpart, self.name, self.ptype)
def __call__(self, text):
stack = self.grammar.variables[self.symbol.name]
......@@ -935,7 +929,6 @@ class Forward(Parser):
def __init__(self):
Parser.__init__(self)
self.parser = None
self.name = ''
self.cycle_reached = False
def __deepcopy__(self, memo):
......
......@@ -524,11 +524,11 @@ def traverse(root_node, processing_table, key_func=key_tag_name):
for child in node.result:
traverse_recursive(child)
node.error_flag |= child.error_flag # propagate error flag
sequence = table.get('*', []) + \
table.get(key_func(node), table.get('', [])) + \
sequence = table.get('+', []) + \
table.get(key_func(node), table.get('*', [])) + \
table.get('~', [])
# '*' always called (before any other processing function)
# '?' called for those nodes for which no (other) processing functions is in the table
# '+' always called (before any other processing function)
# '*' called for those nodes for which no (other) processing function is in the table
# '~' always called (after any other processing function)
for call in sequence:
call(node)
......
......@@ -22,7 +22,7 @@ from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
WHITESPACE_KEYWORD, TOKEN_KEYWORD, replace_parser
WHITESPACE_PTYPE, TOKEN_PTYPE, replace_parser
......@@ -286,13 +286,13 @@ MLW_AST_transformation_table = {
"LEER, TRENNER, ZSPRUNG": partial(replace_parser, parser_name=WHITESPACE_KEYWORD),
"DATEI_ENDE": no_operation,
"NIEMALS": no_operation,
(TOKEN_KEYWORD, WHITESPACE_KEYWORD):
(TOKEN_PTYPE, WHITESPACE_PTYPE):
[remove_expendables, reduce_single_child],
"*":
"+":
remove_expendables,
"~":
partial(remove_tokens, tokens={',', ';'}),
"":
"*":
[remove_expendables, replace_by_single_child]
}
......
......@@ -48,7 +48,7 @@ ARITHMETIC_EBNF_transformation_table = {
"term, expr": [replace_by_single_child, flatten],
"factor": [remove_expendables, reduce_single_child],
(TOKEN_PTYPE): [remove_expendables, reduce_single_child],
"": [remove_expendables, replace_by_single_child]
"*": [remove_expendables, replace_by_single_child]
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment