Commit ca3dc76b authored by Eckhart Arnold
Browse files

- more general approach for retrieve-filter-functions

parent a217669e
......@@ -75,7 +75,8 @@ from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
nop_filter, counterpart_filter, accumulating_filter
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\
......
......@@ -318,7 +318,7 @@ class EBNFCompiler(CompilerBase):
'comment': '',
'literalws': ['right'],
'tokens': set(), # alt. 'scanner_tokens'
'counterpart': set()} # alt. 'retrieve_counterpart'
'filter': dict()} # alt. 'retrieve_filter'
@property
def result(self):
......@@ -407,14 +407,11 @@ class EBNFCompiler(CompilerBase):
declarations = declarations[:-1]
declarations.append('"""')
# add default functions for counterpart filters of pop or retrieve operators
# add default functions for retrieve_filter filters of pop or retrieve operators
for symbol in self.directives['counterpart']:
# declarations.append('def %s_counterpart(value): \n' % symbol +
# ' return value.replace("(", ")").replace("[", "]")'
# '.replace("{", "}").replace(">", "<")\n')
declarations.append(symbol + '_counterpart = lambda value: value.replace("(", ")")'
'.replace("[", "]").replace("{", "}").replace(">", "<")')
# for symbol, fun in self.directives['filter']:
# declarations.append(symbol + '_filter = lambda value: value.replace("(", ")")'
# '.replace("[", "]").replace("{", "}").replace(">", "<")')
# turn definitions into declarations in reverse order
......@@ -540,8 +537,12 @@ class EBNFCompiler(CompilerBase):
elif key in {'tokens', 'scanner_tokens'}:
self.directives['tokens'] |= self._compile(node.result[1])
elif key in {'counterpart', 'retrieve_counterpart'}:
self.directives['counterpart'] |= self._compile(node.result[1])
elif key.endswith('_filter'):
filter_set = self._compile(node.result[1])
if not isinstance(filter_set, set) or len(filter_set) != 1:
node.add_error('Directive "%s" accepts exactly on symbol, not %s'
% (key, str(filter_set)))
self.directives['filter'][key[:-7]] = filter_set.pop()
else:
node.add_error('Unknown directive %s ! (Known ones are %s .)' %
......@@ -575,8 +576,8 @@ class EBNFCompiler(CompilerBase):
node.add_error(('Retrieve Operator "%s" requires a symbol, '
'and not a %s.') % (prefix, str(arg.parser)))
return str(arg.result)
if str(arg) in self.directives['counterpart']:
custom_args = ['counterpart=%s_counterpart' % str(arg)]
if str(arg) in self.directives['filter']:
custom_args = ['retrieve_filter=%s' % self.directives['filter'][str(arg)]]
self.variables.add(arg.result)
elif len(node.result) > 2:
......
......@@ -875,7 +875,6 @@ class NegativeLookbehind(Lookbehind):
class Capture(UnaryOperator):
def __init__(self, parser, name=''):
super(Capture, self).__init__(parser, name)
print("WARNING: Capture operator is experimental")
def __call__(self, text):
node, text = self.parser(text)
......@@ -887,21 +886,38 @@ class Capture(UnaryOperator):
return None, text
def nop_filter(stack):
    """Return the last element of ``stack`` without modifying it.

    This is the identity filter: the value on top of the stack is handed
    back exactly as it was stored. Indexing an empty list raises
    ``IndexError``.
    """
    newest = stack[-1]
    return newest
def counterpart_filter(stack):
    """Return the top of ``stack`` with opening brackets turned into closing ones.

    Every ``(``, ``[``, ``{`` and ``<`` in the last element of ``stack`` is
    replaced by ``)``, ``]``, ``}`` and ``>`` respectively; all other
    characters are kept as they are. Indexing an empty list raises
    ``IndexError``.

    The previous implementation mapped ``>`` to ``<``, i.e. the opposite
    direction of the other three pairs, so an opening ``<`` was never
    converted into its closing counterpart.
    """
    # A single translation pass replaces the chain of four ``replace`` calls.
    opening_to_closing = str.maketrans("([{<", ")]}>")
    return stack[-1].translate(opening_to_closing)
def accumulating_filter(stack):
    """Return all elements of ``stack`` concatenated into a single string.

    The elements are joined in iteration order (index 0 first) with no
    separator between them; an empty stack yields the empty string.
    """
    separator = ""
    return separator.join(stack)
class Retrieve(Parser):
def __init__(self, symbol, counterpart=None, name=''):
def __init__(self, symbol, retrieve_filter=None, name=''):
if not name:
name = symbol.name
super(Retrieve, self).__init__(name)
self.symbol = symbol
self.counterpart = counterpart if counterpart else lambda value: value
print("WARNING: Retrieve operator is experimental")
self.retrieve_filter = retrieve_filter if retrieve_filter else nop_filter
def __deepcopy__(self, memo):
return self.__class__(self.symbol, self.counterpart, self.name)
return self.__class__(self.symbol, self.retrieve_filter, self.name)
def __call__(self, text):
try:
stack = self.grammar.variables[self.symbol.name]
value = self.counterpart(self.pick_value(stack))
value = self.retrieve_filter(stack)
self.pick_value(stack)
except (KeyError, IndexError):
return Node(self, '').add_error(dsl_error_msg(self,
"%s undefined or exhausted" % self.symbol.name)), text
if text.startswith(value):
return Node(self, value), text[len(value):]
else:
......
......@@ -168,7 +168,7 @@ class Node:
self.error_flag = any(r.error_flag for r in self.result) if self.children else False
self._len = len(self.result) if not self.children else \
sum(child._len for child in self.children)
# self.pos = 0 # coninuous updating of pos values
# self.pos = 0 # continuous updating of pos values
self._pos = -1
def __str__(self):
......
......@@ -47,7 +47,7 @@ def mock_syntax_tree(sexpr):
while s[0] != ')':
if s[0] != '(': raise ValueError('"(" expected, not ' + s[:10])
# assert s[0] == '(', s
level = 1;
level = 1
i = 1
while level > 0:
if s[i] == '(':
......
@retrieve_counterpart = braces
@braces_filter = counterpart_filter
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
......
<document>
<Alternative>
<:Alternative>
<text>
<RegExp>
Anfang
</RegExp>
<:RegExp>Anfang </:RegExp>
</text>
</Alternative>
<Alternative>
</:Alternative>
<:Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
```
</RegExp>
<:RegExp>```</:RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<:ZeroOrMore>
<:Alternative>
<text>
<RegExp>
code block
</RegExp>
<:RegExp>code block </:RegExp>
</text>
</Alternative>
<Alternative>
<Sequence>
</:Alternative>
<:Alternative>
<:Sequence>
<delimiter_sign>
<RegExp>
``
</RegExp>
<:RegExp>``</:RegExp>
</delimiter_sign>
</Sequence>
</Alternative>
<Alternative>
</:Sequence>
</:Alternative>
<:Alternative>
<text>
<RegExp>
<- keine Ende-Zeichen !
</RegExp>
<:RegExp> <- keine Ende-Zeichen ! </:RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
```
</delimiter>
</:Alternative>
</:ZeroOrMore>
<delimiter>```</delimiter>
</codeblock>
</Alternative>
<Alternative>
</:Alternative>
<:Alternative>
<text>
<RegExp>
<:RegExp>
Ende
</RegExp>
</:RegExp>
</text>
</Alternative>
</:Alternative>
</document>
\ No newline at end of file
<document>
<Alternative>
<:Alternative>
<text>
<RegExp>
Anfang
</RegExp>
<:RegExp>Anfang </:RegExp>
</text>
</Alternative>
<Alternative>
</:Alternative>
<:Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
```
</RegExp>
<:RegExp>```</:RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<:ZeroOrMore>
<:Alternative>
<text>
<RegExp>
code block
</RegExp>
<:RegExp>code block </:RegExp>
</text>
</Alternative>
<Alternative>
<Sequence>
</:Alternative>
<:Alternative>
<:Sequence>
<delimiter_sign>
<RegExp>
``
</RegExp>
<:RegExp>``</:RegExp>
</delimiter_sign>
</Sequence>
</Alternative>
<Alternative>
</:Sequence>
</:Alternative>
<:Alternative>
<text>
<RegExp>
<- keine Ende-Zeichen !
</RegExp>
<:RegExp> <- keine Ende-Zeichen ! </:RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
```
</delimiter>
</:Alternative>
</:ZeroOrMore>
<delimiter>```</delimiter>
</codeblock>
</Alternative>
<Alternative>
</:Alternative>
<:Alternative>
<text>
<RegExp>
<:RegExp>
Ende
Absatz ohne
</RegExp>
</:RegExp>
</text>
</Alternative>
<Alternative>
</:Alternative>
<:Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
```
</RegExp>
<:RegExp>```</:RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<:ZeroOrMore>
<:Alternative>
<text>
<RegExp>
<:RegExp>
codeblock, aber
das stellt sich erst am Ende heraus...
Mehrzeliger
</RegExp>
</:RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
```
</delimiter>
</:Alternative>
</:ZeroOrMore>
<delimiter>```</delimiter>
</codeblock>
</Alternative>
<Alternative>
</:Alternative>
<:Alternative>
<text>
<RegExp>
code block
</RegExp>
<:RegExp>code block </:RegExp>
</text>
</Alternative>
</:Alternative>
</document>
\ No newline at end of file
......@@ -121,7 +121,7 @@ class TestPopRetrieve:
text = /[^`]+/
"""
mini_lang2 = """
@retrieve_counterpart = braces
@braces_filter=counterpart_filter
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment