Commit ca3dc76b authored by Eckhart Arnold
Browse files

- more general approach for retrieve-filter-functions

parent a217669e
...@@ -75,7 +75,8 @@ from DHParser.toolkit import logging, is_filename, load_if_file ...@@ -75,7 +75,8 @@ from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \\ from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\ Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\ Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
nop_filter, counterpart_filter, accumulating_filter
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\ from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\ remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\ no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\
......
...@@ -318,7 +318,7 @@ class EBNFCompiler(CompilerBase): ...@@ -318,7 +318,7 @@ class EBNFCompiler(CompilerBase):
'comment': '', 'comment': '',
'literalws': ['right'], 'literalws': ['right'],
'tokens': set(), # alt. 'scanner_tokens' 'tokens': set(), # alt. 'scanner_tokens'
'counterpart': set()} # alt. 'retrieve_counterpart' 'filter': dict()} # alt. 'retrieve_filter'
@property @property
def result(self): def result(self):
...@@ -407,14 +407,11 @@ class EBNFCompiler(CompilerBase): ...@@ -407,14 +407,11 @@ class EBNFCompiler(CompilerBase):
declarations = declarations[:-1] declarations = declarations[:-1]
declarations.append('"""') declarations.append('"""')
# add default functions for counterpart filters of pop or retrieve operators # add default functions for retrieve_filter filters of pop or retrieve operators
for symbol in self.directives['counterpart']: # for symbol, fun in self.directives['filter']:
# declarations.append('def %s_counterpart(value): \n' % symbol + # declarations.append(symbol + '_filter = lambda value: value.replace("(", ")")'
# ' return value.replace("(", ")").replace("[", "]")' # '.replace("[", "]").replace("{", "}").replace(">", "<")')
# '.replace("{", "}").replace(">", "<")\n')
declarations.append(symbol + '_counterpart = lambda value: value.replace("(", ")")'
'.replace("[", "]").replace("{", "}").replace(">", "<")')
# turn definitions into declarations in reverse order # turn definitions into declarations in reverse order
...@@ -540,8 +537,12 @@ class EBNFCompiler(CompilerBase): ...@@ -540,8 +537,12 @@ class EBNFCompiler(CompilerBase):
elif key in {'tokens', 'scanner_tokens'}: elif key in {'tokens', 'scanner_tokens'}:
self.directives['tokens'] |= self._compile(node.result[1]) self.directives['tokens'] |= self._compile(node.result[1])
elif key in {'counterpart', 'retrieve_counterpart'}: elif key.endswith('_filter'):
self.directives['counterpart'] |= self._compile(node.result[1]) filter_set = self._compile(node.result[1])
if not isinstance(filter_set, set) or len(filter_set) != 1:
node.add_error('Directive "%s" accepts exactly on symbol, not %s'
% (key, str(filter_set)))
self.directives['filter'][key[:-7]] = filter_set.pop()
else: else:
node.add_error('Unknown directive %s ! (Known ones are %s .)' % node.add_error('Unknown directive %s ! (Known ones are %s .)' %
...@@ -575,8 +576,8 @@ class EBNFCompiler(CompilerBase): ...@@ -575,8 +576,8 @@ class EBNFCompiler(CompilerBase):
node.add_error(('Retrieve Operator "%s" requires a symbol, ' node.add_error(('Retrieve Operator "%s" requires a symbol, '
'and not a %s.') % (prefix, str(arg.parser))) 'and not a %s.') % (prefix, str(arg.parser)))
return str(arg.result) return str(arg.result)
if str(arg) in self.directives['counterpart']: if str(arg) in self.directives['filter']:
custom_args = ['counterpart=%s_counterpart' % str(arg)] custom_args = ['retrieve_filter=%s' % self.directives['filter'][str(arg)]]
self.variables.add(arg.result) self.variables.add(arg.result)
elif len(node.result) > 2: elif len(node.result) > 2:
......
...@@ -875,7 +875,6 @@ class NegativeLookbehind(Lookbehind): ...@@ -875,7 +875,6 @@ class NegativeLookbehind(Lookbehind):
class Capture(UnaryOperator): class Capture(UnaryOperator):
def __init__(self, parser, name=''): def __init__(self, parser, name=''):
super(Capture, self).__init__(parser, name) super(Capture, self).__init__(parser, name)
print("WARNING: Capture operator is experimental")
def __call__(self, text): def __call__(self, text):
node, text = self.parser(text) node, text = self.parser(text)
...@@ -887,21 +886,38 @@ class Capture(UnaryOperator): ...@@ -887,21 +886,38 @@ class Capture(UnaryOperator):
return None, text return None, text
def nop_filter(stack):
    """Return the top-most entry of ``stack`` unchanged.

    ``Retrieve`` falls back to this filter when no ``retrieve_filter`` is
    passed to its constructor.

    Args:
        stack: sequence of captured values; the last item is the newest.

    Returns:
        The last item of ``stack``, unmodified.

    Raises:
        IndexError: if ``stack`` is empty.
    """
    newest = stack[-1]
    return newest
def counterpart_filter(stack):
    """Return the closing counterpart of the top-most entry of ``stack``.

    Every opening bracket character in the newest captured value is replaced
    by its closing partner: ``(`` -> ``)``, ``[`` -> ``]``, ``{`` -> ``}`` and
    ``<`` -> ``>``.  All other characters are left as they are.

    Args:
        stack: sequence of captured strings; the last item is the newest.

    Returns:
        The last item of ``stack`` with opening brackets turned into the
        matching closing brackets.

    Raises:
        IndexError: if ``stack`` is empty.
    """
    value = stack[-1]
    # The angle-bracket substitution must go from opening to closing like the
    # other three; the previous ">" -> "<" direction never closed a "<".
    return value.replace("(", ")").replace("[", "]").replace("{", "}").replace("<", ">")
def accumulating_filter(stack):
    """Concatenate all entries of ``stack`` into a single string.

    Args:
        stack: sequence of captured strings, oldest first.

    Returns:
        All items of ``stack`` joined in order without a separator; the empty
        string if ``stack`` is empty.
    """
    glue = ""
    return glue.join(stack)
class Retrieve(Parser): class Retrieve(Parser):
def __init__(self, symbol, counterpart=None, name=''): def __init__(self, symbol, retrieve_filter=None, name=''):
if not name: if not name:
name = symbol.name name = symbol.name
super(Retrieve, self).__init__(name) super(Retrieve, self).__init__(name)
self.symbol = symbol self.symbol = symbol
self.counterpart = counterpart if counterpart else lambda value: value self.retrieve_filter = retrieve_filter if retrieve_filter else nop_filter
print("WARNING: Retrieve operator is experimental")
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
return self.__class__(self.symbol, self.counterpart, self.name) return self.__class__(self.symbol, self.retrieve_filter, self.name)
def __call__(self, text): def __call__(self, text):
try:
stack = self.grammar.variables[self.symbol.name] stack = self.grammar.variables[self.symbol.name]
value = self.counterpart(self.pick_value(stack)) value = self.retrieve_filter(stack)
self.pick_value(stack)
except (KeyError, IndexError):
return Node(self, '').add_error(dsl_error_msg(self,
"%s undefined or exhausted" % self.symbol.name)), text
if text.startswith(value): if text.startswith(value):
return Node(self, value), text[len(value):] return Node(self, value), text[len(value):]
else: else:
......
...@@ -168,7 +168,7 @@ class Node: ...@@ -168,7 +168,7 @@ class Node:
self.error_flag = any(r.error_flag for r in self.result) if self.children else False self.error_flag = any(r.error_flag for r in self.result) if self.children else False
self._len = len(self.result) if not self.children else \ self._len = len(self.result) if not self.children else \
sum(child._len for child in self.children) sum(child._len for child in self.children)
# self.pos = 0 # coninuous updating of pos values # self.pos = 0 # continuous updating of pos values
self._pos = -1 self._pos = -1
def __str__(self): def __str__(self):
......
...@@ -47,7 +47,7 @@ def mock_syntax_tree(sexpr): ...@@ -47,7 +47,7 @@ def mock_syntax_tree(sexpr):
while s[0] != ')': while s[0] != ')':
if s[0] != '(': raise ValueError('"(" expected, not ' + s[:10]) if s[0] != '(': raise ValueError('"(" expected, not ' + s[:10])
# assert s[0] == '(', s # assert s[0] == '(', s
level = 1; level = 1
i = 1 i = 1
while level > 0: while level > 0:
if s[i] == '(': if s[i] == '(':
......
@retrieve_counterpart = braces @braces_filter = counterpart_filter
document = { text | codeblock } document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces braces = opening_braces
......
<document> <document>
<Alternative> <:Alternative>
<text> <text>
<RegExp> <:RegExp>Anfang </:RegExp>
Anfang
</RegExp>
</text> </text>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<codeblock> <codeblock>
<delimiter> <delimiter>
<delimiter_sign> <delimiter_sign>
<RegExp> <:RegExp>```</:RegExp>
```
</RegExp>
</delimiter_sign> </delimiter_sign>
</delimiter> </delimiter>
<ZeroOrMore> <:ZeroOrMore>
<Alternative> <:Alternative>
<text> <text>
<RegExp> <:RegExp>code block </:RegExp>
code block
</RegExp>
</text> </text>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<Sequence> <:Sequence>
<delimiter_sign> <delimiter_sign>
<RegExp> <:RegExp>``</:RegExp>
``
</RegExp>
</delimiter_sign> </delimiter_sign>
</Sequence> </:Sequence>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<text> <text>
<RegExp> <:RegExp> <- keine Ende-Zeichen ! </:RegExp>
<- keine Ende-Zeichen !
</RegExp>
</text> </text>
</Alternative> </:Alternative>
</ZeroOrMore> </:ZeroOrMore>
<delimiter> <delimiter>```</delimiter>
```
</delimiter>
</codeblock> </codeblock>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<text> <text>
<RegExp> <:RegExp>
Ende Ende
</RegExp> </:RegExp>
</text> </text>
</Alternative> </:Alternative>
</document> </document>
\ No newline at end of file
<document> <document>
<Alternative> <:Alternative>
<text> <text>
<RegExp> <:RegExp>Anfang </:RegExp>
Anfang
</RegExp>
</text> </text>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<codeblock> <codeblock>
<delimiter> <delimiter>
<delimiter_sign> <delimiter_sign>
<RegExp> <:RegExp>```</:RegExp>
```
</RegExp>
</delimiter_sign> </delimiter_sign>
</delimiter> </delimiter>
<ZeroOrMore> <:ZeroOrMore>
<Alternative> <:Alternative>
<text> <text>
<RegExp> <:RegExp>code block </:RegExp>
code block
</RegExp>
</text> </text>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<Sequence> <:Sequence>
<delimiter_sign> <delimiter_sign>
<RegExp> <:RegExp>``</:RegExp>
``
</RegExp>
</delimiter_sign> </delimiter_sign>
</Sequence> </:Sequence>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<text> <text>
<RegExp> <:RegExp> <- keine Ende-Zeichen ! </:RegExp>
<- keine Ende-Zeichen !
</RegExp>
</text> </text>
</Alternative> </:Alternative>
</ZeroOrMore> </:ZeroOrMore>
<delimiter> <delimiter>```</delimiter>
```
</delimiter>
</codeblock> </codeblock>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<text> <text>
<RegExp> <:RegExp>
Ende Ende
Absatz ohne Absatz ohne
</RegExp> </:RegExp>
</text> </text>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<codeblock> <codeblock>
<delimiter> <delimiter>
<delimiter_sign> <delimiter_sign>
<RegExp> <:RegExp>```</:RegExp>
```
</RegExp>
</delimiter_sign> </delimiter_sign>
</delimiter> </delimiter>
<ZeroOrMore> <:ZeroOrMore>
<Alternative> <:Alternative>
<text> <text>
<RegExp> <:RegExp>
codeblock, aber codeblock, aber
das stellt sich erst am Ende heraus... das stellt sich erst am Ende heraus...
Mehrzeliger Mehrzeliger
</RegExp> </:RegExp>
</text> </text>
</Alternative> </:Alternative>
</ZeroOrMore> </:ZeroOrMore>
<delimiter> <delimiter>```</delimiter>
```
</delimiter>
</codeblock> </codeblock>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<text> <text>
<RegExp> <:RegExp>code block </:RegExp>
code block
</RegExp>
</text> </text>
</Alternative> </:Alternative>
</document> </document>
\ No newline at end of file
...@@ -121,7 +121,7 @@ class TestPopRetrieve: ...@@ -121,7 +121,7 @@ class TestPopRetrieve:
text = /[^`]+/ text = /[^`]+/
""" """
mini_lang2 = """ mini_lang2 = """
@retrieve_counterpart = braces @braces_filter=counterpart_filter
document = { text | codeblock } document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces braces = opening_braces
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment