Commit c230b2e2 authored by di68kap's avatar di68kap

- Complement-Filter for Retrieve-Operator added (not yet tested)

parent a0ac8cf6
......@@ -172,10 +172,10 @@ class EBNFCompiler(CompilerBase):
RESERVED_SYMBOLS = {TOKEN_KEYWORD, WHITESPACE_KEYWORD, COMMENT_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneuos AST transformation."
PREFIX_TABLE = [('§', 'Required'), ('&', 'Lookahead'),
('!', 'NegativeLookahead'), ('-&', 'Lookbehind'),
('-!', 'NegativeLookbehind'), ('::', 'Pop'),
(':', 'Retrieve')]
PREFIX_TABLE = {'§': 'Required',
'&': 'Lookahead', '!': 'NegativeLookahead',
'-&': 'Lookbehind', '-!': 'NegativeLookbehind',
'::': 'Pop', ':': 'Retrieve'}
WHITESPACE = {'horizontal': r'[\t ]*', # default: horizontal
'linefeed': r'[ \t]*\n?(?!\s*\n)[ \t]*',
'vertical': r'\s*'}
......@@ -197,7 +197,8 @@ class EBNFCompiler(CompilerBase):
self.directives = {'whitespace': self.WHITESPACE['horizontal'],
'comment': '',
'literalws': ['right'],
'tokens': set()}
'tokens': set(), # alt. 'scanner_tokens'
'complement': set()} # alt. 'retrieve_complement'
def gen_scanner_skeleton(self):
name = self.grammar_name + "Scanner"
......@@ -244,8 +245,7 @@ class EBNFCompiler(CompilerBase):
if self.variables:
for i in range(len(definitions)):
if definitions[i][0] in self.variables:
definitions[i] = (definitions[i][0], 'Capture(%s, "%s")' %
(definitions[1], definitions[0]))
definitions[i] = (definitions[i][0], 'Capture(%s)' % definitions[1])
self.definition_names = [defn[0] for defn in definitions]
definitions.append(('wspR__', WHITESPACE_KEYWORD
......@@ -276,6 +276,13 @@ class EBNFCompiler(CompilerBase):
declarations = declarations[:-1]
declarations.append('"""')
# add default functions for complement filters of pop or retrieve operators
for symbol in self.directives['complement']:
declarations.append('@staticmethod\n'
'def complement_%s(value): \n' % symbol +
' return value.replace("(", ")").replace("[", "]")'
'.replace("{", "}").replace(">", "<")\n')
# turn definitions into declarations in reverse order
self.root = definitions[0][0] if definitions else ""
definitions.reverse()
......@@ -332,7 +339,7 @@ class EBNFCompiler(CompilerBase):
self.rules.add(rule)
defn = self.compile__(node.result[1])
if rule in self.variables:
defn = 'Capture(%s, "%s")' % (defn, rule)
defn = 'Capture(%s)' % defn
self.variables.remove(rule)
except TypeError as error:
errmsg = EBNFCompiler.AST_ERROR + " (" + str(error) + ")\n" + node.as_sexpr()
......@@ -392,20 +399,23 @@ class EBNFCompiler(CompilerBase):
else {} if 'none' in value else value
self.directives[key] = list(ws)
elif key == 'tokens':
elif key in {'tokens', 'scanner_tokens'}:
self.directives['tokens'] |= self.compile__(node.result[1])
elif key in {'complement', 'retrieve_complement'}:
self.directives['complement'] |= self.compile__(node.result[1])
else:
node.add_error('Unknown directive %s ! (Known ones are %s .)' %
(key,
', '.join(list(self.directives.keys()))))
return ""
def non_terminal(self, node, parser_class, custom_args=None):
    """Compiles any non-terminal, where `parser_class` indicates the Parser
    class name for the particular non-terminal.

    Args:
        node: AST node whose children are compiled into the argument list.
        parser_class: name of the Parser class to instantiate in the
            generated grammar source code.
        custom_args: optional list of extra argument strings (e.g. a
            ``complement=...`` keyword for Retrieve/Pop operators) that are
            appended verbatim to the compiled arguments. Defaults to no
            extra arguments.

    Returns:
        A string of Python source code constructing the parser, e.g.
        ``"Alternative(a, b)"``.
    """
    # Avoid a mutable default argument: bind the empty list per call.
    extra = custom_args if custom_args is not None else []
    # NOTE(review): an earlier revision filtered out empty strings returned
    # by compile__ before joining; confirm compile__ never yields '' here.
    arguments = [self.compile__(r) for r in node.result] + extra
    return parser_class + '(' + ', '.join(arguments) + ')'
def expression(self, node):
......@@ -419,31 +429,34 @@ class EBNFCompiler(CompilerBase):
assert node.children
assert len(node.result) >= 2, node.as_sexpr()
prefix = node.result[0].result
custom_args = []
arg = node.result[-1]
if prefix in {'::', ':'}:
assert len(node.result) == 2
arg = node.result[-1]
argstr = str(arg)
if arg.parser.name != 'symbol':
node.add_error(('Retrieve Operator "%s" requires a symbols, '
'and not a %s.') % (prefix, str(arg.parser)))
return str(arg.result)
if str(arg) in self.directives['complement']:
custom_args = ['complement=%s_complement' % str(arg)]
self.variables.add(arg.result)
if len(node.result) > 2:
elif len(node.result) > 2:
# shift = (Node(node.parser, node.result[1].result),)
# node.result[1].result = shift + node.result[2:]
node.result[1].result = (Node(node.result[1].parser,
node.result[1].result),) \
node.result[1].result = (Node(node.result[1].parser, node.result[1].result),) \
+ node.result[2:]
node.result[1].parser = node.parser
node.result = (node.result[0], node.result[1])
node.result = node.result[1:]
for match, parser_class in self.PREFIX_TABLE:
if prefix == match:
return self.non_terminal(node, parser_class)
assert False, ("Unknown prefix %s \n" % prefix) + node.as_sexpr()
try:
parser_class = self.PREFIX_TABLE[prefix]
return self.non_terminal(node, parser_class, custom_args)
except KeyError:
node.add_error('Unknown prefix "%s".' % prefix)
def option(self, node):
    """Compile an EBNF option (``[ ... ]``) into an ``Optional`` parser."""
    return self.non_terminal(node, parser_class='Optional')
......
......@@ -48,10 +48,9 @@ Berlin Heidelberg 2008.
Juancarlo Añez: grako, a PEG parser generator in Python,
https://bitbucket.org/apalala/grako
"""
import copy
import os
try:
......@@ -864,18 +863,19 @@ class Capture(UnaryOperator):
class Retrieve(Parser):
def __init__(self, symbol, name=None):
def __init__(self, symbol, complement=None, name=None):
if not name:
name = symbol.name
super(Retrieve, self).__init__(name)
self.symbol = symbol # if isinstance(symbol, str) else symbol.name
self.symbol = symbol
self.complement = complement if complement else lambda value: value
def __deepcopy__(self, memo):
    """Return a copy of this parser, preserving the captured symbol, the
    complement filter and the parser name.

    The stale pre-change return (which omitted ``complement``) has been
    removed: it shadowed the updated call and no longer matched the
    ``__init__(symbol, complement, name)`` signature.
    """
    return self.__class__(self.symbol, self.complement, self.name)
def __call__(self, text):
symbol = self.symbol if isinstance(self.symbol, str) \
else self.symbol.name
stack = self.grammar.variables[symbol]
value = self.pick_value(stack)
stack = self.grammar.variables[self.symbol.name]
value = self.complement(self.pick_value(stack))
if text.startswith(value):
return Node(self, value), text[len(value):]
else:
......
......@@ -24,7 +24,7 @@ flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&'
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
regexchain = "<" expression §">" # compiles "expression" into a singular regular expression
regexchain = ">" expression §"<" # compiles "expression" into a singular regular expression
oneormore = "{" expression "}+"
repetition = "{" expression §"}"
option = "[" expression §"]"
......
......@@ -40,9 +40,9 @@
</text>
</Alternative>
</ZeroOrMore>
<Pop>
<delimiter>
```
</Pop>
</delimiter>
</codeblock>
</Alternative>
<Alternative>
......
......@@ -40,9 +40,9 @@
</text>
</Alternative>
</ZeroOrMore>
<Pop>
<delimiter>
```
</Pop>
</delimiter>
</codeblock>
</Alternative>
<Alternative>
......@@ -75,9 +75,9 @@
</text>
</Alternative>
</ZeroOrMore>
<Pop>
<delimiter>
```
</Pop>
</delimiter>
</codeblock>
</Alternative>
<Alternative>
......
......@@ -57,7 +57,7 @@ class PopRetrieveGrammar(GrammarBase):
wspR__ = WSP__
text = RE('[^`]+', wR='')
delimiter_sign = RE('`+', wR='')
delimiter = Capture(delimiter_sign, "delimiter")
delimiter = Capture(delimiter_sign)
codeblock = Sequence(delimiter, ZeroOrMore(Alternative(text, Sequence(NegativeLookahead(Retrieve(delimiter)), delimiter_sign))), Pop(delimiter))
document = ZeroOrMore(Alternative(text, codeblock))
root__ = document
......
......@@ -20,11 +20,12 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
from functools import partial
import os
import sys
sys.path.append(os.path.abspath('../../'))
from DHParser.syntaxtree import traverse
from DHParser.parsers import full_compilation, WHITESPACE_KEYWORD
from DHParser.parsers import full_compilation, Retrieve, WHITESPACE_KEYWORD
from DHParser.ebnf import EBNFGrammar, EBNF_ASTPipeline, EBNFCompiler
from DHParser.dsl import compileEBNF
......@@ -103,10 +104,26 @@ class TestPopRetrieve:
delimiter_sign = /`+/
text = /[^`]+/
"""
mini_lang2 = """
@retrieve_filter = delimiter
document = { text | codeblock }
codeblock = braces { text | (!:braces closing_braces) } ::braces
braces = /\{+/
closing_braces = /\}+/
text = /[^`]+/
"""
def setup(self):
    """Compile the mini language grammar into a fresh parser before each test."""
    parser_factory = compileEBNF(self.mini_language)
    self.minilang_parser = parser_factory()
@staticmethod
def opening_delimiter(node, name):
    """Return True for nodes tagged *name* that were not matched by a
    Retrieve parser, i.e. the opening (capturing) occurrence of a delimiter."""
    if node.tag_name != name:
        return False
    return not isinstance(node.parser, Retrieve)
@staticmethod
def closing_delimiter(node):
    """Return True if *node* was matched by a Retrieve parser, i.e. it is
    a closing occurrence of a previously captured delimiter."""
    producing_parser = node.parser
    return isinstance(producing_parser, Retrieve)
def test_compile_mini_language(self):
assert self.minilang_parser
......@@ -114,8 +131,8 @@ class TestPopRetrieve:
teststr = "Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende"
syntax_tree = self.minilang_parser.parse(teststr)
assert not syntax_tree.collect_errors()
delim = str(next(syntax_tree.find(lambda node: node.tag_name == "delimiter")))
pop = str(next(syntax_tree.find(lambda node: node.tag_name == "Pop")))
delim = str(next(syntax_tree.find(partial(self.opening_delimiter, name="delimiter"))))
pop = str(next(syntax_tree.find(self.closing_delimiter)))
assert delim == pop
if WRITE_LOGS:
syntax_tree.log("test_PopRetrieve_single_line", '.cst')
......@@ -132,8 +149,8 @@ class TestPopRetrieve:
"""
syntax_tree = self.minilang_parser.parse(teststr)
assert not syntax_tree.collect_errors()
delim = str(next(syntax_tree.find(lambda node: node.tag_name == "delimiter")))
pop = str(next(syntax_tree.find(lambda node: node.tag_name == "Pop")))
delim = str(next(syntax_tree.find(partial(self.opening_delimiter, name="delimiter"))))
pop = str(next(syntax_tree.find(self.closing_delimiter)))
assert delim == pop
if WRITE_LOGS:
syntax_tree.log("test_PopRetrieve_multi_line", '.cst')
......@@ -172,4 +189,4 @@ class TestCompilerErrors:
if __name__ == "__main__":
from run import runner
runner("TestEBNFParser", globals())
runner("TestPopRetrieve", globals())
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment