In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 38f369b5 authored by Eckhart Arnold

- Tests for extended Pop Retrieved; now tested

parent b2699837
......@@ -30,7 +30,8 @@ except ImportError:
from .__init__ import __version__
from .toolkit import load_if_file, escape_re, md5, sane_parser_name
from .parsers import GrammarBase, mixin_comment, Forward, RE, NegativeLookahead, \
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase, \
Capture, Retrieve
from .syntaxtree import Node, remove_enclosing_delimiters, reduce_single_child, \
replace_by_single_child, TOKEN_KEYWORD, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_KEYWORD
......@@ -121,6 +122,8 @@ class EBNFGrammar(GrammarBase):
root__ = syntax
#TODO: Add Capture and Retrieve Validation: A variable mustn't be captured twice before retrieval?!?
EBNF_ASTTransform = {
# AST Transformations for EBNF-grammar
"syntax":
......@@ -148,14 +151,16 @@ EBNF_ASTTransform = {
[remove_expendables, replace_by_single_child]
}
EBNF_semantic_validation = {
EBNF_AST_validation = {
# Semantic validation on the AST
"repetition, option, oneormore":
[partial(forbid, child_tags=['repetition', 'option', 'oneormore']),
partial(assert_content, regex=r'(?!§)')],
}
EBNF_ASTPipeline = [EBNF_ASTTransform, EBNF_semantic_validation]
EBNF_ASTPipeline = [EBNF_ASTTransform, EBNF_AST_validation]
class EBNFCompilerError(Exception):
......@@ -198,7 +203,7 @@ class EBNFCompiler(CompilerBase):
'comment': '',
'literalws': ['right'],
'tokens': set(), # alt. 'scanner_tokens'
'complement': set()} # alt. 'retrieve_complement'
'counterpart': set()} # alt. 'retrieve_counterpart'
def gen_scanner_skeleton(self):
name = self.grammar_name + "Scanner"
......@@ -276,12 +281,13 @@ class EBNFCompiler(CompilerBase):
declarations = declarations[:-1]
declarations.append('"""')
# add default functions for complement filters of pop or retrieve operators
for symbol in self.directives['complement']:
declarations.append('@staticmethod\n'
'def complement_%s(value): \n' % symbol +
' return value.replace("(", ")").replace("[", "]")'
'.replace("{", "}").replace(">", "<")\n')
# add default functions for counterpart filters of pop or retrieve operators
for symbol in self.directives['counterpart']:
# declarations.append('def %s_counterpart(value): \n' % symbol +
# ' return value.replace("(", ")").replace("[", "]")'
# '.replace("{", "}").replace(">", "<")\n')
declarations.append(symbol + '_counterpart = lambda value: value.replace("(", ")")'
'.replace("[", "]").replace("{", "}").replace(">", "<")')
# turn definitions into declarations in reverse order
self.root = definitions[0][0] if definitions else ""
......@@ -402,8 +408,8 @@ class EBNFCompiler(CompilerBase):
elif key in {'tokens', 'scanner_tokens'}:
self.directives['tokens'] |= self.compile__(node.result[1])
elif key in {'complement', 'retrieve_complement'}:
self.directives['complement'] |= self.compile__(node.result[1])
elif key in {'counterpart', 'retrieve_counterpart'}:
self.directives['counterpart'] |= self.compile__(node.result[1])
else:
node.add_error('Unknown directive %s ! (Known ones are %s .)' %
......@@ -439,8 +445,8 @@ class EBNFCompiler(CompilerBase):
node.add_error(('Retrieve Operator "%s" requires a symbol, '
'and not a %s.') % (prefix, str(arg.parser)))
return str(arg.result)
if str(arg) in self.directives['complement']:
custom_args = ['complement=%s_complement' % str(arg)]
if str(arg) in self.directives['counterpart']:
custom_args = ['counterpart=%s_counterpart' % str(arg)]
self.variables.add(arg.result)
elif len(node.result) > 2:
......
......@@ -859,23 +859,25 @@ class Capture(UnaryOperator):
if node:
stack = self.grammar.variables.setdefault(self.name, [])
stack.append(str(node))
return Node(self, node), text
return Node(self, node), text
else:
return None, text
class Retrieve(Parser):
def __init__(self, symbol, complement=None, name=None):
def __init__(self, symbol, counterpart=None, name=None):
if not name:
name = symbol.name
super(Retrieve, self).__init__(name)
self.symbol = symbol
self.complement = complement if complement else lambda value: value
self.counterpart = counterpart if counterpart else lambda value: value
def __deepcopy__(self, memo):
return self.__class__(self.symbol, self.complement, self.name)
return self.__class__(self.symbol, self.counterpart, self.name)
def __call__(self, text):
stack = self.grammar.variables[self.symbol.name]
value = self.complement(self.pick_value(stack))
value = self.counterpart(self.pick_value(stack))
if text.startswith(value):
return Node(self, value), text[len(value):]
else:
......
......@@ -153,6 +153,7 @@ class Node:
@property
def tag_name(self):
return self.parser.name or self.parser.__class__.__name__
# ONLY FOR DEBUGGING: return self.parser.name + ':' + self.parser.__class__.__name__
@property
def result(self):
......@@ -328,7 +329,6 @@ class Node:
Args:
match_function (function): A function that takes a Node
object as argument and returns True or False
Yields:
Node: all nodes of the tree for which
``match_function(node)`` returns True
......@@ -340,6 +340,23 @@ class Node:
for nd in child.find(match_function):
yield nd
# def range(self, match_first, match_last):
# """Iterates over the range of nodes, starting from the first
# node for which ``match_first`` becomes True until the first node
# after this one for which ``match_last`` becomes true or until
# the end if it never does.
#
# Args:
# match_first (function): A function that takes as Node
# object as argument and returns True or False
# match_last (function): A function that takes as Node
# object as argument and returns True or False
# Yields:
# Node: all nodes of the tree for which
# ``match_function(node)`` returns True
# """
def navigate(self, path):
"""Yields the results of all descendant elements matched by
``path``, e.g.
......@@ -405,8 +422,12 @@ def traverse(root_node, processing_table):
for child in node.result:
traverse_recursive(child)
node.error_flag |= child.error_flag # propagate error flag
sequence = table.get(node.parser.name,
table.get('~', [])) + table.get('*', [])
sequence = table.get('*', []) + \
table.get(node.parser.name, table.get('?', [])) + \
table.get('~', [])
# '*' always called (before any other processing function)
# '?' called for those nodes for which no (other) processing function is in the table
# '~' always called (after any other processing function)
for call in sequence:
call(node)
......
@retrieve_counterpart = braces
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
opening_braces = /\{+/
closing_braces = /\}+/
text = /[^{}]+/
Anfang {{{code block }} <- keine Ende-Zeichen ! }}} Ende
Anfang {{{code block {{ <- keine Ende-Zeichen ! }}} Ende
Absatz ohne {{{ codeblock, aber
das stellt sich erst am Ende heraus...
Mehrzeliger }}}code block
\ No newline at end of file
<document>
<Alternative>
<text>
<RegExp>
<document:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
Anfang
</RegExp>
</text>
</Alternative>
<Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<codeblock:Sequence>
<delimiter:Capture>
<delimiter_sign:RE>
<:RegExp>
```
</RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<text>
<RegExp>
</:RegExp>
</delimiter_sign:RE>
</delimiter:Capture>
<:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
code block
</RegExp>
</text>
</Alternative>
<Alternative>
<Sequence>
<delimiter_sign>
<RegExp>
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<:Sequence>
<delimiter_sign:RE>
<:RegExp>
``
</RegExp>
</delimiter_sign>
</Sequence>
</Alternative>
<Alternative>
<text>
<RegExp>
</:RegExp>
</delimiter_sign:RE>
</:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
<- keine Ende-Zeichen !
</RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
</:RegExp>
</text:RE>
</:Alternative>
</:ZeroOrMore>
<delimiter:Pop>
```
</delimiter>
</codeblock>
</Alternative>
<Alternative>
<text>
<RegExp>
</delimiter:Pop>
</codeblock:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
Ende
</RegExp>
</text>
</Alternative>
</document>
\ No newline at end of file
</:RegExp>
</text:RE>
</:Alternative>
</document:ZeroOrMore>
\ No newline at end of file
Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ebde
Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende
Absatz ohne ``` codeblock, aber
das stellt sich erst am Ende herause...
das stellt sich erst am Ende heraus...
Mehrzeliger ```code block
\ No newline at end of file
<document>
<Alternative>
<text>
<RegExp>
<document:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
Anfang
</RegExp>
</text>
</Alternative>
<Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<codeblock:Sequence>
<delimiter:Capture>
<delimiter_sign:RE>
<:RegExp>
```
</RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<text>
<RegExp>
</:RegExp>
</delimiter_sign:RE>
</delimiter:Capture>
<:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
code block
</RegExp>
</text>
</Alternative>
<Alternative>
<Sequence>
<delimiter_sign>
<RegExp>
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<:Sequence>
<delimiter_sign:RE>
<:RegExp>
``
</RegExp>
</delimiter_sign>
</Sequence>
</Alternative>
<Alternative>
<text>
<RegExp>
</:RegExp>
</delimiter_sign:RE>
</:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
<- keine Ende-Zeichen !
</RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
</:RegExp>
</text:RE>
</:Alternative>
</:ZeroOrMore>
<delimiter:Pop>
```
</delimiter>
</codeblock>
</Alternative>
<Alternative>
<text>
<RegExp>
Ebde
</delimiter:Pop>
</codeblock:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
Ende
Absatz ohne
</RegExp>
</text>
</Alternative>
<Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<codeblock:Sequence>
<delimiter:Capture>
<delimiter_sign:RE>
<:RegExp>
```
</RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<text>
<RegExp>
</:RegExp>
</delimiter_sign:RE>
</delimiter:Capture>
<:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
codeblock, aber
das stellt sich erst am Ende herause...
das stellt sich erst am Ende heraus...
Mehrzeliger
</RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
</:RegExp>
</text:RE>
</:Alternative>
</:ZeroOrMore>
<delimiter:Pop>
```
</delimiter>
</codeblock>
</Alternative>
<Alternative>
<text>
<RegExp>
</delimiter:Pop>
</codeblock:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
code block
</RegExp>
</text>
</Alternative>
</document>
\ No newline at end of file
</:RegExp>
</text:RE>
</:Alternative>
</document:ZeroOrMore>
\ No newline at end of file
......@@ -49,7 +49,7 @@ class PopRetrieveGrammar(GrammarBase):
delimiter_sign = /`+/
text = /[^`]+/
"""
source_hash__ = "1312f8befacbc4d03bcc320644f37015"
source_hash__ = "a418b812a36733a4713eb4e06322e1b5"
parser_initialization__ = "upon instatiation"
COMMENT__ = r''
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'')
......
......@@ -27,12 +27,13 @@ from DHParser.dsl import run_compiler, suite_outdated
if (not os.path.exists('PopRetrieve_compiler.py') or
suite_outdated('PopRetrieve_compiler.py', 'PopRetrieve.ebnf')):
print("recompiling parser")
print("recompiling PopRetrieve parser")
errors = run_compiler("PopRetrieve.ebnf")
if errors:
print('\n\n'.join(errors))
sys.exit(1)
# from PopRetrieve_compiler import compile_PopRetrieve
#
# print("PopRetrieveTest 1")
......@@ -63,3 +64,32 @@ errors = run_compiler("PopRetrieveTest2.txt", 'PopRetrieve_compiler.py')
if errors:
print(errors)
sys.exit(1)
# NOTE(review): indentation was lost in this view; the statements below are
# kept exactly as found and only comments are added.
#
# (Re)generate 'PopRetrieveComplement_compiler.py' when the file does not
# exist or when suite_outdated() returns a true value for the compiler /
# grammar pair (presumably meaning the compiled module is stale -- confirm).
if (not os.path.exists('PopRetrieveComplement_compiler.py') or
suite_outdated('PopRetrieveComplement_compiler.py', 'PopRetrieveComplement.ebnf')):
print("recompiling PopRetrieveComplement parser")
errors = run_compiler("PopRetrieveComplement.ebnf")
if errors:
# Report the errors separated by blank lines and stop with exit status 1.
print('\n\n'.join(errors))
sys.exit(1)
# Imported only at this point, after the block above has had the chance to
# generate the module (presumably it may not exist earlier -- confirm).
from PopRetrieveComplement_compiler import compile_PopRetrieveComplement
# Test 1: run the generated compiler on the first test document; print the
# errors and exit with status 1 if any are returned, otherwise print the result.
print("PopRetrieveComplement Test 1")
result, errors, ast = compile_PopRetrieveComplement("PopRetrieveComplementTest.txt")
if errors:
print(errors)
sys.exit(1)
else:
print(result)
# Test 2: same procedure for the second test document.
print("PopRetrieveComplement Test 2")
result, errors, ast = compile_PopRetrieveComplement("PopRetrieveComplementTest2.txt")
if errors:
print(errors)
sys.exit(1)
else:
print(result)
......@@ -100,21 +100,23 @@ class TestPopRetrieve:
mini_language = """
document = { text | codeblock }
codeblock = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
delimiter = delimiter_sign
delimiter = delimiter_sign # never use delimiter between capture and retrieve!!!
delimiter_sign = /`+/
text = /[^`]+/
"""
mini_lang2 = """
@retrieve_filter = delimiter
@retrieve_counterpart = braces
document = { text | codeblock }
codeblock = braces { text | (!:braces closing_braces) } ::braces
braces = /\{+/
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
opening_braces = /\{+/
closing_braces = /\}+/
text = /[^`]+/
text = /[^{}]+/
"""
def setup(self):
self.minilang_parser = compileEBNF(self.mini_language)()
self.minilang_parser2 = compileEBNF(self.mini_lang2)()
@staticmethod
def opening_delimiter(node, name):
......@@ -126,6 +128,7 @@ class TestPopRetrieve:
def test_compile_mini_language(self):
assert self.minilang_parser
assert self.minilang_parser2
def test_single_line(self):
teststr = "Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende"
......@@ -136,7 +139,6 @@ class TestPopRetrieve:
assert delim == pop
if WRITE_LOGS:
syntax_tree.log("test_PopRetrieve_single_line", '.cst')
# self.minilang_parser.log_parsing_history("test_PopRetrieve_single_line")
def test_multi_line(self):
teststr = """
......@@ -154,7 +156,33 @@ class TestPopRetrieve:
assert delim == pop
if WRITE_LOGS:
syntax_tree.log("test_PopRetrieve_multi_line", '.cst')
# self.minilang_parser.log_parsing_history("test_PopRetrieve_multi_line")
def test_single_line_complement(self):
    """Parse a single-line document with a brace-delimited code block.

    The test passes when parsing yields no errors and the text of the
    first ``braces`` node found via ``self.opening_delimiter`` has the
    same length as, but differs from, the text of the first node matched
    by ``self.closing_delimiter``.
    """
    teststr = "Anfang {{{code block }} <- keine Ende-Zeichen ! }}} Ende"
    syntax_tree = self.minilang_parser2.parse(teststr)
    assert not syntax_tree.collect_errors()
    delim = str(next(syntax_tree.find(partial(self.opening_delimiter, name="braces"))))
    pop = str(next(syntax_tree.find(self.closing_delimiter)))
    assert len(delim) == len(pop) and delim != pop
    if WRITE_LOGS:
        # Log under a name of its own; "test_PopRetrieve_single_line" is
        # already used by test_single_line, so the two tests would
        # otherwise share one log name.
        syntax_tree.log("test_PopRetrieve_single_line_complement", '.cst')
def test_multi_line_complement(self):
    """Parse a multi-line document with brace-delimited code blocks.

    The test passes when parsing yields no errors and the text of the
    first ``braces`` node found via ``self.opening_delimiter`` has the
    same length as, but differs from, the text of the first node matched
    by ``self.closing_delimiter``.
    """
    # NOTE(review): the lines of the literal are kept flush left exactly as
    # they appear in the reviewed text -- confirm the intended leading
    # whitespace against the repository.
    teststr = """
Anfang {{{code block {{ <- keine Ende-Zeichen ! }}} Ende
Absatz ohne {{{ codeblock, aber
das stellt sich erst am Ende heraus...
Mehrzeliger }}}code block
"""
    syntax_tree = self.minilang_parser2.parse(teststr)
    assert not syntax_tree.collect_errors()
    delim = str(next(syntax_tree.find(partial(self.opening_delimiter, name="braces"))))
    pop = str(next(syntax_tree.find(self.closing_delimiter)))
    assert len(delim) == len(pop) and delim != pop
    if WRITE_LOGS:
        # Log under a name of its own; "test_PopRetrieve_multi_line" is
        # already used by test_multi_line, so the two tests would
        # otherwise share one log name.
        syntax_tree.log("test_PopRetrieve_multi_line_complement", '.cst')
class TestSemanticValidation:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment