Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit 38f369b5 authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- Tests for extended Pop Retrieved; now tested

parent b2699837
......@@ -30,7 +30,8 @@ except ImportError:
from .__init__ import __version__
from .toolkit import load_if_file, escape_re, md5, sane_parser_name
from .parsers import GrammarBase, mixin_comment, Forward, RE, NegativeLookahead, \
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase, \
Capture, Retrieve
from .syntaxtree import Node, remove_enclosing_delimiters, reduce_single_child, \
replace_by_single_child, TOKEN_KEYWORD, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_KEYWORD
......@@ -121,6 +122,8 @@ class EBNFGrammar(GrammarBase):
root__ = syntax
# TODO: Add Capture and Retrieve validation: a variable mustn't be captured twice before retrieval?!?
EBNF_ASTTransform = {
# AST Transformations for EBNF-grammar
"syntax":
......@@ -148,14 +151,16 @@ EBNF_ASTTransform = {
[remove_expendables, replace_by_single_child]
}
EBNF_semantic_validation = {
EBNF_AST_validation = {
# Semantic validation on the AST
"repetition, option, oneormore":
[partial(forbid, child_tags=['repetition', 'option', 'oneormore']),
partial(assert_content, regex=r'(?!§)')],
}
EBNF_ASTPipeline = [EBNF_ASTTransform, EBNF_semantic_validation]
EBNF_ASTPipeline = [EBNF_ASTTransform, EBNF_AST_validation]
class EBNFCompilerError(Exception):
......@@ -198,7 +203,7 @@ class EBNFCompiler(CompilerBase):
'comment': '',
'literalws': ['right'],
'tokens': set(), # alt. 'scanner_tokens'
'complement': set()} # alt. 'retrieve_complement'
'counterpart': set()} # alt. 'retrieve_counterpart'
def gen_scanner_skeleton(self):
name = self.grammar_name + "Scanner"
......@@ -276,12 +281,13 @@ class EBNFCompiler(CompilerBase):
declarations = declarations[:-1]
declarations.append('"""')
# add default functions for complement filters of pop or retrieve operators
for symbol in self.directives['complement']:
declarations.append('@staticmethod\n'
'def complement_%s(value): \n' % symbol +
' return value.replace("(", ")").replace("[", "]")'
'.replace("{", "}").replace(">", "<")\n')
# add default functions for counterpart filters of pop or retrieve operators
for symbol in self.directives['counterpart']:
# declarations.append('def %s_counterpart(value): \n' % symbol +
# ' return value.replace("(", ")").replace("[", "]")'
# '.replace("{", "}").replace(">", "<")\n')
declarations.append(symbol + '_counterpart = lambda value: value.replace("(", ")")'
'.replace("[", "]").replace("{", "}").replace(">", "<")')
# turn definitions into declarations in reverse order
self.root = definitions[0][0] if definitions else ""
......@@ -402,8 +408,8 @@ class EBNFCompiler(CompilerBase):
elif key in {'tokens', 'scanner_tokens'}:
self.directives['tokens'] |= self.compile__(node.result[1])
elif key in {'complement', 'retrieve_complement'}:
self.directives['complement'] |= self.compile__(node.result[1])
elif key in {'counterpart', 'retrieve_counterpart'}:
self.directives['counterpart'] |= self.compile__(node.result[1])
else:
node.add_error('Unknown directive %s ! (Known ones are %s .)' %
......@@ -439,8 +445,8 @@ class EBNFCompiler(CompilerBase):
node.add_error(('Retrieve Operator "%s" requires a symbol, '
'and not a %s.') % (prefix, str(arg.parser)))
return str(arg.result)
if str(arg) in self.directives['complement']:
custom_args = ['complement=%s_complement' % str(arg)]
if str(arg) in self.directives['counterpart']:
custom_args = ['counterpart=%s_counterpart' % str(arg)]
self.variables.add(arg.result)
elif len(node.result) > 2:
......
......@@ -860,22 +860,24 @@ class Capture(UnaryOperator):
stack = self.grammar.variables.setdefault(self.name, [])
stack.append(str(node))
return Node(self, node), text
else:
return None, text
class Retrieve(Parser):
def __init__(self, symbol, complement=None, name=None):
def __init__(self, symbol, counterpart=None, name=None):
if not name:
name = symbol.name
super(Retrieve, self).__init__(name)
self.symbol = symbol
self.complement = complement if complement else lambda value: value
self.counterpart = counterpart if counterpart else lambda value: value
def __deepcopy__(self, memo):
return self.__class__(self.symbol, self.complement, self.name)
return self.__class__(self.symbol, self.counterpart, self.name)
def __call__(self, text):
stack = self.grammar.variables[self.symbol.name]
value = self.complement(self.pick_value(stack))
value = self.counterpart(self.pick_value(stack))
if text.startswith(value):
return Node(self, value), text[len(value):]
else:
......
......@@ -153,6 +153,7 @@ class Node:
@property
def tag_name(self):
return self.parser.name or self.parser.__class__.__name__
# ONLY FOR DEBUGGING: return self.parser.name + ':' + self.parser.__class__.__name__
@property
def result(self):
......@@ -328,7 +329,6 @@ class Node:
Args:
match_function (function): A function that takes a Node
object as argument and returns True or False
Yields:
Node: all nodes of the tree for which
``match_function(node)`` returns True
......@@ -340,6 +340,23 @@ class Node:
for nd in child.find(match_function):
yield nd
# def range(self, match_first, match_last):
# """Iterates over the range of nodes, starting from the first
# node for which ``match_first`` becomes True until the first node
# after this one for which ``match_last`` becomes true or until
# the end if it never does.
#
# Args:
# match_first (function): A function that takes a Node
# object as argument and returns True or False
# match_last (function): A function that takes a Node
# object as argument and returns True or False
# Yields:
# Node: all nodes of the tree within the range delimited by
# ``match_first`` and ``match_last``
# """
def navigate(self, path):
"""Yields the results of all descendant elements matched by
``path``, e.g.
......@@ -405,8 +422,12 @@ def traverse(root_node, processing_table):
for child in node.result:
traverse_recursive(child)
node.error_flag |= child.error_flag # propagate error flag
sequence = table.get(node.parser.name,
table.get('~', [])) + table.get('*', [])
sequence = table.get('*', []) + \
table.get(node.parser.name, table.get('?', [])) + \
table.get('~', [])
# '*' always called (before any other processing function)
# '?' called for those nodes for which no (other) processing function is in the table
# '~' always called (after any other processing function)
for call in sequence:
call(node)
......
@retrieve_counterpart = braces
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
opening_braces = /\{+/
closing_braces = /\}+/
text = /[^{}]+/
Anfang {{{code block }} <- keine Ende-Zeichen ! }}} Ende
Anfang {{{code block {{ <- keine Ende-Zeichen ! }}} Ende
Absatz ohne {{{ codeblock, aber
das stellt sich erst am Ende heraus...
Mehrzeliger }}}code block
\ No newline at end of file
<document>
<Alternative>
<text>
<RegExp>
<document:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
Anfang
</RegExp>
</text>
</Alternative>
<Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<codeblock:Sequence>
<delimiter:Capture>
<delimiter_sign:RE>
<:RegExp>
```
</RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<text>
<RegExp>
</:RegExp>
</delimiter_sign:RE>
</delimiter:Capture>
<:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
code block
</RegExp>
</text>
</Alternative>
<Alternative>
<Sequence>
<delimiter_sign>
<RegExp>
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<:Sequence>
<delimiter_sign:RE>
<:RegExp>
``
</RegExp>
</delimiter_sign>
</Sequence>
</Alternative>
<Alternative>
<text>
<RegExp>
</:RegExp>
</delimiter_sign:RE>
</:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
<- keine Ende-Zeichen !
</RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
</:RegExp>
</text:RE>
</:Alternative>
</:ZeroOrMore>
<delimiter:Pop>
```
</delimiter>
</codeblock>
</Alternative>
<Alternative>
<text>
<RegExp>
</delimiter:Pop>
</codeblock:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
Ende
</RegExp>
</text>
</Alternative>
</document>
\ No newline at end of file
</:RegExp>
</text:RE>
</:Alternative>
</document:ZeroOrMore>
\ No newline at end of file
Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ebde
Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende
Absatz ohne ``` codeblock, aber
das stellt sich erst am Ende herause...
das stellt sich erst am Ende heraus...
Mehrzeliger ```code block
\ No newline at end of file
<document>
<Alternative>
<text>
<RegExp>
<document:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
Anfang
</RegExp>
</text>
</Alternative>
<Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<codeblock:Sequence>
<delimiter:Capture>
<delimiter_sign:RE>
<:RegExp>
```
</RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<text>
<RegExp>
</:RegExp>
</delimiter_sign:RE>
</delimiter:Capture>
<:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
code block
</RegExp>
</text>
</Alternative>
<Alternative>
<Sequence>
<delimiter_sign>
<RegExp>
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<:Sequence>
<delimiter_sign:RE>
<:RegExp>
``
</RegExp>
</delimiter_sign>
</Sequence>
</Alternative>
<Alternative>
<text>
<RegExp>
</:RegExp>
</delimiter_sign:RE>
</:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
<- keine Ende-Zeichen !
</RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
</:RegExp>
</text:RE>
</:Alternative>
</:ZeroOrMore>
<delimiter:Pop>
```
</delimiter>
</codeblock>
</Alternative>
<Alternative>
<text>
<RegExp>
Ebde
</delimiter:Pop>
</codeblock:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
Ende
Absatz ohne
</RegExp>
</text>
</Alternative>
<Alternative>
<codeblock>
<delimiter>
<delimiter_sign>
<RegExp>
</:RegExp>
</text:RE>
</:Alternative>
<:Alternative>
<codeblock:Sequence>
<delimiter:Capture>
<delimiter_sign:RE>
<:RegExp>
```
</RegExp>
</delimiter_sign>
</delimiter>
<ZeroOrMore>
<Alternative>
<text>
<RegExp>
</:RegExp>
</delimiter_sign:RE>
</delimiter:Capture>
<:ZeroOrMore>
<:Alternative>
<text:RE>
<:RegExp>
codeblock, aber
das stellt sich erst am Ende herause...
das stellt sich erst am Ende heraus...
Mehrzeliger
</RegExp>
</text>
</Alternative>
</ZeroOrMore>
<delimiter>
</:RegExp>
</text:RE>
</:Alternative>
</:ZeroOrMore>
<delimiter:Pop>
```
</delimiter>
</codeblock>
</Alternative>
<Alternative>
<text>
<RegExp>
</delimiter:Pop>
</codeblock:Sequence>
</:Alternative>
<:Alternative>
<text:RE>
<:RegExp>
code block
</RegExp>
</text>
</Alternative>
</document>
\ No newline at end of file
</:RegExp>
</text:RE>
</:Alternative>
</document:ZeroOrMore>
\ No newline at end of file
......@@ -49,7 +49,7 @@ class PopRetrieveGrammar(GrammarBase):
delimiter_sign = /`+/
text = /[^`]+/
"""
source_hash__ = "1312f8befacbc4d03bcc320644f37015"
source_hash__ = "a418b812a36733a4713eb4e06322e1b5"
parser_initialization__ = "upon instatiation"
COMMENT__ = r''
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'')
......
......@@ -27,12 +27,13 @@ from DHParser.dsl import run_compiler, suite_outdated
if (not os.path.exists('PopRetrieve_compiler.py') or
suite_outdated('PopRetrieve_compiler.py', 'PopRetrieve.ebnf')):
print("recompiling parser")
print("recompiling PopRetrieve parser")
errors = run_compiler("PopRetrieve.ebnf")
if errors:
print('\n\n'.join(errors))
sys.exit(1)
# from PopRetrieve_compiler import compile_PopRetrieve
#
# print("PopRetrieveTest 1")
......@@ -63,3 +64,32 @@ errors = run_compiler("PopRetrieveTest2.txt", 'PopRetrieve_compiler.py')
if errors:
print(errors)
sys.exit(1)
# Regenerate the PopRetrieveComplement parser when the generated module does
# not exist yet or when suite_outdated(...) returns a true value for it.
parser_missing = not os.path.exists('PopRetrieveComplement_compiler.py')
if parser_missing or suite_outdated('PopRetrieveComplement_compiler.py',
                                    'PopRetrieveComplement.ebnf'):
    print("recompiling PopRetrieveComplement parser")
    errors = run_compiler("PopRetrieveComplement.ebnf")
    if errors:
        # Compilation of the grammar failed: show all messages and abort.
        print('\n\n'.join(errors))
        sys.exit(1)

# Imported only at this point, because the module may just have been
# (re-)generated by the step above.
from PopRetrieveComplement_compiler import compile_PopRetrieveComplement

# Run the generated compiler on both test documents; stop at the first
# document that yields errors, otherwise print the compilation result.
for headline, test_file in (
        ("PopRetrieveComplement Test 1", "PopRetrieveComplementTest.txt"),
        ("PopRetrieveComplement Test 2", "PopRetrieveComplementTest2.txt")):
    print(headline)
    result, errors, ast = compile_PopRetrieveComplement(test_file)
    if errors:
        print(errors)
        sys.exit(1)
    print(result)
......@@ -100,21 +100,23 @@ class TestPopRetrieve:
mini_language = """
document = { text | codeblock }
codeblock = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
delimiter = delimiter_sign
delimiter = delimiter_sign # never use delimiter between capture and retrieve!!!
delimiter_sign = /`+/
text = /[^`]+/
"""
mini_lang2 = """
@retrieve_filter = delimiter
@retrieve_counterpart = braces
document = { text | codeblock }
codeblock = braces { text | (!:braces closing_braces) } ::braces
braces = /\{+/
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
opening_braces = /\{+/
closing_braces = /\}+/
text = /[^`]+/
text = /[^{}]+/
"""
def setup(self):
    """Create one parser instance per grammar used by the tests.

    Each grammar source (``mini_language`` and ``mini_lang2``) is passed to
    ``compileEBNF`` and the returned object is called without arguments;
    the results are stored as ``minilang_parser`` and ``minilang_parser2``.
    """
    self.minilang_parser = compileEBNF(self.mini_language)()
    self.minilang_parser2 = compileEBNF(self.mini_lang2)()
@staticmethod
def opening_delimiter(node, name):
......@@ -126,6 +128,7 @@ class TestPopRetrieve:
def test_compile_mini_language(self):
    """Check that both parser attributes hold a truthy value."""
    assert self.minilang_parser
    assert self.minilang_parser2
def test_single_line(self):
teststr = "Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende"
......@@ -136,7 +139,6 @@ class TestPopRetrieve:
assert delim == pop
if WRITE_LOGS:
syntax_tree.log("test_PopRetrieve_single_line", '.cst')
# self.minilang_parser.log_parsing_history("test_PopRetrieve_single_line")
def test_multi_line(self):
teststr = """
......@@ -154,7 +156,33 @@ class TestPopRetrieve:
assert delim == pop
if WRITE_LOGS:
syntax_tree.log("test_PopRetrieve_multi_line", '.cst')
# self.minilang_parser.log_parsing_history("test_PopRetrieve_multi_line")
def test_single_line_complement(self):
    """Parse a one-line brace code block with the counterpart grammar.

    The text is parsed with ``minilang_parser2`` and must not produce any
    errors.  The first node matched by ``opening_delimiter`` (for the name
    "braces") and the first node matched by ``closing_delimiter`` must have
    the same length but different content.
    """
    teststr = "Anfang {{{code block }} <- keine Ende-Zeichen ! }}} Ende"
    syntax_tree = self.minilang_parser2.parse(teststr)
    assert not syntax_tree.collect_errors()
    delim = str(next(syntax_tree.find(partial(self.opening_delimiter, name="braces"))))
    pop = str(next(syntax_tree.find(self.closing_delimiter)))
    # Two separate assertions, so that a failure tells which property broke.
    assert len(delim) == len(pop)
    assert delim != pop
    if WRITE_LOGS:
        # A log name of its own: "test_PopRetrieve_single_line" is already
        # used by test_single_line and would be overwritten otherwise.
        syntax_tree.log("test_PopRetrieve_single_line_complement", '.cst')
def test_multi_line_complement(self):
    """Parse a multi-line text with brace code blocks (counterpart grammar).

    The text is parsed with ``minilang_parser2`` and must not produce any
    errors.  The first node matched by ``opening_delimiter`` (for the name
    "braces") and the first node matched by ``closing_delimiter`` must have
    the same length but different content.
    """
    teststr = """
Anfang {{{code block {{ <- keine Ende-Zeichen ! }}} Ende
Absatz ohne {{{ codeblock, aber
das stellt sich erst am Ende heraus...
Mehrzeliger }}}code block
"""
    syntax_tree = self.minilang_parser2.parse(teststr)
    assert not syntax_tree.collect_errors()
    delim = str(next(syntax_tree.find(partial(self.opening_delimiter, name="braces"))))
    pop = str(next(syntax_tree.find(self.closing_delimiter)))
    # Two separate assertions, so that a failure tells which property broke.
    assert len(delim) == len(pop)
    assert delim != pop
    if WRITE_LOGS:
        # A log name of its own: "test_PopRetrieve_multi_line" is already
        # used by test_multi_line and would be overwritten otherwise.
        syntax_tree.log("test_PopRetrieve_multi_line_complement", '.cst')
class TestSemanticValidation:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment