16.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 38f369b5 authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- Tests for extended Pop Retrieved; now tested

parent b2699837
...@@ -30,7 +30,8 @@ except ImportError: ...@@ -30,7 +30,8 @@ except ImportError:
from .__init__ import __version__ from .__init__ import __version__
from .toolkit import load_if_file, escape_re, md5, sane_parser_name from .toolkit import load_if_file, escape_re, md5, sane_parser_name
from .parsers import GrammarBase, mixin_comment, Forward, RE, NegativeLookahead, \ from .parsers import GrammarBase, mixin_comment, Forward, RE, NegativeLookahead, \
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase, \
Capture, Retrieve
from .syntaxtree import Node, remove_enclosing_delimiters, reduce_single_child, \ from .syntaxtree import Node, remove_enclosing_delimiters, reduce_single_child, \
replace_by_single_child, TOKEN_KEYWORD, remove_expendables, remove_tokens, flatten, \ replace_by_single_child, TOKEN_KEYWORD, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_KEYWORD forbid, assert_content, WHITESPACE_KEYWORD
...@@ -121,6 +122,8 @@ class EBNFGrammar(GrammarBase): ...@@ -121,6 +122,8 @@ class EBNFGrammar(GrammarBase):
root__ = syntax root__ = syntax
#TODO: Add Capture and Retrieve Validation: A variable mustn't be captured twice before retrieval?!?
EBNF_ASTTransform = { EBNF_ASTTransform = {
# AST Transformations for EBNF-grammar # AST Transformations for EBNF-grammar
"syntax": "syntax":
...@@ -148,14 +151,16 @@ EBNF_ASTTransform = { ...@@ -148,14 +151,16 @@ EBNF_ASTTransform = {
[remove_expendables, replace_by_single_child] [remove_expendables, replace_by_single_child]
} }
EBNF_semantic_validation = {
EBNF_AST_validation = {
# Semantic validation on the AST # Semantic validation on the AST
"repetition, option, oneormore": "repetition, option, oneormore":
[partial(forbid, child_tags=['repetition', 'option', 'oneormore']), [partial(forbid, child_tags=['repetition', 'option', 'oneormore']),
partial(assert_content, regex=r'(?!§)')], partial(assert_content, regex=r'(?!§)')],
} }
EBNF_ASTPipeline = [EBNF_ASTTransform, EBNF_semantic_validation]
EBNF_ASTPipeline = [EBNF_ASTTransform, EBNF_AST_validation]
class EBNFCompilerError(Exception): class EBNFCompilerError(Exception):
...@@ -198,7 +203,7 @@ class EBNFCompiler(CompilerBase): ...@@ -198,7 +203,7 @@ class EBNFCompiler(CompilerBase):
'comment': '', 'comment': '',
'literalws': ['right'], 'literalws': ['right'],
'tokens': set(), # alt. 'scanner_tokens' 'tokens': set(), # alt. 'scanner_tokens'
'complement': set()} # alt. 'retrieve_complement' 'counterpart': set()} # alt. 'retrieve_counterpart'
def gen_scanner_skeleton(self): def gen_scanner_skeleton(self):
name = self.grammar_name + "Scanner" name = self.grammar_name + "Scanner"
...@@ -276,12 +281,13 @@ class EBNFCompiler(CompilerBase): ...@@ -276,12 +281,13 @@ class EBNFCompiler(CompilerBase):
declarations = declarations[:-1] declarations = declarations[:-1]
declarations.append('"""') declarations.append('"""')
# add default functions for complement filters of pop or retrieve operators # add default functions for counterpart filters of pop or retrieve operators
for symbol in self.directives['complement']: for symbol in self.directives['counterpart']:
declarations.append('@staticmethod\n' # declarations.append('def %s_counterpart(value): \n' % symbol +
'def complement_%s(value): \n' % symbol + # ' return value.replace("(", ")").replace("[", "]")'
' return value.replace("(", ")").replace("[", "]")' # '.replace("{", "}").replace(">", "<")\n')
'.replace("{", "}").replace(">", "<")\n') declarations.append(symbol + '_counterpart = lambda value: value.replace("(", ")")'
'.replace("[", "]").replace("{", "}").replace(">", "<")')
# turn definitions into declarations in reverse order # turn definitions into declarations in reverse order
self.root = definitions[0][0] if definitions else "" self.root = definitions[0][0] if definitions else ""
...@@ -402,8 +408,8 @@ class EBNFCompiler(CompilerBase): ...@@ -402,8 +408,8 @@ class EBNFCompiler(CompilerBase):
elif key in {'tokens', 'scanner_tokens'}: elif key in {'tokens', 'scanner_tokens'}:
self.directives['tokens'] |= self.compile__(node.result[1]) self.directives['tokens'] |= self.compile__(node.result[1])
elif key in {'complement', 'retrieve_complement'}: elif key in {'counterpart', 'retrieve_counterpart'}:
self.directives['complement'] |= self.compile__(node.result[1]) self.directives['counterpart'] |= self.compile__(node.result[1])
else: else:
node.add_error('Unknown directive %s ! (Known ones are %s .)' % node.add_error('Unknown directive %s ! (Known ones are %s .)' %
...@@ -439,8 +445,8 @@ class EBNFCompiler(CompilerBase): ...@@ -439,8 +445,8 @@ class EBNFCompiler(CompilerBase):
node.add_error(('Retrieve Operator "%s" requires a symbol, ' node.add_error(('Retrieve Operator "%s" requires a symbol, '
'and not a %s.') % (prefix, str(arg.parser))) 'and not a %s.') % (prefix, str(arg.parser)))
return str(arg.result) return str(arg.result)
if str(arg) in self.directives['complement']: if str(arg) in self.directives['counterpart']:
custom_args = ['complement=%s_complement' % str(arg)] custom_args = ['counterpart=%s_counterpart' % str(arg)]
self.variables.add(arg.result) self.variables.add(arg.result)
elif len(node.result) > 2: elif len(node.result) > 2:
......
...@@ -859,23 +859,25 @@ class Capture(UnaryOperator): ...@@ -859,23 +859,25 @@ class Capture(UnaryOperator):
if node: if node:
stack = self.grammar.variables.setdefault(self.name, []) stack = self.grammar.variables.setdefault(self.name, [])
stack.append(str(node)) stack.append(str(node))
return Node(self, node), text return Node(self, node), text
else:
return None, text
class Retrieve(Parser): class Retrieve(Parser):
def __init__(self, symbol, complement=None, name=None): def __init__(self, symbol, counterpart=None, name=None):
if not name: if not name:
name = symbol.name name = symbol.name
super(Retrieve, self).__init__(name) super(Retrieve, self).__init__(name)
self.symbol = symbol self.symbol = symbol
self.complement = complement if complement else lambda value: value self.counterpart = counterpart if counterpart else lambda value: value
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
return self.__class__(self.symbol, self.complement, self.name) return self.__class__(self.symbol, self.counterpart, self.name)
def __call__(self, text): def __call__(self, text):
stack = self.grammar.variables[self.symbol.name] stack = self.grammar.variables[self.symbol.name]
value = self.complement(self.pick_value(stack)) value = self.counterpart(self.pick_value(stack))
if text.startswith(value): if text.startswith(value):
return Node(self, value), text[len(value):] return Node(self, value), text[len(value):]
else: else:
......
...@@ -153,6 +153,7 @@ class Node: ...@@ -153,6 +153,7 @@ class Node:
@property @property
def tag_name(self): def tag_name(self):
return self.parser.name or self.parser.__class__.__name__ return self.parser.name or self.parser.__class__.__name__
# ONLY FOR DEBUGGING: return self.parser.name + ':' + self.parser.__class__.__name__
@property @property
def result(self): def result(self):
...@@ -328,7 +329,6 @@ class Node: ...@@ -328,7 +329,6 @@ class Node:
Args: Args:
match_function (function): A function that takes as Node match_function (function): A function that takes as Node
object as argument and returns True or False object as argument and returns True or False
Yields: Yields:
Node: all nodes of the tree for which Node: all nodes of the tree for which
``match_function(node)`` returns True ``match_function(node)`` returns True
...@@ -340,6 +340,23 @@ class Node: ...@@ -340,6 +340,23 @@ class Node:
for nd in child.find(match_function): for nd in child.find(match_function):
yield nd yield nd
# def range(self, match_first, match_last):
# """Iterates over the range of nodes, starting from the first
# node for which ``match_first`` becomes True until the first node
# after this one for which ``match_last`` becomes true or until
# the end if it never does.
#
# Args:
# match_first (function): A function that takes a Node
# object as argument and returns True or False
# match_last (function): A function that takes a Node
# object as argument and returns True or False
# Yields:
# Node: the nodes of the tree that lie in the range delimited
# by ``match_first`` and ``match_last`` as described above
# """
def navigate(self, path): def navigate(self, path):
"""Yields the results of all descendant elements matched by """Yields the results of all descendant elements matched by
``path``, e.g. ``path``, e.g.
...@@ -405,8 +422,12 @@ def traverse(root_node, processing_table): ...@@ -405,8 +422,12 @@ def traverse(root_node, processing_table):
for child in node.result: for child in node.result:
traverse_recursive(child) traverse_recursive(child)
node.error_flag |= child.error_flag # propagate error flag node.error_flag |= child.error_flag # propagate error flag
sequence = table.get(node.parser.name, sequence = table.get('*', []) + \
table.get('~', [])) + table.get('*', []) table.get(node.parser.name, table.get('?', [])) + \
table.get('~', [])
# '*' always called (before any other processing function)
# '?' called for those nodes for which no (other) processing function is in the table
# '~' always called (after any other processing function)
for call in sequence: for call in sequence:
call(node) call(node)
......
@retrieve_counterpart = braces
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
opening_braces = /\{+/
closing_braces = /\}+/
text = /[^{}]+/
Anfang {{{code block }} <- keine Ende-Zeichen ! }}} Ende
Anfang {{{code block {{ <- keine Ende-Zeichen ! }}} Ende
Absatz ohne {{{ codeblock, aber
das stellt sich erst am Ende heraus...
Mehrzeliger }}}code block
\ No newline at end of file
<document> <document:ZeroOrMore>
<Alternative> <:Alternative>
<text> <text:RE>
<RegExp> <:RegExp>
Anfang Anfang
</RegExp> </:RegExp>
</text> </text:RE>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<codeblock> <codeblock:Sequence>
<delimiter> <delimiter:Capture>
<delimiter_sign> <delimiter_sign:RE>
<RegExp> <:RegExp>
``` ```
</RegExp> </:RegExp>
</delimiter_sign> </delimiter_sign:RE>
</delimiter> </delimiter:Capture>
<ZeroOrMore> <:ZeroOrMore>
<Alternative> <:Alternative>
<text> <text:RE>
<RegExp> <:RegExp>
code block code block
</RegExp> </:RegExp>
</text> </text:RE>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<Sequence> <:Sequence>
<delimiter_sign> <delimiter_sign:RE>
<RegExp> <:RegExp>
`` ``
</RegExp> </:RegExp>
</delimiter_sign> </delimiter_sign:RE>
</Sequence> </:Sequence>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<text> <text:RE>
<RegExp> <:RegExp>
<- keine Ende-Zeichen ! <- keine Ende-Zeichen !
</RegExp> </:RegExp>
</text> </text:RE>
</Alternative> </:Alternative>
</ZeroOrMore> </:ZeroOrMore>
<delimiter> <delimiter:Pop>
``` ```
</delimiter> </delimiter:Pop>
</codeblock> </codeblock:Sequence>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<text> <text:RE>
<RegExp> <:RegExp>
Ende Ende
</RegExp> </:RegExp>
</text> </text:RE>
</Alternative> </:Alternative>
</document> </document:ZeroOrMore>
\ No newline at end of file \ No newline at end of file
Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ebde Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ende
Absatz ohne ``` codeblock, aber Absatz ohne ``` codeblock, aber
das stellt sich erst am Ende herause... das stellt sich erst am Ende heraus...
Mehrzeliger ```code block Mehrzeliger ```code block
\ No newline at end of file
<document> <document:ZeroOrMore>
<Alternative> <:Alternative>
<text> <text:RE>
<RegExp> <:RegExp>
Anfang Anfang
</RegExp> </:RegExp>
</text> </text:RE>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<codeblock> <codeblock:Sequence>
<delimiter> <delimiter:Capture>
<delimiter_sign> <delimiter_sign:RE>
<RegExp> <:RegExp>
``` ```
</RegExp> </:RegExp>
</delimiter_sign> </delimiter_sign:RE>
</delimiter> </delimiter:Capture>
<ZeroOrMore> <:ZeroOrMore>
<Alternative> <:Alternative>
<text> <text:RE>
<RegExp> <:RegExp>
code block code block
</RegExp> </:RegExp>
</text> </text:RE>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<Sequence> <:Sequence>
<delimiter_sign> <delimiter_sign:RE>
<RegExp> <:RegExp>
`` ``
</RegExp> </:RegExp>
</delimiter_sign> </delimiter_sign:RE>
</Sequence> </:Sequence>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<text> <text:RE>
<RegExp> <:RegExp>
<- keine Ende-Zeichen ! <- keine Ende-Zeichen !
</RegExp> </:RegExp>
</text> </text:RE>
</Alternative> </:Alternative>
</ZeroOrMore> </:ZeroOrMore>
<delimiter> <delimiter:Pop>
``` ```
</delimiter> </delimiter:Pop>
</codeblock> </codeblock:Sequence>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<text> <text:RE>
<RegExp> <:RegExp>
Ebde Ende
Absatz ohne Absatz ohne
</RegExp> </:RegExp>
</text> </text:RE>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<codeblock> <codeblock:Sequence>
<delimiter> <delimiter:Capture>
<delimiter_sign> <delimiter_sign:RE>
<RegExp> <:RegExp>
``` ```
</RegExp> </:RegExp>
</delimiter_sign> </delimiter_sign:RE>
</delimiter> </delimiter:Capture>
<ZeroOrMore> <:ZeroOrMore>
<Alternative> <:Alternative>
<text> <text:RE>
<RegExp> <:RegExp>
codeblock, aber codeblock, aber
das stellt sich erst am Ende herause... das stellt sich erst am Ende heraus...
Mehrzeliger Mehrzeliger
</RegExp> </:RegExp>
</text> </text:RE>
</Alternative> </:Alternative>
</ZeroOrMore> </:ZeroOrMore>
<delimiter> <delimiter:Pop>
``` ```
</delimiter> </delimiter:Pop>
</codeblock> </codeblock:Sequence>
</Alternative> </:Alternative>
<Alternative> <:Alternative>
<text> <text:RE>
<RegExp> <:RegExp>
code block code block
</RegExp> </:RegExp>
</text> </text:RE>
</Alternative> </:Alternative>
</document> </document:ZeroOrMore>
\ No newline at end of file \ No newline at end of file
...@@ -49,7 +49,7 @@ class PopRetrieveGrammar(GrammarBase): ...@@ -49,7 +49,7 @@ class PopRetrieveGrammar(GrammarBase):
delimiter_sign = /`+/ delimiter_sign = /`+/
text = /[^`]+/ text = /[^`]+/
""" """
source_hash__ = "1312f8befacbc4d03bcc320644f37015" source_hash__ = "a418b812a36733a4713eb4e06322e1b5"
parser_initialization__ = "upon instatiation" parser_initialization__ = "upon instatiation"
COMMENT__ = r'' COMMENT__ = r''
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'') WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'')
......
...@@ -27,12 +27,13 @@ from DHParser.dsl import run_compiler, suite_outdated ...@@ -27,12 +27,13 @@ from DHParser.dsl import run_compiler, suite_outdated
if (not os.path.exists('PopRetrieve_compiler.py') or if (not os.path.exists('PopRetrieve_compiler.py') or
suite_outdated('PopRetrieve_compiler.py', 'PopRetrieve.ebnf')): suite_outdated('PopRetrieve_compiler.py', 'PopRetrieve.ebnf')):
print("recompiling parser") print("recompiling PopRetrieve parser")
errors = run_compiler("PopRetrieve.ebnf") errors = run_compiler("PopRetrieve.ebnf")
if errors: if errors:
print('\n\n'.join(errors)) print('\n\n'.join(errors))
sys.exit(1) sys.exit(1)
# from PopRetrieve_compiler import compile_PopRetrieve # from PopRetrieve_compiler import compile_PopRetrieve
# #
# print("PopRetrieveTest 1") # print("PopRetrieveTest 1")
...@@ -63,3 +64,32 @@ errors = run_compiler("PopRetrieveTest2.txt", 'PopRetrieve_compiler.py') ...@@ -63,3 +64,32 @@ errors = run_compiler("PopRetrieveTest2.txt", 'PopRetrieve_compiler.py')
if errors: if errors:
print(errors) print(errors)
sys.exit(1) sys.exit(1)
if (not os.path.exists('PopRetrieveComplement_compiler.py') or
suite_outdated('PopRetrieveComplement_compiler.py', 'PopRetrieveComplement.ebnf')):
print("recompiling PopRetrieveComplement parser")
errors = run_compiler("PopRetrieveComplement.ebnf")
if errors:
print('\n\n'.join(errors))
sys.exit(1)
from PopRetrieveComplement_compiler import compile_PopRetrieveComplement
print("PopRetrieveComplement Test 1")
result, errors, ast = compile_PopRetrieveComplement("PopRetrieveComplementTest.txt")
if errors:
print(errors)
sys.exit(1)
else:
print(result)
print("PopRetrieveComplement Test 2")
result, errors, ast = compile_PopRetrieveComplement("PopRetrieveComplementTest2.txt")
if errors:
print(errors)
sys.exit(1)
else:
print(result)
...@@ -100,21 +100,23 @@ class TestPopRetrieve: ...@@ -100,21 +100,23 @@ class TestPopRetrieve:
mini_language = """ mini_language = """
document = { text | codeblock }