Commit fc96335e authored by Eckhart Arnold

- some bug fixes, mostly related to capture-retrieve

parent 31331ec8
......@@ -76,7 +76,7 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
nop_filter, counterpart_filter, accumulating_filter, ScannerFunc
last_value, counterpart, accumulate, ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\
no_transformation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\
......
......@@ -196,8 +196,10 @@ def get_ebnf_grammar() -> EBNFGrammar:
EBNF_transformation_table = {
# AST Transformations for EBNF-grammar
"syntax":
"+":
remove_expendables,
"syntax":
[],
"directive, definition":
remove_tokens('@', '='),
"expression":
......@@ -211,13 +213,13 @@ EBNF_transformation_table = {
"oneormore, repetition, option, regexchain":
[reduce_single_child, remove_enclosing_delimiters],
"symbol, literal, regexp":
[remove_expendables, reduce_single_child],
[reduce_single_child],
(TOKEN_PTYPE, WHITESPACE_PTYPE):
[remove_expendables, reduce_single_child],
[reduce_single_child],
"list_":
[flatten, remove_tokens(',')],
"*":
[remove_expendables, replace_by_single_child]
[replace_by_single_child]
}
......
......@@ -204,7 +204,6 @@ def add_parser_guard(parser_func):
node = Node(None, text[:min(10, max(1, text.find("\n")))] + " ...")
node.add_error("maximum recursion depth of parser reached; "
"potentially due to too many errors!")
node.error_flag = True
rest = ''
return node, rest
......@@ -355,10 +354,6 @@ class Grammar:
except KeyError:
parser = getattr(self, key, None)
if parser:
# if toolkit.warnings():
# raise KeyError(('Parser "%s" inaccesible, because it is not connected '
# 'to the root parser "%s" !') % (key, self.root__.name))
# print('Parser "%s" not connected to root parser.' % key)
# add parser to grammar object on the fly...
setattr(self, key, copy.deepcopy(parser))
self[key].apply(self._add_parser)
......@@ -449,6 +444,9 @@ class Grammar:
if rest:
stitches.append(Node(None, rest))
result = Node(None, tuple(stitches))
if any(self.variables.values()):
result.add_error("Capture-retrieve-stack not empty after end of parsing: "
+ str(self.variables))
result.pos = 0 # calculate all positions
return result
......@@ -478,7 +476,7 @@ class Grammar:
full_history.append(line)
if record.node and record.node.parser.ptype != WHITESPACE_PTYPE:
match_history.append(line)
if record.node.errors:
if record.node.error_flag:
errors_only.append(line)
write_log(full_history, log_file_name + '_full')
write_log(match_history, log_file_name + '_match')
......@@ -842,9 +840,8 @@ class Sequence(NaryOperator):
for parser in self.parsers:
node, text_ = parser(text_)
if not node:
return node, text
if node.result: # Nodes with zero-length result are silently omitted
results += (node,)
return None, text
results += (node,)
if node.error_flag:
break
assert len(results) <= len(self.parsers)
......@@ -1009,6 +1006,8 @@ class NegativeLookbehind(Lookbehind):
class Capture(UnaryOperator):
"""STILL EXPERIMENTAL!"""
def __init__(self, parser: Parser, name: str = '') -> None:
super(Capture, self).__init__(parser, name)
......@@ -1025,50 +1024,54 @@ class Capture(UnaryOperator):
RetrieveFilter = Callable[[List[str]], str]
def nop_filter(stack: List[str]) -> str:
def last_value(stack: List[str]) -> str:
    """Retrieve filter that yields the topmost entry of ``stack``.

    The stack itself is left unmodified.

    Raises:
        IndexError: if ``stack`` is empty.
    """
    top = stack[-1]  # an empty stack raises IndexError here
    return top
def counterpart_filter(stack: List[str]) -> str:
def counterpart(stack: List[str]) -> str:
    """Retrieve filter that yields the closing counterpart of the topmost
    entry of ``stack``.

    Every opening bracket character ``(``, ``[``, ``{`` and ``<`` in the
    topmost value is replaced by its closing partner ``)``, ``]``, ``}``
    and ``>``. All other characters are kept, and the character order is
    not reversed. The stack itself is left unmodified.

    The earlier implementation replaced ``>`` by ``<``, which left an
    opening ``<`` without a closing counterpart; the mapping now goes from
    ``<`` to ``>``, consistent with the other three bracket pairs.

    Raises:
        IndexError: if ``stack`` is empty.
    """
    value = stack[-1]  # an empty stack raises IndexError here
    # No closing character is also an opening one, so a single translate
    # pass gives the same result as chaining one replace() per bracket.
    return value.translate(str.maketrans("([{<", ")]}>"))
def accumulating_filter(stack: List[str]) -> str:
return "".join(stack)
def accumulate(stack: List[str]) -> str:
    """Retrieve filter that yields all entries of ``stack`` concatenated
    from bottom to top.

    A stack holding exactly one entry yields that entry itself. The stack
    is left unmodified.

    Raises:
        IndexError: if ``stack`` is empty. This is provoked deliberately
            rather than returning an empty string.
    """
    if len(stack) > 1:
        return "".join(stack)
    # Zero or one entry: indexing raises IndexError for an empty stack.
    return stack[-1]
class Retrieve(Parser):
"""STILL EXPERIMENTAL!"""
def __init__(self, symbol: Parser, filter: RetrieveFilter = None, name: str = '') -> None:
if not name:
name = symbol.name
super(Retrieve, self).__init__(name)
self.symbol = symbol
self.filter = filter if filter else nop_filter
self.filter = filter if filter else last_value
def __deepcopy__(self, memo):
return self.__class__(self.symbol, self.filter, self.name)
def __call__(self, text: str) -> Tuple[Node, str]:
return self.call(text) # allow call method to be called from subclass circumventing the parser guard
def call(self, text: str) -> Tuple[Node, str]:
try:
stack = self.grammar.variables[self.symbol.name]
value = self.filter(stack)
self.pick_value(stack)
except (KeyError, IndexError):
return Node(self, '').add_error(dsl_error_msg(self,
"%s undefined or exhausted" % self.symbol.name)), text
"'%s' undefined or exhausted." % self.symbol.name)), text
if text.startswith(value):
return Node(self, value), text[len(value):]
else:
return None, text
def pick_value(self, stack: List[str]) -> str:
return stack[-1]
class Pop(Retrieve):
def pick_value(self, stack: List[str]) -> str:
return stack.pop()
"""STILL EXPERIMENTAL!!!"""
def __call__(self, text: str) -> Tuple[Node, str]:
nd, txt = super(Pop, self).call(text) # call() instead of __call__() to avoid parser guard
if nd and not nd.error_flag:
stack = self.grammar.variables[self.symbol.name]
stack.pop()
return nd, txt
########################################################################
......@@ -1105,7 +1108,7 @@ class Forward(Parser):
def set(self, parser: Parser):
# assert isinstance(parser, Parser)
self.name = parser.name # redundant, see Grammar-constructor
# self.name = parser.name # redundant, see Grammar-constructor
self.parser = parser
def apply(self, func: Parser.ApplyFunc):
......
......@@ -9,15 +9,17 @@ preamble = { command }+
document = [PARSEP] { [PARSEP] paragraph } §EOF
genericenv = beginenv sequence §endenv
beginenv = "\begin" §( "{" NAME "}" )
endenv = "\end" §( "{" ::NAME "}" )
blockenv = beginenv sequence §endenv
parblock = "{" sequence §"}"
sequence = { paragraph [PARSEP] }+
paragraph = { !blockcmd (command | block | text) }+
inlineenv = beginenv { command | block | text }+ endenv
beginenv = "\begin{" §NAME §"}"
endenv = "\end{" §::NAME §"}"
command = CMDNAME [ config ] block
config = "[" cfgtext §"]"
block = "{" { text | block } §"}"
......
@braces_filter = counterpart_filter
@braces_filter = counterpart
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
......
document = { text | env }
env = (openenv | altopen) { text } [closeenv | altclose]
openenv = "\begin{" name "}"
altopen = "\begin{" name "*}"
closeenv = "\end{" ::name "}"
altclose = "\end{" ::name "*}"
text = /[^\\]+/
name = /\w+/
Environment \begin{env} inside \end{env*} should not fail
Environment \begin{env*} inside should not leave any symbols on the stack
Environment \begin{env} inside \end{env*} should not fail
Environment \begin{env*} inside \end{env} should not leave any symbols on the stack
......@@ -19,6 +19,7 @@
</:Alternative>
<:Alternative>
<:Sequence>
<:NegativeLookahead></:NegativeLookahead>
<delimiter_sign>
<:RegExp>``</:RegExp>
</delimiter_sign>
......
......@@ -19,6 +19,7 @@
</:Alternative>
<:Alternative>
<:Sequence>
<:NegativeLookahead></:NegativeLookahead>
<delimiter_sign>
<:RegExp>``</:RegExp>
</delimiter_sign>
......
......@@ -25,13 +25,14 @@ import sys
sys.path.append(os.path.abspath('../../'))
from DHParser.dsl import compile_on_disk, is_outdated
if (not os.path.exists('PopRetrieveCompiler.py') or
is_outdated('PopRetrieveCompiler.py', 'PopRetrieve.ebnf')):
print("recompiling PopRetrieve parser")
errors = compile_on_disk("PopRetrieve.ebnf")
if errors:
print('\n\n'.join(errors))
sys.exit(1)
#
# if (not os.path.exists('PopRetrieveCompiler.py') or
# is_outdated('PopRetrieveCompiler.py', 'PopRetrieve.ebnf')):
# print("recompiling PopRetrieve parser")
# errors = compile_on_disk("PopRetrieve.ebnf")
# if errors:
# print('\n\n'.join(errors))
# sys.exit(1)
# from PopRetrieve_compiler import compile_PopRetrieve
......@@ -53,43 +54,65 @@ if (not os.path.exists('PopRetrieveCompiler.py') or
# print(result)
print("PopRetrieveTest 1")
errors = compile_on_disk("PopRetrieveTest.txt", 'PopRetrieveCompiler.py')
if errors:
print(errors)
sys.exit(1)
print("PopRetrieveTest 2")
errors = compile_on_disk("PopRetrieveTest2.txt", 'PopRetrieveCompiler.py')
if errors:
print(errors)
sys.exit(1)
# print("PopRetrieveTest 1")
# errors = compile_on_disk("PopRetrieveTest.txt", 'PopRetrieveCompiler.py')
# if errors:
# print(errors)
# sys.exit(1)
#
# print("PopRetrieveTest 2")
# errors = compile_on_disk("PopRetrieveTest2.txt", 'PopRetrieveCompiler.py')
# if errors:
# print(errors)
# sys.exit(1)
#
#
#
# if (not os.path.exists('PopRetrieveComplementCompiler.py') or
# is_outdated('PopRetrieveComplementCompiler.py', 'PopRetrieveComplement.ebnf')):
# print("recompiling PopRetrieveComplement parser")
# errors = compile_on_disk("PopRetrieveComplement.ebnf")
# if errors:
# print('\n\n'.join(errors))
# sys.exit(1)
#
#
# from PopRetrieveComplementCompiler import compile_src
#
# print("PopRetrieveComplement Test 1")
# result, errors, ast = compile_src("PopRetrieveComplementTest.txt")
# if errors:
# print(errors)
# sys.exit(1)
# else:
# print(result)
#
# print("PopRetrieveComplement Test 2")
# result, errors, ast = compile_src("PopRetrieveComplementTest2.txt")
# if errors:
# print(errors)
# sys.exit(1)
# else:
# print(result)
if (not os.path.exists('PopRetrieveComplementCompiler.py') or
is_outdated('PopRetrieveComplementCompiler.py', 'PopRetrieveComplement.ebnf')):
print("recompiling PopRetrieveComplement parser")
errors = compile_on_disk("PopRetrieveComplement.ebnf")
if (not os.path.exists('PopRetrieveConfusionCompiler.py') or
is_outdated('PopRetrieveConfusionCompiler.py', 'PopRetrieveConfusion.ebnf')):
print("recompiling PopRetrieveConfusion parser")
errors = compile_on_disk("PopRetrieveConfusion.ebnf")
if errors:
print('\n\n'.join(errors))
sys.exit(1)
from PopRetrieveConfusionCompiler import compile_src
from PopRetrieveComplementCompiler import compile_src
print("PopRetrieveComplement Test 1")
result, errors, ast = compile_src("PopRetrieveComplementTest.txt")
if errors:
print(errors)
sys.exit(1)
else:
print(result)
print("PopRetrieveComplement Test 2")
result, errors, ast = compile_src("PopRetrieveComplementTest2.txt")
print("PopRetrieveConfusion Test 1")
result, errors, ast = compile_src("PopRetrieveConfusion.txt")
print(ast.as_sexpr())
if errors:
print(errors)
for e in errors:
print(e)
sys.exit(1)
else:
print(result)
......@@ -122,7 +122,7 @@ class TestPopRetrieve:
text = /[^`]+/
"""
mini_lang2 = """
@braces_filter=counterpart_filter
@braces_filter=counterpart
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment