Commit fc96335e authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- some bug fixes, mostly related to capture-retrieve

parent 31331ec8
...@@ -76,7 +76,7 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \\ ...@@ -76,7 +76,7 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \\ Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\ Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\ ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
nop_filter, counterpart_filter, accumulating_filter, ScannerFunc last_value, counterpart, accumulate, ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\ from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\ remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\
no_transformation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\ no_transformation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\
......
...@@ -196,8 +196,10 @@ def get_ebnf_grammar() -> EBNFGrammar: ...@@ -196,8 +196,10 @@ def get_ebnf_grammar() -> EBNFGrammar:
EBNF_transformation_table = { EBNF_transformation_table = {
# AST Transformations for EBNF-grammar # AST Transformations for EBNF-grammar
"syntax": "+":
remove_expendables, remove_expendables,
"syntax":
[],
"directive, definition": "directive, definition":
remove_tokens('@', '='), remove_tokens('@', '='),
"expression": "expression":
...@@ -211,13 +213,13 @@ EBNF_transformation_table = { ...@@ -211,13 +213,13 @@ EBNF_transformation_table = {
"oneormore, repetition, option, regexchain": "oneormore, repetition, option, regexchain":
[reduce_single_child, remove_enclosing_delimiters], [reduce_single_child, remove_enclosing_delimiters],
"symbol, literal, regexp": "symbol, literal, regexp":
[remove_expendables, reduce_single_child], [reduce_single_child],
(TOKEN_PTYPE, WHITESPACE_PTYPE): (TOKEN_PTYPE, WHITESPACE_PTYPE):
[remove_expendables, reduce_single_child], [reduce_single_child],
"list_": "list_":
[flatten, remove_tokens(',')], [flatten, remove_tokens(',')],
"*": "*":
[remove_expendables, replace_by_single_child] [replace_by_single_child]
} }
......
...@@ -204,7 +204,6 @@ def add_parser_guard(parser_func): ...@@ -204,7 +204,6 @@ def add_parser_guard(parser_func):
node = Node(None, text[:min(10, max(1, text.find("\n")))] + " ...") node = Node(None, text[:min(10, max(1, text.find("\n")))] + " ...")
node.add_error("maximum recursion depth of parser reached; " node.add_error("maximum recursion depth of parser reached; "
"potentially due to too many errors!") "potentially due to too many errors!")
node.error_flag = True
rest = '' rest = ''
return node, rest return node, rest
...@@ -355,10 +354,6 @@ class Grammar: ...@@ -355,10 +354,6 @@ class Grammar:
except KeyError: except KeyError:
parser = getattr(self, key, None) parser = getattr(self, key, None)
if parser: if parser:
# if toolkit.warnings():
# raise KeyError(('Parser "%s" inaccesible, because it is not connected '
# 'to the root parser "%s" !') % (key, self.root__.name))
# print('Parser "%s" not connected to root parser.' % key)
# add parser to grammar object on the fly... # add parser to grammar object on the fly...
setattr(self, key, copy.deepcopy(parser)) setattr(self, key, copy.deepcopy(parser))
self[key].apply(self._add_parser) self[key].apply(self._add_parser)
...@@ -449,6 +444,9 @@ class Grammar: ...@@ -449,6 +444,9 @@ class Grammar:
if rest: if rest:
stitches.append(Node(None, rest)) stitches.append(Node(None, rest))
result = Node(None, tuple(stitches)) result = Node(None, tuple(stitches))
if any(self.variables.values()):
result.add_error("Capture-retrieve-stack not empty after end of parsing: "
+ str(self.variables))
result.pos = 0 # calculate all positions result.pos = 0 # calculate all positions
return result return result
...@@ -478,7 +476,7 @@ class Grammar: ...@@ -478,7 +476,7 @@ class Grammar:
full_history.append(line) full_history.append(line)
if record.node and record.node.parser.ptype != WHITESPACE_PTYPE: if record.node and record.node.parser.ptype != WHITESPACE_PTYPE:
match_history.append(line) match_history.append(line)
if record.node.errors: if record.node.error_flag:
errors_only.append(line) errors_only.append(line)
write_log(full_history, log_file_name + '_full') write_log(full_history, log_file_name + '_full')
write_log(match_history, log_file_name + '_match') write_log(match_history, log_file_name + '_match')
...@@ -842,9 +840,8 @@ class Sequence(NaryOperator): ...@@ -842,9 +840,8 @@ class Sequence(NaryOperator):
for parser in self.parsers: for parser in self.parsers:
node, text_ = parser(text_) node, text_ = parser(text_)
if not node: if not node:
return node, text return None, text
if node.result: # Nodes with zero-length result are silently omitted results += (node,)
results += (node,)
if node.error_flag: if node.error_flag:
break break
assert len(results) <= len(self.parsers) assert len(results) <= len(self.parsers)
...@@ -1009,6 +1006,8 @@ class NegativeLookbehind(Lookbehind): ...@@ -1009,6 +1006,8 @@ class NegativeLookbehind(Lookbehind):
class Capture(UnaryOperator): class Capture(UnaryOperator):
"""STILL EXPERIMENTAL!"""
def __init__(self, parser: Parser, name: str = '') -> None: def __init__(self, parser: Parser, name: str = '') -> None:
super(Capture, self).__init__(parser, name) super(Capture, self).__init__(parser, name)
...@@ -1025,50 +1024,54 @@ class Capture(UnaryOperator): ...@@ -1025,50 +1024,54 @@ class Capture(UnaryOperator):
RetrieveFilter = Callable[[List[str]], str] RetrieveFilter = Callable[[List[str]], str]
def nop_filter(stack: List[str]) -> str: def last_value(stack: List[str]) -> str:
return stack[-1] return stack[-1]
def counterpart_filter(stack: List[str]) -> str: def counterpart(stack: List[str]) -> str:
value = stack[-1] value = stack[-1]
return value.replace("(", ")").replace("[", "]").replace("{", "}").replace(">", "<") return value.replace("(", ")").replace("[", "]").replace("{", "}").replace(">", "<")
def accumulating_filter(stack: List[str]) -> str: def accumulate(stack: List[str]) -> str:
return "".join(stack) return "".join(stack) if len(stack) > 1 else stack[-1] # provoke IndexError if stack empty
class Retrieve(Parser): class Retrieve(Parser):
"""STILL EXPERIMENTAL!"""
def __init__(self, symbol: Parser, filter: RetrieveFilter = None, name: str = '') -> None: def __init__(self, symbol: Parser, filter: RetrieveFilter = None, name: str = '') -> None:
if not name:
name = symbol.name
super(Retrieve, self).__init__(name) super(Retrieve, self).__init__(name)
self.symbol = symbol self.symbol = symbol
self.filter = filter if filter else nop_filter self.filter = filter if filter else last_value
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
return self.__class__(self.symbol, self.filter, self.name) return self.__class__(self.symbol, self.filter, self.name)
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: str) -> Tuple[Node, str]:
return self.call(text) # allow call method to be called from subclass circumventing the parser guard
def call(self, text: str) -> Tuple[Node, str]:
try: try:
stack = self.grammar.variables[self.symbol.name] stack = self.grammar.variables[self.symbol.name]
value = self.filter(stack) value = self.filter(stack)
self.pick_value(stack)
except (KeyError, IndexError): except (KeyError, IndexError):
return Node(self, '').add_error(dsl_error_msg(self, return Node(self, '').add_error(dsl_error_msg(self,
"%s undefined or exhausted" % self.symbol.name)), text "'%s' undefined or exhausted." % self.symbol.name)), text
if text.startswith(value): if text.startswith(value):
return Node(self, value), text[len(value):] return Node(self, value), text[len(value):]
else: else:
return None, text return None, text
def pick_value(self, stack: List[str]) -> str:
return stack[-1]
class Pop(Retrieve): class Pop(Retrieve):
def pick_value(self, stack: List[str]) -> str: """STILL EXPERIMENTAL!!!"""
return stack.pop()
def __call__(self, text: str) -> Tuple[Node, str]:
nd, txt = super(Pop, self).call(text) # call() instead of __call__() to avoid parser guard
if nd and not nd.error_flag:
stack = self.grammar.variables[self.symbol.name]
stack.pop()
return nd, txt
######################################################################## ########################################################################
...@@ -1105,7 +1108,7 @@ class Forward(Parser): ...@@ -1105,7 +1108,7 @@ class Forward(Parser):
def set(self, parser: Parser): def set(self, parser: Parser):
# assert isinstance(parser, Parser) # assert isinstance(parser, Parser)
self.name = parser.name # redundant, see Grammar-constructor # self.name = parser.name # redundant, see Grammar-constructor
self.parser = parser self.parser = parser
def apply(self, func: Parser.ApplyFunc): def apply(self, func: Parser.ApplyFunc):
......
...@@ -9,15 +9,17 @@ preamble = { command }+ ...@@ -9,15 +9,17 @@ preamble = { command }+
document = [PARSEP] { [PARSEP] paragraph } §EOF document = [PARSEP] { [PARSEP] paragraph } §EOF
genericenv = beginenv sequence §endenv blockenv = beginenv sequence §endenv
beginenv = "\begin" §( "{" NAME "}" )
endenv = "\end" §( "{" ::NAME "}" )
parblock = "{" sequence §"}" parblock = "{" sequence §"}"
sequence = { paragraph [PARSEP] }+ sequence = { paragraph [PARSEP] }+
paragraph = { !blockcmd (command | block | text) }+ paragraph = { !blockcmd (command | block | text) }+
inlineenv = beginenv { command | block | text }+ endenv
beginenv = "\begin{" §NAME §"}"
endenv = "\end{" §::NAME §"}"
command = CMDNAME [ config ] block command = CMDNAME [ config ] block
config = "[" cfgtext §"]" config = "[" cfgtext §"]"
block = "{" { text | block } §"}" block = "{" { text | block } §"}"
......
@braces_filter = counterpart_filter @braces_filter = counterpart
document = { text | codeblock } document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces braces = opening_braces
......
document = { text | env }
env = (openenv | altopen) { text } [closeenv | altclose]
openenv = "\begin{" name "}"
altopen = "\begin{" name "*}"
closeenv = "\end{" ::name "}"
altclose = "\end{" ::name "*}"
text = /[^\\]+/
name = /\w+/
Environment \begin{env} inside \end{env*} should not fail
Environment \begin{env*} inside should not leave any symbols on the stack
Environment \begin{env} inside \end{env*} should not fail
Environment \begin{env*} inside \end{env} should not leave any symbols on the stack
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
</:Alternative> </:Alternative>
<:Alternative> <:Alternative>
<:Sequence> <:Sequence>
<:NegativeLookahead></:NegativeLookahead>
<delimiter_sign> <delimiter_sign>
<:RegExp>``</:RegExp> <:RegExp>``</:RegExp>
</delimiter_sign> </delimiter_sign>
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
</:Alternative> </:Alternative>
<:Alternative> <:Alternative>
<:Sequence> <:Sequence>
<:NegativeLookahead></:NegativeLookahead>
<delimiter_sign> <delimiter_sign>
<:RegExp>``</:RegExp> <:RegExp>``</:RegExp>
</delimiter_sign> </delimiter_sign>
......
...@@ -25,13 +25,14 @@ import sys ...@@ -25,13 +25,14 @@ import sys
sys.path.append(os.path.abspath('../../')) sys.path.append(os.path.abspath('../../'))
from DHParser.dsl import compile_on_disk, is_outdated from DHParser.dsl import compile_on_disk, is_outdated
if (not os.path.exists('PopRetrieveCompiler.py') or #
is_outdated('PopRetrieveCompiler.py', 'PopRetrieve.ebnf')): # if (not os.path.exists('PopRetrieveCompiler.py') or
print("recompiling PopRetrieve parser") # is_outdated('PopRetrieveCompiler.py', 'PopRetrieve.ebnf')):
errors = compile_on_disk("PopRetrieve.ebnf") # print("recompiling PopRetrieve parser")
if errors: # errors = compile_on_disk("PopRetrieve.ebnf")
print('\n\n'.join(errors)) # if errors:
sys.exit(1) # print('\n\n'.join(errors))
# sys.exit(1)
# from PopRetrieve_compiler import compile_PopRetrieve # from PopRetrieve_compiler import compile_PopRetrieve
...@@ -53,43 +54,65 @@ if (not os.path.exists('PopRetrieveCompiler.py') or ...@@ -53,43 +54,65 @@ if (not os.path.exists('PopRetrieveCompiler.py') or
# print(result) # print(result)
print("PopRetrieveTest 1") # print("PopRetrieveTest 1")
errors = compile_on_disk("PopRetrieveTest.txt", 'PopRetrieveCompiler.py') # errors = compile_on_disk("PopRetrieveTest.txt", 'PopRetrieveCompiler.py')
if errors: # if errors:
print(errors) # print(errors)
sys.exit(1) # sys.exit(1)
#
print("PopRetrieveTest 2") # print("PopRetrieveTest 2")
errors = compile_on_disk("PopRetrieveTest2.txt", 'PopRetrieveCompiler.py') # errors = compile_on_disk("PopRetrieveTest2.txt", 'PopRetrieveCompiler.py')
if errors: # if errors:
print(errors) # print(errors)
sys.exit(1) # sys.exit(1)
#
#
#
# if (not os.path.exists('PopRetrieveComplementCompiler.py') or
# is_outdated('PopRetrieveComplementCompiler.py', 'PopRetrieveComplement.ebnf')):
# print("recompiling PopRetrieveComplement parser")
# errors = compile_on_disk("PopRetrieveComplement.ebnf")
# if errors:
# print('\n\n'.join(errors))
# sys.exit(1)
#
#
# from PopRetrieveComplementCompiler import compile_src
#
# print("PopRetrieveComplement Test 1")
# result, errors, ast = compile_src("PopRetrieveComplementTest.txt")
# if errors:
# print(errors)
# sys.exit(1)
# else:
# print(result)
#
# print("PopRetrieveComplement Test 2")
# result, errors, ast = compile_src("PopRetrieveComplementTest2.txt")
# if errors:
# print(errors)
# sys.exit(1)
# else:
# print(result)
if (not os.path.exists('PopRetrieveComplementCompiler.py') or if (not os.path.exists('PopRetrieveConfusionCompiler.py') or
is_outdated('PopRetrieveComplementCompiler.py', 'PopRetrieveComplement.ebnf')): is_outdated('PopRetrieveConfusionCompiler.py', 'PopRetrieveConfusion.ebnf')):
print("recompiling PopRetrieveComplement parser") print("recompiling PopRetrieveConfusion parser")
errors = compile_on_disk("PopRetrieveComplement.ebnf") errors = compile_on_disk("PopRetrieveConfusion.ebnf")
if errors: if errors:
print('\n\n'.join(errors)) print('\n\n'.join(errors))
sys.exit(1) sys.exit(1)
from PopRetrieveConfusionCompiler import compile_src
from PopRetrieveComplementCompiler import compile_src print("PopRetrieveConfusion Test 1")
result, errors, ast = compile_src("PopRetrieveConfusion.txt")
print("PopRetrieveComplement Test 1") print(ast.as_sexpr())
result, errors, ast = compile_src("PopRetrieveComplementTest.txt")
if errors:
print(errors)
sys.exit(1)
else:
print(result)
print("PopRetrieveComplement Test 2")
result, errors, ast = compile_src("PopRetrieveComplementTest2.txt")
if errors: if errors:
print(errors) for e in errors:
print(e)
sys.exit(1) sys.exit(1)
else: else:
print(result) print(result)
...@@ -122,7 +122,7 @@ class TestPopRetrieve: ...@@ -122,7 +122,7 @@ class TestPopRetrieve:
text = /[^`]+/ text = /[^`]+/
""" """
mini_lang2 = """ mini_lang2 = """
@braces_filter=counterpart_filter @braces_filter=counterpart
document = { text | codeblock } document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
braces = opening_braces braces = opening_braces
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment