
Commit 821cb67c authored by Eckhart Arnold

bugfixes and tests for Lookbehind-Operator

parent 2e5f466d
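The diff below touches DHParser's lookbehind support in three places: the EBNF compiler's sanity check for the `-&`/`-!` operators, the `Lookbehind` parser class itself, and the LaTeX example grammar plus its tests. A lookbehind operator inspects text that has already been consumed before the current position. As a rough, standalone illustration of the idea, here is a sketch using only Python's `re` module and the `SUCC_LB` regex that appears in the LaTeX grammar further down; the helper name is purely illustrative, not DHParser API, and the real `Lookbehind` class matches against the last parsed node rather than the raw text prefix:

```python
import re

# SUCC_LB from the LaTeX grammar in this commit: succeeds if the text consumed
# so far ends with a line break, optionally followed by whitespace only.
SUCC_LB = re.compile(r'(?:.*\n)+\s*$')

def lookbehind_ok(document: str, pos: int) -> bool:
    """Illustrative helper: would a positive lookbehind on SUCC_LB
    succeed at position `pos` of `document`?"""
    return bool(SUCC_LB.match(document[:pos]))

text = "All work and no play\nmakes Jack a dull boy\nEND\n"
print(lookbehind_ok(text, text.index("END")))              # True: 'END' starts its own line
print(lookbehind_ok("dull boy END\n", len("dull boy ")))   # False: no line break before 'END'
```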
@@ -670,12 +670,20 @@ class EBNFCompiler(Compiler):
         if prefix[:1] == '-':
             def check(node):
                 nd = node
-                while len(nd.children) == 1 and nd.children[1].parser.name == "symbol":
-                    nd = nd.children[1]
+                if len(nd.children) >= 1:
+                    nd = nd.children[0]
+                while nd.parser.name == "symbol":
+                    symlist = self.rules.get(str(nd), [])
+                    if len(symlist) == 2:
+                        nd = symlist[1]
+                    else:
+                        if len(symlist) == 1:
+                            nd = symlist[0].children[1]
+                        break
                 if (nd.parser.name != "regexp" or str(nd)[:1] != '/'
                         or str(nd)[-1:] != '/'):
                     node.add_error("Lookbehind-parser can only be used with plain RegExp-"
-                                   "parsers, not with: " + str(nd))
+                                   "parsers, not with: " + nd.parser.name + nd.parser.ptype)

             if not result.startswith('RegExp('):
                 self.deferred_tasks.append(lambda: check(node))
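The reworked check above no longer inspects `nd.children` directly; it walks from the lookbehind's operand through chained symbol definitions (via `self.rules`) until it reaches the underlying expression, and only then verifies that this expression is a plain regular expression. A rough standalone sketch of that indirection-following idea, with an ordinary dict standing in for the compiler's rule table (names and data layout are simplified assumptions, not the compiler's actual structures):

```python
# Toy rule table: each symbol maps directly to its defining expression (a string).
# In EBNFCompiler, self.rules holds lists of nodes instead of plain strings.
rules = {
    "SUCC_LB": "indirection",           # SUCC_LB is defined via another symbol ...
    "indirection": r"/(?:.*\n)+\s*$/",  # ... which finally resolves to a regexp literal
}

def resolve(symbol: str, max_depth: int = 10) -> str:
    """Follow symbol -> definition chains until something that is no longer
    a known symbol (ideally a /regexp/ literal) is reached."""
    expr = symbol
    for _ in range(max_depth):
        if expr not in rules:
            break
        expr = rules[expr]
    return expr

target = resolve("SUCC_LB")
# Mirrors the error condition in the diff: lookbehind needs a plain /.../ regexp.
assert target.startswith('/') and target.endswith('/'), \
    "Lookbehind-parser can only be used with plain RegExp-parsers"
print(target)
```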
@@ -713,7 +721,7 @@ class EBNFCompiler(Compiler):
         else:
             self.current_symbols.append(node)
             if symbol not in self.symbols:
-                self.symbols[symbol] = node
+                self.symbols[symbol] = node  # remember first use of symbol
             if symbol in self.rules:
                 self.recursive.add(symbol)
         return symbol
@@ -726,18 +734,22 @@ class EBNFCompiler(Compiler):
     def on_regexp(self, node: Node) -> str:
         rx = str(node)
         name = []   # type: List[str]
-        if rx[:2] == '~/':
-            if not 'left' in self.directives['literalws']:
-                name = ['wL=' + self.WHITESPACE_KEYWORD] + name
-            rx = rx[1:]
-        elif 'left' in self.directives['literalws']:
-            name = ["wL=''"] + name
-        if rx[-2:] == '/~':
-            if 'right' not in self.directives['literalws']:
-                name = ['wR=' + self.WHITESPACE_KEYWORD] + name
-            rx = rx[:-1]
-        elif 'right' in self.directives['literalws']:
-            name = ["wR=''"] + name
+        if rx[0] == '/' and rx[-1] == '/':
+            parser = 'RegExp('
+        else:
+            parser = 'RE('
+        if rx[:2] == '~/':
+            if not 'left' in self.directives['literalws']:
+                name = ['wL=' + self.WHITESPACE_KEYWORD] + name
+            rx = rx[1:]
+        elif 'left' in self.directives['literalws']:
+            name = ["wL=''"] + name
+        if rx[-2:] == '/~':
+            if 'right' not in self.directives['literalws']:
+                name = ['wR=' + self.WHITESPACE_KEYWORD] + name
+            rx = rx[:-1]
+        elif 'right' in self.directives['literalws']:
+            name = ["wR=''"] + name
         try:
             arg = repr(self._check_rx(node, rx[1:-1].replace(r'\/', '/')))
         except AttributeError as error:
@@ -745,7 +757,7 @@ class EBNFCompiler(Compiler):
                      node.as_sxpr()
             node.add_error(errmsg)
             return '"' + errmsg + '"'
-        return 'RE(' + ', '.join([arg] + name) + ')'
+        return parser + ', '.join([arg] + name) + ')'

     def on_list_(self, node) -> Set[str]:
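With the changes to `on_regexp` above, the EBNF compiler now emits a `RegExp(...)` constructor for a bare `/.../` literal and an `RE(...)` constructor (regular expression plus adjacent whitespace) when the literal carries a leading or trailing `~`. A condensed, hedged restatement of just that branching as plain string handling (simplified: the `literalws` directives are ignored and `WSP__` is a placeholder for the whitespace keyword):

```python
def emit_parser_call(rx: str) -> str:
    """Return a 'RegExp(...)' call for a bare /regex/, otherwise an 'RE(...)' call.
    Simplified sketch of the branching added to EBNFCompiler.on_regexp."""
    parser = 'RegExp(' if rx[0] == '/' and rx[-1] == '/' else 'RE('
    name = []
    if rx[:2] == '~/':            # leading ~  -> left-adjacent whitespace
        name.append('wL=WSP__')   # WSP__ stands in for the whitespace keyword
        rx = rx[1:]
    if rx[-2:] == '/~':           # trailing ~ -> right-adjacent whitespace
        name.append('wR=WSP__')
        rx = rx[:-1]
    return parser + ', '.join([repr(rx[1:-1])] + name) + ')'

print(emit_parser_call('/END/'))      # RegExp('END')
print(emit_parser_call('~/\\s*/~'))   # RE('\\s*', wL=WSP__, wR=WSP__)
```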
@@ -900,8 +900,8 @@ class RE(Parser):
         return None, text

     def __repr__(self):
-        wL = '~' if self.wspLeft else ''
-        wR = '~' if self.wspRight else ''
+        wL = '~' if self.wspLeft != ZOMBIE_PARSER else ''
+        wR = '~' if self.wspRight != ZOMBIE_PARSER else ''
         return wL + '/%s/' % self.main.regexp.pattern + wR

     def _grammar_assigned_notifier(self):
@@ -1240,7 +1240,11 @@ class NegativeLookahead(Lookahead):

 class Lookbehind(FlowOperator):
     """EXPERIMENTAL!!!"""
     def __init__(self, parser: Parser, name: str = '') -> None:
-        assert isinstance(parser, RegExp)
+        p = parser
+        while isinstance(p, Synonym):
+            p = p.parser
+        assert isinstance(p, RegExp), str(type(p))
+        self.regexp = p.main.regexp if isinstance(p, RE) else p.regexp
         super(Lookbehind, self).__init__(parser, name)
         print("WARNING: Lookbehind Operator is experimental!")
@@ -1258,7 +1262,7 @@ class Lookbehind(FlowOperator):

     def condition(self):
         node = self.grammar.last_node__
-        return node and self.parser.regexp.match(str(node))
+        return node and self.regexp.match(str(node))


 class NegativeLookbehind(Lookbehind):
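Taken together, the two hunks above extract the regular expression from the wrapped parser once, at construction time (unwrapping any `Synonym` layers), and then match it in `condition()` against the text of the most recently created node. A stripped-down standalone illustration of that condition, with a plain string standing in for `grammar.last_node__` (a toy class, not DHParser's `Lookbehind`):

```python
import re
from typing import Optional

class MiniLookbehind:
    """Toy stand-in for the Lookbehind flow operator: it succeeds iff the text
    of the last matched 'node' matches the stored regular expression."""
    def __init__(self, pattern: str, negative: bool = False) -> None:
        self.regexp = re.compile(pattern)
        self.negative = negative

    def condition(self, last_node_text: Optional[str]) -> bool:
        matched = bool(last_node_text) and bool(self.regexp.match(last_node_text))
        return not matched if self.negative else matched

succ_lb = MiniLookbehind(r'(?:.*\n)+\s*$')
print(succ_lb.condition("makes Jack a dull boy\n"))   # True: ends with a line break
print(succ_lb.condition("dull boy "))                 # False: no trailing line break
```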
@@ -1336,7 +1340,7 @@ class Retrieve(Parser):
             stack = self.grammar.variables__[self.symbol.name]
             value = self.filter(stack)
         except (KeyError, IndexError):
-            return Node(self, '').add_error(dsl_error_msg(self,
+            return Node(self, '').add_error(dsl_error_msg(self, \
                 "'%s' undefined or exhausted." % self.symbol.name)), text
         if text.startswith(value):
             return Node(self, value), text[len(value):]
@@ -524,7 +524,8 @@ def compact_sxpr(s) -> str:

 TransformationFunc = Union[Callable[[Node], Any], partial]

-if __name__ == "__main__":
-    st = mock_syntax_tree("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))")
-    print(st.as_sxpr())
-    print(st.as_xml())
+# if __name__ == "__main__":
+#     st = mock_syntax_tree("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))")
+#     print(st.as_sxpr())
+#     print(st.as_xml())
Folder "DevScripts"
===================
This folder contains helper scripts for the
development of DHParser.
This folder contains helper scripts for the development of DHParser.
* collect_symbols.py - Lists all exported symbols from DHParser modules
......
@@ -53,12 +53,12 @@ Index = "\printindex" [PARSEP]

 #### block environments ####

 # TODO: ambiguity between generic block environments and generic inline environments

-block_environment = known_environment | generic_environment
+block_environment = known_environment | generic_block
 known_environment = itemize | enumerate | figure | table | quotation
                   | verbatim
-generic_environment = begin_environment sequence §end_environment
+generic_block = begin_generic_block sequence §end_generic_block
+begin_generic_block = -&SUCC_LB begin_environment &PRED_LB
+end_generic_block = -&SUCC_LB end_environment &PRED_LB

 itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
 enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
@@ -85,7 +85,9 @@ text_elements = command | text | block | inline_environment

 inline_environment = known_inline_env | generic_inline_env
 known_inline_env = inline_math
-generic_inline_env = begin_environment { text_elements }+ §end_environment
+generic_inline_env = begin_inline_env { text_elements }+ §end_inline_env
+begin_inline_env = (-!SUCC_LB begin_environment) | (begin_environment !PRED_LB)
+end_inline_env = (-!SUCC_LB end_environment) | (end_environment !PRED_LB)

 begin_environment = "\begin{" §NAME §"}"
 end_environment = "\end{" §::NAME §"}"
@@ -139,7 +141,10 @@ BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
 TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
                                     # linefeed and special characters
 WSPC = /[ \t]+/ # (horizontal) whitespace
-LF = !PARSEP /[ \t]*\n[ \t]*/ # LF but not an empty line
+LF = !PARSEP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
 PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
                                        # [whitespace] linefeed [whitespace] linefeed
 EOF = !/./
+
+SUCC_LB = /(?:.*\n)+\s*$/ # linebreak succeeding an arbitrary chunk of text
+PRED_LB = /\s*?\n/ # linebreak preceding any text
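The grammar changes above resolve the block/inline ambiguity noted in the TODO: a generic `\begin{...}` counts as a block environment only if it is preceded by a line break (`-&SUCC_LB`) and followed by one (`&PRED_LB`); otherwise it is parsed as an inline environment. A rough standalone illustration of that classification using the two regexes defined above (plain `re` matching on the surrounding text; not the generated parser):

```python
import re

SUCC_LB = re.compile(r'(?:.*\n)+\s*$')  # text before the tag ends with a line break
PRED_LB = re.compile(r'\s*?\n')         # text after the tag leads into a line break

def classify_environment(doc: str, tag: str = r'\begin{generic}') -> str:
    """Classify the first occurrence of `tag` in `doc` as 'block' or 'inline',
    mimicking the lookbehind/lookahead tests of the grammar."""
    i = doc.index(tag)
    before, after = doc[:i], doc[i + len(tag):]
    on_own_line = bool(SUCC_LB.match(before)) and bool(PRED_LB.match(after))
    return 'block' if on_own_line else 'inline'

print(classify_environment("some text\n\\begin{generic}\ncontent\n"))  # block
print(classify_environment("some text \\begin{generic} content\n"))    # inline
```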
@@ -305,6 +305,37 @@ class TestSynonymDetection:
         assert grammar('b').as_sxpr().count('b') == 2


+class TestFlowControlOperators:
+    def setup(self):
+        self.t1 = """
+        All work and no play
+        makes Jack a dull boy
+        END
+        """
+        self.t2 = "All word and not play makes Jack a dull boy END\n"
+
+    def test_lookbehind_indirect(self):
+        lang = r"""
+            document = ws sequence doc_end ws
+            sequence = { !end word ws }+
+            doc_end = -&SUCC_LB end
+            ws = /\s*/
+            end = /END/
+            word = /\w+/
+            SUCC_LB = indirection
+            indirection = /(?:.*\n)+\s*$/
+        """
+        # result, messages, syntax_tree = compile_source(lang, None, get_ebnf_grammar(),
+        #     get_ebnf_transformer(), get_ebnf_compiler('LookbehindTest'))
+        # print(result)
+        parser = grammar_provider(lang)()
+        cst = parser(self.t1)
+        assert not cst.error_flag, cst.as_sxpr()
+        cst = parser(self.t2)
+        # this should fail, because 'END' is not preceded by a line feed
+        assert cst.error_flag, cst.as_sxpr()
+

 if __name__ == "__main__":
     from DHParser.testing import runner
     runner("", globals())
 #!/usr/bin/python3

-"""test_parsers.py - tests of the parsers-module of DHParser
+"""test_parser.py - tests of the parsers-module of DHParser

 Author: Eckhart Arnold <arnold@badw.de>

@@ -26,7 +26,7 @@ sys.path.extend(['../', './'])
 from DHParser.toolkit import is_logging, logging, compile_python_object
 from DHParser.parser import compile_source, Retrieve, Grammar, Forward, Token, ZeroOrMore, RE, \
-    RegExp, Lookbehind, Lookahead, NegativeLookahead, OneOrMore
+    RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series
 from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
 from DHParser.dsl import grammar_provider, DHPARSER_IMPORTS
@@ -99,6 +99,14 @@ class TestInfiLoopsAndRecursion:

 class TestFlowControl:
+    def setup(self):
+        self.t1 = """
+        All work and no play
+        makes Jack a dull boy
+        END
+        """
+        self.t2 = "All word and not play makes Jack a dull boy END\n"
+
     def test_lookbehind(self):
         ws = RegExp('\s*')
         end = RegExp("END")
@@ -108,15 +116,27 @@ class TestFlowControl:
         document = ws + sequence + doc_end + ws
         parser = Grammar(document)
-        t1 = """
-        All work and no play
-        makes Jack a dull boy
-        END
-        """
-        cst = parser(t1)
+        cst = parser(self.t1)
         assert not cst.error_flag, cst.as_sxpr()
+        cst = parser(self.t2)
+        assert cst.error_flag, cst.as_sxpr()
+
+    def test_lookbehind_indirect(self):
+        class LookbehindTestGrammar(Grammar):
+            parser_initialization__ = "upon instantiation"
+            ws = RegExp('\\s*')
+            end = RegExp('END')
+            SUCC_LB = RegExp('(?:.*\\n)+\\s*$')
+            doc_end = Series(Lookbehind(SUCC_LB), end)
+            word = RegExp('\w+')
+            sequence = OneOrMore(Series(NegativeLookahead(end), word, ws))
+            document = Series(ws, sequence, doc_end, ws)
+            root__ = document
+
+        parser = LookbehindTestGrammar()
+        cst = parser(self.t1)
+        assert not cst.error_flag, cst.as_sxpr()
-        t2 = "All word and not play makes Jack a dull boy END\n"
-        cst = parser(t2)
+        cst = parser(self.t2)
         assert cst.error_flag, cst.as_sxpr()
 #!/usr/bin/python3

-"""test_parsers.py - tests of the parsers-module of DHParser
+"""test_testing.py - tests of the testing-module of DHParser

 Author: Eckhart Arnold <arnold@badw.de>