Commit e3a60af3 authored by di68kap
Browse files

- more unit tests and important bug fixes

parent e0512e77
......@@ -310,8 +310,6 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
print(result)
finally:
if f: f.close()
if IS_LOGGING():
print(ast)
return []
......
......@@ -130,10 +130,12 @@ EBNF_ASTTransform = {
[remove_enclosing_delimiters, replace_by_single_child],
"oneormore, repetition, option, regexchain":
[reduce_single_child, remove_enclosing_delimiters],
"symbol, literal, regexp, list_":
"symbol, literal, regexp":
[remove_expendables, reduce_single_child],
(TOKEN_KEYWORD, WHITESPACE_KEYWORD):
[remove_expendables, reduce_single_child],
"list_":
[partial(remove_tokens, tokens={','})],
"":
[remove_expendables, replace_by_single_child]
}
......@@ -157,7 +159,7 @@ class EBNFCompiler(CompilerBase):
in EBNF-Notation.
"""
COMMENT_KEYWORD = "COMMENT__"
DEFAULT_WHITESPACE = '[\t ]*'
DEFAULT_WHITESPACE = r'[\t ]*'
RESERVED_SYMBOLS = {TOKEN_KEYWORD, WHITESPACE_KEYWORD, COMMENT_KEYWORD}
KNOWN_DIRECTIVES = {'comment', 'whitespace', 'tokens', 'literalws'}
VOWELS = {'A', 'E', 'I', 'O', 'U'} # what about cases like 'hour', 'universe' etc.?
......@@ -240,7 +242,6 @@ class EBNFCompiler(CompilerBase):
if 'right' in self.directives['literalws'] else "''"))
definitions.append(('wspL__', WHITESPACE_KEYWORD
if 'left' in self.directives['literalws'] else "''"))
print(self.directives) ####
definitions.append((WHITESPACE_KEYWORD,
("mixin_comment(whitespace="
"r'{whitespace}', comment=r'{comment}')").
......@@ -349,17 +350,24 @@ class EBNFCompiler(CompilerBase):
key = node.result[0].result.lower()
assert key not in self.scanner_tokens
if key in {'comment', 'whitespace'}:
value = node.result[1].result
if value[0] + value[-1] in {'""', "''"}:
value = escape_re(value[1:-1])
elif value[0] + value[-1] == '//':
value = self._check_rx(node, value[1:-1])
if node.result[1].parser.name == "list_":
if len(node.result[1].result) != 1:
node.add_error("Directive %s must have one, but not %i values" %
(key, len(node.result[1])))
value = self.compile__(node.result[1]).pop()
if value in {'linefeed', 'standard'} and key == 'whitespace':
value = '\s*' if value == "linefeed" else self.DEFAULT_WHITESPACE
else:
node.add_error('Value "%" not allowed for directive %s' % (value, key))
else:
if value == "linefeed":
value = '\s*'
elif value == "standard":
value = self.DEFAULT_WHITESPACE
value = self._check_rx(node, value)
value = node.result[1].result.strip("~")
if value != node.result[1].result:
node.add_error("Whitespace marker '~' not allowed in definition of "
"%s regular expression." % key)
if value[0] + value[-1] in {'""', "''"}:
value = escape_re(value[1:-1])
elif value[0] + value[-1] == '//':
value = self._check_rx(node, value[1:-1])
self.directives[key] = value
elif key == 'literalws':
value = {item.lower() for item in self.compile__(node.result[1])}
......@@ -478,4 +486,5 @@ class EBNFCompiler(CompilerBase):
return 'RE(' + ', '.join([arg] + name) + ')'
def list_(self, node):
return set(item.strip() for item in node.result.split(','))
assert node.children
return set(item.result.strip() for item in node.result)
......@@ -286,11 +286,13 @@ class GrammarBase:
if self.wspL__:
self.wsp_left_parser__ = RegExp(self.wspL__, WHITESPACE_KEYWORD)
self.wsp_left_parser__.grammar = self
self.all_parsers.add(self.wsp_left_parser__)
else:
self.wsp_left_parser__ = ZOMBIE_PARSER
if self.wspR__:
self.wsp_right_parser__ = RegExp(self.wspR__, WHITESPACE_KEYWORD)
self.wsp_right_parser__.grammar = self
self.all_parsers.add(self.wsp_right_parser__)
else:
self.wsp_right_parser__ = ZOMBIE_PARSER
self.root__.apply(self._add_parser)
......
......@@ -28,7 +28,6 @@ from DHParser.DSLsupport import compileDSL, run_compiler
from DHParser.EBNFcompiler import EBNFGrammar, EBNF_ASTPipeline, EBNFCompiler
from DHParser.parsercombinators import full_compilation
def selftest(file_name):
print(file_name)
with open('examples/' + file_name, encoding="utf-8") as f:
......
......@@ -37,6 +37,6 @@ literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
regexp = /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
list_ = /\w+\s*(?:,\s*\w+\s*)*/~ # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
EOF = !/./
......@@ -49,10 +49,10 @@ class PopRetrieveGrammar(GrammarBase):
delimiter_sign = /`+/
text = /[^`]+/
"""
source_hash__ = "a418b812a36733a4713eb4e06322e1b5"
source_hash__ = "1312f8befacbc4d03bcc320644f37015"
parser_initialization__ = "upon instatiation"
COMMENT__ = r''
WSP__ = mixin_comment(whitespace=r'[ ]*', comment=r'')
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'')
wspL__ = ''
wspR__ = WSP__
text = RE('[^`]+', wR='')
......
......@@ -29,6 +29,36 @@ from DHParser.DSLsupport import compileEBNF
WRITE_LOGS = True
class TestDirectives:
    """Tests of the ``@ whitespace`` directive on a small arithmetic grammar.

    Every test prepends its own directive line to ``mini_language``,
    compiles the result with ``compileEBNF`` and parses sample expressions
    with and without an embedded line break.
    """

    # Grammar body shared by all tests; the directive line is added per test.
    mini_language = """
expression = term { ("+" | "-") term }
term = factor { ("*" | "/") factor }
factor = constant | "(" expression ")"
constant = digit { digit } [ //~ ]
digit = /0/ | /1/ | /2/ | /3/ | /4/ | /5/ | /6/ | /7/ | /8/ | /9/
"""

    def test_whitespace_linefeed(self):
        """With ``whitespace = linefeed`` both the single-line expression and
        the one containing a line break must parse without errors. The
        parsing history of each run is logged under 'WSP1' / 'WSP2'.
        """
        grammar_source = "@ whitespace = linefeed\n" + self.mini_language
        parser_class = compileEBNF(grammar_source)
        minilang = parser_class()
        assert minilang
        # Same sequence for both samples: parse, log the history, then check.
        for text, log_name in (("3 + 4 * 12", 'WSP1'), ("3 + 4 \n * 12", 'WSP2')):
            tree = minilang.parse(text)
            minilang.log_parsing_history(log_name)
            assert not tree.collect_errors()

    def test_whitespace_standard(self):
        """With ``whitespace = standard`` the single-line expression must
        parse without errors, whereas the expression containing a line break
        must produce at least one error.
        """
        minilang = compileEBNF("@ whitespace = standard\n" + self.mini_language)()
        assert minilang
        single_line_tree = minilang.parse("3 + 4 * 12")
        assert not single_line_tree.collect_errors()
        line_break_tree = minilang.parse("3 + 4 \n * 12")
        assert line_break_tree.collect_errors()
class TestPopRetrieve:
mini_language = """
document = { text | codeblock }
......@@ -70,4 +100,4 @@ class TestPopRetrieve:
if __name__ == "__main__":
from run import run_tests
run_tests("TestPopRetrieve", globals())
\ No newline at end of file
run_tests("TestDirectives TestPopRetrieve", globals())
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment