Commit adc76f68 authored by di68kap's avatar di68kap
Browse files

- Support functions for grammar- and ast-testing moved into a separate module "testing"

- example MLW extended by adding rudimentary testcases
parent 74b39df2
...@@ -24,8 +24,9 @@ from .syntaxtree import * ...@@ -24,8 +24,9 @@ from .syntaxtree import *
from .parsers import * from .parsers import *
from .ebnf import * from .ebnf import *
from .dsl import * from .dsl import *
# from .testing import *
from .versionnumber import __version__ from .versionnumber import __version__
__author__ = "Eckhart Arnold <arnold@badw.de>" __author__ = "Eckhart Arnold <arnold@badw.de>"
__copyright__ = "http://www.apache.org/licenses/LICENSE-2.0" __copyright__ = "http://www.apache.org/licenses/LICENSE-2.0"
# __all__ = ['toolkit', 'syntaxtree', 'parsers', 'ebnf', 'dsl'] # flat namespace # __all__ = ['toolkit', 'syntaxtree', 'parsers', 'ebnf', 'dsl', 'testing', 'versionnumber'] # flat namespace
...@@ -369,7 +369,7 @@ class EBNFCompiler(CompilerBase): ...@@ -369,7 +369,7 @@ class EBNFCompiler(CompilerBase):
compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)] compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(compiler) return '\n'.join(compiler)
def assemble_parser(self, definitions): def assemble_parser(self, definitions, root_node):
# fix capture of variables that have been defined before usage [sic!] # fix capture of variables that have been defined before usage [sic!]
if self.variables: if self.variables:
...@@ -427,9 +427,11 @@ class EBNFCompiler(CompilerBase): ...@@ -427,9 +427,11 @@ class EBNFCompiler(CompilerBase):
declarations += [symbol + '.set(' + statement + ')'] declarations += [symbol + '.set(' + statement + ')']
else: else:
declarations += [symbol + ' = ' + statement] declarations += [symbol + ' = ' + statement]
known_symbols = self.rules | self.RESERVED_SYMBOLS
for nd in self.symbol_nodes: for nd in self.symbol_nodes:
if nd.result not in self.rules: if nd.result not in known_symbols:
nd.add_error("Missing production for symbol '%s'" % nd.result) nd.add_error("Missing production for symbol '%s'" % nd.result)
root_node.error_flag = True
if self.root and 'root__' not in self.rules: if self.root and 'root__' not in self.rules:
declarations.append('root__ = ' + self.root) declarations.append('root__ = ' + self.root)
declarations.append('') declarations.append('')
...@@ -452,8 +454,9 @@ class EBNFCompiler(CompilerBase): ...@@ -452,8 +454,9 @@ class EBNFCompiler(CompilerBase):
else: else:
assert nd.parser.name == "directive", nd.as_sexpr() assert nd.parser.name == "directive", nd.as_sexpr()
self._compile(nd) self._compile(nd)
node.error_flag |= nd.error_flag
return self.assemble_parser(definitions) return self.assemble_parser(definitions, node)
def on_definition(self, node): def on_definition(self, node):
rule = node.result[0].result rule = node.result[0].result
......
...@@ -56,10 +56,8 @@ try: ...@@ -56,10 +56,8 @@ try:
except ImportError: except ImportError:
import re import re
from .toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name, \ from .toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name
compact_sexpr from .syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Node
from .syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Node, \
mock_syntax_tree
from DHParser.toolkit import load_if_file, error_messages from DHParser.toolkit import load_if_file, error_messages
__all__ = ['HistoryRecord', __all__ = ['HistoryRecord',
...@@ -1087,43 +1085,8 @@ def compile_source(source, scanner, parser, transformer, compiler): ...@@ -1087,43 +1085,8 @@ def compile_source(source, scanner, parser, transformer, compiler):
errors = syntax_tree.collect_errors() errors = syntax_tree.collect_errors()
if not errors: if not errors:
result = compiler(syntax_tree) result = compiler(syntax_tree)
errors = syntax_tree.collect_errors() errors = syntax_tree.collect_errors() if syntax_tree.error_flag else []
messages = error_messages(source_text, errors) messages = error_messages(source_text, errors)
return result, messages, syntax_tree return result, messages, syntax_tree
def test_grammar(test_suite, parser_factory, transformer_factory):
errata = []
parser = parser_factory()
transform = transformer_factory()
for parser_name, tests in test_suite.items():
assert set(tests.keys()).issubset({'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'})
for test_name, test_code in tests['match'].items():
cst = parser(test_code, parser_name)
tests.setdefault('__cst__', {})[test_name] = cst
if cst.error_flag:
errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\t%s' %
(test_name, parser_name, '\n\t'.join(test_code.split('\n')),
'\n\t'.join(error_messages(test_code, cst.collect_errors()))))
elif "cst" in tests and mock_syntax_tree(tests["cst"][test_name]) != cst:
errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
(test_name, parser_name, cst.as_sexpr()))
elif "ast" in tests:
ast = copy.deepcopy(cst)
transform(ast)
tests.setdefault('__ast__', {})[test_name] = ast
compare = mock_syntax_tree(tests["ast"][test_name])
if compare != ast:
errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
'\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s'
% (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
compact_sexpr(compare.as_sexpr()),
compact_sexpr(ast.as_sexpr())))
for test_name, test_code in tests['fail'].items():
cst = parser(test_code, parser_name)
if not cst.error_flag:
errata.append('Fail test "%s" for parser "%s" yields match instead of '
'expected failure!' % (test_name, parser_name))
return errata
...@@ -27,7 +27,7 @@ except ImportError: ...@@ -27,7 +27,7 @@ except ImportError:
import re import re
from typing import NamedTuple from typing import NamedTuple
from .toolkit import is_logging, log_dir, expand_table, line_col, smart_list from .toolkit import log_dir, expand_table, line_col, smart_list
__all__ = ['WHITESPACE_PTYPE', __all__ = ['WHITESPACE_PTYPE',
...@@ -35,7 +35,6 @@ __all__ = ['WHITESPACE_PTYPE', ...@@ -35,7 +35,6 @@ __all__ = ['WHITESPACE_PTYPE',
'ZOMBIE_PARSER', 'ZOMBIE_PARSER',
'Error', 'Error',
'Node', 'Node',
'mock_syntax_tree',
'key_parser_name', 'key_parser_name',
'key_tag_name', 'key_tag_name',
'traverse', 'traverse',
...@@ -339,21 +338,28 @@ class Node: ...@@ -339,21 +338,28 @@ class Node:
self.error_flag = True self.error_flag = True
return self return self
def propagate_error_flags(self):
""" Recursively propagates error flags set on child nodes to its
parents. This can be used if errors are added to descendant
nodes after syntaxtree construction, i.e. in the compile phase.
"""
for child in self.children:
child.propagate_error_flags()
self.error_flag |= child.error_flag
def collect_errors(self, clear_errors=False): def collect_errors(self, clear_errors=False):
""" """
Returns all errors of this node or any child node in the form Returns all errors of this node or any child node in the form
of a set of tuples (position, error_message), where position of a set of tuples (position, error_message), where position
is always relative to this node. is always relative to this node.
""" """
errors = [] errors = self.errors
if self.error_flag: if clear_errors:
errors = self.errors self._errors = []
if clear_errors: self.error_flag = False
self._errors = [] if self.children:
self.error_flag = False for child in self.result:
if self.children: errors.extend(child.collect_errors(clear_errors))
for child in self.result:
errors.extend(child.collect_errors(clear_errors))
return errors return errors
def log(self, log_file_name): def log(self, log_file_name):
...@@ -423,60 +429,6 @@ class Node: ...@@ -423,60 +429,6 @@ class Node:
return nav(path.split('/')) return nav(path.split('/'))
def mock_syntax_tree(sexpr):
"""Generates a tree of nodes from an S-expression.
Example:
>>> mock_syntax_tree("(a (b c))").as_sexpr()
(a
(b
"c"
)
)
"""
def next_block(s):
s = s.strip()
while s[0] != ')':
if s[0] != '(': raise ValueError('"(" expected, not ' + s[:10])
# assert s[0] == '(', s
level = 1;
i = 1
while level > 0:
if s[i] == '(':
level += 1
elif s[i] == ')':
level -= 1
i += 1
yield s[:i]
s = s[i:].strip()
sexpr = sexpr.strip()
if sexpr[0] != '(': raise ValueError('"(" expected, not ' + sexpr[:10])
# assert sexpr[0] == '(', sexpr
sexpr = sexpr[1:].strip()
m = re.match('[\w:]+', sexpr)
name, class_name = (sexpr[:m.end()].split(':') + [''])[:2]
sexpr = sexpr[m.end():].strip()
if sexpr[0] == '(':
result = tuple(mock_syntax_tree(block) for block in next_block(sexpr))
else:
lines = []
while sexpr and sexpr[0] != ')':
for qm in ['"""', "'''", '"', "'"]:
m = re.match(qm + r'.*?' + qm, sexpr)
if m:
i = len(qm)
lines.append(sexpr[i:m.end() - i])
sexpr = sexpr[m.end():].strip()
break
else:
m = re.match(r'(?:(?!\)).)*', sexpr)
lines.append(sexpr[:m.end()])
sexpr = sexpr[m.end():]
result = "\n".join(lines)
return Node(MockParser(name, ':' + class_name), result)
######################################################################## ########################################################################
# #
# syntax tree transformation functions # syntax tree transformation functions
......
"""testing.py - test support for DHParser based grammars and compilers
Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.
"""
import copy
import regex as re
from DHParser import Node, error_messages
from DHParser.syntaxtree import MockParser
from DHParser.toolkit import compact_sexpr
def mock_syntax_tree(sexpr):
    """Generate a tree of nodes from an S-expression.

    Each expression has the form ``(name[:class] content)`` where content
    is either a sequence of nested expressions or leaf text. Leaf text
    may be quoted with single, double or triple quotes; several pieces of
    leaf text are joined with newlines.

    Args:
        sexpr: The S-expression as a string.

    Returns:
        A ``Node`` whose parser is a ``MockParser`` carrying the node name
        and ``':' + class`` as its second argument, and whose result is
        either a tuple of child nodes or the leaf text.

    Raises:
        ValueError: If the expression does not start with "(", has no
            node name, or contains unbalanced parentheses.

    Example:
    >>> mock_syntax_tree("(a (b c))").as_sexpr()
    (a
        (b
            "c"
        )
    )
    """
    def next_block(s):
        """Yield the top-level parenthesized blocks of ``s`` one by one."""
        s = s.strip()
        while s and s[0] != ')':
            if s[0] != '(':
                raise ValueError('"(" expected, not ' + s[:10])
            level = 1
            i = 1
            while level > 0:
                if i >= len(s):
                    # ran off the end before the block was closed
                    raise ValueError('Unbalanced parentheses in ' + s[:10])
                if s[i] == '(':
                    level += 1
                elif s[i] == ')':
                    level -= 1
                i += 1
            yield s[:i]
            s = s[i:].strip()

    sexpr = sexpr.strip()
    if not sexpr.startswith('('):
        raise ValueError('"(" expected, not ' + sexpr[:10])
    sexpr = sexpr[1:].strip()
    m = re.match(r'[\w:]+', sexpr)
    if m is None:
        raise ValueError('Node name expected, not ' + sexpr[:10])
    # "name:class" -> (name, class); a missing class becomes ''
    name, class_name = (sexpr[:m.end()].split(':') + [''])[:2]
    sexpr = sexpr[m.end():].strip()
    if sexpr.startswith('('):
        result = tuple(mock_syntax_tree(block) for block in next_block(sexpr))
    else:
        lines = []
        while sexpr and sexpr[0] != ')':
            # longer quotation marks first, so that '"""' is not read as '""'
            for qm in ['"""', "'''", '"', "'"]:
                m = re.match(qm + r'.*?' + qm, sexpr)
                if m:
                    i = len(qm)
                    lines.append(sexpr[i:m.end() - i])
                    sexpr = sexpr[m.end():].strip()
                    break
            else:
                # unquoted text: everything up to the closing parenthesis
                # or the end of the line
                m = re.match(r'(?:(?!\)).)*', sexpr)
                lines.append(sexpr[:m.end()])
                # Strip the remainder: "." never consumes a newline, so
                # without this a line break in unquoted content would
                # produce empty matches forever.
                sexpr = sexpr[m.end():].strip()
        result = "\n".join(lines)
    return Node(MockParser(name, ':' + class_name), result)
def test_grammar(test_suite, parser_factory, transformer_factory):
    """Run unit tests for a grammar parser and its AST transformation.

    Args:
        test_suite: A dictionary mapping parser names to dictionaries of
            tests. Allowed keys of the inner dictionaries are 'match',
            'fail', 'cst' and 'ast', each mapping test names to source
            code ('match', 'fail') or to an expected tree written as an
            S-expression ('cst', 'ast'). The keys '__cst__' and '__ast__'
            are filled in by this function with the trees that were
            actually produced.
        parser_factory: Called without arguments; must return a parser
            that is called as ``parser(test_code, parser_name)``.
        transformer_factory: Called without arguments; must return a
            callable that transforms a syntax tree in place.

    Returns:
        A list of error messages, one for each failed test. The list is
        empty if all tests passed.
    """
    errata = []
    parser = parser_factory()
    transform = transformer_factory()
    for parser_name, tests in test_suite.items():
        assert set(tests.keys()).issubset(
            {'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'})
        expected_cst = tests.get('cst', {})
        expected_ast = tests.get('ast', {})

        for test_name, test_code in tests.get('match', dict()).items():
            cst = parser(test_code, parser_name)
            tests.setdefault('__cst__', {})[test_name] = cst
            if cst.error_flag:
                errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.:  %s\n\t%s' %
                              (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
                               '\n\t'.join(error_messages(test_code, cst.collect_errors()))))
            # A match test need not have an expected tree; only compare
            # when an expectation with the same test name exists.
            elif test_name in expected_cst \
                    and mock_syntax_tree(expected_cst[test_name]) != cst:
                errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
                              (test_name, parser_name, cst.as_sexpr()))
            elif "ast" in tests:
                # transform a copy, so that the stored CST stays untouched
                ast = copy.deepcopy(cst)
                transform(ast)
                tests.setdefault('__ast__', {})[test_name] = ast
                if test_name in expected_ast:
                    compare = mock_syntax_tree(expected_ast[test_name])
                    if compare != ast:
                        errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
                                      '\n\tExpr.:     %s\n\tExpected:  %s\n\tReceived:  %s'
                                      % (test_name, parser_name,
                                         '\n\t'.join(test_code.split('\n')),
                                         compact_sexpr(compare.as_sexpr()),
                                         compact_sexpr(ast.as_sexpr())))

        for test_name, test_code in tests.get('fail', dict()).items():
            cst = parser(test_code, parser_name)
            if not cst.error_flag:
                errata.append('Fail test "%s" for parser "%s" yields match instead of '
                              'expected failure!' % (test_name, parser_name))
    return errata


# The name starts with "test", but this is a helper, not a test case: keep
# test collectors (e.g. pytest) from picking it up when it is star-imported.
test_grammar.__test__ = False
def runner(tests, namespace):
    """Run all or some selected tests from a test suite.

    To run all tests in a module, call ``runner("", globals())`` from
    within that module.

    Args:
        tests: Either a string or a list of strings that contains the
            names of tests or test classes. A string may hold several
            names separated by whitespace. A single test is written as
            ``ClassName.method_name``. Each test and, in the case of a
            test class, all tests within the test class will be run.
            If empty, all classes in ``namespace`` whose name starts
            with "test" (case-insensitive) are run.
        namespace: The namespace for running the test, usually
            ``globals()`` should be used.

    Example:
        class TestSomething:
            def setup(self):
                pass
            def teardown(self):
                pass
            def test_something(self):
                pass

        if __name__ == "__main__":
            from DHParser.testing import runner
            runner("", globals())
    """
    # imported locally so that this function does not depend on a
    # file-level import of ``inspect``
    import inspect

    def instantiate(cls_name):
        """Create the test object and call its ``setup`` method, if any."""
        obj = namespace[cls_name]()
        if "setup" in dir(obj):
            obj.setup()
        return obj

    if tests:
        if isinstance(tests, str):
            # split() without argument tolerates repeated blanks
            tests = tests.split()
    else:
        # collect all test classes, in case no methods or classes have been passed explicitly
        tests = [name for name in namespace
                 if name.lower().startswith('test') and inspect.isclass(namespace[name])]

    for test in tests:
        # reset for every test, so that a failing instantiation does not
        # tear down the object of the previous test a second time
        obj = None
        try:
            if '.' in test:
                cls_name, method_name = test.split('.')
                obj = instantiate(cls_name)
                print("Running " + cls_name + "." + method_name)
                getattr(obj, method_name)()
            else:
                obj = instantiate(test)
                for name in dir(obj):
                    if name.lower().startswith("test"):
                        method = getattr(obj, name)
                        if callable(method):
                            print("Running " + test + "." + name)
                            method()
        finally:
            if "teardown" in dir(obj):
                obj.teardown()
# EBNF-Syntax für MLW-Artikel # EBNF-Syntax für MLW-Artikel
@ comment = /#.*(?:\n|$)/ # Kommentare beginnen mit '#' und reichen bis zum Zeilenende @ comment = /#.*(?:\n|$)/ # Kommentare beginnen mit '#' und reichen bis zum Zeilenende
@ whitespace = /[\t ]*/ # Zeilensprünge zählen nicht als Leerraum @ whitespace = /[\t ]*/ # Zeilensprünge zählen nicht als Leerraum
@ literalws = both # Leerraum vor und nach Literalen wird automatisch entfernt @ literalws = right # Leerraum vor und nach Literalen wird automatisch entfernt
############################################################################## ##############################################################################
Artikel = [LEER] Artikel = [LZ]
§LemmaPosition [ArtikelKopf] §LemmaPosition
§BedeutungsPosition [ArtikelKopf]
§Autorinfo §BedeutungsPosition
[LEER] DATEI_ENDE §Autorinfo
[LZ] DATEI_ENDE
#### LEMMA-POSITION ########################################################## #### LEMMA-POSITION ##########################################################
LemmaPosition = "LEMMA" §Lemma [LemmaVarianten] §GrammatikPosition LemmaPosition = "LEMMA" [LZ] §HauptLemma [LemmaVarianten] §GrammatikPosition
Lemma = [klassisch] [gesichert] WORT_KLEIN [LEER] HauptLemma = [klassisch] [gesichert] lemma
klassisch = "*" klassisch = "*"
gesichert = "$" gesichert = "$"
LemmaVarianten = "VARIANTEN" [LEER] LemmaVarianten = { (LZ|TR) lemma }+
§LVariante { TRENNER LVariante } [ (LZ|TR) LemmaZusatz] [LZ]
[TRENNER LVZusatz] [TRENNER]
LVariante = ~/(?:[a-z]|-)+/~ # Buchstabenfolge mit Trennzeichen "-" lemma = LAT_WORT_TEIL { ("|" | "-") LAT_WORT_TEIL }
LVZusatz = "ZUSATZ" zs_typ
zs_typ = "sim."
LemmaZusatz = "ZUSATZ" lzs_typ
lzs_typ = "sim."
#### GRAMMATIK-POSITION ###################################################### ## GRAMMATIK-POSITION ##
GrammatikPosition = "GRAMMATIK" [LEER] §wortart §TRENNER §Flexion [genus] GrammatikPosition = "GRAMMATIK" [LZ] §wortart §TR §Flexion [genus]
{GrammatikVariante} [TRENNER] {GrammatikVariante} [TR]
wortart = "nomen" | "n." | wortart = "nomen" | "n." |
"verb" | "v." | "verb" | "v." |
"adverb" | "adv." | "adverb" | "adv." |
"adjektiv" | "adj." "adjektiv" | "adj."
GrammatikVariante = TRENNER GVariante GrammatikVariante = TR GVariante
GVariante = Flexionen [genus] ":" Beleg GVariante = Flexionen [genus] ":" Beleg
Flexion = Flexion { "," §Flexion } Flexionen = Flexion { "," §Flexion }
Flexion = /-?[a-z]+/~ Flexion = /-?[a-z]+/~
genus = "maskulinum" | "m." | genus = "maskulinum" | "m." |
...@@ -57,8 +57,8 @@ genus = "maskulinum" | "m." | ...@@ -57,8 +57,8 @@ genus = "maskulinum" | "m." |
#### ARTIKEL-KOPF ############################################################ #### ARTIKEL-KOPF ############################################################
ArtikelKopf = SchreibweisenPosition ArtikelKopf = SchreibweisenPosition
SchreibweisenPosition = "SCHREIBWEISE" [LEER] §SWTyp ":" [LEER] SchreibweisenPosition = "SCHREIBWEISE" [LZ] §SWTyp ":" [LZ]
§SWVariante { TRENNER SWVariante} [LEER] §SWVariante { TR SWVariante} [LZ]
SWTyp = "script." | "script. fat-" SWTyp = "script." | "script. fat-"
SWVariante = Schreibweise ":" Beleg SWVariante = Schreibweise ":" Beleg
Schreibweise = "vizreg-" | "festregel(a)" | "fezdregl(a)" | "fat-" Schreibweise = "vizreg-" | "festregel(a)" | "fezdregl(a)" | "fat-"
...@@ -70,40 +70,42 @@ VerweisZiel = ~/<\w+>/~ ...@@ -70,40 +70,42 @@ VerweisZiel = ~/<\w+>/~
#### BEDEUTUNGS-POSITION ##################################################### #### BEDEUTUNGS-POSITION #####################################################
BedeutungsPosition = { "BEDEUTUNG" [LEER] §Bedeutung }+ BedeutungsPosition = { "BEDEUTUNG" [LZ] §Bedeutung }+
Bedeutung = (Interpretamente | Bedeutungskategorie) [Belege] Bedeutung = (Interpretamente | Bedeutungskategorie) [Belege]
Bedeutungskategorie = /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~ [LEER] Bedeutungskategorie = /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~ [LZ]
Interpretamente = LateinischeBedeutung [LEER] §DeutscheBedeutung [LEER] Interpretamente = LateinischeBedeutung [LZ] §DeutscheBedeutung [LZ]
LateinischeBedeutung = "LAT" /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~ LateinischeBedeutung = "LAT" /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~
DeutscheBedeutung = "DEU" /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~ DeutscheBedeutung = "DEU" /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~
Belege = "BELEGE" [LEER] { "*" EinBeleg } Belege = "BELEGE" [LZ] { "*" EinBeleg }
EinBeleg = { !([LEER] ("*" | "BEDEUTUNG" | "AUTOR" | "NAME" | "ZUSATZ")) EinBeleg = { !([LZ] ("*" | "BEDEUTUNG" | "AUTOR" | "NAME" | "ZUSATZ"))
/\s*.*\s*/ }+ /\s*.*\s*/ }+
[Zusatz] [Zusatz]
Zusatz = "ZUSATZ" /\s*.*/ TRENNER Zusatz = "ZUSATZ" /\s*.*/ TR
#### AUTOR/AUTORIN ########################################################### #### AUTOR/AUTORIN ###########################################################
Autorinfo = ("AUTORIN" | "AUTOR") Name Autorinfo = ("AUTORIN" | "AUTOR") Name
Name = WORT { WORT | NAMENS_ABKÜRZUNG } Name = { NAME | NAMENS_ABKÜRZUNG }+
#### ATOMARE AUSDRÜCKE ####################################################### #### ATOMARE AUSDRÜCKE #######################################################
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀ]\./ NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~
NAME = /[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+/~
WORT = /[A-ZÄÖÜ]?[a-zäöüß]+/~ DEU_WORT = /[A-ZÄÖÜ]?[a-zäöüß]+/~
WORT_GROSS = /[A-ZÄÖÜ][a-zäöüß]+/~ DEU_GROSS = /[A-ZÄÖÜ][a-zäöüß]+/~
WORT_KLEIN = /[a-zäöüß]+/~ DEU_KLEIN = /[a-zäöüß]+/~
LAT_WORT = /[a-z]+/~ LAT_WORT = /[a-z]+/~
LAT_WORT_TEIL = /[a-z]+/
GROSSSCHRIFT = /[A-ZÄÖÜ]+/~