Notice to GitKraken users: A vulnerability has been found in the SSH key generation of GitKraken versions 7.6.0 to 8.0.0 (https://www.gitkraken.com/blog/weak-ssh-key-fix). If you use GitKraken and have generated an SSH key using one of these versions, please remove it both from your local workstation and from your LRZ GitLab profile.

21.10.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 9975adfb authored by di68kap's avatar di68kap
Browse files

- MLW fine tuning; better grammar testing support

parent 09536bdb
......@@ -15,9 +15,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.
"""
import collections
import configparser
import copy
import inspect
import json
import os
try:
import regex as re
except ImportError:
......@@ -83,37 +86,69 @@ def mock_syntax_tree(sexpr):
SUITE_STAGES = {'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'}
UNIT_STAGES = {'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'}
def unit_from_configfile(config_filename):
    """Reads a grammar unit test from a config (ini-style) file.

    Every section of the file must be named either ``symbol:stage`` or
    ``stage:symbol``, where ``stage`` is one of ``UNIT_STAGES``. Each
    option inside a section is one test case: the option name is the
    test key, the option value is the test code. Test code may be wrapped
    in single or triple quotes (``'`` or ``"``); one layer of such quotes
    is stripped.

    Note: ``ConfigParser.read()`` silently ignores files it cannot open,
    so a missing file yields an empty unit.

    Args:
        config_filename: Path of the config file to read.

    Returns:
        A nested dictionary ``unit[symbol][stage][testkey] = testcode``.

    Raises:
        ValueError: If a section name does not have exactly two
            colon-separated parts or if neither part is a known stage.
    """
    cfg = configparser.ConfigParser()
    cfg.read(config_filename)
    unit = {}
    for section in cfg.sections():
        parts = section.split(':')
        if len(parts) != 2:
            raise ValueError('Section name "%s" is not of the form "symbol:stage"'
                             % section)
        symbol, stage = parts
        if stage not in UNIT_STAGES:
            if symbol in UNIT_STAGES:
                # the section was written as "stage:symbol"
                symbol, stage = stage, symbol
            else:
                # two placeholders for two arguments; with only one "%s"
                # the formatting itself raised a TypeError
                raise ValueError('Test stage %s not in: %s' % (stage, str(UNIT_STAGES)))
        for testkey, testcode in cfg[section].items():
            # strip one layer of enclosing (triple or single) quotes
            if testcode[:3] + testcode[-3:] in {"''''''", '""""""'}:
                testcode = testcode[3:-3]
            elif testcode[:1] + testcode[-1:] in {"''", '""'}:
                testcode = testcode[1:-1]
            unit.setdefault(symbol, {}).setdefault(stage, {})[testkey] = testcode
    return unit


# Former name of this function, kept so that callers of the old name still work.
suite_from_configfile = unit_from_configfile
def unit_grammar(test_suite, parser_factory, transformer_factory):
def unit_from_json(config_filename):
    """Reads a grammar unit test from a json file.

    The file must contain a json object of the form
    ``{symbol: {stage: {testkey: testcode}}}``. Only the stage names are
    validated here (against ``UNIT_STAGES``).

    Args:
        config_filename: Path of the json file to read.

    Returns:
        The decoded json object.

    Raises:
        ValueError: If a stage name is not in ``UNIT_STAGES`` (or if the
            file is not valid json; ``json.JSONDecodeError`` is a
            subclass of ``ValueError``).
    """
    # json files are UTF-8 by convention; do not depend on the locale
    with open(config_filename, 'r', encoding='utf-8') as f:
        unit = json.load(f)
    for symbol in unit:
        for stage in unit[symbol]:
            if stage not in UNIT_STAGES:
                # two placeholders for two arguments; with only one "%s"
                # the formatting itself raised a TypeError
                raise ValueError('Test stage %s not in: %s' % (stage, str(UNIT_STAGES)))
    return unit
# TODO: add support for json, yaml, cson, toml
def unit_from_file(config_filename):
    """Reads a grammar unit test from a file. The format of the file is
    determined by the ending of its name: ``.json`` files are read with
    ``unit_from_json``, ``.ini`` files with ``unit_from_configfile``.

    Args:
        config_filename: Path of the unit test file.

    Returns:
        The test unit as returned by the format specific reader.

    Raises:
        ValueError: If the file name has any other ending. The message
            always starts with "Unknown", which ``grammar_suite`` relies
            on to recognize (and optionally ignore) this case.
    """
    fname = config_filename
    if fname.endswith(".json"):
        return unit_from_json(fname)
    elif fname.endswith(".ini"):
        return unit_from_configfile(fname)
    else:
        # splitext only looks at the last path component; slicing from
        # rfind('.') yielded the last character for names without a dot
        # and a path fragment if only a directory name contained a dot
        extension = os.path.splitext(fname)[1]
        raise ValueError("Unknown unit test file type: "
                         + (extension or "(no extension)"))
def grammar_unit(test_unit, parser_factory, transformer_factory):
"""Unit tests for a grammar-parser and ast transformations.
"""
if isinstance(test_suite, str):
test_suite = suite_from_configfile(test_suite)
if isinstance(test_unit, str):
test_unit = unit_from_file(test_unit)
errata = []
parser = parser_factory()
transform = transformer_factory()
for parser_name, tests in test_suite.items():
assert set(tests.keys()).issubset(SUITE_STAGES)
for parser_name, tests in test_unit.items():
assert set(tests.keys()).issubset(UNIT_STAGES)
for test_name, test_code in tests.get('match', dict()).items():
cst = parser(test_code, parser_name)
......@@ -145,10 +180,38 @@ def unit_grammar(test_suite, parser_factory, transformer_factory):
return errata
def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown_filetypes=False):
    """Runs all grammar unit tests in a directory. A file is considered a test
    unit, if it has the word "test" (in any letter case) in its name.

    Args:
        directory: Path of the directory containing the test unit files.
        parser_factory: Passed through to ``grammar_unit`` for each file.
        transformer_factory: Passed through to ``grammar_unit`` for each
            file.
        ignore_unknown_filetypes: If True, a ``ValueError`` whose message
            contains "Unknown" is swallowed and the file is skipped;
            all other ``ValueError`` instances are still raised.

    Returns:
        An empty string if no test unit reported errors, otherwise a
        multi-line error report listing the errors of each failed unit.

    Raises:
        ValueError: Propagated from ``grammar_unit`` unless it is ignored
            as described above.
    """
    all_errors = collections.OrderedDict()
    for filename in sorted(os.listdir(directory)):
        if "test" in filename.lower():
            try:
                print("Running grammar tests in: " + filename)
                errata = grammar_unit(os.path.join(directory, filename),
                                      parser_factory, transformer_factory)
                if errata:
                    all_errors[filename] = errata
            except ValueError as e:
                if not ignore_unknown_filetypes or "Unknown" not in str(e):
                    # bare raise keeps the original traceback intact
                    raise
    error_report = []
    for filename, errata in all_errors.items():
        error_report.append('Errors found by unit test "%s":' % filename)
        for error in errata:
            # indent every line of a (possibly multi-line) error message
            error_report.append('\t' + '\n\t'.join(error.split('\n')))
    if error_report:
        return ('Test suite "%s" revealed some errors:\n' % directory) + '\n'.join(error_report)
    return ''
def runner(tests, namespace):
""" Runs all or some selected tests from a test suite. To run all
tests in a module, call ``runner("", globals())`` from within
that module.
""" Runs all or some selected Python unit tests found in the
namespace. To run all tests in a module, call
``runner("", globals())`` from within that module.
Args:
tests: Either a string or a list of strings that contains the
......
......@@ -17,40 +17,41 @@ Artikel = [LZ]
#### LEMMA-POSITION ##########################################################
LemmaPosition = "LEMMA" [LZ] §HauptLemma [LemmaVarianten] §GrammatikPosition
LemmaPosition = "LEMMA" [LZ] §HauptLemma §TR [LemmaVarianten] §GrammatikPosition
HauptLemma = [klassisch] [gesichert] lemma
klassisch = "*"
gesichert = "$"
LemmaVarianten = { (LZ|TR) lemma }+
[ (LZ|TR) LemmaZusatz] [LZ]
LemmaVarianten = "VARIANTEN" [LZ]
{ lemma §TR }+
[LemmaZusatz §ABS]
lemma = LAT_WORT_TEIL { ("|" | "-") LAT_WORT_TEIL }
lemma = LAT_WORT_TEIL { ("|" | "-") LAT_WORT_TEIL }
LemmaZusatz = "ZUSATZ" lzs_typ
lzs_typ = "sim."
LemmaZusatz = "ZUSATZ" §lzs_typ
lzs_typ = /sim\./
## GRAMMATIK-POSITION ##
GrammatikPosition = "GRAMMATIK" [LZ] §wortart §TR §Flexion [genus]
{GrammatikVariante} [TR]
GrammatikPosition = "GRAMMATIK" [LZ] §wortart §ABS §Flexion [genus]
{GrammatikVariante} [ABS]
wortart = "nomen" | "n." |
"verb" | "v." |
"adverb" | "adv." |
"adjektiv" | "adj."
wortart = "nomen" | "n." |
"verb" | "v." |
"adverb" | "adv." |
"adjektiv" | "adj."
GrammatikVariante = TR GVariante
GVariante = Flexionen [genus] ":" Beleg
GrammatikVariante = ABS GVariante
GVariante = Flexionen [genus] ":" Beleg
Flexionen = Flexion { "," §Flexion }
Flexion = /-?[a-z]+/~
Flexionen = Flexion { "," §Flexion }
Flexion = /-?[a-z]+/~
genus = "maskulinum" | "m." |
"femininum" | "f." |
"neutrum" | "n."
genus = "maskulinum" | "m." |
"femininum" | "f." |
"neutrum" | "n."
......@@ -58,7 +59,7 @@ genus = "maskulinum" | "m." |
ArtikelKopf = SchreibweisenPosition
SchreibweisenPosition = "SCHREIBWEISE" [LZ] §SWTyp ":" [LZ]
§SWVariante { TR SWVariante} [LZ]
§SWVariante { ABS SWVariante} [LZ]
SWTyp = "script." | "script. fat-"
SWVariante = Schreibweise ":" Beleg
Schreibweise = "vizreg-" | "festregel(a)" | "fezdregl(a)" | "fat-"
......@@ -81,7 +82,7 @@ Belege = "BELEGE" [LZ] { "*" EinBeleg }
EinBeleg = { !([LZ] ("*" | "BEDEUTUNG" | "AUTOR" | "NAME" | "ZUSATZ"))
/\s*.*\s*/ }+
[Zusatz]
Zusatz = "ZUSATZ" /\s*.*/ TR
Zusatz = "ZUSATZ" /\s*.*/ ABS
#### AUTOR/AUTORIN ###########################################################
......@@ -102,10 +103,11 @@ LAT_WORT = /[a-z]+/~
LAT_WORT_TEIL = /[a-z]+/
GROSSSCHRIFT = /[A-ZÄÖÜ]+/~
TR = /\s*;\s*/ | { NEUE_ZEILE }+ # Trenner
NEUE_ZEILE = /\n/~
TR = ABS | LZ # (beliebiger) Trenner
ABS = /\s*;\s*/ | { ZW }+ # Abschluss (durch Semikolon oder Zeilenwechsel)
ZW = /\n/~ # Zeilenwechsel
LZ = /\s+/ # Leerzeichen oder -zeilen
LZ = /\s+/ # Leerzeichen oder -zeilen
DATEI_ENDE = !/./
NIEMALS = /(?!.)/
......@@ -66,32 +66,33 @@ class MLWGrammar(GrammarBase):
#### LEMMA-POSITION ##########################################################
LemmaPosition = "LEMMA" [LZ] §HauptLemma [LemmaVarianten] §GrammatikPosition
LemmaPosition = "LEMMA" [LZ] §HauptLemma §TR [LemmaVarianten] §GrammatikPosition
HauptLemma = [klassisch] [gesichert] lemma
klassisch = "*"
gesichert = "$"
LemmaVarianten = { (LZ|TR) lemma }+
[ (LZ|TR) LemmaZusatz] [LZ]
LemmaVarianten = "VARIANTEN" [LZ]
{ lemma §TR }+
[LemmaZusatz §ABS]
lemma = LAT_WORT_TEIL { ("|" | "-") LAT_WORT_TEIL }
lemma = LAT_WORT_TEIL { ("|" | "-") LAT_WORT_TEIL }
LemmaZusatz = "ZUSATZ" lzs_typ
lzs_typ = "sim."
LemmaZusatz = "ZUSATZ" §lzs_typ
lzs_typ = /sim\./
## GRAMMATIK-POSITION ##
GrammatikPosition = "GRAMMATIK" [LZ] §wortart §TR §Flexion [genus]
{GrammatikVariante} [TR]
GrammatikPosition = "GRAMMATIK" [LZ] §wortart §ABS §Flexion [genus]
{GrammatikVariante} [ABS]
wortart = "nomen" | "n." |
"verb" | "v." |
"adverb" | "adv." |
"adjektiv" | "adj."
GrammatikVariante = TR GVariante
GrammatikVariante = ABS GVariante
GVariante = Flexionen [genus] ":" Beleg
Flexionen = Flexion { "," §Flexion }
......@@ -107,7 +108,7 @@ class MLWGrammar(GrammarBase):
ArtikelKopf = SchreibweisenPosition
SchreibweisenPosition = "SCHREIBWEISE" [LZ] §SWTyp ":" [LZ]
§SWVariante { TR SWVariante} [LZ]
§SWVariante { ABS SWVariante} [LZ]
SWTyp = "script." | "script. fat-"
SWVariante = Schreibweise ":" Beleg
Schreibweise = "vizreg-" | "festregel(a)" | "fezdregl(a)" | "fat-"
......@@ -130,7 +131,7 @@ class MLWGrammar(GrammarBase):
EinBeleg = { !([LZ] ("*" | "BEDEUTUNG" | "AUTOR" | "NAME" | "ZUSATZ"))
/\s*.*\s*/ }+
[Zusatz]
Zusatz = "ZUSATZ" /\s*.*/ TR
Zusatz = "ZUSATZ" /\s*.*/ ABS
#### AUTOR/AUTORIN ###########################################################
......@@ -151,14 +152,15 @@ class MLWGrammar(GrammarBase):
LAT_WORT_TEIL = /[a-z]+/
GROSSSCHRIFT = /[A-ZÄÖÜ]+/~
TR = /\s*;\s*/ | { NEUE_ZEILE }+ # Trenner
NEUE_ZEILE = /\n/~
TR = ABS | LZ # (beliebiger) Trenner
ABS = /\s*;\s*/ | { ZW }+ # Abschluss (durch Semikolon oder Zeilenwechsel)
ZW = /\n/~ # Zeilenwechsel
LZ = /\s+/ # Leerzeichen oder -zeilen
LZ = /\s+/ # Leerzeichen oder -zeilen
DATEI_ENDE = !/./
NIEMALS = /(?!.)/
"""
source_hash__ = "9b040cad48585464610e2e7869e7af41"
source_hash__ = "2d6f71148926868bfeba2e2a30b07fec"
parser_initialization__ = "upon instatiation"
COMMENT__ = r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'#.*(?:\n|$)')
......@@ -167,8 +169,9 @@ class MLWGrammar(GrammarBase):
NIEMALS = RE('(?!.)', wR='')
DATEI_ENDE = NegativeLookahead(RE('.', wR=''))
LZ = RE('\\s+', wR='')
NEUE_ZEILE = RE('\\n')
TR = Alternative(RE('\\s*;\\s*', wR=''), OneOrMore(NEUE_ZEILE))
ZW = RE('\\n')
ABS = Alternative(RE('\\s*;\\s*', wR=''), OneOrMore(ZW))
TR = Alternative(ABS, LZ)
GROSSSCHRIFT = RE('[A-ZÄÖÜ]+')
LAT_WORT_TEIL = RE('[a-z]+', wR='')
LAT_WORT = RE('[a-z]+')
......@@ -179,7 +182,7 @@ class MLWGrammar(GrammarBase):
NAMENS_ABKÜRZUNG = RE('[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\\.')
Name = OneOrMore(Alternative(NAME, NAMENS_ABKÜRZUNG))
Autorinfo = Sequence(Alternative(Token("AUTORIN"), Token("AUTOR")), Name)
Zusatz = Sequence(Token("ZUSATZ"), RE('\\s*.*', wR=''), TR)
Zusatz = Sequence(Token("ZUSATZ"), RE('\\s*.*', wR=''), ABS)
EinBeleg = Sequence(OneOrMore(Sequence(NegativeLookahead(Sequence(Optional(LZ), Alternative(Token("*"), Token("BEDEUTUNG"), Token("AUTOR"), Token("NAME"), Token("ZUSATZ")))), RE('\\s*.*\\s*', wR=''))), Optional(Zusatz))
Belege = Sequence(Token("BELEGE"), Optional(LZ), ZeroOrMore(Sequence(Token("*"), EinBeleg)))
DeutscheBedeutung = Sequence(Token("DEU"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+'))
......@@ -194,23 +197,23 @@ class MLWGrammar(GrammarBase):
Schreibweise = Alternative(Token("vizreg-"), Token("festregel(a)"), Token("fezdregl(a)"), Token("fat-"))
SWVariante = Sequence(Schreibweise, Token(":"), Beleg)
SWTyp = Alternative(Token("script."), Token("script. fat-"))
SchreibweisenPosition = Sequence(Token("SCHREIBWEISE"), Optional(LZ), Required(SWTyp), Token(":"), Optional(LZ), Required(SWVariante), ZeroOrMore(Sequence(TR, SWVariante)), Optional(LZ))
SchreibweisenPosition = Sequence(Token("SCHREIBWEISE"), Optional(LZ), Required(SWTyp), Token(":"), Optional(LZ), Required(SWVariante), ZeroOrMore(Sequence(ABS, SWVariante)), Optional(LZ))
ArtikelKopf = SchreibweisenPosition
genus = Alternative(Token("maskulinum"), Token("m."), Token("femininum"), Token("f."), Token("neutrum"), Token("n."))
Flexion = RE('-?[a-z]+')
Flexionen = Sequence(Flexion, ZeroOrMore(Sequence(Token(","), Required(Flexion))))
GVariante = Sequence(Flexionen, Optional(genus), Token(":"), Beleg)
GrammatikVariante = Sequence(TR, GVariante)
GrammatikVariante = Sequence(ABS, GVariante)
wortart = Alternative(Token("nomen"), Token("n."), Token("verb"), Token("v."), Token("adverb"), Token("adv."), Token("adjektiv"), Token("adj."))
GrammatikPosition = Sequence(Token("GRAMMATIK"), Optional(LZ), Required(wortart), Required(TR), Required(Flexion), Optional(genus), ZeroOrMore(GrammatikVariante), Optional(TR))
lzs_typ = Token("sim.")
LemmaZusatz = Sequence(Token("ZUSATZ"), lzs_typ)
GrammatikPosition = Sequence(Token("GRAMMATIK"), Optional(LZ), Required(wortart), Required(ABS), Required(Flexion), Optional(genus), ZeroOrMore(GrammatikVariante), Optional(ABS))
lzs_typ = RE('sim\\.', wR='')
LemmaZusatz = Sequence(Token("ZUSATZ"), Required(lzs_typ))
lemma = Sequence(LAT_WORT_TEIL, ZeroOrMore(Sequence(Alternative(Token("|"), Token("-")), LAT_WORT_TEIL)))
LemmaVarianten = Sequence(OneOrMore(Sequence(Alternative(LZ, TR), lemma)), Optional(Sequence(Alternative(LZ, TR), LemmaZusatz)), Optional(LZ))
LemmaVarianten = Sequence(Token("VARIANTEN"), Optional(LZ), OneOrMore(Sequence(lemma, Required(TR))), Optional(Sequence(LemmaZusatz, Required(ABS))))
gesichert = Token("$")
klassisch = Token("*")
HauptLemma = Sequence(Optional(klassisch), Optional(gesichert), lemma)
LemmaPosition = Sequence(Token("LEMMA"), Optional(LZ), Required(HauptLemma), Optional(LemmaVarianten), Required(GrammatikPosition))
LemmaPosition = Sequence(Token("LEMMA"), Optional(LZ), Required(HauptLemma), Required(TR), Optional(LemmaVarianten), Required(GrammatikPosition))
Artikel = Sequence(Optional(LZ), Required(LemmaPosition), Optional(ArtikelKopf), Required(BedeutungsPosition), Required(Autorinfo), Optional(LZ), DATEI_ENDE)
root__ = Artikel
......
[match:lemma]
1: facitergula
2: facitergul|a
3: fasc|itergula
[fail:lemma]
99: duo vocabula
[match:HauptLemma]
1: facitergula
2: *fascitergula
3: * fasciterugl|a
[match:LemmaVarianten]
1: VARIANTEN
fasc-itergula
fac-iet-ergula
fac-ist-ergula
fa-rcu-tergula
2: "VARIANTEN fasc-itergula"
3: "VARIANTEN fasc-itergula fac-iet-ergula ZUSATZ sim."
[fail:LemmaVarianten]
99: * fascitergula
LEMMA facitergula
LEMMA facitergul|a
VARIANTEN
fasc-itergula
fac-iet-ergula
fac-ist-ergula
fa-rcu-tergula
fascite-rcu-la
ZUSATZ sim.
......
This diff is collapsed.
......@@ -18,59 +18,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from DHParser import testing
from DHParser import parsers
# from DHParser.dsl import load_compiler_suite
from MLW_compiler import get_MLW_grammar, get_MLW_transformer
MLW_TEST_CASES_LEMMA_POSITION = {
"lemma": {
"match": {
1: "facitergula",
2: "facitergul|a",
3: "fasc|itergula"
},
"fail": {
99: "duo vocabula"
}
},
"HauptLemma" : {
"match": {
1: "facitergula",
2: "*fascitergula",
3: "* fasciterugl|a"
}
},
"LemmaVarianten": {
"match": {
1: """
fasc-itergula
fac-iet-ergula
fac-ist-ergula
fa-rcu-tergula
""",
2: " fasc-itergula",
3: " fasc-itergula fac-iet-ergula ZUSATZ sim.",
},
"fail": {
99: "* fascitergula"
}
}
}
class TestMLWGrammar:
def test_lemma_position(self):
errata = testing.unit_grammar(MLW_TEST_CASES_LEMMA_POSITION,
get_MLW_grammar,
get_MLW_transformer)
assert not errata, str(errata)
from DHParser import testing
from MLW_compiler import get_MLW_grammar, get_MLW_transformer
if __name__ == "__main__":
testing.runner("", globals())
error_report = testing.grammar_suite('grammar_tests', get_MLW_grammar, get_MLW_transformer)
assert not error_report, error_report
# class TestMLWGrammar:
# def test_lemma_position(self):
# errata = testing.grammar_unit('grammar_tests/test_lemmaposition.ini', # MLW_TEST_CASES_LEMMA_POSITION,
# get_MLW_grammar,
# get_MLW_transformer)
# assert not errata, str(errata)
#
#
# if __name__ == "__main__":
# testing.runner("", globals())
......@@ -31,7 +31,7 @@ from DHParser.syntaxtree import no_operation, traverse, remove_expendables, \
from DHParser.parsers import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import parser_factory, DHPARSER_IMPORTS
from DHParser.testing import unit_grammar, mock_syntax_tree
from DHParser.testing import grammar_unit, mock_syntax_tree
ARITHMETIC_EBNF = """
@ whitespace = linefeed
......@@ -108,9 +108,9 @@ class TestGrammarTest:
def test_testing_grammar(self):
parser_fac = parser_factory(ARITHMETIC_EBNF)
trans_fac = lambda : ARITHMETIC_EBNFTransform
errata = unit_grammar(self.cases, parser_fac, trans_fac)
errata = grammar_unit(self.cases, parser_fac, trans_fac)
assert not errata, str(errata)
errata = unit_grammar(self.failure_cases, parser_fac, trans_fac)
errata = grammar_unit(self.failure_cases, parser_fac, trans_fac)
# for e in errata:
# print(e)
assert len(errata) == 3
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment