Currently, job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting Wed 26.1.2022, the default expiration time will be 30 days (the GitLab default). Artifacts that already exist in completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit c049b06f authored by eckhart
Browse files

- MLW extended

parent 8e3b05a7
...@@ -45,20 +45,22 @@ GrammatikPosition = ZWW "GRAMMATIK" [LZ] §Grammatik { ABS GrammatikVariante } ...@@ -45,20 +45,22 @@ GrammatikPosition = ZWW "GRAMMATIK" [LZ] §Grammatik { ABS GrammatikVariante }
Grammatik = wortart §ABS flexion [genus] Grammatik = wortart §ABS flexion [genus]
wortart = "nomen" | "n." wortart = nomen | verb | adverb | adjektiv | praeposition
| "verb" | "v." nomen = "nomen" | "n."
| "adverb" | "adv." verb = "verb" | "v."
| "adjektiv" | "adj." adverb = "adverb" | "adv."
| "praeposition" | "praep." adjektiv = "adjektiv" | "adj."
praeposition = "praeposition" | "praep."
flexion = deklination | konjugation flexion = deklination | konjugation
deklination = FLEX ["," FLEX] deklination = FLEX ["," FLEX]
konjugation = FLEX konjugation = FLEX
FLEX = /-?[a-z]+/~ FLEX = /-?[a-z]+/~
genus = "maskulinum" | "m." genus = maskulinum | femininum | neutrum
| "femininum" | "f." maskulinum = "maskulinum" | "m."
| "neutrum" | "n." femininum = "femininum" | "f."
neutrum = "neutrum" | "n."
GrammatikVariante = [wortart ABS] flexion [genus] DPP Beleg { FORTSETZUNG Beleg } # Beleg { SEM Beleg } GrammatikVariante = [wortart ABS] flexion [genus] DPP Beleg { FORTSETZUNG Beleg } # Beleg { SEM Beleg }
......
...@@ -99,20 +99,22 @@ class MLWGrammar(Grammar): ...@@ -99,20 +99,22 @@ class MLWGrammar(Grammar):
Grammatik = wortart §ABS flexion [genus] Grammatik = wortart §ABS flexion [genus]
wortart = "nomen" | "n." wortart = nomen | verb | adverb | adjektiv | praeposition
| "verb" | "v." nomen = "nomen" | "n."
| "adverb" | "adv." verb = "verb" | "v."
| "adjektiv" | "adj." adverb = "adverb" | "adv."
| "praeposition" | "praep." adjektiv = "adjektiv" | "adj."
praeposition = "praeposition" | "praep."
flexion = deklination | konjugation flexion = deklination | konjugation
deklination = FLEX ["," FLEX] deklination = FLEX ["," FLEX]
konjugation = FLEX konjugation = FLEX
FLEX = /-?[a-z]+/~ FLEX = /-?[a-z]+/~
genus = "maskulinum" | "m." genus = maskulinum | femininum | neutrum
| "femininum" | "f." maskulinum = "maskulinum" | "m."
| "neutrum" | "n." femininum = "femininum" | "f."
neutrum = "neutrum" | "n."
GrammatikVariante = [wortart ABS] flexion [genus] DPP Beleg { FORTSETZUNG Beleg } # Beleg { SEM Beleg } GrammatikVariante = [wortart ABS] flexion [genus] DPP Beleg { FORTSETZUNG Beleg } # Beleg { SEM Beleg }
...@@ -309,7 +311,7 @@ class MLWGrammar(Grammar): ...@@ -309,7 +311,7 @@ class MLWGrammar(Grammar):
flexion = Forward() flexion = Forward()
genus = Forward() genus = Forward()
wortart = Forward() wortart = Forward()
source_hash__ = "ded96803a4eb4164ea8d2cf18924172b" source_hash__ = "d4c194f1b966734e852e0293584409fb"
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)' COMMENT__ = r'(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)'
WHITESPACE__ = r'[\t ]*' WHITESPACE__ = r'[\t ]*'
...@@ -426,12 +428,20 @@ class MLWGrammar(Grammar): ...@@ -426,12 +428,20 @@ class MLWGrammar(Grammar):
EtymologieVariante = Alternative(LAT, Series(GRI, Option(EtymologieBesonderheit), Option(Series(Token("ETYM"), Etymologie)), DPP, Beleg)) EtymologieVariante = Alternative(LAT, Series(GRI, Option(EtymologieBesonderheit), Option(Series(Token("ETYM"), Etymologie)), DPP, Beleg))
EtymologiePosition = Series(ZWW, Token("ETYMOLOGIE"), Option(LZ), OneOrMore(EtymologieVariante)) EtymologiePosition = Series(ZWW, Token("ETYMOLOGIE"), Option(LZ), OneOrMore(EtymologieVariante))
GrammatikVariante = Series(Option(Series(wortart, ABS)), flexion, Option(genus), DPP, Beleg, ZeroOrMore(Series(FORTSETZUNG, Beleg))) GrammatikVariante = Series(Option(Series(wortart, ABS)), flexion, Option(genus), DPP, Beleg, ZeroOrMore(Series(FORTSETZUNG, Beleg)))
genus.set(Alternative(Token("maskulinum"), Token("m."), Token("femininum"), Token("f."), Token("neutrum"), Token("n."))) neutrum = Alternative(Token("neutrum"), Token("n."))
femininum = Alternative(Token("femininum"), Token("f."))
maskulinum = Alternative(Token("maskulinum"), Token("m."))
genus.set(Alternative(maskulinum, femininum, neutrum))
FLEX = RE('-?[a-z]+') FLEX = RE('-?[a-z]+')
konjugation = Synonym(FLEX) konjugation = Synonym(FLEX)
deklination = Series(FLEX, Option(Series(Token(","), FLEX))) deklination = Series(FLEX, Option(Series(Token(","), FLEX)))
flexion.set(Alternative(deklination, konjugation)) flexion.set(Alternative(deklination, konjugation))
wortart.set(Alternative(Token("nomen"), Token("n."), Token("verb"), Token("v."), Token("adverb"), Token("adv."), Token("adjektiv"), Token("adj."), Token("praeposition"), Token("praep."))) praeposition = Alternative(Token("praeposition"), Token("praep."))
adjektiv = Alternative(Token("adjektiv"), Token("adj."))
adverb = Alternative(Token("adverb"), Token("adv."))
verb = Alternative(Token("verb"), Token("v."))
nomen = Alternative(Token("nomen"), Token("n."))
wortart.set(Alternative(nomen, verb, adverb, adjektiv, praeposition))
Grammatik = Series(wortart, ABS, flexion, Option(genus), mandatory=1) Grammatik = Series(wortart, ABS, flexion, Option(genus), mandatory=1)
GrammatikPosition = Series(ZWW, Token("GRAMMATIK"), Option(LZ), Grammatik, ZeroOrMore(Series(ABS, GrammatikVariante)), mandatory=3) GrammatikPosition = Series(ZWW, Token("GRAMMATIK"), Option(LZ), Grammatik, ZeroOrMore(Series(ABS, GrammatikVariante)), mandatory=3)
LemmaVariante = Series(LAT_WORT, Option(Zusatz)) LemmaVariante = Series(LAT_WORT, Option(Zusatz))
...@@ -466,10 +476,11 @@ LemmaVariante_table = { ...@@ -466,10 +476,11 @@ LemmaVariante_table = {
} }
MLW_AST_transformation_table = { MLW_AST_transformation_table = {
# AST Transformations for the MLW-grammar # AST Transformations for the MLW-grammar
"+": [remove_empty, remove_nodes('ZWW', 'LZ', 'DPP', 'COMMENT__', 'ABS', 'SEM'), "+": [remove_empty, remove_nodes('ZWW', 'ZW', 'LZ', 'DPP', 'COMMENT__', 'ABS', 'SEM'),
remove_tokens(",", "{", "}", "=>")], remove_tokens],
"Autor": [reduce_single_child], "Autor": [reduce_single_child],
"Artikel": [], "Artikel": [],
"LemmaPosition": [remove_first], "LemmaPosition": [remove_first],
...@@ -477,7 +488,7 @@ MLW_AST_transformation_table = { ...@@ -477,7 +488,7 @@ MLW_AST_transformation_table = {
"klassisch": [reduce_single_child], "klassisch": [reduce_single_child],
"gesichert": [reduce_single_child], "gesichert": [reduce_single_child],
"LemmaVariante": [reduce_single_child, traverse_locally(LemmaVariante_table)], "LemmaVariante": [reduce_single_child, traverse_locally(LemmaVariante_table)],
"LemmaVarianten": [flatten, remove_nodes("ZW")], "LemmaVarianten": [flatten],
"LemmaWort": [reduce_single_child], "LemmaWort": [reduce_single_child],
"LemmaZusatz": [], "LemmaZusatz": [],
"lzs_typ": [], "lzs_typ": [],
...@@ -508,9 +519,9 @@ MLW_AST_transformation_table = { ...@@ -508,9 +519,9 @@ MLW_AST_transformation_table = {
[remove_first, flatten], [remove_first, flatten],
"Bedeutungskategorie": [], "Bedeutungskategorie": [],
"Beleg": [], "Beleg": [],
"BelegText": [partial(strip, condition=lambda context: is_expendable(context) "BelegText":
or has_content(context, '[".]')), [strip(lambda context: is_expendable(context) or has_content(context, '[".]')),
reduce_single_child], reduce_single_child],
"BelegStelle": [flatten], "BelegStelle": [flatten],
"Interpretamente": [], "Interpretamente": [],
"LateinischeBedeutung": [remove_nodes("LAT"), flatten], "LateinischeBedeutung": [remove_nodes("LAT"), flatten],
...@@ -518,23 +529,23 @@ MLW_AST_transformation_table = { ...@@ -518,23 +529,23 @@ MLW_AST_transformation_table = {
"LateinischerAusdruck": [flatten, reduce_single_child], "LateinischerAusdruck": [flatten, reduce_single_child],
"DeutscherAusdruck": [flatten, reduce_single_child], "DeutscherAusdruck": [flatten, reduce_single_child],
"LateinischesWort, DeutschesWort": [strip, collapse], "LateinischesWort, DeutschesWort": [strip, collapse],
"Belege": [flatten, remove_tokens("*")], "Belege": [flatten],
"Beleg": [], "Beleg": [],
"EinBeleg": [], "EinBeleg": [],
"Zitat": [flatten, remove_nodes("ZW")], "Zitat": [flatten],
"Zusatz": [reduce_single_child, flatten, remove_tokens(";;", ";")], "Zusatz": [reduce_single_child, flatten],
"ArtikelVerfasser": [remove_first], "ArtikelVerfasser": [remove_first],
"Stellenverzeichnis": [remove_first], "Stellenverzeichnis": [remove_first],
"Verweisliste": [flatten, remove_tokens("*")], "Verweisliste": [flatten],
"Stellenverweis": [flatten], "Stellenverweis": [flatten],
"Name": [], "Name": [],
"Stelle": [collapse], "Stelle": [collapse],
"SW_LAT": [replace_or_reduce], "SW_LAT": [replace_or_reduce],
"SW_DEU": [replace_or_reduce], "SW_DEU": [replace_or_reduce],
"SW_GRIECH": [replace_or_reduce], "SW_GRIECH": [replace_or_reduce],
"Verweis": [remove_tokens("=>")], "Verweis": [],
"VerweisZiel": [], "VerweisZiel": [],
"Anker": [remove_tokens("#"), reduce_single_child], "Anker": [reduce_single_child],
"Werk": [reduce_single_child], "Werk": [reduce_single_child],
"ZielName": [replace_by_single_child], "ZielName": [replace_by_single_child],
"URL": [flatten, keep_nodes('protokoll', 'domäne', 'pfad', 'ziel')], "URL": [flatten, keep_nodes('protokoll', 'domäne', 'pfad', 'ziel')],
......
...@@ -143,11 +143,6 @@ Match-test "1" ...@@ -143,11 +143,6 @@ Match-test "1"
### AST ### AST
(LemmaPosition (LemmaPosition
(Lemma
(LemmaWort
"facitergula"
)
)
(LemmaVarianten (LemmaVarianten
(LemmaVariante (LemmaVariante
"fascitergula" "fascitergula"
...@@ -168,20 +163,12 @@ Match-test "1" ...@@ -168,20 +163,12 @@ Match-test "1"
) )
) )
(GrammatikPosition (GrammatikPosition
(Grammatik (flexion
(wortart (deklination
"nomen" (FLEX
) "-ae"
(flexion
(deklination
(FLEX
"-ae"
)
) )
) )
(genus
"f."
)
) )
) )
) )
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment