Commit 8d0a544a authored by di68kap
Browse files

- MLW Fehlerkorrekturen

parent c59f4169
......@@ -470,8 +470,8 @@ class EBNFCompiler(Compiler):
compiler += [' def ' + method_name + '(self, node):',
' return node', '']
else:
compiler += [' def ' + method_name + '(self, node):',
' pass', '']
compiler += [' # def ' + method_name + '(self, node):',
' # return node', '']
compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(compiler)
......
......@@ -1982,6 +1982,14 @@ class Compiler:
"""
return 'on_' + node_name
def fallback_compiler(self, node: Node) -> Any:
    """Generic compiler method for node types without an `on_XXX` method.

    A node that has children gets every child compiled through
    `self.compile`; the tuple of the values returned by these calls
    becomes the node's new result. A node without children is left
    untouched.

    Returns:
        The very node object that was passed in.
    """
    kids = node.children
    if not kids:
        # Leaf node: nothing to descend into.
        return node
    node.result = tuple(self.compile(child) for child in kids)
    return node
def compile(self, node: Node) -> Any:
"""
Calls the compilation method for the given node and returns the
......@@ -2002,7 +2010,10 @@ class Compiler:
"'_' or '__' or ending with '__' is reserved.)")
return None
else:
try:
compiler = self.__getattribute__(self.method_name(elem))
except AttributeError:
compiler = self.fallback_compiler
self.context.append(node)
result = compiler(node)
self.context.pop()
......
......@@ -54,16 +54,16 @@ AUTORIN Weber
STELLENVERZEICHNIS fascitergula
* Form.: Sangall.; 39 p. 421,16 {=> URL_1}
* Form.: Sangall.; 39 p. 421,16 {=> https://URL}
* Libri: confrat. I; app. A 6 p. 137,30 {-} // {-} bedeutet: keine URL vorhanden
* Catal.: thes. Germ.;
18,7 {=> URL_3}
28,11 (post 851) {=> URL_4}
76,15 {=> URL_5}
40,5 {=> URL_6}
129a,5 {=> URL_7}
24,8 {=> URL_8}
114,8 {=> URL_9}
18,7 {=> https://URL_3}
28,11 (post 851) {=> https://URL_4}
76,15 {=> https://URL_5}
40,5 {=> https://URL_6}
129a,5 {=> https://URL_7}
24,8 {=> https://URL_8}
114,8 {=> https://URL_9}
6,24 {-}
92,6 {-}
21,20 IIII {=> URL_10}
21,20 IIII {=> https://URL_10}
......@@ -59,22 +59,22 @@ U1Bedeutung {
U2Bedeutung {
display:block;
margin-left:2em;
margin-left:1em;
}
U3Bedeutung {
display:block;
margin-left:3em;
margin-left:1em;
}
U4Bedeutung {
display:block;
margin-left:4em;
margin-left:1em;
}
U5Bedeutung {
display:block;
margin-left:5em;
margin-left:1em;
}
Interpretamente {
......@@ -113,7 +113,7 @@ Stelle {
}
BelegText {
color: darkgoldenrod;
color: darkslateblue;
}
BelegText:before {
......@@ -124,12 +124,25 @@ BelegText:after {
content:"”";
}
/* Sonderbelege elements: italic text, wrapped in parentheses by the
   generated content of the two pseudo-element rules below. */
Sonderbelege {
font-style: italic;
}
/* Inserts a space and an opening parenthesis before the element. */
Sonderbelege:before {
content:" ("
}
/* Inserts a closing parenthesis and a space after the element. */
Sonderbelege:after {
content: ") "
}
Zusatz {
font-style: italic;
}
/* Verweis elements are shown as blue, underlined text. */
Verweis {
color: blue;
text-decoration: underline;
}
pfad:after {
......
......@@ -64,8 +64,8 @@ BEDEUTUNG
* Alfan.: premn. phys. prol.; 4 p. 1 "semet ipsum ... iuste privat imperio,
quisquis nec se cognoscit nec ea, quibus imperat".
* Epist.: Worm.; I 34 p. 63,1 "in maximo imperio minima est licentia {v. notam ed.}".
* Trad.: Reichersb.; 44 "Penno habebat germanum ... sub inperio et servitio adhuc domini Paldmari".
* Otto: Frising. gest.; 2,29 p. 135,15 "orbis imperium affectas {sc. Fridericus}".
** {verschachtelte Belege:} Trad.: Reichersb.; 44 "Penno habebat germanum ... sub inperio et servitio adhuc domini Paldmari".
** Otto: Frising. gest.; 2,29 p. 135,15 "orbis imperium affectas {sc. Fridericus}".
* Chart.: Mekl.; 783 "quo {tempore} ... dominus Iohannes gubernavit eiusdem imperium civitatis."
{al. v. et {=> p. 1402, 47|URL}}.
......
......@@ -189,17 +189,20 @@ Edition = EINZEILER
Verweis = "{" VerweisKern "}"
VerweisKern = "=>" §((alias "|" ("-" | URL)) | URL)
Anker = "{" "@" §ziel "}"
URL = [ ([protokoll] domäne /\//) | /\// ] { pfad /\// } ziel
# URL = [ ([protokoll] domäne /\//) | /\// ] { pfad /\// } ziel
URL = [protokoll] [/\//] { pfad /\// } ziel
alias = FREITEXT
protokoll = /\w+:\/\//
domäne = /\w+\.\w+(?:\.\w+)*/
pfad = /\w+/
ziel = /[\w=?.%&\[\] ]+/
# domäne = /\w+\.\w+(?:\.\w+)*/
pfad = PFAD_NAME # /\w+/
ziel = PFAD_NAME # /[\w=?.%&\[\] ]+/
#### GENERISCHE UND ATOMARE AUSDRÜCKE ########################################
PFAD_NAME = /[\w=?.%&\[\] ]+/
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~
NAME = /[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+/~
......
......@@ -31,7 +31,7 @@ from DHParser import is_filename, load_if_file, \
is_empty, is_expendable, collapse, replace_content, remove_nodes, remove_content, \
remove_brackets, replace_parser, traverse_locally, remove_nodes, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
lstrip, rstrip, strip, keep_nodes, remove_anonymous_empty, has_parent
lstrip, rstrip, strip, keep_nodes, remove_anonymous_empty, has_parent, MockParser
from DHParser.log import logging
......@@ -248,17 +248,22 @@ class MLWGrammar(Grammar):
Verweis = "{" VerweisKern "}"
VerweisKern = "=>" §((alias "|" ("-" | URL)) | URL)
Anker = "{" "@" §ziel "}"
URL = [ ([protokoll] domäne /\//) | /\// ] { pfad /\// } ziel
# URL = [ ([protokoll] domäne /\//) | /\// ] { pfad /\// } ziel
URL = [protokoll] [/\//] { pfad /\// } ziel
alias = FREITEXT
protokoll = /\w+:\/\//
domäne = /\w+\.\w+(?:\.\w+)*/
pfad = /\w+/
ziel = /[\w=?.%&\[\] ]+/
# domäne = /\w+\.\w+(?:\.\w+)*/
pfad = PFAD_NAME # /\w+/
ziel = PFAD_NAME # /[\w=?.%&\[\] ]+/
#### GENERISCHE UND ATOMARE AUSDRÜCKE ########################################
PFAD_NAME = /[\w=?.%&\[\] ]+/
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~
NAME = /[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+/~
......@@ -322,7 +327,7 @@ class MLWGrammar(Grammar):
flexion = Forward()
genus = Forward()
wortart = Forward()
source_hash__ = "c0bebb58c93f0f0986f0383b6ec022ea"
source_hash__ = "d2e7f9b37c45e9df1e67ccf382641f36"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)'
WHITESPACE__ = r'[\t ]*'
......@@ -367,12 +372,12 @@ class MLWGrammar(Grammar):
DEU_WORT.set(Alternative(DEU_GROSS, DEU_KLEIN, GROSSBUCHSTABE))
NAME = RE('[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+')
NAMENS_ABKÜRZUNG = RE('[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\\.')
ziel = RegExp('[\\w=?.%&\\[\\] ]+')
pfad = RegExp('\\w+')
domäne = RegExp('\\w+\\.\\w+(?:\\.\\w+)*')
PFAD_NAME = RegExp('[\\w=?.%&\\[\\] ]+')
ziel = Synonym(PFAD_NAME)
pfad = Synonym(PFAD_NAME)
protokoll = RegExp('\\w+://')
alias = Synonym(FREITEXT)
URL = Series(Option(Alternative(Series(Option(protokoll), domäne, RegExp('/')), RegExp('/'))), ZeroOrMore(Series(pfad, RegExp('/'))), ziel)
URL = Series(Option(protokoll), Option(RegExp('/')), ZeroOrMore(Series(pfad, RegExp('/'))), ziel)
Anker = Series(Token("{"), Token("@"), ziel, Token("}"), mandatory=2)
VerweisKern = Series(Token("=>"), Alternative(Series(alias, Token("|"), Alternative(Token("-"), URL)), URL), mandatory=1)
Verweis = Series(Token("{"), VerweisKern, Token("}"))
......@@ -568,8 +573,8 @@ MLW_AST_transformation_table = {
"SW_DEU": [replace_or_reduce],
"SW_GRIECH": [replace_or_reduce],
"Verweis": [],
"VerweisKern": [reduce_single_child],
"ziel": [], # [apply_if(replace_content(lambda s: ''), has_parent("URL"))],
"VerweisKern": [flatten],
"pfad, ziel": [reduce_single_child], # [apply_if(replace_content(lambda s: ''), has_parent("URL"))],
"Anker": [reduce_single_child],
"Werk": [reduce_single_child],
"ZielName": [replace_by_single_child],
......@@ -628,204 +633,57 @@ class MLWCompiler(Compiler):
"""Compiler for the abstract-syntax-tree of a MLW source file.
"""
# Counter symbols, looked up as ZÄHLER[level][position] by ergänze_Zähler.
# There are five levels (indices 0-4) with nine symbols each: Roman
# numerals, capital letters, digits, small letters and Greek letters.
# The string rows are indexed character by character.
ZÄHLER = (
('I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX'),
'ABCDEFGHI',
'123456789',
'abcdefghi',
'αβγδεζηθι'
)
def __init__(self, grammar_name="MLW", grammar_source=""):
    """Set up the compiler for the grammar called `grammar_name`.

    Both arguments are forwarded unchanged to the base-class initializer.

    Args:
        grammar_name: Name of the grammar; must consist of word
            characters only (checked by the assertion below).
        grammar_source: Passed through to the base class.

    Raises:
        AssertionError: If `grammar_name` is empty or contains a
            non-word character.
    """
    super(MLWCompiler, self).__init__(grammar_name, grammar_source)
    # Raw string literal: `\w` and `\Z` are regular-expression escapes, not
    # Python string escapes. The non-raw form triggers an invalid-escape
    # warning on current Python versions.
    assert re.match(r'\w+\Z', grammar_name)
def on_Artikel(self, node):
def on_VerweisKern(self, node):
    """Adjust the children of a 'VerweisKern' node according to its first child.

    * First child's parser is named "FREITEXT": the result of the second
      child is replaced by the empty string.
    * First child's parser is named "ziel": the result of that first child
      is replaced by the text "v. ibi.".
    * Any other first child: nothing is changed.

    Returns:
        The node that was passed in.
    """
    first = node.children[0]
    kind = first.parser.name
    if kind == "FREITEXT":
        # presumably the FREITEXT child is the alias and the second child
        # the link target -- confirm against the grammar
        node.children[1].result = ""
    elif kind == "ziel":
        first.result = "v. ibi."
    return node
def on_LemmaPosition(self, node):
pass
def on_Lemma(self, node):
pass
def on_klassisch(self, node):
pass
def on_gesichert(self, node):
pass
def on_LemmaVarianten(self, node):
pass
def on_LemmaWort(self, node):
pass
def on_LemmaZusatz(self, node):
pass
def on_lzs_typ(self, node):
pass
def on_GrammatikPosition(self, node):
pass
def on_wortart(self, node):
pass
def on_GrammatikVarianten(self, node):
pass
def on_flexion(self, node):
pass
def on_FLEX(self, node):
pass
def on_genus(self, node):
pass
def on_EtymologiePosition(self, node):
pass
def on_EtymologieVarianten(self, node):
pass
def on_EtymologieVariante(self, node):
pass
def on_ArtikelKopf(self, node):
pass
def on_SchreibweisenPosition(self, node):
pass
def on_SWTyp(self, node):
pass
def on_SWVariante(self, node):
pass
def on_Schreibweise(self, node):
pass
def ergänze_Zähler(self, node, tiefe):
    """Prepend a counter node to every 'Bedeutung' child of `node`.

    The children of `node` whose parser is named "Bedeutung" are numbered
    in document order. For the n-th of them (counting from zero) a new
    "Zähler" node holding `self.ZÄHLER[tiefe][n]` is created and put in
    front of the children of that child's first child. Children with any
    other parser name are skipped and do not advance the numbering.

    Args:
        node: Node whose children are scanned.
        tiefe: Index of the counter row in `self.ZÄHLER` to use.

    Raises:
        IndexError: If `tiefe` is outside the `self.ZÄHLER` table or more
            'Bedeutung' children exist than the selected row has symbols.
            NOTE(review): the table visible in this class has five rows of
            nine symbols each -- confirm that callers stay within that.
    """
    bedeutungen = (kind for kind in node.children
                   if kind.parser.name == "Bedeutung")
    for nummer, bedeutung in enumerate(bedeutungen):
        marke = Node(MockParser("Zähler", ":RE"), self.ZÄHLER[tiefe][nummer])
        kopf = bedeutung.children[0]
        kopf.children = (marke,) + kopf.children
def on_BedeutungsPosition(self, node):
pass
def on_Bedeutung(self, node):
pass
def on_Bedeutungskategorie(self, node):
pass
def on_Interpretamente(self, node):
pass
def on_LateinischeBedeutung(self, node):
pass
def on_DeutscheBedeutung(self, node):
pass
def on_Belege(self, node):
pass
def on_EinBeleg(self, node):
pass
def on_Zusatz(self, node):
pass
def on_ArtikelVerfasser(self, node):
pass
def on_Name(self, node):
pass
def on_SW_LAT(self, node):
pass
def on_SW_DEU(self, node):
pass
def on_SW_GRIECH(self, node):
pass
def on_Beleg(self, node):
pass
def on_Verweis(self, node):
pass
def on_VerweisZiel(self, node):
pass
def on_ZielName(self, node):
pass
def on_NAMENS_ABKÜRZUNG(self, node):
pass
def on_NAME(self, node):
pass
def on_DEU_WORT(self, node):
pass
def on_DEU_GROSS(self, node):
pass
def on_DEU_KLEIN(self, node):
pass
def on_LAT_WORT(self, node):
pass
def on_LAT_WORT_TEIL(self, node):
pass
def on_GROSSSCHRIFT(self, node):
pass
def on_GROSSFOLGE(self, node):
pass
def on_BUCHSTABENFOLGE(self, node):
pass
def on_ZEICHENFOLGE(self, node):
pass
def on_TR(self, node):
pass
def on_ABS(self, node):
pass
def on_LZ(self, node):
pass
def on_ZW(self, node):
pass
def on_ZWW(self, node):
pass
def on_LÜCKE(self, node):
pass
def on_LEERRAUM(self, node):
pass
def on_LEERZEILE(self, node):
pass
self.ergänze_Zähler(node, 0)
return self.fallback_compiler(node)
def on_RZS(self, node):
pass
# Compile a 'U1Bedeutung' node: number its 'Bedeutung' children with the
# level-1 counters (self.ergänze_Zähler), then return whatever
# self.fallback_compiler yields for the node.
def on_U1Bedeutung(self, node):
self.ergänze_Zähler(node, 1)
return self.fallback_compiler(node)
def on_ZEILENSPRUNG(self, node):
pass
# Compile a 'U2Bedeutung' node: number its 'Bedeutung' children with the
# level-2 counters (self.ergänze_Zähler), then return whatever
# self.fallback_compiler yields for the node.
def on_U2Bedeutung(self, node):
self.ergänze_Zähler(node, 2)
return self.fallback_compiler(node)
def on_KOMMENTARZEILEN(self, node):
pass
# Compile a 'U3Bedeutung' node: number its 'Bedeutung' children with the
# level-3 counters (self.ergänze_Zähler), then return whatever
# self.fallback_compiler yields for the node.
def on_U3Bedeutung(self, node):
self.ergänze_Zähler(node, 3)
return self.fallback_compiler(node)
def on_DATEI_ENDE(self, node):
pass
# Compile a 'U4Bedeutung' node: number its 'Bedeutung' children with the
# level-4 counters (self.ergänze_Zähler), then return whatever
# self.fallback_compiler yields for the node.
def on_U4Bedeutung(self, node):
self.ergänze_Zähler(node, 4)
return self.fallback_compiler(node)
def on_NIEMALS(self, node):
pass
# Compile a 'U5Bedeutung' node: number its 'Bedeutung' children with the
# level-5 counters (self.ergänze_Zähler), then return whatever
# self.fallback_compiler yields for the node.
# NOTE(review): the ZÄHLER table visible in this class has only five rows
# (indices 0-4), so level 5 would raise IndexError as soon as a 'Bedeutung'
# child is present -- confirm whether a sixth counter row is missing.
def on_U5Bedeutung(self, node):
self.ergänze_Zähler(node, 5)
return self.fallback_compiler(node)
def get_compiler(grammar_name="MLW", grammar_source="") -> MLWCompiler:
......
{
// // "comments"-Abschnitt scheint keine Wirkung zu haben!? (E.A. 4.2.2018)
// "comments": {
// // symbol used for single line comment. Remove this entry if your language does not support line comments
// "lineComment": "//",
// // symbols used for start and end a block comment. Remove this entry if your language does not support block comments
// "blockComment": [ "/*", "*/" ]
// },
// "comments"-Abschnitt scheint keine Wirkung zu haben!? (E.A. 4.2.2018)
"comments": {
// symbol used for single line comment. Remove this entry if your language does not support line comments
"lineComment": "//",
// symbols used for start and end a block comment. Remove this entry if your language does not support block comments
"blockComment": [ "/*", "*/" ]
},
// symbols used as brackets
"brackets": [
["{", "}"],
......
......@@ -7,12 +7,12 @@ F1: "https:"
F2: "http: //"
F3: "http:/"
[match:domäne]
[match:pfad]
M1: "badw.de"
M2: "www.badw.de"
M3: "badw"
[fail:domäne]
F1: "badw"
[fail:pfad]
F2: "https://badw.de"
[match:ziel]
......@@ -33,6 +33,7 @@ M4: "verzeichnis/ziel"
M5: "ziel"
M6: "hauptverzeichnis/unterverzeichnis/zielseite.html"
M7: "ziel.html"
M8: "https://www.badw.de"
[fail:URL]
F1: "verzeichnis/"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment