Commit 8d0a544a authored by di68kap's avatar di68kap
Browse files

- MLW Fehlerkorrekturen

parent c59f4169
...@@ -470,8 +470,8 @@ class EBNFCompiler(Compiler): ...@@ -470,8 +470,8 @@ class EBNFCompiler(Compiler):
compiler += [' def ' + method_name + '(self, node):', compiler += [' def ' + method_name + '(self, node):',
' return node', ''] ' return node', '']
else: else:
compiler += [' def ' + method_name + '(self, node):', compiler += [' # def ' + method_name + '(self, node):',
' pass', ''] ' # return node', '']
compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)] compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(compiler) return '\n'.join(compiler)
......
...@@ -1982,6 +1982,14 @@ class Compiler: ...@@ -1982,6 +1982,14 @@ class Compiler:
""" """
return 'on_' + node_name return 'on_' + node_name
def fallback_compiler(self, node: Node) -> Any:
"""This is a generic compiler function which will be called on
all those node types for which no compiler method `on_XXX` has
been defined."""
if node.children:
node.result = tuple(self.compile(nd) for nd in node.children)
return node
def compile(self, node: Node) -> Any: def compile(self, node: Node) -> Any:
""" """
Calls the compilation method for the given node and returns the Calls the compilation method for the given node and returns the
...@@ -2002,7 +2010,10 @@ class Compiler: ...@@ -2002,7 +2010,10 @@ class Compiler:
"'_' or '__' or ending with '__' is reserved.)") "'_' or '__' or ending with '__' is reserved.)")
return None return None
else: else:
compiler = self.__getattribute__(self.method_name(elem)) try:
compiler = self.__getattribute__(self.method_name(elem))
except AttributeError:
compiler = self.fallback_compiler
self.context.append(node) self.context.append(node)
result = compiler(node) result = compiler(node)
self.context.pop() self.context.pop()
......
...@@ -54,16 +54,16 @@ AUTORIN Weber ...@@ -54,16 +54,16 @@ AUTORIN Weber
STELLENVERZEICHNIS fascitergula STELLENVERZEICHNIS fascitergula
* Form.: Sangall.; 39 p. 421,16 {=> URL_1} * Form.: Sangall.; 39 p. 421,16 {=> https://URL}
* Libri: confrat. I; app. A 6 p. 137,30 {-} // {-} bedeutet: keine URL vorhanden * Libri: confrat. I; app. A 6 p. 137,30 {-} // {-} bedeutet: keine URL vorhanden
* Catal.: thes. Germ.; * Catal.: thes. Germ.;
18,7 {=> URL_3} 18,7 {=> https://URL_3}
28,11 (post 851) {=> URL_4} 28,11 (post 851) {=> https://URL_4}
76,15 {=> URL_5} 76,15 {=> https://URL_5}
40,5 {=> URL_6} 40,5 {=> https://URL_6}
129a,5 {=> URL_7} 129a,5 {=> https://URL_7}
24,8 {=> URL_8} 24,8 {=> https://URL_8}
114,8 {=> URL_9} 114,8 {=> https://URL_9}
6,24 {-} 6,24 {-}
92,6 {-} 92,6 {-}
21,20 IIII {=> URL_10} 21,20 IIII {=> https://URL_10}
...@@ -59,22 +59,22 @@ U1Bedeutung { ...@@ -59,22 +59,22 @@ U1Bedeutung {
U2Bedeutung { U2Bedeutung {
display:block; display:block;
margin-left:2em; margin-left:1em;
} }
U3Bedeutung { U3Bedeutung {
display:block; display:block;
margin-left:3em; margin-left:1em;
} }
U4Bedeutung { U4Bedeutung {
display:block; display:block;
margin-left:4em; margin-left:1em;
} }
U5Bedeutung { U5Bedeutung {
display:block; display:block;
margin-left:5em; margin-left:1em;
} }
Interpretamente { Interpretamente {
...@@ -113,7 +113,7 @@ Stelle { ...@@ -113,7 +113,7 @@ Stelle {
} }
BelegText { BelegText {
color: darkgoldenrod; color: darkslateblue;
} }
BelegText:before { BelegText:before {
...@@ -124,12 +124,25 @@ BelegText:after { ...@@ -124,12 +124,25 @@ BelegText:after {
content:"”"; content:"”";
} }
Sonderbelege {
font-style: italic;
}
Sonderbelege:before {
content:" ("
}
Sonderbelege:after {
content: ") "
}
Zusatz { Zusatz {
font-style: italic; font-style: italic;
} }
Verweis { Verweis {
color: blue; color: blue;
text-decoration: underline;
} }
pfad:after { pfad:after {
......
...@@ -64,8 +64,8 @@ BEDEUTUNG ...@@ -64,8 +64,8 @@ BEDEUTUNG
* Alfan.: premn. phys. prol.; 4 p. 1 "semet ipsum ... iuste privat imperio, * Alfan.: premn. phys. prol.; 4 p. 1 "semet ipsum ... iuste privat imperio,
quisquis nec se cognoscit nec ea, quibus imperat". quisquis nec se cognoscit nec ea, quibus imperat".
* Epist.: Worm.; I 34 p. 63,1 "in maximo imperio minima est licentia {v. notam ed.}". * Epist.: Worm.; I 34 p. 63,1 "in maximo imperio minima est licentia {v. notam ed.}".
* Trad.: Reichersb.; 44 "Penno habebat germanum ... sub inperio et servitio adhuc domini Paldmari". ** {verschachtelte Belege:} Trad.: Reichersb.; 44 "Penno habebat germanum ... sub inperio et servitio adhuc domini Paldmari".
* Otto: Frising. gest.; 2,29 p. 135,15 "orbis imperium affectas {sc. Fridericus}". ** Otto: Frising. gest.; 2,29 p. 135,15 "orbis imperium affectas {sc. Fridericus}".
* Chart.: Mekl.; 783 "quo {tempore} ... dominus Iohannes gubernavit eiusdem imperium civitatis." * Chart.: Mekl.; 783 "quo {tempore} ... dominus Iohannes gubernavit eiusdem imperium civitatis."
{al. v. et {=> p. 1402, 47|URL}}. {al. v. et {=> p. 1402, 47|URL}}.
......
...@@ -189,17 +189,20 @@ Edition = EINZEILER ...@@ -189,17 +189,20 @@ Edition = EINZEILER
Verweis = "{" VerweisKern "}" Verweis = "{" VerweisKern "}"
VerweisKern = "=>" §((alias "|" ("-" | URL)) | URL) VerweisKern = "=>" §((alias "|" ("-" | URL)) | URL)
Anker = "{" "@" §ziel "}" Anker = "{" "@" §ziel "}"
URL = [ ([protokoll] domäne /\//) | /\// ] { pfad /\// } ziel # URL = [ ([protokoll] domäne /\//) | /\// ] { pfad /\// } ziel
URL = [protokoll] [/\//] { pfad /\// } ziel
alias = FREITEXT alias = FREITEXT
protokoll = /\w+:\/\// protokoll = /\w+:\/\//
domäne = /\w+\.\w+(?:\.\w+)*/ # domäne = /\w+\.\w+(?:\.\w+)*/
pfad = /\w+/ pfad = PFAD_NAME # /\w+/
ziel = /[\w=?.%&\[\] ]+/ ziel = PFAD_NAME # /[\w=?.%&\[\] ]+/
#### GENERISCHE UND ATOMARE AUSDRÜCKE ######################################## #### GENERISCHE UND ATOMARE AUSDRÜCKE ########################################
PFAD_NAME = /[\w=?.%&\[\] ]+/
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~ NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~
NAME = /[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+/~ NAME = /[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+/~
......
...@@ -31,7 +31,7 @@ from DHParser import is_filename, load_if_file, \ ...@@ -31,7 +31,7 @@ from DHParser import is_filename, load_if_file, \
is_empty, is_expendable, collapse, replace_content, remove_nodes, remove_content, \ is_empty, is_expendable, collapse, replace_content, remove_nodes, remove_content, \
remove_brackets, replace_parser, traverse_locally, remove_nodes, \ remove_brackets, replace_parser, traverse_locally, remove_nodes, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \ keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
lstrip, rstrip, strip, keep_nodes, remove_anonymous_empty, has_parent lstrip, rstrip, strip, keep_nodes, remove_anonymous_empty, has_parent, MockParser
from DHParser.log import logging from DHParser.log import logging
...@@ -248,17 +248,22 @@ class MLWGrammar(Grammar): ...@@ -248,17 +248,22 @@ class MLWGrammar(Grammar):
Verweis = "{" VerweisKern "}" Verweis = "{" VerweisKern "}"
VerweisKern = "=>" §((alias "|" ("-" | URL)) | URL) VerweisKern = "=>" §((alias "|" ("-" | URL)) | URL)
Anker = "{" "@" §ziel "}" Anker = "{" "@" §ziel "}"
URL = [ ([protokoll] domäne /\//) | /\// ] { pfad /\// } ziel # URL = [ ([protokoll] domäne /\//) | /\// ] { pfad /\// } ziel
URL = [protokoll] [/\//] { pfad /\// } ziel
alias = FREITEXT alias = FREITEXT
protokoll = /\w+:\/\// protokoll = /\w+:\/\//
domäne = /\w+\.\w+(?:\.\w+)*/ # domäne = /\w+\.\w+(?:\.\w+)*/
pfad = /\w+/ pfad = PFAD_NAME # /\w+/
ziel = /[\w=?.%&\[\] ]+/ ziel = PFAD_NAME # /[\w=?.%&\[\] ]+/
#### GENERISCHE UND ATOMARE AUSDRÜCKE ######################################## #### GENERISCHE UND ATOMARE AUSDRÜCKE ########################################
PFAD_NAME = /[\w=?.%&\[\] ]+/
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~ NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~
NAME = /[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+/~ NAME = /[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+/~
...@@ -322,7 +327,7 @@ class MLWGrammar(Grammar): ...@@ -322,7 +327,7 @@ class MLWGrammar(Grammar):
flexion = Forward() flexion = Forward()
genus = Forward() genus = Forward()
wortart = Forward() wortart = Forward()
source_hash__ = "c0bebb58c93f0f0986f0383b6ec022ea" source_hash__ = "d2e7f9b37c45e9df1e67ccf382641f36"
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)' COMMENT__ = r'(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)'
WHITESPACE__ = r'[\t ]*' WHITESPACE__ = r'[\t ]*'
...@@ -367,12 +372,12 @@ class MLWGrammar(Grammar): ...@@ -367,12 +372,12 @@ class MLWGrammar(Grammar):
DEU_WORT.set(Alternative(DEU_GROSS, DEU_KLEIN, GROSSBUCHSTABE)) DEU_WORT.set(Alternative(DEU_GROSS, DEU_KLEIN, GROSSBUCHSTABE))
NAME = RE('[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+') NAME = RE('[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+')
NAMENS_ABKÜRZUNG = RE('[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\\.') NAMENS_ABKÜRZUNG = RE('[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\\.')
ziel = RegExp('[\\w=?.%&\\[\\] ]+') PFAD_NAME = RegExp('[\\w=?.%&\\[\\] ]+')
pfad = RegExp('\\w+') ziel = Synonym(PFAD_NAME)
domäne = RegExp('\\w+\\.\\w+(?:\\.\\w+)*') pfad = Synonym(PFAD_NAME)
protokoll = RegExp('\\w+://') protokoll = RegExp('\\w+://')
alias = Synonym(FREITEXT) alias = Synonym(FREITEXT)
URL = Series(Option(Alternative(Series(Option(protokoll), domäne, RegExp('/')), RegExp('/'))), ZeroOrMore(Series(pfad, RegExp('/'))), ziel) URL = Series(Option(protokoll), Option(RegExp('/')), ZeroOrMore(Series(pfad, RegExp('/'))), ziel)
Anker = Series(Token("{"), Token("@"), ziel, Token("}"), mandatory=2) Anker = Series(Token("{"), Token("@"), ziel, Token("}"), mandatory=2)
VerweisKern = Series(Token("=>"), Alternative(Series(alias, Token("|"), Alternative(Token("-"), URL)), URL), mandatory=1) VerweisKern = Series(Token("=>"), Alternative(Series(alias, Token("|"), Alternative(Token("-"), URL)), URL), mandatory=1)
Verweis = Series(Token("{"), VerweisKern, Token("}")) Verweis = Series(Token("{"), VerweisKern, Token("}"))
...@@ -568,8 +573,8 @@ MLW_AST_transformation_table = { ...@@ -568,8 +573,8 @@ MLW_AST_transformation_table = {
"SW_DEU": [replace_or_reduce], "SW_DEU": [replace_or_reduce],
"SW_GRIECH": [replace_or_reduce], "SW_GRIECH": [replace_or_reduce],
"Verweis": [], "Verweis": [],
"VerweisKern": [reduce_single_child], "VerweisKern": [flatten],
"ziel": [], # [apply_if(replace_content(lambda s: ''), has_parent("URL"))], "pfad, ziel": [reduce_single_child], # [apply_if(replace_content(lambda s: ''), has_parent("URL"))],
"Anker": [reduce_single_child], "Anker": [reduce_single_child],
"Werk": [reduce_single_child], "Werk": [reduce_single_child],
"ZielName": [replace_by_single_child], "ZielName": [replace_by_single_child],
...@@ -628,204 +633,57 @@ class MLWCompiler(Compiler): ...@@ -628,204 +633,57 @@ class MLWCompiler(Compiler):
"""Compiler for the abstract-syntax-tree of a MLW source file. """Compiler for the abstract-syntax-tree of a MLW source file.
""" """
ZÄHLER = (
('I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX'),
'ABCDEFGHI',
'123456789',
'abcdefghi',
'αβγδεζηθι'
)
def __init__(self, grammar_name="MLW", grammar_source=""): def __init__(self, grammar_name="MLW", grammar_source=""):
super(MLWCompiler, self).__init__(grammar_name, grammar_source) super(MLWCompiler, self).__init__(grammar_name, grammar_source)
assert re.match('\w+\Z', grammar_name) assert re.match('\w+\Z', grammar_name)
def on_Artikel(self, node): def on_VerweisKern(self, node):
if node.children[0].parser.name == "FREITEXT":
node.children[1].result = ""
elif node.children[0].parser.name == "ziel":
node.children[0].result = "v. ibi."
return node return node
def on_LemmaPosition(self, node): def ergänze_Zähler(self, node, tiefe):
pass i = 0
for nd in node.children:
def on_Lemma(self, node): if nd.parser.name == "Bedeutung":
pass zähler = Node(MockParser("Zähler", ":RE"), self.ZÄHLER[tiefe][i])
i += 1
def on_klassisch(self, node): nd2 = nd.children[0]
pass nd2.children = (zähler,) + nd2.children
def on_gesichert(self, node):
pass
def on_LemmaVarianten(self, node):
pass
def on_LemmaWort(self, node):
pass
def on_LemmaZusatz(self, node):
pass
def on_lzs_typ(self, node):
pass
def on_GrammatikPosition(self, node):
pass
def on_wortart(self, node):
pass
def on_GrammatikVarianten(self, node):
pass
def on_flexion(self, node):
pass
def on_FLEX(self, node):
pass
def on_genus(self, node):
pass
def on_EtymologiePosition(self, node):
pass
def on_EtymologieVarianten(self, node):
pass
def on_EtymologieVariante(self, node):
pass
def on_ArtikelKopf(self, node):
pass
def on_SchreibweisenPosition(self, node):
pass
def on_SWTyp(self, node):
pass
def on_SWVariante(self, node):
pass
def on_Schreibweise(self, node):
pass
def on_BedeutungsPosition(self, node): def on_BedeutungsPosition(self, node):
pass self.ergänze_Zähler(node, 0)
return self.fallback_compiler(node)
def on_Bedeutung(self, node):
pass
def on_Bedeutungskategorie(self, node):
pass
def on_Interpretamente(self, node):
pass
def on_LateinischeBedeutung(self, node):
pass
def on_DeutscheBedeutung(self, node):
pass
def on_Belege(self, node):
pass
def on_EinBeleg(self, node):
pass
def on_Zusatz(self, node):
pass
def on_ArtikelVerfasser(self, node):
pass
def on_Name(self, node):
pass
def on_SW_LAT(self, node):
pass
def on_SW_DEU(self, node):
pass
def on_SW_GRIECH(self, node):
pass
def on_Beleg(self, node):
pass
def on_Verweis(self, node):
pass
def on_VerweisZiel(self, node):
pass
def on_ZielName(self, node):
pass
def on_NAMENS_ABKÜRZUNG(self, node):
pass
def on_NAME(self, node):
pass
def on_DEU_WORT(self, node):
pass
def on_DEU_GROSS(self, node):
pass
def on_DEU_KLEIN(self, node):
pass
def on_LAT_WORT(self, node):
pass
def on_LAT_WORT_TEIL(self, node):
pass
def on_GROSSSCHRIFT(self, node):
pass
def on_GROSSFOLGE(self, node):
pass
def on_BUCHSTABENFOLGE(self, node):
pass
def on_ZEICHENFOLGE(self, node):
pass
def on_TR(self, node):
pass
def on_ABS(self, node):
pass
def on_LZ(self, node):
pass
def on_ZW(self, node):
pass
def on_ZWW(self, node):
pass
def on_LÜCKE(self, node):
pass
def on_LEERRAUM(self, node):
pass
def on_LEERZEILE(self, node):
pass
def on_RZS(self, node): def on_U1Bedeutung(self, node):
pass self.ergänze_Zähler(node, 1)
return self.fallback_compiler(node)
def on_ZEILENSPRUNG(self, node): def on_U2Bedeutung(self, node):
pass self.ergänze_Zähler(node, 2)
return self.fallback_compiler(node)
def on_KOMMENTARZEILEN(self, node): def on_U3Bedeutung(self, node):
pass self.ergänze_Zähler(node, 3)
return self.fallback_compiler(node)
def on_DATEI_ENDE(self, node): def on_U4Bedeutung(self, node):
pass self.ergänze_Zähler(node, 4)
return self.fallback_compiler(node)
def on_NIEMALS(self, node): def on_U5Bedeutung(self, node):
pass self.ergänze_Zähler(node, 5)
return self.fallback_compiler(node)
def get_compiler(grammar_name="MLW", grammar_source="") -> MLWCompiler: def get_compiler(grammar_name="MLW", grammar_source="") -> MLWCompiler:
......
{ {
// // "comments"-Abschnitt scheint keine Wirkung zu haben!? (E.A. 4.2.2018) // "comments"-Abschnitt scheint keine Wirkung zu haben!? (E.A. 4.2.2018)
// "comments": { "comments": {
// // symbol used for single line comment. Remove this entry if your language does not support line comments // symbol used for single line comment. Remove this entry if your language does not support line comments
// "lineComment": "//", "lineComment": "//",
// // symbols used for start and end a block comment. Remove this entry if your language does not support block comments // symbols used for start and end a block comment. Remove this entry if your language does not support block comments
// "blockComment": [ "/*", "*/" ] "blockComment": [ "/*", "*/" ]
// }, },
// symbols used as brackets // symbols used as brackets
"brackets": [ "brackets": [
["{", "}"], ["{", "}"],
...@@ -28,4 +28,4 @@ ...@@ -28,4 +28,4 @@
["\"", "\""], ["\"", "\""],
["'", "'"] ["'", "'"]
] ]
} }
\ No newline at end of file
...@@ -7,12 +7,12 @@ F1: "https:" ...@@ -7,12 +7,12 @@ F1: "https:"
F2: "http: //" F2: "http: //"
F3: "http:/" F3: "http:/"