The name of the initial branch for new projects is now "main" instead of "master". Existing projects remain unchanged. More information: https://doku.lrz.de/display/PUBLIC/GitLab

Commit 2467ebca authored by di68kap's avatar di68kap
Browse files

- MLW ergänzt

parent 4441a5ef
...@@ -212,12 +212,10 @@ class Node(collections.abc.Sized): ...@@ -212,12 +212,10 @@ class Node(collections.abc.Sized):
"""Initializes the ``Node``-object with the ``Parser``-Instance """Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result. that generated the node and the parser's result.
""" """
# self._result = '' # type: StrictResultType
# self.children = () # type: ChildrenType
# self._len = -1 # type: int
self.error_flag = 0 # type: int self.error_flag = 0 # type: int
self._errors = [] # type: List[Error] self._errors = [] # type: List[Error]
# self.result = result would suffice; if clause is merely an optimization for speed # Assignment to self.result initializes the attributes _result, children and _len
# The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes
if leafhint: if leafhint:
self._result = result # type: StrictResultType self._result = result # type: StrictResultType
self.children = NoChildren # type: ChildrenType self.children = NoChildren # type: ChildrenType
......
LEMMA facitergul|a LEMMA facitergula
fasc-itergula fascitergula
fac-iet-ergula facietergula
fac-ist-ergula facistergula
fascite-rcu-la facitercula
ZUSATZ sim. ZUSATZ sim.
...@@ -19,7 +19,7 @@ SCHREIBWEISE ...@@ -19,7 +19,7 @@ SCHREIBWEISE
script.: script.:
vizreg-: v. ibi vizreg-: v. ibi
festregel(a): v. ibi festregel(a): v. ibi
fezdregl(a): v. ini fezdregl(a): v. ibi
BEDEUTUNG BEDEUTUNG
......
...@@ -13,6 +13,8 @@ Artikel = [LZ] ...@@ -13,6 +13,8 @@ Artikel = [LZ]
§LemmaPosition §LemmaPosition
[ArtikelKopf] [ArtikelKopf]
BedeutungsPosition BedeutungsPosition
[VerweisPosition]
{ SubArtikel }+
ArtikelVerfasser ArtikelVerfasser
[LZ] DATEI_ENDE [LZ] DATEI_ENDE
...@@ -22,7 +24,7 @@ Artikel = [LZ] ...@@ -22,7 +24,7 @@ Artikel = [LZ]
LemmaPosition = "LEMMA" [LZ] §Lemma TR [LemmaVarianten] LemmaPosition = "LEMMA" [LZ] §Lemma TR [LemmaVarianten]
GrammatikPosition [EtymologiePosition] GrammatikPosition [EtymologiePosition]
Lemma = { klassisch | gesichert } LemmaWort Lemma = [< klassisch | gesichert >] LemmaWort
klassisch = "*" klassisch = "*"
gesichert = "$" gesichert = "$"
......
"""algorithmen.py - diverse Algorithmen für das MLW"""
import re
# Test data: the first word is the lemma, the following words are its
# spelling variants (verdichtung_lemmavarianten treats them this way).
# The notes below presumably show where each variant is meant to be cut
# when it is condensed -- TODO confirm with the MLW editors:
# fasc - itergula
# fac - iet - ergula
# fac - ist - ergula
# facite - rcu - la
testfall_facitergula = """
facitergula
fascitergula
facietergula
facistergula
facitercula
"""
def teile_lemma(lemma_txt):
    """Split a lemma text into its individual word forms.

    Runs of whitespace, semicolons and commas (the latter two optionally
    surrounded by whitespace) act as separators. Empty fragments, which
    arise from adjacent, leading or trailing separators, are dropped.

    Returns the non-empty fragments as a list, in their original order.
    """
    fragmente = re.split(r'\s+|(?:\s*;\s*)|(?:\s*,\s*)', lemma_txt)
    # The pattern has no capturing groups, so every item is a string and
    # filtering on truthiness removes exactly the empty ones.
    return list(filter(None, fragmente))
def differenz(lemma, variante):
    """Return the part of ``variante`` that deviates from ``lemma``.

    The common prefix and the common suffix of both strings are stripped
    from ``variante``; one shared character is kept on either side as
    context. A hyphen marks every side on which something was cut off.
    A cut that would separate only a single letter is not made; the
    variant is then kept in full on that side.

    The suffix is only searched in the part of the shorter string that the
    prefix has not already covered. Otherwise the same characters would be
    counted on both sides and the result could degenerate to a bare "--".

    Examples (lemma "facitergula")::

        fascitergula -> "fasc-"
        facietergula -> "-iet-"
        facitercula  -> "-rcu-"

    :param lemma: the reference form
    :param variante: the spelling variant to be condensed
    :returns: the condensed variant, or ``''`` if both strings are equal
    """
    if lemma == variante:
        return ''  # nothing deviates, so there is nothing to show

    shorter = min(len(lemma), len(variante))

    # length of the common prefix
    prefix = 0
    while prefix < shorter and lemma[prefix] == variante[prefix]:
        prefix += 1

    # length of the common suffix, restricted to the characters that are
    # not already part of the common prefix
    suffix = 0
    while (suffix < shorter - prefix
           and lemma[-1 - suffix] == variante[-1 - suffix]):
        suffix += 1

    # start one character before the first difference ...
    start = prefix - 1
    if start <= 1:
        start = 0  # do not split off a single letter
    # ... and stop one character after the last difference
    cut = suffix - 1
    if cut <= 1:
        cut = 0  # do not split off a single letter

    core = variante[start:len(variante) - cut]
    return ('-' if start > 0 else '') + core + ('-' if cut > 0 else '')
def verdichtung_lemmavarianten(lemma_txt):
    """Print the condensed form of every lemma variant in ``lemma_txt``.

    The text is split with ``teile_lemma``. The first word is taken as the
    lemma, all following words as its variants. For each variant the result
    of ``differenz(lemma, variant)`` is printed on a line of its own.

    Nothing is printed if the text contains no words at all or only the
    lemma without any variants.

    :param lemma_txt: lemma and variants, separated by whitespace, commas
        or semicolons
    """
    geteilt = teile_lemma(lemma_txt)
    if not geteilt:
        # Empty or separator-only input: there is no lemma to compare with.
        return
    lemma, *varianten = geteilt
    for v in varianten:
        print(differenz(lemma, v))
# When run as a script, print the condensed variants of the built-in
# "facitergula" test case.
if __name__ == "__main__":
    verdichtung_lemmavarianten(testfall_facitergula)
...@@ -19,6 +19,7 @@ save_path = os.getcwd() ...@@ -19,6 +19,7 @@ save_path = os.getcwd()
os.chdir("Beispiele") os.chdir("Beispiele")
for entry in os.listdir(): for entry in os.listdir():
if entry.lower().endswith('.mlw'): if entry.lower().endswith('.mlw'):
print('\n Parse: ' + entry)
raw_name = os.path.splitext(entry)[0] raw_name = os.path.splitext(entry)[0]
with logging(True): with logging(True):
result, messages, AST = compile_source(entry, result, messages, AST = compile_source(entry,
...@@ -26,9 +27,9 @@ for entry in os.listdir(): ...@@ -26,9 +27,9 @@ for entry in os.listdir():
get_grammar(), get_grammar(),
get_transformer(), get_transformer(),
get_compiler()) get_compiler())
# if AST: if AST:
# with open(raw_name + '.ast', 'w', encoding='utf-8') as f: with open(raw_name + '.ast', 'w', encoding='utf-8') as f:
# f.write(AST.as_sxpr(compact=False)) f.write(AST.as_sxpr(compact=False))
if messages: if messages:
print("Errors in: " + entry) print("Errors in: " + entry)
with open(raw_name + '.messages', 'w', encoding='utf-8') as f: with open(raw_name + '.messages', 'w', encoding='utf-8') as f:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment