05.11., 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 2467ebca authored by di68kap's avatar di68kap

- MLW ergänzt

parent 4441a5ef
......@@ -212,12 +212,10 @@ class Node(collections.abc.Sized):
"""Initializes the ``Node``-object with the ``Parser``-Instance
that generated the node and the parser's result.
"""
# self._result = '' # type: StrictResultType
# self.children = () # type: ChildrenType
# self._len = -1 # type: int
self.error_flag = 0 # type: int
self._errors = [] # type: List[Error]
# self.result = result would suffice; if clause is merely an optimization for speed
# Assignment to self.result initializes the attributes _result, children and _len
# The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes
if leafhint:
self._result = result # type: StrictResultType
self.children = NoChildren # type: ChildrenType
......
LEMMA facitergul|a
LEMMA facitergula
fasc-itergula
fac-iet-ergula
fac-ist-ergula
fascite-rcu-la
fascitergula
facietergula
facistergula
facitercula
ZUSATZ sim.
......@@ -19,7 +19,7 @@ SCHREIBWEISE
script.:
vizreg-: v. ibi
festregel(a): v. ibi
fezdregl(a): v. ini
fezdregl(a): v. ibi
BEDEUTUNG
......
......@@ -13,6 +13,8 @@ Artikel = [LZ]
§LemmaPosition
[ArtikelKopf]
BedeutungsPosition
[VerweisPosition]
{ SubArtikel }+
ArtikelVerfasser
[LZ] DATEI_ENDE
......@@ -22,7 +24,7 @@ Artikel = [LZ]
LemmaPosition = "LEMMA" [LZ] §Lemma TR [LemmaVarianten]
GrammatikPosition [EtymologiePosition]
Lemma = { klassisch | gesichert } LemmaWort
Lemma = [< klassisch | gesichert >] LemmaWort
klassisch = "*"
gesichert = "$"
......
"""algorithmen.py - diverse Algorithmen für das MLW"""
import re
# Segmentation of the spelling variants in the test case below, relative to
# the lemma "facitergula" (presumably the output the algorithm aims for --
# TODO confirm):
# fasc - itergula
# fac - iet - ergula
# fac - ist - ergula
# facite - rcu - la

# Test case: the lemma (first entry) followed by its spelling variants,
# one entry per line.
testfall_facitergula = """
facitergula
fascitergula
facietergula
facistergula
facitercula
"""
def teile_lemma(lemma_txt):
    """Split a lemma text into its individual words.

    Entries may be separated by whitespace, semicolons or commas (each
    optionally surrounded by whitespace). Empty fragments produced by
    adjacent separators are dropped.

    Returns:
        A list of the non-empty entries in their original order.
    """
    fragmente = re.split(r'\s+|(?:\s*;\s*)|(?:\s*,\s*)', lemma_txt)
    # filter(None, ...) discards the empty strings left between separators
    return list(filter(None, fragmente))
def differenz(lemma, variante):
    """Return the part of ``variante`` that differs from ``lemma``.

    The common prefix and the common suffix of both strings are removed
    from ``variante``; one shared character is kept on each side as
    context and the cut is marked with a hyphen. A shared prefix or
    suffix of two characters or fewer is not cut off at all.

    The common suffix is only searched in the part of the shorter string
    that is not already covered by the common prefix. Without this bound
    prefix and suffix can overlap (e.g. "abab" vs. "ababab"), which made
    the differing part vanish and yielded just "--".

    Args:
        lemma: the reference spelling.
        variante: the spelling variant to be condensed.

    Returns:
        The differing section of ``variante``, with a leading and/or
        trailing '-' wherever a common part has been cut off.
    """
    kuerzere = min(len(lemma), len(variante))

    # length of the common prefix (first difference from the left)
    praefix = 0
    while praefix < kuerzere and lemma[praefix] == variante[praefix]:
        praefix += 1

    # length of the common suffix (first difference from the right);
    # bounded so that it never overlaps the common prefix
    suffix = 0
    while (suffix < kuerzere - praefix
           and lemma[-1 - suffix] == variante[-1 - suffix]):
        suffix += 1

    # start one character before the first difference ...
    links = praefix - 1
    if links <= 1:
        links = 0  # do not split off single letters
    # ... and end one character after the last difference
    rechts = suffix - 1
    if rechts <= 1:
        rechts = 0  # do not split off single letters

    # differing section from the last common character (left) up to the
    # first common character (right), with hyphens marking the cuts
    mitte = variante[links:len(variante) - rechts]
    return ('-' if links > 0 else '') + mitte + ('-' if rechts > 0 else '')
def verdichtung_lemmavarianten(lemma_txt):
    """Print the condensed form of every variant listed in ``lemma_txt``.

    The text is split with ``teile_lemma``; the first entry is taken as
    the lemma and each following entry as a variant. For every variant
    the result of ``differenz(lemma, variant)`` is printed on its own line.
    """
    eintraege = teile_lemma(lemma_txt)
    lemma = eintraege[0]
    for variante in eintraege[1:]:
        verdichtet = differenz(lemma, variante)
        print(verdichtet)
# When run as a script, print the condensed variants of the built-in test case.
if __name__ == "__main__":
    verdichtung_lemmavarianten(testfall_facitergula)
......@@ -19,6 +19,7 @@ save_path = os.getcwd()
os.chdir("Beispiele")
for entry in os.listdir():
if entry.lower().endswith('.mlw'):
print('\n Parse: ' + entry)
raw_name = os.path.splitext(entry)[0]
with logging(True):
result, messages, AST = compile_source(entry,
......@@ -26,9 +27,9 @@ for entry in os.listdir():
get_grammar(),
get_transformer(),
get_compiler())
# if AST:
# with open(raw_name + '.ast', 'w', encoding='utf-8') as f:
# f.write(AST.as_sxpr(compact=False))
if AST:
with open(raw_name + '.ast', 'w', encoding='utf-8') as f:
f.write(AST.as_sxpr(compact=False))
if messages:
print("Errors in: " + entry)
with open(raw_name + '.messages', 'w', encoding='utf-8') as f:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment