2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit f6b1769f authored by Eckhart Arnold
Browse files

- added function to recompile grammar files to testing

parent ca3dc76b
...@@ -27,8 +27,10 @@ except ImportError: ...@@ -27,8 +27,10 @@ except ImportError:
import re import re
from DHParser import Node, error_messages from DHParser import Node, error_messages
from DHParser.syntaxtree import MockParser
from DHParser.toolkit import compact_sexpr from DHParser.toolkit import compact_sexpr
from DHParser.syntaxtree import MockParser
from DHParser.ebnf import grammar_changed
from DHParser.dsl import compile_on_disk
def mock_syntax_tree(sexpr): def mock_syntax_tree(sexpr):
...@@ -85,6 +87,45 @@ def mock_syntax_tree(sexpr): ...@@ -85,6 +87,45 @@ def mock_syntax_tree(sexpr):
return Node(MockParser(name, ':' + class_name), result) return Node(MockParser(name, ':' + class_name), result)
def recompile_grammar(ebnf_filename, query_remove_error_files=True):
    """Recompiles an ebnf-grammar if necessary, that is if either no
    corresponding 'XXXX_compiler.py'-file exists or if that file is
    outdated.

    Compilation errors are written, one per line, to 'XXXX_errors.txt'
    next to the grammar. If no errors occurred and such a file is left
    over from an earlier run, it is removed (optionally after asking).

    Parameters:
        ebnf_filename(str): The filename of the ebnf-source of the
            grammar. In case this is a directory and not a file all
            files within this directory ending with .ebnf will be
            compiled.
        query_remove_error_files(bool): If True, ask on the console
            before removing an obsolete 'XXXX_errors.txt' file and keep
            the file unless the answer is 'y' or 'yes'. If False, remove
            it without asking.
    """
    if os.path.isdir(ebnf_filename):
        for entry in os.listdir(ebnf_filename):
            # os.listdir() yields bare names; they must be joined with the
            # directory, otherwise they are resolved against the current
            # working directory instead of `ebnf_filename`.
            path = os.path.join(ebnf_filename, entry)
            if entry.lower().endswith('.ebnf') and os.path.isfile(path):
                # Pass the caller's choice on, so that directory mode does
                # not fall back to interactive prompting.
                recompile_grammar(path, query_remove_error_files)
        return

    base, _ = os.path.splitext(ebnf_filename)
    compiler_name = base + '_compiler.py'
    error_file_name = base + '_errors.txt'
    errors = []
    if (not os.path.exists(compiler_name)
            or grammar_changed(compiler_name, ebnf_filename)):
        errors = compile_on_disk(ebnf_filename)
        if errors:
            with open(error_file_name, 'w') as f:
                for e in errors:
                    f.write(e)
                    f.write('\n')

    # An error file that survives a clean (or skipped) compilation is stale.
    if not errors and os.path.exists(error_file_name):
        if query_remove_error_files:
            answer = input('Remove obsolete file ' + base + '_errors.txt (y/n)? ').lower()
            if answer not in {'y', 'yes'}:
                return
        os.remove(error_file_name)
UNIT_STAGES = {'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'} UNIT_STAGES = {'match', 'fail', 'ast', 'cst', '__ast__', '__cst__'}
...@@ -123,7 +164,7 @@ def unit_from_json(config_filename): ...@@ -123,7 +164,7 @@ def unit_from_json(config_filename):
raise ValueError('Test stage %s not in: ' % (stage, str(UNIT_STAGES))) raise ValueError('Test stage %s not in: ' % (stage, str(UNIT_STAGES)))
return unit return unit
# TODO: add support for json, yaml, cson, toml # TODO: add support for yaml, cson, toml
def unit_from_file(config_filename): def unit_from_file(config_filename):
......
...@@ -45,7 +45,7 @@ wortart = "nomen" | "n." | ...@@ -45,7 +45,7 @@ wortart = "nomen" | "n." |
GrammatikVarianten = { [wortart ABS] flexion [genus] ":" Beleg §ABS }+ GrammatikVarianten = { [wortart ABS] flexion [genus] ":" Beleg §ABS }+
Flexion = FLEX { "," §FLEX } flexion = FLEX { "," §FLEX }
FLEX = /-?[a-z]+/~ FLEX = /-?[a-z]+/~
genus = "maskulinum" | "m." | genus = "maskulinum" | "m." |
......
...@@ -72,36 +72,34 @@ class MLWGrammar(GrammarBase): ...@@ -72,36 +72,34 @@ class MLWGrammar(GrammarBase):
klassisch = "*" klassisch = "*"
gesichert = "$" gesichert = "$"
LemmaVarianten = "VARIANTEN" [LZ] LemmaVarianten = [LZ]
{ lemma §TR }+ { lemma §TR }+
[LemmaZusatz §ABS] [LemmaZusatz §ABS]
lemma = LAT_WORT_TEIL { ("|" | "-") LAT_WORT_TEIL } lemma = LAT_WORT_TEIL { ("|" | "-") LAT_WORT_TEIL }
LemmaZusatz = "ZUSATZ" §lzs_typ LemmaZusatz = "ZUSATZ" §lzs_typ
lzs_typ = /sim\./ lzs_typ = /sim\./
## GRAMMATIK-POSITION ## ## GRAMMATIK-POSITION ##
GrammatikPosition = "GRAMMATIK" [LZ] §wortart §ABS §Flexion [genus] GrammatikPosition = "GRAMMATIK" [LZ] §wortart §ABS §flexion [genus] §ABS
{GrammatikVariante} [ABS] [GrammatikVarianten]
wortart = "nomen" | "n." | wortart = "nomen" | "n." |
"verb" | "v." | "verb" | "v." |
"adverb" | "adv." | "adverb" | "adv." |
"adjektiv" | "adj." "adjektiv" | "adj."
GrammatikVariante = ABS GVariante GrammatikVarianten = { [wortart ABS] flexion [genus] ":" Beleg §ABS }+
GVariante = Flexionen [genus] ":" Beleg
Flexionen = Flexion { "," §Flexion } flexion = FLEX { "," §FLEX }
Flexion = /-?[a-z]+/~ FLEX = /-?[a-z]+/~
genus = "maskulinum" | "m." |
"femininum" | "f." |
"neutrum" | "n."
genus = "maskulinum" | "m." |
"femininum" | "f." |
"neutrum" | "n."
#### ARTIKEL-KOPF ############################################################ #### ARTIKEL-KOPF ############################################################
...@@ -113,10 +111,6 @@ class MLWGrammar(GrammarBase): ...@@ -113,10 +111,6 @@ class MLWGrammar(GrammarBase):
SWVariante = Schreibweise ":" Beleg SWVariante = Schreibweise ":" Beleg
Schreibweise = "vizreg-" | "festregel(a)" | "fezdregl(a)" | "fat-" Schreibweise = "vizreg-" | "festregel(a)" | "fezdregl(a)" | "fat-"
Beleg = Verweis
Verweis = ~/\w+/~
VerweisZiel = ~/<\w+>/~
#### BEDEUTUNGS-POSITION ##################################################### #### BEDEUTUNGS-POSITION #####################################################
...@@ -140,7 +134,13 @@ class MLWGrammar(GrammarBase): ...@@ -140,7 +134,13 @@ class MLWGrammar(GrammarBase):
Name = { NAME | NAMENS_ABKÜRZUNG }+ Name = { NAME | NAMENS_ABKÜRZUNG }+
#### ATOMARE AUSDRÜCKE ####################################################### #### GENERISCHE UND ATOMARE AUSDRÜCKE ########################################
Beleg = Verweis
Verweis = ZielName
VerweisZiel = "[" ZielName "]"
ZielName = ZEICHENFOLGE
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~ NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~
NAME = /[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+/~ NAME = /[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+/~
...@@ -152,6 +152,8 @@ class MLWGrammar(GrammarBase): ...@@ -152,6 +152,8 @@ class MLWGrammar(GrammarBase):
LAT_WORT_TEIL = /[a-z]+/ LAT_WORT_TEIL = /[a-z]+/
GROSSSCHRIFT = /[A-ZÄÖÜ]+/~ GROSSSCHRIFT = /[A-ZÄÖÜ]+/~
ZEICHENFOLGE = /\w+/~
TR = ABS | LZ # (beliebiger) Trenner TR = ABS | LZ # (beliebiger) Trenner
ABS = /\s*;\s*/ | { ZW }+ # Abschluss (durch Semikolon oder Zeilenwechsel) ABS = /\s*;\s*/ | { ZW }+ # Abschluss (durch Semikolon oder Zeilenwechsel)
ZW = /\n/~ # Zeilenwechsel ZW = /\n/~ # Zeilenwechsel
...@@ -160,7 +162,8 @@ class MLWGrammar(GrammarBase): ...@@ -160,7 +162,8 @@ class MLWGrammar(GrammarBase):
DATEI_ENDE = !/./ DATEI_ENDE = !/./
NIEMALS = /(?!.)/ NIEMALS = /(?!.)/
""" """
source_hash__ = "2d6f71148926868bfeba2e2a30b07fec" wortart = Forward()
source_hash__ = "d953e1f653ac37c660274f1c1dbbd7e2"
parser_initialization__ = "upon instatiation" parser_initialization__ = "upon instatiation"
COMMENT__ = r'#.*(?:\n|$)' COMMENT__ = r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'#.*(?:\n|$)') WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'#.*(?:\n|$)')
...@@ -172,6 +175,7 @@ class MLWGrammar(GrammarBase): ...@@ -172,6 +175,7 @@ class MLWGrammar(GrammarBase):
ZW = RE('\\n') ZW = RE('\\n')
ABS = Alternative(RE('\\s*;\\s*', wR=''), OneOrMore(ZW)) ABS = Alternative(RE('\\s*;\\s*', wR=''), OneOrMore(ZW))
TR = Alternative(ABS, LZ) TR = Alternative(ABS, LZ)
ZEICHENFOLGE = RE('\\w+')
GROSSSCHRIFT = RE('[A-ZÄÖÜ]+') GROSSSCHRIFT = RE('[A-ZÄÖÜ]+')
LAT_WORT_TEIL = RE('[a-z]+', wR='') LAT_WORT_TEIL = RE('[a-z]+', wR='')
LAT_WORT = RE('[a-z]+') LAT_WORT = RE('[a-z]+')
...@@ -180,6 +184,10 @@ class MLWGrammar(GrammarBase): ...@@ -180,6 +184,10 @@ class MLWGrammar(GrammarBase):
DEU_WORT = RE('[A-ZÄÖÜ]?[a-zäöüß]+') DEU_WORT = RE('[A-ZÄÖÜ]?[a-zäöüß]+')
NAME = RE('[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+') NAME = RE('[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+')
NAMENS_ABKÜRZUNG = RE('[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\\.') NAMENS_ABKÜRZUNG = RE('[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\\.')
ZielName = ZEICHENFOLGE
VerweisZiel = Sequence(Token("["), ZielName, Token("]"))
Verweis = ZielName
Beleg = Verweis
Name = OneOrMore(Alternative(NAME, NAMENS_ABKÜRZUNG)) Name = OneOrMore(Alternative(NAME, NAMENS_ABKÜRZUNG))
Autorinfo = Sequence(Alternative(Token("AUTORIN"), Token("AUTOR")), Name) Autorinfo = Sequence(Alternative(Token("AUTORIN"), Token("AUTOR")), Name)
Zusatz = Sequence(Token("ZUSATZ"), RE('\\s*.*', wR=''), ABS) Zusatz = Sequence(Token("ZUSATZ"), RE('\\s*.*', wR=''), ABS)
...@@ -191,25 +199,21 @@ class MLWGrammar(GrammarBase): ...@@ -191,25 +199,21 @@ class MLWGrammar(GrammarBase):
Bedeutungskategorie = Sequence(RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+'), Optional(LZ)) Bedeutungskategorie = Sequence(RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+'), Optional(LZ))
Bedeutung = Sequence(Alternative(Interpretamente, Bedeutungskategorie), Optional(Belege)) Bedeutung = Sequence(Alternative(Interpretamente, Bedeutungskategorie), Optional(Belege))
BedeutungsPosition = OneOrMore(Sequence(Token("BEDEUTUNG"), Optional(LZ), Required(Bedeutung))) BedeutungsPosition = OneOrMore(Sequence(Token("BEDEUTUNG"), Optional(LZ), Required(Bedeutung)))
VerweisZiel = RE('<\\w+>', wL=WSP__)
Verweis = RE('\\w+', wL=WSP__)
Beleg = Verweis
Schreibweise = Alternative(Token("vizreg-"), Token("festregel(a)"), Token("fezdregl(a)"), Token("fat-")) Schreibweise = Alternative(Token("vizreg-"), Token("festregel(a)"), Token("fezdregl(a)"), Token("fat-"))
SWVariante = Sequence(Schreibweise, Token(":"), Beleg) SWVariante = Sequence(Schreibweise, Token(":"), Beleg)
SWTyp = Alternative(Token("script."), Token("script. fat-")) SWTyp = Alternative(Token("script."), Token("script. fat-"))
SchreibweisenPosition = Sequence(Token("SCHREIBWEISE"), Optional(LZ), Required(SWTyp), Token(":"), Optional(LZ), Required(SWVariante), ZeroOrMore(Sequence(ABS, SWVariante)), Optional(LZ)) SchreibweisenPosition = Sequence(Token("SCHREIBWEISE"), Optional(LZ), Required(SWTyp), Token(":"), Optional(LZ), Required(SWVariante), ZeroOrMore(Sequence(ABS, SWVariante)), Optional(LZ))
ArtikelKopf = SchreibweisenPosition ArtikelKopf = SchreibweisenPosition
genus = Alternative(Token("maskulinum"), Token("m."), Token("femininum"), Token("f."), Token("neutrum"), Token("n.")) genus = Alternative(Token("maskulinum"), Token("m."), Token("femininum"), Token("f."), Token("neutrum"), Token("n."))
Flexion = RE('-?[a-z]+') FLEX = RE('-?[a-z]+')
Flexionen = Sequence(Flexion, ZeroOrMore(Sequence(Token(","), Required(Flexion)))) flexion = Sequence(FLEX, ZeroOrMore(Sequence(Token(","), Required(FLEX))))
GVariante = Sequence(Flexionen, Optional(genus), Token(":"), Beleg) GrammatikVarianten = OneOrMore(Sequence(Optional(Sequence(wortart, ABS)), flexion, Optional(genus), Token(":"), Beleg, Required(ABS)))
GrammatikVariante = Sequence(ABS, GVariante) wortart.set(Alternative(Token("nomen"), Token("n."), Token("verb"), Token("v."), Token("adverb"), Token("adv."), Token("adjektiv"), Token("adj.")))
wortart = Alternative(Token("nomen"), Token("n."), Token("verb"), Token("v."), Token("adverb"), Token("adv."), Token("adjektiv"), Token("adj.")) GrammatikPosition = Sequence(Token("GRAMMATIK"), Optional(LZ), Required(wortart), Required(ABS), Required(flexion), Optional(genus), Required(ABS), Optional(GrammatikVarianten))
GrammatikPosition = Sequence(Token("GRAMMATIK"), Optional(LZ), Required(wortart), Required(ABS), Required(Flexion), Optional(genus), ZeroOrMore(GrammatikVariante), Optional(ABS))
lzs_typ = RE('sim\\.', wR='') lzs_typ = RE('sim\\.', wR='')
LemmaZusatz = Sequence(Token("ZUSATZ"), Required(lzs_typ)) LemmaZusatz = Sequence(Token("ZUSATZ"), Required(lzs_typ))
lemma = Sequence(LAT_WORT_TEIL, ZeroOrMore(Sequence(Alternative(Token("|"), Token("-")), LAT_WORT_TEIL))) lemma = Sequence(LAT_WORT_TEIL, ZeroOrMore(Sequence(Alternative(Token("|"), Token("-")), LAT_WORT_TEIL)))
LemmaVarianten = Sequence(Token("VARIANTEN"), Optional(LZ), OneOrMore(Sequence(lemma, Required(TR))), Optional(Sequence(LemmaZusatz, Required(ABS)))) LemmaVarianten = Sequence(Optional(LZ), OneOrMore(Sequence(lemma, Required(TR))), Optional(Sequence(LemmaZusatz, Required(ABS))))
gesichert = Token("$") gesichert = Token("$")
klassisch = Token("*") klassisch = Token("*")
HauptLemma = Sequence(Optional(klassisch), Optional(gesichert), lemma) HauptLemma = Sequence(Optional(klassisch), Optional(gesichert), lemma)
......
...@@ -20,26 +20,29 @@ See the License for the specific language governing permissions and ...@@ -20,26 +20,29 @@ See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
""" """
import os from DHParser.testing import recompile_grammar
recompile_grammar('.')
from DHParser.ebnf import grammar_changed
from DHParser.dsl import compile_on_disk # import os
#
# from DHParser.ebnf import grammar_changed
def compile(name): # from DHParser.dsl import compile_on_disk
base, ext = os.path.splitext(name) #
compiler_name = base + '_compiler.py' #
if (not os.path.exists(compiler_name) or # def compile(name):
grammar_changed(compiler_name, name)): # base, ext = os.path.splitext(name)
print("recompiling parser for: " + name) # compiler_name = base + '_compiler.py'
errors = compile_on_disk(name) # if (not os.path.exists(compiler_name) or
if errors: # grammar_changed(compiler_name, name)):
print("Errors while compiling: " + name + '!') # print("recompiling parser for: " + name)
with open(base + '_errors.txt', 'w') as f: # errors = compile_on_disk(name)
for e in errors: # if errors:
f.write(e) # print("Errors while compiling: " + name + '!')
f.write('\n') # with open(base + '_errors.txt', 'w') as f:
# for e in errors:
for entry in os.listdir(): # f.write(e)
if entry.lower().endswith('.ebnf') and os.path.isfile(entry): # f.write('\n')
compile(entry) #
# for entry in os.listdir():
# if entry.lower().endswith('.ebnf') and os.path.isfile(entry):
# compile(entry)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment