
Commit 703308bd authored by di68kap

- changes to MLW example

parent 7ea1fb30
@@ -40,6 +40,7 @@ __all__ = ['WHITESPACE_KEYWORD',
'no_operation',
'replace_by_single_child',
'reduce_single_child',
'change_parser',
'is_whitespace',
'is_empty',
'is_expendable',
@@ -522,7 +523,8 @@ def no_operation(node):
# ------------------------------------------------
#
# rearranging transformations:
# - tree may be rearranged (flattened)
# - tree may be rearranged (e.g. flattened)
# - nodes that are not leaves may be dropped
# - order is preserved
# - all leaves are kept
#
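As a concrete example of the contract stated in this comment (notation as produced by Node.as_sexpr(), slightly simplified): flattening

(a (b (c "x") (d "y")) (e "z"))

yields

(a (c "x") (d "y") (e "z"))

The non-leaf node b is dropped, the leaves "x", "y", "z" are all kept, and their order is preserved.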
@@ -551,6 +553,13 @@ def reduce_single_child(node):
node.result = node.result[0].result
def change_parser(node, new_parser_name):
"""Changes the parser of a Node to a mock parser with the given
name.
"""
node.parser = MockParser(new_parser_name)
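A minimal usage sketch of the new change_parser function, mirroring how the AST transformation table further below employs it (that MockParser exposes the given name as .name is an assumption suggested by the name-based dispatch elsewhere in this module):

from functools import partial

# relabel whitespace-like nodes so that later stages can treat them uniformly,
# exactly as the table entry for LEER, TRENNER and ZSPRUNG below does
as_whitespace = partial(change_parser, new_parser_name=WHITESPACE_KEYWORD)
# after as_whitespace(node), node.parser is MockParser(WHITESPACE_KEYWORD),
# so node.parser.name == WHITESPACE_KEYWORD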
# ------------------------------------------------
#
# destructive transformations:
@@ -641,6 +650,7 @@ def remove_enclosing_delimiters(node):
########################################################################
#
# syntax tree validation functions
# EXPERIMENTAL!
#
########################################################################
......
@@ -8,12 +8,13 @@
from functools import partial
import os
import sys
try:
import regex as re
except ImportError:
import re
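This try/except works because the third-party regex package is designed as a drop-in replacement for the standard library's re module; every call this file makes exists with the same signature in both. For instance, the following runs unchanged under either module:

assert re.match(r'\w+\Z', 'MLWGrammar') is not None  # same API in re and regex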
from DHParser.toolkit import load_if_file
from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \
@@ -21,7 +22,8 @@ from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
WHITESPACE_KEYWORD, TOKEN_KEYWORD
WHITESPACE_KEYWORD, TOKEN_KEYWORD, change_parser
#######################################################################
@@ -33,6 +35,9 @@ from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
def MLWScanner(text):
return text
def get_MLW_scanner():
return MLWScanner
#######################################################################
#
@@ -143,7 +148,7 @@ class MLWGrammar(GrammarBase):
DATEI_ENDE = !/./
NIEMALS = /(?!.)/
"""
source_hash__ = "9fce888d1b21b2d11a6228e0b97f9291"
source_hash__ = "ce9155e0248ac27756283d067342182e"
parser_initialization__ = "upon instatiation"
COMMENT__ = r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'#.*(?:\n|$)')
@@ -195,6 +200,15 @@
Artikel = Sequence(Optional(LEER), Required(LemmaPosition), Optional(ArtikelKopf), Required(BedeutungsPosition), Required(Autorinfo), Optional(LEER), DATEI_ENDE)
root__ = Artikel
def get_MLW_grammar():
global thread_local_MLW_grammar_singleton
try:
grammar = thread_local_MLW_grammar_singleton
return grammar
except NameError:
thread_local_MLW_grammar_singleton = MLWGrammar()
return thread_local_MLW_grammar_singleton
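get_MLW_grammar caches the grammar object in a module-level global and treats the NameError raised on first access as the "not created yet" signal. Note that despite its name the variable is an ordinary global shared by all threads; a genuinely thread-local variant of the same lazy-singleton idiom would look roughly like this (the holder and function names are my own):

import threading

_mlw_local = threading.local()  # one attribute slot per thread

def get_MLW_grammar_per_thread():
    try:
        return _mlw_local.grammar          # AttributeError on first access
    except AttributeError:
        _mlw_local.grammar = MLWGrammar()  # lazily create one grammar per thread
        return _mlw_local.grammar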
#######################################################################
#
@@ -220,7 +234,6 @@ def join_strings(node, delimiter='\n'):
new_result.append(nd)
node.result = tuple(new_result)
MLW_AST_transformation_table = {
# AST Transformations for the MLW-grammar
"Artikel": no_operation,
@@ -269,9 +282,8 @@ MLW_AST_transformation_table = {
"Autorinfo":
[partial(remove_tokens, tokens={'AUTORIN', 'AUTOR'})],
"WORT, WORT_KLEIN, WORT_GROSS, GROSSSCHRIFT":
# test,
[remove_expendables, reduce_single_child],
"LEER": no_operation,
"LEER, TRENNER, ZSPRUNG": partial(change_parser, new_parser_name=WHITESPACE_KEYWORD),
"DATEI_ENDE": no_operation,
"NIEMALS": no_operation,
(TOKEN_KEYWORD, WHITESPACE_KEYWORD):
@@ -284,9 +296,14 @@ MLW_AST_transformation_table = {
[remove_expendables, replace_by_single_child]
}
MLWTransform = partial(traverse, processing_table=MLW_AST_transformation_table)
def get_MLW_transformer():
return MLWTransform
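MLWTransform simply binds the table above to DHParser's generic traverse function. Conceptually, traverse walks the tree and applies to every node the operations registered under its parser name; a much simplified sketch of that dispatch follows (the real traverse also resolves grouped keys such as "WORT, WORT_KLEIN, ..." and the "*" fallback, which are omitted here):

def traverse_sketch(node, processing_table):
    for child in node.children:           # depth-first: children before parent
        traverse_sketch(child, processing_table)
    ops = processing_table.get(node.parser.name, [])
    for op in (ops if isinstance(ops, list) else [ops]):
        op(node)                          # e.g. reduce_single_child(node)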
#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
@@ -297,8 +314,8 @@ class MLWCompiler(CompilerBase):
"""Compiler for the abstract-syntax-tree of a MLW source file.
"""
def __init__(self, grammar_name="MLW"):
super(MLWCompiler, self).__init__()
def __init__(self, grammar_name="MLW", grammar_source=""):
super(MLWCompiler, self).__init__(grammar_name, grammar_source)
assert re.match(r'\w+\Z', grammar_name)
def on_Artikel(self, node):
@@ -434,6 +451,19 @@ class MLWCompiler(CompilerBase):
pass
def get_MLW_compiler(grammar_name="MLW",
grammar_source=""):
global thread_local_MLW_compiler_singleton
try:
compiler = thread_local_MLW_compiler_singleton
compiler.set_grammar_name(grammar_name, grammar_source)
return compiler
except NameError:
thread_local_MLW_compiler_singleton = \
MLWCompiler(grammar_name, grammar_source)
return thread_local_MLW_compiler_singleton
#######################################################################
#
# END OF DHPARSER-SECTIONS
@@ -444,8 +474,16 @@ class MLWCompiler(CompilerBase):
def compile_MLW(source):
"""Compiles ``source`` and returns (result, errors, ast).
"""
return compile_source(source, MLWScanner,
MLWGrammar(), MLWTransform, MLWCompiler())
with logging("LOGS"):
compiler = get_MLW_compiler()
cname = compiler.__class__.__name__
log_file_name = os.path.basename(os.path.splitext(source)[0]) \
if is_filename(source) else cname[:cname.find('.')] + '_out'
result = compile_source(source, get_MLW_scanner(),
get_MLW_grammar(),
get_MLW_transformer(), compiler)
return result
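Typical use, mirroring the __main__ block that follows (the input file name is made up):

result, errors, ast = compile_MLW('example.mlw')  # hypothetical MLW article file
if errors:
    for error in errors:
        print(error)   # report compilation errors
else:
    print(result)      # or inspect ast, the transformed syntax tree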
if __name__ == "__main__":
if len(sys.argv) > 1:
......
#!/usr/bin/python
#######################################################################
#
@@ -7,17 +7,21 @@
#######################################################################
from PyDSL import Pop, NegativeLookbehind, Capture, no_transformation, \
Token, Alternative, mixin_comment, RE, \
Sequence, remove_comments, Retrieve, is_scanner_token, \
Lookbehind, replace_by_single_child, remove_scanner_tokens, remove_whitespace, \
is_whitespace, ZeroOrMore, remove_enclosing_delimiters, CompilerBase, \
RegExp, NegativeLookahead, WHITESPACE_KEYWORD, GrammarBase, \
reduce_single_child, Optional, remove_children_if, remove_expendables, \
remove_tokens, is_comment, partial, OneOrMore, \
Forward, TOKEN_KEYWORD, Required, flatten, \
is_expendable, Lookahead
from functools import partial
import sys
try:
import regex as re
except ImportError:
import re
from DHParser.toolkit import load_if_file
from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
WHITESPACE_KEYWORD, TOKEN_KEYWORD
#######################################################################
@@ -45,7 +49,6 @@ class MLWGrammar(GrammarBase):
@ whitespace = /[\t ]*/ # line breaks do not count as whitespace
@ literalws = both # whitespace before and after literals is removed automatically
Artikel = [LEER]
§LemmaPosition [ArtikelKopf] §BedeutungsPosition §Autorinfo
[LEER] DATEI_ENDE
@@ -75,7 +78,6 @@ class MLWGrammar(GrammarBase):
"adverb" | "adv." |
"adjektiv" | "adj."
GrammatikVarianten = TRENNER GVariante
GVariante = Flexionen [_genus] ":" Beleg
@@ -112,33 +114,36 @@ class MLWGrammar(GrammarBase):
LateinischeBedeutung = "LAT" /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~
DeutscheBedeutung = "DEU" /(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+/~
Belege = "BELEGE" [LEER] { "*" EinBeleg }
EinBeleg = { !(/\s*/ ("*" | "BEDEUTUNG" | "AUTOR" | "NAME" | "ZUSATZ")) /\s*.*\s*/ }+
EinBeleg = { !([LEER] ("*" | "BEDEUTUNG" | "AUTOR" | "NAME" | "ZUSATZ"))
/\s*.*\s*/ }+
[Zusatz]
Zusatz = "ZUSATZ" /\s*.*/
Zusatz = "ZUSATZ" /\s*.*/ TRENNER
#### AUTOR/AUTORIN ###########################################################
Autorinfo = ("AUTORIN" | "AUTOR") Name
Name = WORT { WORT | /[A-ZÄÖÜÁÀ]\./ }
Name = WORT { WORT | NAMENS_ABKÜRZUNG }
#### ATOMIC EXPRESSIONS ######################################################
#### MISCELLANEA #############################################################
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀ]\./
WORT = /[A-ZÄÖÜ]?[a-zäöüß]+/~
WORT_GROSS = /[A-ZÄÖÜ][a-zäöüß]+/~
WORT_KLEIN = /[a-zäöüß]+/~
LAT_WORT = /[a-z]+/~
GROSSSCHRIFT = /[A-ZÄÖÜ]+/~
WORT = /[A-ZÄÖÜ]?[a-zäöüß]+/~
WORT_GROSS = /[A-ZÄÖÜ][a-zäöüß]+/~
WORT_KLEIN = /[a-zäöüß]+/~
LAT_WORT = /[a-z]+/~
GROSSSCHRIFT = /[A-ZÄÖÜ]+/~
TRENNER = /\s*;\s*/ | { ZSPRUNG }+
ZSPRUNG = /\n/~
TRENNER = /\s*;\s*/ | { ZSPRUNG }+
ZSPRUNG = /\n/~
LEER = /\s+/ # horizontal and(!) vertical whitespace
DATEI_ENDE = !/./
NIEMALS = /(?!.)/
LEER = /\s+/ # horizontal and(!) vertical whitespace
DATEI_ENDE = !/./
NIEMALS = /(?!.)/
"""
source_hash__ = "f373a397a48cc57bcca18b90dd7028bf"
source_hash__ = "9fce888d1b21b2d11a6228e0b97f9291"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'#.*(?:\n|$)')
@@ -154,10 +159,11 @@ class MLWGrammar(GrammarBase):
WORT_KLEIN = RE('[a-zäöüß]+', wL='')
WORT_GROSS = RE('[A-ZÄÖÜ][a-zäöüß]+', wL='')
WORT = RE('[A-ZÄÖÜ]?[a-zäöüß]+', wL='')
Name = Sequence(WORT, ZeroOrMore(Alternative(WORT, RE('[A-ZÄÖÜÁÀ]\\.', wR='', wL=''))))
NAMENS_ABKÜRZUNG = RE('[A-ZÄÖÜÁÀ]\\.', wR='', wL='')
Name = Sequence(WORT, ZeroOrMore(Alternative(WORT, NAMENS_ABKÜRZUNG)))
Autorinfo = Sequence(Alternative(Token("AUTORIN"), Token("AUTOR")), Name)
Zusatz = Sequence(Token("ZUSATZ"), RE('\\s*.*', wR='', wL=''))
EinBeleg = Sequence(OneOrMore(Sequence(NegativeLookahead(Sequence(RE('\\s*', wR='', wL=''), Alternative(Token("*"), Token("BEDEUTUNG"), Token("AUTOR"), Token("NAME"), Token("ZUSATZ")))), RE('\\s*.*\\s*', wR='', wL=''))), Optional(Zusatz))
Zusatz = Sequence(Token("ZUSATZ"), RE('\\s*.*', wR='', wL=''), TRENNER)
EinBeleg = Sequence(OneOrMore(Sequence(NegativeLookahead(Sequence(Optional(LEER), Alternative(Token("*"), Token("BEDEUTUNG"), Token("AUTOR"), Token("NAME"), Token("ZUSATZ")))), RE('\\s*.*\\s*', wR='', wL=''))), Optional(Zusatz))
Belege = Sequence(Token("BELEGE"), Optional(LEER), ZeroOrMore(Sequence(Token("*"), EinBeleg)))
DeutscheBedeutung = Sequence(Token("DEU"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
LateinischeBedeutung = Sequence(Token("LAT"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
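The mapping from the EBNF source in the docstring to these parser objects is mechanical. For example, the rule TRENNER = /\s*;\s*/ | { ZSPRUNG }+ would come out roughly as follows; the TRENNER line itself lies outside the displayed hunk, so its exact spelling is an assumption inferred from the pattern visible above:

TRENNER = Alternative(RE('\\s*;\\s*', wR='', wL=''), OneOrMore(ZSPRUNG))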
@@ -196,10 +202,6 @@ class MLWGrammar(GrammarBase):
#
#######################################################################
# def test(node):
# print(node.as_sexpr())
def join_strings(node, delimiter='\n'):
new_result = []
n = 0
@@ -211,18 +213,20 @@ def join_strings(node, delimiter='\n'):
while n < len(node.result) and not node.result[n].children:
n += 1
nd.result = delimiter.join((r.result for r in node.result[a:n]))
elif nd.parser.name != "Zusatz":
raise AssertionError(nd.as_sexpr())
else:
raise AssertionError(node.as_sexpr())
n += 1
new_result.append(nd)
node.result = tuple(new_result)
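In effect, join_strings merges each run of adjacent leaf children into one leaf whose content is the delimiter-joined text, while non-leaf children (only Zusatz nodes are expected here) pass through unchanged. A standalone analogue over plain Python values, just to show the grouping logic with Node handling abstracted away:

def join_runs(items, delimiter='\n'):
    # leaves are str; anything else stands in for a non-leaf node
    result, run = [], []
    for item in items:
        if isinstance(item, str):
            run.append(item)                        # extend the current run
        else:
            if run:
                result.append(delimiter.join(run))  # flush the finished run
                run = []
            result.append(item)                     # keep non-leaf items as is
    if run:
        result.append(delimiter.join(run))
    return result

# join_runs(['a', 'b', ('Zusatz',), 'c']) == ['a\nb', ('Zusatz',), 'c']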
MLWTransTable = {
MLW_AST_transformation_table = {
# AST Transformations for the MLW-grammar
"Artikel": no_transformation,
"Artikel": no_operation,
"LemmaPosition":
[partial(remove_tokens, tokens={'LEMMA'})],
"Lemma": no_transformation,
"Lemma": no_operation,
"_tll, _wortart, _genus":
[remove_expendables, reduce_single_child],
"LemmaVarianten":
@@ -244,32 +248,32 @@ MLWTransTable = {
[remove_expendables, reduce_single_child],
"Zusatz":
[remove_expendables, remove_tokens, reduce_single_child],
"ArtikelKopf": no_transformation,
"ArtikelKopf": no_operation,
"SchreibweisenPosition":
[partial(remove_tokens, tokens={'SCHREIBWEISE', ':'}),
flatten, partial(remove_tokens, tokens={','})],
"SWTyp": no_transformation,
"SWTyp": no_operation,
"BedeutungsPosition":
[flatten, partial(remove_tokens, tokens={'BEDEUTUNG'})],
"Bedeutung": no_transformation,
"Bedeutungskategorie": no_transformation,
"Interpretamente": no_transformation,
"Bedeutung": no_operation,
"Bedeutungskategorie": no_operation,
"Interpretamente": no_operation,
"LateinischeBedeutung, DeutscheBedeutung":
[remove_expendables, remove_tokens, reduce_single_child],
"Belege":
[flatten, remove_tokens],
"EinBeleg":
[flatten, remove_expendables, join_strings, reduce_single_child],
"Beleg": no_transformation,
"VerweisZiel": no_transformation,
"Beleg": no_operation,
"VerweisZiel": no_operation,
"Autorinfo":
[partial(remove_tokens, tokens={'AUTORIN', 'AUTOR'})],
"WORT, WORT_KLEIN, WORT_GROSS, GROSSSCHRIFT":
# test,
# test,
[remove_expendables, reduce_single_child],
"LEER": no_transformation,
"DATEI_ENDE": no_transformation,
"NIEMALS": no_transformation,
"LEER": no_operation,
"DATEI_ENDE": no_operation,
"NIEMALS": no_operation,
(TOKEN_KEYWORD, WHITESPACE_KEYWORD):
[remove_expendables, reduce_single_child],
"*":
@@ -280,6 +284,8 @@ MLWTransTable = {
[remove_expendables, replace_by_single_child]
}
MLWTransform = partial(traverse, processing_table=MLW_AST_transformation_table)
#######################################################################
#
@@ -295,134 +301,160 @@ class MLWCompiler(CompilerBase):
super(MLWCompiler, self).__init__()
assert re.match(r'\w+\Z', grammar_name)
def Artikel(self, node):
def on_Artikel(self, node):
return node
def LemmaPosition(self, node):
def on_LemmaPosition(self, node):
pass
def Lemma(self, node):
def on_Lemma(self, node):
pass
def _tll(self, node):
def on__tll(self, node):
pass
def LemmaVarianten(self, node):
def on_LemmaVarianten(self, node):
pass
def LVariante(self, node):
def on_LVariante(self, node):
pass
def LVZusatz(self, node):
def on_LVZusatz(self, node):
pass
def GrammatikPosition(self, node):
def on_GrammatikPosition(self, node):
pass
def _wortart(self, node):
def on__wortart(self, node):
pass
def GrammatikVarianten(self, node):
def on_GrammatikVarianten(self, node):
pass
def GVariante(self, node):
def on_GVariante(self, node):
pass
def Flexionen(self, node):
def on_Flexionen(self, node):
pass
def Flexion(self, node):
def on_Flexion(self, node):
pass
def _genus(self, node):
def on__genus(self, node):
pass
def ArtikelKopf(self, node):
def on_ArtikelKopf(self, node):
pass
def SchreibweisenPosition(self, node):
def on_SchreibweisenPosition(self, node):
pass
def SWTyp(self, node):
def on_SWTyp(self, node):
pass
def SWVariante(self, node):
def on_SWVariante(self, node):
pass
def Schreibweise(self, node):
def on_Schreibweise(self, node):
pass
def Beleg(self, node):
def on_Beleg(self, node):
pass
def Verweis(self, node):
def on_Verweis(self, node):
pass
def VerweisZiel(self, node):
def on_VerweisZiel(self, node):
pass
def BedeutungsPosition(self, node):
def on_BedeutungsPosition(self, node):
pass
def Bedeutung(self, node):
def on_Bedeutung(self, node):
pass
def Bedeutungskategorie(self, node):
def on_Bedeutungskategorie(self, node):
pass
def Interpretamente(self, node):
def on_Interpretamente(self, node):
pass
def LateinischeBedeutung(self, node):
def on_LateinischeBedeutung(self, node):
pass
def DeutscheBedeutung(self, node):
def on_DeutscheBedeutung(self, node):
pass
def Belege(self, node):
def on_Belege(self, node):
pass
def EinBeleg(self, node):
def on_EinBeleg(self, node):
pass
def Zusatz(self, node):
def on_Zusatz(self, node):
pass
def Autorinfo(self, node):
def on_Autorinfo(self, node):
pass
def Name(self, node):
def on_Name(self, node):
pass
def WORT(self, node):
def on_NAMENS_ABKÜRZUNG(self, node):
pass
def WORT_GROSS(self, node):
def on_WORT(self, node):
pass
def WORT_KLEIN(self, node):
def on_WORT_GROSS(self, node):
pass
def LAT_WORT(self, node):
def on_WORT_KLEIN(self, node):
pass
def GROSSSCHRIFT(self, node):
def on_LAT_WORT(self, node):
pass
def LEER(self, node):
def on_GROSSSCHRIFT(self, node):
pass
def DATEI_ENDE(self, node):
def on_TRENNER(self, node):
pass
def NIEMALS(self, node):
def on_ZSPRUNG(self, node):
pass
def on_LEER(self, node):
pass
def on_DATEI_ENDE(self, node):
pass
def on_NIEMALS(self, node):
pass
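The on_ prefix introduced by this commit suggests that CompilerBase now dispatches by prepending 'on_' to the node's parser name rather than calling a method of the bare name. A minimal sketch of such a dispatcher; the actual CompilerBase internals are not shown in this diff, so treat this as an assumption:

class CompilerSketch:
    def compile(self, node):
        # look up on_<parser name>, e.g. on_Artikel for an Artikel node;
        # fall back to a generic handler if no specific method exists
        handler = getattr(self, 'on_' + node.parser.name, self.on_default)
        return handler(node)

    def on_default(self, node):
        return node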
#######################################################################
#
# END OF PYDSL-SECTIONS
# END OF DHPARSER-SECTIONS
#
#######################################################################
def compile_MLW(source):
"""Compiles ``source`` and returns (result, errors, ast).
"""
return compile_source(source, MLWScanner,
MLWGrammar(), MLWTransform, MLWCompiler())
if __name__ == "__main__":
if len(sys.argv) > 1:
result, errors, ast = compile_MLW(sys.argv[1])
if errors:
for error in errors:
print(error)
sys.exit(1)
else:
print(result)
else:
print("Usage: MLW_compiler.py [FILENAME]")
@@ -31,8 +31,6 @@ MLW_compiler = os.path.join('..', 'MLW_compiler.py')
# print(grammar_changed(MLW_ebnf, MLW_compiler))
toolkit.logging_off()
if (not os.path.exists(MLW_compiler) or
grammar_changed(MLW_compiler, MLW_ebnf)):
print("recompiling parser")
......
@@ -23,10 +23,11 @@ from functools import partial
import sys
sys.path.extend(['../', './'])
from DHParser import parsers
from DHParser.toolkit import is_logging, compile_python_object
from DHParser.syntaxtree import no_operation, traverse, remove_expendables, \
replace_by_single_child, reduce_single_child, flatten, TOKEN_KEYWORD
from DHParser.parsers import compile_source, test_grammar
from DHParser.parsers import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import parser_factory, DHPARSER_IMPORTS
@@ -103,12 +104,12 @@ class TestGrammarTest:
}
}
def test_test_grammar(self):
def test_testing_grammar(self):
parser_fac = parser_factory(ARITHMETIC_EBNF)
trans_fac = lambda : ARITHMETIC_EBNFTransform
errata = test_grammar(self.cases, parser_fac, trans_fac)
errata = parsers.test_grammar(self.cases, parser_fac, trans_fac)
assert not errata
errata = test_grammar(self.failure_cases, parser_fac, trans_fac)
errata = parsers.test_grammar(self.failure_cases, parser_fac, trans_fac)
# for e in errata:
# print(e)
assert len(errata) == 3
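For context, test_grammar checks a grammar against a table of positive and negative samples and returns a list of discrepancies. Judging from this test class, the cases mapping is keyed by parser name with match/fail sub-tables, roughly as below; the exact shape is an assumption reconstructed from the surrounding code, and the sample strings are invented:

cases = {
    "expression": {
        "match": {1: "2 + 3 * 4"},   # inputs that must parse
        "fail": {2: "2 +"}           # inputs that must be rejected
    }
}
errata = parsers.test_grammar(cases, parser_fac, trans_fac)  # empty on success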
@@ -179,4 +180,4 @@ class TestRegex:
if __name__ == "__main__":
from run import runner
runner("TestGrammarTest", globals())
runner("", globals())