2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

LyrikCompiler_example.py 9.63 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#!/usr/bin/python

#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


from functools import partial
import os
import sys
sys.path.extend(['../../', '../', './'])

try:
    import regex as re
except ImportError:
    import re
19
from DHParser import is_filename, Grammar, Compiler, Lookbehind, \
di68kap's avatar
di68kap committed
20
21
    Alternative, Pop, Token, Synonym, Whitespace, \
    Option, NegativeLookbehind, OneOrMore, RegExp, Series, Capture, \
22
    ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
Eckhart Arnold's avatar
Eckhart Arnold committed
23
24
    PreprocessorFunc, TransformationDict, remove_empty, reduce_single_child, \
    Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
25
    reduce_single_child, replace_by_single_child, remove_whitespace, \
Eckhart Arnold's avatar
Eckhart Arnold committed
26
    flatten, is_empty, collapse, replace_content, remove_brackets, \
di68kap's avatar
di68kap committed
27
    is_one_of, rstrip, strip, remove_tokens, remove_nodes, peek, \
Eckhart Arnold's avatar
Eckhart Arnold committed
28
    is_whitespace, TOKEN_PTYPE
29
from DHParser.log import logging
30
31
32
33


#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
38
def LyrikPreprocessor(text):
    """Return ``text`` unchanged.

    This is the identity preprocessor: Lyrik sources are handed to the
    parser exactly as they were read.
    """
    return text

Eckhart Arnold's avatar
Eckhart Arnold committed
41
42
def get_preprocessor() -> PreprocessorFunc:
    """Return the preprocessor function for Lyrik sources (``LyrikPreprocessor``)."""
    return LyrikPreprocessor
43
44
45
46
47
48
49
50
51
52
53


#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################

class LyrikGrammar(Grammar):
    r"""Parser for a Lyrik source file, with this grammar:
    
    gedicht           = bibliographisches { LEERZEILE }+ [serie] §titel text /\s*/ ENDE
    
    bibliographisches = autor §"," [NZ] werk "," [NZ] ort "," [NZ] jahr "."
    autor             = namenfolge [verknüpfung]
    werk              = wortfolge ["." §untertitel] [verknüpfung]
    untertitel        = wortfolge [verknüpfung]
    ort               = wortfolge [verknüpfung]
    jahr              = JAHRESZAHL
    
    wortfolge         = { WORT }+
    namenfolge        = { NAME }+
    verknüpfung       = "<" ziel ">"
    ziel              = ZEICHENFOLGE
    
    serie             = !(titel vers NZ vers) { NZ zeile }+ { LEERZEILE }+
    
    titel             = { NZ zeile}+ { LEERZEILE }+
    zeile             = { ZEICHENFOLGE }+
    
    text              = { strophe {LEERZEILE} }+
    strophe           = { NZ vers }+
    vers              = { ZEICHENFOLGE }+
    
    WORT              = /\w+/~
    NAME              = /\w+\.?/~
    ZEICHENFOLGE      = /[^ \n<>]+/~
    NZ                = /\n/~
    LEERZEILE         = /\n[ \t]*(?=\n)/~
    JAHRESZAHL        = /\d\d\d\d/~
    ENDE              = !/./
    """
    # NOTE(review): presumably a hash of the grammar source this class was
    # generated from -- confirm against the DHParser generator.
    source_hash__ = "6602d99972ef2883e28bd735e1fe0401"
    parser_initialization__ = "upon instantiation"

    # No comment syntax is defined; insignificant whitespace is tabs/blanks.
    COMMENT__ = r''
    WHITESPACE__ = r'[\t ]*'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)

    # Terminals. Apart from ENDE, each one is a regular expression followed
    # by ``wsp__`` (the trailing ``~`` in the grammar above).
    ENDE = NegativeLookahead(RegExp('.'))
    JAHRESZAHL = Series(RegExp('\\d\\d\\d\\d'), wsp__)
    LEERZEILE = Series(RegExp('\\n[ \\t]*(?=\\n)'), wsp__)
    NZ = Series(RegExp('\\n'), wsp__)
    ZEICHENFOLGE = Series(RegExp('[^ \\n<>]+'), wsp__)
    NAME = Series(RegExp('\\w+\\.?'), wsp__)
    WORT = Series(RegExp('\\w+'), wsp__)

    # Body of the poem: verses, stanzas, title and optional series heading.
    vers = OneOrMore(ZEICHENFOLGE)
    strophe = OneOrMore(Series(NZ, vers))
    text = OneOrMore(Series(strophe, ZeroOrMore(LEERZEILE)))
    zeile = OneOrMore(ZEICHENFOLGE)
    titel = Series(OneOrMore(Series(NZ, zeile)), OneOrMore(LEERZEILE))
    serie = Series(NegativeLookahead(Series(titel, vers, NZ, vers)), OneOrMore(Series(NZ, zeile)), OneOrMore(LEERZEILE))

    # Bibliographic header: author, work, place and year, each optionally
    # followed by a "<...>" link (verknüpfung).
    ziel = Synonym(ZEICHENFOLGE)
    verknüpfung = Series(Series(Token("<"), wsp__), ziel, Series(Token(">"), wsp__))
    namenfolge = OneOrMore(NAME)
    wortfolge = OneOrMore(WORT)
    jahr = Synonym(JAHRESZAHL)
    ort = Series(wortfolge, Option(verknüpfung))
    untertitel = Series(wortfolge, Option(verknüpfung))
    werk = Series(wortfolge, Option(Series(Series(Token("."), wsp__), untertitel, mandatory=1)), Option(verknüpfung))
    autor = Series(namenfolge, Option(verknüpfung))
    bibliographisches = Series(autor, Series(Token(","), wsp__), Option(NZ), werk, Series(Token(","), wsp__),
                               Option(NZ), ort, Series(Token(","), wsp__), Option(NZ), jahr, Series(Token("."), wsp__), mandatory=1)

    # Root rule; ``mandatory=3`` corresponds to the ``§`` before ``titel``.
    gedicht = Series(bibliographisches, OneOrMore(LEERZEILE), Option(serie), titel, text, RegExp('\\s*'), ENDE, mandatory=3)
    root__ = gedicht
    
def get_grammar() -> LyrikGrammar:
    """Return the shared ``LyrikGrammar`` instance, creating it on first use.

    The instance is kept in the module-level global
    ``thread_local_Lyrik_grammar_singleton``. Despite the name, this function
    stores it as a plain global, so every caller in the process receives the
    same object.
    """
    global thread_local_Lyrik_grammar_singleton
    try:
        grammar = thread_local_Lyrik_grammar_singleton
    except NameError:
        # First call: the global does not exist yet.
        thread_local_Lyrik_grammar_singleton = LyrikGrammar()
        grammar = thread_local_Lyrik_grammar_singleton
    return grammar
126
127
128
129
130
131
132
133


#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

Eckhart Arnold's avatar
Eckhart Arnold committed
134
135
136
def halt(node):
    """Abort processing unconditionally by raising ``AssertionError``.

    ``node`` is accepted but not inspected. The exception is raised
    explicitly rather than via ``assert False`` so that the function still
    halts when Python runs with ``-O``, which strips ``assert`` statements.

    Raises:
        AssertionError: always.
    """
    raise AssertionError("halt() was reached")

137
138
139
# Maps node names of the Lyrik grammar to the transformation (or list of
# transformations) used for them. A key may name several node types at once,
# separated by commas (e.g. "gedicht, strophe, text").
# NOTE(review): "+" and "*" look like special keys interpreted by
# ``traverse`` (presumably "apply to every node" and "fallback for nodes
# without an entry") -- confirm against the DHParser documentation.
Lyrik_AST_transformation_table = {
    # AST Transformations for the Lyrik-grammar
    "+": remove_empty,
    "bibliographisches":
        [flatten, remove_nodes('NZ'), remove_whitespace, remove_tokens],
    "autor": [],
    "werk": [],
    "untertitel": [],
    "ort": [],
    "jahr":
        [reduce_single_child, remove_whitespace, reduce_single_child],
    "wortfolge":
        [flatten(is_one_of('WORT'), recursive=False), peek, rstrip, collapse],
    "namenfolge":
        [flatten(is_one_of('NAME'), recursive=False), peek, rstrip, collapse],
    "verknüpfung":
        [flatten, remove_tokens('<', '>'), remove_whitespace, reduce_single_child],
    "ziel":
        [reduce_single_child, remove_whitespace, reduce_single_child],
    "gedicht, strophe, text":
        [flatten, remove_nodes('LEERZEILE'), remove_nodes('NZ')],
    "titel, serie":
        [flatten, remove_nodes('LEERZEILE'), remove_nodes('NZ'), collapse],
    "zeile": [strip],
    "vers":
        [strip, collapse],
    "WORT": [],
    "NAME": [],
    "ZEICHENFOLGE":
        reduce_single_child,
    "NZ":
        reduce_single_child,
    "LEERZEILE": [],
    "JAHRESZAHL":
        [reduce_single_child],
    "ENDE": [],
    ":Whitespace":
        replace_content(lambda node: " "),
    "*": replace_by_single_child
}

# AST transformer for Lyrik: ``traverse`` with the Lyrik transformation table
# pre-bound as its ``processing_table`` argument.
LyrikTransform = partial(traverse, processing_table=Lyrik_AST_transformation_table)


def get_transformer() -> TransformationFunc:
    """Return the AST transformation function for Lyrik (``LyrikTransform``)."""
    return LyrikTransform


#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

class LyrikCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a Lyrik source file.

    One ``on_<name>`` method exists per symbol of the Lyrik grammar. Only
    ``on_gedicht`` produces a value (it returns its node unchanged); all
    other handlers are empty placeholders that return ``None``.
    """

    def __init__(self, grammar_name="Lyrik", grammar_source=""):
        """Initialise the base ``Compiler`` and check ``grammar_name``.

        Args:
            grammar_name: name of the grammar; must consist of word
                characters only.
            grammar_source: passed through to the base class.
        """
        super().__init__(grammar_name, grammar_source)
        # Raw string: '\w' and '\Z' are regex escapes, not string escapes.
        assert re.match(r'\w+\Z', grammar_name)

    def on_gedicht(self, node):
        """Return the root node unchanged."""
        return node

    # The remaining handlers are placeholders and do nothing.

    def on_bibliographisches(self, node):
        pass

    def on_autor(self, node):
        pass

    def on_werk(self, node):
        pass

    def on_untertitel(self, node):
        pass

    def on_ort(self, node):
        pass

    def on_jahr(self, node):
        pass

    def on_wortfolge(self, node):
        pass

    def on_namenfolge(self, node):
        pass

    def on_verknüpfung(self, node):
        pass

    def on_ziel(self, node):
        pass

    def on_serie(self, node):
        pass

    def on_titel(self, node):
        pass

    def on_zeile(self, node):
        pass

    def on_text(self, node):
        pass

    def on_strophe(self, node):
        pass

    def on_vers(self, node):
        pass

    def on_WORT(self, node):
        pass

    def on_NAME(self, node):
        pass

    def on_ZEICHENFOLGE(self, node):
        pass

    def on_NZ(self, node):
        pass

    def on_LEERZEILE(self, node):
        pass

    def on_JAHRESZAHL(self, node):
        pass

    def on_ENDE(self, node):
        pass


def get_compiler(grammar_name="Lyrik", grammar_source="") -> LyrikCompiler:
    """Return the shared ``LyrikCompiler``, creating it on first use.

    The instance lives in the module-level global
    ``thread_local_Lyrik_compiler_singleton``. When it already exists it is
    updated through ``set_grammar_name(grammar_name, grammar_source)`` before
    being returned; otherwise a new compiler is built from the same two
    arguments and stored.
    """
    global thread_local_Lyrik_compiler_singleton
    try:
        instance = thread_local_Lyrik_compiler_singleton
        instance.set_grammar_name(grammar_name, grammar_source)
    except NameError:
        # No compiler has been stored yet: build and remember one.
        instance = LyrikCompiler(grammar_name, grammar_source)
        thread_local_Lyrik_compiler_singleton = instance
    return instance


#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################


def compile_src(source):
    """Compile ``source`` and return (result, errors, ast).

    The preprocessor, grammar, transformer and compiler obtained from the
    ``get_*`` accessors of this module are handed to ``compile_source``,
    and its return value is passed back unchanged. The call runs inside
    the ``logging("LOGS")`` context.

    Args:
        source: the value forwarded to ``compile_source`` as its source
            argument (the command-line entry point passes a file name).
    """
    with logging("LOGS"):
        compiler = get_compiler()
        cname = compiler.__class__.__name__
        # NOTE(review): ``log_file_name`` is computed but never used below;
        # kept as-is in case ``is_filename`` is relied on for validation.
        log_file_name = os.path.basename(os.path.splitext(source)[0]) \
            if is_filename(source) < 0 else cname[:cname.find('.')] + '_out'
        result = compile_source(source, get_preprocessor(),
                                get_grammar(),
                                get_transformer(), compiler)
    return result


# Command-line entry point: compile the file named by the first argument.
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        # No file given: show the usage hint.
        print("Usage: LyrikCompiler.py [FILENAME]")
    else:
        result, errors, ast = compile_src(sys.argv[1])
        if not errors:
            if isinstance(result, Node):
                print(result.as_xml())
            else:
                print(result)
        else:
            # Report every error, then signal failure to the shell.
            for message in errors:
                print(message)
            sys.exit(1)