dsl.py 12.4 KB
Newer Older
1
2
#!/usr/bin/python3

3
"""dsl.py - Support for domain specific notations for DHParser
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23

Copyright 2016  by Eckhart Arnold (arnold@badw.de)
                Bavarian Academy of Sciences and Humanities (badw.de)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.  See the License for the specific language governing
permissions and limitations under the License.

Module ``dsl`` contains various functions to support the
compilation of domain specific languages based on an EBNF-grammar.
"""

di68kap's avatar
di68kap committed
24
import collections
25
import os
26

27
28
29
30
31
try:
    import regex as re
except ImportError:
    import re

32
33
34
from .ebnf import EBNFGrammar, EBNF_ASTPipeline, EBNFCompiler
from .toolkit import load_if_file, is_python_code, compile_python_object
from .parsers import GrammarBase, CompilerBase, full_compilation, nil_scanner
35
from .syntaxtree import Node
36

Eckhart Arnold's avatar
Eckhart Arnold committed
37
38
39
40
41

__all__ = ['GrammarError',
           'CompilationError',
           'load_compiler_suite',
           'compileDSL',
42
           'run_compiler']
Eckhart Arnold's avatar
Eckhart Arnold committed
43
44


45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Template for the comment banner that separates the sections of a generated
# ``*_compiler.py`` file. ``{marker}`` is filled with one of the section
# titles defined below. The surrounding blank lines are part of the template
# and therefore also part of what RX_SECTION_MARKER has to match.
SECTION_MARKER = """\n
#######################################################################
#
# {marker}
#
#######################################################################
\n"""

# Regular expression built from the same template; it matches any banner
# whose title line contains the word "SECTION". It is used with ``split()``
# to cut an existing compiler-suite file into its parts.
RX_SECTION_MARKER = re.compile(SECTION_MARKER.format(marker=r'.*?SECTION.*?'))

# Titles of the individual section banners. Every title contains the word
# "SECTION" so that RX_SECTION_MARKER recognizes the banner.
SYMBOLS_SECTION = "SYMBOLS SECTION - Can be edited. Changes will be preserved."
SCANNER_SECTION = "SCANNER SECTION - Can be edited. Changes will be preserved."
PARSER_SECTION = "PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!"
AST_SECTION = "AST SECTION - Can be edited. Changes will be preserved."
COMPILER_SECTION = "COMPILER SECTION - Can be edited. Changes will be preserved."
di68kap's avatar
di68kap committed
60
# Title of the final banner; run_compiler() writes it after the compiler
# section and before the trailing ("outro") code of a generated file.
END_SECTIONS_MARKER = "END OF DHPARSER-SECTIONS"
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87


class GrammarError(Exception):
    """Raised when (already) the grammar of a domain specific language (DSL)
    contains errors.

    Attributes:
        error_messages:  The error messages reported for the grammar, stored
            exactly as passed to the constructor.
        grammar_src:  The source of the faulty grammar, stored as passed.
    """

    def __init__(self, error_messages, grammar_src):
        self.error_messages = error_messages
        self.grammar_src = grammar_src

    def __str__(self):
        # Mirror CompilationError: show only the messages instead of the
        # default rendering of the args tuple, which would also dump the
        # complete grammar source.
        return str(self.error_messages)


class CompilationError(Exception):
    """Signals that a text or file written in a domain specific language
    (DSL) could not be compiled without errors.

    The constructor arguments are kept unchanged as the attributes
    ``error_messages``, ``dsl_text``, ``dsl_grammar`` and ``AST``.
    """

    def __init__(self, error_messages, dsl_text, dsl_grammar, AST):
        (self.error_messages,
         self.dsl_text,
         self.dsl_grammar,
         self.AST) = (error_messages, dsl_text, dsl_grammar, AST)

    def __str__(self):
        # The string form of the exception is the stored error message text.
        messages = self.error_messages
        return messages


di68kap's avatar
di68kap committed
88
89
# Import prelude for generated parser code. It is prepended to parser source
# before that source is handed to compile_python_object(), and it is written
# into the symbols section of a freshly generated ``*_compiler.py`` file.
# The text must therefore be valid Python on its own; the doubled backslashes
# become line continuations in the generated code.
DHPARSER_IMPORTS = """
from functools import partial
import sys
try:
    import regex as re
except ImportError:
    import re
from DHParser.toolkit import load_if_file    
from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \\
    Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\
    Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
    ZeroOrMore, Forward, NegativeLookahead, mixin_comment, full_compilation
from DHParser.syntaxtree import Node, remove_enclosing_delimiters, remove_children_if, \\
    reduce_single_child, replace_by_single_child, remove_whitespace, TOKEN_KEYWORD, \\
    no_operation, remove_expendables, remove_tokens, flatten, WHITESPACE_KEYWORD, \\
    is_whitespace, is_expendable
"""

106

107
108
109
110
# Template for the trailing code of a generated ``{NAME}_compiler.py`` file:
# a ``compile_{NAME}()`` convenience function plus a small command line entry
# point. ``{NAME}`` is substituted via ``str.format()``, so the template must
# not contain any other braces.
DHPARSER_COMPILER = '''
def compile_{NAME}(source):
    """Compiles ``source`` and returns (result, errors, ast).
    """
    return full_compilation(source, {NAME}Scanner,
        {NAME}Grammar(), {NAME}_ASTPipeline, {NAME}Compiler())

if __name__ == "__main__":
    if len(sys.argv) > 1:
        result, errors, ast = compile_{NAME}(sys.argv[1])
        if errors:
            for error in errors:
                print(error)
            sys.exit(1)
        else:
            print(result)
    else:
        print("Usage: {NAME}_compiler.py [FILENAME]")
'''

127
128
129
130
131
132
133
134
135
136
137
138
139

def get_grammar_instance(grammar):
    """Returns a grammar object and the source code of the grammar, from
    the given `grammar`-data which can be either a file name, ebnf-code,
    python-code, a GrammarBase-derived grammar class or an instance of
    such a class (i.e. a grammar object already).

    Args:
        grammar:  A string (file name, EBNF source or Python source), a
            ``GrammarBase`` instance, or a callable that returns a grammar
            object when called without arguments (e.g. a grammar class).

    Returns:
        A tuple ``(parser_root, grammar_src)``. ``grammar_src`` is the empty
        string if `grammar` was not passed as a string.

    Raises:
        GrammarError:  If `grammar` is a string that is not Python code and
            its compilation as EBNF reports errors.
    """
    if not isinstance(grammar, str):
        # No source text is available in this case. Anything that is not a
        # GrammarBase instance is assumed to be a grammar class and is
        # instantiated to obtain the root object.
        parser_root = grammar if isinstance(grammar, GrammarBase) else grammar()
        return parser_root, ''

    grammar_src = load_if_file(grammar)
    if is_python_code(grammar):
        # The grammar has already been compiled to Python code.
        parser_py, errors = grammar_src, ''
    else:
        parser_py, errors, _ = full_compilation(
            grammar_src, None, EBNFGrammar(), EBNF_ASTPipeline, EBNFCompiler())
    if errors:
        raise GrammarError('\n\n'.join(errors), grammar_src)
    # Raw string: '\w' is not a valid escape sequence in an ordinary literal.
    # The object found under a name matching the pattern is called to obtain
    # the grammar's root object.
    parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, r'\w*Grammar$')()
    return parser_root, grammar_src


def load_compiler_suite(compiler_suite):
    """Extracts a compiler suite from file or string ``compiler_suite``
    and returns it as a tuple (scanner, parser, ast, compiler).

    If ``compiler_suite`` is Python code, it is split at its section marker
    banners and the scanner, AST and compiler sections are each compiled
    together with the suite's own import section. Otherwise the source is
    assumed to be an EBNF grammar and is compiled with the EBNF compiler.

    Args:
        compiler_suite (str):  File name or source text of the suite.

    Returns:
        A tuple ``(scanner, parser, ast, compiler)``. ``parser`` is the
        result of calling the object found under a name ending in
        "Grammar".

    Raises:
        ValueError:  If a Python compiler suite cannot be split into the
            expected seven parts.
        GrammarError:  If an EBNF source does not compile without errors.
    """
    assert isinstance(compiler_suite, str)
    source = load_if_file(compiler_suite)
    if is_python_code(compiler_suite):
        try:
            intro, imports, scanner_py, parser_py, ast_py, compiler_py, outro = \
                RX_SECTION_MARKER.split(source)
        except ValueError as error:
            # Unpacking fails if the number of section banners is wrong.
            # Keep the original exception as the cause for easier debugging.
            raise ValueError('File "' + compiler_suite + '" seems to be corrupted. '
                             'Please delete or repair file manually.') from error
        # Raw strings: '\w' is not a valid escape in an ordinary literal.
        scanner = compile_python_object(imports + scanner_py, r'\w*Scanner$')
        ast = compile_python_object(imports + ast_py, r'\w*Pipeline$')
        compiler = compile_python_object(imports + compiler_py, r'\w*Compiler$')
    else:
        # assume source is an ebnf grammar
        parser_py, errors, AST = full_compilation(
            source, None, EBNFGrammar(), EBNF_ASTPipeline, EBNFCompiler())
        if errors:
            raise GrammarError('\n\n'.join(errors), source)
        scanner = nil_scanner
        ast = EBNF_ASTPipeline
        # NOTE(review): this branch returns an EBNFCompiler *instance*, while
        # run_compiler() calls the returned compiler object (``cclass()``).
        # Confirm whether the class itself should be returned here instead.
        compiler = EBNFCompiler()
    parser = compile_python_object(DHPARSER_IMPORTS + parser_py, r'\w*Grammar$')()

    return scanner, parser, ast, compiler


di68kap's avatar
di68kap committed
187
def compileDSL(text_or_file, dsl_grammar, ast_pipeline, compiler,
               scanner=nil_scanner):
    """Compiles a text written in a domain specific language (DSL) whose
    grammar is specified in EBNF.

    The grammar is resolved with ``get_grammar_instance()``, the text is
    loaded with ``load_if_file()`` and both are passed on to
    ``full_compilation()`` together with the scanner, the AST pipeline
    and the compiler.

    Returns:
        The result of the compilation.

    Raises:
        CompilationError:  If the compilation reported any errors. The
            messages are joined with blank lines in between.
    """
    assert isinstance(text_or_file, str)
    assert isinstance(compiler, CompilerBase)
    assert isinstance(ast_pipeline, (collections.abc.Sequence, dict))

    parser_root, grammar_src = get_grammar_instance(dsl_grammar)
    source_text = load_if_file(text_or_file)
    result, errors, AST = full_compilation(
        source_text, scanner, parser_root, ast_pipeline, compiler)
    if not errors:
        return result
    raise CompilationError('\n\n'.join(errors), source_text, grammar_src, AST)


203
def compileEBNF(ebnf_src, ebnf_grammar_obj=None, source_only=False):
    """Compiles an EBNF source file into a Grammar class.

    Please note: This function returns a class which must be
    instantiated before calling its parse()-method! Calling the method
    directly from the class (which is technically possible in Python)
    yields an error message complaining about a missing parameter,
    the cause of which may not be obvious at first sight.

    Args:
        ebnf_src(str):  Either the file name of an EBNF grammar or
            the EBNF grammar itself as a string.
        ebnf_grammar_obj:  An existing instance of the
            DHParser.EBNFcompiler.EBNFGrammar object. This can speed
            up compilation, because no new EBNFGrammar object needs to
            be instantiated.
        source_only (bool):  If True, the source code of the Grammar
            class is returned instead of the class itself.
    Returns:
        A Grammar class that can be instantiated for parsing a text
        which conforms to the language defined by ``ebnf_src``.
    Raises:
        CompilationError:  Propagated from ``compileDSL()`` if the EBNF
            source does not compile without errors.
    """
    grammar = ebnf_grammar_obj or EBNFGrammar()
    grammar_src = compileDSL(ebnf_src, grammar, EBNF_ASTPipeline, EBNFCompiler())
    if source_only:
        return grammar_src
    # Raw string: '\w' is not a valid escape sequence in an ordinary literal.
    return compile_python_object(DHPARSER_IMPORTS + grammar_src, r'\w*Grammar$')
229
230


231
232
233
234
def _report_write_failure(filename, error, result):
    """Prints a commented notice that ``filename`` could not be written,
    followed by the compilation result itself.
    """
    print('# Could not write file "' + filename + '" because of: '
          + "\n# ".join(str(error).split('\n')))
    print(result)


def run_compiler(source_file, compiler_suite="", extension=".xml"):
    """Compiles a source file with a given compiler and writes the
    result to a file.

    If no ``compiler_suite`` is given it is assumed that the source
    file is an EBNF grammar. In this case the result will be a Python
    script containing a parser for that grammar as well as the
    skeletons for a scanner, AST transformation table, and compiler.
    If the Python script already exists only the parser section of the
    script will be regenerated; the other sections are written back as
    they were found. (For this to work, the different sections need to
    be delimited by section marker blocks.)

    Args:
        source_file (str):  Name of the file to be compiled.
        compiler_suite (str):  Passed to ``load_compiler_suite()`` if not
            empty.
        extension (str):  Extension of the output file in case the
            result is not a generated compiler suite.

    Returns:
        A list of error messages or an empty list if no errors
        occurred. A failure to write the output file is not reported
        as an error; the result is printed to stdout instead.

    Raises:
        ValueError:  If an existing ``*_compiler.py`` file cannot be
            split into the expected sections.
    """
    filepath = os.path.normpath(source_file)
    with open(source_file, encoding="utf-8") as f:
        source = f.read()
    rootname = os.path.splitext(filepath)[0]
    compiler_name = os.path.basename(rootname)
    if compiler_suite:
        scanner, parser, trans, cclass = load_compiler_suite(compiler_suite)
        compiler = cclass()
    else:
        scanner = nil_scanner
        parser = EBNFGrammar()
        trans = EBNF_ASTPipeline
        compiler = EBNFCompiler(compiler_name, source)
    result, errors, _ast = full_compilation(source, scanner, parser, trans, compiler)
    if errors:
        return errors

    if trans == EBNF_ASTPipeline:  # either an EBNF- or no compiler suite given
        suite_path = rootname + '_compiler.py'
        try:
            # Reuse the editable sections of an already existing suite. The
            # old parser section is discarded, because it is regenerated.
            with open(suite_path, 'r', encoding="utf-8") as f:
                old_suite = f.read()
            intro, imports, scanner_src, _old_parser, ast_src, compiler_src, outro = \
                RX_SECTION_MARKER.split(old_suite)
        except OSError:
            # No readable suite yet: start from the generated skeletons.
            # (PermissionError, FileNotFoundError and IOError are all OSError.)
            intro = '#!/usr/bin/python'
            outro = DHPARSER_COMPILER.format(NAME=compiler_name)
            imports = DHPARSER_IMPORTS
            scanner_src = compiler.gen_scanner_skeleton()
            ast_src = compiler.gen_AST_skeleton()
            compiler_src = compiler.gen_compiler_skeleton()
        except ValueError as error:
            raise ValueError('File "' + rootname + '_compiler.py" seems to be corrupted. '
                             'Please delete or repair file manually!') from error

        # Assemble the complete text before opening the file for writing, so
        # that an existing suite is not truncated if a section is malformed.
        suite_text = ''.join((
            intro,
            SECTION_MARKER.format(marker=SYMBOLS_SECTION), imports,
            SECTION_MARKER.format(marker=SCANNER_SECTION), scanner_src,
            SECTION_MARKER.format(marker=PARSER_SECTION), result,
            SECTION_MARKER.format(marker=AST_SECTION), ast_src,
            SECTION_MARKER.format(marker=COMPILER_SECTION), compiler_src,
            SECTION_MARKER.format(marker=END_SECTIONS_MARKER), outro,
        ))
        try:
            with open(suite_path, 'w', encoding="utf-8") as f:
                f.write(suite_text)
        except OSError as error:
            _report_write_failure(suite_path, error, result)

    else:
        output_path = rootname + extension
        output = result.as_xml() if isinstance(result, Node) else result
        try:
            with open(output_path, 'w', encoding="utf-8") as f:
                f.write(output)
        except OSError as error:
            _report_write_failure(output_path, error, result)

    return []