#!/usr/bin/python3

"""dhparser.py - command line tool for DHParser

Copyright 2016  by Eckhart Arnold (arnold@badw.de)
                Bavarian Academy of Sciences and Humanities (badw.de)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.  See the License for the specific language governing
permissions and limitations under the License.
"""
20
21
22
23

import os
import sys

eckhart's avatar
eckhart committed
24
25
26
27
28
29
30
31
# Work out the DHParser root directory from the location of this script:
# everything up to and including the first occurrence of 'DHParser' in the
# script's real path. If found, it is appended to sys.path; otherwise
# `dhparserdir` stays empty.
scriptdir = os.path.dirname(os.path.realpath(__file__))
i = scriptdir.find('DHParser')
dhparserdir = scriptdir[:i + len('DHParser')] if i >= 0 else ''
if dhparserdir:
    sys.path.append(dhparserdir)

32
from DHParser.compile import compile_source
eckhart's avatar
eckhart committed
33
from DHParser.dsl import compileDSL, compile_on_disk  # , recompile_grammar
Eckhart Arnold's avatar
Eckhart Arnold committed
34
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
35
from DHParser.log import logging
36
37
from DHParser.toolkit import re, typing
from typing import cast
38

39
# Switch handed to the `logging(...)` context manager when the self-test is
# started from the interactive menu in `main`.
LOGGING = False
40

41
# Skeleton of the EBNF grammar written to `<name>.ebnf` by `create_project`,
# which prepends '# ' + project name, so the first line reads
# '# <name>-grammar'. The text must be valid EBNF; stray line-number and
# blame residue that had slipped into the literal has been removed.
EBNF_TEMPLATE = r"""-grammar

#######################################################################
#
#  EBNF-Directives
#
#######################################################################

@ whitespace  = vertical        # implicit whitespace, includes any number of line feeds
@ literalws   = right           # literals have implicit whitespace on the right hand side
@ comment     = /#.*/           # comments range from a '#'-character to the end of the line
@ ignorecase  = False           # literals and regular expressions are case-sensitive


#######################################################################
#
#  Structure and Components
#
#######################################################################

document = ~ { WORD } §EOF      # root parser: a sequence of words preceded
                                # by whitespace until the end of file

#######################################################################
#
#  Regular Expressions
#
#######################################################################

WORD     =  /\w+/~      # a sequence of letters, optional trailing whitespace
EOF      =  !/./        # no more characters ahead, end of file reached
"""

74
# Content of `grammar_tests/01_test_word.ini` as written by `create_project`:
# match- and fail-tests for the WORD parser. Stray line-number residue that
# had slipped into the literal has been removed.
TEST_WORD_TEMPLATE = r'''[match:WORD]
M1: word
M2: one_word_with_underscores

[fail:WORD]
F1: two words
'''

# Content of `grammar_tests/02_test_document.ini` as written by
# `create_project`: match- and fail-tests for the `document` parser. Stray
# line-number residue that had slipped into the literal has been removed.
TEST_DOCUMENT_TEMPLATE = r'''[match:document]
M1: """This is a sequence of words
    extending over several lines"""
M2: """  This sequence contains leading whitespace"""

[fail:document]
F1: """This test should fail, because neither
    comma nor full have been defined anywhere."""
'''
91
92
93
94
95
96
97
98
99
100

# Template of the project's README.md; `create_project` fills in the project
# name with `.format(name=...)`, so the text must not contain any other
# braces. Stray line-number residue that had slipped into the literal has
# been removed.
README_TEMPLATE = """# {name}

PLACE A SHORT DESCRIPTION HERE

Author: AUTHOR'S NAME <EMAIL>, AFFILIATION


## License

{name} is open source software under the [Apache 2.0 License](https://www.apache.org/licenses/LICENSE-2.0)

Copyright YEAR AUTHOR'S NAME <EMAIL>, AFFILIATION

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""


119
# Template of the script `tst_<name>_grammar.py` that `create_project` writes
# into a new project. It is filled in with `.format(name=..., dhparserdir=...)`
# and must therefore yield valid Python and contain no braces other than
# these two placeholders. Stray line-number and blame residue that had
# slipped into the literal (and broke the generated script) has been removed.
GRAMMAR_TEST_TEMPLATE = r'''#!/usr/bin/python3

"""tst_{name}_grammar.py - runs the unit tests for the {name}-grammar
"""

import os
import sys

LOGGING = False

sys.path.append(r'{dhparserdir}')

scriptpath = os.path.dirname(__file__)


try:
    from DHParser import dsl
    import DHParser.log
    from DHParser import testing
except ModuleNotFoundError:
    print('Could not import DHParser. Please adjust sys.path in file '
          '"%s" manually' % __file__)
    sys.exit(1)


def recompile_grammar(grammar_src, force):
    grammar_tests_dir = os.path.join(scriptpath, 'grammar_tests')
    testing.create_test_templates(grammar_src, grammar_tests_dir)
    with DHParser.log.logging(False):
        # recompiles Grammar only if it has changed
        if not dsl.recompile_grammar(grammar_src, force=force,
                notify=lambda: print('recompiling ' + grammar_src)):
            print('\nErrors while recompiling "%s":' % grammar_src +
                  '\n--------------------------------------\n\n')
            with open('{name}_ebnf_ERRORS.txt') as f:
                print(f.read())
            sys.exit(1)


def run_grammar_tests(glob_pattern):
    with DHParser.log.logging(LOGGING):
        error_report = testing.grammar_suite(
            os.path.join(scriptpath, 'grammar_tests'),
            get_grammar, get_transformer,
            fn_patterns=[glob_pattern], report=True, verbose=True)
    return error_report


if __name__ == '__main__':
    argv = sys.argv[:]
    if len(argv) > 1 and sys.argv[1] == "--debug":
        LOGGING = True
        del argv[1]
    if (len(argv) >= 2 and (argv[1].endswith('.ebnf') or
        os.path.splitext(argv[1])[1].lower() in testing.TEST_READERS.keys())):
        # if called with a single filename that is either an EBNF file or a known
        # test file type then use the given argument
        arg = argv[1]
    else:
        # otherwise run all tests in the test directory
        arg = '*_test_*.ini'
    if arg.endswith('.ebnf'):
        recompile_grammar(arg, force=True)
    else:
        recompile_grammar(os.path.join(scriptpath, '{name}.ebnf'),
                          force=False)
        sys.path.append('.')
        from {name}Compiler import get_grammar, get_transformer
        error_report = run_grammar_tests(glob_pattern=arg)
        if error_report:
            print('\n')
            print(error_report)
            sys.exit(1)
        print('ready.\n')
'''
194
195


196
197
198
def create_project(path: str):
    """Create a new DHParser project in the directory `path`.

    The last component of `path` serves as the project name. It must be a
    valid identifier as a whole, because it becomes part of the name of the
    Python module that the generated test script imports
    (`<name>Compiler`). The directory and a `grammar_tests` subdirectory are
    created if missing, then the template files are written into them.
    Existing files are never overwritten.

    Exits the program with status 1 if the project name is not a valid
    identifier or if `path` or `grammar_tests` exists but is not a
    directory.
    """
    def create_file(name, content):
        """Write `content` to the file `name` unless the file already exists."""
        if os.path.exists(name):
            print('"%s" already exists! Not overwritten.' % name)
            return
        print('Creating file "%s".' % name)
        with open(name, 'w', encoding='utf-8') as f:
            f.write(content)

    name = os.path.basename(path)
    # fullmatch, not match: the whole name has to be an identifier, not
    # merely a prefix of it (e.g. "my-project" must be rejected)
    if not re.fullmatch(r'(?!\d)\w+', name):
        print('Project name "%s" is not a valid identifier! Aborting.' % name)
        sys.exit(1)
    if os.path.exists(path) and not os.path.isdir(path):
        print('Cannot create new project, because a file named "%s" already exists!' % path)
        sys.exit(1)
    print('Creating new DHParser-project "%s".' % name)
    if not os.path.exists(path):
        os.mkdir(path)

    test_script = 'tst_%s_grammar.py' % name
    curr_dir = os.getcwd()
    os.chdir(path)
    try:
        if os.path.exists('grammar_tests'):
            if not os.path.isdir('grammar_tests'):
                print('Cannot overwrite existing file "grammar_tests"')
                sys.exit(1)
        else:
            os.mkdir('grammar_tests')

        create_file(os.path.join('grammar_tests', '01_test_word.ini'), TEST_WORD_TEMPLATE)
        create_file(os.path.join('grammar_tests', '02_test_document.ini'),
                    TEST_DOCUMENT_TEMPLATE)
        create_file(name + '.ebnf', '# ' + name + EBNF_TEMPLATE)
        create_file('README.md', README_TEMPLATE.format(name=name))
        create_file(test_script,
                    GRAMMAR_TEST_TEMPLATE.format(name=name, dhparserdir=dhparserdir))
        create_file('example.dsl', 'Life is but a walking shadow\n')
        os.chmod(test_script, 0o755)
        # The following is left to the user as an exercise
        # print('Creating file "%s".' % (name + 'Compiler.py'))
        # recompile_grammar(name + '.ebnf', force=True)
        print('\nNow generate a DSL compiler from the EBNF-grammar by running\n'
              '\n    python %s\n' % test_script)
    finally:
        # restore the caller's working directory even if project creation
        # fails or exits half-way
        os.chdir(curr_dir)
241

242

Eckhart Arnold's avatar
Eckhart Arnold committed
243
def selftest() -> bool:
    """Run a simple two-stage self-test of DHParser.

    Stage one compiles the EBNF grammar, taken from the docstring of the
    built-in EBNF parser, with that built-in parser. Stage two compiles the
    same source again with the parser generated in stage one. The results of
    both stages are printed.

    Returns False if stage one reports errors, True otherwise.
    """
    print("DHParser selftest...")
    print("\nSTAGE I:  Trying to compile EBNF-Grammar:\n")
    grammar = get_ebnf_grammar()
    grammar_doc = str(grammar.__doc__)  # type: str
    # everything from the first '@' of the docstring on is used as EBNF source
    ebnf_src = grammar_doc[grammar_doc.find('@'):]
    transformer = get_ebnf_transformer()
    compiler = get_ebnf_compiler('EBNF')
    result, errors, _ = compile_source(ebnf_src, None, grammar, transformer, compiler)

    if errors:
        print("Selftest FAILED :-(")
        print(*errors, sep="\n\n")
        return False

    generated_ebnf_parser = cast(str, result)
    print(generated_ebnf_parser)
    print("\n\nSTAGE 2: Selfhosting-test: "
          "Trying to compile EBNF-Grammar with generated parser...\n")
    selfhosted_ebnf_parser = compileDSL(ebnf_src, None, generated_ebnf_parser,
                                        transformer, compiler)
    compiler.gen_transformer_skeleton()
    print(selfhosted_ebnf_parser)
    return True


272
def cpu_profile(func, repetitions=1):
    """Run `func` under cProfile and print the 40 most time-consuming entries.

    Args:
        func: callable taking no arguments; a falsy return value counts as
            failure and stops further repetitions.
        repetitions: maximum number of times `func` is called.

    Returns:
        The return value of the last call of `func`, or True if `func` was
        never called.

    Any exception raised by `func` propagates to the caller; in that case no
    statistics are printed.
    """
    import cProfile
    import pstats
    profile = cProfile.Profile()
    success = True
    profile.enable()
    try:
        for _ in range(repetitions):
            success = func()
            if not success:
                break
    finally:
        # always switch the profiler off again, so that an exception in
        # `func` does not leave it running in the background
        profile.disable()
    stats = pstats.Stats(profile)
    stats.strip_dirs()
    stats.sort_stats('time').print_stats(40)
    return success
290
291


292
293
294
def mem_profile(func):
    """Call `func` once while tracing memory allocations with tracemalloc.

    Prints the 20 largest allocation sites (grouped by source line) and
    returns the return value of `func`.

    Tracing is switched off again afterwards unless it was already active
    before the call, so that the tracing overhead does not leak into the
    rest of the program.
    """
    import tracemalloc
    was_tracing = tracemalloc.is_tracing()
    if not was_tracing:
        tracemalloc.start()
    try:
        success = func()
        snapshot = tracemalloc.take_snapshot()
    finally:
        # only stop what this function has started
        if not was_tracing:
            tracemalloc.stop()
    top_stats = snapshot.statistics('lineno')
    print("[ Top 20 ]")
    for stat in top_stats[:20]:
        print(stat)
    return success


306
307
308
309
def main():
    """Command line entry point of dhparser.py.

    With command line arguments:

    - `--selftest` runs the self-test,
    - the path of an existing file compiles that file with
      `compile_on_disk`; an optional second argument names the compiler
      suite,
    - anything else is taken as the name or path of a new project, which is
      then created.

    Without arguments a usage message is printed and an interactive menu
    offers the same three actions.

    Exits with status 1 whenever the chosen action fails or is aborted.
    """
    if len(sys.argv) > 1:
        if sys.argv[1].lower() == "--selftest":
            if not selftest():
                print("Selftest FAILED :-(\n")
                sys.exit(1)
            print("Selftest SUCCEEDED :-)\n")
        elif os.path.isfile(sys.argv[1]):
            _errors = compile_on_disk(sys.argv[1],
                                      sys.argv[2] if len(sys.argv) > 2 else "")
            if _errors:
                print('\n\n'.join(str(err) for err in _errors))
                sys.exit(1)
        else:
            create_project(sys.argv[1])
    else:
        print('Usage: \n'
              '    dhparser.py DSL_FILENAME [COMPILER]  - to compile a file\n'
              '    dhparser.py PROJECTNAME  - to create a new project\n\n')
        choice = input('Would you now like to ...\n'
                       '  [1] create a new project\n'
                       '  [2] compile an ebnf-grammar or a dsl-file\n'
                       '  [3] run a self-test\n'
                       '  [q] to quit\n'
                       'Please choose 1, 2 or 3> ').strip()
        if choice == '1':
            project_name = input('Please enter a project name or path > ')
            create_project(project_name)
        elif choice == '2':
            file_path = input('Please enter a file path for compilation > ')
            if not os.path.isfile(file_path):
                print('File %s not found! Aborting.' % file_path)
                sys.exit(1)
            compiler_suite = input('Compiler suite or ENTER (for ebnf) > ')
            if compiler_suite and not os.path.isfile(compiler_suite):
                print('Compiler suite %s not found! Aborting' % compiler_suite)
                # report the abort through the exit status, like every
                # other failure path of this function
                sys.exit(1)
            _errors = compile_on_disk(file_path, compiler_suite)
            if _errors:
                print('\n\n'.join(str(err) for err in _errors))
                sys.exit(1)
        elif choice == '3':
            with logging(LOGGING):
                if not cpu_profile(selftest, 1):
                    print("Selftest FAILED :-(\n")
                    sys.exit(1)
                print("Selftest SUCCEEDED :-)\n")
        elif choice.lower() not in {'q', 'quit', 'exit'}:
            print('No valid choice. Goodbye!')
360

361

362
363
# Run the command line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()