#!/usr/bin/env python3

#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################


from functools import partial
import os
import sys
from typing import Tuple, List

try:
    scriptpath = os.path.dirname(__file__)
except NameError:
    scriptpath = ''
dhparser_parentdir = os.path.abspath(os.path.join(scriptpath, r'../..'))
if scriptpath not in sys.path:
    sys.path.append(scriptpath)
if dhparser_parentdir not in sys.path:
    sys.path.append(dhparser_parentdir)

try:
    import regex as re
except ImportError:
    import re
from DHParser import start_logging, suspend_logging, resume_logging, is_filename, load_if_file, \
    Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, Drop, AnyChar, \
    Lookbehind, Lookahead, Alternative, Pop, Text, Synonym, Counted, Interleave, INFINITE, \
    Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
    ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
    grammar_changed, last_value, matching_bracket, PreprocessorFunc, is_empty, remove_if, \
    Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
    remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \
    reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
    replace_by_children, remove_empty, remove_tokens, flatten, all_of, any_of, \
    merge_adjacent, collapse, collapse_children_if, transform_content, WHITESPACE_PTYPE, \
    TOKEN_PTYPE, remove_children, remove_content, remove_brackets, change_tag_name, \
    remove_anonymous_tokens, keep_children, is_one_of, not_one_of, has_content, apply_if, peek, \
    remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
    transform_content, replace_content_with, forbid, assert_content, remove_infix_operator, \
    add_error, error_on, recompile_grammar, left_associative, lean_left, set_config_value, \
    get_config_value, node_maker, access_thread_locals, access_presets, \
    finalize_presets, ErrorCode, RX_NEVER_MATCH, set_tracer, resume_notices_on, \
    trace_history, has_descendant, neg, has_ancestor, optional_last_value, insert, \
    positions_of, replace_tag_names, add_attributes, delimit_children, merge_connected, \
    has_attr, has_parent, ThreadLocalSingletonFactory, NEVER_MATCH_PATTERN, Error, \
    gen_find_include_func, preprocess_includes, make_preprocessor, chain_preprocessors


#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

RE_INCLUDE = NEVER_MATCH_PATTERN
# To capture includes, replace the NEVER_MATCH_PATTERN
# by a pattern with group "name" here, e.g. r'\input{(?P<name>.*)}'
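# Illustrative sketch (an assumption for illustration only, not part of the
# generated template): if source files referenced other grammar files via a
# hypothetical directive such as  @include = "common.ebnf",  a pattern along
# the lines of
#     RE_INCLUDE = r'@include\s*=\s*"(?P<name>[^"\n]+)"'
# would capture the included file name in the group "name".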


def newTokenizer(original_text) -> Tuple[str, List[Error]]:
    # Here, a function body can be filled in that adds preprocessor tokens
    # to the source code and returns the modified source.
    return original_text, []
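
# Illustrative sketch (an assumption, not part of the generated template): a
# tokenizer might, for instance, normalize line endings before parsing while
# reporting no errors; any real tokenizer must return the (possibly modified)
# text together with a list of Error objects:
#
#   def newTokenizer(original_text) -> Tuple[str, List[Error]]:
#       normalized = original_text.replace('\r\n', '\n')
#       return normalized, []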


def preprocessor_factory() -> PreprocessorFunc:
    # below, the second parameter must always be the same as FlexibleEBNFGrammar.COMMENT__!
    find_next_include = gen_find_include_func(RE_INCLUDE, '#.*')
    include_prep = partial(preprocess_includes, find_next_include=find_next_include)
    tokenizing_prep = make_preprocessor(newTokenizer)
    return chain_preprocessors(include_prep, tokenizing_prep)


get_preprocessor = ThreadLocalSingletonFactory(preprocessor_factory, ident=1)


def preprocess_new(source):
    return get_preprocessor()(source)


#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################

class FlexibleEBNFGrammar(Grammar):
    r"""Parser for a FlexibleEBNF source file.
    """
    countable = Forward()
    element = Forward()
    expression = Forward()
    source_hash__ = "c76fcc24e5077d4e150b771e6b60f0a1"
    disposable__ = re.compile('component$|pure_elem$|countable$|FOLLOW_UP$|SYM_REGEX$|ANY_SUFFIX$|EOF$')
    static_analysis_pending__ = []  # type: List[bool]
    parser_initialization__ = ["upon instantiation"]
    error_messages__ = {'definition': [(re.compile(r','), 'Delimiter "," not expected in definition!\\nEither this was meant to be a directive and the directive symbol @ is missing\\nor the error is due to inconsistent use of the comma as a delimiter\\nfor the elements of a sequence.')]}
    COMMENT__ = r'(?!#x[A-Fa-f0-9])#.*(?:\n|$)|\/\*(?:.|\n)*?\*\/|\(\*(?:.|\n)*?\*\)'
    comment_rx__ = re.compile(COMMENT__)
    WHITESPACE__ = r'\s*'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)
    dwsp__ = Drop(Whitespace(WSP_RE__))
    HEXCODE = RegExp('[A-Fa-f0-9]{1,8}')
    SYM_REGEX = RegExp('(?!\\d)\\w+')
    RE_CORE = RegExp('(?:(?<!\\\\)\\\\(?:/)|[^/])*')
    regex_heuristics = Alternative(RegExp('[^ ]'), RegExp('[^/\\n*?+\\\\]*[*?+\\\\][^/\\n]/'))
    literal_heuristics = Alternative(RegExp('~?\\s*"(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^"]*)*"'), RegExp("~?\\s*'(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^']*)*'"), RegExp('~?\\s*`(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^`]*)*`'), RegExp('~?\\s*´(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^´]*)*´'), RegExp('~?\\s*/(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^/]*)*/'))
    char_range_heuristics = NegativeLookahead(Alternative(RegExp('[\\n\\t ]'), Series(dwsp__, literal_heuristics), Series(Option(Alternative(Text("::"), Text(":?"), Text(":"))), SYM_REGEX, RegExp('\\s*\\]'))))
    CH_LEADIN = Capture(Alternative(Text("0x"), Text("#x")))
    RE_LEADOUT = Capture(Text("/"))
    RE_LEADIN = Capture(Alternative(Series(Text("/"), Lookahead(regex_heuristics)), Text("^/")))
    TIMES = Capture(Text("*"))
    RNG_DELIM = Capture(Text(","))
    BRACE_SIGN = Capture(Alternative(Text("{"), Text("(")))
    RNG_BRACE = Capture(Retrieve(BRACE_SIGN))
    ENDL = Capture(Alternative(Text(";"), Text("")))
    AND = Capture(Alternative(Text(","), Text("")))
    OR = Capture(Alternative(Text("|"), Series(Text("/"), NegativeLookahead(regex_heuristics))))
    DEF = Capture(Alternative(Text("="), Text(":="), Text("::="), Text("<-"), RegExp(':\\n'), Text(": ")))
    EOF = Drop(Series(Drop(NegativeLookahead(RegExp('.'))), Drop(Option(Drop(Pop(DEF, match_func=optional_last_value)))), Drop(Option(Drop(Pop(OR, match_func=optional_last_value)))), Drop(Option(Drop(Pop(AND, match_func=optional_last_value)))), Drop(Option(Drop(Pop(ENDL, match_func=optional_last_value)))), Drop(Option(Drop(Pop(RNG_DELIM, match_func=optional_last_value)))), Drop(Option(Drop(Pop(BRACE_SIGN, match_func=optional_last_value)))), Drop(Option(Drop(Pop(CH_LEADIN, match_func=optional_last_value)))), Drop(Option(Drop(Pop(TIMES, match_func=optional_last_value)))), Drop(Option(Drop(Pop(RE_LEADIN, match_func=optional_last_value)))), Drop(Option(Drop(Pop(RE_LEADOUT, match_func=optional_last_value))))))
    whitespace = Series(RegExp('~'), dwsp__)
    any_char = Series(Text("."), dwsp__)
    free_char = Alternative(RegExp('[^\\n\\[\\]\\\\]'), RegExp('\\\\[nrt`´\'"(){}\\[\\]/\\\\]'))
    character = Series(Retrieve(CH_LEADIN), HEXCODE)
    char_range = Series(Text("["), Lookahead(char_range_heuristics), Option(Text("^")), Alternative(character, free_char), ZeroOrMore(Alternative(Series(Option(Text("-")), character), free_char)), Series(Text("]"), dwsp__))
    regexp = Series(Retrieve(RE_LEADIN), RE_CORE, Retrieve(RE_LEADOUT), dwsp__)
    plaintext = Alternative(Series(RegExp('`(?:(?<!\\\\)\\\\`|[^`])*?`'), dwsp__), Series(RegExp('´(?:(?<!\\\\)\\\\´|[^´])*?´'), dwsp__))
    literal = Alternative(Series(RegExp('"(?:(?<!\\\\)\\\\"|[^"])*?"'), dwsp__), Series(RegExp("'(?:(?<!\\\\)\\\\'|[^'])*?'"), dwsp__))
    symbol = Series(SYM_REGEX, dwsp__)
    multiplier = Series(RegExp('[1-9]\\d*'), dwsp__)
    no_range = Alternative(NegativeLookahead(multiplier), Series(Lookahead(multiplier), Retrieve(TIMES)))
    range = Series(RNG_BRACE, dwsp__, multiplier, Option(Series(Retrieve(RNG_DELIM), dwsp__, multiplier)), Pop(RNG_BRACE, match_func=matching_bracket), dwsp__)
    counted = Alternative(Series(countable, range), Series(countable, Retrieve(TIMES), dwsp__, multiplier), Series(multiplier, Retrieve(TIMES), dwsp__, countable, mandatory=3))
    option = Alternative(Series(NegativeLookahead(char_range), Series(Text("["), dwsp__), expression, Series(Text("]"), dwsp__), mandatory=2), Series(element, Series(Text("?"), dwsp__)))
    repetition = Alternative(Series(Series(Text("{"), dwsp__), no_range, expression, Series(Text("}"), dwsp__), mandatory=2), Series(element, Series(Text("*"), dwsp__), no_range))
    oneormore = Alternative(Series(Series(Text("{"), dwsp__), no_range, expression, Series(Text("}+"), dwsp__)), Series(element, Series(Text("+"), dwsp__)))
    group = Series(Series(Text("("), dwsp__), no_range, expression, Series(Text(")"), dwsp__), mandatory=2)
    retrieveop = Alternative(Series(Text("::"), dwsp__), Series(Text(":?"), dwsp__), Series(Text(":"), dwsp__))
    flowmarker = Alternative(Series(Text("!"), dwsp__), Series(Text("&"), dwsp__), Series(Text("<-!"), dwsp__), Series(Text("<-&"), dwsp__))
    ANY_SUFFIX = RegExp('[?*+]')
    literals = OneOrMore(literal)
    pure_elem = Series(element, NegativeLookahead(ANY_SUFFIX), mandatory=1)
    procedure = Series(SYM_REGEX, Series(Text("()"), dwsp__))
    term = Alternative(oneormore, counted, repetition, option, pure_elem)
    difference = Series(term, Option(Series(Series(Text("-"), dwsp__), Alternative(oneormore, pure_elem), mandatory=1)))
    lookaround = Series(flowmarker, Alternative(oneormore, pure_elem), mandatory=1)
    interleave = Series(difference, ZeroOrMore(Series(Series(Text("°"), dwsp__), Option(Series(Text("§"), dwsp__)), difference)))
    sequence = Series(Option(Series(Text("§"), dwsp__)), Alternative(interleave, lookaround), ZeroOrMore(Series(NegativeLookahead(Text("@")), NegativeLookahead(Series(symbol, Retrieve(DEF))), Retrieve(AND), dwsp__, Option(Series(Text("§"), dwsp__)), Alternative(interleave, lookaround))))
    FOLLOW_UP = Alternative(Text("@"), symbol, EOF)
    definition = Series(symbol, Retrieve(DEF), dwsp__, Option(Series(Retrieve(OR), dwsp__)), expression, Retrieve(ENDL), dwsp__, Lookahead(FOLLOW_UP), mandatory=1)
    component = Alternative(literals, procedure, expression)
    directive = Series(Series(Text("@"), dwsp__), symbol, Series(Text("="), dwsp__), component, ZeroOrMore(Series(Series(Text(","), dwsp__), component)), Lookahead(FOLLOW_UP), mandatory=1)
    element.set(Alternative(Series(Option(retrieveop), symbol, NegativeLookahead(Retrieve(DEF))), literal, plaintext, regexp, char_range, Series(character, dwsp__), any_char, whitespace, group))
    countable.set(Alternative(option, oneormore, element))
    expression.set(Series(sequence, ZeroOrMore(Series(Retrieve(OR), dwsp__, sequence))))
    syntax = Series(dwsp__, ZeroOrMore(Alternative(definition, directive)), EOF)
    resume_rules__ = {'definition': [re.compile(r'\n\s*(?=@|\w+\w*\s*=)')],
                      'directive': [re.compile(r'\n\s*(?=@|\w+\w*\s*=)')]}
    root__ = syntax
    

_raw_grammar = ThreadLocalSingletonFactory(FlexibleEBNFGrammar, ident=1)

def get_grammar() -> FlexibleEBNFGrammar:
    grammar = _raw_grammar()
    if get_config_value('resume_notices'):
        resume_notices_on(grammar)
    elif get_config_value('history_tracking'):
        set_tracer(grammar, trace_history)
    try:
        if not grammar.__class__.python_src__:
            grammar.__class__.python_src__ = get_grammar.python_src__
    except AttributeError:
        pass
    return grammar
    
def parse_FlexibleEBNF(document, start_parser="root_parser__", *, complete_match=True):
    return get_grammar()(document, start_parser, complete_match)
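
# Minimal usage sketch (assumptions: this module is importable and the input
# is a small, well-formed EBNF snippet):
#
#   from FlexibleEBNFParser import parse_FlexibleEBNF
#   syntax_tree = parse_FlexibleEBNF('document = "Hello, " /\\w+/')
#   print(syntax_tree.as_sxpr())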


#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

EBNF_AST_transformation_table = {
    # AST Transformations for the EBNF-grammar
    "<":
        [remove_children_if(all_of(not_one_of('regexp'), is_empty))],
    "syntax":
        [],
    "directive":
        [flatten, remove_tokens('@', '=', ',')],
    "procedure":
        [remove_tokens('()'), reduce_single_child],
    "literals":
        [replace_by_single_child],
    "definition":
        [flatten, remove_children('DEF', 'ENDL'),
         remove_tokens('=')],  # remove_tokens('=') is only for backwards-compatibility
    "expression":
        [replace_by_single_child, flatten, remove_children('OR'),
         remove_tokens('|')],  # remove_tokens('|') is only for backwards-compatibility
    "sequence":
        [replace_by_single_child, flatten, remove_children('AND')],
    "interleave":
        [replace_by_single_child, flatten, remove_tokens('°')],
    "lookaround":
        [],
    "difference":
        [remove_tokens('-'), replace_by_single_child],
    "term, pure_elem, element":
        [replace_by_single_child],
    "flowmarker, retrieveop":
        [reduce_single_child],
    "group":
        [remove_brackets],
    "oneormore, repetition, option":
        [reduce_single_child, remove_brackets,  # remove_tokens('?', '*', '+'),
         forbid('repetition', 'option', 'oneormore'), assert_content(r'(?!§)(?:.|\n)*')],
    "counted":
        [remove_children('TIMES')],
    "range":
        [remove_children('BRACE_SIGN', 'RNG_BRACE', 'RNG_DELIM')],
    "symbol, literal, any_char":
        [reduce_single_child],
    "plaintext":
        [],
    "regexp":
        [remove_children('RE_LEADIN', 'RE_LEADOUT'), reduce_single_child],
    "char_range":
        [flatten, remove_tokens('[', ']')],
    "character":
        [remove_children('CH_LEADIN'), reduce_single_child],
    "free_char":
        [],
    (TOKEN_PTYPE, WHITESPACE_PTYPE, "whitespace"):
        [reduce_single_child],
    "EOF, DEF, OR, AND, ENDL, BRACE_SIGN, RNG_BRACE, RNG_DELIM, TIMES, "
    "RE_LEADIN, RE_CORE, RE_LEADOUT, CH_LEADIN":
        [],
    "*":
        [replace_by_single_child]
}



def CreateEBNFTransformer() -> TransformationFunc:
    """Creates a transformation function that does not share state with other
    threads or processes."""
    return partial(traverse, processing_table=EBNF_AST_transformation_table.copy())


def get_transformer() -> TransformationFunc:
    """Returns a thread/process-exclusive transformation function."""
    THREAD_LOCALS = access_thread_locals()
    try:
        transformer = THREAD_LOCALS.EBNF_00000001_transformer_singleton
    except AttributeError:
        THREAD_LOCALS.EBNF_00000001_transformer_singleton = CreateEBNFTransformer()
        transformer = THREAD_LOCALS.EBNF_00000001_transformer_singleton
    return transformer


#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################

class EBNFCompiler(Compiler):
    """Compiler for the abstract-syntax-tree of a EBNF source file.
    """

    def __init__(self):
        super(EBNFCompiler, self).__init__()

    def reset(self):
        super().reset()
        # initialize your variables here, not in the constructor!
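        # Illustrative sketch (an assumption): per-run compilation state, such
        # as a symbol table collected while compiling, would be (re-)initialized
        # here so that every compilation run starts from a clean slate, e.g.:
        #   self.definitions = {}   # hypothetical example attribute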

    def on_syntax(self, node):
        return self.fallback_compiler(node)

    # def on_definition(self, node):
    #     return node

    # def on_directive(self, node):
    #     return node

    # def on_literals(self, node):
    #     return node

    # def on_procedure(self, node):
    #     return node

    # def on_FOLLOW_UP(self, node):
    #     return node

    # def on_expression(self, node):
    #     return node

    # def on_sequence(self, node):
    #     return node

    # def on_interleave(self, node):
    #     return node

    # def on_lookaround(self, node):
    #     return node

    # def on_difference(self, node):
    #     return node

    # def on_term(self, node):
    #     return node

    # def on_countable(self, node):
    #     return node

    # def on_pure_elem(self, node):
    #     return node

    # def on_element(self, node):
    #     return node

    # def on_ANY_SUFFIX(self, node):
    #     return node

    # def on_flowmarker(self, node):
    #     return node

    # def on_retrieveop(self, node):
    #     return node

    # def on_group(self, node):
    #     return node

    # def on_oneormore(self, node):
    #     return node

    # def on_repetition(self, node):
    #     return node

    # def on_option(self, node):
    #     return node

    # def on_counted(self, node):
    #     return node

    # def on_range(self, node):
    #     return node

    # def on_no_range(self, node):
    #     return node

    # def on_multiplier(self, node):
    #     return node

    # def on_symbol(self, node):
    #     return node

    # def on_literal(self, node):
    #     return node

    # def on_plaintext(self, node):
    #     return node

    # def on_regexp(self, node):
    #     return node

    # def on_char_range(self, node):
    #     return node

    # def on_character(self, node):
    #     return node

    # def on_free_char(self, node):
    #     return node

    # def on_any_char(self, node):
    #     return node

    # def on_whitespace(self, node):
    #     return node

    # def on_EOF(self, node):
    #     return node

    # def on_DEF(self, node):
    #     return node

    # def on_OR(self, node):
    #     return node

    # def on_AND(self, node):
    #     return node

    # def on_ENDL(self, node):
    #     return node

    # def on_RNG_BRACE(self, node):
    #     return node

    # def on_BRACE_SIGN(self, node):
    #     return node

    # def on_RNG_DELIM(self, node):
    #     return node

    # def on_TIMES(self, node):
    #     return node

    # def on_RE_LEADIN(self, node):
    #     return node

    # def on_RE_LEADOUT(self, node):
    #     return node

    # def on_CH_LEADIN(self, node):
    #     return node

    # def on_char_range_heuristics(self, node):
    #     return node

    # def on_literal_heuristics(self, node):
    #     return node

    # def on_regex_heuristics(self, node):
    #     return node

    # def on_RE_CORE(self, node):
    #     return node

    # def on_SYM_REGEX(self, node):
    #     return node

    # def on_HEXCODE(self, node):
    #     return node



def get_compiler() -> EBNFCompiler:
    """Returns a thread/process-exclusive EBNFCompiler-singleton."""
    THREAD_LOCALS = access_thread_locals()
    try:
        compiler = THREAD_LOCALS.EBNF_00000001_compiler_singleton
    except AttributeError:
        THREAD_LOCALS.EBNF_00000001_compiler_singleton = EBNFCompiler()
        compiler = THREAD_LOCALS.EBNF_00000001_compiler_singleton
    return compiler


#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################

def compile_src(source):
    """Compiles ``source`` and returns (result, errors, ast).
    """
    result_tuple = compile_source(source, get_preprocessor(), get_grammar(), get_transformer(),
                                  get_compiler())
    return result_tuple


if __name__ == "__main__":
    # recompile grammar if needed
    if __file__.endswith('Parser.py'):
        grammar_path = os.path.abspath(__file__).replace('Parser.py', '.ebnf')
    else:
        grammar_path = os.path.splitext(__file__)[0] + '.ebnf'
    parser_update = False

    def notify():
        global parser_update
        parser_update = True
        print('recompiling ' + grammar_path)

    if os.path.exists(grammar_path) and os.path.isfile(grammar_path):
        if not recompile_grammar(grammar_path, force=False, notify=notify):
            error_file = os.path.basename(__file__).replace('Parser.py', '_ebnf_ERRORS.txt')
            with open(error_file, encoding="utf-8") as f:
                print(f.read())
            sys.exit(1)
        elif parser_update:
            print(os.path.basename(__file__) + ' has changed. '
                  'Please run again in order to apply the updated compiler.')
            sys.exit(0)
    else:
        print('Could not check whether the grammar requires recompiling, '
              'because the grammar file was not found at: ' + grammar_path)

    from argparse import ArgumentParser
    parser = ArgumentParser(description="Parses an EBNF file and shows its syntax tree.")
    parser.add_argument('files', nargs=1)
    parser.add_argument('-d', '--debug', action='store_const', const='debug')
    parser.add_argument('-x', '--xml', action='store_const', const='xml')

    args = parser.parse_args()
    file_name, log_dir = args.files[0], ''

    if not os.path.exists(file_name):
        print('File "%s" not found!' % file_name)
        sys.exit(1)
    if not os.path.isfile(file_name):
        print('"%s" is not a file!' % file_name)
        sys.exit(1)

    if args.debug is not None:
        log_dir = 'LOGS'
        set_config_value('history_tracking', True)
        set_config_value('resume_notices', True)
        set_config_value('log_syntax_trees', {'cst', 'ast'})
    start_logging(log_dir)

    result, errors, _ = compile_src(file_name)

    if errors:
        cwd = os.getcwd()
        rel_path = file_name[len(cwd):] if file_name.startswith(cwd) else file_name
        for error in errors:
            print(rel_path + ':' + str(error))
        sys.exit(1)
    else:
        print(result.serialize(how='default' if args.xml is None else 'xml')
              if isinstance(result, Node) else result)