test_ebnf.py 15.1 KB
Newer Older
1
2
#!/usr/bin/python3

3
"""test_ebnf.py - tests of the ebnf module of DHParser 
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
                             

Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

Eckhart Arnold's avatar
Eckhart Arnold committed
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24
from multiprocessing import Pool
Eckhart Arnold's avatar
Eckhart Arnold committed
25

Eckhart Arnold's avatar
Eckhart Arnold committed
26
27
sys.path.extend(['../', './'])

28
from DHParser.toolkit import compile_python_object, re
29
from DHParser.preprocess import nil_preprocessor
30
from DHParser import compile_source
31
32
from DHParser.error import has_errors
from DHParser.syntaxtree import WHITESPACE_PTYPE
33
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransform, get_ebnf_compiler
34
from DHParser.dsl import CompilationError, compileDSL, DHPARSER_IMPORTS, grammar_provider
35
36


37
38
39
40
41
42
43
44
45
46
47
class TestDirectives:
    """Exercises the ``@ whitespace`` directive on a small arithmetic grammar.

    Every test prepends one directive value to ``mini_language``, builds a
    parser with ``grammar_provider`` and checks which inputs parse without
    errors.
    """

    mini_language = """
        expression =  term  { ("+" | "-") term }
        term       =  factor  { ("*" | "/") factor }
        factor     =  constant | "("  expression  ")"
        constant   =  digit { digit } [ //~ ]
        digit      = /0/ | /1/ | /2/ | /3/ | /4/ | /5/ | /6/ | /7/ | /8/ | /9/ 
        """

    def test_whitespace_linefeed(self):
        """``linefeed``: one line break is accepted, an empty line is not."""
        ebnf_source = "@ whitespace = linefeed\n" + self.mini_language
        parser_factory = grammar_provider(ebnf_source)
        parser = parser_factory()
        assert parser
        for accepted in ("3 + 4 * 12", "3 + 4 \n * 12"):
            assert not parser(accepted).collect_errors()
        for rejected in ("3 + 4 \n \n * 12", "3 + 4 \n\n * 12"):
            assert parser(rejected).collect_errors()

    def test_whitespace_vertical(self):
        """``vertical``: line breaks and empty lines are both accepted."""
        ebnf_source = "@ whitespace = vertical\n" + self.mini_language
        parser = grammar_provider(ebnf_source)()
        assert parser
        accepted_inputs = (
            "3 + 4 * 12",
            "3 + 4 \n * 12",
            "3 + 4 \n \n * 12",
            "3 + 4 \n\n * 12",
        )
        for accepted in accepted_inputs:
            assert not parser(accepted).collect_errors()

    def test_whitespace_horizontal(self):
        """``horizontal``: any line break inside the expression is an error."""
        ebnf_source = "@ whitespace = horizontal\n" + self.mini_language
        parser = grammar_provider(ebnf_source)()
        assert parser
        single_line_tree = parser("3 + 4 * 12")
        assert not single_line_tree.collect_errors()
        two_line_tree = parser("3 + 4 \n * 12")
        assert two_line_tree.collect_errors()

84

Eckhart Arnold's avatar
Eckhart Arnold committed
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
class TestReservedSymbols:
    """Grammars that refer to the reserved symbols ``COMMENT__`` and ``WSP__``."""

    def test_comment_usage(self):
        """A grammar using ``COMMENT__`` can be turned into a parser object."""
        ebnf_source = r"""
        @comment = /#.*(?:\n|$)/
        document = text [ COMMENT__ ]
        text = /[^#]+/
        """
        # Only construction is checked here; nothing is parsed.
        grammar_provider(ebnf_source)()

    def test_whitespace(self):
        """A grammar using ``WSP__`` can be turned into a parser object."""
        ebnf_source = r"""
        @whitespace = /\s*/
        document = WSP__ { word WSP__ }
        word = /\w+/ 
        """
        # Only construction is checked here; nothing is parsed.
        grammar_provider(ebnf_source)()

    def test_mixin(self):
        """With both directives set, a word followed by a comment parses cleanly."""
        ebnf_source = r"""
        @comment = /#.*(?:\n|$)/
        @whitespace = /\s*/
        document = WSP__ { word WSP__ }
        word = /\w+/ 
        """
        make_parser = grammar_provider(ebnf_source)
        tree = make_parser()("test # kommentar")
        assert not tree.error_flag, str(tree.as_sxpr())


114
class TestEBNFParser:
    """Tests individual parsers of the grammar returned by ``get_ebnf_grammar()``."""

    # Sample inputs keyed by parser name. None of the test methods in this
    # class reads this table.
    cases = {
        "list_": {
            "match": {
                1: "hund",
                2: "hund, katze,maus",
                3: "hund , katze"
            },
            "fail": {
                1: "123",
                2: '"literal"',
                3: "/regexp/"
            }
        }
    }

    def setup(self):
        """Fetch the EBNF grammar object used by ``test_literal``."""
        self.EBNF = get_ebnf_grammar()

    def test_RE(self):
        """The regexp parser must stop at the closing '/' of ``/[\\\\\\\\]/``.

        The text matched must not reach into the trailing `` xxx /`` and,
        stripped of its delimiters, must compile to a pattern that matches
        two backslashes.
        """
        gr = get_ebnf_grammar()
        m = gr.regexp.main.regexp.match(r'/[\\\\]/ xxx /')
        assert m is not None, "regexp parser did not match at all"
        rs = m.group()
        # `rs` is the matched string itself, so it is used directly as the
        # failure message (a str has no `.group()` method).
        assert 'x' not in rs, rs
        rx = re.compile(rs[1:-1])
        assert rx.match(r'\\')

    def test_literal(self):
        """Literals take trailing whitespace with them, but not leading whitespace."""
        snippet = '"literal" '
        result = self.EBNF(snippet, 'literal')
        assert not result.error_flag
        assert str(result) == snippet
        assert result.find(lambda node: node.parser.ptype == WHITESPACE_PTYPE)

        result = self.EBNF(' "literal"', 'literal')
        assert result.error_flag  # literals catch following, but not leading whitespace


152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
class TestParserNameOverwriteBug:
    """Regression test: two grammars are processed one after the other."""

    def test_term_bug(self):
        """Parse and transform one grammar, then fully compile a second one
        and require that the second tree carries no errors."""
        ebnf_parser = get_ebnf_grammar()

        first_tree = ebnf_parser('impossible = [§"an optional requirement"]')
        get_ebnf_transformer()(first_tree)

        series_source = """series = "A" "B" §"C" "D"
        """
        # NOTE(review): the grammar object fetched here is never used; the
        # second source is parsed with `ebnf_parser` from above -- confirm
        # whether that is intended.
        get_ebnf_grammar()
        second_tree = ebnf_parser(series_source)
        get_ebnf_transformer()(second_tree)
        get_ebnf_compiler()(second_tree)

        collected = second_tree.collect_errors()
        assert not has_errors(collected), str(collected)


171
172
class TestSemanticValidation:
    """Checks which nestings of EBNF constructs leave errors on the tree
    after ``EBNFTransform`` has run."""

    def check(self, minilang, bool_filter=lambda x: x):
        """Parse *minilang*, transform it and test the collected errors.

        The source must parse without errors. After the transformation,
        ``bool_filter`` is applied to the collected errors and its result
        must be truthy. The default filter therefore demands that at least
        one error was collected.
        """
        tree = get_ebnf_grammar()(minilang)
        assert not tree.collect_errors()
        transform = EBNFTransform()
        transform(tree)
        assert bool_filter(tree.collect_errors())

    def test_illegal_nesting(self):
        """An option as sole content of a repetition must be flagged."""
        self.check('impossible = { [ "an optional requirement" ] }')

    def test_illegal_nesting_option_required(self):
        """A required element inside an option must be flagged."""
        self.check('impossible = [ §"an optional requirement" ]')

    def test_illegal_nesting_oneormore_option(self):
        """An option as sole content of a one-or-more loop must be flagged."""
        self.check('impossible = { [ "no use"] }+')

    def test_legal_nesting(self):
        """An option followed by a mandatory item inside a repetition is fine."""
        self.check('possible = { [ "+" ] "1" }', lambda errors: not errors)


class TestCompilerErrors:
    """Checks which EBNF sources make the compiler emit messages."""

    def _messages_for(self, ebnf, compiler_name):
        """Compile *ebnf* without a preprocessor and return the messages
        that ``compile_source`` reports."""
        _, reported, _ = compile_source(
            ebnf,
            None,
            get_ebnf_grammar(),
            get_ebnf_transformer(),
            get_ebnf_compiler(compiler_name),
        )
        return reported

    def test_error_propagation(self):
        """A bad directive value must surface as a message."""
        source = "@ literalws = wrongvalue  # testing error propagation\n"
        reported = self._messages_for(source, 'ErrorPropagationTest')
        assert reported

    def test_undefined_symbols(self):
        """Use of undefined symbols should be reported.
        """
        source = """syntax = { intermediary }
                  intermediary = "This symbol is " [ badly_spelled ] "!"
                  bedly_spilled = "wrong" """
        reported = self._messages_for(source, 'UndefinedSymbols')
        assert reported

    def test_no_error(self):
        """But reserved symbols should not be reported as undefined.
        """
        source = """nothing =  WSP__ | COMMENT__\n"""
        reported = self._messages_for(source, 'UndefinedSymbols')
        assert not reported, reported

217

218
class TestSelfHosting:
    """Compiles an EBNF description of EBNF itself, in-process and in a
    process pool."""

    grammar = r"""
        # EBNF-Grammar in EBNF

        @ comment    =  /#.*(?:\n|$)/                    # comments start with '#' and eat all chars up to and including '\n'
        @ whitespace =  /\s*/                            # whitespace includes linefeed
        @ literalws  =  right                            # trailing whitespace of literals will be ignored tacitly

        syntax     =  [~//] { definition | directive } §EOF
        definition =  symbol §"=" expression
        directive  =  "@" §symbol "=" ( regexp | literal | list_ )

        expression =  term { "|" term }
        term       =  { factor }+
        factor     =  [flowmarker] [retrieveop] symbol !"="   # negative lookahead to be sure it's not a definition
                    | [flowmarker] literal
                    | [flowmarker] regexp
                    | [flowmarker] group
                    | [flowmarker] regexchain
                    | [flowmarker] oneormore
                    | repetition
                    | option

        flowmarker =  "!"  | "&"  | "§" |                # '!' negative lookahead, '&' positive lookahead, '§' required
                      "-!" | "-&"                        # '-' negative lookbehind, '-&' positive lookbehind
        retrieveop =  "::" | ":"                         # '::' pop, ':' retrieve

        group      =  "(" expression §")"
        regexchain =  ">" expression §"<"                # compiles "expression" into a singular regular expression
        oneormore  =  "{" expression "}+"
        repetition =  "{" expression §"}"
        option     =  "[" expression §"]"

        symbol     =  /(?!\d)\w+/~                       # e.g. expression, factor, parameter_list
        literal    =  /"(?:[^"]|\\")*?"/~                # e.g. "(", '+', 'while'
                    | /'(?:[^']|\\')*?'/~                # whitespace following literals will be ignored tacitly.
        regexp     =  /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~    # e.g. /\w+/, ~/#.*(?:\n|$)/~
                                                         # '~' is a whitespace-marker, if present leading or trailing
                                                         # whitespace of a regular expression will be ignored tacitly.
        list_      =  /\w+/~ { "," /\w+/~ }              # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
                                                         # BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an exmaple
        EOF =  !/./
        """

    def test_self(self):
        """Compile the grammar, then compile it once more with the first
        compilation's result passed to ``compileDSL`` as the parser."""
        ebnf_compiler = get_ebnf_compiler("EBNF", self.grammar)
        ebnf_parser = get_ebnf_grammar()
        compiled, problems, _tree = compile_source(
            self.grammar, None, ebnf_parser, get_ebnf_transformer(), ebnf_compiler)
        assert not problems, str(problems)
        # second pass: the output of the first pass serves as the parser
        compileDSL(self.grammar, nil_preprocessor, compiled,
                   get_ebnf_transformer(), ebnf_compiler)

    def multiprocessing_task(self):
        """Compile the grammar once and return the errors that
        ``compile_source`` reports; run in the workers of
        ``test_multiprocessing``."""
        ebnf_compiler = get_ebnf_compiler("EBNF", self.grammar)
        ebnf_parser = get_ebnf_grammar()
        _compiled, problems, _tree = compile_source(
            self.grammar, None, ebnf_parser, get_ebnf_transformer(), ebnf_compiler)
        return problems

    def test_multiprocessing(self):
        """Run four compilations on a pool of two processes; none may
        report errors."""
        with Pool(processes=2) as pool:
            pending = []
            for _ in range(4):
                pending.append(pool.apply_async(self.multiprocessing_task, ()))
            outcomes = []
            for job in pending:
                outcomes.append(job.get(timeout=10))
        for index, problems in enumerate(outcomes):
            assert not problems, ("%i: " % index) + str(problems)
287
288


289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
class TestBoundaryCases:
    """Compiles very small grammars and a grammar with an unconnected rule."""

    def setup(self):
        """Fetch the grammar, transformer and compiler used by the
        small-grammar tests."""
        self.gr = get_ebnf_grammar()
        self.tr = get_ebnf_transformer()
        self.cp = get_ebnf_compiler()

    def test_empty_grammar(self):
        """Compiling an empty source must still yield a truthy result."""
        t = self.gr("")
        self.tr(t)
        r = self.cp(t)
        assert r

    def test_single_statement_grammar(self):
        """Compiling a single definition must yield a truthy result."""
        t = self.gr("i = /i/")
        self.tr(t)
        r = self.cp(t)
        assert r

    def test_two_statement_grammar(self):
        """Compiling two definitions must yield a truthy result."""
        t = self.gr("i = k {k}\nk = /k/")
        self.tr(t)
        r = self.cp(t)
        assert r

    def test_unconnected_symbols(self):
        """A rule that the root never reaches must produce a message that is
        not an error, and the compiled grammar must still be subscriptable
        by that rule's name."""
        ebnf = """root = /.*/
                  unconnected = /.*/
        """
        result, messages, AST = compile_source(ebnf, nil_preprocessor,
                                               get_ebnf_grammar(),
                                               get_ebnf_transformer(),
                                               get_ebnf_compiler())
        # Checking for messages first guarantees that `grammar` below is
        # always bound before it is used.
        assert messages, "EBNF compiler should warn about unconnected rules."
        assert not has_errors(messages), "Unconnected rules should result in a warning, " \
            "not an error: " + str(messages)
        # Raw string: the pattern contains the regex escape `\w`.
        grammar = compile_python_object(DHPARSER_IMPORTS + result,
                                        r'get_(?:\w+_)?grammar$')()

        assert grammar['root'], "Grammar objects should be subscriptable by parser names!"
        try:
            grammar['unconnected']
        except KeyError:
            raise AssertionError(
                "Grammar objects should be able to cope with unconnected parsers!")
        try:
            grammar['nonexistant']
        except KeyError:
            pass
        else:
            raise AssertionError("Grammar object shoul raise a KeyError if subscripted by "
                                 "a non-existant parser name!")
341
342
343
344
345
346
347


class TestSynonymDetection:
    """A rule that merely refers to another rule must keep its own name."""

    def test_synonym_detection(self):
        """Both ``a`` and its synonym target ``b`` are reachable by name, and
        parsing "b" yields an S-expression containing 'b' exactly twice."""
        source = """a = b
                  b = /b/
        """
        parser = grammar_provider(source)()
        for symbol in ('a', 'b'):
            assert parser[symbol].name == symbol, parser[symbol].name
        sxpr = parser('b').as_sxpr()
        assert sxpr.count('b') == 2
352

353

354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
class TestFlowControlOperators:
    """Tests for the lookbehind operator and for the reporting of grammar
    sources that fail to compile."""

    def setup(self):
        """Prepare two documents: ``t1`` has 'END' on a line of its own,
        ``t2`` has 'END' on the same line as the preceding words."""
        self.t1 = """
        All work and no play 
        makes Jack a dull boy
        END
        """
        self.t2 = "All word and not play makes Jack a dull boy END\n"

    def test_lookbehind_indirect(self):
        """A positive lookbehind that refers to its regexp through two levels
        of symbols (``SUCC_LB`` -> ``indirection``) must accept ``t1`` and
        reject ``t2``."""
        lang = r"""
            document = ws sequence doc_end ws         
            sequence = { !end word ws }+
            doc_end  = -&SUCC_LB end        
            ws       = /\s*/
            end      = /END/
            word     = /\w+/
            SUCC_LB  = indirection
            indirection = /\s*?\n/
        """
        parser = grammar_provider(lang)()
        cst = parser(self.t1)
        assert not cst.error_flag, cst.as_sxpr()
        cst = parser(self.t2)
        # this should fail, because 'END' is not preceded by a line feed
        assert cst.error_flag, cst.as_sxpr()

    def test_required_error_reporting(self):
        """Tests whether failures to comply with the required operator '§'
        are correctly reported as such.
        """
        # Raw string: the source contains the backslash sequence `\w`, which
        # is not a valid escape in an ordinary string literal.
        lang1 = r"nonsense == /\w+/~  # wrong_equal_sign"
        lang2 = "nonsense = [^{}%]+  # someone forgot the '/'-delimiters for regular expressions"
        for faulty_source in (lang1, lang2):
            try:
                grammar_provider(faulty_source)
            except CompilationError:
                pass
            else:
                raise AssertionError("Compilation error expected.")

400

401
402
403
404
class TestAllSome:
    """Tests for the ``< ... >`` grouping with a sequence and with alternatives."""

    def test_all(self):
        """``<"A" "B">`` must accept its items in the order 'B A'."""
        parser = grammar_provider('prefix = <"A" "B">')()
        parsed = parser('B A')
        assert parsed.content == 'B A'

    def test_some(self):
        """``<"A" | "B">`` must accept 'B A' as well as a lone 'B'."""
        parser = grammar_provider('prefix = <"A" | "B">')()
        for text in ('B A', 'B'):
            assert parser(text).content == text
412

413

414
if __name__ == "__main__":
    from DHParser.testing import runner

    # "TestTermBug" does not name any class in this module. An empty test
    # selection lets the runner collect the test classes from the namespace
    # itself.
    runner("", globals())