#!/usr/bin/python3

"""test_ebnf.py - tests of the ebnf module of DHParser

Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

23
24
25
26
# Use the third-party ``regex`` module when it is installed; otherwise fall
# back to the standard library's ``re`` under the same name.
try:
    import regex as re
except ImportError:
    import re
import sys
from multiprocessing import Pool

# Extend the module search path by the parent and the current directory
# before DHParser is imported (presumably so that the package is found
# without being installed -- confirm against the project layout).
sys.path.extend(['../', './'])

from DHParser.toolkit import compile_python_object
from DHParser.parser import compile_source, WHITESPACE_PTYPE, nil_preprocessor
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransformer, get_ebnf_compiler
from DHParser.dsl import CompilationError, compileDSL, DHPARSER_IMPORTS, grammar_provider
36
37


38
39
40
41
42
43
44
45
46
47
48
class TestDirectives:
    mini_language = """
        expression =  term  { ("+" | "-") term }
        term       =  factor  { ("*" | "/") factor }
        factor     =  constant | "("  expression  ")"
        constant   =  digit { digit } [ //~ ]
        digit      = /0/ | /1/ | /2/ | /3/ | /4/ | /5/ | /6/ | /7/ | /8/ | /9/ 
        """

    def test_whitespace_linefeed(self):
        lang = "@ whitespace = linefeed\n" + self.mini_language
49
        MinilangParser = grammar_provider(lang)
50
51
        parser = MinilangParser()
        assert parser
Eckhart Arnold's avatar
Eckhart Arnold committed
52
        syntax_tree = parser("3 + 4 * 12")
53
        # parser.log_parsing_history("WSP")
54
        assert not syntax_tree.collect_errors()
Eckhart Arnold's avatar
Eckhart Arnold committed
55
        syntax_tree = parser("3 + 4 \n * 12")
56
57
        # parser.log_parsing_history("WSPLF")
        assert not syntax_tree.collect_errors()
Eckhart Arnold's avatar
Eckhart Arnold committed
58
        syntax_tree = parser("3 + 4 \n \n * 12")
59
        assert syntax_tree.collect_errors()
Eckhart Arnold's avatar
Eckhart Arnold committed
60
        syntax_tree = parser("3 + 4 \n\n * 12")
61
62
63
64
        assert syntax_tree.collect_errors()

    def test_whitespace_vertical(self):
        lang = "@ whitespace = vertical\n" + self.mini_language
65
        parser = grammar_provider(lang)()
66
        assert parser
Eckhart Arnold's avatar
Eckhart Arnold committed
67
        syntax_tree = parser("3 + 4 * 12")
68
        assert not syntax_tree.collect_errors()
Eckhart Arnold's avatar
Eckhart Arnold committed
69
        syntax_tree = parser("3 + 4 \n * 12")
70
        assert not syntax_tree.collect_errors()
Eckhart Arnold's avatar
Eckhart Arnold committed
71
        syntax_tree = parser("3 + 4 \n \n * 12")
72
        assert not syntax_tree.collect_errors()
Eckhart Arnold's avatar
Eckhart Arnold committed
73
        syntax_tree = parser("3 + 4 \n\n * 12")
74
75
        assert not syntax_tree.collect_errors()

76
77
    def test_whitespace_horizontal(self):
        lang = "@ whitespace = horizontal\n" + self.mini_language
78
        parser = grammar_provider(lang)()
79
        assert parser
Eckhart Arnold's avatar
Eckhart Arnold committed
80
        syntax_tree = parser("3 + 4 * 12")
81
        assert not syntax_tree.collect_errors()
Eckhart Arnold's avatar
Eckhart Arnold committed
82
        syntax_tree = parser("3 + 4 \n * 12")
83
84
        assert syntax_tree.collect_errors()

85

86
class TestEBNFParser:
Eckhart Arnold's avatar
Eckhart Arnold committed
87
    cases = {
88
89
90
91
92
93
94
95
96
97
98
99
100
101
        "list_": {
            "match": {
                1: "hund",
                2: "hund, katze,maus",
                3: "hund , katze"
            },
            "fail": {
                1: "123",
                2: '"literal"',
                3: "/regexp/"
            }
        }
    }

di68kap's avatar
di68kap committed
102

103
    def setup(self):
Eckhart Arnold's avatar
Eckhart Arnold committed
104
        self.EBNF = get_ebnf_grammar()
105

106
107
    def test_RE(self):
        gr = get_ebnf_grammar()
108
109
110
111
112
        m = gr.regexp.main.regexp.match(r'/[\\\\]/ xxx /')
        rs = m.group()
        assert rs.find('x') < 0, rs.group()
        rx = re.compile(rs[1:-1])
        assert rx.match(r'\\')
113

114
115
    def test_literal(self):
        snippet = '"literal" '
Eckhart Arnold's avatar
Eckhart Arnold committed
116
        result = self.EBNF(snippet, 'literal')
117
118
        assert not result.error_flag
        assert str(result) == snippet
119
        assert result.find(lambda node: node.parser.ptype == WHITESPACE_PTYPE)
120

Eckhart Arnold's avatar
Eckhart Arnold committed
121
        result = self.EBNF(' "literal"', 'literal')
122
123
124
        assert result.error_flag  # literals catch following, but not leading whitespace


125
126
class TestSemanticValidation:
    def check(self, minilang, bool_filter=lambda x: x):
Eckhart Arnold's avatar
Eckhart Arnold committed
127
        grammar = get_ebnf_grammar()
Eckhart Arnold's avatar
Eckhart Arnold committed
128
        st = grammar(minilang)
129
        assert not st.collect_errors()
130
        EBNFTransformer(st)
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
        assert bool_filter(st.collect_errors())

    def test_illegal_nesting(self):
        self.check('impossible = { [ "an optional requirement" ] }')

    def test_illegal_nesting_option_required(self):
        self.check('impossible = [ §"an optional requirement" ]')

    def test_illegal_nesting_oneormore_option(self):
        self.check('impossible = { [ "no use"] }+')

    def test_legal_nesting(self):
        self.check('possible = { [ "+" ] "1" }', lambda x: not x)


class TestCompilerErrors:
    def test_error_propagation(self):
148
        ebnf = "@ literalws = wrongvalue  # testing error propagation\n"
Eckhart Arnold's avatar
Eckhart Arnold committed
149
150
        result, messages, st = compile_source(ebnf, None, get_ebnf_grammar(),
            get_ebnf_transformer(), get_ebnf_compiler('ErrorPropagationTest'))
151
152
        assert messages

153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
    def test_undefined_symbols(self):
        """Use of undefined symbols should be reported.
        """
        ebnf = """syntax = { intermediary }
                  intermediary = "This symbol is " [ badly_spelled ] "!"
                  bedly_spilled = "wrong" """
        result, messages, st = compile_source(ebnf, None, get_ebnf_grammar(),
            get_ebnf_transformer(), get_ebnf_compiler('UndefinedSymbols'))
        assert messages

    def test_no_error(self):
        """But reserved symbols should not be repoted as undefined.
        """
        ebnf = """nothing =  WSP__ | COMMENT__\n"""
        result, messages, st = compile_source(ebnf, None, get_ebnf_grammar(),
            get_ebnf_transformer(), get_ebnf_compiler('UndefinedSymbols'))
        assert not bool(messages), messages

171

172
class TestSelfHosting:
Eckhart Arnold's avatar
Eckhart Arnold committed
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
    grammar = r"""
        # EBNF-Grammar in EBNF

        @ comment    =  /#.*(?:\n|$)/                    # comments start with '#' and eat all chars up to and including '\n'
        @ whitespace =  /\s*/                            # whitespace includes linefeed
        @ literalws  =  right                            # trailing whitespace of literals will be ignored tacitly

        syntax     =  [~//] { definition | directive } §EOF
        definition =  symbol §"=" expression
        directive  =  "@" §symbol §"=" ( regexp | literal | list_ )

        expression =  term { "|" term }
        term       =  { factor }+
        factor     =  [flowmarker] [retrieveop] symbol !"="   # negative lookahead to be sure it's not a definition
                    | [flowmarker] literal
                    | [flowmarker] regexp
                    | [flowmarker] group
                    | [flowmarker] regexchain
                    | [flowmarker] oneormore
                    | repetition
                    | option

        flowmarker =  "!"  | "&"  | "§" |                # '!' negative lookahead, '&' positive lookahead, '§' required
                      "-!" | "-&"                        # '-' negative lookbehind, '-&' positive lookbehind
        retrieveop =  "::" | ":"                         # '::' pop, ':' retrieve

        group      =  "(" expression §")"
        regexchain =  ">" expression §"<"                # compiles "expression" into a singular regular expression
        oneormore  =  "{" expression "}+"
        repetition =  "{" expression §"}"
        option     =  "[" expression §"]"

        symbol     =  /(?!\d)\w+/~                       # e.g. expression, factor, parameter_list
        literal    =  /"(?:[^"]|\\")*?"/~                # e.g. "(", '+', 'while'
                    | /'(?:[^']|\\')*?'/~                # whitespace following literals will be ignored tacitly.
        regexp     =  /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~    # e.g. /\w+/, ~/#.*(?:\n|$)/~
                                                         # '~' is a whitespace-marker, if present leading or trailing
                                                         # whitespace of a regular expression will be ignored tacitly.
        list_      =  /\w+/~ { "," /\w+/~ }              # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
                                                         # BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an exmaple
        EOF =  !/./        
        """

216
217
    def test_self(self):
        compiler_name = "EBNF"
Eckhart Arnold's avatar
Eckhart Arnold committed
218
219
220
221
        compiler = get_ebnf_compiler(compiler_name, self.grammar)
        parser = get_ebnf_grammar()
        result, errors, syntax_tree = compile_source(self.grammar, None, parser,
                                            get_ebnf_transformer(), compiler)
222
223
224
        assert not errors, str(errors)
        # compile the grammar again using the result of the previous
        # compilation as parser
225
        compileDSL(self.grammar, nil_preprocessor, result, get_ebnf_transformer(), compiler)
226

Eckhart Arnold's avatar
Eckhart Arnold committed
227
228
229
230
231
232
233
234
235
236
237
    def multiprocessing_task(self):
        compiler_name = "EBNF"
        compiler = get_ebnf_compiler(compiler_name, self.grammar)
        parser = get_ebnf_grammar()
        result, errors, syntax_tree = compile_source(self.grammar, None, parser,
                                            get_ebnf_transformer(), compiler)
        return errors

    def test_multiprocessing(self):
        with Pool(processes=2) as pool:
            res = [pool.apply_async(self.multiprocessing_task, ()) for i in range(4)]
238
            errors = [r.get(timeout=10) for r in res]
Eckhart Arnold's avatar
Eckhart Arnold committed
239
240
        for i, e in enumerate(errors):
            assert not e, ("%i: " % i) + str(e)
241
242


243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
class TestBoundaryCases:
    def setup(self):
        self.gr = get_ebnf_grammar()
        self.tr = get_ebnf_transformer()
        self.cp = get_ebnf_compiler()

    def test_empty_grammar(self):
        t = self.gr("")
        self.tr(t)
        r = self.cp(t)
        assert r

    def test_single_statement_grammar(self):
        t = self.gr("i = /i/")
        self.tr(t)
        r = self.cp(t)
        assert r

    def test_two_statement_grammar(self):
        t = self.gr("i = k {k}\nk = /k/")
        self.tr(t)
        r = self.cp(t)
        assert r

267
268
269
270
271
    def test_unconnected_symbols(self):
        ebnf = """root = /.*/
                  unconnected = /.*/
        """
        try:
272
            grammar = grammar_provider(ebnf)()
273
274
            assert False, "EBNF compiler should complain about unconnected rules."
        except CompilationError as err:
275
276
277
278
279
280
281
            grammar_src = err.result
            grammar = compile_python_object(DHPARSER_IMPORTS + grammar_src,
                                            'get_(?:\w+_)?grammar$')()
        assert grammar['root'], "Grammar objects should be subscriptable by parser names!"
        try:
            unconnected = grammar['unconnected']
        except KeyError:
Eckhart Arnold's avatar
Eckhart Arnold committed
282
            assert False, "Grammar objects should be able to cope with unconnected parsers!"
283
284
285
286
287
288
        try:
            nonexistant = grammar['nonexistant']
            assert False, "Grammar object shoul raise a KeyError if subscripted by " \
                          "a non-existant parser name!"
        except KeyError:
            pass
289
290
        ebnf_testing = "@testing = True\n" + ebnf
        try:
291
            grammar = grammar_provider(ebnf_testing)()
292
293
294
        except CompilationError:
            assert False, "EBNF compiler should not complain about unconnected " \
                          "rules when directive @testing is set."
295
296
297
298
299
300
301


class TestSynonymDetection:
    def test_synonym_detection(self):
        ebnf = """a = b
                  b = /b/
        """
302
        grammar = grammar_provider(ebnf)()
303
304
        assert grammar['a'].name == 'a', grammar['a'].name
        assert grammar['b'].name == 'b', grammar['b'].name
305
        assert grammar('b').as_sxpr().count('b') == 2
306

307

308
if __name__ == "__main__":
    # Imported here so that DHParser.testing is only loaded when this file
    # is executed as a script.
    from DHParser.testing import runner
    runner("", globals())