#!/usr/bin/python3

"""test_parsers.py - tests of the parsers-module of DHParser

Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import sys
23
from functools import partial
24

Eckhart Arnold's avatar
Eckhart Arnold committed
25
sys.path.extend(['../', './'])
26

27
from DHParser.toolkit import is_logging, logging, compile_python_object
28
from DHParser.syntaxtree import traverse, remove_expendables, \
29
    replace_by_single_child, reduce_single_child, flatten, TOKEN_PTYPE
di68kap's avatar
di68kap committed
30
from DHParser.parsers import compile_source
Eckhart Arnold's avatar
Eckhart Arnold committed
31
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
32
from DHParser.dsl import parser_factory, DHPARSER_IMPORTS
33
34


Eckhart Arnold's avatar
Eckhart Arnold committed
35
36
37
38
39
40
41
42
43
# EBNF source of a small arithmetic grammar. `expr` and `term` each refer to
# themselves as their own first element, i.e. the grammar is directly
# left-recursive; it is the input of `test_direct_left_recursion`.
ARITHMETIC_EBNF = """
    @ whitespace = linefeed
    formula = [ //~ ] expr
    expr = expr ("+"|"-") term | term
    term = term ("*"|"/") factor | factor
    factor = /[0-9]+/~
    # example:  "5 + 3 * 4"
    """

44

Eckhart Arnold's avatar
Eckhart Arnold committed
45
46
# Transformation table for syntax trees of the ARITHMETIC_EBNF grammar.
# Each key names the node(s) an entry applies to and each value lists the
# transformation functions for those nodes.
# NOTE(review): "term, expr" looks like a comma-separated group of node names
# and "*" like a catch-all entry - confirm against DHParser.syntaxtree.traverse.
ARITHMETIC_EBNF_transformation_table = {
    # AST Transformations for the DSL-grammar
    "formula": [remove_expendables],
    "term, expr": [replace_by_single_child, flatten],
    "factor": [remove_expendables, reduce_single_child],
    TOKEN_PTYPE: [remove_expendables, reduce_single_child],
    "*": [remove_expendables, replace_by_single_child],
}

54

Eckhart Arnold's avatar
Eckhart Arnold committed
55
56
57
# `traverse` with ARITHMETIC_EBNF_transformation_table pre-bound as its
# `processing_table` keyword argument.
ARITHMETIC_EBNFTransform = partial(traverse, processing_table=ARITHMETIC_EBNF_transformation_table)


58
class TestInfiLoopsAndRecursion:
    """Tests for left-recursive grammars and for potentially endless loops."""

    def test_direct_left_recursion(self):
        """Parse "5 + 3 * 4" with the directly left-recursive ARITHMETIC_EBNF.

        The resulting syntax tree must contain no errors and must serialize
        back to exactly the input snippet.
        """
        minilang = ARITHMETIC_EBNF
        snippet = "5 + 3 * 4"
        parser = parser_factory(minilang)()
        assert parser
        syntax_tree = parser(snippet)
        assert not syntax_tree.collect_errors()
        assert snippet == str(syntax_tree)
        if is_logging():
            # Only write the concrete syntax tree when logging is switched on.
            syntax_tree.log("test_LeftRecursion_direct.cst")

    def test_indirect_left_recursion(self):
        """Placeholder: no check for indirect left recursion is implemented."""
        pass

    def test_inifinite_loops(self):
        """Parse a blank with a grammar that repeats an empty regular expression.

        The grammar consists of a repetition around an empty regex; parsing
        is expected to terminate and to set the error flag on the result.
        """
        minilang = """not_forever = { // } \n"""
        snippet = " "
        parser = parser_factory(minilang)()
        syntax_tree = parser(snippet)
        assert syntax_tree.error_flag
        # print(syntax_tree.collect_errors())

82

83
84
85
86
87
88
89
class TestRegex:
    """Tests that compile small EBNF grammars and run the generated parsers."""

    def test_multilineRegex(self):
        """Compile a grammar whose only rule is a commented, multi-line regex.

        The grammar must compile without messages, and the generated
        `regex` parser must consume 'abc+def' completely.
        """
        mlregex = r"""
        regex =  /\w+    # one or more alphabetical characters including the underscore
                  [+]    # followed by a plus sign
                  \w*    # possibly followed by more alpha chracters/
        """
        result, messages, syntax_tree = compile_source(
            mlregex, None, get_ebnf_grammar(), get_ebnf_transformer(),
            get_ebnf_compiler('MultilineRegexTest'))
        assert result
        assert not messages
        # Raw string: the pattern contains a backslash escape that is not a
        # valid escape sequence in an ordinary string literal.
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        node, rest = parser.regex('abc+def')
        assert rest == ''
        assert node.parser.name == "regex"
        assert str(node) == 'abc+def'

    def test_token(self):
        """Compile a grammar with backslash-containing tokens and parse a document.

        Both the grammar and the test document are raw strings, so the
        backslashes of the begin/end tokens reach the parser unchanged.
        The parse result must not have its error flag set.
        """
        tokenlang = r"""
            @whitespace = linefeed
            lang        = "" begin_token {/\w+/ ""} end_token
            begin_token = "\begin{document}"
            end_token   = "\end{document}"
            """
        testdoc = r"""
            \begin{document}
            test
            \end{document}
            """
        result, messages, syntax_tree = compile_source(
            tokenlang, None, get_ebnf_grammar(), get_ebnf_transformer(),
            get_ebnf_compiler("TokenTest"))
        assert result
        assert not messages
        parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
        result = parser(testdoc)
        # parser.log_parsing_history("test.log")
        assert not result.error_flag

121

122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
class TestGrammar:
    """Tests of the parser objects generated from compiled grammars."""

    def test_pos_values_initialized(self):
        """Check that every node in the parsing history has a position set.

        A small grammar is compiled and run with logging switched on; every
        record of the parser's history must then either have no node or a
        node whose `pos` is non-negative.
        """
        # checks whether pos values in the parsing result and in the
        # history record have been initialized
        grammar = r"""@whitespace = horizontal
        haupt        = textzeile LEERZEILE
        textzeile    = { WORT }+
        WORT         = /[^ \t]+/~
        LEERZEILE    = /\n[ \t]*(?=\n)/~
        """
        result, messages, syntax_tree = compile_source(
            grammar, None, get_ebnf_grammar(), get_ebnf_transformer(),
            get_ebnf_compiler("PosTest"))
        assert result
        assert not messages
        with logging("LOGS"):
            # Raw string: the pattern contains a backslash escape that is not
            # a valid escape sequence in an ordinary string literal.
            parser = compile_python_object(DHPARSER_IMPORTS + result, r'\w+Grammar$')()
            # Only the history recorded during this run is inspected below;
            # the returned tree itself is not needed.
            parser("no_file_name*")
        for record in parser.history:
            assert not record.node or record.node.pos >= 0


143
if __name__ == "__main__":
    # When executed as a script, hand this module's globals to DHParser's
    # own test runner.
    # NOTE(review): the empty first argument presumably means "run all
    # tests" - confirm against DHParser.testing.runner.
    from DHParser.testing import runner
    runner("", globals())