test_syntaxtree.py 9.65 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24
25
sys.path.extend(['../', './'])

26
from DHParser.error import Error
27
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml, TOKEN_PTYPE
Eckhart Arnold's avatar
Eckhart Arnold committed
28
from DHParser.transform import traverse, reduce_single_child, \
29
    replace_by_single_child, flatten, remove_expendables
30
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
31
from DHParser.dsl import grammar_provider
32

33

34
35
class TestParseSxpression:
    """Checks for parse_sxpr(), the reader for S-expression strings."""

    def test_parse_s_expression(self):
        """Parse well-formed S-expressions and reject a non-tree string."""
        nested = parse_sxpr('(a (b c))')
        serialized = flatten_sxpr(nested.as_sxpr())
        assert serialized == '(a (b "c"))', serialized

        multiline = parse_sxpr('(a i\nj\nk)')
        serialized = flatten_sxpr(multiline.as_sxpr())
        assert serialized == '(a "i" "j" "k")', serialized

        # A string without enclosing parentheses must be refused.
        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

47
48
49
50
51
52
53
class TestParseXML:
    """Checks for parse_xml() and the XML serialization of trees."""

    def test_roundtrip(self):
        """Serialize a tree as XML, parse it back, and compare the XML."""
        original = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(original.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Check mixed content and whitespace-only text between tags."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:PlainText "alpha ") (b "beta") (:PlainText " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        padded = parse_xml(' <a>   <b>beta</b>   </a> ')
        assert flatten_xml(padded.as_xml()) == '<a><b>beta</b></a>'

63

64
65
66
67
68
69
class TestNode:
    """
    Tests for class Node
    """

    def setup(self):
        """Build the two fixture trees used by the tests of this class.

        ``unique_tree`` has no repeated tag names, ``recurr_tree`` contains
        the tag ``b`` twice.
        """
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def setup_method(self, method=None):
        """Per-test hook for pytest; delegates to :meth:`setup`.

        pytest deprecated nose-style ``setup()`` methods in 7.2 and no longer
        calls them from 8.0 on, so without this hook the fixture attributes
        would be missing there. ``setup()`` itself is kept unchanged for
        runners that call it by name.
        """
        self.setup()

    def test_str(self):
        """str() of a tree yields the concatenated leaf contents."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_find(self):
        """select() yields every node for which the match function is true."""
        found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e"))
        assert len(found) == 1
        assert found[0].result == 'e'
        found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(found) == 2
        assert found[0].result == 'x' and found[1].result == 'y'

    def test_equality1(self):
        """Trees compare equal to themselves and to trees parsed from the same string."""
        assert self.unique_tree == self.unique_tree
        assert self.recurr_tree != self.unique_tree
        assert parse_sxpr('(a (b c))') != parse_sxpr('(a (b d))')
        assert parse_sxpr('(a (b c))') == parse_sxpr('(a (b c))')

    def test_equality2(self):
        """A parsed and transformed tree equals the same tree read from an S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        att = {"term": [replace_by_single_child, flatten],
               "factor": [remove_expendables, reduce_single_child],
               TOKEN_PTYPE: [remove_expendables, reduce_single_child],
               "?": [remove_expendables, replace_by_single_child]}
        parser = grammar_provider(ebnf)()
        tree = parser("20 / 4 * 3")
        traverse(tree, att)
        compare_tree = parse_sxpr("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree == compare_tree, tree.as_sxpr()

    def test_copy(self):
        """A deep copy is equal to the original but independent of it."""
        cpy = copy.deepcopy(self.unique_tree)
        assert cpy == self.unique_tree
        assert cpy.result[0].result != "epsilon"  # just to make sure...
        cpy.result[0].result = "epsilon"
        assert cpy != self.unique_tree

    def test_copy2(self):
        """Compiling deep copies gives the same result as compiling the original."""
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res1 = compiler(tree_copy)
        t2 = copy.deepcopy(tree_copy)
        res2 = compiler(t2)
        assert res1 == res2
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res3 = compiler(tree_copy)
        assert res3 == res2
        transform(tree)
        res4 = compiler(tree)
        assert res4 == res3

    def test_len_and_pos(self):
        """Test len-property of Node and the positions set by init_pos()."""
        nd1 = Node(None, "123")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        nd2 = Node(None, "456")
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
        nd = Node(None, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
        nd.init_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

di68kap's avatar
di68kap committed
146
147
148

class TestRootNode:
    """Tests for the error bookkeeping of class RootNode."""

    def test_error_handling(self):
        """Errors registered on a RootNode are reported by collect_errors().

        Two errors are attached to the children of a small tree in reverse
        order (second child first); the tree is then swallowed by the root
        node and the collected errors are inspected.
        """
        tree = parse_sxpr('(A (B D) (C E))')
        tree.init_pos(0)
        root = RootNode()
        # Register the error for the later node first.
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.collect_errors()
        assert root.error_flag
        # assert errors == root.collect_errors(True)
        # assert not root.error_flag and not root.collect_errors()
        error_str = "\n".join(str(e) for e in errors)
        # The earlier check compared the offsets of "A" and "B"; "A" is not
        # part of either message, so it could pass without testing the order.
        # Compare the two registered messages instead.
        # NOTE(review): assumes collect_errors() returns errors ordered by
        # node position and that str(error) contains the message - confirm.
        pos_b = error_str.find("error B")
        pos_c = error_str.find("error C")
        assert pos_b >= 0 and pos_c >= 0, error_str
        assert pos_b < pos_c, error_str


164
165
166
167
168
169
170
# class TestErrorHandling:
#     def test_error_flag_propagation(self):
#         tree = parse_sxpr('(a (b c) (d (e (f (g h)))))')
#
#         def find_h(context):
#             node = context[-1]
#             if node.result == "h":
#                 node.new_error("an error deep inside the syntax tree")
#
#         assert not tree.error_flag
#         traverse(tree, {"*": find_h})
#         assert tree.error_flag, tree.as_sxpr()
#
#     def test_collect_errors(self):
#         tree = parse_sxpr('(A (B 1) (C (D (E 2) (F 3))))')
#         A = tree
#         B = next(tree.select(lambda node: str(node) == "1"))
#         D = next(tree.select(lambda node: node.parser.name == "D"))
#         F = next(tree.select(lambda node: str(node) == "3"))
#         B.new_error("Error in child node")
#         F.new_error("Error in child's child node")
#         tree.error_flag = Error.ERROR
#         errors = tree.collect_errors()
#         assert len(errors) == 2, str(errors)
#         assert A.error_flag
#         assert D.error_flag
#         errors = tree.collect_errors(clear_errors=True)
#         assert len(errors) == 2
#         assert not D.error_flag
193

194

195
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """select() with a tag-name predicate, with and without the root."""
        def has_tag(tag_name):
            # Build a match function that compares a node's tag name.
            def predicate(node):
                return node.tag_name == tag_name
            return predicate

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        hits = list(tree.select(has_tag("X")))
        assert len(hits) == 2, len(hits)
        first, second = hits
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first == parse_sxpr('(X (c d))')
        assert second == parse_sxpr('(X F)')

        # check default: root is included in search:
        is_root_tag = has_tag('a')
        assert list(tree.select(is_root_tag))
        assert not list(tree.select(is_root_tag, include_root=False))

    def test_getitem(self):
        """Index access to children and tag selection below the root."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0] == parse_sxpr('(b X)')
        assert tree[2] == parse_sxpr('(e (X F))')

        # The root has exactly three children, so index 3 is out of range.
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"

        hits = list(tree.select_by_tag('X', False))
        assert hits[0] == parse_sxpr('(X (c d))')
        assert hits[1] == parse_sxpr('(X F)')

    def test_contains(self):
        """The ``in`` operator versus select_by_tag() for the same tags."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # The root's own tag is not "in" the tree; select_by_tag finds it
        # only when the root is included.
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # 'c' is a grandchild: not "in" the tree, but found by select_by_tag.
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
239
240


241
class TestSerialization:
    """Checks for the S-expression serialization of nodes."""

    def test_sxpr_roundtrip(self):
        """Placeholder; no round-trip check is implemented yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node serialize like attributes parsed from text."""
        node = parse_sxpr('(A "B")')

        node.attributes['attr'] = "value"
        with_one_attr = parse_sxpr('(A `(attr "value") "B")')
        assert node.as_sxpr() == with_one_attr.as_sxpr()

        node.attributes['attr2'] = "value2"
        with_two_attrs = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert node.as_sxpr() == with_two_attrs.as_sxpr()


di68kap's avatar
di68kap committed
255

256
if __name__ == "__main__":
    # Script entry point: pass this module's namespace to DHParser's runner.
    from DHParser.testing import runner

    runner("", globals())