test_syntaxtree.py 11.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24
25
sys.path.extend(['../', './'])

26
from DHParser.error import Error
27
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml, TOKEN_PTYPE
Eckhart Arnold's avatar
Eckhart Arnold committed
28
from DHParser.transform import traverse, reduce_single_child, \
29
    replace_by_single_child, flatten, remove_expendables
30
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
31
from DHParser.dsl import grammar_provider
32

33

34
35
class TestParseSxpression:
    """Checks for parse_sxpr(), the reader for trees in S-expression notation."""

    def test_parse_s_expression(self):
        """Parse two well-formed S-expressions and one that is not a tree.

        The well-formed inputs are compared in their flattened as_sxpr()
        form; the malformed input is expected to raise a ValueError.
        """
        nested = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert nested == '(a (b "c"))', nested

        multiline = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert multiline == '(a "i" "j" "k")', multiline

        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

47
48
49
50
51
52
53
class TestParseXML:
    """Checks for parse_xml() and the XML serialization of trees."""

    def test_roundtrip(self):
        """Serialize a tree to XML, parse it back and serialize again."""
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(source_tree.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'

        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Check how text between child elements shows up in the parsed tree."""
        # text next to a child element is expected as ":PlainText" nodes
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:PlainText "alpha ") (b "beta") (:PlainText " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        # whitespace-only text around elements must not survive the round trip
        padded = parse_xml(' <a>   <b>beta</b>   </a> ')
        assert flatten_xml(padded.as_xml()) == '<a><b>beta</b></a>'

63

64
65
66
67
68
69
class TestNode:
    """
    Exercises class Node: string conversion, selection of nodes,
    equality, deep copies as well as length and position.
    """

    def setup(self):
        # fixture in which every tag name occurs exactly once
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        # fixture in which the tag name "b" occurs twice
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def test_str(self):
        """str() of each fixture tree yields its leaf contents, concatenated."""
        for tree, text in ((self.unique_tree, "ceh"), (self.recurr_tree, "xey")):
            assert str(tree) == text

    def test_select_subnodes(self):
        """select() with an always-true predicate visits root and all sub-nodes."""
        def accept_all(nd):
            return True

        visited = self.unique_tree.select(accept_all, include_root=True)
        tag_sequence = ''.join([node.tag_name for node in visited])
        assert tag_sequence == "abdfg", tag_sequence

    def test_find(self):
        """select() returns exactly the nodes matching the predicate."""
        def is_leaf_e(nd):
            return not nd.children and nd.result == "e"

        hits = list(self.unique_tree.select(is_leaf_e))
        assert len(hits) == 1
        assert hits[0].result == 'e'

        def is_b(nd):
            return nd.tag_name == 'b'

        hits = list(self.recurr_tree.select(is_b))
        assert len(hits) == 2
        assert hits[0].result == 'x' and hits[1].result == 'y'

    def test_equality1(self):
        """Trees compare equal when structure and content agree."""
        unique, recurring = self.unique_tree, self.recurr_tree
        assert unique == unique
        assert recurring != unique
        # same structure, different leaf content
        assert parse_sxpr('(a (b c))') != parse_sxpr('(a (b d))')
        # independently parsed, identical source
        assert parse_sxpr('(a (b c))') == parse_sxpr('(a (b c))')

    def test_equality2(self):
        """A parsed and transformed tree equals its hand-written S-expression."""
        grammar_src = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        transformations = {
            "term": [replace_by_single_child, flatten],
            "factor": [remove_expendables, reduce_single_child],
            TOKEN_PTYPE: [remove_expendables, reduce_single_child],
            "?": [remove_expendables, replace_by_single_child],
        }
        arithmetic = grammar_provider(grammar_src)()
        actual = arithmetic("20 / 4 * 3")
        traverse(actual, transformations)
        expected = parse_sxpr(
            "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert actual == expected, actual.as_sxpr()

    def test_copy(self):
        """A deep copy equals the original until the copy is modified."""
        duplicate = copy.deepcopy(self.unique_tree)
        assert duplicate == self.unique_tree
        # guard: the value assigned below must really be a change
        assert duplicate.result[0].result != "epsilon"
        duplicate.result[0].result = "epsilon"
        assert duplicate != self.unique_tree

    def test_copy2(self):
        """Deep copies must be deep enough for transformation and compilation.

        The same EBNF source is compiled from copies taken before and after
        the transformation and finally from the original tree itself; all
        results have to agree.
        """
        grammar_src = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parse = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compile_tree = get_ebnf_compiler()
        original = parse(grammar_src)

        # copy taken before the transformation ...
        first_copy = copy.deepcopy(original)
        transform(first_copy)
        from_first_copy = compile_tree(first_copy)
        # ... versus a copy of the already transformed copy
        from_copy_of_copy = compile_tree(copy.deepcopy(first_copy))
        assert from_first_copy == from_copy_of_copy

        # a second, fresh copy of the untransformed original
        second_copy = copy.deepcopy(original)
        transform(second_copy)
        from_second_copy = compile_tree(second_copy)
        assert from_second_copy == from_copy_of_copy

        # finally the original itself
        transform(original)
        from_original = compile_tree(original)
        assert from_original == from_second_copy

    def test_len_and_pos(self):
        """Test the length of leaf and branch nodes and the positions
        assigned by init_pos()."""
        left = Node(None, "123")
        assert len(left) == 3, "Expected Node.len == 3, got %i" % len(left)
        right = Node(None, "456")
        assert len(right) == 3, "Expected Node.len == 3, got %i" % len(right)
        parent = Node(None, (left, right))
        assert len(parent) == 6, "Expected Node.len == 6, got %i" % len(parent)

        parent.init_pos(0)
        assert parent.pos == 0, "Expected Node.pos == 0, got %i" % parent.pos
        assert left.pos == 0, "Expected Node.pos == 0, got %i" % left.pos
        assert right.pos == 3, "Expected Node.pos == 3, got %i" % right.pos

di68kap's avatar
di68kap committed
151
152
153

class TestRootNode:
    """Tests for the error bookkeeping of class RootNode."""

    def test_error_handling(self):
        """Register errors on two child nodes and collect them from the root.

        The errors are registered in reverse document order (first on the
        second child "C", then on the first child "B") before the tree is
        handed over to the root node with swallow().
        """
        tree = parse_sxpr('(A (B D) (C E))')
        tree.init_pos(0)
        root = RootNode()
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.collect_errors()
        # the flag is expected to still be set after collecting the errors
        assert root.error_flag
        # assert errors == root.collect_errors(True)
        # assert not root.error_flag and not root.collect_errors()
        error_str = "\n".join(str(e) for e in errors)
        # Compare the positions of the two registered messages. The earlier
        # check looked for "A", which occurs in neither message, so it could
        # hardly fail. Requiring a non-negative index also makes sure that
        # both messages are actually present.
        # NOTE(review): this expects collect_errors() to return the errors
        # ordered by node position - confirm against RootNode.
        pos_b = error_str.find("error B")
        pos_c = error_str.find("error C")
        assert 0 <= pos_b < pos_c, error_str


169
170
171
172
173
174
175
# class TestErrorHandling:
#     def test_error_flag_propagation(self):
#         tree = parse_sxpr('(a (b c) (d (e (f (g h)))))')
#
#         def find_h(context):
#             node = context[-1]
#             if node.result == "h":
#                 node.new_error("an error deep inside the syntax tree")
#
#         assert not tree.error_flag
#         traverse(tree, {"*": find_h})
#         assert tree.error_flag, tree.as_sxpr()
#
#     def test_collect_errors(self):
#         tree = parse_sxpr('(A (B 1) (C (D (E 2) (F 3))))')
#         A = tree
#         B = next(tree.select(lambda node: str(node) == "1"))
#         D = next(tree.select(lambda node: node.parser.name == "D"))
#         F = next(tree.select(lambda node: str(node) == "3"))
#         B.new_error("Error in child node")
#         F.new_error("Error in child's child node")
#         tree.error_flag = Error.ERROR
#         errors = tree.collect_errors()
#         assert len(errors) == 2, str(errors)
#         assert A.error_flag
#         assert D.error_flag
#         errors = tree.collect_errors(clear_errors=True)
#         assert len(errors) == 2
#         assert not D.error_flag
198

199

200
class TestNodeFind:
    """Tests for selecting nodes: select(), select_by_tag(), indexing
    and the "in" operator of class Node.
    """

    def test_find(self):
        """select() with a tag-name predicate, with and without the root."""
        def has_tag(tag_name):
            # build a predicate that accepts nodes carrying the given tag name
            def predicate(node):
                return node.tag_name == tag_name
            return predicate

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        x_nodes = list(tree.select(has_tag("X")))
        assert len(x_nodes) == 2, len(x_nodes)
        assert str(x_nodes[0]) == 'd', str(x_nodes[0])
        assert str(x_nodes[1]) == 'F', str(x_nodes[1])
        assert x_nodes[0] == parse_sxpr('(X (c d))')
        assert x_nodes[1] == parse_sxpr('(X F)')

        # the root node "a" is only reported when include_root is set
        assert list(tree.select(has_tag('a'), include_root=True))
        assert not list(tree.select(has_tag('a'), include_root=False))

    def test_getitem(self):
        """Integer indices address the children; a bad index raises IndexError."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0] == parse_sxpr('(b X)')
        assert tree[2] == parse_sxpr('(e (X F))')
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"

        x_nodes = list(tree.select_by_tag('X', False))
        assert x_nodes[0] == parse_sxpr('(X (c d))')
        assert x_nodes[1] == parse_sxpr('(X F)')

    def test_contains(self):
        """Compare the "in" operator with what select_by_tag() reports."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # the root's own tag name: not "in" the tree, found only with the root included
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        # tag names of the immediate children
        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # "c" sits one level further down: not "in" the tree, but selectable
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
243
244


245
class TestSerialization:
    """Tests for the serialization of trees as S-expressions and as XML."""

    def test_sxpr_roundtrip(self):
        # placeholder, no checks implemented yet
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node match the backtick notation of parse_sxpr()."""
        tree = parse_sxpr('(A "B")')
        tree.attributes['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attributes['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the exact line breaks and indentation produced by as_sxpr()."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        # content containing line breaks is written as one string per line
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_xml_inlining(self):
        """Check as_xml() with and without the inline_tags argument."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Bind the result first, so that a failure reports the serialization
        # that was actually compared and not the inlined one from above.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        # with xml:space="preserve" the default output is expected on one line
        tree.attributes['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        # content containing line breaks
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
302

di68kap's avatar
di68kap committed
303

304
if __name__ == "__main__":
    # When executed as a script, pass this module's global namespace to
    # DHParser's own test runner.
    # NOTE(review): the empty string presumably means "no restriction to
    # particular tests" - confirm against DHParser.testing.runner.
    from DHParser.testing import runner

    runner("", globals())