2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

test_syntaxtree.py 12.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24
25
sys.path.extend(['../', './'])

26
27
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml, \
    MockParser
Eckhart Arnold's avatar
Eckhart Arnold committed
28
from DHParser.transform import traverse, reduce_single_child, \
29
    replace_by_single_child, flatten, remove_expendables
30
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
31
from DHParser.dsl import grammar_provider
32
from DHParser.error import Error
33

34

35
36
class TestParseSxpression:
    """Checks for parse_sxpr(), the reader for S-expression trees."""

    def test_parse_s_expression(self):
        """Well-formed input round-trips; input that is no tree is rejected."""
        nested = parse_sxpr('(a (b c))')
        flat = flatten_sxpr(nested.as_sxpr())
        assert flat == '(a (b "c"))', flat

        multiline = parse_sxpr('(a i\nj\nk)')
        flat = flatten_sxpr(multiline.as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        # A bare sequence of words is not a tree and has to be refused.
        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

48
49
50
51
52
53
54
class TestParseXML:
    """Checks for parse_xml() and for the XML serialization of trees."""

    def test_roundtrip(self):
        """Flattened XML of a tree survives being parsed and serialized again."""
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(source_tree.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'

        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Text mixed with child elements ends up in ':Token' nodes."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        padded = parse_xml(' <a>  <b>beta</b>  </a> ')
        expected_flat = '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>'
        assert flatten_xml(padded.as_xml()) == expected_flat
        inlined = padded.as_xml(inline_tags={'a'}, omit_tags={':Token'})
        assert inlined == '<a>  <b>beta</b>  </a>'

        indented = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        assert indented.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """flatten_xml() removes the line breaks and indentation between tags."""
        parsed = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flat_xml = flatten_xml(parsed.as_xml())
        assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml

72

73
74
75
76
77
78
class TestNode:
    """
    Tests for class Node
    """
    def setup(self):
        """Build the two fixture trees that the tests of this class share."""
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def test_deepcopy(self):
        """A deep copy of a RootNode is equal to, but independent of, its source."""
        source = '(a (b c) (d (e f) (h i)))'
        modified = '(a (b c) (d x))'

        tree = RootNode(parse_sxpr(source))
        tree_copy = copy.deepcopy(tree)

        assert tree == tree_copy
        for candidate in (tree, tree_copy):
            assert candidate.as_sxpr() == parse_sxpr(source).as_sxpr()

        # An error added to the original must not show up in the copy.
        tree.add_error(tree, Error('Test Error', 0))
        assert not tree_copy.all_errors
        assert tree.as_sxpr() != parse_sxpr(source).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(source).as_sxpr()

        # Changing the original's content must leave the copy untouched.
        tree['d'].result = "x"
        assert tree != tree_copy
        assert tree_copy == parse_sxpr(source)
        print(tree.as_sxpr())
        print(parse_sxpr(modified).as_sxpr())
        assert tree == parse_sxpr(modified)

        # this also checks for errors equality...
        assert parse_sxpr(modified).as_sxpr() != tree.as_sxpr()

    def test_str(self):
        """str() of a tree is the concatenation of its leaf contents."""
        expectations = ((self.unique_tree, "ceh"), (self.recurr_tree, "xey"))
        for tree, text in expectations:
            assert str(tree) == text

    def test_select_subnodes(self):
        """select() with an always-true predicate visits every node, root first."""
        visited = ''.join(
            node.tag_name
            for node in self.unique_tree.select(lambda _: True, include_root=True))
        assert visited == "abdfg", visited

    def test_find(self):
        """select() yields exactly the nodes that satisfy the predicate."""
        def is_leaf_with_e(nd):
            return not nd.children and nd.result == "e"

        found = list(self.unique_tree.select(is_leaf_with_e))
        assert len(found) == 1
        assert found[0].result == 'e'

        found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(found) == 2
        first, second = found
        assert first.result == 'x' and second.result == 'y'

    def test_equality1(self):
        """Trees compare equal exactly when tags and contents agree."""
        assert self.unique_tree == self.unique_tree
        assert self.recurr_tree != self.unique_tree

        reference = '(a (b c))'
        assert parse_sxpr(reference) != parse_sxpr('(a (b d))')
        assert parse_sxpr(reference) == parse_sxpr(reference)

    def test_equality2(self):
        """A parsed and transformed tree equals its hand-written S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        transformation_table = {
            "term": [remove_expendables, replace_by_single_child, flatten],
            "factor": [remove_expendables, reduce_single_child],
            "*": [remove_expendables, replace_by_single_child],
        }
        arithmetic_parser = grammar_provider(ebnf)()
        tree = arithmetic_parser("20 / 4 * 3")
        traverse(tree, transformation_table)

        expected = parse_sxpr(
            "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree == expected, tree.as_sxpr()

    def test_copy(self):
        """Modifying a deep copy makes it unequal to the original."""
        clone = copy.deepcopy(self.unique_tree)
        assert clone == self.unique_tree

        first_child = clone.result[0]
        assert first_child.result != "epsilon"  # just to make sure...
        first_child.result = "epsilon"
        assert clone != self.unique_tree

    def test_copy2(self):
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        # Compile a transformed copy, then a copy of that transformed copy.
        first_copy = copy.deepcopy(tree)
        transform(first_copy)
        res1 = compiler(first_copy)
        copy_of_copy = copy.deepcopy(first_copy)
        res2 = compiler(copy_of_copy)
        assert res1 == res2

        # A second, fresh copy of the untransformed tree must compile alike.
        second_copy = copy.deepcopy(tree)
        transform(second_copy)
        res3 = compiler(second_copy)
        assert res3 == res2

        # Finally, the original itself must give the same result.
        transform(tree)
        res4 = compiler(tree)
        assert res4 == res3

    def test_len_and_pos(self):
        """Test the length of nodes and the positions assigned by init_pos()."""
        nd1 = Node(None, "123")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        nd2 = Node(None, "456")
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)

        nd = Node(None, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)

        nd.init_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

    def test_xml_sanitizer(self):
        """The characters '<', '&' and '>' are escaped in XML output."""
        node = Node(MockParser('tag'), '<&>')
        serialized = node.as_xml()
        assert serialized == '<tag>&lt;&amp;&gt;</tag>'
185

di68kap's avatar
di68kap committed
186
187
188

class TestRootNode:
    """Tests for the error bookkeeping of class RootNode."""

    def test_error_handling(self):
        """Errors registered out of order are reported in document order.

        The error for node C is registered before the error for node B,
        although B precedes C in the tree.  The collected errors are then
        expected to list B's error first.
        """
        tree = parse_sxpr('(A (B D) (C E))')
        tree.init_pos(0)
        root = RootNode()

        # Deliberately register the error of the later node first.
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)

        assert root.error_flag
        errors = root.collect_errors()
        # Merely collecting the errors must not clear the error flag.
        assert root.error_flag
        # assert errors == root.collect_errors(True)
        # assert not root.error_flag and not root.collect_errors()

        error_str = "\n".join(str(e) for e in errors)
        # The former check compared find("A") with find("B"); no message
        # registered above mentions "A", so the check could pass without
        # testing the order.  Compare the two messages that were registered,
        # and require both to be present.
        # NOTE(review): assumes collect_errors() orders errors by position
        # and that str(error) contains the message text -- confirm against
        # RootNode and Error.
        pos_b = error_str.find("error B")
        pos_c = error_str.find("error C")
        assert 0 <= pos_b < pos_c, error_str


204
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """select() finds matching descendants; the root only on request."""
        def has_tag(tag_name):
            """Return a predicate that is true for nodes named `tag_name`."""
            def predicate(node):
                return node.tag_name == tag_name
            return predicate

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        matches = list(tree.select(has_tag("X")))
        assert len(matches) == 2, len(matches)

        first, second = matches
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first == parse_sxpr('(X (c d))')
        assert second == parse_sxpr('(X F)')

        # the root node is matched only if include_root is set:
        is_root_tag = has_tag('a')
        assert list(tree.select(is_root_tag, include_root=True))
        assert not list(tree.select(is_root_tag, include_root=False))

    def test_getitem(self):
        """Integer indices address children; select_by_tag() searches deeper."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0] == parse_sxpr('(b X)')
        assert tree[2] == parse_sxpr('(e (X F))')

        # The tree has three children only, so index 3 is out of range.
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"

        matches = list(tree.select_by_tag('X', False))
        assert matches[0] == parse_sxpr('(X (c d))')
        assert matches[1] == parse_sxpr('(X F)')

    def test_contains(self):
        """`in` looks at the immediate children only, not root or grandchildren."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # 'a' is the root itself and therefore not "in" the tree.
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # 'c' is a grandchild: found by select_by_tag(), but not by `in`.
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
247
248


249
class TestSerialization:
    """Tests for the S-expression and XML serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder; no round-trip check has been written yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node serialize like attributes read from source."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()

        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the indented multi-line layout produced by as_sxpr()."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s

        # Content containing line breaks is written as one string per line.
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """Check the bracket-free layout produced by as_sxpr(compact=True)."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Check as_xml() with and without inline_tags and with xml:space."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Bind the value under test first, so that a failure reports this
        # serialization rather than the inlined one from the previous check.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
314

di68kap's avatar
di68kap committed
315

316
if __name__ == "__main__":
    # Script mode: hand this module's namespace to DHParser's test runner.
    # The import stays local and keeps its name, because the runner receives
    # globals() and therefore sees every name bound at module level.
    from DHParser.testing import runner
    runner("", globals())