2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

test_syntaxtree.py 12.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24
25
sys.path.extend(['../', './'])

di68kap's avatar
di68kap committed
26
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml
Eckhart Arnold's avatar
Eckhart Arnold committed
27
from DHParser.transform import traverse, reduce_single_child, \
28
    replace_by_single_child, flatten, remove_expendables
29
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
30
from DHParser.dsl import grammar_provider
31
from DHParser.error import Error
32

33

34
35
class TestParseSxpression:
    """Tests for reading S-expressions with ``parse_sxpr``."""

    def test_parse_s_expression(self):
        """Check serialization of parsed S-expressions and the error on non-tree input."""
        flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert flat == '(a (b "c"))', flat

        # line breaks inside a leaf yield several string items
        flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

47
48
49
50
51
52
53
class TestParseXML:
    """Tests for ``parse_xml`` and the XML serialization of trees."""

    def test_roundtrip(self):
        """Tree -> XML -> tree -> XML must reproduce the same flattened XML."""
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(source_tree.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Check how text interleaved with child elements is represented."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        padded = parse_xml(' <a>  <b>beta</b>  </a> ')
        assert flatten_xml(padded.as_xml()) == '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>'
        inlined = padded.as_xml(inline_tags={'a'}, omit_tags={':Token'})
        assert inlined == '<a>  <b>beta</b>  </a>'

        multiline = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        assert multiline.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """``flatten_xml`` must strip the layout whitespace between tags."""
        parsed = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flat_xml = flatten_xml(parsed.as_xml())
        assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml

71

72
73
74
75
76
77
class TestNode:
    """
    Tests for class Node
    """

    def setup(self):
        """Create the two fixture trees used by the tests of this class."""
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def test_deepcopy(self):
        """A deep copy of a RootNode must be independent of the original."""
        sxpr = '(a (b c) (d (e f) (h i)))'
        pristine = parse_sxpr(sxpr).as_sxpr()
        original = RootNode(parse_sxpr(sxpr))
        duplicate = copy.deepcopy(original)

        assert original == duplicate
        assert original.as_sxpr() == pristine
        assert duplicate.as_sxpr() == pristine

        # an error attached to the original must not leak into the copy
        original.add_error(original, Error('Test Error', 0))
        assert not duplicate.all_errors
        assert original.as_sxpr() != pristine
        assert duplicate.as_sxpr() == pristine

        # neither must a changed result
        original['d'].result = "x"
        assert original != duplicate
        assert duplicate == parse_sxpr(sxpr)

        changed = parse_sxpr('(a (b c) (d x))')
        assert original == changed

        # this also checks for errors equality...
        assert changed.as_sxpr() != original.as_sxpr()

    def test_str(self):
        """``str()`` of a tree is the concatenation of its leaf contents."""
        unique_text = str(self.unique_tree)
        recurring_text = str(self.recurr_tree)
        assert unique_text == "ceh"
        assert recurring_text == "xey"

    def test_select_subnodes(self):
        """Selecting every node (root included) yields the expected tag order."""
        def accept_all(nd):
            return True

        joined = ''.join(
            nd.tag_name for nd in self.unique_tree.select(accept_all, include_root=True))
        assert joined == "abdfg", joined

    def test_find(self):
        """``select`` finds leaves by content and nodes by tag name."""
        def is_leaf_e(nd):
            return not nd.children and nd.result == "e"

        def is_b(nd):
            return nd.tag_name == 'b'

        hits = list(self.unique_tree.select(is_leaf_e))
        assert len(hits) == 1
        assert hits[0].result == 'e'

        hits = list(self.recurr_tree.select(is_b))
        assert len(hits) == 2
        assert hits[0].result == 'x'
        assert hits[1].result == 'y'

    def test_equality1(self):
        """Equality of trees depends on their structure and content."""
        assert self.unique_tree == self.unique_tree
        assert self.recurr_tree != self.unique_tree
        left = parse_sxpr('(a (b c))')
        assert left != parse_sxpr('(a (b d))')
        assert left == parse_sxpr('(a (b c))')

    def test_equality2(self):
        """A parsed and transformed tree equals the same tree read from an S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        transformation_table = {
            "term": [remove_expendables, replace_by_single_child, flatten],
            "factor": [remove_expendables, reduce_single_child],
            "*": [remove_expendables, replace_by_single_child],
        }
        grammar = grammar_provider(ebnf)()
        parsed = grammar("20 / 4 * 3")
        traverse(parsed, transformation_table)
        expected = parse_sxpr(
            "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert parsed == expected, parsed.as_sxpr()

    def test_copy(self):
        """Changing a child of a deep copy must not affect the original tree."""
        clone = copy.deepcopy(self.unique_tree)
        assert clone == self.unique_tree
        first_child = clone.result[0]
        assert first_child.result != "epsilon"  # just to make sure...
        first_child.result = "epsilon"
        assert clone != self.unique_tree

    def test_copy2(self):
        """Deep copies must be complete enough for transformation and compilation."""
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy
        def differing_chars(left, right):
            # characters of `left` that differ from `right` at the same index
            return ''.join(a for a, b in zip(left, right) if a != b)

        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        first_copy = copy.deepcopy(tree)
        transform(first_copy)
        res1 = compiler(first_copy)
        copy_of_copy = copy.deepcopy(first_copy)
        res2 = compiler(copy_of_copy)
        # differences should only be ID-Numbers
        assert differing_chars(res1, res2).isnumeric()

        second_copy = copy.deepcopy(tree)
        transform(second_copy)
        res3 = compiler(second_copy)
        # differences should only be ID-Numbers
        assert differing_chars(res2, res3).isnumeric()

        transform(tree)
        res4 = compiler(tree)
        # differences should only be ID-Numbers
        assert differing_chars(res3, res4).isnumeric()

    def test_len_and_pos(self):
        """Test len-property of Node."""
        left = Node(None, "123")
        assert len(left) == 3, "Expected Node.len == 3, got %i" % len(left)
        right = Node(None, "456")
        assert len(right) == 3, "Expected Node.len == 3, got %i" % len(right)
        parent = Node(None, (left, right))
        assert len(parent) == 6, "Expected Node.len == 6, got %i" % len(parent)

        # positions are assigned top-down, starting from the given offset
        parent.init_pos(0)
        assert parent.pos == 0, "Expected Node.pos == 0, got %i" % parent.pos
        assert left.pos == 0, "Expected Node.pos == 0, got %i" % left.pos
        assert right.pos == 3, "Expected Node.pos == 3, got %i" % right.pos

    def test_xml_sanitizer(self):
        """Characters with a special meaning in XML must be escaped."""
        serialized = Node('tag', '<&>').as_xml()
        assert serialized == '<tag>&lt;&amp;&gt;</tag>'
187

di68kap's avatar
di68kap committed
188
189
190

class TestRootNode:
    """Tests for the error bookkeeping of class RootNode."""

    def test_error_handling(self):
        """Errors registered out of order must be reported in document order.

        The previous final check compared ``find("A")`` with ``find("B")``.
        No registered message contains an "A", so ``find("A")`` was -1 and
        the assertion passed regardless of the actual ordering.
        """
        tree = parse_sxpr('(A (B D) (C E))')
        tree.init_pos(0)
        root = RootNode()
        # register the error of the second child first, i.e. out of order
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.collect_errors()
        # collecting the errors must leave the error flag set
        assert root.error_flag
        # assert errors == root.collect_errors(True)
        # assert not root.error_flag and not root.collect_errors()
        error_str = "\n".join(str(e) for e in errors)
        # NOTE(review): assumes collect_errors() orders errors by position and
        # that str(error) contains the error message - confirm against
        # DHParser.syntaxtree / DHParser.error.
        pos_b = error_str.find("error B")
        pos_c = error_str.find("error C")
        assert 0 <= pos_b < pos_c, error_str


206
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """``select`` yields matching nodes, optionally including the root."""
        def match_tag_name(node, tag_name):
            return node.tag_name == tag_name

        def is_x(node):
            return match_tag_name(node, "X")

        def is_a(node):
            return match_tag_name(node, 'a')

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        matches = list(tree.select(is_x))
        assert len(matches) == 2, len(matches)
        first, second = matches[0], matches[1]
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first == parse_sxpr('(X (c d))')
        assert second == parse_sxpr('(X F)')

        # check default: root is included in search:
        assert list(tree.select(is_a, include_root=True))
        assert not list(tree.select(is_a, include_root=False))

    def test_getitem(self):
        """Integer indices address children; an index out of range raises IndexError."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0] == parse_sxpr('(b X)')
        assert tree[2] == parse_sxpr('(e (X F))')
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"

        x_nodes = list(tree.select_by_tag('X', False))
        assert x_nodes[0] == parse_sxpr('(X (c d))')
        assert x_nodes[1] == parse_sxpr('(X F)')

    def test_contains(self):
        """``in`` checks the immediate children only; ``select_by_tag`` searches deeper."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # the root's own tag is not "contained" in the tree ...
        assert 'a' not in tree
        # ... but it is found by a selection that includes the root
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        assert 'b' in tree
        assert 'X' in tree
        assert 'e' in tree

        # 'c' is a grandchild: not contained, but selectable
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
249
250


251
class TestSerialization:
    """Tests for the S-expression and XML serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder; no round-trip checks are implemented yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node serialize like attributes read from an S-expression."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the indented multi-line S-expression output."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        # leaf content with line breaks is written as one string per line
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """Check the bracket-free, indentation-based compact output."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Check the effect of ``inline_tags`` and ``xml:space`` on the XML layout."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Bind the result first so that the failure message shows the value
        # that was actually compared, not the one from the previous call.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
317

di68kap's avatar
di68kap committed
318

319
if __name__ == "__main__":
    # When executed as a script, hand this module's namespace to DHParser's
    # own test runner.
    # NOTE(review): presumably the empty string means "no restriction on the
    # tests to run" - confirm against DHParser.testing.runner.
    from DHParser.testing import runner
    runner("", globals())