2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

test_syntaxtree.py 14.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/python3

"""test_syntaxtree.py - tests for the syntaxtree module of DHParser

Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import json
24
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
25
26
sys.path.extend(['../', './'])

27
28
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
    flatten_xml, ZOMBIE_TAG
Eckhart Arnold's avatar
Eckhart Arnold committed
29
from DHParser.transform import traverse, reduce_single_child, \
Eckhart Arnold's avatar
Eckhart Arnold committed
30
    replace_by_single_child, flatten, remove_empty, remove_whitespace
31
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
32
from DHParser.dsl import grammar_provider
33
from DHParser.error import Error
34
from DHParser.parse import RE, Grammar
35

36

37
38
class TestParseSxpression:
    """Checks of ``parse_sxpr`` on valid S-expressions and on a non-tree."""

    def test_parse_s_expression(self):
        """Parse, re-serialize and compare; then feed an input without parentheses."""
        cases = (
            ('(a (b c))', '(a (b "c"))'),
            ('(a i\nj\nk)', '(a "i" "j" "k")'),
        )
        for source, expected in cases:
            flat = flatten_sxpr(parse_sxpr(source).as_sxpr())
            assert flat == expected, flat

        # 'a b c' is not enclosed in parentheses; a ValueError is expected.
        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

50
51
52
53
54
55
56
class TestParseXML:
    """Checks of ``parse_xml`` in combination with the XML serialization."""

    def test_roundtrip(self):
        """Tree -> XML -> tree -> XML has to yield the same flattened XML."""
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(source_tree.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Text that stands between tags shows up as ``:Token`` nodes."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        as_sxpr = flatten_sxpr(mixed.as_sxpr())
        assert as_sxpr == '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'

        padded = parse_xml(' <a>  <b>beta</b>  </a> ')
        flat = flatten_xml(padded.as_xml())
        assert flat == '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>'
        without_tokens = padded.as_xml(inline_tags={'a'}, omit_tags={':Token'})
        assert without_tokens == '<a>  <b>beta</b>  </a>'

        multiline = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        assert multiline.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """``flatten_xml`` removes line breaks and indentation between tags."""
        parsed = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flattened = flatten_xml(parsed.as_xml())
        assert flattened == '<alpha><beta>gamma</beta></alpha>', flattened

74

75
class TestParseJSON:
    """JSON round-trip tests on a tree with non-ASCII content and attributes."""

    def setup(self):
        """Create the fixture tree and attach two attributes to its ``d`` node."""
        self.tree = parse_sxpr('(a (b ä) (d (e ö) (h ü)))')
        d = self.tree.pick('d')
        d.attr['name'] = "James Bond"
        d.attr['id'] = '007'

    def test_json_obj_roundtrip(self):
        """Tree -> JSON-object -> tree must yield an equal tree."""
        json_obj_tree = self.tree.to_json_obj()
        tree_copy = Node.from_json_obj(json_obj_tree)
        assert tree_copy.equals(self.tree)

    def test_json_rountrip(self):
        """Tree -> JSON-string -> tree must yield an equal tree.

        Both serialization variants are checked: compact with ASCII escapes
        and indented with the non-ASCII characters written verbatim.
        """
        s = self.tree.as_json(indent=None, ensure_ascii=True)
        tree_copy = Node.from_json_obj(json.loads(s))
        assert tree_copy.equals(self.tree)

        s = self.tree.as_json(indent=2, ensure_ascii=False)
        tree_copy = Node.from_json_obj(json.loads(s))
        # The second variant used to be deserialized without being verified.
        assert tree_copy.equals(self.tree)
93
94


95
96
97
98
99
100
class TestNode:
    """
    Tests for class Node
    """
    def setup(self):
        """Provide one tree with unique and one with recurring tag names."""
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def test_content_property(self):
        """``content`` has to reflect results that were changed afterwards."""
        root = RootNode(parse_sxpr('(a (b c) (d e))'))
        before = root.content
        node_b = root.pick('b')
        node_d = root.pick('d')
        node_b.result = "recently "
        node_d.result = "changed"
        assert before != root.content
        assert before == 'ce'
        assert root.content == 'recently changed'

    def test_deepcopy(self):
        """A deep copy must be equal at first and independent afterwards."""
        sxpr = '(a (b c) (d (e f) (h i)))'
        reference = parse_sxpr(sxpr).as_sxpr()

        tree = RootNode(parse_sxpr(sxpr))
        tree.with_pos(0)
        tree_copy = copy.deepcopy(tree)

        assert tree.equals(tree_copy)
        assert tree.as_sxpr() == reference
        assert tree_copy.as_sxpr() == reference

        # an error added to the original must not appear in the copy
        tree.add_error(tree, Error('Test Error', 0))
        assert not tree_copy.errors
        assert tree.as_sxpr() != reference
        assert tree_copy.as_sxpr() == reference

        # neither must a changed result
        tree['d'].result = "x"
        assert not tree.equals(tree_copy)
        assert tree_copy.equals(parse_sxpr(sxpr))
        assert tree.equals(parse_sxpr('(a (b c) (d x))'))

        # this also checks for errors equality...
        assert parse_sxpr('(a (b c) (d x))').as_sxpr() != tree.as_sxpr()

    def test_str(self):
        """``str()`` of a tree is the concatenation of its leaf contents."""
        for tree, expected in ((self.unique_tree, "ceh"),
                               (self.recurr_tree, "xey")):
            assert str(tree) == expected

    def test_select_subnodes(self):
        """Selecting every node (root included) yields the tags in order."""
        every_node = self.unique_tree.select(lambda nd: True, include_root=True)
        tag_sequence = ''.join(node.tag_name for node in every_node)
        assert tag_sequence == "abdfg", tag_sequence

    def test_find(self):
        """``select`` with a match-function yields all matching nodes."""
        leaves_e = list(self.unique_tree.select(
            lambda nd: not nd.children and nd.result == "e"))
        assert len(leaves_e) == 1
        assert leaves_e[0].result == 'e'

        b_nodes = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(b_nodes) == 2
        assert b_nodes[0].result == 'x' and b_nodes[1].result == 'y'

    def test_equality1(self):
        """``equals`` on identical, differing and separately parsed trees."""
        assert self.unique_tree.equals(self.unique_tree)
        assert not self.recurr_tree.equals(self.unique_tree)
        assert not parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b d))'))
        assert parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b c))'))

    def test_equality2(self):
        """A parsed and transformed tree equals its S-expression counterpart."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        transformation_table = {
            "term": [remove_empty, remove_whitespace, replace_by_single_child, flatten],
            "factor": [remove_empty, remove_whitespace, reduce_single_child],
            "*": [remove_empty, remove_whitespace, replace_by_single_child],
        }
        grammar = grammar_provider(ebnf)()
        tree = grammar("20 / 4 * 3")
        traverse(tree, transformation_table)
        expected = parse_sxpr(
            "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree.equals(expected), tree.as_sxpr()

    def test_copy(self):
        """Changing a child of a deep copy leaves the original untouched."""
        clone = copy.deepcopy(self.unique_tree)
        assert clone.equals(self.unique_tree)
        first_child = clone.result[0]
        assert first_child.result != "epsilon"  # just to make sure...
        first_child.result = "epsilon"
        assert not clone.equals(self.unique_tree)

    def test_copy2(self):
        """Deep copies must stay usable for AST-transformation and compilation."""
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy

        def differing_chars(left, right):
            # characters of `left` that differ from `right` at the same index
            return ''.join(a for a, b in zip(left, right) if a != b)

        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        first_copy = copy.deepcopy(tree)
        transform(first_copy)
        res1 = compiler(first_copy)
        second_copy = copy.deepcopy(first_copy)
        res2 = compiler(second_copy)
        # differences should only be ID-Numbers
        assert differing_chars(res1, res2).isnumeric()

        third_copy = copy.deepcopy(tree)
        transform(third_copy)
        res3 = compiler(third_copy)
        # differences should only be ID-Numbers
        assert differing_chars(res2, res3).isnumeric()

        transform(tree)
        res4 = compiler(tree)
        # differences should only be ID-Numbers
        assert differing_chars(res3, res4).isnumeric()

    def test_len_and_pos(self):
        """Test len-property of Node."""
        left = Node(ZOMBIE_TAG, "123")
        assert len(left) == 3, "Expected Node.len == 3, got %i" % len(left)
        right = Node(ZOMBIE_TAG, "456")
        assert len(right) == 3, "Expected Node.len == 3, got %i" % len(right)
        parent = Node(ZOMBIE_TAG, (left, right))
        assert len(parent) == 6, "Expected Node.len == 6, got %i" % len(parent)

        parent.with_pos(0)
        assert parent.pos == 0, "Expected Node.pos == 0, got %i" % parent.pos
        assert left.pos == 0, "Expected Node.pos == 0, got %i" % left.pos
        assert right.pos == 3, "Expected Node.pos == 3, got %i" % right.pos

    def test_xml_sanitizer(self):
        """``<``, ``&`` and ``>`` in the content are escaped by ``as_xml``."""
        special_chars = Node('tag', '<&>')
        assert special_chars.as_xml() == '<tag>&lt;&amp;&gt;</tag>'
220

di68kap's avatar
di68kap committed
221
222
223

class TestRootNode:
    """Tests for the error bookkeeping and error reporting of RootNode."""

    def test_error_handling(self):
        """Errors added to a root node set the error flag and come out sorted.

        NOTE(review): ``errors_sorted`` is presumably ordered by node position
        (the positions are assigned by ``with_pos(0)``) -- confirm against
        the RootNode implementation.
        """
        tree = parse_sxpr('(A (B D) (C E))')
        tree.with_pos(0)
        root = RootNode()
        # The errors are added in reverse document order, so that the order
        # check at the end cannot succeed merely because of insertion order.
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.errors_sorted
        # reading the sorted errors must leave the error flag set
        assert root.error_flag
        # assert errors == root.errors(True)
        # assert not root.error_flag and not root.errors()
        error_str = "\n".join(str(e) for e in errors)
        # Compare the positions of the two messages that really exist. The
        # former check looked for "A", which str.find() reports as -1 when
        # absent, and was therefore true no matter how the errors were sorted.
        pos_b = error_str.find("error B")
        pos_c = error_str.find("error C")
        assert 0 <= pos_b < pos_c, error_str

    def test_error_reporting(self):
        """An incompletely parsed input is reported inside the result string."""
        number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
        result = str(Grammar(number)("3.1416"))
        assert result == '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> ', \
            result

di68kap's avatar
di68kap committed
244

245
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """``select`` with a match-function, with and without the root node."""
        def has_tag(tag_name):
            # build a match-function that compares a node's tag name
            def matcher(node):
                return node.tag_name == tag_name
            return matcher

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        hits = list(tree.select(has_tag("X")))
        assert len(hits) == 2, len(hits)
        assert str(hits[0]) == 'd', str(hits[0])
        assert str(hits[1]) == 'F', str(hits[1])
        assert hits[0].equals(parse_sxpr('(X (c d))'))
        assert hits[1].equals(parse_sxpr('(X F)'))

        # the root node is only matched if include_root is set
        is_root_tag = has_tag('a')
        assert list(tree.select(is_root_tag, include_root=True))
        assert not list(tree.select(is_root_tag, include_root=False))

    def test_getitem(self):
        """Index access to children, IndexError and ``select_by_tag``."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0].equals(parse_sxpr('(b X)'))
        assert tree[2].equals(parse_sxpr('(e (X F))'))
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"

        x_nodes = list(tree.select_by_tag('X', False))
        assert x_nodes[0].equals(parse_sxpr('(X (c d))'))
        assert x_nodes[1].equals(parse_sxpr('(X F)'))

    def test_contains(self):
        """``in`` versus ``select_by_tag`` for root, child and deeper tags."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # the tag of the root node itself
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        # tags of the immediate children
        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # a tag that only occurs further down in the tree
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
288
289


290
class TestSerialization:
    """Tests of the S-expression and XML serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder; no round-trip check is implemented here."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set via ``attr`` serialize like attributes that were parsed."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Fully expanded (``flatten_threshold=0``) S-expression output."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr(flatten_threshold=0)
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr(flatten_threshold=0)
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr(flatten_threshold=0)
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """Compact (``compact=True``) S-expression output."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """XML output with ``inline_tags`` and with ``xml:space="preserve"``."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Bind the result first, so that a failure reports the string that
        # was actually compared rather than the one of the previous call.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
355

di68kap's avatar
di68kap committed
356

357
if __name__ == "__main__":
    # Script mode: hand this module's namespace to the runner from
    # DHParser.testing, with an empty string as its first argument.
    from DHParser.testing import runner as run_tests
    run_tests("", globals())