Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

test_syntaxtree.py 13.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24
25
sys.path.extend(['../', './'])

26
27
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
    flatten_xml, ZOMBIE_TAG
Eckhart Arnold's avatar
Eckhart Arnold committed
28
from DHParser.transform import traverse, reduce_single_child, \
Eckhart Arnold's avatar
Eckhart Arnold committed
29
    replace_by_single_child, flatten, remove_empty, remove_whitespace
30
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
31
from DHParser.dsl import grammar_provider
32
from DHParser.error import Error
33
from DHParser.parse import RE, Grammar
34

35

36
37
class TestParseSxpression:
    """Checks for parse_sxpr(), the S-expression reader."""

    def test_parse_s_expression(self):
        """Parse two S-expressions and check their flattened serialization;
        a string that is not a tree is expected to raise a ValueError."""
        nested = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert nested == '(a (b "c"))', nested

        multiline = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert multiline == '(a "i" "j" "k")', multiline

        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            # Only reached when no exception was raised at all.
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

49
50
51
52
53
54
55
class TestParseXML:
    """Checks for parse_xml(), as_xml() and flatten_xml()."""

    def test_roundtrip(self):
        """Serialize a tree to XML, parse it back and compare the XML."""
        original = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(original.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Check how text and whitespace between tags show up after parsing."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        padded = parse_xml(' <a>  <b>beta</b>  </a> ')
        expected_flat = '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>'
        assert flatten_xml(padded.as_xml()) == expected_flat
        inlined = padded.as_xml(inline_tags={'a'}, omit_tags={':Token'})
        assert inlined == '<a>  <b>beta</b>  </a>'

        indented = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        assert indented.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """flatten_xml() is expected to strip the line breaks and indentation."""
        parsed = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flat_xml = flatten_xml(parsed.as_xml())
        assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml

73

74
75
76
77
78
79
80
81
82
83
84
class TestParseJSON:
    """Checks for the JSON-object conversion of Node."""

    def test_roundtrip(self):
        """A tree with attributes must equal its copy rebuilt from
        to_json_obj() via Node.from_json_obj()."""
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        attributed = source_tree.pick('d')
        attributed.attr['name'] = "James Bond"
        attributed.attr['id'] = '007'
        rebuilt = Node.from_json_obj(source_tree.to_json_obj())
        assert rebuilt.equals(source_tree)


85
86
87
88
89
90
class TestNode:
    """
    Tests for class Node
    """
    def setup(self):
        """Create the two fixture trees that several tests below read.

        NOTE(review): presumably invoked by the test runner before the
        tests are executed -- confirm against the runner in use.
        """
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def test_content_property(self):
        """The content of the root must follow changes to node results."""
        root = RootNode(parse_sxpr('(a (b c) (d e))'))
        before = root.content
        node_b, node_d = root.pick('b'), root.pick('d')
        node_b.result = "recently "
        node_d.result = "changed"
        after = root.content
        assert before != after
        assert before == 'ce'
        assert root.content == 'recently changed'

    def test_deepcopy(self):
        """A deep copy must be equal at first and unaffected by later
        changes (errors, results) made to the original."""
        source = '(a (b c) (d (e f) (h i)))'
        tree = RootNode(parse_sxpr(source))
        tree.with_pos(0)
        tree_copy = copy.deepcopy(tree)

        assert tree.equals(tree_copy)
        assert tree.as_sxpr() == parse_sxpr(source).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(source).as_sxpr()

        # An error added to the original must not appear in the copy.
        tree.add_error(tree, Error('Test Error', 0))
        assert not tree_copy.errors
        assert tree.as_sxpr() != parse_sxpr(source).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(source).as_sxpr()

        # The same holds for a changed result.
        tree['d'].result = "x"
        assert not tree.equals(tree_copy)
        assert tree_copy.equals(parse_sxpr(source))
        changed_source = '(a (b c) (d x))'
        assert tree.equals(parse_sxpr(changed_source))

        # this also checks for errors equality...
        assert parse_sxpr(changed_source).as_sxpr() != tree.as_sxpr()

    def test_str(self):
        """str() of a tree yields its concatenated leaf content."""
        expectations = ((self.unique_tree, "ceh"), (self.recurr_tree, "xey"))
        for tree, text in expectations:
            assert str(tree) == text

    def test_select_subnodes(self):
        """select() with an always-true predicate and include_root=True
        must yield the tags in the order a, b, d, f, g."""
        joined = ''.join(
            nd.tag_name
            for nd in self.unique_tree.select(lambda _: True, include_root=True))
        assert joined == "abdfg", joined

    def test_find(self):
        """select() with predicates on the result and on the tag name."""
        def is_leaf_e(nd):
            return not nd.children and nd.result == "e"

        def is_b(nd):
            return nd.tag_name == 'b'

        leaves = list(self.unique_tree.select(is_leaf_e))
        assert len(leaves) == 1
        assert leaves[0].result == 'e'

        b_nodes = list(self.recurr_tree.select(is_b))
        assert len(b_nodes) == 2
        assert b_nodes[0].result == 'x' and b_nodes[1].result == 'y'

    def test_equality1(self):
        """equals() on identical, different and separately parsed trees."""
        assert self.unique_tree.equals(self.unique_tree)
        assert not self.recurr_tree.equals(self.unique_tree)
        left = parse_sxpr('(a (b c))')
        assert not left.equals(parse_sxpr('(a (b d))'))
        assert parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b c))'))

    def test_equality2(self):
        """A parsed and transformed tree must equal the tree read from
        the corresponding S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        cleanup = [remove_empty, remove_whitespace]
        att = {"term": cleanup + [replace_by_single_child, flatten],
               "factor": cleanup + [reduce_single_child],
               "*": cleanup + [replace_by_single_child]}
        arithmetic = grammar_provider(ebnf)()
        tree = arithmetic("20 / 4 * 3")
        traverse(tree, att)
        expected = parse_sxpr(
            "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree.equals(expected), tree.as_sxpr()

    def test_copy(self):
        """Changing a deep copy must make it unequal to the original."""
        clone = copy.deepcopy(self.unique_tree)
        assert clone.equals(self.unique_tree)
        first_child = clone.result[0]
        assert first_child.result != "epsilon"  # just to make sure...
        first_child.result = "epsilon"
        assert not clone.equals(self.unique_tree)

    def test_copy2(self):
        """Test if Node.__deepcopy__ goes sufficiently deep for ast-
        transformation and compiling to perform correctly after copy."""
        def mismatched(lhs, rhs):
            # Characters of lhs that differ from rhs at the same index.
            return ''.join([x for x, y in zip(lhs, rhs) if x != y])

        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        # The statement order matters: transform() is applied to each
        # tree before that tree is compiled.
        first_copy = copy.deepcopy(tree)
        transform(first_copy)
        res1 = compiler(first_copy)
        copy_of_copy = copy.deepcopy(first_copy)
        res2 = compiler(copy_of_copy)
        assert mismatched(res1, res2).isnumeric()  # differences should only be ID-Numbers

        second_copy = copy.deepcopy(tree)
        transform(second_copy)
        res3 = compiler(second_copy)
        assert mismatched(res2, res3).isnumeric()  # differences should only be ID-Numbers

        transform(tree)
        res4 = compiler(tree)
        assert mismatched(res3, res4).isnumeric()  # differences should only be ID-Numbers

    def test_len_and_pos(self):
        """Test the length of nodes and the positions set by with_pos()."""
        first = Node(ZOMBIE_TAG, "123")
        assert len(first) == 3, "Expected Node.len == 3, got %i" % len(first)
        second = Node(ZOMBIE_TAG, "456")
        assert len(second) == 3, "Expected Node.len == 3, got %i" % len(second)
        parent = Node(ZOMBIE_TAG, (first, second))
        assert len(parent) == 6, "Expected Node.len == 6, got %i" % len(parent)

        parent.with_pos(0)
        assert parent.pos == 0, "Expected Node.pos == 0, got %i" % parent.pos
        assert first.pos == 0, "Expected Node.pos == 0, got %i" % first.pos
        assert second.pos == 3, "Expected Node.pos == 3, got %i" % second.pos

    def test_xml_sanitizer(self):
        """as_xml() must escape the characters <, & and > in the content."""
        escaped = Node('tag', '<&>').as_xml()
        assert escaped == '<tag>&lt;&amp;&gt;</tag>'
210

di68kap's avatar
di68kap committed
211
212
213

class TestRootNode:
    """Checks for the error bookkeeping of RootNode."""

    def test_error_handling(self):
        """Errors attached before swallow() must be reported by
        errors_sorted in the order of their nodes' positions."""
        tree = parse_sxpr('(A (B D) (C E))')
        tree.with_pos(0)
        root = RootNode()
        # Deliberately registered in reverse order: C is the second child,
        # B the first.
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.errors_sorted
        assert root.error_flag
        # assert errors == root.errors(True)
        # assert not root.error_flag and not root.errors()
        error_str = "\n".join(str(e) for e in errors)
        # The former check compared find("A") with find("B"); neither message
        # contains an "A", so it could not detect a wrong order.  Compare the
        # two messages that were actually registered instead.
        pos_b = error_str.find("error B")
        pos_c = error_str.find("error C")
        assert 0 <= pos_b < pos_c, error_str

    def test_error_reporting(self):
        """The string form of a partially parsed document contains the
        error notice at the place where the parser stopped."""
        number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
        result = str(Grammar(number)("3.1416"))
        assert result == '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> ', \
            result

di68kap's avatar
di68kap committed
234

235
class TestNodeFind():
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """select() with a tag-name predicate, with and without the root."""
        def has_tag(tag_name):
            # Build a predicate matching nodes with the given tag name.
            def predicate(node):
                return node.tag_name == tag_name
            return predicate

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        matches = list(tree.select(has_tag("X")))
        assert len(matches) == 2, len(matches)
        first, second = matches[0], matches[1]
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first.equals(parse_sxpr('(X (c d))'))
        assert second.equals(parse_sxpr('(X F)'))

        # the root node "a" is only found if include_root is set:
        is_root_tag = has_tag('a')
        assert list(tree.select(is_root_tag, include_root=True))
        assert not list(tree.select(is_root_tag, include_root=False))

    def test_getitem(self):
        """Index access to children and selection by tag name."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0].equals(parse_sxpr('(b X)'))
        assert tree[2].equals(parse_sxpr('(e (X F))'))
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"

        x_nodes = list(tree.select_by_tag('X', False))
        assert x_nodes[0].equals(parse_sxpr('(X (c d))'))
        assert x_nodes[1].equals(parse_sxpr('(X F)'))

    def test_contains(self):
        """The `in` operator compared with select_by_tag() for the root,
        for direct children and for a deeper node."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # the root's own tag
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        # tags of the direct children
        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # "c" is not reported by `in`, but select_by_tag() finds it
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
278
279


280
class TestSerialization:
    """Checks for the S-expression and XML serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder; no checks have been written yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set via `attr` must serialize like attributes that
        were read from the S-expression."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the exact multi-line layout produced by as_sxpr()."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        # content that contains line breaks
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """Check the exact layout produced by as_sxpr(compact=True)."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        # content that contains line breaks
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Check the output of as_xml() with and without inline_tags and
        with an xml:space="preserve" attribute."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Bind the value before comparing, so that a failure reports the
        # string that was actually checked and not the previous one.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        # content that contains line breaks
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
345

di68kap's avatar
di68kap committed
346

347
if __name__ == "__main__":
    # When executed as a script, pass this module's namespace to the
    # runner from DHParser.testing.  The import is kept local, so it is
    # only needed in that case.
    # NOTE(review): the empty string presumably means "no restriction
    # on the tests to run" -- confirm against DHParser.testing.runner.
    from DHParser.testing import runner
    runner("", globals())