Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

test_syntaxtree.py 13.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
import sys

# Extend the module search path before importing DHParser -- presumably so
# that the package is found both when the tests are started from the
# directory above the package ('../') and from the package root ('./').
sys.path.extend(['../', './'])

from DHParser.syntaxtree import (
    Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, flatten_xml, ZOMBIE_TAG)
from DHParser.transform import (
    traverse, reduce_single_child, replace_by_single_child, flatten,
    remove_empty, remove_whitespace)
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider
from DHParser.error import Error
from DHParser.parse import RE, Grammar
34

35

36
37
class TestParseSxpression:
    """Tests for parse_sxpr(), which reads trees written as S-expressions."""

    def test_parse_s_expression(self):
        """Parse nested and multi-line S-expressions; reject non-tree input."""
        nested = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert nested == '(a (b "c"))', nested

        # Leaf content spread over several lines is expected to come back
        # as a sequence of separately quoted strings.
        multiline = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert multiline == '(a "i" "j" "k")', multiline

        # A bare sequence of words is not a tree, so parsing must fail.
        try:
            parse_sxpr('a b c')
        except ValueError:
            rejected = True
        else:
            rejected = False
        assert rejected, "parse_sxpr() should raise a ValueError " \
                         "if argument is not a tree!"

49
50
51
52
53
54
55
class TestParseXML:
    """Tests for parse_xml() and for the XML serialization of node trees."""

    def test_roundtrip(self):
        """Serialize a tree as XML, parse it back and serialize it again."""
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(source_tree.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Check how text that is mixed with child elements is represented."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        # Whitespace-only text between the tags on a single line.
        padded = parse_xml(' <a>  <b>beta</b>  </a> ')
        expected_flat = '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>'
        assert flatten_xml(padded.as_xml()) == expected_flat
        assert padded.as_xml(inline_tags={'a'}, omit_tags={':Token'}) == '<a>  <b>beta</b>  </a>'

        # Same structure, but with line breaks and indentation between the tags.
        indented = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        assert indented.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """flatten_xml() is expected to strip line breaks and indentation."""
        parsed = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flattened = flatten_xml(parsed.as_xml())
        assert flattened == '<alpha><beta>gamma</beta></alpha>', flattened

73

74
75
76
77
78
79
class TestNode:
    """
    Tests for class Node
    """

    def setup(self):
        """Build the two fixture trees that several tests read.

        ``unique_tree`` contains only distinct tag names, whereas in
        ``recurr_tree`` the tag name "b" occurs twice.
        """
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def setup_method(self, method=None):
        """Per-test hook under pytest's native name; delegates to setup().

        pytest 8 no longer calls the nose-style ``setup`` method, so without
        this hook the fixture attributes would be missing there. ``setup``
        itself is kept for runners that call it by that name.
        """
        self.setup()

    def test_content_property(self):
        """The content property must reflect later changes of leaf results."""
        tree = RootNode(parse_sxpr('(a (b c) (d e))'))
        content = tree.content
        b = tree.pick('b')
        d = tree.pick('d')
        b.result = "recently "
        d.result = "changed"
        assert content != tree.content
        assert content == 'ce'
        assert tree.content == 'recently changed'

    def test_deepcopy(self):
        """A deep copy must be equal at first and independent afterwards."""
        sxpr = '(a (b c) (d (e f) (h i)))'
        tree = RootNode(parse_sxpr(sxpr))
        tree.with_pos(0)
        tree_copy = copy.deepcopy(tree)
        pristine = parse_sxpr(sxpr).as_sxpr()

        assert tree.equals(tree_copy)
        assert tree.as_sxpr() == pristine
        assert tree_copy.as_sxpr() == pristine

        # An error added to the original must not show up in the copy.
        tree.add_error(tree, Error('Test Error', 0))
        assert not tree_copy.errors
        assert tree.as_sxpr() != pristine
        assert tree_copy.as_sxpr() == pristine

        # Changing a result in the original must leave the copy untouched.
        tree['d'].result = "x"
        assert not tree.equals(tree_copy)
        assert tree_copy.equals(parse_sxpr(sxpr))
        assert tree.equals(parse_sxpr('(a (b c) (d x))'))

        # this also checks for errors equality...
        assert parse_sxpr('(a (b c) (d x))').as_sxpr() != tree.as_sxpr()

    def test_str(self):
        """str() of a tree is the concatenation of its leaf contents."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_select_subnodes(self):
        """select() with an always-true match function yields every node."""
        tags = ''.join(
            node.tag_name
            for node in self.unique_tree.select(lambda nd: True, include_root=True))
        assert tags == "abdfg", tags

    def test_find(self):
        """select() with a match function returns the matching nodes in order."""
        found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e"))
        assert len(found) == 1
        assert found[0].result == 'e'
        found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(found) == 2
        assert found[0].result == 'x' and found[1].result == 'y'

    def test_equality1(self):
        """equals() compares trees by structure and content."""
        assert self.unique_tree.equals(self.unique_tree)
        assert not self.recurr_tree.equals(self.unique_tree)
        assert not parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b d))'))
        assert parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b c))'))

    def test_equality2(self):
        """A parsed and transformed tree equals the same tree given as S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        att = {"term": [remove_empty, remove_whitespace, replace_by_single_child, flatten],
               "factor": [remove_empty, remove_whitespace, reduce_single_child],
               "*": [remove_empty, remove_whitespace, replace_by_single_child]}
        parser = grammar_provider(ebnf)()
        tree = parser("20 / 4 * 3")
        traverse(tree, att)
        compare_tree = parse_sxpr("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree.equals(compare_tree), tree.as_sxpr()

    def test_copy(self):
        """Changing a deep copy must not change the tree it was copied from."""
        cpy = copy.deepcopy(self.unique_tree)
        assert cpy.equals(self.unique_tree)
        assert cpy.result[0].result != "epsilon"  # just to make sure...
        cpy.result[0].result = "epsilon"
        assert not cpy.equals(self.unique_tree)

    def test_copy2(self):
        """Deep copies must be complete enough to be transformed and compiled."""
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy

        def differing_chars(left, right):
            """Collect the characters of left that differ from right position-wise."""
            return ''.join(a for a, b in zip(left, right) if a != b)

        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        # Compile a transformed copy and a copy of that transformed copy.
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res1 = compiler(tree_copy)
        t2 = copy.deepcopy(tree_copy)
        res2 = compiler(t2)
        assert differing_chars(res1, res2).isnumeric()  # differences should only be ID-Numbers

        # Compile a second, freshly transformed copy of the untouched tree.
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res3 = compiler(tree_copy)
        assert differing_chars(res2, res3).isnumeric()  # differences should only be ID-Numbers

        # Finally transform and compile the original tree itself.
        transform(tree)
        res4 = compiler(tree)
        assert differing_chars(res3, res4).isnumeric()  # differences should only be ID-Numbers

    def test_len_and_pos(self):
        """Test len-property of Node."""
        nd1 = Node(ZOMBIE_TAG, "123")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        nd2 = Node(ZOMBIE_TAG, "456")
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
        nd = Node(ZOMBIE_TAG, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
        # Positions are assigned from the parent downwards; the second child
        # is expected to start right after the three characters of the first.
        nd.with_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

    def test_xml_sanitizer(self):
        """Characters with a special meaning in XML must be escaped by as_xml()."""
        node = Node('tag', '<&>')
        assert node.as_xml() == '<tag>&lt;&amp;&gt;</tag>'
199

di68kap's avatar
di68kap committed
200
201
202

class TestRootNode:
    """Tests for the error bookkeeping of class RootNode."""

    def test_error_handling(self):
        """Errors registered out of order must be reported in sorted order."""
        tree = parse_sxpr('(A (B D) (C E))')
        tree.with_pos(0)
        root = RootNode()
        # Register the error for the later node (C) first, so that the order
        # of registration differs from the order of the nodes in the tree.
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.errors_sorted
        # Checked a second time on purpose: reading the errors is presumably
        # not supposed to reset the flag -- TODO confirm the intent.
        assert root.error_flag
        error_str = "\n".join(str(e) for e in errors)
        # Look for the full messages: the former check compared the positions
        # of the letters "A" and "B", but "A" occurs in neither message, so
        # the ordering was never actually verified.
        pos_b = error_str.find("error B")
        pos_c = error_str.find("error C")
        assert 0 <= pos_b < pos_c, error_str

    def test_error_reporting(self):
        """An error must be rendered inline when the tree is converted to str."""
        # The first alternative already matches "3", so the parser is
        # expected to stop before the end of the input.
        number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
        result = str(Grammar(number)("3.1416"))
        assert result == '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> ', \
            result

di68kap's avatar
di68kap committed
223

224
class TestNodeFind:
    """Test the select-functions of class Node."""

    def test_find(self):
        """select() with a match function finds nodes on every nesting level."""
        def has_tag(node, tag_name):
            return node.tag_name == tag_name

        def is_x(node):
            return has_tag(node, "X")

        def is_a(node):
            return has_tag(node, 'a')

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        hits = list(tree.select(is_x))
        assert len(hits) == 2, len(hits)
        assert str(hits[0]) == 'd', str(hits[0])
        assert str(hits[1]) == 'F', str(hits[1])
        assert hits[0].equals(parse_sxpr('(X (c d))'))
        assert hits[1].equals(parse_sxpr('(X F)'))

        # The root node "a" is only matched if include_root is set.
        assert list(tree.select(is_a, include_root=True))
        assert not list(tree.select(is_a, include_root=False))

    def test_getitem(self):
        """Index access returns child nodes; a bad index raises IndexError."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0].equals(parse_sxpr('(b X)'))
        assert tree[2].equals(parse_sxpr('(e (X F))'))

        # The tree has three children only, so index 3 is out of range.
        try:
            tree[3]
        except IndexError:
            out_of_range = True
        else:
            out_of_range = False
        assert out_of_range, "IndexError expected!"

        by_tag = list(tree.select_by_tag('X', False))
        assert by_tag[0].equals(parse_sxpr('(X (c d))'))
        assert by_tag[1].equals(parse_sxpr('(X F)'))

    def test_contains(self):
        """The in-operator is expected to look at the direct children only."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # "a" is the root itself, not one of its children.
        assert 'a' not in tree
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # "c" sits one level deeper: select_by_tag() finds it, `in` does not.
        assert 'c' not in tree
        assert any(tree.select_by_tag('c', False))
267
268


269
class TestSerialization:
    """Tests for the S-expression and XML serialization of node trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder: no round-trip test has been written yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node must appear in its S-expression."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """Check the indented multi-line layout produced by as_sxpr()."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        # Leaf content containing line breaks is written as one quoted
        # string per line.
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """Check the bracket-free layout produced by as_sxpr(compact=True)."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Check how inline_tags and xml:space affect the layout of as_xml()."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Bind the result before asserting, so that a failure reports the
        # value that was actually compared rather than the previous one.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml
334

di68kap's avatar
di68kap committed
335

336
if __name__ == "__main__":
    # Executed as a script: hand this module's namespace to DHParser's own
    # test runner. The empty first argument presumably means "run all
    # tests" -- TODO confirm against DHParser.testing.runner.
    from DHParser.testing import runner
    runner("", globals())