Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

test_syntaxtree.py 13.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
24
25
sys.path.extend(['../', './'])

26
27
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
    flatten_xml, ZOMBIE_TAG
Eckhart Arnold's avatar
Eckhart Arnold committed
28
from DHParser.transform import traverse, reduce_single_child, \
29
    replace_by_single_child, flatten, remove_expendables
30
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
31
from DHParser.dsl import grammar_provider
32
from DHParser.error import Error
33
from DHParser.parse import RE, Grammar
34

35

36
37
class TestParseSxpression:
    """Checks for parse_sxpr(), the reader for S-expression strings."""

    def test_parse_s_expression(self):
        """Parse two well-formed S-expressions and reject a malformed one."""
        flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert flat == '(a (b "c"))', flat

        flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        # A string without enclosing brackets is expected to be rejected
        # with a ValueError.
        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

49
50
51
52
53
54
55
class TestParseXML:
    """Checks for parse_xml() in combination with the XML output of nodes."""

    def test_roundtrip(self):
        """Serialize a tree as XML, parse that XML and serialize it again."""
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(source_tree.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'

        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Check how text between child elements is represented and emitted."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        padded = parse_xml(' <a>  <b>beta</b>  </a> ')
        expected_flat = '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>'
        assert flatten_xml(padded.as_xml()) == expected_flat
        without_tokens = padded.as_xml(inline_tags={'a'}, omit_tags={':Token'})
        assert without_tokens == '<a>  <b>beta</b>  </a>'

        multiline = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        assert multiline.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """flatten_xml() removes the line breaks and indentation between tags."""
        parsed = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flattened = flatten_xml(parsed.as_xml())
        assert flattened == '<alpha><beta>gamma</beta></alpha>', flattened

73

74
75
76
77
78
79
class TestNode:
    """
    Tests for class Node
    """

    def setup(self):
        """Build the two fixture trees used by the tests of this class."""
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    # pytest >= 8 no longer calls nose-style ``setup`` methods. The alias
    # keeps the original name available for runners that look for ``setup``
    # while letting pytest find the fixture under its xunit-style name.
    setup_method = setup

    def test_deepcopy(self):
        """A deep copy of a root node is independent of the original."""
        sxpr = '(a (b c) (d (e f) (h i)))'
        tree = RootNode(parse_sxpr(sxpr))
        tree.with_pos(0)
        tree_copy = copy.deepcopy(tree)

        assert tree.equals(tree_copy)
        assert tree.as_sxpr() == parse_sxpr(sxpr).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # an error added to the original must not show up in the copy
        tree.add_error(tree, Error('Test Error', 0))
        assert not tree_copy.errors
        assert tree.as_sxpr() != parse_sxpr(sxpr).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # changing a result in the original must not change the copy
        tree['d'].result = "x"
        assert not tree.equals(tree_copy)
        assert tree_copy.equals(parse_sxpr(sxpr))
        assert tree.equals(parse_sxpr('(a (b c) (d x))'))

        # this also checks for errors equality...
        assert parse_sxpr('(a (b c) (d x))').as_sxpr() != tree.as_sxpr()

    def test_str(self):
        """str() of a tree yields the concatenated leaf contents."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_select_subnodes(self):
        """select() with an always-true predicate yields every node."""
        tags = [node.tag_name
                for node in self.unique_tree.select(lambda nd: True, include_root=True)]
        assert ''.join(tags) == "abdfg", ''.join(tags)

    def test_find(self):
        """select() finds nodes by content as well as by tag name."""
        found = list(self.unique_tree.select(lambda nd: not nd.children and nd.result == "e"))
        assert len(found) == 1
        assert found[0].result == 'e'
        found = list(self.recurr_tree.select(lambda nd: nd.tag_name == 'b'))
        assert len(found) == 2
        assert found[0].result == 'x' and found[1].result == 'y'

    def test_equality1(self):
        """equals() compares trees by structure and content."""
        assert self.unique_tree.equals(self.unique_tree)
        assert not self.recurr_tree.equals(self.unique_tree)
        assert not parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b d))'))
        assert parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b c))'))

    def test_equality2(self):
        """A parsed and transformed tree equals the same tree read from an S-expression."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        att = {"term": [remove_expendables, replace_by_single_child, flatten],
               "factor": [remove_expendables, reduce_single_child],
               "*": [remove_expendables, replace_by_single_child]}
        parser = grammar_provider(ebnf)()
        tree = parser("20 / 4 * 3")
        traverse(tree, att)
        compare_tree = parse_sxpr("(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert tree.equals(compare_tree), tree.as_sxpr()

    def test_copy(self):
        """Changing a deep copy leaves the original tree untouched."""
        cpy = copy.deepcopy(self.unique_tree)
        assert cpy.equals(self.unique_tree)
        assert cpy.result[0].result != "epsilon"  # just to make sure...
        cpy.result[0].result = "epsilon"
        assert not cpy.equals(self.unique_tree)

    def test_copy2(self):
        """Deep copies can be transformed and compiled like the original."""
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        # NOTE: ''.isnumeric() is False, so each check below also requires
        # that the two compared results differ in at least one character.
        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res1 = compiler(tree_copy)
        t2 = copy.deepcopy(tree_copy)
        res2 = compiler(t2)
        diff = ''.join([a for a, b in zip(res1, res2) if a != b])
        assert diff.isnumeric()  # differences should only be ID-Numbers

        tree_copy = copy.deepcopy(tree)
        transform(tree_copy)
        res3 = compiler(tree_copy)
        diff = ''.join([a for a, b in zip(res2, res3) if a != b])
        assert diff.isnumeric()  # differences should only be ID-Numbers

        transform(tree)
        res4 = compiler(tree)
        diff = ''.join([a for a, b in zip(res3, res4) if a != b])
        assert diff.isnumeric()  # differences should only be ID-Numbers

    def test_len_and_pos(self):
        """Test the length of nodes and the positions assigned by with_pos()."""
        nd1 = Node(ZOMBIE_TAG, "123")
        assert len(nd1) == 3, "Expected Node.len == 3, got %i" % len(nd1)
        nd2 = Node(ZOMBIE_TAG, "456")
        assert len(nd2) == 3, "Expected Node.len == 3, got %i" % len(nd2)
        nd = Node(ZOMBIE_TAG, (nd1, nd2))
        assert len(nd) == 6, "Expected Node.len == 6, got %i" % len(nd)
        nd.with_pos(0)
        assert nd.pos == 0, "Expected Node.pos == 0, got %i" % nd.pos
        assert nd1.pos == 0, "Expected Node.pos == 0, got %i" % nd1.pos
        assert nd2.pos == 3, "Expected Node.pos == 3, got %i" % nd2.pos

    def test_xml_sanitizer(self):
        """as_xml() escapes '<', '&' and '>' in the content of a node."""
        node = Node('tag', '<&>')
        assert node.as_xml() == '<tag>&lt;&amp;&gt;</tag>'
188

di68kap's avatar
di68kap committed
189
190
191

class TestRootNode:
    """Tests for the error bookkeeping and error reporting of root nodes."""

    def test_error_handling(self):
        """Errors added to a RootNode are reported ordered by position.

        The error for the second child ("error C") is registered before the
        error for the first child ("error B"). In ``errors_sorted`` the
        message belonging to the first child is expected to come first.
        """
        tree = parse_sxpr('(A (B D) (C E))')
        tree.with_pos(0)
        root = RootNode()
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.errors_sorted
        # reading the sorted errors must leave the error flag set
        assert root.error_flag

        error_str = "\n".join(str(e) for e in errors)
        # The former check compared find("A") with find("B"); no message
        # contains "A", so find() returned -1 and the check always passed.
        # NOTE(review): assumes str(error) contains the error message and
        # that errors_sorted sorts ascending by position - confirm.
        pos_b = error_str.find("error B")
        pos_c = error_str.find("error C")
        assert 0 <= pos_b < pos_c, error_str

    def test_error_reporting(self):
        """The string form of a partially parsed document contains the error."""
        number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
        result = str(Grammar(number)("3.1416"))
        assert result == '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> ', \
            str(result)

di68kap's avatar
di68kap committed
212

213
class TestNodeFind:
    """Test the select-functions of class Node.
    """

    def test_find(self):
        """select() yields the nodes for which the match function is true."""
        def has_tag(tag_name):
            # build a match function for one particular tag name
            def matcher(node):
                return node.tag_name == tag_name
            return matcher

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        hits = list(tree.select(has_tag("X")))
        assert len(hits) == 2, len(hits)
        first, second = hits
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first.equals(parse_sxpr('(X (c d))'))
        assert second.equals(parse_sxpr('(X F)'))

        # the root node is only a candidate if include_root is set
        is_root_tag = has_tag('a')
        assert list(tree.select(is_root_tag, include_root=True))
        assert not list(tree.select(is_root_tag, include_root=False))

    def test_getitem(self):
        """Integer indices address children; select_by_tag() finds by name."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        assert tree[0].equals(parse_sxpr('(b X)'))
        assert tree[2].equals(parse_sxpr('(e (X F))'))

        # the tree has three children, so index 3 is out of range
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"

        x_nodes = list(tree.select_by_tag('X', False))
        assert x_nodes[0].equals(parse_sxpr('(X (c d))'))
        assert x_nodes[1].equals(parse_sxpr('(X F)'))

    def test_contains(self):
        """``in`` checks the direct children only; select_by_tag() goes deeper."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # the root's own tag is not "contained" in the tree
        assert not ('a' in tree)
        assert any(tree.select_by_tag('a', True))
        assert not any(tree.select_by_tag('a', False))

        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # 'c' is a grandchild: not contained, but found by select_by_tag()
        assert not ('c' in tree)
        assert any(tree.select_by_tag('c', False))
256
257


258
class TestSerialization:
    """Tests for the S-expression and XML serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder; no round-trip check is implemented here yet."""
        # TODO: add an S-expression round-trip test
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node match attributes read from an S-expression."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """as_sxpr() emits an indented, multi-line S-expression."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s
        # content containing line breaks is emitted as one string per line
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr()
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """as_sxpr(compact=True) emits an indentation-based form without brackets."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """as_xml() keeps the content of inlined tags on a single line."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # bind the result first so that a failure reports the value that was
        # actually compared rather than the result of the previous call
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        # with xml:space="preserve" the output stays on one line as well
        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
323

di68kap's avatar
di68kap committed
324

325
if __name__ == "__main__":
    # Stand-alone execution: pass this module's namespace to DHParser's own
    # runner, with an empty string as the runner's first argument.
    from DHParser.testing import runner

    runner("", globals())