24.09., 9:00 - 11:00: Due to updates GitLab will be unavailable for some minutes between 09:00 and 11:00.

test_syntaxtree.py 15.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
#!/usr/bin/python3

"""test_syntaxtree.py - test of syntaxtree-module of DHParser 
                             
Author: Eckhart Arnold <arnold@badw.de>

Copyright 2017 Bavarian Academy of Sciences and Humanities

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

22
import copy
23
import json
24
import sys
Eckhart Arnold's avatar
Eckhart Arnold committed
25 26
sys.path.extend(['../', './'])

27
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
28
    flatten_xml, parse_json_syntaxtree, ZOMBIE_TAG
Eckhart Arnold's avatar
Eckhart Arnold committed
29
from DHParser.transform import traverse, reduce_single_child, \
Eckhart Arnold's avatar
Eckhart Arnold committed
30
    replace_by_single_child, flatten, remove_empty, remove_whitespace
31
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
32
from DHParser.dsl import grammar_provider
33
from DHParser.error import Error
34
from DHParser.parse import RE, Grammar
35

36

37 38
class TestParseSxpression:
    """Tests for reading S-expressions with ``parse_sxpr``."""

    def test_parse_s_expression(self):
        """Parse well-formed S-expressions and reject input that is no tree."""
        flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
        assert flat == '(a (b "c"))', flat

        # Content spread over several lines is serialized as several strings.
        flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
        assert flat == '(a "i" "j" "k")', flat

        try:
            parse_sxpr('a b c')
        except ValueError:
            pass
        else:
            assert False, "parse_sxpr() should raise a ValueError " \
                          "if argument is not a tree!"

50 51 52 53 54 55 56
class TestParseXML:
    """Tests for ``parse_xml`` and the XML serialization of trees."""

    def test_roundtrip(self):
        """Serialize a tree as XML, parse it again and compare the XML."""
        source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
        flat = flatten_xml(source_tree.as_xml())
        assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'
        reparsed = parse_xml(flat)
        assert flat == flatten_xml(reparsed.as_xml())

    def test_plaintext_handling(self):
        """Check how text next to child elements is represented."""
        mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
        expected_sxpr = '(a (:Token "alpha ") (b "beta") (:Token " gamma"))'
        assert flatten_sxpr(mixed.as_sxpr()) == expected_sxpr

        padded = parse_xml(' <a>  <b>beta</b>  </a> ')
        assert flatten_xml(padded.as_xml()) == \
            '<a><:Token>  </:Token><b>beta</b><:Token>  </:Token></a>'
        assert padded.as_xml(inline_tags={'a'}, omit_tags={':Token'}) == \
            '<a>  <b>beta</b>  </a>'

        multiline = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
        assert multiline.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'

    def test_flatten_xml(self):
        """Flattening removes the line breaks and indentation between tags."""
        parsed = parse_xml('<alpha>\n  <beta>gamma</beta>\n</alpha>')
        flat_xml = flatten_xml(parsed.as_xml())
        assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml

74

75
class TestParseJSON:
    """Tests for the JSON serialization and de-serialization of trees."""

    def setup(self):
        """Create a tree with non-ASCII content and attributes on node 'd'."""
        self.tree = parse_sxpr('(a (b ä) (d (e ö) (h über)))').with_pos(0)
        d = self.tree.pick('d')
        d.attr['name'] = "James Bond"
        d.attr['id'] = '007'

    def test_json_obj_roundtrip(self):
        """Convert to a JSON object and back; the copy must equal the tree."""
        json_obj_tree = self.tree.to_json_obj()
        tree_copy = Node.from_json_obj(json_obj_tree)
        assert tree_copy.equals(self.tree), tree_copy.as_sxpr()

    def test_json_rountrip(self):
        """Serialize to a JSON string and back for two formatting variants.

        Both the compact ASCII-only form and the indented form with raw
        non-ASCII characters are checked. Previously the second variant was
        deserialized but never compared with the source tree.
        """
        for indent, ensure_ascii in ((None, True), (2, False)):
            s = self.tree.as_json(indent=indent, ensure_ascii=ensure_ascii)
            tree_copy = Node.from_json_obj(json.loads(s))
            assert tree_copy.equals(self.tree), tree_copy.as_sxpr()

    def test_attr_serialization_and_parsing(self):
        """Check attribute and position handling for JSON, XML and S-expr."""
        n = Node('employee', 'James Bond').with_pos(46)
        n.attr['branch'] = 'Secret Service'
        n.attr['id'] = '007'

        # JSON (the local is deliberately not called `json`, so that the
        # imported module of that name is not shadowed)
        json_str = n.as_json()
        tree = parse_json_syntaxtree(json_str)
        assert tree.equals(n), tree.as_sxpr()

        # XML
        xml = n.as_xml()
        assert xml.find('_pos') < 0
        xml = n.as_xml('')
        assert xml.find('_pos') >= 0
        tree = parse_xml(xml)
        assert tree.pos == 46
        assert '_pos' not in tree.attr
        # with ignore_pos=True, '_pos' stays an ordinary attribute
        tree = parse_xml(xml, ignore_pos=True)
        assert '_pos' in tree.attr
        assert tree._pos < 0

        # S-Expression
        sxpr = n.as_sxpr()
        assert sxpr.find('pos') < 0
        sxpr = n.as_sxpr('')
        assert sxpr.find('pos') >= 0
        tree = parse_sxpr(sxpr)
        assert tree.pos == 46
        assert 'pos' not in tree.attr

125

126 127 128 129 130 131
class TestNode:
    """
    Test cases for class Node
    """
    def setup(self):
        """Parse one tree with unique and one with recurring tag names."""
        self.unique_nodes_sexpr = '(a (b c) (d e) (f (g h)))'
        self.recurring_nodes_sexpr = '(a (b x) (c (d e) (b y)))'
        self.unique_tree = parse_sxpr(self.unique_nodes_sexpr)
        self.recurr_tree = parse_sxpr(self.recurring_nodes_sexpr)

    def test_content_property(self):
        """The content must follow changes of the results of child nodes."""
        root = RootNode(parse_sxpr('(a (b c) (d e))'))
        old_content = root.content
        node_b = root.pick('b')
        node_d = root.pick('d')
        node_b.result = "recently "
        node_d.result = "changed"
        assert old_content != root.content
        assert old_content == 'ce'
        assert root.content == 'recently changed'

    def test_deepcopy(self):
        """A deep copy must be equal to, but independent of the original."""
        sxpr = '(a (b c) (d (e f) (h i)))'
        original = RootNode(parse_sxpr(sxpr))
        original.with_pos(0)
        duplicate = copy.deepcopy(original)

        assert original.equals(duplicate)
        assert original.as_sxpr() == parse_sxpr(sxpr).as_sxpr()
        assert duplicate.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # an error added to the original must not appear in the copy
        original.add_error(original, Error('Test Error', 0))
        assert not duplicate.errors
        assert original.as_sxpr() != parse_sxpr(sxpr).as_sxpr()
        assert duplicate.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # changing a result in the original must not affect the copy
        original['d'].result = "x"
        assert not original.equals(duplicate)
        assert duplicate.equals(parse_sxpr(sxpr))
        assert original.equals(parse_sxpr('(a (b c) (d x))'))

        # this also checks for errors equality...
        assert parse_sxpr('(a (b c) (d x))').as_sxpr() != original.as_sxpr()

    def test_str(self):
        """str() of a tree is checked against the joined leaf strings."""
        assert str(self.unique_tree) == "ceh"
        assert str(self.recurr_tree) == "xey"

    def test_select_subnodes(self):
        """Selecting every node, root included, yields all tag names."""
        selected = self.unique_tree.select_if(lambda nd: True, include_root=True)
        joined = ''.join(nd.tag_name for nd in selected)
        assert joined == "abdfg", joined

    def test_find(self):
        """select_if() finds nodes by result as well as by tag name."""
        def is_leaf_e(nd):
            return not nd.children and nd.result == "e"

        hits = list(self.unique_tree.select_if(is_leaf_e))
        assert len(hits) == 1
        assert hits[0].result == 'e'

        hits = list(self.recurr_tree.select_if(lambda nd: nd.tag_name == 'b'))
        assert len(hits) == 2
        assert hits[0].result == 'x' and hits[1].result == 'y'

    def test_equality1(self):
        """equals() compares trees structurally."""
        assert self.unique_tree.equals(self.unique_tree)
        assert not self.recurr_tree.equals(self.unique_tree)
        assert not parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b d))'))
        assert parse_sxpr('(a (b c))').equals(parse_sxpr('(a (b c))'))

    def test_equality2(self):
        """A parsed and transformed tree equals the hand-written tree."""
        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        transformation_table = {
            "term": [remove_empty, remove_whitespace,
                     replace_by_single_child, flatten],
            "factor": [remove_empty, remove_whitespace, reduce_single_child],
            "*": [remove_empty, remove_whitespace, replace_by_single_child],
        }
        grammar = grammar_provider(ebnf)()
        parsed = grammar("20 / 4 * 3")
        traverse(parsed, transformation_table)
        expected = parse_sxpr(
            "(term (term (factor 20) (:Token /) (factor 4)) (:Token *) (factor 3))")
        assert parsed.equals(expected), parsed.as_sxpr()

    def test_copy(self):
        """Changing a deep copy makes it unequal to the original."""
        clone = copy.deepcopy(self.unique_tree)
        assert clone.equals(self.unique_tree)
        first_child = clone.result[0]
        assert first_child.result != "epsilon"  # just to make sure...
        first_child.result = "epsilon"
        assert not clone.equals(self.unique_tree)

    def test_copy2(self):
        """Copies must compile to the same result as the original tree."""
        # test if Node.__deepcopy__ goes sufficiently deep for ast-
        # transformation and compiling to perform correctly after copy
        def differing_chars(left, right):
            # characters of `left` that differ from `right` at the same index
            return ''.join(a for a, b in zip(left, right) if a != b)

        ebnf = 'term = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
        parser = get_ebnf_grammar()
        transform = get_ebnf_transformer()
        compiler = get_ebnf_compiler()
        tree = parser(ebnf)

        first_copy = copy.deepcopy(tree)
        transform(first_copy)
        res1 = compiler(first_copy)
        second_copy = copy.deepcopy(first_copy)
        res2 = compiler(second_copy)
        # differences should only be ID-Numbers
        assert differing_chars(res1, res2).isnumeric()

        third_copy = copy.deepcopy(tree)
        transform(third_copy)
        res3 = compiler(third_copy)
        # differences should only be ID-Numbers
        assert differing_chars(res2, res3).isnumeric()

        transform(tree)
        res4 = compiler(tree)
        # differences should only be ID-Numbers
        assert differing_chars(res3, res4).isnumeric()

    def test_len_and_pos(self):
        """Test len-property and position assignment of Node."""
        left = Node(ZOMBIE_TAG, "123")
        assert len(left) == 3, "Expected Node.len == 3, got %i" % len(left)
        right = Node(ZOMBIE_TAG, "456")
        assert len(right) == 3, "Expected Node.len == 3, got %i" % len(right)
        parent = Node(ZOMBIE_TAG, (left, right))
        assert len(parent) == 6, "Expected Node.len == 6, got %i" % len(parent)

        parent.with_pos(0)
        assert parent.pos == 0, "Expected Node.pos == 0, got %i" % parent.pos
        assert left.pos == 0, "Expected Node.pos == 0, got %i" % left.pos
        assert right.pos == 3, "Expected Node.pos == 3, got %i" % right.pos

    def test_xml_sanitizer(self):
        """The characters '<', '&' and '>' are escaped in the XML output."""
        special = Node('tag', '<&>')
        assert special.as_xml() == '<tag>&lt;&amp;&gt;</tag>'

di68kap's avatar
di68kap committed
252 253 254

class TestRootNode:
    """Tests for the error bookkeeping and error reporting of RootNode."""

    def test_error_handling(self):
        """Errors added before swallowing a tree are kept and ordered.

        The errors are registered in reverse document order ("error C"
        first, then "error B"), so the final check only passes if
        ``errors_sorted`` really reorders them.
        NOTE(review): this assumes ``errors_sorted`` orders by position and
        that ``str(error)`` contains the error message -- confirm against
        DHParser.syntaxtree / DHParser.error.
        """
        tree = parse_sxpr('(A (B D) (C E))')
        tree.with_pos(0)
        root = RootNode()
        root.new_error(tree.children[1], "error C")
        root.new_error(tree.children[0], "error B")
        root.swallow(tree)
        assert root.error_flag
        errors = root.errors_sorted
        # reading the sorted errors must not reset the error flag
        assert root.error_flag
        error_str = "\n".join(str(e) for e in errors)
        # The former check compared find("A") with find("B"); as no message
        # contains "A", it could never fail. Compare the two messages instead.
        pos_b = error_str.find("error B")
        pos_c = error_str.find("error C")
        assert 0 <= pos_b < pos_c, error_str

    def test_error_reporting(self):
        """The string form of a partially parsed result embeds the error."""
        number = RE(r'\d+') | RE(r'\d+') + RE(r'\.') + RE(r'\d+')
        result = str(Grammar(number)("3.1416"))
        assert result == '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> ', \
            result

di68kap's avatar
di68kap committed
275

di68kap's avatar
di68kap committed
276 277
class TestNodeFind:
    """Tests for the item-access and search functions of class Node.
    """

    def test_find(self):
        """select_if() yields the matching nodes in document order."""
        def has_tag(tag_name):
            # build a match function for nodes carrying the given tag name
            def matches(node):
                return node.tag_name == tag_name
            return matches

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        found = list(tree.select_if(has_tag("X")))
        assert len(found) == 2, len(found)
        first, second = found
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first.equals(parse_sxpr('(X (c d))'))
        assert second.equals(parse_sxpr('(X F)'))

        # the root node is found only if include_root is True
        root_matcher = has_tag('a')
        assert list(tree.select_if(root_matcher, include_root=True))
        assert not list(tree.select_if(root_matcher, include_root=False))

    def test_getitem(self):
        """Children are accessible by index; a bad index raises IndexError."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        last_child = parse_sxpr('(e (X F))')
        assert tree[0].equals(parse_sxpr('(b X)'))
        assert tree[2].equals(last_child)
        assert tree[-1].equals(last_child)
        try:
            tree[3]
        except IndexError:
            pass
        else:
            assert False, "IndexError expected!"

        found = list(tree.select('X', False))
        assert found[0].equals(parse_sxpr('(X (c d))'))
        assert found[1].equals(parse_sxpr('(X F)'))

    def test_contains(self):
        """The `in` operator is compared with select() for several tags."""
        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

        # the tag of the root node itself
        assert 'a' not in tree
        assert any(tree.select('a', True))
        assert not any(tree.select('a', False))

        # tags of immediate children
        for child_tag in ('b', 'X', 'e'):
            assert child_tag in tree

        # 'c' occurs only further down; select() finds it, `in` does not
        assert 'c' not in tree
        assert any(tree.select('c', False))

    def test_index(self):
        """index() returns a child's position or raises ValueError."""
        tree = parse_sxpr('(a (b 0) (c 1) (d 2))')
        assert tree.index('d') == 2
        assert tree.index('b') == 0
        assert tree.index('c') == 1
        try:
            tree.index('x')
        except ValueError:
            pass
        else:
            raise AssertionError('ValueError expected!')

332

333
class TestSerialization:
    """Tests for the S-expression and XML serialization of trees."""

    def test_sxpr_roundtrip(self):
        """Placeholder: no S-expression round-trip test implemented yet."""
        pass

    def test_sexpr_attributes(self):
        """Attributes set on a node serialize like attributes that were parsed."""
        tree = parse_sxpr('(A "B")')
        tree.attr['attr'] = "value"
        tree2 = parse_sxpr('(A `(attr "value") "B")')
        assert tree.as_sxpr() == tree2.as_sxpr()
        tree.attr['attr2'] = "value2"
        tree3 = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
        assert tree.as_sxpr() == tree3.as_sxpr()

    def test_sexpr(self):
        """With flatten_threshold=0 every node and string gets its own line."""
        tree = parse_sxpr('(A (B "C") (D "E"))')
        s = tree.as_sxpr(flatten_threshold=0)
        assert s == '(A\n  (B\n    "C"\n  )\n  (D\n    "E"\n  )\n)', s

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        s = tree.as_sxpr(flatten_threshold=0)
        assert s == '(A\n  (B\n    (C\n      "D"\n    )\n    (E\n      "F"\n    )' \
            '\n  )\n  (G\n    "H"\n  )\n)', s

        # multi-line content is written as one string per line
        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        s = tree.as_sxpr(flatten_threshold=0)
        assert s == '(A\n  (B\n    (C\n      "D"\n      "X"\n    )' \
            '\n    (E\n      "F"\n    )\n  )\n  (G\n    " H "\n    " Y "\n  )\n)', s

    def test_compact_representation(self):
        """The compact form has no brackets and shows nesting by indentation."""
        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C "D"\n    E "F"\n  G "H"', compact

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        compact = tree.as_sxpr(compact=True)
        assert compact == 'A\n  B\n    C\n      "D"\n      "X"\n    E "F"' \
            '\n  G\n    " H "\n    " Y "', compact

    def test_xml_inlining(self):
        """Check the XML layout with and without inlined tags."""
        tree = parse_sxpr('(A (B "C") (D "E"))')

        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B>C</B><D>E</D></A>", xml

        # Bind the value under test first, so that a failure reports it
        # rather than the output of the preceding call.
        xml = tree.as_xml()
        assert xml == "<A>\n  <B>C</B>\n  <D>E</D>\n</A>", xml

        tree.attr['xml:space'] = 'preserve'
        xml = tree.as_xml()
        assert xml == '<A xml:space="preserve"><B>C</B><D>E</D></A>', xml

        tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')

        xml = tree.as_xml(inline_tags={'B'})
        assert xml == "<A>\n  <B><C>D</C><E>F</E></B>\n  <G>H</G>\n</A>", xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == "<A><B><C>D</C><E>F</E></B><G>H</G></A>", xml

        tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
        xml = tree.as_xml()
        assert xml == '<A>\n  <B>\n    <C>\n      D\n      X\n    </C>\n    ' \
            '<E>F</E>\n  </B>\n  <G>\n     H \n     Y \n  </G>\n</A>', xml
        xml = tree.as_xml(inline_tags={'A'})
        assert xml == '<A><B><C>D\nX</C><E>F</E></B><G> H \n Y </G></A>', xml

    # def test_xml2(self):
    #     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    #     print(tree.as_xml())
    #     print(tree.as_xml(inline_tags={'A'}))
398

di68kap's avatar
di68kap committed
399

400
if __name__ == "__main__":
    # Executed as a script: hand this module's namespace to DHParser's
    # test runner; the empty string is passed as its first argument.
    from DHParser.testing import runner
    runner("", globals())