# test_parse.py — unit tests for DHParser's parser module
        if is_logging():
1002
            log_ST(syntax_tree, "test_PopRetrieve_multi_line.cst")
1003

1004
    def test_autoretrieve(self):
        """Tests retrieval (`:symbol`) of captured symbols and the optional
        pop (`[ :?symbol ]`) in the EOF-production, which — per the comment
        in the grammar — eats up any still captured 'defsign'-values."""
        lang = r"""@literalws = right
            document   = { definition } § EOF
            definition = symbol :defsign value
            symbol     = /\w+/~                      
            defsign    = "=" | ":="
            value      = /\d+/~
            EOF        = !/./ [ :?defsign ]   # eat up captured defsigns
        """
        # print(raw_compileEBNF(lang).result)
        # set_config_value('compiled_EBNF_log', 'mylog.txt')
        parser = grammar_provider(lang)()
        st = parser("X := 1")
        assert not st.error_flag, str(st.errors)
        st1 = st  # reference tree for the grammar-reordering checks below
        st = parser("")
        assert not st.error_flag  # the empty document parses without errors

        # Reordering the definitions must not alter the resulting tree:
        # first, move the EOF-definition up to position 2 ...
        lines = [line for line in lang.split('\n') if line.strip()]
        eof_line = lines.pop()
        lines.insert(2, eof_line)
        lang = '\n'.join(lines)
        parser = grammar_provider(lang)()
        st = parser("X := 1")
        assert not st.errors, str(st.errors)
        assert st.equals(st1)

        # ... then to position 3
        del lines[2]
        lines.insert(3, eof_line)
        lang = '\n'.join(lines)
        parser = grammar_provider(lang)()
        st = parser("X := 1")
        assert not st.errors
        assert st.equals(st1)

        # and, finally... a variant where the pop in the EOF-production is
        # not optional (no brackets around :?defsign)
        lang_variant = r"""@literalws = right
            document   = { definition } § EOF
            symbol     = /\w+/~                      
            defsign    = "=" | ":="
            value      = /\d+/~
            EOF        = !/./ :?defsign   # eat up captured defsign, only if it has been retrieved
            definition = symbol :defsign value
        """
        parser = grammar_provider(lang_variant)()
        st = parser("X := 1")
        assert not st.errors
        assert st.equals(st1)
        st = parser('')
        # here the empty document must fail: EOF requires a captured defsign
        assert "'EOF' expected" in str(st.errors), st.as_sxpr()
class TestWhitespaceHandling:
    """String-tokens (with @literalws = right) absorb trailing whitespace;
    plain regular-expression parsers do not."""
    minilang = """@literalws = right
        doc = A B
        A = "A"
        B = "B"
        Rdoc = ar br
        ar = /A/
        br = /B/
        """
    gr = grammar_provider(minilang)()

    def test_token_whitespace(self):
        # both "AB" and "A B" parse, because "A" and "B" are string-tokens
        for sample in ("AB", "A B"):
            tree = self.gr(sample, 'doc')
            assert not tree.error_flag

    def test_regexp_whitespace(self):
        # /A/ and /B/ are regexes and do not eat any whitespace
        tree = self.gr("AB", 'Rdoc')
        assert not tree.error_flag
        tree = self.gr("A B", 'Rdoc')
        assert tree.error_flag
class TestErrorReporting:
    grammar = """
        root      = series alpha | anything
        series    = subseries &alpha
        subseries = alpha §beta
        alpha     = /[a-z]+/
        beta      = /[A-Z]+/
        anything  = /.*/
        """

    def setup(self):
        self.parser = grammar_provider(self.grammar)()

    def test_error_propagation(self):
        """A failed mandatory item (§beta) inside the `series`-branch must
        not keep `root` from falling back to the `anything`-alternative."""
        good_sample = "halloB"
        fallback_sample = "XYZ"
        bad_sample = "hallo "
        tree = self.parser(good_sample)
        assert not tree.error_flag, str(tree.errors_sorted)
        tree = self.parser(fallback_sample)
        assert not tree.error_flag
        tree = self.parser(bad_sample)
        assert tree.error_flag
class TestBorderlineCases:
    """Behaviour of trivial one-rule grammars on (nearly) empty documents."""

    def test_not_matching(self):
        # /X/ matches 'X' but neither a blank nor the empty string
        gr = grammar_provider("""parser = /X/\n""")()
        tree = gr('X', 'parser')
        assert not tree.error_flag
        for document in (' ', ''):
            tree = gr(document, 'parser')
            assert tree.error_flag
            assert tree.errors_sorted[0].code == PARSER_STOPPED_BEFORE_END

    def test_matching(self):
        # /.?/ matches one character or the empty string, but not two characters
        gr = grammar_provider("""parser = /.?/""")()
        assert not gr(' ', 'parser').error_flag
        tree = gr('  ', 'parser')
        assert tree.error_flag
        assert tree.errors_sorted[0].code == PARSER_STOPPED_BEFORE_END
        assert not gr('', 'parser').error_flag
# Deliberately broken EBNF-grammar for TestReentryAfterError.test_bigfattest:
# every spot marked with "$" must yield an error at a known (line, column).
# NOTE(review): the exact line layout of this string (including the two blank
# lines after the @drop-directive) is significant, because test_bigfattest
# asserts hard-coded error locations.
EBNF_with_Errors = r"""# Test code with errors. All places marked by a "$" should yield and error

@ comment    = /#.*(?:\n|$)/
@ whitespace = /\s*/
@ literalws  = right
@ disposable = pure_elem, EOF
@ drop       = whitespace, EOF


# re-entry-rules for resuming after parsing-error
@ definition_resume = /\n\s*(?=@|\w+\w*\s*=)/
@ directive_resume  = /\n\s*(?=@|\w+\w*\s*=)/

# specialized error messages for certain cases

@ definition_error  = /,/, 'Delimiter "," not expected in definition!\nEither this was meant to '
                           'be a directive and the directive symbol @ is missing\nor the error is '
                           'due to inconsistent use of the comma as a delimiter\nfor the elements '
                           'of a sequence.'

#: top-level

syntax     = [~//] { definition | directive } §EOF
definition = symbol §:DEF~ expression :ENDL~
directive  = "@" §symbol "="
             (regexp | literals | symbol)
             { "," (regexp | literals | symbol) }

#: components

expression = sequence { :OR~ sequence }
sequence   = ["§"] ( interleave | lookaround )
             { :AND~ ["§"] ( interleave | lookaround ) }
interleave = difference { "°" ["§"] difference }
lookaround = flowmarker § (oneormore | pure_elem)
difference = term ["-" § (oneormore $ pure_elem)]               # <- ERROR
term       = oneormore | repetition | option | pure_elem        # resuming expected her

#: elements

pure_elem  = element § !/[?*+]/
element    = [retrieveop] symbol !DEF
           | literal
           | plaintext
           | regexp
           | whitespace
           | group$                                             # <- ERROR

#: flow-operators

flowmarker = "!"  | "&"                                         # resuming expected her
           | "<-!" | "<-&"
retr$ieveop = "::" | ":?" | ":"

#: groups

group      = "(" §expression ")"
oneormore  = "{" expression "}+" | element "+"
repetition = "{" §expressi$on "}" | element "*"                 # <- ERROR
option     = "[" §expression "]" | element "?"                  # resuming expected here

#: leaf-elements

symbol     = /(?!\d)\w+/~
$literals   = { literal }+                                      # <- ERROR
literal    = /"(?:(?<!\\)\\"|[^"])*?"/~                         # resuming expected her
           | /'(?:(?<!\\)\\'|[^'])*?'/~
plaintext  = /`(?:(?<!\\)\\`|[^`])*?`/~
regexp     = /\/(?:(?<!\\)\\(?:\/)|[^\/])*?\//~
whitespace = /~/~

#: delimiters

DEF        = `=` | `:=` | `::=`
OR         = `|`
AND        = `,` | ``
ENDL       = `;` | ``

EOF = !/./ [:?DEF] [:?OR] [:?AND] [:?ENDL]
"""
class TestReentryAfterError:
    # Grammar with several mandatory-continuation markers (§); used to check
    # how the parser resumes ("re-enters") after a mandatory item failed.
    testlang = """@literalws = right
    document = alpha [beta] gamma "."
      alpha = "ALPHA" abc
        abc = §"a" "b" "c"
      beta = "BETA" (bac | bca)
        bac = "b" "a" §"c"
        bca = "b" "c" §"a"
      gamma = "GAMMA" §(cab | cba)
        cab = "c" "a" §"b"
        cba = "c" "b" §"a"
    """
    gr = grammar_provider(testlang)()

    def test_no_resume_rules(self):
        """Even without resume-rules the returned tree covers the whole
        document, with the failure recorded as an error."""
        gr = self.gr;  gr.resume_rules = dict()
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')

    def test_no_resume_rules_partial_parsing(self):
        """Same as above, but parsing starts at the 'alpha'-parser."""
        gr = self.gr;  gr.resume_rules = dict()
        content = 'ALPHA acb'
        cst = gr(content, 'alpha')
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')

    def test_simple_resume_rule(self):
        """A single regex resume-rule lets parsing continue at 'BETA'."""
        gr = self.gr;  gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = [re.compile(r'(?=BETA)')]
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len([err for err in cst.errors_sorted if err.code >= 1000]) == 1

    def test_failing_resume_rule(self):
        gr = self.gr;  gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = [re.compile(r'(?=XXX)')]  # 'XXX' never occurs in the input
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        # assert cst.pick('alpha').content.startswith('ALPHA')

    def test_several_reentry_points(self):
        gr = self.gr;  gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = [re.compile(r'(?=BETA)'), re.compile(r'(?=GAMMA)')]
        content = 'ALPHA acb BETA bac GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len([err for err in cst.errors_sorted if err.code >= 1000]) == 1

    def test_several_reentry_points_second_point_matching(self):
        """If the first reentry-regex does not match (no 'BETA' in the input),
        the second one ('GAMMA') must be used."""
        gr = self.gr;  gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = [re.compile(r'(?=BETA)'), re.compile(r'(?=GAMMA)')]
        content = 'ALPHA acb GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.errors_sorted) == 1
        # with resume-notices turned on, a notice is added to the errors
        resume_notices_on(gr)
        cst = gr(content)
        assert len(cst.errors) == 2 and any(err.code == RESUME_NOTICE for err in cst.errors)

    def test_several_resume_rules_innermost_rule_matching(self):
        gr = self.gr;  gr.resume_rules = dict()
        gr.resume_rules__['alpha'] = [re.compile(r'(?=BETA)'), re.compile(r'(?=GAMMA)')]
        gr.resume_rules__['beta'] = [re.compile(r'(?=GAMMA)')]
        gr.resume_rules__['bac'] = [re.compile(r'(?=GAMMA)')]
        content = 'ALPHA abc BETA bad GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len([err for err in cst.errors_sorted if err.code >= 1000]) == 1

        # multiple failures
        content = 'ALPHA acb BETA bad GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # there should be only two error messages
        assert len([err for err in cst.errors_sorted if err.code >= 1000]) == 2

    def test_algorithmic_resume(self):
        """Resume- and skip-points can also be computed by a function
        (referenced as `next_valid_letter()`) instead of a regex."""
        lang = r"""
            document = block_A block_B
            @ block_A_resume = next_valid_letter()
            @ block_A_skip = next_valid_letter()
            block_A = "a" §"b" "c"
            block_B = "x" "y" "z"
            """
        proc = """
def next_valid_letter(text, start, end):
    L = len(text)
    end = min(L, max(L, end))
    while start < len(text):
        if str(text[start]) in 'abcxyz':
            return start, 0
        start += 1
    return -1, 0
"""
        parser = create_parser(lang, additional_code=proc)
        tree = parser('ab*xyz')
        assert 'block_A' in tree and 'block_B' in tree
        assert tree.pick('ZOMBIE__')  # the skipped '*' ends up in a zombie-node

    def test_skip_comment_on_resume(self):
        lang = r"""@literalws = right
            @ comment =  /(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)/  # Kommentare im C++-Stil
            document = block_A block_B
            @ block_A_resume = /(?=x)/
            block_A = "a" §"b" "c"
            block_B = "x" "y" "z"
        """
        def mini_suite(grammar):
            # comments in front of the reentry-point must be skipped, too
            tree = grammar('abc/*x*/xyz')
            assert not tree.errors
            tree = grammar('abDxyz')
            mandatory_cont = (MANDATORY_CONTINUATION, MANDATORY_CONTINUATION_AT_EOF)
            assert len(tree.errors) == 1 and tree.errors[0].code in mandatory_cont
            tree = grammar('abD/*x*/xyz')
            assert len(tree.errors) == 1 and tree.errors[0].code in mandatory_cont
            tree = grammar('aD /*x*/ c /* a */ /*x*/xyz')
            assert len(tree.errors) == 1 and tree.errors[0].code in mandatory_cont

        # test regex-defined resume rule
        grammar = grammar_provider(lang)()
        mini_suite(grammar)

    def test_unambiguous_error_location(self):
        lang = r"""
            @ literalws   = right
            @ drop        = whitespace, strings  # drop strings and whitespace early
           
            @object_resume = /(?<=\})/
           
            json       = ~ value EOF
            value      = object | string 
            object     = "{" [ member { "," §member } ] "}"
            member     = string §":" value
            string     = `"` CHARACTERS `"` ~

            CHARACTERS = { /[^"\\]+/ }                  
            EOF      =  !/./        # no more characters ahead, end of file reached
            """
        test_case = """{
                "missing member": "abcdef",
            }"""
        gr = grammar_provider(lang)()
        cst = gr(test_case)
        assert any(err.code == MANDATORY_CONTINUATION for err in cst.errors)

    def test_bigfattest(self):
        """Parses the deliberately broken EBNF_with_Errors grammar and checks
        that every error is reported at the expected (line, column)-location."""
        gr = copy.deepcopy(get_ebnf_grammar())
        resume_notices_on(gr)
        cst = gr(EBNF_with_Errors)
        locations = []
        for error in cst.errors_sorted:
            locations.append((error.line, error.column))
        assert locations == [(36, 37), (37, 1), (47, 19), (51, 1), (53, 5),
                             (57, 1), (59, 27), (60, 1), (65, 1), (66, 1)]
class TestConfiguredErrorMessages:
    def test_configured_error_message(self):
        """A @..._error-directive with a malformed format-string (here "{5}")
        yields a MALFORMED_ERROR_STRING error in addition to the
        mandatory-continuation error itself."""
        lang = """
            document = series | /.*/
            @series_error = "a badly configured error message {5}"
            series = /X/ | head §"C" "D"
            head = "A" "B"
            """
        parser = grammar_provider(lang)()
        tree = parser("AB_D")
        assert tree.error_flag
        codes = [err.code for err in tree.errors_sorted]
        assert codes[0] == MALFORMED_ERROR_STRING
        assert codes[1] == MANDATORY_CONTINUATION
class TestUnknownParserError:
    def test_unknown_parser_error(self):
        """Calling a grammar with a non-existent start-parser name must raise."""
        grammar = Grammar()
        raised = False
        try:
            grammar("", "NonExistantParser")
        except AttributeError:
            raised = True
        assert raised, "UnknownParserError expected!"
class TestEarlyTokenWhitespaceDrop:
    """With '@ drop = strings, whitespace' anonymous string-tokens and
    whitespace never appear in the tree — except where a token is the sole
    content of a named parser (constant, fixed)."""
    lang = r"""
        @ drop = strings, whitespace
        expression = term  { ("+" | "-") term}
        term       = factor  { ("*"|"/") factor}
        factor     = number | variable | "("  expression  ")"
                   | constant | fixed
        variable   = /[a-z]/~
        number     = /\d+/~
        constant   = "A" | "B"
        fixed      = "X"
        """
    gr = grammar_provider(lang)()

    def test_drop(self):
        tree = self.gr('4 + 3 * 5')
        assert not tree.pick(':Text')
        assert not tree.pick(':Whitespace')
        tree = self.gr('A + B')
        # the token inside the compound expression must have been dropped
        dropped = next(tree.select_if(lambda nd: nd.content == 'A'), None)
        assert dropped is None, "Tokens in compound expressions should be dropped!"
        tree = self.gr('X * y')
        assert next(tree.select_if(lambda nd: nd.content == 'X'))
class TestMetaParser:
    # A bare CombinedParser, configured by hand as if it were a named parser.
    mp = CombinedParser()
    mp.grammar = Grammar()  # override placeholder warning
    mp.pname = "named"
    mp.disposable = False
    mp.tag_name = mp.pname

    def test_return_value(self):
        """_return_value wraps a child-node in a node named after the parser;
        anonymous children are flattened into the parent instead.
        NOTE: the checks below depend on the mutation of self.mp half-way
        through (switching it to an unnamed, disposable parser)."""
        # named parser, named (non-anonymous) child: child is kept
        nd = self.mp._return_value(Node('tagged', 'non-empty'))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert len(nd.children) == 1
        assert nd.children[0].tag_name == 'tagged'
        assert nd.children[0].result == "non-empty"
        # ... even if the child is empty
        nd = self.mp._return_value(Node('tagged', ''))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert len(nd.children) == 1
        assert nd.children[0].tag_name == 'tagged'
        assert not nd.children[0].result
        # anonymous child: its content is pulled up into the named node
        nd = self.mp._return_value(Node(':anonymous', 'content'))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert not nd.children
        assert nd.result == 'content'
        nd = self.mp._return_value(Node(':anonymous', ''))
        assert nd.tag_name == 'named', nd.as_sxpr()
        assert not nd.children
        assert not nd.content
        nd = self.mp._return_value(EMPTY_NODE)
        assert nd.tag_name == 'named' and not nd.children, nd.as_sxpr()
        # now turn self.mp into an unnamed, disposable parser
        self.mp.pname = ''
        self.mp.disposable = True
        self.mp.tag_name = ':unnamed'
        # unnamed parser: named children are passed through unchanged
        nd = self.mp._return_value(Node('tagged', 'non-empty'))
        assert nd.tag_name == 'tagged', nd.as_sxpr()
        assert len(nd.children) == 0
        assert nd.content == 'non-empty'
        nd = self.mp._return_value(Node('tagged', ''))
        assert nd.tag_name == 'tagged', nd.as_sxpr()
        assert len(nd.children) == 0
        assert not nd.content
        nd = self.mp._return_value(Node(':anonymous', 'content'))
        assert nd.tag_name == ':anonymous', nd.as_sxpr()
        assert not nd.children
        assert nd.result == 'content'
        nd = self.mp._return_value(Node('', ''))
        assert nd.tag_name == '', nd.as_sxpr()
        assert not nd.children
        assert not nd.content
        # None and EMPTY_NODE both map to EMPTY_NODE
        assert self.mp._return_value(None) == EMPTY_NODE
        assert self.mp._return_value(EMPTY_NODE) == EMPTY_NODE

    def test_return_values(self):
        """EMPTY_NODE entries in a result-tuple are replaced, not passed on."""
        self.mp.pname = "named"
        self.mp.tag_name = self.mp.pname
        rv = self.mp._return_values((Node('tag', 'content'), EMPTY_NODE))
        # print(rv.as_sxpr())
        assert rv[-1].tag_name != EMPTY_NODE.tag_name, rv[-1].tag_name

    def test_in_context(self):
        """An empty match of a *named* parser (MUL via &factor) must still
        show up as a node in the tree."""
        minilang = r"""
            term       = factor  { (DIV|MUL) factor}
            factor     = NUMBER | VARIABLE
            MUL        = "*" | &factor
            DIV        = "/"
            NUMBER     = /(?:0|(?:[1-9]\d*))(?:\.\d+)?/~
            VARIABLE   = /[A-Za-z]/~
            """
        gr = grammar_provider(minilang)()
        cst = gr("2x")
        assert bool(cst.pick('MUL')), "Named empty nodes should not be dropped!!!"
class TestParserCombining:
    """The operators +, | and * combine parsers into Series, Alternative and
    Interleave objects and keep the combination flat when extended."""

    def test_series(self):
        combo = RegExp(r'\d+') + RegExp(r'\.')
        assert isinstance(combo, Series)
        combo += RegExp(r'\d+')
        assert isinstance(combo, Series)
        assert len(combo.parsers) == 3
        combo = Text(">") + combo      # prepending flattens into one Series
        assert isinstance(combo, Series)
        assert len(combo.parsers) == 4
        combo = combo + Text("<")      # appending flattens, too
        assert isinstance(combo, Series)
        assert len(combo.parsers) == 5

    def test_alternative(self):
        combo = RegExp(r'\d+') | RegExp(r'\.')
        assert isinstance(combo, Alternative)
        combo |= RegExp(r'\d+')
        assert isinstance(combo, Alternative)
        assert len(combo.parsers) == 3
        combo = Text(">") | combo
        assert isinstance(combo, Alternative)
        assert len(combo.parsers) == 4
        combo = combo | Text("<")
        assert isinstance(combo, Alternative)
        assert len(combo.parsers) == 5

    def test_interleave(self):
        combo = RegExp(r'\d+') * RegExp(r'\.')
        assert isinstance(combo, Interleave)
        combo *= RegExp(r'\d+')
        assert isinstance(combo, Interleave)
        assert len(combo.parsers) == 3
        combo = Text(">") * combo
        assert isinstance(combo, Interleave)
        assert len(combo.parsers) == 4
        combo = combo * Text("<")
        assert isinstance(combo, Interleave)
        assert len(combo.parsers) == 5

    def test_mixed_combinations(self):
        # + binds tighter than |, so the left side becomes a Series
        combo = RegExp(r'\d+') + RegExp(r'\.') + RegExp(r'\d+') | RegExp(r'\d+')
        assert isinstance(combo, Alternative)
        assert len(combo.parsers) == 2
        assert isinstance(combo.parsers[0], Series)
        assert len(combo.parsers[0].parsers) == 3
        assert isinstance(combo.parsers[1], RegExp)
class TestStaticAnalysis:
    """Checks performed on the grammar itself, before any document is parsed."""

    def setup(self):
        # force static analysis at grammar-construction time
        self.saved_setting = get_config_value('static_analysis')
        set_config_value('static_analysis', 'early')

    def teardown(self):
        set_config_value('static_analysis', self.saved_setting)

    def test_cannot_capture_dropped_content(self):
        capture_of_dropped = Capture(Drop(Whitespace(" ")))
        raised = False
        try:
            _ = Grammar(capture_of_dropped)
        except GrammarError as err:
            raised = True
            assert err.errors and err.errors[0][-1].code == CAPTURE_DROPPED_CONTENT_WARNING, \
                "Capture-dropped-content-Warning expected"
        assert raised, "GrammarError expected"

    def test_cyclical_ebnf_error(self):
        root = Text('proper')
        root.pname = "doc"
        grammar = Grammar(root)
        # grammar.static_analysis__()
        lang = "doc = 'proper'  # this works!"
        lang1 = "doc = { doc }  # this parser never reaches a leaf parser."
        lang2 = """doc = word | sentence  # a more convoluted example
                word = [sentence] doc 
                sentence = { word }+ | sentence"""
        code, errors, ast = compile_ebnf(lang, preserve_AST=True)
        assert not ast.errors
        # both cyclical grammars must be flagged at compile-time
        for cyclical in (lang1, lang2):
            code, errors, ast = compile_ebnf(cyclical, preserve_AST=True)
            assert any(e.code == PARSER_NEVER_TOUCHES_DOCUMENT for e in errors)
        # for e in errors:
        #     print(e)
class TestMemoization:
    def test_memoization(self):
        """Parsers that are identical up to their name should share the same
        equality-class (``eq_class``), so memoized results can be shared."""
        words = r'''@literalws = right
        list = word { ',' word } §EOF
        word = wordA | wordB | wordC
        wordA = `"` /[Aa]\w+/ '"'
        wordB = `"` /[Bb]\w+/ '"'
        wordC = `"` /[Cc]\w+/ '"'
        EOF = /$/'''
        grammar = create_parser(words, 'words')

        # print(grammar.python_src__)
        # the leading `"`-parsers of wordA/B/C ...
        p1 = grammar.wordC.parsers[0]
        p2 = grammar.wordB.parsers[0]
        p3 = grammar.wordA.parsers[0]

        # ... and the trailing '"'-parsers
        ps1 = grammar.wordC.parsers[-1]
        ps2 = grammar.wordB.parsers[-1]
        ps3 = grammar.wordA.parsers[-1]

        # BUGFIX: p5 and p6 formerly re-read ps1.parsers[0] three times
        # (copy-paste error); they are meant to come from ps2 and ps3.
        p4 = ps1.parsers[0]
        p5 = ps2.parsers[0]
        p6 = ps3.parsers[0]

        # BUGFIX: these were `assert (genexpr)`, which asserts the truthiness
        # of the generator object itself and therefore always passed;
        # all() makes the comparisons take effect.
        assert all(p.eq_class == p1.eq_class for p in (p2, p3, p4, p5, p6))
        assert all(p.eq_class == ps1.eq_class for p in (ps2, ps3))

        cst = grammar('"camma", "beta", "alpha"')
        assert not cst.errors, str(cst.errors)  # the sample must parse cleanly
class TestStringAlternative:
    def test_longest_match(self):
        """longest_match picks the longest string from the list that is a
        prefix of the text (optionally from a start-position), or ''."""
        strings = ['a', 'ab', 'ca', 'cd']
        cases = [('0abdd', ''), ('axcde', 'a'), ('cab', 'ca'), ('b', ''),
                 ('x', ''), ('a', 'a'), ('ab', 'ab'), ('ca', 'ca'),
                 ('cd', 'cd'), ('cb', ''), ('cdc', 'cd'), ('c', '')]
        for text, expected in cases:
            assert longest_match(strings, text) == expected, text
        strings = ['a', 'ab', 'abc', 'abcd']
        cases = [('abxyz', 'ab'), ('abcdxyz', 'abcd'), ('abcxyz', 'abc'),
                 ('axyz', 'a'), ('ax', 'a'), ('', '')]
        for text, expected in cases:
            assert longest_match(strings, text) == expected, text
        # degenerate case: empty list of strings
        assert longest_match([], 'a') == ''
        assert longest_match([], '') == ''
        # with an explicit start-position
        strings = ['abc', 'xy']
        assert longest_match(strings, 'xyzt', 2) == 'xy'
        assert longest_match(strings, 'abcdefg', 1) == 'abc'
        assert longest_match(strings, 'abcdefg', 2) == 'abc'
        assert longest_match(strings, 'ax12345', 2) == ''
        assert longest_match(strings, 'a', 2) == ''
# Allow running this test-module directly, outside of a test-framework:
if __name__ == "__main__":
    from DHParser.testing import runner
    runner("", globals())