error.py 14.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# error.py - error handling for DHParser
#
# Copyright 2016  by Eckhart Arnold (arnold@badw.de)
#                 Bavarian Academy of Sciences and Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.
17

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
"""
Module ``error`` defines class Error and a few helpful functions that are
needed for error reporting of DHParser. Usually, what is of interest are
the string representations of the error objects. For example::

    from DHParser import compile_source, has_errors

    result, errors, ast = compile_source(source, preprocessor, grammar,
                                         transformer, compiler)
    if errors:
        for error in errors:
            print(error)

        if has_errors(errors):
            print("There have been fatal errors!")
            sys.exit(1)
        else:
            print("There have been warnings, but no errors.")
36
37
38

The central class of module DHParser's ``error``  is the ``Error``-class.
The easiest way to create an error object is by instantiating
39
the Error class with an error message and a source position::
40

41
42
43
    >>> error = Error('Something went wrong', 123)
    >>> print(error)
    Error (1000): Something went wrong
44

45
46
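However, in order to report errors, usually at least a line and
column number are needed as well. These can be added to a list of
errors with function ``adjust_error_locations()``.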
47

48
49
"""

50
import os
eckhart's avatar
eckhart committed
51
from typing import Iterable, Iterator, Union, List, Any, Sequence, Tuple
52

53
from DHParser.preprocess import SourceMapFunc
54
from DHParser.stringview import StringView
55
from DHParser.toolkit import linebreaks, line_col, is_filename
56

57

Eckhart Arnold's avatar
Eckhart Arnold committed
58
59
__all__ = ('ErrorCode',
           'Error',
60
           'is_fatal',
61
62
63
64
           'is_error',
           'is_warning',
           'has_errors',
           'only_errors',
65
           'adjust_error_locations',
66
           'canonical_error_strings',
67
68
69
70
71
72
73
74
75
76
77
78
79
80
           'NO_ERROR',
           'NOTICE',
           'WARNING',
           'ERROR',
           'FATAL',
           'HIGHEST',
           'RESUME_NOTICE',
           'REDECLARED_TOKEN_WARNING',
           'UNUSED_ERROR_HANDLING_WARNING',
           'LEFT_RECURSION_WARNING',
           'UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING',
           'CANNOT_VERIFY_TRANSTABLE_WARNING',
           'CAPTURE_DROPPED_CONTENT_WARNING',
           'OPTIONAL_REDUNDANTLY_NESTED_WARNING',
81
           'UNCONNECTED_SYMBOL_WARNING',
82
           'REORDERING_OF_ALTERNATIVES_REQUIRED',
83
84
           'MANDATORY_CONTINUATION',
           'MANDATORY_CONTINUATION_AT_EOF',
eckhart's avatar
eckhart committed
85
           'PARSER_NEVER_TOUCHES_DOCUMENT',
86
87
88
89
           'PARSER_LOOKAHEAD_FAILURE_ONLY',
           'PARSER_STOPPED_BEFORE_END',
           'PARSER_LOOKAHEAD_MATCH_ONLY',
           'CAPTURE_STACK_NOT_EMPTY',
90
           'AUTORETRIEVED_SYMBOL_NOT_CLEARED',
91
92
93
94
95
96
97
98
99
100
101
102
           'MALFORMED_ERROR_STRING',
           'AMBIGUOUS_ERROR_HANDLING',
           'REDEFINED_DIRECTIVE',
           'UNDEFINED_RETRIEVE',
           'DIRECTIVE_FOR_NONEXISTANT_SYMBOL',
           'INAPPROPRIATE_SYMBOL_FOR_DIRECTIVE',
           'CAPTURE_WITHOUT_PARSERNAME',
           'LOOKAHEAD_WITH_OPTIONAL_PARSER',
           'BADLY_NESTED_OPTIONAL_PARSER',
           'BAD_MANDATORY_SETUP',
           'DUPLICATE_PARSERS_IN_ALTERNATIVE',
           'BAD_ORDER_OF_ALTERNATIVES',
103
           'BAD_REPETITION_COUNT',
104
           'MALFORMED_REGULAR_EXPRESSION',
105
           'EMPTY_GRAMMAR_ERROR',
106
107
           'TREE_PROCESSING_CRASH',
           'COMPILER_CRASH',
108
109
           'AST_TRANSFORM_CRASH',
           'RECURSION_DEPTH_LIMIT_HIT')
110
111


Eckhart Arnold's avatar
Eckhart Arnold committed
112
113
114
115
class ErrorCode(int):
    """Integer subtype used to mark a number as an error code.

    The class adds nothing to ``int``. It only allows error codes to be
    told apart from ordinary integers (such as text positions) with an
    ``isinstance``-check.
    """


116
117
118
119
120
121
122
123
# Error levels. Each level is the lowest code of its severity class:
# codes below WARNING count as notices, codes from WARNING up to (but
# not including) ERROR as warnings, codes from ERROR up to (but not
# including) FATAL as errors and codes from FATAL upwards as fatal errors.

NO_ERROR = ErrorCode(0)
NOTICE = ErrorCode(1)
WARNING = ErrorCode(100)
ERROR = ErrorCode(1000)
FATAL = ErrorCode(10000)
HIGHEST = FATAL

# notice codes

RESUME_NOTICE = ErrorCode(50)

# warning codes

REDECLARED_TOKEN_WARNING = ErrorCode(120)
UNUSED_ERROR_HANDLING_WARNING = ErrorCode(130)
LEFT_RECURSION_WARNING = ErrorCode(140)  # obsolete!

UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING = ErrorCode(610)
CANNOT_VERIFY_TRANSTABLE_WARNING = ErrorCode(620)
CAPTURE_DROPPED_CONTENT_WARNING = ErrorCode(630)
# NOTE(review): same code (630) as CAPTURE_DROPPED_CONTENT_WARNING, so the
# two warnings cannot be told apart by their code - confirm this is intended.
OPTIONAL_REDUNDANTLY_NESTED_WARNING = ErrorCode(630)
UNCONNECTED_SYMBOL_WARNING = ErrorCode(640)

REORDERING_OF_ALTERNATIVES_REQUIRED = ErrorCode(710)

# error codes

MANDATORY_CONTINUATION = ErrorCode(1010)
MANDATORY_CONTINUATION_AT_EOF = ErrorCode(1015)
PARSER_NEVER_TOUCHES_DOCUMENT = ErrorCode(1020)
PARSER_LOOKAHEAD_FAILURE_ONLY = ErrorCode(1030)
PARSER_STOPPED_BEFORE_END = ErrorCode(1040)
PARSER_LOOKAHEAD_MATCH_ONLY = ErrorCode(1045)
CAPTURE_STACK_NOT_EMPTY = ErrorCode(1050)
AUTORETRIEVED_SYMBOL_NOT_CLEARED = ErrorCode(1055)
MALFORMED_ERROR_STRING = ErrorCode(1060)
AMBIGUOUS_ERROR_HANDLING = ErrorCode(1070)
REDEFINED_DIRECTIVE = ErrorCode(1080)
UNDEFINED_RETRIEVE = ErrorCode(1090)
DIRECTIVE_FOR_NONEXISTANT_SYMBOL = ErrorCode(1100)
INAPPROPRIATE_SYMBOL_FOR_DIRECTIVE = ErrorCode(1110)

# EBNF-specific static analysis errors

CAPTURE_WITHOUT_PARSERNAME = ErrorCode(1510)
LOOKAHEAD_WITH_OPTIONAL_PARSER = ErrorCode(1520)
BADLY_NESTED_OPTIONAL_PARSER = ErrorCode(1530)
BAD_MANDATORY_SETUP = ErrorCode(1550)
DUPLICATE_PARSERS_IN_ALTERNATIVE = ErrorCode(1560)
BAD_ORDER_OF_ALTERNATIVES = ErrorCode(1570)
BAD_REPETITION_COUNT = ErrorCode(1580)
MALFORMED_REGULAR_EXPRESSION = ErrorCode(1585)
EMPTY_GRAMMAR_ERROR = ErrorCode(1590)

# fatal errors

TREE_PROCESSING_CRASH = ErrorCode(10100)
COMPILER_CRASH = ErrorCode(10200)
AST_TRANSFORM_CRASH = ErrorCode(10300)
RECURSION_DEPTH_LIMIT_HIT = ErrorCode(10400)
178

179

180
class Error:
    """Encapsulates all information about a single error, warning or notice.

    Attributes:
        message:  The error message.
        code:  The error code. Its magnitude determines the severity
            (see property ``severity``).
        pos:  The (non-negative) position of the error. Assigning a new
            position resets ``line``, ``column``, ``end_line`` and
            ``end_column`` to -1, because they may no longer be valid.
        orig_pos:  The position of the error in the original source text,
            or -1 if unknown. It is set by ``adjust_error_locations()``.
        orig_doc:  A string referring to the original document. It is
            merely stored by this class.
        line:  The line number of the error or -1 if unknown.
        column:  The column number of the error or -1 if unknown.
        length:  The length of the erroneous stretch of text (at least 1
            on construction).
        end_line:  The line where the erroneous stretch ends, -1 if unknown.
        end_column:  The column where the erroneous stretch ends, -1 if
            unknown.
        related:  A tuple of (Error, uri)-pairs of related errors. It is
            reported as 'relatedInformation' by ``diagnosticObj()``.
    """

    # NOTE(review): 'relatedUri' is declared as a slot, but it is never
    # assigned inside this class.
    __slots__ = ['message', 'code', '_pos', 'line', 'column', 'length',
                 'end_line', 'end_column', 'related', 'orig_pos', 'orig_doc',
                 'relatedUri']

    def __init__(self, message: str, pos: int, code: ErrorCode = ERROR,
                 line: int = -1, column: int = -1, length: int = 1,
                 related: Sequence[Tuple['Error', str]] = (),
                 orig_pos: int = -1, orig_doc: str = '') -> None:
        # These checks catch a swapped order of `pos` and `code`.
        assert isinstance(code, ErrorCode)
        assert not isinstance(pos, ErrorCode)
        assert code >= 0
        assert pos >= 0
        assert length >= 1
        self.message = message    # type: str
        self._pos = pos           # type: int
        # Add some logic to avoid double assignment of the same error code?
        # Problem: Same code might allowedly be used by two different parsers/compilers
        self.code = code          # type: ErrorCode
        self.orig_pos = orig_pos  # type: int
        self.orig_doc = orig_doc  # type: str
        self.line = line          # type: int
        self.column = column      # type: int
        # support for Language Server Protocol Diagnostics
        # see: https://microsoft.github.io/language-server-protocol/specifications/specification-current/#diagnostic
        self.length = length      # type: int
        self.end_line = -1        # type: int
        self.end_column = -1      # type: int
        # Stored as a tuple, so that the argument is never shared or mutated.
        self.related = tuple(related)   # type: Sequence[Tuple['Error', str]]

    def __str__(self):
        """Returns the error as "line:column: Severity (code): message".
        The "line:column: "-prefix is left out if the line is not known."""
        prefix = ''
        if self.line > 0:
            prefix = "%i:%i: " % (self.line, max(self.column, 0))
        return prefix + "%s (%i): %s" % (self.severity, self.code, self.message)

    def __repr__(self):
        return 'Error("%s", %s, %i, %i, %i, %i)' \
               % (self.message, repr(self.code), self.pos, self.orig_pos, self.line, self.column)

    @property
    def pos(self) -> int:
        """The position of the error."""
        return self._pos

    @pos.setter
    def pos(self, value: int):
        self._pos = value
        # reset line and column values, because they might now not be valid any more
        self.line, self.column = -1, -1
        self.end_line, self.end_column = -1, -1

    @property
    def severity(self):
        """Returns a string representation of the error level, e.g. "Warning"."""
        if self.code < WARNING:
            return "Notice"
        elif self.code < ERROR:
            return "Warning"
        elif self.code < FATAL:
            return "Error"
        else:
            return "Fatal"

    def visualize(self, document: str) -> str:
        """Shows the line of the document and the position where the error
        occurred.

        Returns the line of `document` that contains ``self.pos``, followed
        by a second line with a caret ("^") below the error position.
        """
        start = document.rfind('\n', 0, self.pos) + 1
        stop = document.find('\n', self.pos)
        if stop < 0:
            # The error lies on the last line and the document does not end
            # with a linefeed: without this correction `stop` would be -1
            # and the slice below would drop the last character of the line.
            stop = len(document)
        return document[start:stop] + '\n' + ' ' * (self.pos - start) + '^\n'

    def signature(self) -> bytes:
        """Returns a signature to quickly check the equality of errors.

        The signature is an 8 byte value composed of line, column and code.
        Unknown (negative) lines or columns are treated as 0, because a
        negative number cannot be converted to unsigned bytes.
        """
        line, column = max(self.line, 0), max(self.column, 0)
        return (line << 32 | column << 16 | self.code).to_bytes(8, 'big')

    def rangeObj(self) -> dict:
        """Returns the range (position plus length) of the error as an LSP-Range-Object.
        https://microsoft.github.io/language-server-protocol/specifications/specification-current/#range

        Line and column numbers as well as their end-counterparts must be
        known (>= 1). They are decremented by one in the returned object.
        """
        assert self.line >= 1 and self.column >= 1 and self.end_line >= 1 and self.end_column >= 1
        return {'start': {'line': self.line - 1, 'character': self.column - 1},
                'end': {'line': self.end_line - 1, 'character': self.end_column - 1}}

    def diagnosticObj(self) -> dict:
        """Returns the Error as a Language Server Protocol Diagnostic object.
        https://microsoft.github.io/language-server-protocol/specifications/specification-current/#diagnostic
        """
        def relatedObj(relatedError: Tuple['Error', str]) -> dict:
            """Converts a single (Error, uri)-pair to a dictionary with
            the location and message of the related error."""
            err, uri = relatedError
            return {
                'location': {'uri': uri, 'range': err.rangeObj()},
                'message': err.message
            }

        # LSP severity levels: 1 = error, 2 = warning, 3 = information
        if self.code < WARNING:
            severity = 3
        elif self.code < ERROR:
            severity = 2
        else:
            severity = 1

        diagnostic = {
            'range': self.rangeObj(),
            'severity': severity,
            'code': self.code,
            'source': 'DHParser',
            'message': self.message,
            # 'tags': []
        }
        if self.related:
            diagnostic['relatedInformation'] = [relatedObj(err) for err in self.related]
        return diagnostic
295

296
def is_warning(code: Union[Error, int]) -> bool:
    """Checks whether `code` lies below the ERROR-level, i.e. whether it
    is just a warning or a notice. `code` can either be an error code or
    an Error object, in which case the code of that object is examined."""
    level = code.code if isinstance(code, Error) else code
    return level < ERROR
300
301


302
def is_error(code: Union[Error, int]) -> bool:
    """Checks whether `code` has at least ERROR-level, i.e. whether it is
    an error or a fatal error rather than a warning. `code` can either be
    an error code or an Error object, in which case the code of that
    object is examined."""
    level = code.code if isinstance(code, Error) else code
    return level >= ERROR
306
307


308
def is_fatal(code: Union[Error, int]) -> bool:
    """Checks whether `code` has at least FATAL-level. Fatal errors are
    typically raised when a crash (i.e. Python exception) occurs at later
    stages of the processing pipeline (e.g. ast transformation, compiling).
    `code` can either be an error code or an Error object, in which case
    the code of that object is examined."""
    level = code.code if isinstance(code, Error) else code
    return level >= FATAL
314
315


316
# def Warning(message: str, pos, code: ErrorCode = WARNING,
317
318
319
320
321
322
323
#             orig_pos: int = -1, line: int = -1, column: int = -1) -> Error:
#     """
#     Syntactic sugar for creating Error-objects that contain only a warning.
#     Raises a ValueError if `code` is not within the range for warnings.
#     """
#     if not is_warning(code):
#         raise ValueError("Tried to create a warning with a error code {}. "
324
#                          "Warning codes must be smaller than {}".format(code, ERROR))
325
326
327
#     return Error(message, pos, code, orig_pos, line, column)


328
def has_errors(messages: Iterable[Error], level: int = ERROR) -> bool:
    """
    Checks whether any of the `messages` has an error code that is
    greater than or equal to `level`. Returns False if `messages`
    is empty.
    """
    return any(message.code >= level for message in messages)


339
def only_errors(messages: Iterable[Error], level: int = ERROR) -> Iterator[Error]:
    """
    Filters `messages` lazily: The returned iterator passes only those
    messages through, the error code of which is greater than or equal
    to `level`.
    """
    severe_enough = (message for message in messages if message.code >= level)
    return severe_enough
345
346


347
348
#######################################################################
#
349
350
# support for canonical representation, i.e.
# filename:line:column:severity (code):error string
351
352
353
354
#
#######################################################################


eckhart's avatar
eckhart committed
355
356
def adjust_error_locations(errors: List[Error],
                           original_text: Union[StringView, str],
                           source_mapping: SourceMapFunc = lambda i: i):
    """Fills in the location fields of all `errors` inplace: the original
    position, line and column as well as end-line and end-column.

    Args:
        errors:  The list of errors as returned by the method
            ``errors()`` of a Node object
        original_text:  The source text on which the errors occurred.
            (Needed in order to determine the line and column numbers.)
        source_mapping:  A function that maps error positions to their
            positions in the original source file.
    """
    breaks = linebreaks(original_text)
    for error in errors:
        assert error.pos >= 0
        origin = source_mapping(error.pos)
        error.orig_pos = origin
        error.line, error.column = line_col(breaks, origin)
        # An error range that runs past the end of the text is not fatal.
        # It is therefore silently cut back to the end of the text rather
        # than reported, which spares other places a special-case treatment.
        overshoot = origin + error.length - len(original_text)
        if overshoot > 0:
            error.length -= overshoot
        error.end_line, error.end_column = line_col(breaks, origin + error.length)
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398


def canonical_error_strings(errors: List[Error], source_file_name: str = '') -> List[str]:
    """Returns the list of error strings in canonical form that can be parsed by most
    editors, i.e. "relative filepath : line : column : severity (code) : error string"

    Args:
        errors:  The errors to be converted to strings.
        source_file_name:  The name of the source file. If it is a file
            name (as determined by ``is_filename()``), it is prepended to
            each error string. A name that lies inside the current working
            directory is shortened to a path relative to that directory.

    Returns:
        One string for each error, in the same order as `errors`.
    """
    if not errors:
        return []
    if not is_filename(source_file_name):
        return [str(err) for err in errors]
    cwd = os.getcwd()
    # Compare with a trailing separator, so that a sibling directory that
    # merely shares a name prefix with cwd (e.g. "/work/project2" for the
    # cwd "/work/project") is not mistaken for a sub-directory.
    prefix = cwd if cwd.endswith(os.sep) else cwd + os.sep
    if source_file_name.startswith(prefix):
        # Slice the prefix off. (Indexing with `[len(cwd)]` would yield
        # a single character instead of the rest of the path.)
        rel_path = source_file_name[len(prefix):]
    else:
        rel_path = source_file_name
    return [rel_path + ':' + str(err) for err in errors]