Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
badw-it
DHParser
Commits
5da469de
Commit
5da469de
authored
Dec 29, 2017
by
eckhart
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- correct error locations when compiling with preprocessor
parent
3dab649e
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
107 additions
and
114 deletions
+107
-114
DHParser/error.py
DHParser/error.py
+52
-37
DHParser/parse.py
DHParser/parse.py
+14
-11
DHParser/syntaxtree.py
DHParser/syntaxtree.py
+20
-36
DHParser/testing.py
DHParser/testing.py
+3
-3
DHParser/toolkit.py
DHParser/toolkit.py
+1
-7
TODO.md
TODO.md
+7
-0
test/test_error.py
test/test_error.py
+10
-20
No files found.
DHParser/error.py
View file @
5da469de
...
...
@@ -17,9 +17,9 @@ permissions and limitations under the License.
"""
import
bisect
import
functools
from
typing
import
Iterable
,
Iterator
,
Union
,
Tuple
,
List
from
DHParser.preprocess
import
SourceMapFunc
from
DHParser.stringview
import
StringView
__all__
=
(
'Error'
,
...
...
@@ -28,11 +28,12 @@ __all__ = ('Error',
'has_errors'
,
'only_errors'
,
'linebreaks'
,
'line_col'
)
'line_col'
,
'remap_error_locations'
)
class
Error
:
__slots__
=
[
'message'
,
'level'
,
'code'
,
'pos'
,
'line'
,
'column'
]
__slots__
=
[
'message'
,
'level'
,
'code'
,
'pos'
,
'orig_pos'
,
'line'
,
'column'
]
# error levels
...
...
@@ -49,12 +50,13 @@ class Error:
MANDATORY_CONTINUATION
=
1001
def
__init__
(
self
,
message
:
str
,
code
:
int
=
ERROR
,
pos
:
int
=
-
1
,
line
:
int
=
-
1
,
column
:
int
=
-
1
)
->
None
:
def
__init__
(
self
,
message
:
str
,
code
:
int
=
ERROR
,
pos
:
int
=
-
1
,
orig_
pos
:
int
=
-
1
,
line
:
int
=
-
1
,
column
:
int
=
-
1
)
->
None
:
self
.
message
=
message
assert
code
>=
0
self
.
code
=
code
self
.
pos
=
pos
self
.
orig_pos
=
orig_pos
self
.
line
=
line
self
.
column
=
column
...
...
@@ -65,8 +67,8 @@ class Error:
return
prefix
+
"%s: %s"
%
(
self
.
level_str
,
self
.
message
)
def
__repr__
(
self
):
return
'Error("%s", %s, %i, %i, %i)'
\
%
(
self
.
message
,
repr
(
self
.
code
),
self
.
pos
,
self
.
line
,
self
.
column
)
return
'Error("%s", %s, %i, %i,
%i,
%i)'
\
%
(
self
.
message
,
repr
(
self
.
code
),
self
.
pos
,
self
.
orig_pos
,
self
.
line
,
self
.
column
)
@
property
def
level_str
(
self
):
...
...
@@ -110,6 +112,13 @@ def only_errors(messages: Iterable[Error], level: int = Error.ERROR) -> Iterator
return
(
err
for
err
in
messages
if
err
.
code
>=
level
)
#######################################################################
#
# Setting of line, column and position properties of error messages.
#
#######################################################################
def
linebreaks
(
text
:
Union
[
StringView
,
str
])
->
List
[
int
]:
"""
Returns a list of the indices of all line breaks in the text.
...
...
@@ -123,24 +132,13 @@ def linebreaks(text: Union[StringView, str]) -> List[int]:
return
lbr
@
functools
.
singledispatch
def
line_col
(
text
:
Union
[
StringView
,
str
],
pos
:
int
)
->
Tuple
[
int
,
int
]:
"""
Returns the position within a text as (line, column)-tuple.
"""
if
pos
<
0
or
pos
>
len
(
text
):
# one character behind EOF is still an allowed position!
raise
ValueError
(
'Position %i outside text of length %s !'
%
(
pos
,
len
(
text
)))
line
=
text
.
count
(
"
\n
"
,
0
,
pos
)
+
1
column
=
pos
-
text
.
rfind
(
"
\n
"
,
0
,
pos
)
return
line
,
column
@
line_col
.
register
(
list
)
def
_line_col
(
lbreaks
:
List
[
int
],
pos
:
int
)
->
Tuple
[
int
,
int
]:
def
line_col
(
lbreaks
:
List
[
int
],
pos
:
int
)
->
Tuple
[
int
,
int
]:
"""
Returns the position within a text as (line, column)-tuple based
on a list of all line breaks, including -1 and EOF.
"""
if
not
lbreaks
and
pos
>=
0
:
return
0
,
pos
if
pos
<
0
or
pos
>
lbreaks
[
-
1
]:
# one character behind EOF is still an allowed position!
raise
ValueError
(
'Position %i outside text of length %s !'
%
(
pos
,
lbreaks
[
-
1
]))
line
=
bisect
.
bisect_left
(
lbreaks
,
pos
)
...
...
@@ -148,20 +146,37 @@ def _line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
return
line
,
column
# def error_messages(source_text:str, errors: List[Error]) -> List[str]:
# """Adds line, column information for error messages, if the position
# is given.
#
# Args:
# source_text (str): The source text on which the errors occurred.
# (Needed in order to determine the line and column numbers.)
# errors (list): The list of errors as returned by the method
# ``collect_errors()`` of a Node object
# Returns:
# The same list of error messages, which now contain line and
# column numbers.
# def line_col(text: Union[StringView, str], pos: int) -> Tuple[int, int]:
# """
# Returns the position within a text as (line, column)-tuple.
# """
# for err in errors:
# if err.pos >= 0 and err.line <= 0:
# err.line, err.column = line_col(source_text, err.pos)
# return errors
# if pos < 0 or pos > len(text): # one character behind EOF is still an allowed position!
# raise ValueError('Position %i outside text of length %s !' % (pos, len(text)))
# line = text.count("\n", 0, pos) + 1
# column = pos - text.rfind("\n", 0, pos)
# return line, column
def
remap_error_locations
(
errors
:
List
[
Error
],
original_text
:
Union
[
StringView
,
str
],
source_mapping
:
SourceMapFunc
=
lambda
i
:
i
)
->
List
[
Error
]:
"""Adds (or adjusts) line and column numbers of error messages in place.
Args:
errors: The list of errors as returned by the method
``collect_errors()`` of a Node object
original_text: The source text on which the errors occurred.
(Needed in order to determine the line and column numbers.)
source_mapping: A function that maps error positions to their
positions in the original source file.
Returns:
The list of errors. (Returning the list of errors is just syntactical
sugar. Be aware that the line, column and orig_pos attributes have been
changed in place.)
"""
line_breaks
=
linebreaks
(
original_text
)
for
err
in
errors
:
assert
err
.
pos
>=
0
err
.
orig_pos
=
source_mapping
(
err
.
pos
)
err
.
line
,
err
.
column
=
line_col
(
line_breaks
,
err
.
orig_pos
)
return
errors
DHParser/parse.py
View file @
5da469de
...
...
@@ -60,7 +60,7 @@ import copy
import
html
import
os
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
linebreaks
,
line_col
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
linebreaks
,
line_col
,
remap_error_locations
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
ParserBase
,
WHITESPACE_PTYPE
,
\
TOKEN_PTYPE
,
ZOMBIE_PARSER
...
...
@@ -332,6 +332,7 @@ def add_parser_guard(parser_func):
# otherwise also cache None-results
parser
.
visited
[
location
]
=
(
None
,
rest
)
else
:
assert
node
.
_pos
<
0
node
.
_pos
=
grammar
.
document_length__
-
location
assert
node
.
_pos
>=
0
,
str
(
"%i != %i"
%
(
grammar
.
document_length__
,
location
))
if
(
grammar
.
last_rb__loc__
>
location
...
...
@@ -431,7 +432,7 @@ class Parser(ParserBase):
# add "aspect oriented" wrapper around parser calls
# for memoizing, left recursion and tracing
if
not
isinstance
(
self
,
Forward
):
# should Forward-Parser not be guarded? Not sure...
guarded_parser_call
=
add_parser_guard
(
self
.
__class__
.
__call__
)
# The following check is necessary for classes that don't override
# the __call__() method, because in these cases the non-overridden
...
...
@@ -2251,6 +2252,7 @@ def compile_source(source: str,
log_file_name
=
logfile_basename
(
source
,
compiler
)
if
preprocessor
is
None
:
source_text
=
original_text
source_mapping
=
lambda
i
:
i
else
:
source_text
,
source_mapping
=
with_source_mapping
(
preprocessor
(
original_text
))
syntax_tree
=
parser
(
source_text
)
...
...
@@ -2263,17 +2265,18 @@ def compile_source(source: str,
# likely that error list gets littered with compile error messages
result
=
None
efl
=
syntax_tree
.
error_flag
messages
=
syntax_tree
.
collect_errors
(
source_text
,
clear_errors
=
True
)
messages
=
syntax_tree
.
collect_errors
(
clear_errors
=
True
)
if
not
is_error
(
efl
):
transformer
(
syntax_tree
)
efl
=
max
(
efl
,
syntax_tree
.
error_flag
)
messages
.
extend
(
syntax_tree
.
collect_errors
(
source_text
,
clear_errors
=
True
))
messages
.
extend
(
syntax_tree
.
collect_errors
(
clear_errors
=
True
))
if
is_logging
():
syntax_tree
.
log
(
log_file_name
+
'.ast'
)
if
not
is_error
(
syntax_tree
.
error_flag
):
result
=
compiler
(
syntax_tree
)
# print(syntax_tree.as_sxpr())
messages
.
extend
(
syntax_tree
.
collect_errors
(
source_text
))
messages
.
extend
(
syntax_tree
.
collect_errors
())
syntax_tree
.
error_flag
=
max
(
syntax_tree
.
error_flag
,
efl
)
remap_error_locations
(
messages
,
original_text
,
source_mapping
)
return
result
,
messages
,
syntax_tree
DHParser/syntaxtree.py
View file @
5da469de
...
...
@@ -23,7 +23,7 @@ from functools import partial
from
DHParser.error
import
Error
,
linebreaks
,
line_col
from
DHParser.stringview
import
StringView
from
DHParser.toolkit
import
is_logging
,
log_dir
,
identity
,
re
,
typing
from
DHParser.toolkit
import
is_logging
,
log_dir
,
re
,
typing
from
typing
import
Any
,
Callable
,
cast
,
Iterator
,
List
,
Union
,
Tuple
,
Hashable
,
Optional
...
...
@@ -320,8 +320,8 @@ class Node(collections.abc.Sized):
self
.
children
=
(
result
,)
self
.
_result
=
self
.
children
self
.
error_flag
=
result
.
error_flag
if
self
.
_pos
<
0
:
self
.
_pos
=
result
.
_pos
#
if self._pos < 0:
#
self._pos = result._pos
else
:
if
isinstance
(
result
,
tuple
):
self
.
children
=
result
...
...
@@ -329,8 +329,8 @@ class Node(collections.abc.Sized):
if
result
:
if
self
.
error_flag
==
0
:
self
.
error_flag
=
max
(
child
.
error_flag
for
child
in
self
.
children
)
if
self
.
_pos
<
0
:
self
.
_pos
=
result
[
0
].
_pos
#
if self._pos < 0:
#
self._pos = result[0]._pos
else
:
self
.
children
=
NoChildren
self
.
_result
=
str
(
result
)
...
...
@@ -367,23 +367,10 @@ class Node(collections.abc.Sized):
def
pos
(
self
)
->
int
:
"""Returns the position of the Node's content in the source text."""
if
self
.
_pos
<
0
:
raise
AssertionError
(
"
p
osition value not initialized!"
)
raise
AssertionError
(
"
P
osition value not initialized!"
)
return
self
.
_pos
# @pos.setter
# def pos(self, pos: int):
# assert self._pos == pos, str("%i != %i" % (self._pos, pos))
# offset = 0
# # recursively adjust pos-values of all children
# for child in self.children:
# assert child.pos == pos + offset
# offset += len(child)
# # add pos-values to Error-objects
# for err in self._errors:
# err.pos = pos
def
init_pos
(
self
,
pos
:
int
,
overwrite
:
bool
=
False
)
->
'Node'
:
"""
(Re-)initialize position value. Usually, the parser guard
...
...
@@ -431,29 +418,19 @@ class Node(collections.abc.Sized):
return
self
def
collect_errors
(
self
,
document
:
Union
[
StringView
,
str
]
=
''
,
clear_errors
=
False
)
\
->
List
[
Error
]:
def
collect_errors
(
self
,
clear_errors
=
False
)
->
List
[
Error
]:
"""
Recursively adds line- and column-numbers to all error objects.
Returns all errors of this node or any child node in the form
of a set of tuples (position, error_message), where position
is always relative to this node.
"""
if
self
.
error_flag
:
lbreaks
=
linebreaks
(
document
)
if
document
else
[]
return
self
.
_collect_errors
(
lbreaks
,
clear_errors
)
else
:
return
[]
def
_collect_errors
(
self
,
lbreaks
:
List
[
int
]
=
[],
clear_errors
=
False
)
->
List
[
Error
]:
errors
=
self
.
errors
if
errors
and
lbreaks
:
for
err
in
errors
:
err
.
pos
=
self
.
pos
err
.
line
,
err
.
column
=
line_col
(
lbreaks
,
err
.
pos
)
if
self
.
children
:
for
child
in
self
.
children
:
errors
.
extend
(
child
.
_
collect_errors
(
lbreaks
,
clear_errors
))
errors
.
extend
(
child
.
collect_errors
(
clear_errors
))
if
clear_errors
:
self
.
_errors
=
[]
self
.
error_flag
=
0
...
...
@@ -467,7 +444,7 @@ class Node(collections.abc.Sized):
def
_tree_repr
(
self
,
tab
,
open_fn
,
close_fn
,
data_fn
=
identity
,
density
=
0
)
->
str
:
def
_tree_repr
(
self
,
tab
,
open_fn
,
close_fn
,
data_fn
=
lambda
i
:
i
,
density
=
0
)
->
str
:
"""
Generates a tree representation of this node and its children
in string form.
...
...
@@ -569,7 +546,7 @@ class Node(collections.abc.Sized):
txt
=
'<'
+
node
.
tag_name
# s += ' pos="%i"' % node.pos
if
src
:
txt
+=
' line="%i" col="%i"'
%
line_col
(
src
,
node
.
pos
)
txt
+=
' line="%i" col="%i"'
%
line_col
(
line_breaks
,
node
.
pos
)
if
showerrors
and
node
.
errors
:
txt
+=
' err="%s"'
%
''
.
join
(
str
(
err
).
replace
(
'"'
,
r
'\"'
)
for
err
in
node
.
errors
)
return
txt
+
">
\n
"
...
...
@@ -578,6 +555,7 @@ class Node(collections.abc.Sized):
"""Returns the closing string for the representation of `node`."""
return
'
\n
</'
+
node
.
tag_name
+
'>'
line_breaks
=
linebreaks
(
src
)
if
src
else
[]
return
self
.
_tree_repr
(
' '
,
opening
,
closing
,
density
=
1
)
...
...
@@ -663,6 +641,10 @@ def mock_syntax_tree(sxpr):
sxpr
=
sxpr
[
match
.
end
():].
strip
()
if
sxpr
[
0
]
==
'('
:
result
=
tuple
(
mock_syntax_tree
(
block
)
for
block
in
next_block
(
sxpr
))
pos
=
0
for
node
in
result
:
node
.
_pos
=
pos
pos
+=
len
(
node
)
else
:
lines
=
[]
while
sxpr
and
sxpr
[
0
]
!=
')'
:
...
...
@@ -678,7 +660,9 @@ def mock_syntax_tree(sxpr):
lines
.
append
(
sxpr
[:
match
.
end
()])
sxpr
=
sxpr
[
match
.
end
():]
result
=
"
\n
"
.
join
(
lines
)
return
Node
(
MockParser
(
name
,
':'
+
class_name
),
result
)
node
=
Node
(
MockParser
(
name
,
':'
+
class_name
),
result
)
node
.
_pos
=
0
return
node
TransformationFunc
=
Union
[
Callable
[[
Node
],
Any
],
partial
]
...
...
DHParser/testing.py
View file @
5da469de
...
...
@@ -25,7 +25,7 @@ import os
from
DHParser.toolkit
import
is_logging
,
clear_logs
,
re
from
DHParser.syntaxtree
import
mock_syntax_tree
,
flatten_sxpr
from
DHParser.error
import
is_error
from
DHParser.error
import
is_error
,
remap_error_locations
__all__
=
(
'unit_from_configfile'
,
'unit_from_json'
,
...
...
@@ -181,10 +181,10 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
tests
.
setdefault
(
'__ast__'
,
{})[
test_name
]
=
ast
ast
.
log
(
"match_%s_%s.ast"
%
(
parser_name
,
test_name
))
if
is_error
(
cst
.
error_flag
):
errors
=
remap_error_locations
(
cst
.
collect_errors
(),
test_code
)
errata
.
append
(
'Match test "%s" for parser "%s" failed:
\n\t
Expr.: %s
\n\n\t
%s
\n\n
'
%
(
test_name
,
parser_name
,
'
\n\t
'
.
join
(
test_code
.
split
(
'
\n
'
)),
'
\n\t
'
.
join
(
str
(
m
).
replace
(
'
\n
'
,
'
\n\t\t
'
)
for
m
in
cst
.
collect_errors
(
test_code
))))
'
\n\t
'
.
join
(
str
(
m
).
replace
(
'
\n
'
,
'
\n\t\t
'
)
for
m
in
errors
)))
tests
.
setdefault
(
'__err__'
,
{})[
test_name
]
=
errata
[
-
1
]
# write parsing-history log only in case of failure!
if
is_logging
():
...
...
DHParser/toolkit.py
View file @
5da469de
...
...
@@ -66,8 +66,7 @@ __all__ = ('logging',
'expand_table'
,
'compile_python_object'
,
'smart_list'
,
'sane_parser_name'
,
'identity'
)
'sane_parser_name'
)
#######################################################################
...
...
@@ -408,11 +407,6 @@ def sane_parser_name(name) -> bool:
return
name
and
name
[:
2
]
!=
'__'
and
name
[
-
2
:]
!=
'__'
def
identity
(
anything
:
Any
)
->
Any
:
"""Identity function for functional programming style."""
return
anything
#######################################################################
#
# initialization
...
...
TODO.md
View file @
5da469de
...
...
@@ -2,6 +2,13 @@ General TODO-List
=================
Readability of Code
-------------------
**direct vs inverse location counting in `parse.py:guarded_call`**
:
use location value counting from the beginning rather than the end of
the text
Optimizations
-------------
...
...
test/test_error.py
View file @
5da469de
...
...
@@ -29,43 +29,33 @@ from DHParser.error import linebreaks, line_col
class
TestErrorSupport
:
def
mini_suite
(
self
,
s
,
data
,
offset
):
l
,
c
=
line_col
(
data
,
0
)
def
mini_suite
(
self
,
s
,
lbreaks
,
offset
):
l
,
c
=
line_col
(
lbreaks
,
0
)
assert
(
l
,
c
)
==
(
1
,
1
),
str
((
l
,
c
))
l
,
c
=
line_col
(
data
,
0
+
offset
)
l
,
c
=
line_col
(
lbreaks
,
0
+
offset
)
assert
(
l
,
c
)
==
(
1
+
offset
,
1
),
str
((
l
,
c
))
l
,
c
=
line_col
(
data
,
1
+
offset
)
l
,
c
=
line_col
(
lbreaks
,
1
+
offset
)
assert
(
l
,
c
)
==
(
1
+
offset
,
2
),
str
((
l
,
c
))
l
,
c
=
line_col
(
data
,
9
+
offset
)
l
,
c
=
line_col
(
lbreaks
,
9
+
offset
)
assert
(
l
,
c
)
==
(
1
+
offset
,
10
),
str
((
l
,
c
))
l
,
c
=
line_col
(
data
,
10
+
offset
)
l
,
c
=
line_col
(
lbreaks
,
10
+
offset
)
assert
(
l
,
c
)
==
(
2
+
offset
,
1
),
str
((
l
,
c
))
l
,
c
=
line_col
(
data
,
18
+
offset
)
l
,
c
=
line_col
(
lbreaks
,
18
+
offset
)
assert
(
l
,
c
)
==
(
2
+
offset
,
9
),
str
((
l
,
c
))
l
,
c
=
line_col
(
data
,
19
+
offset
)
l
,
c
=
line_col
(
lbreaks
,
19
+
offset
)
assert
(
l
,
c
)
==
(
2
+
offset
,
10
),
str
((
l
,
c
))
try
:
l
,
c
=
line_col
(
data
,
-
1
)
l
,
c
=
line_col
(
lbreaks
,
-
1
)
assert
False
,
"ValueError expected for negative position."
except
ValueError
:
pass
try
:
l
,
c
=
line_col
(
data
,
len
(
s
)
+
1
)
l
,
c
=
line_col
(
lbreaks
,
len
(
s
)
+
1
)
assert
False
,
"ValueError expected for postion > pos(EOF)+1."
except
ValueError
:
pass
def
test_line_col
(
self
):
s
=
"123456789
\n
123456789"
self
.
mini_suite
(
s
,
s
,
0
)
s
=
"
\n
123456789
\n
123456789"
self
.
mini_suite
(
s
,
s
,
1
)
s
=
"123456789
\n
123456789
\n
"
self
.
mini_suite
(
s
,
s
,
0
)
s
=
"
\n
123456789
\n
123456789
\n
"
self
.
mini_suite
(
s
,
s
,
1
)
def
test_line_col_bisect
(
self
):
s
=
"123456789
\n
123456789"
self
.
mini_suite
(
s
,
linebreaks
(
s
),
0
)
s
=
"
\n
123456789
\n
123456789"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment