Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
The container registry cleanup task is now completed and the registry can be used normally.
Open sidebar
badw-it
DHParser
Commits
77ce2fb1
Commit
77ce2fb1
authored
Oct 11, 2019
by
di68kap
Browse files
- parse.py: reentry_point(): skip comments when finding reentry point
parent
ffe2143d
Changes
4
Hide whitespace changes
Inline
Side-by-side
DHParser/parse.py
View file @
77ce2fb1
...
...
@@ -32,7 +32,8 @@ for an example.
from
collections
import
defaultdict
import
copy
from
typing
import
Callable
,
cast
,
List
,
Tuple
,
Set
,
Dict
,
DefaultDict
,
Union
,
Optional
,
Any
from
typing
import
Callable
,
cast
,
List
,
Tuple
,
Set
,
Iterator
,
Dict
,
\
DefaultDict
,
Union
,
Optional
,
Any
from
DHParser.configuration
import
get_config_value
from
DHParser.error
import
Error
,
linebreaks
,
line_col
...
...
@@ -119,31 +120,67 @@ class ParserError(Exception):
ResumeList
=
List
[
Union
[
str
,
Any
]]
# list of strings or regular expressions
def
reentry_point
(
rest
:
StringView
,
rules
:
ResumeList
)
->
int
:
def
reentry_point
(
rest
:
StringView
,
rules
:
ResumeList
,
comment_regex
)
->
int
:
"""
Finds the point where parsing should resume after a ParserError has been caught.
Makes sure that this reentry-point does not lie inside a comment.
Args:
rest: The rest of the parsed text or, in other words, the point where
a ParserError was thrown.
rules: A list of strings or regular expressions. The rest of the text is
searched for each of these. The closest match is the point where
parsing will be resumed.
comment_regex: A regular expression object that matches comments.
Returns:
The integer index of the closest reentry point or -1 if no reentry-point
was found.
"""
upper_limit
=
len
(
rest
)
+
1
i
=
upper_limit
comments
=
None
# type: Optional[Iterator]
def
next_comment
()
->
Tuple
[
int
,
int
]:
nonlocal
rest
,
comments
if
comments
:
try
:
m
=
next
(
comments
)
a
,
b
=
m
.
span
()
return
rest
.
index
(
a
),
rest
.
index
(
b
)
except
StopIteration
:
comments
=
None
return
-
1
,
-
2
def
search_next
(
rx
,
start
:
int
=
0
)
->
Tuple
[
int
,
int
]:
nonlocal
rest
,
i
m
=
rest
.
search
(
rx
,
start
)
if
m
:
start
,
end
=
m
.
span
()
return
min
(
rest
.
index
(
start
),
i
),
end
-
start
return
-
1
,
0
# find closest match
# TODO: ignore commented out passages !!!!
for
rule
in
rules
:
comments
=
rest
.
finditer
(
comment_regex
)
a
,
b
=
next_comment
()
if
isinstance
(
rule
,
str
):
k
=
rest
.
find
(
rule
)
while
a
<
b
<=
k
:
a
,
b
=
next_comment
()
while
a
<=
k
<
b
:
k
=
rest
.
find
(
rule
,
k
+
len
(
rule
))
while
a
<
b
<=
k
:
a
,
b
=
next_comment
()
i
=
min
(
k
if
k
>=
0
else
upper_limit
,
i
)
else
:
m
=
rest
.
search
(
rule
)
if
m
:
i
=
min
(
rest
.
index
(
m
.
start
()),
i
)
k
,
length
=
search_next
(
rule
)
while
a
<
b
<=
k
:
a
,
b
=
next_comment
()
while
a
<=
k
<
b
:
k
,
length
=
search_next
(
rule
,
k
+
length
)
while
a
<
b
<=
k
:
a
,
b
=
next_comment
()
i
=
min
(
k
if
k
>=
0
else
upper_limit
,
i
)
# in case no rule matched return -1
if
i
==
upper_limit
:
i
=
-
1
...
...
@@ -320,7 +357,7 @@ class Parser:
gap
=
len
(
text
)
-
len
(
pe
.
rest
)
rules
=
grammar
.
resume_rules__
.
get
(
self
.
pname
,
[])
rest
=
pe
.
rest
[
len
(
pe
.
node
):]
i
=
reentry_point
(
rest
,
rules
)
i
=
reentry_point
(
rest
,
rules
,
grammar
.
comment_rx__
)
if
i
>=
0
or
self
==
grammar
.
start_parser__
:
# apply reentry-rule or catch error at root-parser
if
i
<
0
:
...
...
@@ -513,6 +550,8 @@ PARSER_PLACEHOLDER = Parser()
#
########################################################################
RX_NEVER_MATCH
=
re
.
compile
(
r
'..(?<=^)'
)
def
mixin_comment
(
whitespace
:
str
,
comment
:
str
)
->
str
:
"""
...
...
@@ -663,6 +702,9 @@ class Grammar:
Instance Attributes:
all_parsers__: A set of all parsers connected to this grammar object
comment_rx__: The compiled regular expression for comments. If no
comments have been defined, it defaults to RX_NEVER_MATCH
start_parser__: During parsing, the parser with which the parsing process
was started (see method `__call__`) or `None` if no parsing process
is running.
...
...
@@ -803,6 +845,8 @@ class Grammar:
def
__init__
(
self
,
root
:
Parser
=
None
)
->
None
:
self
.
all_parsers__
=
set
()
# type: Set[Parser]
self
.
comment_rx__
=
re
.
compile
(
self
.
COMMENT__
)
\
if
hasattr
(
self
,
'COMMENT__'
)
and
self
.
COMMENT__
else
RX_NEVER_MATCH
self
.
start_parser__
=
None
# type: Optional[Parser]
self
.
_dirty_flag__
=
False
# type: bool
self
.
history_tracking__
=
False
# type: bool
...
...
@@ -1790,7 +1834,8 @@ class Series(NaryParser):
if
pos
<
self
.
mandatory
:
return
None
,
text
else
:
reloc
=
reentry_point
(
text_
,
self
.
skip
)
if
self
.
skip
else
-
1
reloc
=
reentry_point
(
text_
,
self
.
skip
,
self
.
grammar
.
comment_rx__
)
\
if
self
.
skip
else
-
1
error
,
node
,
text_
=
mandatory_violation
(
self
.
grammar
,
text_
,
isinstance
(
parser
,
Lookahead
),
parser
.
repr
,
self
.
err_msgs
,
reloc
)
...
...
@@ -2014,7 +2059,8 @@ class AllOf(NaryParser):
if
self
.
num_parsers
-
len
(
parsers
)
<
self
.
mandatory
:
return
None
,
text
else
:
reloc
=
reentry_point
(
text_
,
self
.
skip
)
if
self
.
skip
else
-
1
reloc
=
reentry_point
(
text_
,
self
.
skip
,
self
.
grammar
.
comment_rx__
)
\
if
self
.
skip
else
-
1
expected
=
'< '
+
' '
.
join
([
parser
.
repr
for
parser
in
parsers
])
+
' >'
lookahead
=
any
([
isinstance
(
p
,
Lookahead
)
for
p
in
parsers
])
error
,
err_node
,
text_
=
mandatory_violation
(
...
...
DHParser/stringview.py
View file @
77ce2fb1
...
...
@@ -200,10 +200,11 @@ class StringView: # collections.abc.Sized
else
:
return
self
.
_fullstring
.
find
(
sub
,
start
,
end
)
elif
start
is
None
and
end
is
None
:
return
self
.
_text
.
find
(
sub
,
self
.
_begin
,
self
.
_end
)
-
self
.
_begin
return
max
(
self
.
_text
.
find
(
sub
,
self
.
_begin
,
self
.
_end
)
-
self
.
_begin
,
-
1
)
else
:
_start
,
_end
=
real_indices
(
start
,
end
,
self
.
_len
)
return
self
.
_text
.
find
(
sub
,
self
.
_begin
+
_start
,
self
.
_begin
+
_end
)
-
self
.
_begin
return
max
(
self
.
_text
.
find
(
sub
,
self
.
_begin
+
_start
,
self
.
_begin
+
_end
)
-
self
.
_begin
,
-
1
)
@
cython
.
locals
(
_start
=
cython
.
int
,
_end
=
cython
.
int
)
def
rfind
(
self
,
sub
:
str
,
start
:
Optional
[
int
]
=
None
,
end
:
Optional
[
int
]
=
None
)
->
int
:
...
...
@@ -218,10 +219,11 @@ class StringView: # collections.abc.Sized
else
:
return
self
.
_fullstring
.
rfind
(
sub
,
start
,
end
)
if
start
is
None
and
end
is
None
:
return
self
.
_text
.
rfind
(
sub
,
self
.
_begin
,
self
.
_end
)
-
self
.
_begin
return
max
(
self
.
_text
.
rfind
(
sub
,
self
.
_begin
,
self
.
_end
)
-
self
.
_begin
,
-
1
)
else
:
_start
,
_end
=
real_indices
(
start
,
end
,
self
.
_len
)
return
self
.
_text
.
rfind
(
sub
,
self
.
_begin
+
_start
,
self
.
_begin
+
_end
)
-
self
.
_begin
return
max
(
self
.
_text
.
rfind
(
sub
,
self
.
_begin
+
_start
,
self
.
_begin
+
_end
)
-
self
.
_begin
,
-
1
)
def
startswith
(
self
,
prefix
:
str
,
...
...
@@ -276,13 +278,15 @@ class StringView: # collections.abc.Sized
"""
return
tuple
(
index
-
self
.
_begin
for
index
in
absolute_indices
)
def
search
(
self
,
regex
):
def
search
(
self
,
regex
,
start
:
Optional
[
int
]
=
None
,
end
:
Optional
[
int
]
=
None
):
"""Executes regex.search on the StringView object and returns the
result, which is either a match-object or None. Keep in mind that
match.end(), match.span() etc. are mapped to the underlying text,
not the StringView-object!!!
"""
return
regex
.
search
(
self
.
_text
,
pos
=
self
.
_begin
,
endpos
=
self
.
_end
)
start
=
self
.
_begin
if
start
is
None
else
self
.
_begin
+
start
end
=
self
.
_end
if
end
is
None
else
self
.
_begin
+
end
return
regex
.
search
(
self
.
_text
,
start
,
end
)
def
finditer
(
self
,
regex
):
"""Executes regex.finditer on the StringView object and returns the
...
...
test/test_parse.py
View file @
77ce2fb1
...
...
@@ -777,7 +777,7 @@ class TestReentryAfterError:
# there should be only two error messages
assert
len
(
cst
.
errors_sorted
)
==
2
def
test_skip_comment_on_re
entry
(
self
):
def
test_skip_comment_on_re
sume
(
self
):
lang
=
r
"""
@ comment = /(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)/ # Kommentare im C++-Stil
document = block_A block_B
...
...
@@ -788,11 +788,12 @@ class TestReentryAfterError:
grammar
=
grammar_provider
(
lang
)()
tree
=
grammar
(
'abc/*x*/xyz'
)
assert
not
tree
.
errors
tree
=
grammar
(
'ab
d
xyz'
)
tree
=
grammar
(
'ab
D
xyz'
)
mandatory_cont
=
(
Error
.
MANDATORY_CONTINUATION
,
Error
.
MANDATORY_CONTINUATION_AT_EOF
)
assert
len
(
tree
.
errors
)
==
1
and
tree
.
errors
[
0
].
code
in
mandatory_cont
tree
=
grammar
(
'abd/*x*/xyz'
)
print
(
tree
.
as_sxpr
())
tree
=
grammar
(
'abD/*x*/xyz'
)
assert
len
(
tree
.
errors
)
==
1
and
tree
.
errors
[
0
].
code
in
mandatory_cont
tree
=
grammar
(
'aD /*x*/ c /* a */ /*x*/xyz'
)
assert
len
(
tree
.
errors
)
==
1
and
tree
.
errors
[
0
].
code
in
mandatory_cont
...
...
test/test_stringview.py
View file @
77ce2fb1
...
...
@@ -106,11 +106,18 @@ class TestStringView:
s
=
" 0123456789 "
sv
=
StringView
(
s
,
1
,
-
1
)
assert
sv
.
find
(
'5'
)
==
5
assert
sv
.
find
(
' '
)
<
0
assert
sv
.
find
(
'0'
,
1
)
<
0
assert
sv
.
find
(
'9'
,
0
,
8
)
<
0
assert
sv
.
find
(
' '
)
==
-
1
assert
sv
.
find
(
'0'
,
1
)
==
-
1
assert
sv
.
find
(
'9'
,
0
,
8
)
==
-
1
assert
sv
.
find
(
'45'
,
1
,
8
)
==
4
def
test_rfind
(
self
):
s
=
" 123321 "
sv
=
StringView
(
s
,
1
,
-
1
)
assert
sv
.
rfind
(
'3'
)
==
3
assert
sv
.
find
(
'3'
)
==
2
assert
sv
.
rfind
(
'a'
)
==
-
1
def
test_startswith
(
self
):
s
=
" 0123456789 "
sv
=
StringView
(
s
,
1
,
-
1
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment