Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
fbc08ddc
Commit
fbc08ddc
authored
Feb 13, 2019
by
eckhart
Browse files
- DHParser/testing.py: Korrektur für Lookahead-parser-Fehlerbehandlung
parent
e8025e8c
Changes
6
Show whitespace changes
Inline
Side-by-side
DHParser/parse.py
View file @
fbc08ddc
...
...
@@ -1512,6 +1512,7 @@ NO_MANDATORY = 1000
def
mandatory_violation
(
grammar
:
Grammar
,
text_
:
StringView
,
failed_on_lookahead
:
bool
,
expected
:
str
,
err_msgs
:
MessagesType
,
reloc
:
int
)
->
Tuple
[
Error
,
Node
,
StringView
]:
...
...
@@ -1526,6 +1527,8 @@ def mandatory_violation(grammar: Grammar,
:param grammar: the grammar
:param text_: the point where the mandatory violation occurred. As usual, the
string view represents the remaining text from this point.
:param failed_on_lookahead: True if the violating parser was a
Lookahead-Parser.
:param expected: the expected (but not found) text at this point.
:param err_msgs: A list of pairs of regular expressions (or simple
strings for that matter) and error messages that are chosen
...
...
@@ -1556,8 +1559,8 @@ def mandatory_violation(grammar: Grammar,
grammar
.
tree__
.
add_error
(
err_node
,
error
)
else
:
msg
=
'%s expected, "%s" found!'
%
(
expected
,
found
)
error
=
Error
(
msg
,
location
,
Error
.
MANDATORY_CONTINUATION
if
text_
else
Error
.
MANDATORY_CONTINUATION
_AT_EOF
)
error
=
Error
(
msg
,
location
,
Error
.
MANDATORY_CONTINUATION
_AT_EOF
if
(
failed_on_lookahead
and
not
text_
)
else
Error
.
MANDATORY_CONTINUATION
)
grammar
.
tree__
.
add_error
(
err_node
,
error
)
return
error
,
err_node
,
text_
[
i
:]
...
...
@@ -1637,7 +1640,8 @@ class Series(NaryParser):
else
:
reloc
=
reentry_point
(
text_
,
self
.
skip
)
if
self
.
skip
else
-
1
error
,
node
,
text_
=
mandatory_violation
(
self
.
grammar
,
text_
,
parser
.
repr
,
self
.
err_msgs
,
reloc
)
self
.
grammar
,
text_
,
isinstance
(
parser
,
Lookahead
),
parser
.
repr
,
self
.
err_msgs
,
reloc
)
# check if parsing of the series can be resumed somewhere
if
reloc
>=
0
:
nd
,
text_
=
parser
(
text_
)
# try current parser again
...
...
@@ -1859,8 +1863,9 @@ class AllOf(NaryParser):
else
:
reloc
=
reentry_point
(
text_
,
self
.
skip
)
if
self
.
skip
else
-
1
expected
=
'< '
+
' '
.
join
([
parser
.
repr
for
parser
in
parsers
])
+
' >'
lookahead
=
any
(
isinstance
(
p
,
Lookahead
)
for
p
in
parsers
)
error
,
err_node
,
text_
=
mandatory_violation
(
self
.
grammar
,
text_
,
expected
,
self
.
err_msgs
,
reloc
)
self
.
grammar
,
text_
,
lookahead
,
expected
,
self
.
err_msgs
,
reloc
)
results
+=
(
err_node
,)
if
reloc
<
0
:
parsers
=
[]
...
...
DHParser/testing.py
View file @
fbc08ddc
...
...
@@ -151,7 +151,7 @@ def unit_from_config(config_str):
section_match
=
RX_SECTION
.
match
(
cfg
,
pos
)
if
pos
!=
len
(
cfg
)
and
not
re
.
match
(
'\s+$'
,
cfg
[
pos
:]):
if
pos
!=
len
(
cfg
)
and
not
re
.
match
(
r
'\s+$'
,
cfg
[
pos
:]):
raise
SyntaxError
(
'in line %i'
%
(
cfg
[:
pos
].
count
(
'
\n
'
)
+
1
))
return
unit
...
...
@@ -322,39 +322,18 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
parser
=
parser_factory
()
transform
=
transformer_factory
()
# is_lookahead = set() # type: Set[str] # Dictionary of parser names
with_lookahead
=
set
()
# type: Set[Optional[Parser]]
# lookahead_flag = False # type: bool
def
has_lookahead
(
parser_name
:
str
)
->
bool
:
"""Returns True if the parser or any of its descendant parsers is a
Lookahead parser."""
lookahead_found
=
False
def
find_lookahead
(
p
:
Parser
):
"""Raises a StopIterationError if parser `p` is or contains
a Lookahead-parser."""
nonlocal
is_lookahead
,
with_lookahead
,
lookahead_flag
if
p
in
with_lookahead
:
lookahead_flag
=
True
else
:
if
isinstance
(
p
,
Lookahead
):
is_lookahead
.
add
(
p
.
tag_name
)
with_lookahead
.
add
(
p
)
lookahead_flag
=
True
else
:
if
any
(
child
for
child
in
(
getattr
(
p
,
'parsers'
,
[])
or
[
getattr
(
p
,
'parser'
,
None
)])
if
isinstance
(
child
,
Lookahead
)):
with_lookahead
.
add
(
p
)
lookahead_flag
=
True
def
has_lookahead
(
parser_name
:
str
):
"""Returns `True`, if given parser is or contains a Lookahead-parser."""
nonlocal
is_lookahead
,
with_lookahead
,
lookahead_flag
,
parser
p
=
parser
[
parser_name
]
if
p
in
with_lookahead
:
return
True
lookahead_flag
=
False
p
.
apply
(
find_lookahead
)
if
lookahead_flag
:
with_lookahead
.
add
(
p
)
return
True
return
False
nonlocal
lookahead_found
if
not
lookahead_found
:
lookahead_found
=
isinstance
(
p
,
Lookahead
)
parser
[
parser_name
].
apply
(
find_lookahead
)
return
lookahead_found
def
lookahead_artifact
(
parser
,
raw_errors
):
"""
...
...
@@ -364,14 +343,12 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
This is required for testing of parsers that put a lookahead
operator at the end. See test_testing.TestLookahead.
"""
nonlocal
is_lookahead
return
((
len
(
raw_errors
)
==
2
# case 1: superfluous data for lookahead
and
raw_errors
[
-
1
].
code
==
Error
.
PARSER_LOOKAHEAD_MATCH_ONLY
and
raw_errors
[
-
2
].
code
==
Error
.
PARSER_STOPPED_BEFORE_END
)
# case 2: mandatory lookahead failure at end of text
or
(
len
(
raw_errors
)
==
1
and
raw_errors
[
-
1
].
code
==
Error
.
MANDATORY_CONTINUATION_AT_EOF
)
and
any
(
tn
in
with_lookahead
for
tn
in
parser
.
history__
[
-
1
].
call_stack
))
and
raw_errors
[
-
1
].
code
==
Error
.
MANDATORY_CONTINUATION_AT_EOF
))
for
parser_name
,
tests
in
test_unit
.
items
():
assert
parser_name
,
"Missing parser name in test %s!"
%
unit_name
...
...
@@ -566,12 +543,12 @@ def grammar_suite(directory, parser_factory, transformer_factory,
########################################################################
RX_DEFINITION_OR_SECTION
=
re
.
compile
(
'(?:^|
\n
)[
\t
]*(\w+(?=[
\t
]*=)|#:.*(?=
\n
|$|#))'
)
RX_DEFINITION_OR_SECTION
=
re
.
compile
(
r
'(?:^|\n)[ \t]*(\w+(?=[ \t]*=)|#:.*(?=\n|$|#))'
)
SymbolsDictType
=
Dict
[
str
,
List
[
str
]]
def
extract_symbols
(
ebnf_text_or_file
:
str
)
->
SymbolsDictType
:
"""
r
"""
Extracts all defined symbols from an EBNF-grammar. This can be used to
prepare grammar-tests. The symbols will be returned as lists of strings
which are grouped by the sections to which they belong and returned as
...
...
@@ -606,7 +583,7 @@ def extract_symbols(ebnf_text_or_file: str) -> SymbolsDictType:
to lists of symbols that appear under that section.
"""
def
trim_section_name
(
name
:
str
)
->
str
:
return
re
.
sub
(
'[^\w-]'
,
'_'
,
name
.
replace
(
'#:'
,
''
).
strip
())
return
re
.
sub
(
r
'[^\w-]'
,
'_'
,
name
.
replace
(
'#:'
,
''
).
strip
())
ebnf
=
load_if_file
(
ebnf_text_or_file
)
deflist
=
RX_DEFINITION_OR_SECTION
.
findall
(
ebnf
)
...
...
test/test_dsl.py
View file @
fbc08ddc
...
...
@@ -58,7 +58,7 @@ class TestCompileFunctions:
class
TestCompilerGeneration
:
trivial_lang
=
"""
trivial_lang
=
r
"""
text = { word | WSPC } "." [/\s/]
word = /\w+/
WSPC = /\s+/
...
...
test/test_ebnf.py
View file @
fbc08ddc
...
...
@@ -346,7 +346,7 @@ class TestBoundaryCases:
"not an error: "
+
str
(
messages
)
grammar_src
=
result
grammar
=
compile_python_object
(
DHPARSER_IMPORTS
+
grammar_src
,
'get_(?:\w+_)?grammar$'
)()
r
'get_(?:\w+_)?grammar$'
)()
else
:
assert
False
,
"EBNF compiler should warn about unconnected rules."
...
...
@@ -405,7 +405,7 @@ class TestFlowControlOperators:
"""Tests whether failures to comply with the required operator '§'
are correctly reported as such.
"""
lang1
=
"nonsense == /\w+/~ # wrong_equal_sign"
lang1
=
r
"nonsense == /\w+/~ # wrong_equal_sign"
lang2
=
"nonsense = [^{}%]+ # someone forgot the '/'-delimiters for regular expressions"
try
:
parser_class
=
grammar_provider
(
lang1
)
...
...
@@ -477,6 +477,7 @@ class TestErrorCustomization:
st
=
parser
(
"ABCD"
);
assert
not
st
.
error_flag
st
=
parser
(
"A_CD"
);
assert
not
st
.
error_flag
st
=
parser
(
"AB_D"
);
assert
st
.
error_flag
print
(
st
.
errors_sorted
)
assert
st
.
errors_sorted
[
0
].
code
==
Error
.
MANDATORY_CONTINUATION
assert
st
.
errors_sorted
[
0
].
message
==
"a user defined error message"
# transitivity of mandatory-operator
...
...
@@ -497,7 +498,7 @@ class TestErrorCustomization:
assert
st
.
errors_sorted
[
0
].
message
==
"a user defined error message"
def
test_multiple_error_messages
(
self
):
lang
=
"""
lang
=
r
"""
document = series | /.*/
@series_error = '_', "the underscore is wrong in this place"
@series_error = '*', "the asterix is wrong in this place"
...
...
@@ -573,7 +574,7 @@ class TestErrorCustomizationErrors:
class
TestCustomizedResumeParsing
:
def
setup
(
self
):
lang
=
"""
lang
=
r
"""
@ alpha_resume = 'BETA', GAMMA_STR
@ beta_resume = GAMMA_RE
@ bac_resume = /GA\w+/
...
...
test/test_parse.py
View file @
fbc08ddc
...
...
@@ -133,10 +133,10 @@ class TestFlowControl:
self
.
t2
=
"All word and not play makes Jack a dull boy END
\n
"
def
test_lookbehind
(
self
):
ws
=
RegExp
(
'\s*'
)
ws
=
RegExp
(
r
'\s*'
)
end
=
RegExp
(
"END"
)
doc_end
=
Lookbehind
(
RegExp
(
'
\\
s*?
\\
n'
))
+
end
word
=
RegExp
(
'\w+'
)
word
=
RegExp
(
r
'\w+'
)
sequence
=
OneOrMore
(
NegativeLookahead
(
end
)
+
word
+
ws
)
document
=
ws
+
sequence
+
doc_end
+
ws
...
...
@@ -149,11 +149,11 @@ class TestFlowControl:
def
test_lookbehind_indirect
(
self
):
class
LookbehindTestGrammar
(
Grammar
):
parser_initialization__
=
[
"upon instantiation"
]
ws
=
RegExp
(
'
\
\
s*'
)
ws
=
RegExp
(
r
'\s*'
)
end
=
RegExp
(
'END'
)
SUCC_LB
=
RegExp
(
'
\\
s*?
\\
n'
)
doc_end
=
Series
(
Lookbehind
(
SUCC_LB
),
end
)
word
=
RegExp
(
'\w+'
)
word
=
RegExp
(
r
'\w+'
)
sequence
=
OneOrMore
(
Series
(
NegativeLookahead
(
end
),
word
,
ws
))
document
=
Series
(
ws
,
sequence
,
doc_end
,
ws
)
root__
=
document
...
...
@@ -176,7 +176,7 @@ class TestRegex:
get_ebnf_transformer
(),
get_ebnf_compiler
(
'MultilineRegexTest'
))
assert
result
assert
not
messages
,
str
(
messages
)
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
'\w+Grammar$'
)()
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
r
'\w+Grammar$'
)()
node
=
parser
(
'abc+def'
,
parser
.
regex
)
assert
not
node
.
error_flag
assert
node
.
tag_name
==
"regex"
...
...
@@ -192,7 +192,7 @@ class TestRegex:
get_ebnf_transformer
(),
get_ebnf_compiler
(
'MultilineRegexTest'
))
assert
result
assert
not
messages
,
str
(
messages
)
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
'\w+Grammar$'
)()
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
r
'\w+Grammar$'
)()
node
=
parser
(
'abc+def'
,
parser
.
regex
)
assert
not
node
.
error_flag
assert
node
.
tag_name
==
"regex"
...
...
@@ -207,7 +207,7 @@ class TestRegex:
get_ebnf_transformer
(),
get_ebnf_compiler
(
'MultilineRegexTest'
))
assert
result
assert
not
messages
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
'\w+Grammar$'
)()
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
r
'\w+Grammar$'
)()
node
,
rest
=
parser
.
regex
(
'Alpha'
)
assert
node
assert
not
node
.
error_flag
...
...
@@ -223,7 +223,7 @@ class TestRegex:
get_ebnf_transformer
(),
get_ebnf_compiler
(
'MultilineRegexTest'
))
assert
result
assert
not
messages
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
'\w+Grammar$'
)()
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
r
'\w+Grammar$'
)()
node
,
rest
=
parser
.
regex
(
'Alpha'
)
assert
node
.
error_flag
...
...
@@ -244,7 +244,7 @@ class TestRegex:
get_ebnf_transformer
(),
get_ebnf_compiler
(
"TokenTest"
))
assert
result
assert
not
messages
,
str
(
messages
)
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
'\w+Grammar$'
)()
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
r
'\w+Grammar$'
)()
result
=
parser
(
testdoc
)
# log_parsing_history(parser, "test.log")
assert
not
result
.
error_flag
...
...
@@ -267,13 +267,13 @@ class TestGrammar:
# checks whether pos values in the parsing result and in the
# history record have been initialized
with
logging
(
"LOGS"
):
grammar
=
compile_python_object
(
DHPARSER_IMPORTS
+
self
.
pyparser
,
'\w+Grammar$'
)()
grammar
=
compile_python_object
(
DHPARSER_IMPORTS
+
self
.
pyparser
,
r
'\w+Grammar$'
)()
grammar
(
"no_file_name*"
)
for
record
in
grammar
.
history__
:
assert
not
record
.
node
or
record
.
node
.
pos
>=
0
def
test_select_parsing
(
self
):
grammar
=
compile_python_object
(
DHPARSER_IMPORTS
+
self
.
pyparser
,
'\w+Grammar$'
)()
grammar
=
compile_python_object
(
DHPARSER_IMPORTS
+
self
.
pyparser
,
r
'\w+Grammar$'
)()
grammar
(
"wort"
,
"WORT"
)
grammar
(
"eine Zeile"
,
"textzeile"
)
grammar
(
"kein Haupt"
,
"haupt"
)
...
...
@@ -281,7 +281,7 @@ class TestGrammar:
def
test_grammar_subclassing
(
self
):
class
Arithmetic
(
Grammar
):
'''
r
'''
expression = term { ("+" | "-") term }
term = factor { ("*" | "/") factor }
factor = INTEGER | "(" expression ")"
...
...
@@ -413,14 +413,14 @@ class TestAllOfSomeOf:
class
TestPopRetrieve
:
mini_language
=
"""
mini_language
=
r
"""
document = { text | codeblock }
codeblock = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
delimiter = delimiter_sign # never use delimiter between capture and pop except for retrival!
delimiter_sign = /`+/
text = /[^`]+/
"""
mini_lang2
=
"""
mini_lang2
=
r
"""
@braces_filter=counterpart
document = { text | codeblock }
codeblock = braces { text | opening_braces | (!:braces closing_braces) } ::braces
...
...
@@ -429,7 +429,7 @@ class TestPopRetrieve:
closing_braces = /\}+/
text = /[^{}]+/
"""
mini_lang3
=
"""
mini_lang3
=
r
"""
document = { text | env }
env = (specialtag | opentag) text [closespecial | closetag]
opentag = "<" name ">"
...
...
@@ -485,7 +485,7 @@ class TestPopRetrieve:
def
test_cache_neutrality
(
self
):
"""Test that packrat-caching does not interfere with the variable-
changing parsers: Capture and Retrieve."""
lang
=
"""
lang
=
r
"""
text = opening closing
opening = (unmarked_package | marked_package)
closing = ::variable
...
...
@@ -758,7 +758,7 @@ class TestUnknownParserError:
class
TestEarlyTokenWhitespaceDrop
:
def
setup
(
self
):
self
.
lang
=
"""
self
.
lang
=
r
"""
@ drop = token, whitespace
expression = term { ("+" | "-") term}
term = factor { ("*"|"/") factor}
...
...
test/test_syntaxtree.py
View file @
fbc08ddc
...
...
@@ -206,7 +206,7 @@ class TestRootNode:
assert
error_str
.
find
(
"A"
)
<
error_str
.
find
(
"B"
)
def
test_error_reporting
(
self
):
number
=
RE
(
'\d+'
)
|
RE
(
'\d+'
)
+
RE
(
'\.'
)
+
RE
(
'\d+'
)
number
=
RE
(
r
'\d+'
)
|
RE
(
r
'\d+'
)
+
RE
(
r
'\.'
)
+
RE
(
r
'\d+'
)
result
=
str
(
Grammar
(
number
)(
"3.1416"
))
assert
result
==
'3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> '
,
\
str
(
result
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment