Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
92513cc4
Commit
92513cc4
authored
Oct 23, 2018
by
di68kap
Browse files
- allow lookahead in unit tests
parent
d1adc0ce
Changes
4
Hide whitespace changes
Inline
Side-by-side
DHParser/error.py
View file @
92513cc4
...
...
@@ -80,8 +80,9 @@ class Error:
MANDATORY_CONTINUATION
=
ErrorCode
(
1001
)
PARSER_DID_NOT_MATCH
=
ErrorCode
(
1002
)
PARSER_STOPPED_BEFORE_END
=
ErrorCode
(
1003
)
CAPTURE_STACK_NOT_EMPTY
=
ErrorCode
(
1004
)
PARSER_LOOKAHEAD_MATCH_ONLY
=
ErrorCode
(
1003
)
PARSER_STOPPED_BEFORE_END
=
ErrorCode
(
1004
)
CAPTURE_STACK_NOT_EMPTY
=
ErrorCode
(
1005
)
def
__init__
(
self
,
message
:
str
,
pos
,
code
:
ErrorCode
=
ERROR
,
orig_pos
:
int
=
-
1
,
line
:
int
=
-
1
,
column
:
int
=
-
1
)
->
None
:
...
...
DHParser/parse.py
View file @
92513cc4
...
...
@@ -650,17 +650,22 @@ class Grammar:
parser
.
grammar
=
self
def
__call__
(
self
,
document
:
str
,
start_parser
=
"root__"
)
->
Node
:
def
__call__
(
self
,
document
:
str
,
start_parser
=
"root__"
,
track_history
=
False
)
->
Node
:
"""
Parses a document with parser-combinators.
Args:
document (str): The source text to be parsed.
start_parser (str): The name of the parser with which
to
start. This is useful for testing particular parsers
start_parser (str
or Parser
): The name of the parser with which
to
start. This is useful for testing particular parsers
(i.e. particular parts of the EBNF-Grammar.)
track_history (bool): If true, the parsing history will be
recorded in self.history__. If logging is turned on (i.e.
DHParser.log.is_logging() returns true), the parsing history
will always be recorded, even if `False` is passed to
the `track_history` parameter.
Returns:
Node: The root node
o
t the parse tree.
Node: The root node t
o
the parse tree.
"""
def
tail_pos
(
predecessors
:
Union
[
List
[
Node
],
Tuple
[
Node
,
...]])
->
int
:
...
...
@@ -677,7 +682,10 @@ class Grammar:
parser
.
reset
()
else
:
self
.
_dirty_flag__
=
True
self
.
history_tracking__
=
is_logging
()
self
.
history_tracking__
=
track_history
or
is_logging
()
# save the tracking state, because history_tracking__ might be set to False later,
# but the original tracking state is needed for additional error information.
track_history
=
self
.
history_tracking__
self
.
document__
=
StringView
(
document
)
self
.
document_length__
=
len
(
self
.
document__
)
self
.
document_lbreaks__
=
linebreaks
(
document
)
if
self
.
history_tracking__
else
[]
...
...
@@ -701,11 +709,22 @@ class Grammar:
fwd
=
rest
.
find
(
"
\n
"
)
+
1
or
len
(
rest
)
skip
,
rest
=
rest
[:
fwd
],
rest
[
fwd
:]
if
result
is
None
:
error_msg
=
'Parser did not match! Invalid source file?'
\
'
\n
Most advanced: %s
\n
Last match: %s;'
%
\
(
str
(
HistoryRecord
.
most_advanced_match
(
self
.
history__
)),
str
(
HistoryRecord
.
last_match
(
self
.
history__
)))
error_code
=
Error
.
PARSER_DID_NOT_MATCH
err_info
=
''
if
not
track_history
else
\
'
\n
Most advanced: %s
\n
Last match: %s;'
%
\
(
str
(
HistoryRecord
.
most_advanced_match
(
self
.
history__
)),
str
(
HistoryRecord
.
last_match
(
self
.
history__
)))
# Check if a Lookahead-Parser did match. Needed for testing, because
# in a test case this is not necessarily an error.
last_record
=
self
.
history__
[
-
2
]
if
len
(
self
.
history__
)
>
1
else
[]
if
last_record
and
parser
!=
self
.
root__
\
and
last_record
.
status
==
HistoryRecord
.
MATCH
\
and
any
(
isinstance
(
parser
,
Lookahead
)
for
parser
in
last_record
.
call_stack
):
error_msg
=
'Parser did not match except for lookahead! '
+
err_info
error_code
=
Error
.
PARSER_LOOKAHEAD_MATCH_ONLY
else
:
error_msg
=
'Parser did not match!'
+
err_info
error_code
=
Error
.
PARSER_DID_NOT_MATCH
else
:
stitches
.
append
(
result
)
error_msg
=
"Parser stopped before end"
+
\
...
...
@@ -1150,7 +1169,7 @@ class OneOrMore(UnaryOperator):
>>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content
'Wo viel der Weisheit, da auch viel des Grämens.'
>>> str(Grammar(sentence)('.')) # an empty sentence also matches
' <<< Error on "." | Parser did not match!
Invalid source file?\n Most advanced: None\n Last match: None;
>>> '
' <<< Error on "." | Parser did not match! >>> '
EBNF-Notation: ``{ ... }+``
...
...
@@ -1201,7 +1220,7 @@ class Series(NaryOperator):
>>> Grammar(variable_name)('variable_1').content
'variable_1'
>>> str(Grammar(variable_name)('1_variable'))
' <<< Error on "1_variable" | Parser did not match!
Invalid source file?\n Most advanced: None\n Last match: None;
>>> '
' <<< Error on "1_variable" | Parser did not match! >>> '
EBNF-Notation: ``... ...`` (sequence of parsers separated by a blank or new line)
...
...
DHParser/testing.py
View file @
92513cc4
...
...
@@ -36,7 +36,7 @@ import json
import
os
import
sys
from
DHParser.error
import
is_error
,
adjust_error_locations
from
DHParser.error
import
Error
,
is_error
,
adjust_error_locations
from
DHParser.log
import
is_logging
,
clear_logs
,
log_ST
,
log_parsing_history
from
DHParser.parse
import
UnknownParserError
from
DHParser.syntaxtree
import
Node
,
RootNode
,
parse_sxpr
,
flatten_sxpr
,
ZOMBIE_PARSER
...
...
@@ -308,6 +308,17 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
parser
=
parser_factory
()
transform
=
transformer_factory
()
def lookahead_artifact(raw_errors):
    """
    Tell whether the reported errors are a mere artifact of a trailing
    lookahead operator.

    Returns True only if `raw_errors` contains exactly two errors, the
    last one carrying the code `Error.PARSER_LOOKAHEAD_MATCH_ONLY` and
    the one before it carrying `Error.PARSER_STOPPED_BEFORE_END`. This
    is the error pattern that arises when the parser stopped in front of
    a sequence that was captured by a lookahead operator, which is
    required for testing parsers that put a lookahead operator at the
    end. See test_testing.TestLookahead.
    """
    # Any other number of errors cannot be the lookahead pattern.
    if len(raw_errors) != 2:
        return False
    final, previous = raw_errors[-1], raw_errors[-2]
    return (final.code == Error.PARSER_LOOKAHEAD_MATCH_ONLY
            and previous.code == Error.PARSER_STOPPED_BEFORE_END)
for
parser_name
,
tests
in
test_unit
.
items
():
assert
parser_name
,
"Missing parser name in test %s!"
%
unit_name
assert
not
any
(
test_type
in
RESULT_STAGES
for
test_type
in
tests
),
\
...
...
@@ -337,7 +348,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
infostr
=
' match-test "'
+
test_name
+
'" ... '
errflag
=
len
(
errata
)
try
:
cst
=
parser
(
test_code
,
parser_name
)
cst
=
parser
(
test_code
,
parser_name
,
track_history
=
True
)
except
UnknownParserError
as
upe
:
cst
=
RootNode
()
cst
=
cst
.
new_error
(
Node
(
ZOMBIE_PARSER
,
""
).
init_pos
(
0
),
str
(
upe
))
...
...
@@ -349,8 +360,9 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
transform
(
ast
)
tests
.
setdefault
(
'__ast__'
,
{})[
test_name
]
=
ast
# log_ST(ast, "match_%s_%s.ast" % (parser_name, clean_test_name))
if
is_error
(
cst
.
error_flag
):
errors
=
adjust_error_locations
(
cst
.
collect_errors
(),
test_code
)
raw_errors
=
cst
.
collect_errors
()
if
is_error
(
cst
.
error_flag
)
and
not
lookahead_artifact
(
raw_errors
):
errors
=
adjust_error_locations
(
raw_errors
,
test_code
)
errata
.
append
(
'Match test "%s" for parser "%s" failed:
\n\t
Expr.: %s
\n\n\t
%s
\n\n
'
%
(
test_name
,
parser_name
,
'
\n\t
'
.
join
(
test_code
.
split
(
'
\n
'
)),
'
\n\t
'
.
join
(
str
(
m
).
replace
(
'
\n
'
,
'
\n\t\t
'
)
for
m
in
errors
)))
...
...
@@ -385,13 +397,13 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
errflag
=
len
(
errata
)
# cst = parser(test_code, parser_name)
try
:
cst
=
parser
(
test_code
,
parser_name
)
cst
=
parser
(
test_code
,
parser_name
,
track_history
=
True
)
except
UnknownParserError
as
upe
:
node
=
Node
(
ZOMBIE_PARSER
,
""
).
init_pos
(
0
)
cst
=
RootNode
(
node
).
new_error
(
node
,
str
(
upe
))
errata
.
append
(
'Unknown parser "{}" in fail test "{}"!'
.
format
(
parser_name
,
test_name
))
tests
.
setdefault
(
'__err__'
,
{})[
test_name
]
=
errata
[
-
1
]
if
not
is_error
(
cst
.
error_flag
):
if
not
is_error
(
cst
.
error_flag
)
and
not
lookahead_artifact
(
cst
.
collect_errors
())
:
errata
.
append
(
'Fail test "%s" for parser "%s" yields match instead of '
'expected failure!'
%
(
test_name
,
parser_name
))
tests
.
setdefault
(
'__err__'
,
{})[
test_name
]
=
errata
[
-
1
]
...
...
test/test_testing.py
View file @
92513cc4
...
...
@@ -27,11 +27,13 @@ from functools import partial
sys
.
path
.
extend
([
'../'
,
'./'
])
from
DHParser.syntaxtree
import
parse_sxpr
,
flatten_sxpr
,
TOKEN_PTYPE
from
DHParser.transform
import
traverse
,
remove_expendables
,
\
from
DHParser.transform
import
traverse
,
remove_expendables
,
remove_empty
,
\
replace_by_single_child
,
reduce_single_child
,
flatten
from
DHParser.dsl
import
grammar_provider
from
DHParser.testing
import
get_report
,
grammar_unit
,
unit_from_file
,
\
reset_unit
from
DHParser.log
import
logging
CFG_FILE_1
=
'''
# a comment
...
...
@@ -143,6 +145,19 @@ ARITHMETIC_EBNF_transformation_table = {
ARITHMETIC_EBNFTransform
=
partial
(
traverse
,
processing_table
=
ARITHMETIC_EBNF_transformation_table
)
def clean_report():
    """
    Remove generated unit-test reports from the 'REPORT' directory.

    Every entry of 'REPORT' whose name matches ``unit_test_<digits>.md``
    is deleted. The directory itself is removed afterwards, but only if
    no other entries were found in it. Does nothing if 'REPORT' does not
    exist.
    """
    report_dir = 'REPORT'
    if not os.path.exists(report_dir):
        return
    foreign_entries_found = False
    for entry in os.listdir(report_dir):
        if re.match(r'unit_test_\d+\.md', entry):
            os.remove(os.path.join(report_dir, entry))
        else:
            # something else lives here, so the directory must be kept
            foreign_entries_found = True
    if not foreign_entries_found:
        os.rmdir(report_dir)
class
TestGrammarTest
:
cases
=
{
"factor"
:
{
...
...
@@ -198,16 +213,7 @@ class TestGrammarTest:
}
def teardown(self):
    """
    Remove the report files that the grammar unit tests have written.

    The cleanup is delegated to the module-level helper `clean_report()`
    rather than repeating its directory-scanning logic inline, so that
    all test classes share a single implementation.
    """
    clean_report()
def
test_testing_grammar
(
self
):
parser_fac
=
grammar_provider
(
ARITHMETIC_EBNF
)
...
...
@@ -222,7 +228,6 @@ class TestGrammarTest:
# print(e)
assert
len
(
errata
)
==
3
,
str
(
errata
)
# def test_get_report(self):
# parser_fac = grammar_provider(ARITHMETIC_EBNF)
# trans_fac = lambda : ARITHMETIC_EBNFTransform
...
...
@@ -242,6 +247,75 @@ class TestGrammarTest:
assert
errata
class
TestLookahead
:
"""
Testing of Expressions with trailing Lookahead-Parser.
"""
EBNF
=
r
"""
document = { category | entry } { LF }
category = {LF } sequence_of_letters { /:/ sequence_of_letters } /:/ &(LF sequence_of_letters)
entry = { LF } sequence_of_letters !/:/
sequence_of_letters = /[A-Za-z0-9 ]+/
LF = / *\n/
"""
cases
=
{
"category"
:
{
"match"
:
{
1
:
"""Mountains: big:
K2"""
},
"fail"
:
{
6
:
"""Mountains: big:"""
}
}
}
fail_cases
=
{
"category"
:
{
"match"
:
{
1
:
"""Mountains: b"""
,
# stop sign ":" is missing
2
:
"""Rivers:
# not allowed"""
},
"fail"
:
{
1
:
"""Mountains: big:
K2"""
}
}
}
def
setup
(
self
):
self
.
grammar_fac
=
grammar_provider
(
TestLookahead
.
EBNF
)
self
.
trans_fac
=
lambda
:
partial
(
traverse
,
processing_table
=
{
"*"
:
[
flatten
,
remove_empty
]})
def
teardown
(
self
):
clean_report
()
def
test_selftest
(
self
):
doc
=
"""
Mountains: big:
Mount Everest
K2
Mountains: medium:
Denali
Alpomayo
Rivers:
Nile
"""
grammar
=
self
.
grammar_fac
()
cst
=
grammar
(
doc
)
assert
not
cst
.
error_flag
# trans = self.trans_fac()
# trans(cst)
# print(cst.as_sxpr())
def
test_unit_lookahead
(
self
):
errata
=
grammar_unit
(
self
.
cases
,
self
.
grammar_fac
,
self
.
trans_fac
)
assert
not
errata
errata
=
grammar_unit
(
self
.
fail_cases
,
self
.
grammar_fac
,
self
.
trans_fac
)
assert
errata
class
TestSExpr
:
"""
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment