Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
ecd84a51
Commit
ecd84a51
authored
Jan 05, 2019
by
eckhart
Browse files
- support for resuming after parser failure directives in ebnf.py - bugfixes and tests!
parent
94edc4d8
Changes
5
Hide whitespace changes
Inline
Side-by-side
DHParser/ebnf.py
View file @
ecd84a51
...
...
@@ -35,7 +35,7 @@ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace,
from
DHParser.preprocess
import
nil_preprocessor
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
RootNode
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
,
re
,
expand_table
,
\
GLOBALS
,
CONFIG_PRESET
,
get_config_value
,
typing
GLOBALS
,
CONFIG_PRESET
,
get_config_value
,
unrepr
,
typing
from
DHParser.transform
import
TransformationFunc
,
traverse
,
remove_brackets
,
\
reduce_single_child
,
replace_by_single_child
,
remove_expendables
,
\
remove_tokens
,
flatten
,
forbid
,
assert_content
,
remove_infix_operator
...
...
@@ -570,6 +570,7 @@ class EBNFCompiler(Compiler):
definitions
.
append
((
self
.
RAW_WS_KEYWORD
,
"r'{whitespace}'"
.
format
(
**
self
.
directives
)))
definitions
.
append
((
self
.
COMMENT_KEYWORD
,
"r'{comment}'"
.
format
(
**
self
.
directives
)))
definitions
.
append
((
self
.
RESUME_RULES_KEYWORD
,
repr
(
self
.
directives
[
'resume'
])))
print
(
self
.
directives
[
'resume'
])
# prepare parser class header and docstring and
# add EBNF grammar to the doc string of the parser class
...
...
@@ -736,6 +737,18 @@ class EBNFCompiler(Compiler):
self
.
tree
.
new_error
(
node
,
'Directive "%s" must have one, but not %i values.'
%
(
key
,
len
(
node
.
children
)
-
1
))
def extract_regex(nd: Node) -> str:
    """Return the regular-expression source encoded by directive argument ``nd``.

    The content of ``nd`` is stripped of surrounding ``~`` characters; if
    any were present, an error is added to the tree, because the
    whitespace marker is not allowed here.  The remaining text is then
    interpreted by its delimiters:

    - ``"..."`` or ``'...'``: the text between the quotes is passed
      through ``escape_re`` (presumably so that it matches literally --
      confirm against ``DHParser.toolkit``).
    - ``/.../``: the text between the slashes is passed through
      ``self._check_rx`` and that call's result is used.
    - anything else is returned unchanged.

    Errors are reported against the enclosing directive ``node`` (taken
    from the surrounding scope, like ``key`` and ``self``), not against
    ``nd`` itself.

    :param nd: the argument node whose ``content`` holds the definition.
    :returns: the extracted regular-expression string.
    """
    raw = nd.content
    value = raw.strip("~")
    if value != raw:
        self.tree.new_error(node, "Whitespace marker '~' not allowed in definition "
                            "of %s regular expression." % key)
    # A delimited literal needs an opening and a closing delimiter.  Without
    # this guard an empty value raises IndexError on value[0], and a lone
    # quote or slash would be mistaken for an empty delimited literal.
    if len(value) >= 2:
        delimiters = value[0] + value[-1]
        if delimiters in {'""', "''"}:
            value = escape_re(value[1:-1])
        elif delimiters == '//':
            value = self._check_rx(node, value[1:-1])
    return value
if
key
in
{
'comment'
,
'whitespace'
}:
check_argnum
()
if
node
.
children
[
1
].
parser
.
name
==
"symbol"
:
...
...
@@ -746,15 +759,7 @@ class EBNFCompiler(Compiler):
self
.
tree
.
new_error
(
node
,
'Value "%s" not allowed for directive "%s".'
%
(
value
,
key
))
else
:
value
=
node
.
children
[
1
].
content
.
strip
(
"~"
)
# cast(str, node.children[1].result).strip("~")
if
value
!=
node
.
children
[
1
].
content
:
# cast(str, node.children[1].result)
self
.
tree
.
new_error
(
node
,
"Whitespace marker '~' not allowed in definition "
"of %s regular expression."
%
key
)
if
value
[
0
]
+
value
[
-
1
]
in
{
'""'
,
"''"
}:
value
=
escape_re
(
value
[
1
:
-
1
])
elif
value
[
0
]
+
value
[
-
1
]
==
'//'
:
value
=
self
.
_check_rx
(
node
,
value
[
1
:
-
1
])
value
=
extract_regex
(
node
.
children
[
1
])
if
key
==
'whitespace'
and
not
re
.
match
(
value
,
''
):
self
.
tree
.
new_error
(
node
,
"Implicit whitespace should always "
"match the empty string, /%s/ does not."
%
value
)
...
...
@@ -813,17 +818,19 @@ class EBNFCompiler(Compiler):
self
.
tree
.
new_error
(
node
,
'Directive "%s" accepts only regular expressions or '
'plain strings as arguments, but no symbols without '
'quotation marks!'
%
key
)
symbol
=
key
[:
-
6
]
symbol
=
key
[:
-
7
]
if
symbol
in
self
.
directives
[
'resume'
]:
self
.
tree
.
new_error
(
node
,
'Reentry conditions for "%s" have already been defined'
' earlier!'
%
symbol
)
else
:
reentry_conditions
=
[]
for
child
in
node
.
children
:
if
child
.
parser
.
name
==
'regex'
:
reentry_conditions
.
append
(
"re.compile(r'')"
%
child
.
content
)
for
child
in
node
.
children
[
1
:]
:
if
child
.
parser
.
name
==
'regex
p
'
:
reentry_conditions
.
append
(
unrepr
(
"re.compile(r'
%s
')"
%
extract_regex
(
child
))
)
else
:
reentry_conditions
.
append
(
repr
(
child
.
content
))
s
=
child
.
content
.
strip
()
s
=
s
.
strip
(
'"'
)
if
s
[
0
]
==
'"'
else
s
.
strip
(
"'"
)
reentry_conditions
.
append
(
s
)
self
.
directives
[
'resume'
][
symbol
]
=
reentry_conditions
else
:
...
...
DHParser/parse.py
View file @
ecd84a51
...
...
@@ -146,7 +146,7 @@ def reentry_point(rest: StringView, rules: ResumeList) -> int:
else
:
m
=
rest
.
search
(
rule
)
if
m
:
i
=
min
(
rest
.
index
(
m
.
start
swith
()),
i
)
i
=
min
(
rest
.
index
(
m
.
start
()),
i
)
# in case no rule matched return -1
if
i
==
upper_limit
:
i
=
-
1
...
...
@@ -1372,7 +1372,7 @@ class Series(NaryOperator):
# i = max(1, text.index(match.regs[1][0])) if match else 1
i
=
0
location
=
self
.
grammar
.
document_length__
-
len
(
text_
)
node
=
Node
(
self
,
text_
[:
i
]).
init_pos
(
location
)
node
=
Node
(
None
,
text_
[:
i
]).
init_pos
(
location
)
# self.grammar.tree__.add_error(
# node, Error("§ %s violation" % parser.repr, location, Error.MESSAGE))
# # node.errors.append(Error("§ %s violation" % parser.repr,
...
...
DHParser/toolkit.py
View file @
ecd84a51
...
...
@@ -48,6 +48,7 @@ __all__ = ('escape_re',
'escape_control_characters'
,
'is_filename'
,
'concurrent_ident'
,
'unrepr'
,
'lstrip_docstring'
,
'issubtype'
,
'isgenerictype'
,
...
...
@@ -169,6 +170,29 @@ def concurrent_ident() -> str:
return
multiprocessing
.
current_process
().
name
+
'_'
+
str
(
threading
.
get_ident
())
class unrepr:
    """
    unrepr encapsulates a string representing a python function call in
    such a way that the representation of the string yields the function
    call itself rather than a string representing the function call and
    delimited by quotation marks.

    Two ``unrepr`` objects are equal if they wrap the same string; an
    ``unrepr`` object also compares equal to the plain string it wraps.

    Example:
        >>> "re.compile(r'abc+')"
        "re.compile(r'abc+')"
        >>> unrepr("re.compile(r'abc+')")
        re.compile(r'abc+')
    """

    def __init__(self, s: str) -> None:
        # the wrapped source text; returned verbatim by str() and repr()
        self.s = s

    def __eq__(self, other: object) -> bool:
        if isinstance(other, unrepr):
            return self.s == other.s
        if isinstance(other, str):
            return self.s == other
        return NotImplemented

    def __hash__(self) -> int:
        # consistent with __eq__: hashes like the wrapped string
        return hash(self.s)

    def __str__(self) -> str:
        return self.s

    def __repr__(self) -> str:
        # deliberately NOT quoted, so that repr() of a container holding
        # unrepr objects yields the wrapped text as-is
        return self.s
#######################################################################
#
# type system support
...
...
test/test_ebnf.py
View file @
ecd84a51
...
...
@@ -28,7 +28,7 @@ sys.path.extend(['../', './'])
from
DHParser.toolkit
import
compile_python_object
,
re
from
DHParser.preprocess
import
nil_preprocessor
from
DHParser
import
compile_source
from
DHParser.error
import
has_errors
from
DHParser.error
import
has_errors
,
Error
from
DHParser.syntaxtree
import
WHITESPACE_PTYPE
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
EBNFTransform
,
get_ebnf_compiler
from
DHParser.dsl
import
CompilationError
,
compileDSL
,
DHPARSER_IMPORTS
,
grammar_provider
...
...
@@ -448,6 +448,92 @@ class TestAllSome:
assert
grammar
(
'B'
).
content
==
'B'
class TestCuratedErrors:
    """
    Curated errors replace existing errors with alternative
    error codes and messages that are more helpful to the user.
    """

    def test_user_error_declaration(self):
        """An error directive placed after its symbol's definition must be rejected."""
        lang = """
document = series | /.*/
series = "X" | head §"C" "D"
head = "A" "B"
@series_error = "a user defined error message"
"""
        try:
            grammar_provider(lang)()
        except CompilationError:
            pass
        else:
            assert False, "Error definition after symbol definition should fail!"

    def test_curated_mandatory_continuation(self):
        """A violated mandatory continuation reports the user-defined message."""
        lang = """
document = series | /.*/
@series_error = "a user defined error message"
series = "X" | head §"C" "D"
head = "A" "B"
"""
        parser = grammar_provider(lang)()

        # These inputs must parse without any error being flagged.
        for text in ("X", "ABCD", "A_CD"):
            st = parser(text)
            assert not st.error_flag

        # "AB_D" breaks the series directly after the mandatory marker;
        # "ABC_" checks the transitivity of the mandatory-operator.
        for text in ("AB_D", "ABC_"):
            st = parser(text)
            assert st.error_flag
            first_error = st.collect_errors()[0]
            assert first_error.code == Error.MANDATORY_CONTINUATION
            assert first_error.message == "a user defined error message"
class TestCustomizedResumeParsing:
    """Tests for resuming after a parser failure with ``@ <symbol>_resume`` directives."""

    def setup(self):
        """Compile the test grammar and store the resulting parser in ``self.gr``."""
        # Raw string: the grammar contains the regular expression /GA\w+/,
        # and "\w" is an invalid escape sequence in a non-raw string literal.
        lang = r"""
@ alpha_resume = 'BETA', 'GAMMA'
@ beta_resume = 'GAMMA'
@ bac_resume = /GA\w+/
document = alpha [beta] gamma "."
alpha = "ALPHA" abc
abc = §"a" "b" "c"
beta = "BETA" (bac | bca)
bac = "b" "a" §"c"
bca = "b" "c" §"a"
gamma = "GAMMA" §(cab | cba)
cab = "c" "a" §"b"
cba = "c" "b" §"a"
"""
        try:
            self.gr = grammar_provider(lang)()
        except CompilationError as ce:
            print(ce)
            # Without a compiled grammar every test would fail later with an
            # unrelated AttributeError on self.gr; fail here with the real cause.
            raise

    def test_several_resume_rules_innermost_rule_matching(self):
        gr = self.gr

        # single failure
        content = 'ALPHA abc BETA bad GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # because of resuming, there should be only one error message
        assert len(cst.collect_errors()) == 1

        # multiple failures
        content = 'ALPHA acb BETA bad GAMMA cab .'
        cst = gr(content)
        assert cst.error_flag
        assert cst.content == content
        assert cst.pick('alpha').content.startswith('ALPHA')
        # this input is expected to yield exactly two error messages
        assert len(cst.collect_errors()) == 2
if
__name__
==
"__main__"
:
from
DHParser.testing
import
runner
...
...
test/test_error.py
View file @
ecd84a51
...
...
@@ -71,50 +71,6 @@ class TestErrorSupport:
self
.
mini_suite
(
s
,
linebreaks
(
s
),
1
)
class
TestCuratedErrors
:
"""
Curated errors replace existing errors with alternative
error codes and messages that are more helpful to the user.
"""
def
test_user_error_declaration
(
self
):
lang
=
"""
document = series | /.*/
series = "X" | head §"C" "D"
head = "A" "B"
@series_error = "a user defined error message"
"""
try
:
parser
=
grammar_provider
(
lang
)()
assert
False
,
"Error definition after symbol definition should fail!"
except
CompilationError
as
e
:
pass
def
test_curated_mandatory_continuation
(
self
):
lang
=
"""
document = series | /.*/
@series_error = "a user defined error message"
series = "X" | head §"C" "D"
head = "A" "B"
"""
# from DHParser.dsl import compileDSL
# from DHParser.preprocess import nil_preprocessor
# from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
# grammar_src = compileDSL(lang, nil_preprocessor, get_ebnf_grammar(),
# get_ebnf_transformer(), get_ebnf_compiler("test", lang))
# print(grammar_src)
parser
=
grammar_provider
(
lang
)()
st
=
parser
(
"X"
);
assert
not
st
.
error_flag
st
=
parser
(
"ABCD"
);
assert
not
st
.
error_flag
st
=
parser
(
"A_CD"
);
assert
not
st
.
error_flag
st
=
parser
(
"AB_D"
);
assert
st
.
error_flag
assert
st
.
collect_errors
()[
0
].
code
==
Error
.
MANDATORY_CONTINUATION
assert
st
.
collect_errors
()[
0
].
message
==
"a user defined error message"
# transitivity of mandatory-operator
st
=
parser
(
"ABC_"
);
assert
st
.
error_flag
assert
st
.
collect_errors
()[
0
].
code
==
Error
.
MANDATORY_CONTINUATION
assert
st
.
collect_errors
()[
0
].
message
==
"a user defined error message"
if
__name__
==
"__main__"
:
from
DHParser.testing
import
runner
runner
(
""
,
globals
())
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment