Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
f00daf04
Commit
f00daf04
authored
Jan 07, 2019
by
eckhart
Browse files
- resume rules can now refer to symbols that represent regular expressions or literals
parent
6e490550
Changes
3
Hide whitespace changes
Inline
Side-by-side
CHANGES.txt
View file @
f00daf04
DHParser Version 0.8.5 (DATE?)
..............................
- parse.Parser.apply() reworked
DHParser Version 0.8.4 (6.1.2019)
.................................
..
.................................
- customized resuming after parser errors! (still experimental, see
tests/test_ebnf.TestCustomizedResumeParsing as well as
...
...
DHParser/ebnf.py
View file @
f00daf04
...
...
@@ -541,6 +541,46 @@ class EBNFCompiler(Compiler):
"""
pass
# TODO: add verification code here
def
_check_rx
(
self
,
node
:
Node
,
rx
:
str
)
->
str
:
"""
Checks whether the string `rx` represents a valid regular
expression. Makes sure that multiline regular expressions are
prepended by the multiline-flag. Returns the regular expression string.
"""
flags
=
self
.
re_flags
|
{
'x'
}
if
rx
.
find
(
'
\n
'
)
>=
0
else
self
.
re_flags
if
flags
:
rx
=
"(?%s)%s"
%
(
""
.
join
(
flags
),
rx
)
try
:
re
.
compile
(
rx
)
except
Exception
as
re_error
:
self
.
tree
.
new_error
(
node
,
"malformed regular expression %s: %s"
%
(
repr
(
rx
),
str
(
re_error
)))
return
rx
def
_extract_regex
(
self
,
node
:
Node
)
->
str
:
"""Extracts regular expression string from regexp-Node."""
value
=
node
.
content
.
strip
(
"~"
)
if
value
[
0
]
+
value
[
-
1
]
in
{
'""'
,
"''"
}:
value
=
escape_re
(
value
[
1
:
-
1
])
elif
value
[
0
]
+
value
[
-
1
]
==
'//'
:
value
=
self
.
_check_rx
(
node
,
value
[
1
:
-
1
])
return
value
def
_generate_resume_rule
(
self
,
nd
:
Node
)
->
Union
[
str
,
unrepr
]:
"""Generates a resume rules from the nodes content. Returns an
empty string in case the node is neither regexp nor literal.
"""
if
nd
.
parser
.
name
==
'regexp'
:
return
unrepr
(
"re.compile(r'%s')"
%
self
.
_extract_regex
(
nd
))
elif
nd
.
parser
.
name
==
'literal'
:
s
=
nd
.
content
.
strip
()
return
s
.
strip
(
'"'
)
if
s
[
0
]
==
'"'
else
s
.
strip
(
"'"
)
return
''
def
assemble_parser
(
self
,
definitions
:
List
[
Tuple
[
str
,
str
]],
root_node
:
Node
)
->
str
:
"""
Creates the Python code for the parser after compilation of
...
...
@@ -570,7 +610,28 @@ class EBNFCompiler(Compiler):
+
", comment="
+
self
.
COMMENT_KEYWORD
+
")"
)))
definitions
.
append
((
self
.
RAW_WS_KEYWORD
,
"r'{whitespace}'"
.
format
(
**
self
.
directives
)))
definitions
.
append
((
self
.
COMMENT_KEYWORD
,
"r'{comment}'"
.
format
(
**
self
.
directives
)))
definitions
.
append
((
self
.
RESUME_RULES_KEYWORD
,
repr
(
self
.
directives
[
'resume'
])))
# prepare and add resume-rules
resume_rules
=
dict
()
# type: Dict[str, List[Union[str, unrepr]]]
for
symbol
,
raw_rules
in
self
.
directives
[
'resume'
].
items
():
refined_rules
=
[]
for
rule
in
raw_rules
:
if
isinstance
(
rule
,
unrepr
)
and
rule
.
s
.
isidentifier
():
try
:
nd
=
self
.
rules
[
rule
.
s
][
0
].
children
[
1
]
refined
=
self
.
_generate_resume_rule
(
nd
)
except
IndexError
:
refined
=
""
if
refined
:
refined_rules
.
append
(
refined
)
else
:
self
.
tree
.
new_error
(
nd
,
'Symbol "%s" cannot be used in resume rule, since'
' it represents neither literal nor regexp!'
)
else
:
refined_rules
.
append
(
rule
)
resume_rules
[
symbol
]
=
refined_rules
definitions
.
append
((
self
.
RESUME_RULES_KEYWORD
,
repr
(
resume_rules
)))
# prepare parser class header and docstring and
# add EBNF grammar to the doc string of the parser class
...
...
@@ -703,23 +764,6 @@ class EBNFCompiler(Compiler):
return
rule
,
defn
def
_check_rx
(
self
,
node
:
Node
,
rx
:
str
)
->
str
:
"""
Checks whether the string `rx` represents a valid regular
expression. Makes sure that multiline regular expressions are
prepended by the multiline-flag. Returns the regular expression string.
"""
flags
=
self
.
re_flags
|
{
'x'
}
if
rx
.
find
(
'
\n
'
)
>=
0
else
self
.
re_flags
if
flags
:
rx
=
"(?%s)%s"
%
(
""
.
join
(
flags
),
rx
)
try
:
re
.
compile
(
rx
)
except
Exception
as
re_error
:
self
.
tree
.
new_error
(
node
,
"malformed regular expression %s: %s"
%
(
repr
(
rx
),
str
(
re_error
)))
return
rx
def
on_directive
(
self
,
node
:
Node
)
->
str
:
key
=
node
.
children
[
0
].
content
assert
key
not
in
self
.
directives
[
'tokens'
]
...
...
@@ -737,18 +781,6 @@ class EBNFCompiler(Compiler):
self
.
tree
.
new_error
(
node
,
'Directive "%s" must have one, but not %i values.'
%
(
key
,
len
(
node
.
children
)
-
1
))
def extract_regex(nd: Node) -> str:
    """Returns the regular expression string contained in `nd`.

    Whitespace markers ('~') around the expression are removed, but
    their presence is reported as an error on the enclosing directive
    node. Text between slashes is passed through `self._check_rx`, text
    between matching quotation marks through `escape_re`; anything
    else is returned as it is.
    """
    raw = nd.content
    value = raw.strip("~")
    if raw != value:
        # `node` and `key` are taken from the enclosing scope
        self.tree.new_error(node, "Whitespace marker '~' not allowed in definition "
                            "of %s regular expression." % key)
    delimiters = value[0] + value[-1]
    if delimiters == '//':
        return self._check_rx(node, value[1:-1])
    if delimiters in ('""', "''"):
        return escape_re(value[1:-1])
    return value
if
key
in
{
'comment'
,
'whitespace'
}:
check_argnum
()
if
node
.
children
[
1
].
parser
.
name
==
"symbol"
:
...
...
@@ -759,7 +791,7 @@ class EBNFCompiler(Compiler):
self
.
tree
.
new_error
(
node
,
'Value "%s" not allowed for directive "%s".'
%
(
value
,
key
))
else
:
value
=
extract_regex
(
node
.
children
[
1
])
value
=
self
.
_
extract_regex
(
node
.
children
[
1
])
if
key
==
'whitespace'
and
not
re
.
match
(
value
,
''
):
self
.
tree
.
new_error
(
node
,
"Implicit whitespace should always "
"match the empty string, /%s/ does not."
%
value
)
...
...
@@ -814,10 +846,10 @@ class EBNFCompiler(Compiler):
self
.
directives
[
'error'
][
symbol
]
=
error_msg
elif
key
.
endswith
(
'_resume'
):
if
not
all
(
child
.
parser
.
name
in
(
'literal'
,
'regexp'
)
for
child
in
node
.
children
[
1
:]):
self
.
tree
.
new_error
(
node
,
'Directive "%s" accepts only regular expressions or '
'plain strings as arguments, but no symbols without '
'quotation marks!'
%
key
)
#
if not all(child.parser.name in ('literal', 'regexp') for child in node.children[1:]):
#
self.tree.new_error(node, 'Directive "%s" accepts only regular expressions or '
#
'plain strings as arguments, but no symbols without '
#
'quotation marks!' % key)
symbol
=
key
[:
-
7
]
if
symbol
in
self
.
directives
[
'resume'
]:
self
.
tree
.
new_error
(
node
,
'Reentry conditions for "%s" have already been defined'
...
...
@@ -825,12 +857,13 @@ class EBNFCompiler(Compiler):
else
:
reentry_conditions
=
[]
# type: List[Union[unrepr, str]]
for
child
in
node
.
children
[
1
:]:
if
child
.
parser
.
name
==
'regexp'
:
reentry_conditions
.
append
(
unrepr
(
"re.compile(r'%s')"
%
extract_regex
(
child
)))
else
:
s
=
child
.
content
.
strip
()
s
=
s
.
strip
(
'"'
)
if
s
[
0
]
==
'"'
else
s
.
strip
(
"'"
)
reentry_conditions
.
append
(
s
)
rule
=
self
.
_generate_resume_rule
(
child
)
if
rule
:
reentry_conditions
.
append
(
rule
)
else
:
# child.parser.name == 'symbol'
if
child
.
content
not
in
self
.
symbols
:
self
.
symbols
[
child
.
content
]
=
node
reentry_conditions
.
append
(
unrepr
(
child
.
content
.
strip
()))
self
.
directives
[
'resume'
][
symbol
]
=
reentry_conditions
else
:
...
...
test/test_ebnf.py
View file @
f00daf04
...
...
@@ -473,12 +473,6 @@ class TestCuratedErrors:
series = "X" | head §"C" "D"
head = "A" "B"
"""
# from DHParser.dsl import compileDSL
# from DHParser.preprocess import nil_preprocessor
# from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
# grammar_src = compileDSL(lang, nil_preprocessor, get_ebnf_grammar(),
# get_ebnf_transformer(), get_ebnf_compiler("test", lang))
# print(grammar_src)
parser
=
grammar_provider
(
lang
)()
st
=
parser
(
"X"
);
assert
not
st
.
error_flag
st
=
parser
(
"ABCD"
);
assert
not
st
.
error_flag
...
...
@@ -491,19 +485,13 @@ class TestCuratedErrors:
assert
st
.
collect_errors
()[
0
].
code
==
Error
.
MANDATORY_CONTINUATION
assert
st
.
collect_errors
()[
0
].
message
==
"a user defined error message"
def
test_curated_error_
message_
case_sensitive
(
self
):
def
test_curated_error_case_sensitive
(
self
):
lang
=
"""
document = Series | /.*/
@Series_error = "a user defined error message"
Series = "X" | head §"C" "D"
head = "A" "B"
"""
# from DHParser.dsl import compileDSL
# from DHParser.preprocess import nil_preprocessor
# from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
# grammar_src = compileDSL(lang, nil_preprocessor, get_ebnf_grammar(),
# get_ebnf_transformer(), get_ebnf_compiler("test", lang))
# print(grammar_src)
parser
=
grammar_provider
(
lang
)()
st
=
parser
(
"ABC_"
);
assert
st
.
error_flag
assert
st
.
collect_errors
()[
0
].
code
==
Error
.
MANDATORY_CONTINUATION
...
...
@@ -513,8 +501,8 @@ class TestCuratedErrors:
class
TestCustomizedResumeParsing
:
def
setup
(
self
):
lang
=
"""
@ alpha_resume = 'BETA',
'
GAMMA
'
@ beta_resume =
'
GAMMA
'
@ alpha_resume = 'BETA', GAMMA
_STR
@ beta_resume = GAMMA
_RE
@ bac_resume = /GA\w+/
document = alpha [beta] gamma "."
alpha = "ALPHA" abc
...
...
@@ -525,6 +513,8 @@ class TestCustomizedResumeParsing:
gamma = "GAMMA" §(cab | cba)
cab = "c" "a" §"b"
cba = "c" "b" §"a"
GAMMA_RE = /GA\w+/
GAMMA_STR = "GAMMA"
"""
try
:
self
.
gr
=
grammar_provider
(
lang
)()
...
...
@@ -541,7 +531,7 @@ class TestCustomizedResumeParsing:
assert
cst
.
pick
(
'alpha'
).
content
.
startswith
(
'ALPHA'
)
# because of resuming, there should be only one error message
assert
len
(
cst
.
collect_errors
())
==
1
# multiple failures
content
=
'ALPHA acb BETA bad GAMMA cab .'
cst
=
gr
(
content
)
# print(cst.as_sxpr())
...
...
@@ -551,6 +541,15 @@ class TestCustomizedResumeParsing:
# because of resuming, there should be only two error messages
assert
len
(
cst
.
collect_errors
())
==
2
content
=
'ALPHA acb GAMMA cab .'
cst
=
gr
(
content
)
# print(cst.as_sxpr())
assert
cst
.
error_flag
assert
cst
.
content
==
content
assert
cst
.
pick
(
'alpha'
).
content
.
startswith
(
'ALPHA'
)
# because of resuming, there should be only one error message
assert
len
(
cst
.
collect_errors
())
==
1
if
__name__
==
"__main__"
:
from
DHParser.testing
import
runner
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment