Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
bae74d44
Commit
bae74d44
authored
Sep 19, 2017
by
Eckhart Arnold
Browse files
- Merged Required-Operator into Series-Parser; changed terminology from "Required" to "mandatory"
parent
e9b10f1f
Changes
3
Hide whitespace changes
Inline
Side-by-side
DHParser/error.py
View file @
bae74d44
...
...
@@ -34,10 +34,16 @@ __all__ = ('Error',
class
Error
:
__slots__
=
[
'message'
,
'level'
,
'code'
,
'pos'
,
'line'
,
'column'
]
# error levels
WARNING
=
1
ERROR
=
1000
HIGHEST
=
ERROR
# error codes
MANDATORY_CONTINUATION
=
1001
def
__init__
(
self
,
message
:
str
,
level
:
int
=
ERROR
,
code
:
Hashable
=
0
):
self
.
message
=
message
assert
level
>=
0
...
...
DHParser/parser.py
View file @
bae74d44
...
...
@@ -78,7 +78,7 @@ from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, s
StringView
,
EMPTY_STRING_VIEW
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
ParserBase
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
ZOMBIE_PARSER
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
line_col
,
error_messages
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
line_col
from
DHParser.toolkit
import
load_if_file
__all__
=
(
'PreprocessorFunc'
,
...
...
@@ -1394,44 +1394,81 @@ class Series(NaryOperator):
EBNF-Notation: `... ...` (sequence of parsers separated by a blank or new line)
EBNF-Example: `series = letter letter_or_digit`
"""
# Pattern for a whitespace character followed by a non-whitespace character
# (the latter captured as group 1). `__call__` searches for it to decide how
# much text is put into the error node when a mandatory parser does not match.
RX_ARGUMENT
=
re
.
compile
(
r
'\s(\S)'
)
# Sentinel value of `mandatory` meaning "no parser of the series is mandatory".
# It is the default of the `mandatory` parameter of `__init__`, and `__init__`
# also asserts that a series has fewer than NOPE parsers, so that a real index
# can never be equal to the sentinel.
NOPE
=
1000
def __init__(self, *parsers: Parser, mandatory: int = NOPE, name: str = '') -> None:
    """Initialize a series of parsers that have to match one after another.

    Args:
        *parsers: The parsers that make up the series. At least one parser
            and fewer than ``Series.NOPE`` parsers must be given.
        mandatory: Index of the first mandatory parser of the series. When
            a parser at or after this index does not match, `__call__`
            adds an error with code ``Error.MANDATORY_CONTINUATION``
            instead of returning a plain non-match. A negative value is
            counted from the end of the series. ``Series.NOPE`` (the
            default) means that no parser is mandatory.
        name: Name of the parser; passed on to the base class initializer.

    Raises:
        AssertionError: If the series is empty, if it has ``Series.NOPE``
            or more parsers, or if `mandatory` is neither a valid index
            into the series nor ``Series.NOPE``.
    """
    super(Series, self).__init__(*parsers, name=name)
    length = len(self.parsers)
    assert length >= 1, 'A series requires at least one parser'
    assert length < Series.NOPE, 'Length %i of series exceeds maximum length of %i' \
        % (length, Series.NOPE)
    if mandatory < 0:
        # negative values address the series from its end, like list indices
        mandatory += length
    assert 0 <= mandatory < length or mandatory == Series.NOPE, \
        'mandatory index %i is out of range for a series of length %i' \
        % (mandatory, length)
    self.mandatory = mandatory
def __deepcopy__(self, memo):
    """Return a new parser of the same class built from deep copies of
    this series' parsers, with the same `mandatory` index and `name`.

    `memo` is the memo dictionary of the running deep-copy operation and
    is handed through to `copy.deepcopy`.
    """
    cloned_parsers = copy.deepcopy(self.parsers, memo)
    duplicate = self.__class__(*cloned_parsers,
                               mandatory=self.mandatory,
                               name=self.name)
    return duplicate
def __call__(self, text: StringView) -> Tuple[Node, StringView]:
    """Apply the parsers of the series one after another to `text`.

    Each parser is applied to the text left over by its predecessor. If a
    parser located before the mandatory position (`self.mandatory`) does
    not match, the whole series does not match. If a parser at or after
    the mandatory position does not match, a node carrying an error with
    code ``Error.MANDATORY_CONTINUATION`` is added to the results in place
    of the missing match. The loop stops at the first result node whose
    `error_flag` is set.

    Args:
        text: The text to be parsed.

    Returns:
        ``(None, text)`` with the unchanged input text if the series does
        not match; otherwise a tuple of a node that contains the collected
        result nodes and the text that remains to be parsed.
    """
    results = ()  # type: Tuple[Node, ...]
    text_ = text  # type: StringView
    for pos, parser in enumerate(self.parsers):
        # Remember where this parser is applied, so that a mandatory
        # violation is reported for (and skipped from) this location and
        # not for the beginning of the whole series.
        location = text_
        node, text_ = parser(location)
        if not node:
            if pos < self.mandatory:
                return None, text
            # Mandatory parser failed: put the text up to the next
            # non-whitespace character that follows a whitespace character
            # (or a single character, if there is none) into an error node.
            m = location.search(Series.RX_ARGUMENT)
            i = max(1, location.index(m.regs[1][0])) if m else 1
            node = Node(self, location[:i])
            text_ = location[i:]
            node.add_error('%s expected; "%s" found!' % (str(parser), location[:10]),
                           code=Error.MANDATORY_CONTINUATION)
        results += (node,)
        if node.error_flag:
            break
    assert len(results) <= len(self.parsers)
    return Node(self, results), text_
def __repr__(self):
    """Return the `repr` attributes of the parsers joined by blanks.

    If the series has a mandatory position (`self.mandatory` differs from
    ``Series.NOPE``), the marker ``§`` is placed in front of the first
    mandatory parser.
    """
    parts = [parser.repr for parser in self.parsers]
    if self.mandatory != Series.NOPE:
        parts.insert(self.mandatory, '§')
    return " ".join(parts)
# The following operator definitions add syntactical sugar, so one can write:
# `RE('\d+') + Optional(RE('\.\d+'))` instead of `Series(RE('\d+'), Optional(RE('\.\d+')))`
@staticmethod
def combined_mandatory(left, right):
    """Return the mandatory index of the concatenation of `left` and `right`.

    Either argument may be a `Series` or any other parser; a parser that
    is not a `Series` counts as a single parser without a mandatory
    position. A mandatory position of `left` takes precedence. Otherwise
    a mandatory position of `right` is shifted by the number of parsers
    that `left` contributes. ``Series.NOPE`` is returned if neither side
    has a mandatory position.
    """
    if isinstance(left, Series):
        if left.mandatory != Series.NOPE:
            return left.mandatory
        offset = len(left.parsers)
    else:
        offset = 1
    if isinstance(right, Series) and right.mandatory != Series.NOPE:
        return right.mandatory + offset
    return Series.NOPE
def __add__(self, other: Parser) -> 'Series':
    """Return a new `Series` of this series' parsers followed by `other`.

    If `other` is itself a `Series`, its parsers are appended one by one;
    any other parser is appended as a single element. The mandatory
    position of the result is determined by `combined_mandatory`, so that
    a mandatory marker of either operand is not lost.
    """
    other_parsers = cast('Series', other).parsers if isinstance(other, Series) \
        else cast(Tuple[Parser, ...], (other,))  # type: Tuple[Parser, ...]
    return Series(*(self.parsers + other_parsers),
                  mandatory=self.combined_mandatory(self, other))
def __radd__(self, other: Parser) -> 'Series':
    """Return a new `Series` of `other` followed by this series' parsers.

    If `other` is itself a `Series`, its parsers are prepended one by one;
    any other parser is prepended as a single element. The mandatory
    position of the result is determined by `combined_mandatory` with
    `other` as the left and this series as the right operand.
    """
    other_parsers = cast('Series', other).parsers if isinstance(other, Series) \
        else cast(Tuple[Parser, ...], (other,))  # type: Tuple[Parser, ...]
    return Series(*(other_parsers + self.parsers),
                  mandatory=self.combined_mandatory(other, self))
def __iadd__(self, other: Parser) -> 'Series':
    """Append `other` to this series in place and return the series.

    If `other` is itself a `Series`, its parsers are appended one by one;
    any other parser is appended as a single element. The mandatory
    position is updated with `combined_mandatory`.
    """
    other_parsers = cast('Series', other).parsers if isinstance(other, Series) \
        else cast(Tuple[Parser, ...], (other,))  # type: Tuple[Parser, ...]
    # The combined mandatory position has to be computed while
    # `self.parsers` still has its old length: `combined_mandatory` shifts
    # the mandatory index of `other` by `len(self.parsers)`, which would be
    # too large once the parsers of `other` have been appended.
    mandatory = self.combined_mandatory(self, other)
    self.parsers += other_parsers
    self.mandatory = mandatory
    return self
...
...
@@ -1526,9 +1563,8 @@ class Required(FlowOperator):
i
=
max
(
1
,
text
.
index
(
m
.
regs
[
1
][
0
]))
if
m
else
1
node
=
Node
(
self
,
text
[:
i
])
text_
=
text
[
i
:]
# assert False, "*"+text[:i]+"*"
node
.
add_error
(
'%s expected; "%s" found!'
%
(
str
(
self
.
parser
),
text
[:
10
]))
node
.
add_error
(
'%s expected; "%s" found!'
%
(
str
(
self
.
parser
),
text
[:
10
]),
code
=
Error
.
MANDATORY_CONTINUATION
)
return
node
,
text_
def
__repr__
(
self
):
...
...
@@ -1561,19 +1597,6 @@ class NegativeLookahead(Lookahead):
return
not
bool_value
# def iter_right_branch(node) -> Iterator[Node]:
# """
# Iterates over the right branch of `node` starting with node itself.
# Iteration is stopped if either there are no child nodes any more or
# if the parser of a node is a Lookahead parser. (Reason is: Since
# lookahead nodes do not advance the parser, it does not make sense
# to look back to them.)
# """
# while node:
# yield node # for well-formed EBNF code
# node = node.children[-1] if node.children else None
class
Lookbehind
(
FlowOperator
):
"""EXPERIMENTAL!!!"""
def
__init__
(
self
,
parser
:
Parser
,
name
:
str
=
''
)
->
None
:
...
...
test/test_parser.py
View file @
bae74d44
...
...
@@ -25,8 +25,9 @@ from functools import partial
sys
.
path
.
extend
([
'../'
,
'./'
])
from
DHParser.toolkit
import
is_logging
,
logging
,
compile_python_object
,
StringView
from
DHParser.error
import
Error
from
DHParser.parser
import
compile_source
,
Retrieve
,
Grammar
,
Forward
,
Token
,
ZeroOrMore
,
RE
,
\
RegExp
,
Lookbehind
,
NegativeLookahead
,
OneOrMore
,
Series
RegExp
,
Lookbehind
,
NegativeLookahead
,
OneOrMore
,
Series
,
Alternative
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
from
DHParser.dsl
import
grammar_provider
,
DHPARSER_IMPORTS
...
...
@@ -250,6 +251,75 @@ class TestGrammar:
assert
not
CST
.
error_flag
,
CST
.
as_sxpr
()
class TestSeries:
    """Tests for the Series parser and its mandatory marker (§)."""

    def test_non_mandatory(self):
        """Without a mandatory marker, a failing series yields no error,
        because the second alternative of `document` matches anything."""
        lang = """
document = series | /.*/
series = "A" "B" "C" "D"
"""
        parser = grammar_provider(lang)()
        for source in ("ABCD", "A_CD", "AB_D"):
            st = parser(source)
            assert not st.error_flag

    def test_mandatory(self):
        """Test for the §-operator: a non-match after the mandatory marker
        must be reported as an error; a non-match before it must not."""
        lang = """
document = series | /.*/
series = "A" "B" §"C" "D"
"""
        parser = grammar_provider(lang)()
        for source in ("ABCD", "A_CD"):
            st = parser(source)
            assert not st.error_flag
        for source in ("AB_D", "ABC_"):
            st = parser(source)
            assert st.error_flag
            assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION

    def test_series_composition(self):
        """The mandatory position must survive the concatenation of two
        series with `+`, whichever operand carries it."""
        token_a, token_b, token_c, token_d, token_e = (Token(b) for b in "ABCDE")
        s1 = Series(token_a, token_b, token_c, mandatory=2)
        s2 = Series(token_d, token_e)

        # mandatory series on the left: "C" and everything after it is mandatory
        parser = Grammar(Alternative(s1 + s2, RegExp('.*')))
        for source in ("ABCDE", "A_CDE"):
            st = parser(source)
            assert not st.error_flag
        for source in ("AB_DE", "ABC_E"):
            st = parser(source)
            assert st.error_flag
            assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION

        # mandatory series on the right: only the trailing "C" is mandatory
        parser = Grammar(Alternative(s2 + s1, RegExp('.*')))
        for source in ("DEABC", "_EABC", "D_ABC", "DE_BC", "DEA_C"):
            st = parser(source)
            assert not st.error_flag
        st = parser("DEAB_")
        assert st.error_flag
        assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION
class
TestPopRetrieve
:
mini_language
=
"""
document = { text | codeblock }
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment