Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
f53a902c
Commit
f53a902c
authored
Oct 19, 2017
by
Eckhart Arnold
Browse files
EBNF Syntax of AllOf-, SomeOf-parsers implemented + tests
parent
cd78155a
Changes
5
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
f53a902c
...
...
@@ -71,7 +71,7 @@ except ImportError:
import re
from DHParser import logging, is_filename, load_if_file,
\\
Grammar, Compiler, nil_preprocessor,
\\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym,
\\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym,
AllOf, SomeOf, Unordered,
\\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture,
\\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source,
\\
last_value, counterpart, accumulate, PreprocessorFunc,
\\
...
...
DHParser/ebnf.py
View file @
f53a902c
...
...
@@ -19,20 +19,19 @@ permissions and limitations under the License.
import
keyword
from
collections
import
OrderedDict
from
functools
import
partial
from
typing
import
Callable
,
Dict
,
List
,
Set
,
Tuple
from
DHParser.
toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
,
re
,
typing
from
DHParser.
error
import
Error
from
DHParser.parser
import
Grammar
,
mixin_comment
,
nil_preprocessor
,
Forward
,
RegExp
,
RE
,
\
NegativeLookahead
,
Alternative
,
Series
,
Option
,
OneOrMore
,
ZeroOrMore
,
Token
,
\
Required
,
Compiler
,
PreprocessorFunc
Compiler
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.
error
import
Error
from
DHParser.
toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
,
re
from
DHParser.transform
import
traverse
,
remove_brackets
,
\
reduce_single_child
,
replace_by_single_child
,
remove_expendables
,
\
remove_tokens
,
flatten
,
forbid
,
assert_content
,
remove_infix_operator
from
DHParser.versionnumber
import
__version__
from
typing
import
Callable
,
Dict
,
List
,
Set
,
Tuple
,
Union
__all__
=
(
'get_ebnf_preprocessor'
,
'get_ebnf_grammar'
,
'get_ebnf_transformer'
,
...
...
@@ -66,79 +65,6 @@ def get_ebnf_preprocessor() -> PreprocessorFunc:
########################################################################
# class EBNFGrammar(Grammar):
# r"""Parser for an EBNF_variant source file, with this grammar:
#
# # EBNF-Grammar in EBNF
#
# @ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
# @ whitespace = /\s*/ # whitespace includes linefeed
# @ literalws = right # trailing whitespace of literals will be ignored tacitly
#
# syntax = [~//] { definition | directive } §EOF
# definition = symbol §"=" §expression
# directive = "@" §symbol §"=" §( regexp | literal | list_ )
#
# expression = term { "|" term }
# term = { factor }+
# factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
# | [flowmarker] literal
# | [flowmarker] regexp
# | [flowmarker] group
# | [flowmarker] oneormore
# | repetition
# | option
#
# flowmarker = "!" | "&" | "§" # '!' negative lookahead, '&' positive lookahead, '§' required
# | "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
# retrieveop = "::" | ":" # '::' pop, ':' retrieve
#
# group = "(" expression §")"
# oneormore = "{" expression "}+"
# repetition = "{" expression §"}"
# option = "[" expression §"]"
#
# symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
# literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
# | /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
# regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# # '~' is a whitespace-marker, if present leading or trailing
# # whitespace of a regular expression will be ignored tacitly.
# list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# # BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
# EOF = !/./
# """
# expression = Forward()
# source_hash__ = "4735db10f0b79d44209d1de0184b2ca0"
# parser_initialization__ = "upon instantiation"
# COMMENT__ = r'#.*(?:\n|$)'
# WHITESPACE__ = r'\s*'
# WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
# wspL__ = ''
# wspR__ = WSP__
# EOF = NegativeLookahead(RegExp('.'))
# list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
# regexp = RE('~?/(?:\\\\/|[^/])*?/~?')
# literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
# symbol = RE('(?!\\d)\\w+')
# option = Series(Token("["), expression, Required(Token("]")))
# repetition = Series(Token("{"), expression, Required(Token("}")))
# oneormore = Series(Token("{"), expression, Token("}+"))
# group = Series(Token("("), expression, Required(Token(")")))
# retrieveop = Alternative(Token("::"), Token(":"))
# flowmarker = Alternative(Token("!"), Token("&"), Token("§"), Token("-!"), Token("-&"))
# factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
# Series(Option(flowmarker), literal), Series(Option(flowmarker), regexp),
# Series(Option(flowmarker), group), Series(Option(flowmarker), oneormore), repetition, option)
# term = OneOrMore(factor)
# expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
# directive = Series(Token("@"), Required(symbol), Required(Token("=")),
# Required(Alternative(regexp, literal, list_)))
# definition = Series(symbol, Required(Token("=")), Required(expression))
# syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
# root__ = syntax
class
EBNFGrammar
(
Grammar
):
r
"""Parser for an EBNF source file, with this grammar:
...
...
@@ -157,8 +83,9 @@ class EBNFGrammar(Grammar):
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] regexp
| [flowmarker] group
| [flowmarker] oneormore
| [flowmarker] group
| [flowmarker] unordered
| repetition
| option
...
...
@@ -166,10 +93,11 @@ class EBNFGrammar(Grammar):
| "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
group = "(" §expression ")"
unordered = "<" §expression ">" # elements of expression in arbitrary order
oneormore = "{" expression "}+"
repetition = "{" expression
§
"}"
option = "[" expression
§
"]"
repetition = "{"
§
expression "}"
option = "["
§
expression "]"
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
...
...
@@ -182,8 +110,8 @@ class EBNFGrammar(Grammar):
EOF = !/./
"""
expression
=
Forward
()
source_hash__
=
"
a131abc5259738631000cda90d2fc65b
"
tialization__
=
"upon instantiation"
source_hash__
=
"
3fc9f5a340f560e847d9af0b61a68743
"
parser_ini
tialization__
=
"upon instantiation"
COMMENT__
=
r
'#.*(?:\n|$)'
WHITESPACE__
=
r
'\s*'
WSP__
=
mixin_comment
(
whitespace
=
WHITESPACE__
,
comment
=
COMMENT__
)
...
...
@@ -197,17 +125,22 @@ class EBNFGrammar(Grammar):
option
=
Series
(
Token
(
"["
),
expression
,
Token
(
"]"
),
mandatory
=
1
)
repetition
=
Series
(
Token
(
"{"
),
expression
,
Token
(
"}"
),
mandatory
=
1
)
oneormore
=
Series
(
Token
(
"{"
),
expression
,
Token
(
"}+"
))
unordered
=
Series
(
Token
(
"<"
),
expression
,
Token
(
">"
),
mandatory
=
1
)
group
=
Series
(
Token
(
"("
),
expression
,
Token
(
")"
),
mandatory
=
1
)
retrieveop
=
Alternative
(
Token
(
"::"
),
Token
(
":"
))
flowmarker
=
Alternative
(
Token
(
"!"
),
Token
(
"&"
),
Token
(
"-!"
),
Token
(
"-&"
))
factor
=
Alternative
(
Series
(
Option
(
flowmarker
),
Option
(
retrieveop
),
symbol
,
NegativeLookahead
(
Token
(
"="
))),
Series
(
Option
(
flowmarker
),
literal
),
Series
(
Option
(
flowmarker
),
regexp
),
Series
(
Option
(
flowmarker
),
group
),
Series
(
Option
(
flowmarker
),
oneormore
),
repetition
,
option
)
factor
=
Alternative
(
Series
(
Option
(
flowmarker
),
Option
(
retrieveop
),
symbol
,
NegativeLookahead
(
Token
(
"="
))),
Series
(
Option
(
flowmarker
),
literal
),
Series
(
Option
(
flowmarker
),
regexp
),
Series
(
Option
(
flowmarker
),
oneormore
),
Series
(
Option
(
flowmarker
),
group
),
Series
(
Option
(
flowmarker
),
unordered
),
repetition
,
option
)
term
=
OneOrMore
(
Series
(
Option
(
Token
(
"§"
)),
factor
))
expression
.
set
(
Series
(
term
,
ZeroOrMore
(
Series
(
Token
(
"|"
),
term
))))
directive
=
Series
(
Token
(
"@"
),
symbol
,
Token
(
"="
),
Alternative
(
regexp
,
literal
,
list_
),
mandatory
=
1
)
directive
=
Series
(
Token
(
"@"
),
symbol
,
Token
(
"="
),
Alternative
(
regexp
,
literal
,
list_
),
mandatory
=
1
)
definition
=
Series
(
symbol
,
Token
(
"="
),
expression
,
mandatory
=
1
)
syntax
=
Series
(
Option
(
RE
(
''
,
wR
=
''
,
wL
=
WSP__
)),
ZeroOrMore
(
Alternative
(
definition
,
directive
)),
EOF
,
mandatory
=
2
)
syntax
=
Series
(
Option
(
RE
(
''
,
wR
=
''
,
wL
=
WSP__
)),
ZeroOrMore
(
Alternative
(
definition
,
directive
)),
EOF
,
mandatory
=
2
)
root__
=
syntax
...
...
@@ -275,6 +208,8 @@ EBNF_AST_transformation_table = {
replace_by_single_child
,
"group"
:
[
remove_brackets
,
replace_by_single_child
],
"unordered"
:
remove_brackets
,
"oneormore, repetition, option"
:
[
reduce_single_child
,
remove_brackets
,
forbid
(
'repetition'
,
'option'
,
'oneormore'
),
assert_content
(
r
'(?!§)'
)],
...
...
@@ -893,6 +828,9 @@ class EBNFCompiler(Compiler):
raise
EBNFCompilerError
(
"Group nodes should have been eliminated by "
"AST transformation!"
)
def on_unordered(self, node: Node) -> str:
    """Handle an `unordered` node.

    Delegates to `self.non_terminal`, passing the node together with the
    name 'Unordered', and returns whatever that call yields.
    """
    # TODO: implementation must support AllOf as well as SomeOf
    return self.non_terminal(node, 'Unordered')
def
on_symbol
(
self
,
node
:
Node
)
->
str
:
# called only for symbols on the right hand side!
symbol
=
str
(
node
)
# ; assert result == cast(str, node.result)
...
...
DHParser/parser.py
View file @
f53a902c
...
...
@@ -93,6 +93,7 @@ __all__ = ('PreprocessorFunc',
'Alternative'
,
'AllOf'
,
'SomeOf'
,
'Unordered'
,
'FlowOperator'
,
'Required'
,
'Lookahead'
,
...
...
@@ -110,7 +111,6 @@ __all__ = ('PreprocessorFunc',
'compile_source'
)
########################################################################
#
# Grammar and parsing infrastructure
...
...
@@ -1633,6 +1633,17 @@ class SomeOf(NaryOperator):
return
'<'
+
' | '
.
join
(
parser
.
repr
for
parser
in
self
.
parsers
)
+
'>'
def Unordered(parser: NaryOperator, name: str = '') -> NaryOperator:
    """Wrap `parser` in the matching order-insensitive parser.

    A `Series` is wrapped in `AllOf`, an `Alternative` in `SomeOf`. In both
    cases `parser` is passed as the single positional argument and `name`
    is forwarded as a keyword argument.

    Raises:
        AssertionError: if `parser` is neither a `Series` nor an
            `Alternative`.
    """
    # Pick the wrapper class first; the Series check keeps precedence over
    # the Alternative check.
    if isinstance(parser, Series):
        wrapper = AllOf
    elif isinstance(parser, Alternative):
        wrapper = SomeOf
    else:
        raise AssertionError("Unordered can take only Series or Alternative as parser.")
    return wrapper(parser, name=name)
########################################################################
#
...
...
examples/EBNF/EBNF.ebnf
View file @
f53a902c
...
...
@@ -15,7 +15,7 @@ factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be
| [flowmarker] regexp
| [flowmarker] oneormore
| [flowmarker] group
#
| [flowmarker]
set
| [flowmarker]
unordered
| repetition
| option
...
...
test/test_ebnf.py
View file @
f53a902c
...
...
@@ -397,6 +397,18 @@ class TestFlowControlOperators:
# print(error)
class TestAllSome:
    """Tests for the `<...>` (unordered) EBNF syntax."""

    def test_all(self):
        """`<"A" "B">` must accept its elements in swapped order."""
        parser = grammar_provider('prefix = <"A" "B">')()
        assert parser('B A').content() == 'B A'

    def test_some(self):
        """`<"A" | "B">` must accept both elements swapped, or only one."""
        parser = grammar_provider('prefix = <"A" | "B">')()
        for text in ('B A', 'B'):
            assert parser(text).content() == text
if
__name__
==
"__main__"
:
from
DHParser.testing
import
runner
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment