Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
ef868768
Commit
ef868768
authored
Jan 05, 2019
by
eckhart
Browse files
- refactoring of ebnf.py
parent
4f01e917
Changes
6
Hide whitespace changes
Inline
Side-by-side
DHParser/ebnf.py
View file @
ef868768
...
...
@@ -78,61 +78,56 @@ def get_ebnf_preprocessor() -> PreprocessorFunc:
class
EBNFGrammar
(
Grammar
):
r
"""
Parser for an EBNF source file, with this grammar::
# EBNF-Grammar in EBNF
@ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol "=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { ["§"] factor }+ # "§" means all following factors mandatory
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] plaintext
| [flowmarker] regexp
| [flowmarker] whitespace
| [flowmarker] oneormore
| [flowmarker] group
| [flowmarker] unordered
| repetition
| option
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
| "-!" | "-&" # '-!' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" §expression ")"
unordered = "<" §expression ">" # elements of expression in arbitrary order
oneormore = "{" expression "}+"
repetition = "{" §expression "}"
option = "[" §expression "]"
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
whitespace = /~/~ # implicit or default whitespace
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
EOF = !/./
Parser for an EBNF source file, with this grammar:
@ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol "=" (regexp | literal | symbol) { "," (regexp | literal | symbol) }
expression = term { "|" term }
term = { ["§"] factor }+ # "§" means all following factors mandatory
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] plaintext
| [flowmarker] regexp
| [flowmarker] whitespace
| [flowmarker] oneormore
| [flowmarker] group
| [flowmarker] unordered
| repetition
| option
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
| "-!" | "-&" # '-!' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" §expression ")"
unordered = "<" §expression ">" # elements of expression in arbitrary order
oneormore = "{" expression "}+"
repetition = "{" §expression "}"
option = "[" §expression "]"
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace
EOF = !/./
"""
expression
=
Forward
()
source_hash__
=
"82a7c668f86b83f86515078e6c9093ed"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'#.*(?:\n|$)'
WHITESPACE__
=
r
'\s*'
WSP_RE__
=
mixin_comment
(
whitespace
=
WHITESPACE__
,
comment
=
COMMENT__
)
wsp__
=
Whitespace
(
WSP_RE__
)
EOF
=
NegativeLookahead
(
RegExp
(
'.'
))
list_
=
Series
(
RegExp
(
'
\\
w+'
),
wsp__
,
ZeroOrMore
(
Series
(
Series
(
Token
(
","
),
wsp__
),
RegExp
(
'
\\
w+'
),
wsp__
)))
whitespace
=
Series
(
RegExp
(
'~'
),
wsp__
)
regexp
=
Series
(
RegExp
(
'/(?:
\\\\
/|[^/])*?/'
),
wsp__
)
plaintext
=
Series
(
RegExp
(
'`(?:[^"]|
\\\\
")*?`'
),
wsp__
)
...
...
@@ -147,18 +142,17 @@ class EBNFGrammar(Grammar):
flowmarker
=
Alternative
(
Series
(
Token
(
"!"
),
wsp__
),
Series
(
Token
(
"&"
),
wsp__
),
Series
(
Token
(
"-!"
),
wsp__
),
Series
(
Token
(
"-&"
),
wsp__
))
factor
=
Alternative
(
Series
(
Option
(
flowmarker
),
Option
(
retrieveop
),
symbol
,
NegativeLookahead
(
Series
(
Token
(
"="
),
wsp__
))),
Series
(
Option
(
flowmarker
),
literal
),
Series
(
Option
(
flowmarker
),
plaintext
),
Series
(
Option
(
flowmarker
),
regexp
),
Series
(
Option
(
flowmarker
),
whitespace
),
Series
(
Option
(
flowmarker
),
oneormore
),
Series
(
Option
(
flowmarker
),
group
),
Series
(
Option
(
flowmarker
),
unordered
),
repetition
,
option
)
NegativeLookahead
(
Series
(
Token
(
"="
),
wsp__
))),
Series
(
Option
(
flowmarker
),
literal
),
Series
(
Option
(
flowmarker
),
plaintext
),
Series
(
Option
(
flowmarker
),
regexp
),
Series
(
Option
(
flowmarker
),
whitespace
),
Series
(
Option
(
flowmarker
),
oneormore
),
Series
(
Option
(
flowmarker
),
group
),
Series
(
Option
(
flowmarker
),
unordered
),
repetition
,
option
)
term
=
OneOrMore
(
Series
(
Option
(
Series
(
Token
(
"§"
),
wsp__
)),
factor
))
expression
.
set
(
Series
(
term
,
ZeroOrMore
(
Series
(
Series
(
Token
(
"|"
),
wsp__
),
term
))))
directive
=
Series
(
Series
(
Token
(
"@"
),
wsp__
),
symbol
,
Series
(
Token
(
"="
),
wsp__
),
Alternative
(
regexp
,
literal
,
list_
),
mandatory
=
1
)
Alternative
(
regexp
,
literal
,
symbol
),
ZeroOrMore
(
Series
(
Series
(
Token
(
","
),
wsp__
),
Alternative
(
regexp
,
literal
,
symbol
))),
mandatory
=
1
)
definition
=
Series
(
symbol
,
Series
(
Token
(
"="
),
wsp__
),
expression
,
mandatory
=
1
)
syntax
=
Series
(
Option
(
Series
(
wsp__
,
RegExp
(
''
))),
ZeroOrMore
(
Alternative
(
definition
,
directive
)),
EOF
,
mandatory
=
2
)
syntax
=
Series
(
Option
(
Series
(
wsp__
,
RegExp
(
''
))),
ZeroOrMore
(
Alternative
(
definition
,
directive
)),
EOF
,
mandatory
=
2
)
root__
=
syntax
...
...
@@ -217,7 +211,7 @@ EBNF_AST_transformation_table = {
"syntax"
:
[],
# otherwise '"*": replace_by_single_child' would be applied
"directive, definition"
:
remove_tokens
(
'@'
,
'='
)
,
[
flatten
,
remove_tokens
(
'@'
,
'='
,
','
)]
,
"expression"
:
[
replace_by_single_child
,
flatten
,
remove_tokens
(
'|'
)],
# remove_infix_operator],
"term"
:
...
...
@@ -236,8 +230,8 @@ EBNF_AST_transformation_table = {
reduce_single_child
,
(
TOKEN_PTYPE
,
WHITESPACE_PTYPE
):
reduce_single_child
,
"list_"
:
[
flatten
,
remove_infix_operator
],
#
"list_":
#
[flatten, remove_infix_operator],
"*"
:
replace_by_single_child
}
...
...
@@ -734,21 +728,24 @@ class EBNFCompiler(Compiler):
return
""
self
.
defined_directives
.
add
(
key
)
def
check_argnum
(
n
:
int
=
1
):
if
len
(
node
.
children
)
>
n
+
1
:
self
.
tree
.
new_error
(
node
,
'Directive "%s" must have one, but not %i values.'
%
(
key
,
len
(
node
.
children
)
-
1
))
if
key
in
{
'comment'
,
'whitespace'
}:
if
node
.
children
[
1
].
parser
.
name
==
"list_"
:
if
len
(
node
.
children
[
1
].
result
)
!=
1
:
self
.
tree
.
new_error
(
node
,
'Directive "%s" must have one, but not %i values.'
%
(
key
,
len
(
node
.
children
[
1
].
result
)))
value
=
self
.
compile
(
node
.
children
[
1
]).
pop
()
check_argnum
()
if
node
.
children
[
1
].
parser
.
name
==
"symbol"
:
value
=
node
.
children
[
1
].
content
if
key
==
'whitespace'
and
value
in
EBNFCompiler
.
WHITESPACE
:
value
=
EBNFCompiler
.
WHITESPACE
[
value
]
# replace whitespace-name by regex
else
:
self
.
tree
.
new_error
(
node
,
'Value "%s" not allowed for directive "%s".'
%
(
value
,
key
))
else
:
value
=
node
.
children
[
1
].
content
.
strip
(
"~"
)
# cast(str, node.children[
# 1].result).strip("~")
if
value
!=
node
.
children
[
1
].
content
:
# cast(str, node.children[1].result)
:
value
=
node
.
children
[
1
].
content
.
strip
(
"~"
)
#
cast(str, node.children[
1].result).strip("~")
if
value
!=
node
.
children
[
1
].
content
:
# cast(str, node.children[1].result)
self
.
tree
.
new_error
(
node
,
"Whitespace marker '~' not allowed in definition "
"of %s regular expression."
%
key
)
if
value
[
0
]
+
value
[
-
1
]
in
{
'""'
,
"''"
}:
...
...
@@ -761,6 +758,7 @@ class EBNFCompiler(Compiler):
self
.
directives
[
key
]
=
value
elif
key
==
'ignorecase'
:
check_argnum
()
if
node
.
children
[
1
].
content
.
lower
()
not
in
{
"off"
,
"false"
,
"no"
}:
self
.
re_flags
.
add
(
'i'
)
...
...
@@ -769,7 +767,7 @@ class EBNFCompiler(Compiler):
# self.directives['testing'] = value.lower() not in {"off", "false", "no"}
elif
key
==
'literalws'
:
value
=
{
item
.
lower
()
for
item
in
self
.
compile
(
node
.
children
[
1
]
)
}
value
=
{
child
.
content
.
strip
().
lower
()
for
child
in
node
.
children
[
1
:
]}
if
((
value
-
{
'left'
,
'right'
,
'both'
,
'none'
})
or
(
'none'
in
value
and
len
(
value
)
>
1
)):
self
.
tree
.
new_error
(
node
,
'Directive "literalws" allows only `left`, `right`, '
...
...
@@ -779,7 +777,7 @@ class EBNFCompiler(Compiler):
self
.
directives
[
key
]
=
list
(
wsp
)
elif
key
in
{
'tokens'
,
'preprocessor_tokens'
}:
tokens
=
self
.
compile
(
node
.
children
[
1
])
tokens
=
{
child
.
content
.
strip
()
for
child
in
node
.
children
[
1
:]}
redeclared
=
self
.
directives
[
'tokens'
]
&
tokens
if
redeclared
:
self
.
tree
.
new_error
(
node
,
'Tokens %s have already been declared earlier. '
...
...
@@ -788,17 +786,14 @@ class EBNFCompiler(Compiler):
self
.
directives
[
'tokens'
]
|=
tokens
-
redeclared
elif
key
.
endswith
(
'_filter'
):
filter_set
=
self
.
compile
(
node
.
children
[
1
])
if
not
isinstance
(
filter_set
,
set
)
or
len
(
filter_set
)
!=
1
:
self
.
tree
.
new_error
(
node
,
'Directive "%s" accepts exactly on symbol, not %s'
%
(
key
,
str
(
filter_set
)))
self
.
directives
[
'filter'
][
key
[:
-
7
]]
=
filter_set
.
pop
()
check_argnum
()
self
.
directives
[
'filter'
][
key
[:
-
7
]]
=
node
.
children
[
1
].
content
.
strip
()
elif
key
.
endswith
(
'_error'
):
check_argnum
()
if
not
node
.
children
[
1
].
parser
.
name
==
"literal"
:
self
.
tree
.
new_error
(
node
,
'Directive "%s" requires message string as argument'
)
error_msg
=
node
.
children
[
1
].
content
if
not
isinstance
(
error_msg
,
str
):
self
.
tree
.
new_error
(
node
,
'Directive "%s" requires message string as argument'
%
(
key
,
str
(
filter_set
)))
symbol
=
key
[:
-
6
]
if
symbol
in
self
.
rules
:
self
.
tree
.
new_error
(
node
,
'Custom error message for symbol "%s"'
%
symbol
...
...
@@ -1018,11 +1013,6 @@ class EBNFCompiler(Compiler):
return
self
.
WHITESPACE_PARSER_KEYWORD
def on_list_(self, node) -> Set[str]:
    """Return the whitespace-stripped results of all children of `node` as a set.

    `node` must have at least one child; each child's `result` is expected
    to be a string (it is stripped with `str.strip()`).
    """
    assert node.children
    return {item.result.strip() for item in node.children}
def
get_ebnf_compiler
(
grammar_name
=
""
,
grammar_source
=
""
)
->
EBNFCompiler
:
try
:
compiler
=
GLOBALS
.
ebnf_compiler_singleton
...
...
examples/EBNF/EBNF.ebnf
View file @
ef868768
...
...
@@ -6,7 +6,7 @@
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol "=" (
regexp | literal |
list_ )
directive = "@" §symbol "=" (regexp | literal |
symbol) { "," (regexp | literal | symbol) }
expression = term { "|" term }
term = { ["§"] factor }+ # "§" means all following factors mandatory
...
...
@@ -37,6 +37,5 @@ literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
EOF = !/./
examples/EBNF/EBNFCompiler.py
View file @
ef868768
...
...
@@ -54,64 +54,22 @@ def get_preprocessor() -> PreprocessorFunc:
#######################################################################
class
EBNFGrammar
(
Grammar
):
r
"""Parser for an EBNF source file, with this grammar:
# EBNF-Grammar in EBNF
@ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol "=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { ["§"] factor }+ # "§" means all following factors mandatory
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] plaintext
| [flowmarker] regexp
| [flowmarker] whitespace
| [flowmarker] oneormore
| [flowmarker] group
| [flowmarker] unordered
| repetition
| option
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
| "-!" | "-&" # '-!' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" §expression ")"
unordered = "<" §expression ">" # elements of expression in arbitrary order
oneormore = "{" expression "}+"
repetition = "{" §expression "}"
option = "[" §expression "]"
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
EOF = !/./
r
"""Parser for an EBNF source file.
"""
expression
=
Forward
()
source_hash__
=
"6099690fa36228d49e6c35ec60750c59"
list_
=
Forward
()
source_hash__
=
"8a91723fddb6b9ab6dbdb69ac5263492"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'#.*(?:\n|$)'
WHITESPACE__
=
r
'\s*'
WSP_RE__
=
mixin_comment
(
whitespace
=
WHITESPACE__
,
comment
=
COMMENT__
)
wsp__
=
Whitespace
(
WSP_RE__
)
EOF
=
NegativeLookahead
(
RegExp
(
'.'
))
list_
=
Series
(
RegExp
(
'
\\
w+'
),
wsp__
,
ZeroOrMore
(
Series
(
Series
(
Token
(
","
),
wsp__
),
RegExp
(
'
\\
w+'
),
wsp__
)))
whitespace
=
Series
(
RegExp
(
'~'
),
wsp__
)
regexp
=
Series
(
RegExp
(
'/(?:
\\\\
/|[^/])*?/'
),
wsp__
)
plaintext
=
Series
(
RegExp
(
'`(?:[^"]|
\\\\
")*?`'
),
wsp__
)
literal
=
Alternative
(
Series
(
RegExp
(
'"(?:[^"]|
\\\\
")*?"'
),
wsp__
),
Series
(
RegExp
(
"'(?:[^']|
\\\\
')*?'"
),
wsp__
))
literal
=
Alternative
(
Series
(
RegExp
(
'"(?:[^"]|
\\\\
")*?"'
),
wsp__
),
Series
(
RegExp
(
"'(?:[^']|
\\\\
')*?'"
),
wsp__
))
symbol
=
Series
(
RegExp
(
'(?!
\\
d)
\\
w+'
),
wsp__
)
option
=
Series
(
Series
(
Token
(
"["
),
wsp__
),
expression
,
Series
(
Token
(
"]"
),
wsp__
),
mandatory
=
1
)
repetition
=
Series
(
Series
(
Token
(
"{"
),
wsp__
),
expression
,
Series
(
Token
(
"}"
),
wsp__
),
mandatory
=
1
)
...
...
@@ -119,21 +77,28 @@ class EBNFGrammar(Grammar):
unordered
=
Series
(
Series
(
Token
(
"<"
),
wsp__
),
expression
,
Series
(
Token
(
">"
),
wsp__
),
mandatory
=
1
)
group
=
Series
(
Series
(
Token
(
"("
),
wsp__
),
expression
,
Series
(
Token
(
")"
),
wsp__
),
mandatory
=
1
)
retrieveop
=
Alternative
(
Series
(
Token
(
"::"
),
wsp__
),
Series
(
Token
(
":"
),
wsp__
))
flowmarker
=
Alternative
(
Series
(
Token
(
"!"
),
wsp__
),
Series
(
Token
(
"&"
),
wsp__
),
Series
(
Token
(
"-!"
),
wsp__
),
Series
(
Token
(
"-&"
),
wsp__
))
factor
=
Alternative
(
Series
(
Option
(
flowmarker
),
Option
(
retrieveop
),
symbol
,
NegativeLookahead
(
Series
(
Token
(
"="
),
wsp__
))),
Series
(
Option
(
flowmarker
),
literal
),
Series
(
Option
(
flowmarker
),
plaintext
),
Series
(
Option
(
flowmarker
),
regexp
),
Series
(
Option
(
flowmarker
),
whitespace
),
Series
(
Option
(
flowmarker
),
oneormore
),
Series
(
Option
(
flowmarker
),
group
),
Series
(
Option
(
flowmarker
),
unordered
),
repetition
,
option
)
flowmarker
=
Alternative
(
Series
(
Token
(
"!"
),
wsp__
),
Series
(
Token
(
"&"
),
wsp__
),
Series
(
Token
(
"-!"
),
wsp__
),
Series
(
Token
(
"-&"
),
wsp__
))
factor
=
Alternative
(
Series
(
Option
(
flowmarker
),
Option
(
retrieveop
),
symbol
,
NegativeLookahead
(
Series
(
Token
(
"="
),
wsp__
))),
Series
(
Option
(
flowmarker
),
literal
),
Series
(
Option
(
flowmarker
),
plaintext
),
Series
(
Option
(
flowmarker
),
regexp
),
Series
(
Option
(
flowmarker
),
whitespace
),
Series
(
Option
(
flowmarker
),
oneormore
),
Series
(
Option
(
flowmarker
),
group
),
Series
(
Option
(
flowmarker
),
unordered
),
repetition
,
option
)
term
=
OneOrMore
(
Series
(
Option
(
Series
(
Token
(
"§"
),
wsp__
)),
factor
))
expression
.
set
(
Series
(
term
,
ZeroOrMore
(
Series
(
Series
(
Token
(
"|"
),
wsp__
),
term
))))
directive
=
Series
(
Series
(
Token
(
"@"
),
wsp__
),
symbol
,
Series
(
Token
(
"="
),
wsp__
),
Alternative
(
regexp
,
literal
,
list_
),
mandatory
=
1
)
definition
=
Series
(
symbol
,
Series
(
Token
(
"="
),
wsp__
),
expression
,
mandatory
=
1
)
directive
=
Series
(
Series
(
Token
(
"@"
),
wsp__
),
symbol
,
Series
(
Token
(
"="
),
wsp__
),
list_
,
mandatory
=
1
)
list_
.
set
(
Series
(
Alternative
(
regexp
,
literal
,
symbol
),
ZeroOrMore
(
Series
(
Series
(
Token
(
","
),
wsp__
),
Alternative
(
regexp
,
literal
,
symbol
)))))
syntax
=
Series
(
Option
(
Series
(
wsp__
,
RegExp
(
''
))),
ZeroOrMore
(
Alternative
(
definition
,
directive
)),
EOF
,
mandatory
=
2
)
root__
=
syntax
def
get_grammar
()
->
EBNFGrammar
:
try
:
grammar
=
GLOBALS
.
EBNF_
2
_grammar_singleton
grammar
=
GLOBALS
.
EBNF_
1
_grammar_singleton
except
AttributeError
:
GLOBALS
.
EBNF_
2
_grammar_singleton
=
EBNFGrammar
()
grammar
=
GLOBALS
.
EBNF_
2
_grammar_singleton
GLOBALS
.
EBNF_
1
_grammar_singleton
=
EBNFGrammar
()
grammar
=
GLOBALS
.
EBNF_
1
_grammar_singleton
return
grammar
...
...
examples/EBNF/EBNF_old.ebnf
deleted
100644 → 0
View file @
4f01e917
# EBNF-Grammar in EBNF
@ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol "=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { ["§"] factor }+ # "§" means all following factors mandatory
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] plaintext
| [flowmarker] regexp
| [flowmarker] whitespace
| [flowmarker] oneormore
| [flowmarker] group
| [flowmarker] unordered
| repetition
| option
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
| "-!" | "-&" # '-!' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" §expression ")"
unordered = "<" §expression ">" # elements of expression in arbitrary order
oneormore = "{" expression "}+"
repetition = "{" §expression "}"
option = "[" §expression "]"
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
whitespace = /~/~ # implicit or default whitespace
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
EOF = !/./
examples/EBNF/EBNF_oldCompiler.py
deleted
100755 → 0
View file @
4f01e917
#!/usr/bin/python
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
import
collections
from
functools
import
partial
import
os
import
sys
sys
.
path
.
append
(
r
'C:\Users\di68kap\PycharmProjects\DHParser'
)
try
:
import
regex
as
re
except
ImportError
:
import
re
from
DHParser
import
logging
,
is_filename
,
load_if_file
,
\
Grammar
,
Compiler
,
nil_preprocessor
,
PreprocessorToken
,
Whitespace
,
\
Lookbehind
,
Lookahead
,
Alternative
,
Pop
,
Token
,
Synonym
,
AllOf
,
SomeOf
,
Unordered
,
\
Option
,
NegativeLookbehind
,
OneOrMore
,
RegExp
,
Retrieve
,
Series
,
Capture
,
\
ZeroOrMore
,
Forward
,
NegativeLookahead
,
Required
,
mixin_comment
,
compile_source
,
\
grammar_changed
,
last_value
,
counterpart
,
accumulate
,
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
TransformationDict
,
transformation_factory
,
\
traverse
,
remove_children_if
,
merge_children
,
is_anonymous
,
matches_re
,
\
reduce_single_child
,
replace_by_single_child
,
replace_or_reduce
,
remove_whitespace
,
\
remove_expendables
,
remove_empty
,
remove_tokens
,
flatten
,
is_whitespace
,
\
is_empty
,
is_expendable
,
collapse
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
remove_nodes
,
remove_content
,
remove_brackets
,
replace_parser
,
remove_anonymous_tokens
,
\
keep_children
,
is_one_of
,
has_content
,
apply_if
,
remove_first
,
remove_last
,
\
remove_anonymous_empty
,
keep_nodes
,
traverse_locally
,
strip
,
lstrip
,
rstrip
,
\
replace_content
,
replace_content_by
#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def EBNF_oldPreprocessor(text):
    """Identity preprocessor: return `text` unchanged plus an identity function.

    Returns a 2-tuple `(text, mapping)` where `mapping` returns its argument
    unchanged.  NOTE(review): the second element is presumably a mapping of
    positions in the preprocessed text back to the original source -- confirm
    against the PreprocessorFunc contract.
    """
    return text, lambda i: i
def get_preprocessor() -> PreprocessorFunc:
    """Return the preprocessor function for EBNF_old sources."""
    return EBNF_oldPreprocessor
#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################
class EBNF_oldGrammar(Grammar):
    r"""Parser for an EBNF_old source file, with this grammar:

    # EBNF-Grammar in EBNF

    @ comment    = /#.*(?:\n|$)/    # comments start with '#' and eat all chars up to and including '\n'
    @ whitespace = /\s*/            # whitespace includes linefeed
    @ literalws  = right            # trailing whitespace of literals will be ignored tacitly

    syntax     = [~//] { definition | directive } §EOF
    definition = symbol §"=" expression
    directive  = "@" §symbol "=" ( regexp | literal | list_ )

    expression = term { "|" term }
    term       = { ["§"] factor }+       # "§" means all following factors mandatory
    factor     = [flowmarker] [retrieveop] symbol !"="   # negative lookahead to be sure it's not a definition
               | [flowmarker] literal
               | [flowmarker] plaintext
               | [flowmarker] regexp
               | [flowmarker] whitespace
               | [flowmarker] oneormore
               | [flowmarker] group
               | [flowmarker] unordered
               | repetition
               | option

    flowmarker = "!"  | "&"              # '!' negative lookahead, '&' positive lookahead
               | "-!" | "-&"             # '-!' negative lookbehind, '-&' positive lookbehind
    retrieveop = "::" | ":"              # '::' pop, ':' retrieve

    group      = "(" §expression ")"
    unordered  = "<" §expression ">"     # elements of expression in arbitrary order
    oneormore  = "{" expression "}+"
    repetition = "{" §expression "}"
    option     = "[" §expression "]"

    symbol     = /(?!\d)\w+/~            # e.g. expression, factor, parameter_list
    literal    = /"(?:[^"]|\\")*?"/~     # e.g. "(", '+', 'while'
               | /'(?:[^']|\\')*?'/~     # whitespace following literals will be ignored tacitly.
    plaintext  = /`(?:[^"]|\\")*?`/~     # like literal but does not eat whitespace
    regexp     = /~?\/(?:\\\/|[^\/])*?\/~?/~     # e.g. /\w+/, ~/#.*(?:\n|$)/~
                                         # '~' is a whitespace-marker, if present leading or trailing
                                         # whitespace of a regular expression will be ignored tacitly.
    whitespace = /~/~                    # implicit or default whitespace
    list_      = /\w+/~ { "," /\w+/~ }   # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
                                         # BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
    EOF = !/./
    """
    # `expression` is referenced by option/repetition/group/... before it can
    # be defined, so it is declared as a Forward and filled in via .set() below.
    expression = Forward()
    source_hash__ = "249997be7111ca806939bf18070e136e"
    parser_initialization__ = "upon instantiation"
    COMMENT__ = r'#.*(?:\n|$)'
    WHITESPACE__ = r'\s*'
    WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
    wsp__ = Whitespace(WSP_RE__)
    EOF = NegativeLookahead(RegExp('.'))
    list_ = Series(RegExp('\\w+'), wsp__,
                   ZeroOrMore(Series(Series(Token(","), wsp__), RegExp('\\w+'), wsp__)))
    whitespace = Series(RegExp('~'), wsp__)
    regexp = Series(RegExp('~?/(?:\\\\/|[^/])*?/~?'), wsp__)
    plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
    literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__),
                          Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
    symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
    option = Series(Series(Token("["), wsp__), expression,
                    Series(Token("]"), wsp__), mandatory=1)
    repetition = Series(Series(Token("{"), wsp__), expression,
                        Series(Token("}"), wsp__), mandatory=1)
    oneormore = Series(Series(Token("{"), wsp__), expression,
                       Series(Token("}+"), wsp__))
    unordered = Series(Series(Token("<"), wsp__), expression,
                       Series(Token(">"), wsp__), mandatory=1)
    group = Series(Series(Token("("), wsp__), expression,
                   Series(Token(")"), wsp__), mandatory=1)
    retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
    flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__),
                             Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
    factor = Alternative(
        Series(Option(flowmarker), Option(retrieveop), symbol,
               NegativeLookahead(Series(Token("="), wsp__))),
        Series(Option(flowmarker), literal),
        Series(Option(flowmarker), plaintext),
        Series(Option(flowmarker), regexp),
        Series(Option(flowmarker), whitespace),
        Series(Option(flowmarker), oneormore),
        Series(Option(flowmarker), group),
        Series(Option(flowmarker), unordered),
        repetition,
        option)
    term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
    expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
    directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__),
                       Alternative(regexp, literal, list_), mandatory=1)
    definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
    syntax = Series(Option(Series(wsp__, RegExp(''))),
                    ZeroOrMore(Alternative(definition, directive)),
                    EOF, mandatory=2)
    root__ = syntax
def get_grammar() -> EBNF_oldGrammar:
    """Return the EBNF_oldGrammar instance cached on `GLOBALS`, creating it on first use.

    The instance is stored as `GLOBALS.EBNF_old_1_grammar_singleton`; a missing
    attribute (AttributeError) triggers its creation.
    """
    # NOTE(review): `GLOBALS` does not appear in this file's visible
    # `from DHParser import ...` list -- confirm it is actually in scope.
    try:
        grammar = GLOBALS.EBNF_old_1_grammar_singleton
    except AttributeError:
        GLOBALS.EBNF_old_1_grammar_singleton = EBNF_oldGrammar()
        grammar = GLOBALS.EBNF_old_1_grammar_singleton
    return grammar
#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
# Maps node names of the EBNF_old grammar to the transformation(s) applied to
# them; an empty list means no transformation is registered for that name.
# NOTE(review): "+" and "*" look like wildcard keys ("*" presumably being the
# fallback for names not listed) -- confirm against the traverse() contract.
EBNF_old_AST_transformation_table = {
    # AST Transformations for the EBNF_old-grammar
    "+": remove_empty,
    "syntax": [],
    "definition": [],
    "directive": [],
    "expression": [],
    "term": [],
    "factor": [replace_or_reduce],
    "flowmarker": [replace_or_reduce],
    "retrieveop": [replace_or_reduce],
    "group": [],
    "unordered": [],
    "oneormore": [],
    "repetition": [],
    "option": [],
    "symbol": [],
    "literal": [replace_or_reduce],
    "plaintext": [],
    "regexp": [],
    "whitespace": [],
    "list_": [],
    "EOF": [],
    ":Token": reduce_single_child,
    "*": replace_by_single_child
}
def EBNF_oldTransform() -> TransformationDict:
    """Return `traverse` bound to a fresh copy of the EBNF_old transformation table.

    The table is copied so that the returned callable does not share the
    module-level dictionary.
    """
    # NOTE(review): the value returned is a functools.partial, while the
    # annotation says TransformationDict -- confirm which is intended.
    return partial(traverse, processing_table=EBNF_old_AST_transformation_table.copy())
def get_transformer() -> TransformationFunc:
    """Return the cached EBNF_old transformer, creating it on first call.

    The transformer is kept in the module-level global
    `thread_local_EBNF_old_transformer_singleton`; the NameError raised while
    that global is still undefined triggers its creation.
    """
    global thread_local_EBNF_old_transformer_singleton
    try:
        transformer = thread_local_EBNF_old_transformer_singleton
    except NameError:
        thread_local_EBNF_old_transformer_singleton = EBNF_oldTransform()
        transformer = thread_local_EBNF_old_transformer_singleton
    return transformer
#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.