EBNF_old_2.ebnf 2.25 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# EBNF-Grammar in EBNF

# Directives: '@' name '=' value settings (their syntax is given by the
# `directive` rule below).
@ comment    =  /#.*(?:\n|$)/                    # comments start with '#' and eat all chars up to and including '\n'
@ whitespace =  /\s*/                            # whitespace includes linefeed
@ literalws  =  right                            # trailing whitespace of literals will be ignored tacitly

# Top-level structure of a grammar file: an optional `~//` (an empty regexp
# carrying the leading whitespace marker '~', so presumably it consumes only
# leading whitespace), then any number of definitions and directives, then
# end of input. '§' marks the element that follows it as required (see
# `flowmarker`).
syntax     =  [~//] { definition | directive } §EOF
# A rule: its name, "=", and the defining expression.
definition =  symbol §"=" expression
# A setting: "@", its name, "=", and a regexp, a literal or a list of symbols.
directive  =  "@" §symbol §"=" ( regexp | literal | list_ )

# One or more terms separated by "|" (alternatives).
expression =  term { "|" term }
# One or more factors in a row (a sequence).
term       =  { factor }+
# A single element of a sequence. All forms except `repetition` and `option`
# may carry a leading flow marker.
# NOTE(review): the order of alternatives looks significant, assuming ordered
# choice: `chain` begins with a `link` (regexp, symbol or literal) and so has to
# be tried before those plain forms; `oneormore` opens with the same "{" as
# `repetition` and is listed before it.
factor     =  [flowmarker] chain
            | [flowmarker] [retrieveop] symbol !"="   # negative lookahead to be sure it's not a definition
            | [flowmarker] literal
            | [flowmarker] regexp
            | [flowmarker] group
            | [flowmarker] oneormore
            | repetition
            | option

# Prefix operators that may precede a factor.
flowmarker =  "!"  | "&"  | "§" |                # '!' negative lookahead, '&' positive lookahead, '§' required
              "-!" | "-&"                        # '-!' negative lookbehind, '-&' positive lookbehind
# The two-character operator is listed first so that "::" is not read as ":".
retrieveop =  "::" | ":"                         # '::' pop, ':' retrieve

# Bracketed sub-expressions. In `group`, `option` and `repetition` the closing
# bracket is marked as required with '§'.
# NOTE(review): the closing "}+" of `oneormore` carries no '§' - presumably so
# that this rule can fail quietly on a plain "}" and `repetition`, which opens
# with the same "{", can still be tried.
group      =  "(" expression §")"
option     =  "[" expression §"]"
oneormore  =  "{" expression "}+"
repetition =  "{" expression §"}"

# A chain consists of at least two links joined by "--".
chain      = { link "--" }+ link                 # chained regular expressions
link       = regexp | symbol | literal           # semantic restriction: symbol must evaluate to a regexp or chain

# An identifier: one or more word characters, the first of which must not be a
# digit (enforced by the lookahead `(?!\d)`); trailing whitespace is skipped ('~').
symbol     =  /(?!\d)\w+/~                       # e.g. expression, factor, parameter_list
# A literal is a double- or single-quoted string. Inside the quotes, a
# backslash-escaped quote (\" resp. \') does not end the literal, and \\ is an
# escaped backslash, so a literal may still end in a backslash ("\\").
# The escape alternatives must come BEFORE the catch-all character class:
# alternation is ordered and the class also matches a backslash, so with the
# class first the lazy repetition would close the literal at the quote of an
# escape sequence ("a\"b" would be read as "a\").
literal    =  /"(?:\\"|\\\\|[^"])*?"/~           # e.g. "(", '+', 'while'
            | /'(?:\\'|\\\\|[^'])*?'/~           # whitespace following literals will be ignored tacitly.
# A regular expression enclosed in slashes. The body consists of non-slash
# characters and of slashes that are directly preceded by a backslash, i.e. a
# '/' inside the pattern has to be written as '\/'.
regexp     =  /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~    # e.g. /\w+/, ~/#.*(?:\n|$)/~
                                                 # '~' is a whitespace-marker, if present leading or trailing
                                                 # whitespace of a regular expression will be ignored tacitly.
list_      =  /\w+\s*(?:,\s*\w+\s*)*/~           # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
                                                 # BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
# End of input: succeeds only where no further character can be matched
# (negative lookahead on /./).
# NOTE(review): in Python-style regexes '.' does not match a newline by default,
# so this presumably relies on trailing whitespace and newlines having been
# consumed before EOF is tested - confirm.
EOF =  !/./