Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
d38fa945
Commit
d38fa945
authored
Apr 07, 2017
by
Eckhart Arnold
Browse files
- default whitespace does not include linefeed any more; minor error corrections
parent
d79756fd
Changes
7
Hide whitespace changes
Inline
Side-by-side
EBNFcompiler.py
View file @
d38fa945
...
...
@@ -112,6 +112,7 @@ class EBNFGrammar(GrammarBase):
root__
=
syntax
# TODO: Add some sanity checks to Transformations, e.g. "Required(Optional(..." should yield an error.
EBNFTransTable
=
{
# AST Transformations for EBNF-grammar
"syntax"
:
...
...
@@ -126,9 +127,9 @@ EBNFTransTable = {
"factor, flowmarker, retrieveop"
:
replace_by_single_child
,
"group"
:
[
remove_
bracket
s
,
replace_by_single_child
],
"oneormore, repetition, option"
:
[
reduce_single_child
,
remove_
bracket
s
],
[
remove_
enclosing_delimiter
s
,
replace_by_single_child
],
"oneormore, repetition, option
, regexchain
"
:
[
reduce_single_child
,
remove_
enclosing_delimiter
s
],
"symbol, literal, regexp, list_"
:
[
remove_expendables
,
reduce_single_child
],
(
TOKEN_KEYWORD
,
WHITESPACE_KEYWORD
):
...
...
@@ -176,6 +177,7 @@ class EBNFCompiler(CompilerBase):
in EBNF-Notation.
"""
COMMENT_KEYWORD
=
"COMMENT__"
DEFAULT_WHITESPACE
=
'[
\t
]*'
RESERVED_SYMBOLS
=
{
TOKEN_KEYWORD
,
WHITESPACE_KEYWORD
,
COMMENT_KEYWORD
}
KNOWN_DIRECTIVES
=
{
'comment'
,
'whitespace'
,
'tokens'
,
'literalws'
}
VOWELS
=
{
'A'
,
'E'
,
'I'
,
'O'
,
'U'
}
# what about cases like 'hour', 'universe' etc.?
...
...
@@ -201,9 +203,9 @@ class EBNFCompiler(CompilerBase):
self
.
definition_names
=
[]
self
.
recursive
=
set
()
self
.
root
=
""
self
.
directives
=
{
'whitespace'
:
'\s*'
,
self
.
directives
=
{
'whitespace'
:
self
.
DEFAULT_WHITESPACE
,
'comment'
:
''
,
'literalws'
:
[
'
wR='
+
WHITESPACE_KEYWORD
]}
'literalws'
:
[
'
right'
]}
def
gen_scanner_skeleton
(
self
):
name
=
self
.
grammar_name
+
"Scanner"
...
...
@@ -370,6 +372,10 @@ class EBNFCompiler(CompilerBase):
elif
value
[
0
]
+
value
[
-
1
]
==
'//'
:
value
=
self
.
_check_rx
(
node
,
value
[
1
:
-
1
])
else
:
if
value
==
"linefeed"
:
value
=
'\s*'
elif
value
==
"standard"
:
value
=
self
.
DEFAULT_WHITESPACE
value
=
self
.
_check_rx
(
node
,
value
)
self
.
directives
[
key
]
=
value
elif
key
==
'literalws'
:
...
...
@@ -445,6 +451,9 @@ class EBNFCompiler(CompilerBase):
def
oneormore
(
self
,
node
):
return
self
.
non_terminal
(
node
,
'OneOrMore'
)
def
regexchain
(
self
,
node
):
raise
EBNFCompilerError
(
"Not yet implemented!"
)
def
group
(
self
,
node
):
raise
EBNFCompilerError
(
"Group nodes should have been eliminated by "
"AST transformation!"
)
...
...
ParserCombinators_obsolete.py
View file @
d38fa945
...
...
@@ -586,7 +586,7 @@ AST_SYMBOLS = {'replace_by_single_child', 'reduce_single_child',
'is_whitespace'
,
'is_expendable'
,
'remove_whitespace'
,
# 'remove_scanner_tokens', 'is_scanner_token',
'remove_expendables'
,
'flatten'
,
'remove_tokens'
,
'remove_
bracket
s'
,
'remove_
enclosing_delimiter
s'
,
'TOKEN_KEYWORD'
,
'WHITESPACE_KEYWORD'
,
'partial'
}
...
...
examples/EBNF/EBNF.ebnf
View file @
d38fa945
...
...
@@ -10,11 +10,11 @@ directive = "@" §symbol §"=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { factor }+
factor = [flowmarker] chain
| [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] regexp
| [flowmarker] group
| [flowmarker] regexchain
| [flowmarker] oneormore
| repetition
| option
...
...
@@ -24,11 +24,11 @@ flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&'
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
option = "["
expression
§"]"
regexchain = "<" expression §">" # compiles "expression" into a singular regular
expression
oneormore = "{" expression "}+"
repetition = "{" expression §"}"
option = "[" expression §"]"
chain = { link "--" }+ link # chained regular expressions
link = regexp | symbol | literal # semantic restriction: symbol must evaluate to a regexp or chain
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
...
...
examples/EBNF/EBNF_old_2.ebnf
0 → 100644
View file @
d38fa945
# EBNF-Grammar in EBNF
@ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol §"=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { factor }+
factor = [flowmarker] chain
| [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] regexp
| [flowmarker] group
| [flowmarker] oneormore
| repetition
| option
flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&' positive lookahead, '§' required
"-!" | "-&" # '-!' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
option = "[" expression §"]"
oneormore = "{" expression "}+"
repetition = "{" expression §"}"
chain = { link "--" }+ link # chained regular expressions
link = regexp | symbol | literal # semantic restriction: symbol must evaluate to a regexp or chain
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
regexp = /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
list_ = /\w+\s*(?:,\s*\w+\s*)*/~ # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
EOF = !/./
examples/MLW/MLW.ebnf
View file @
d38fa945
# EBNF-Syntax für MLW-Artikel
@ comment = /#.*(?:\n|$)/ # Kommentare beginnen mit '#' und reichen bis zum Zeilenende
@ whitespace = /[\t
\r\
]*/
# Auch
Zeilensprünge zählen als Leerraum
@ whitespace = /[\t ]*/
#
Zeilensprünge zählen
nicht
als Leerraum
@ literalws = both # Leerraum vor und nach Literalen wird automatisch entfernt
...
...
parser.py
View file @
d38fa945
...
...
@@ -680,7 +680,7 @@ class FlowOperator(UnaryOperator):
class
Required
(
FlowOperator
):
#
TODO:
Add constructor that checks for logical errors, like `Required(Optional(...))` constructs
# Add constructor that checks for logical errors, like `Required(Optional(...))` constructs
?
def
__call__
(
self
,
text
):
node
,
text_
=
self
.
parser
(
text
)
if
not
node
:
...
...
syntaxtree.py
View file @
d38fa945
...
...
@@ -53,7 +53,7 @@ __all__ = ['WHITESPACE_KEYWORD',
'remove_expendables'
,
'remove_tokens'
,
'flatten'
,
'remove_
bracket
s'
,
'remove_
enclosing_delimiter
s'
,
'AST_SYMBOLS'
]
...
...
@@ -561,7 +561,7 @@ def flatten(node):
node
.
result
=
tuple
(
new_result
)
def
remove_
bracket
s
(
node
):
def
remove_
enclosing_delimiter
s
(
node
):
"""Removes any enclosing delimiters from a structure (e.g. quotation marks
from a literal or braces from a group).
"""
...
...
@@ -575,5 +575,5 @@ AST_SYMBOLS = {'replace_by_single_child', 'reduce_single_child',
'is_whitespace'
,
'is_expendable'
,
'remove_whitespace'
,
# 'remove_scanner_tokens', 'is_scanner_token',
'remove_expendables'
,
'flatten'
,
'remove_tokens'
,
'remove_
bracket
s'
,
'remove_
enclosing_delimiter
s'
,
'TOKEN_KEYWORD'
,
'WHITESPACE_KEYWORD'
,
'partial'
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment