Commit d38fa945 authored by Eckhart Arnold

- default whitespace does not include linefeed any more; minor error corrections

parent d79756fd
......@@ -112,6 +112,7 @@ class EBNFGrammar(GrammarBase):
root__ = syntax
# TODO: Add some sanity checks to Transformations, e.g. "Required(Optional(..." should yield an error.
EBNFTransTable = {
# AST Transformations for EBNF-grammar
"syntax":
......@@ -126,9 +127,9 @@ EBNFTransTable = {
"factor, flowmarker, retrieveop":
replace_by_single_child,
"group":
[remove_brackets, replace_by_single_child],
"oneormore, repetition, option":
[reduce_single_child, remove_brackets],
[remove_enclosing_delimiters, replace_by_single_child],
"oneormore, repetition, option, regexchain":
[reduce_single_child, remove_enclosing_delimiters],
"symbol, literal, regexp, list_":
[remove_expendables, reduce_single_child],
(TOKEN_KEYWORD, WHITESPACE_KEYWORD):
......@@ -176,6 +177,7 @@ class EBNFCompiler(CompilerBase):
in EBNF-Notation.
"""
COMMENT_KEYWORD = "COMMENT__"
DEFAULT_WHITESPACE = '[\t ]*'
RESERVED_SYMBOLS = {TOKEN_KEYWORD, WHITESPACE_KEYWORD, COMMENT_KEYWORD}
KNOWN_DIRECTIVES = {'comment', 'whitespace', 'tokens', 'literalws'}
VOWELS = {'A', 'E', 'I', 'O', 'U'} # what about cases like 'hour', 'universe' etc.?
......@@ -201,9 +203,9 @@ class EBNFCompiler(CompilerBase):
self.definition_names = []
self.recursive = set()
self.root = ""
self.directives = {'whitespace': '\s*',
self.directives = {'whitespace': self.DEFAULT_WHITESPACE,
'comment': '',
'literalws': ['wR=' + WHITESPACE_KEYWORD]}
'literalws': ['right']}
def gen_scanner_skeleton(self):
name = self.grammar_name + "Scanner"
......@@ -370,6 +372,10 @@ class EBNFCompiler(CompilerBase):
elif value[0] + value[-1] == '//':
value = self._check_rx(node, value[1:-1])
else:
if value == "linefeed":
value = '\s*'
elif value == "standard":
value = self.DEFAULT_WHITESPACE
value = self._check_rx(node, value)
self.directives[key] = value
elif key == 'literalws':
......@@ -445,6 +451,9 @@ class EBNFCompiler(CompilerBase):
def oneormore(self, node):
    # Compile a one-or-more AST node by delegating to the generic
    # non-terminal compiler with the 'OneOrMore' parser class name.
    # NOTE(review): `non_terminal` is defined elsewhere in EBNFCompiler —
    # presumably it emits the parser-constructor call; verify in full source.
    return self.non_terminal(node, 'OneOrMore')
def regexchain(self, node):
    # Deliberate stub: the 'regexchain' construct ("<" expression ">") was
    # just added to the grammar but compilation is not implemented yet, so
    # fail loudly rather than silently emit wrong parser code.
    raise EBNFCompilerError("Not yet implemented!")
def group(self, node):
    # 'group' nodes are stripped during AST transformation (see the
    # EBNFTransTable entry: remove_enclosing_delimiters +
    # replace_by_single_child), so reaching this handler indicates a bug
    # in the transformation stage.
    raise EBNFCompilerError("Group nodes should have been eliminated by "
                            "AST transformation!")
......
......@@ -586,7 +586,7 @@ AST_SYMBOLS = {'replace_by_single_child', 'reduce_single_child',
'is_whitespace', 'is_expendable', 'remove_whitespace',
# 'remove_scanner_tokens', 'is_scanner_token',
'remove_expendables', 'flatten', 'remove_tokens',
'remove_brackets',
'remove_enclosing_delimiters',
'TOKEN_KEYWORD', 'WHITESPACE_KEYWORD', 'partial'}
......
......@@ -10,11 +10,11 @@ directive = "@" §symbol §"=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { factor }+
factor = [flowmarker] chain
| [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] regexp
| [flowmarker] group
| [flowmarker] regexchain
| [flowmarker] oneormore
| repetition
| option
......@@ -24,11 +24,11 @@ flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&'
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
option = "[" expression §"]"
regexchain = "<" expression §">" # compiles "expression" into a singular regular expression
oneormore = "{" expression "}+"
repetition = "{" expression §"}"
option = "[" expression §"]"
chain = { link "--" }+ link # chained regular expressions
link = regexp | symbol | literal # semantic restriction: symbol must evaluate to a regexp or chain
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
......
# EBNF-Grammar in EBNF
@ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol §"=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { factor }+
factor = [flowmarker] chain
| [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] regexp
| [flowmarker] group
| [flowmarker] oneormore
| repetition
| option
flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&' positive lookahead, '§' required
"-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
option = "[" expression §"]"
oneormore = "{" expression "}+"
repetition = "{" expression §"}"
chain = { link "--" }+ link # chained regular expressions
link = regexp | symbol | literal # semantic restriction: symbol must evaluate to a regexp or chain
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
regexp = /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
list_ = /\w+\s*(?:,\s*\w+\s*)*/~ # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
EOF = !/./
# EBNF-Syntax für MLW-Artikel
@ comment = /#.*(?:\n|$)/ # Kommentare beginnen mit '#' und reichen bis zum Zeilenende
@ whitespace = /[\t\r\ ]*/ # Auch Zeilensprünge zählen als Leerraum
@ whitespace = /[\t ]*/ # Zeilensprünge zählen nicht als Leerraum
@ literalws = both # Leerraum vor und nach Literalen wird automatisch entfernt
......
......@@ -680,7 +680,7 @@ class FlowOperator(UnaryOperator):
class Required(FlowOperator):
# TODO: Add constructor that checks for logical errors, like `Required(Optional(...))` constructs
# Add constructor that checks for logical errors, like `Required(Optional(...))` constructs ?
def __call__(self, text):
node, text_ = self.parser(text)
if not node:
......
......@@ -53,7 +53,7 @@ __all__ = ['WHITESPACE_KEYWORD',
'remove_expendables',
'remove_tokens',
'flatten',
'remove_brackets',
'remove_enclosing_delimiters',
'AST_SYMBOLS']
......@@ -561,7 +561,7 @@ def flatten(node):
node.result = tuple(new_result)
def remove_brackets(node):
def remove_enclosing_delimiters(node):
"""Removes any enclosing delimiters from a structure (e.g. quotation marks
from a literal or braces from a group).
"""
......@@ -575,5 +575,5 @@ AST_SYMBOLS = {'replace_by_single_child', 'reduce_single_child',
'is_whitespace', 'is_expendable', 'remove_whitespace',
# 'remove_scanner_tokens', 'is_scanner_token',
'remove_expendables', 'flatten', 'remove_tokens',
'remove_brackets',
'remove_enclosing_delimiters',
'TOKEN_KEYWORD', 'WHITESPACE_KEYWORD', 'partial'}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment