Commit bd322f28 authored by Eckhart Arnold
Browse files

DHParser/ebnf.py EBNFGrammar: bugfix plaintext definition

parent d826b074
......@@ -188,7 +188,7 @@ class EBNFGrammar(Grammar):
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
# plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
plaintext = RegExp('`(?:\\\\`|[^"])*?`')
plaintext = Series(RegExp('`(?:\\\\`|[^"])*?`'), wsp__)
# literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
literal = Alternative(Series(RegExp('"(?:\\\\"|[^"])*?"'), wsp__),
Series(RegExp("'(?:\\\\'|[^'])*?'"), wsp__))
......
......@@ -44,7 +44,7 @@ option = "[" §expression "]"
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:\\"|[^"])*?"/~ # e.g. "(", '+', 'while'
| /'(?:\\'|[^'])*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:\\`|[^"])*?`/ # like literal but does not eat whitespace
plaintext = /`(?:\\`|[^"])*?`/~ # like literal, but the parser it defines does not eat whitespace (the definition itself may be followed by whitespace)
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace
......
......@@ -59,7 +59,7 @@ class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file.
"""
expression = Forward()
source_hash__ = "6578e0df755d9206e8a78aa4d3a183bd"
source_hash__ = "946ad6d28df1350f1f8cd2502fcf012f"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
......@@ -70,7 +70,7 @@ class EBNFGrammar(Grammar):
EOF = NegativeLookahead(RegExp('.'))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = RegExp('`(?:\\\\`|[^"])*?`')
plaintext = Series(RegExp('`(?:\\\\`|[^"])*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:\\\\"|[^"])*?"'), wsp__), Series(RegExp("'(?:\\\\'|[^'])*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
......@@ -107,27 +107,35 @@ def get_grammar() -> EBNFGrammar:
#######################################################################
EBNF_AST_transformation_table = {
# AST Transformations for the EBNF-grammar
"<": flatten_anonymous_nodes,
"syntax": [],
"definition": [],
"directive": [],
"expression": [],
"term": [],
"factor": [],
"flowmarker": [],
"retrieveop": [],
"group": [],
"unordered": [],
"oneormore": [],
"repetition": [],
"option": [],
"symbol": [],
"literal": [],
"plaintext": [],
"regexp": [],
"whitespace": [],
"EOF": []
# AST Transformations for EBNF-grammar
"<":
remove_expendables,
"syntax":
[], # otherwise '"*": replace_by_single_child' would be applied
"directive, definition":
[flatten, remove_tokens('@', '=', ',')],
"expression":
[replace_by_single_child, flatten, remove_tokens('|')], # remove_infix_operator],
"term":
[replace_by_single_child, flatten], # supports both idioms:
# "{ factor }+" and "factor { factor }"
"factor, flowmarker, retrieveop":
replace_by_single_child,
"group":
[remove_brackets, replace_by_single_child],
"unordered":
remove_brackets,
"oneormore, repetition, option":
[reduce_single_child, remove_brackets,
forbid('repetition', 'option', 'oneormore'), assert_content(r'(?!§)(?:.|\n)*')],
"symbol, literal, regexp":
reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE):
reduce_single_child,
# "list_":
# [flatten, remove_infix_operator],
"*":
replace_by_single_child
}
def EBNFTransform() -> TransformationDict:
......
......@@ -30,22 +30,32 @@ M1: '`simple plaintext`'
M2: '`plaintext with escaped quotation: \` `'
M3: '''`plaintext spanning
several lines`'''
M4: '''`plaintext does not include following whitespace, but its
definition does` '''
[fail:plaintext]
F1: ' `preceeding whitespace not parsed by plaintext parser`'
F2: '`following whitespace is neither parsed by plaintext parser` '
[match:regexp]
M1: '/[A-Z][a-z]+/' # plain regex
M2: '/\w+/' # regex with backslashes
M4: '/\//' # forward slashes must be escaped
M5: '/\s*/ ' # whitespace may follow the regex-definition
[fail:regexp]
F1: ' /no preceeding whitespace/'
[match:whitespace]
M1: '~'
M2: '~ '
[fail:whitespace]
F1: ' ~'
[match:EOF]
M1: ''
[fail:EOF]
F1: ' '
[match:group]
M1: '(a|bc|d)'
M2: '((a|b)(c|d))'
[ast:group]
......
......@@ -433,14 +433,14 @@ class TestWhitespace:
cst = parser("DOCWörter Wörter Wörter")
assert not cst.error_flag
lang2 = r'document = `DOC`{ WORD } EOF' + tail
lang2 = r'document = `DOC` { WORD } EOF' + tail
parser = grammar_provider(lang2)()
cst = parser("DOC Wörter Wörter Wörter")
assert cst.error_flag
cst = parser("DOCWörter Wörter Wörter")
assert not cst.error_flag
lang3 = r'document = `DOC`~ { WORD } EOF' + tail
lang3 = r'document = `DOC` ~ { WORD } EOF' + tail
parser = grammar_provider(lang3)()
cst = parser("DOC Wörter Wörter Wörter")
assert not cst.error_flag
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment