2.12.2021, 9:00 - 11:00: Due to updates, GitLab may be unavailable for a few minutes between 09:00 and 11:00.

Commit d826b074 authored by Eckhart Arnold
Browse files

- DHParser/examples/EBNF/EBNF.ebnf: errors in literal and plaintext definition...

- DHParser/examples/EBNF/EBNF.ebnf: errors in literal and plaintext definition corrected (see unit tests)
parent deb9c81c
......@@ -187,8 +187,11 @@ class EBNFGrammar(Grammar):
EOF = NegativeLookahead(RegExp('.'))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
# plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
plaintext = RegExp('`(?:\\\\`|[^"])*?`')
# literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
literal = Alternative(Series(RegExp('"(?:\\\\"|[^"])*?"'), wsp__),
Series(RegExp("'(?:\\\\'|[^'])*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
......
......@@ -152,7 +152,7 @@ def unit_from_config(config_str):
section_match = RX_SECTION.match(cfg, pos)
if pos != len(cfg) and not re.match(r'\s+$', cfg[pos:]):
raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 2))
raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 2)) # TODO: Add file name
return unit
......
......@@ -42,9 +42,9 @@ option = "[" §expression "]"
#: leaf-elements
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
literal = /"(?:\\"|[^"])*?"/~ # e.g. "(", '+', 'while'
| /'(?:\\'|[^'])*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:\\`|[^"])*?`/ # like literal but does not eat whitespace
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace
......
......@@ -59,7 +59,8 @@ class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file.
"""
expression = Forward()
source_hash__ = "de6d0516ea104e7d8318b998e488b2d1"
source_hash__ = "6578e0df755d9206e8a78aa4d3a183bd"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'#.*(?:\n|$)'
......@@ -69,8 +70,8 @@ class EBNFGrammar(Grammar):
EOF = NegativeLookahead(RegExp('.'))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
plaintext = RegExp('`(?:\\\\`|[^"])*?`')
literal = Alternative(Series(RegExp('"(?:\\\\"|[^"])*?"'), wsp__), Series(RegExp("'(?:\\\\'|[^'])*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
......
......@@ -10,14 +10,30 @@ F2: "5_starting_with_a_digit"
[match:literal]
M1: '"simple literal"'
M2: '"literal with following whitespace" '
M3: "'literal with single quotation marks'"
M4: "'literal with escaped \' quotation marks'"
M5: '"another literal with escaped \" quotation marks"'
M6: """'literal containing different quotation marks: " '"""
M7: '''"another literal containing different quotation marks: ' "'''
M8: '''"literal spanning
several lines"'''
[fail:literal]
F1: ' "preceeding whitespace is not parsed by literal"'
F2: ''' "don't forget closing quotation marks'''
[match:plaintext]
M1: '`simple plaintext`'
M2: '`plaintext with escaped quotation: \` `'
M3: '''`plaintext spanning
several lines`'''
[fail:plaintext]
F1: ' `preceeding whitespace not parsed by plaintext parser`'
F2: '`following whitespace is neither parsed by plaintext parser` '
[match:regexp]
......
......@@ -433,14 +433,14 @@ class TestWhitespace:
cst = parser("DOCWörter Wörter Wörter")
assert not cst.error_flag
lang2 = r'document = `DOC` { WORD } EOF' + tail
lang2 = r'document = `DOC`{ WORD } EOF' + tail
parser = grammar_provider(lang2)()
cst = parser("DOC Wörter Wörter Wörter")
assert cst.error_flag
cst = parser("DOCWörter Wörter Wörter")
assert not cst.error_flag
lang3 = r'document = `DOC` ~ { WORD } EOF' + tail
lang3 = r'document = `DOC`~ { WORD } EOF' + tail
parser = grammar_provider(lang3)()
cst = parser("DOC Wörter Wörter Wörter")
assert not cst.error_flag
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment