2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit d826b074 authored by Eckhart Arnold
Browse files

- DHParser/examples/EBNF/EBNF.ebnf: errors in literal and plaintext definition...

- DHParser/examples/EBNF/EBNF.ebnf: errors in literal and plaintext definition corrected (see unit tests)
parent deb9c81c
...@@ -187,8 +187,11 @@ class EBNFGrammar(Grammar): ...@@ -187,8 +187,11 @@ class EBNFGrammar(Grammar):
EOF = NegativeLookahead(RegExp('.')) EOF = NegativeLookahead(RegExp('.'))
whitespace = Series(RegExp('~'), wsp__) whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__) regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__) # plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__)) plaintext = RegExp('`(?:\\\\`|[^"])*?`')
# literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
literal = Alternative(Series(RegExp('"(?:\\\\"|[^"])*?"'), wsp__),
Series(RegExp("'(?:\\\\'|[^'])*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__) symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1) option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1) repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
......
...@@ -152,7 +152,7 @@ def unit_from_config(config_str): ...@@ -152,7 +152,7 @@ def unit_from_config(config_str):
section_match = RX_SECTION.match(cfg, pos) section_match = RX_SECTION.match(cfg, pos)
if pos != len(cfg) and not re.match(r'\s+$', cfg[pos:]): if pos != len(cfg) and not re.match(r'\s+$', cfg[pos:]):
raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 2)) raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 2)) # TODO: Add file name
return unit return unit
......
...@@ -42,9 +42,9 @@ option = "[" §expression "]" ...@@ -42,9 +42,9 @@ option = "[" §expression "]"
#: leaf-elements #: leaf-elements
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while' literal = /"(?:\\"|[^"])*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly. | /'(?:\\'|[^'])*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace plaintext = /`(?:\\`|[^"])*?`/ # like literal but does not eat whitespace
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~ regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace whitespace = /~/~ # insignificant whitespace
......
...@@ -59,7 +59,8 @@ class EBNFGrammar(Grammar): ...@@ -59,7 +59,8 @@ class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file. r"""Parser for an EBNF source file.
""" """
expression = Forward() expression = Forward()
source_hash__ = "de6d0516ea104e7d8318b998e488b2d1" source_hash__ = "6578e0df755d9206e8a78aa4d3a183bd"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"] parser_initialization__ = ["upon instantiation"]
resume_rules__ = {} resume_rules__ = {}
COMMENT__ = r'#.*(?:\n|$)' COMMENT__ = r'#.*(?:\n|$)'
...@@ -69,8 +70,8 @@ class EBNFGrammar(Grammar): ...@@ -69,8 +70,8 @@ class EBNFGrammar(Grammar):
EOF = NegativeLookahead(RegExp('.')) EOF = NegativeLookahead(RegExp('.'))
whitespace = Series(RegExp('~'), wsp__) whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__) regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__) plaintext = RegExp('`(?:\\\\`|[^"])*?`')
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__)) literal = Alternative(Series(RegExp('"(?:\\\\"|[^"])*?"'), wsp__), Series(RegExp("'(?:\\\\'|[^'])*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__) symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1) option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1) repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
......
...@@ -10,14 +10,30 @@ F2: "5_starting_with_a_digit" ...@@ -10,14 +10,30 @@ F2: "5_starting_with_a_digit"
[match:literal] [match:literal]
M1: '"simple literal"'
M2: '"literal with following whitespace" '
M3: "'literal with single quotation marks'"
M4: "'literal with escaped \' quotation marks'"
M5: '"another literal with escaped \" quotation marks"'
M6: """'literal containing different quotation marks: " '"""
M7: '''"another literal containing different quotation marks: ' "'''
M8: '''"literal spanning
several lines"'''
[fail:literal] [fail:literal]
F1: ' "preceeding whitespace is not parsed by literal"'
F2: ''' "don't forget closing quotation marks'''
[match:plaintext] [match:plaintext]
M1: '`simple plaintext`'
M2: '`plaintext with escaped quotation: \` `'
M3: '''`plaintext spanning
several lines`'''
[fail:plaintext] [fail:plaintext]
F1: ' `preceeding whitespace not parsed by plaintext parser`'
F2: '`following whitespace is neither parsed by plaintext parser` '
[match:regexp] [match:regexp]
......
...@@ -433,14 +433,14 @@ class TestWhitespace: ...@@ -433,14 +433,14 @@ class TestWhitespace:
cst = parser("DOCWörter Wörter Wörter") cst = parser("DOCWörter Wörter Wörter")
assert not cst.error_flag assert not cst.error_flag
lang2 = r'document = `DOC` { WORD } EOF' + tail lang2 = r'document = `DOC`{ WORD } EOF' + tail
parser = grammar_provider(lang2)() parser = grammar_provider(lang2)()
cst = parser("DOC Wörter Wörter Wörter") cst = parser("DOC Wörter Wörter Wörter")
assert cst.error_flag assert cst.error_flag
cst = parser("DOCWörter Wörter Wörter") cst = parser("DOCWörter Wörter Wörter")
assert not cst.error_flag assert not cst.error_flag
lang3 = r'document = `DOC` ~ { WORD } EOF' + tail lang3 = r'document = `DOC`~ { WORD } EOF' + tail
parser = grammar_provider(lang3)() parser = grammar_provider(lang3)()
cst = parser("DOC Wörter Wörter Wörter") cst = parser("DOC Wörter Wörter Wörter")
assert not cst.error_flag assert not cst.error_flag
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment