Commit 576a3ae7 authored by di68kap

- DHParser/ebnf.py/EBNFGrammar : bugfixes for regex, plaintext and literal parsers

parent bd322f28
......@@ -186,12 +186,10 @@ class EBNFGrammar(Grammar):
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
# plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
plaintext = Series(RegExp('`(?:\\\\`|[^"])*?`'), wsp__)
# literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
literal = Alternative(Series(RegExp('"(?:\\\\"|[^"])*?"'), wsp__),
Series(RegExp("'(?:\\\\'|[^'])*?'"), wsp__))
regexp = Series(RegExp('/(?:(?<!\\\\)\\\\(?:/)|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:(?<!\\\\)\\\\`|[^"])*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:(?<!\\\\)\\\\"|[^"])*?"'), wsp__),
Series(RegExp("'(?:(?<!\\\\)\\\\'|[^'])*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
......
......@@ -920,20 +920,6 @@ class Grammar:
(See test/test_testing.TestLookahead !)
"""
last_record = self.history__[-2] if len(self.history__) > 1 else None # type: Optional[HistoryRecord]
# # TODO: Checking match status of history__[-2] is inaccurate if ending
# # lookahead parser is part of an Alternative-parser !!!
# # (Need a test-case!)
# return last_record and parser != self.root_parser__ \
# and last_record.status == HistoryRecord.MATCH \
# and last_record.node.pos \
# + len(last_record.node) >= len(self.document__) \
# and any(tn in self and isinstance(self[tn], Lookahead)
# or tn[0] == ':' and issubclass(eval(tn[1:]), Lookahead)
# for tn in last_record.call_stack)
last_record = self.history__[-2] if len(self.history__) > 1 else None # type: Optional[HistoryRecord]
# TODO: Checking match status of history__[-2] is inaccurate if ending
# lookahead parser is part of an Alternative-parser !!!
# (Need a test-case!)
return last_record and parser != self.root_parser__ \
and any(self.history__[i].status == HistoryRecord.MATCH \
and self.history__[i].node.pos \
......
......@@ -42,10 +42,10 @@ option = "[" §expression "]"
#: leaf-elements
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:\\"|[^"])*?"/~ # e.g. "(", '+', 'while'
| /'(?:\\'|[^'])*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:\\`|[^"])*?`/~ # like literal but does not eat whitespace
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
literal = /"(?:(?<!\\)\\"|[^"])*?"/~ # e.g. "(", '+', 'while'
| /'(?:(?<!\\)\\'|[^'])*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:(?<!\\)\\`|[^"])*?`/~ # like literal but does not eat whitespace
regexp = /\/(?:(?<!\\)\\(?:\/)|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace
EOF = !/./
......@@ -59,7 +59,7 @@ class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file.
"""
expression = Forward()
source_hash__ = "946ad6d28df1350f1f8cd2502fcf012f"
source_hash__ = "c454e8d67e4190759e529feb13eca0c2"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
......@@ -69,9 +69,9 @@ class EBNFGrammar(Grammar):
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:\\\\`|[^"])*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:\\\\"|[^"])*?"'), wsp__), Series(RegExp("'(?:\\\\'|[^'])*?'"), wsp__))
regexp = Series(RegExp('/(?:(?<!\\\\)\\\\(?:/)|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:(?<!\\\\)\\\\`|[^"])*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:(?<!\\\\)\\\\"|[^"])*?"'), wsp__), Series(RegExp("'(?:(?<!\\\\)\\\\'|[^'])*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
......
......@@ -19,10 +19,14 @@ M6: """'literal containing different quotation marks: " '"""
M7: '''"another literal containing different quotation marks: ' "'''
M8: '''"literal spanning
several lines"'''
M9*: '"\\"'
M10*: '"\"'
M11*: '"\"" '
[fail:literal]
F1: ' "preceeding whitespace is not parsed by literal"'
F2: ''' "don't forget closing quotation marks'''
F4: '"\\" other stuff "'
[match:plaintext]
......@@ -42,10 +46,12 @@ M1: '/[A-Z][a-z]+/' # plain regex
M2: '/\w+/' # regex with backslashes
M4: '/\//' # forward slashes must be escaped
M5: '/\s*/ ' # whitespace may follow the regex-definition
M6: '/\\/ '
M7*: '/\// '
[fail:regexp]
F1: ' /no preceeding whitespace/'
F2: '/\\/ other stuff /'
[match:whitespace]
M1: '~'
......
......@@ -156,7 +156,7 @@ structural = "subsection" | "section" | "chapter" | "subsubsection"
CMDNAME = /\\(?:(?!_)\w)+/~
TXTCOMMAND = /\\text\w+/
ESCAPED = /\\[%$&_\/{}]/
SPECIAL = /[$&_\\\\\/]/
SPECIAL = /[$&_\/\\\\]/
BRACKETS = /[\[\]]/ # left or right square bracket: [ ]
LINEFEED = /[\\][\\]/
......
......@@ -57,7 +57,7 @@ class LaTeXGrammar(Grammar):
paragraph = Forward()
tabular_config = Forward()
text_element = Forward()
source_hash__ = "dacb1f9ad5b1c18cdc29c7ddb7878959"
source_hash__ = "242fb29d844ed8eb0024286ea5b78bff"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
......@@ -79,7 +79,7 @@ class LaTeXGrammar(Grammar):
NAME = Capture(Series(RegExp('\\w+'), wsp__))
LINEFEED = RegExp('[\\\\][\\\\]')
BRACKETS = RegExp('[\\[\\]]')
SPECIAL = RegExp('[$&_\\\\\\\\/]')
SPECIAL = RegExp('[$&_/\\\\\\\\]')
ESCAPED = RegExp('\\\\[%$&_/{}]')
TXTCOMMAND = RegExp('\\\\text\\w+')
CMDNAME = Series(RegExp('\\\\(?:(?!_)\\w)+'), wsp__)
......
......@@ -277,7 +277,7 @@ class TestSelfHosting:
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
regexp = /\/(?:\\(?:\/)|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace
EOF = !/./
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment