Commit 0eb0ec81 authored by eckhart's avatar eckhart
Browse files

test/ `\~` is now substituted by mangled whitespace in regular...

test/ `\~` is now substituted by mangled whitespace in regular expressions for _skip and _resume
parent 791b86d6
......@@ -427,9 +427,18 @@ class EBNFDirectives:
for after a parsing error has occurred. Other
than the skip field, this configures resuming after
the failing parser has returned.
drop: A set that may contain the elements `DROP_TOKEN` and
super_ws(property): Cache for the "super whitespace" which
is a regular expression that merges whitespace and
comments. This property should only be accessed after
the `whitespace` and `comment` fields have been filled
with the values parsed from the EBNF source.
__slots__ = ['whitespace', 'comment', 'literalws', 'tokens', 'filter', 'error', 'skip',
'resume', 'drop']
'resume', 'drop', '_super_ws']
def __init__(self):
self.whitespace = WHITESPACE_TYPES['vertical'] # type: str
......@@ -441,6 +450,7 @@ class EBNFDirectives:
self.skip = dict() # type: Dict[str, List[Union[unrepr, str]]]
self.resume = dict() # type: Dict[str, List[Union[unrepr, str]]]
self.drop = set() # type: Set[str]
self._super_ws = None # type: Optional[str]
def __getitem__(self, key):
    """Return the value of the attribute named `key`.

    Allows dict-style read access (`directives[key]`). The lookup is
    delegated to `getattr`, so a missing name raises `AttributeError`
    rather than `KeyError`.
    """
    return getattr(self, key)
......@@ -449,6 +459,12 @@ class EBNFDirectives:
assert hasattr(self, key)
setattr(self, key, value)
@property
def super_ws(self):
    """Return the "super whitespace" regular expression, computed lazily.

    The expression merges the `whitespace` and `comment` regular
    expressions via `mixin_comment()`. It is built on first access and
    cached in `_super_ws`, so it should only be read after both fields
    hold the values parsed from the EBNF source; later changes to those
    fields are not reflected in the cached result.

    Exposed as a property because callers read it as a plain attribute
    (`directives.super_ws`), e.g. as the replacement string passed to
    `str.replace`.
    """
    if self._super_ws is None:
        # First access: derive the merged regex and remember it.
        self._super_ws = mixin_comment(self.whitespace, self.comment)
    return self._super_ws
def keys(self):
    """Return the field names of this object, i.e. its `__slots__` list.

    The list object itself is returned, not a copy.
    """
    # NOTE(review): `__slots__` also lists the private `_super_ws` cache,
    # so that name presumably shows up here too - confirm callers that
    # iterate over keys() expect it.
    return self.__slots__
......@@ -736,9 +752,9 @@ class EBNFCompiler(Compiler):
string search or a regular expression from the nodes content. Returns
an empty string in case the node is neither regexp nor literal.
# self.directives.whitespace, self.directives.comment
if nd.tag_name == 'regexp':
return unrepr("re.compile(r'%s')" % self._extract_regex(nd))
search_regex = self._extract_regex(nd).replace(r'\~', self.directives.super_ws)
return unrepr("re.compile(r'%s')" % search_regex)
elif nd.tag_name == 'literal':
s = nd.content[1:-1] # remove quotation marks
return unrepr("re.compile(r'(?=%s)')" % escape_re(s))
......@@ -578,7 +578,7 @@ PARSER_PLACEHOLDER = Parser()
def mixin_comment(whitespace: str, comment: str) -> str:
Returns a regular expression that merges comment and whitespace
regexps. Thus comments cann occur whereever whitespace is allowed
regexps. Thus comments can occur wherever whitespace is allowed
and will be skipped just as implicit whitespace.
Note, that because this works on the level of regular expressions,
......@@ -644,15 +644,16 @@ class TestCustomizedResumeParsing:
@whitespace = /\s*/
@comment = /(?:\/\*(?:.|\n)*?\*\/)/ # c-style comments
document = ~ { word }
@ word_resume = /\s+(?=.)|$/
@ word_resume = /(?:(?:\s\~)|(?:\~(?<=\s)))(?=.)|$/
word = !EOF §/\w+/ ~
EOF = !/./
doc1 = """word no*word /* comment */ word"""
grammar = grammar_provider(grammar_specification)()
st = grammar(doc1)
# print(next(st.pick(reverse = True)))
# TODO: provide test case
# print(st.as_sxpr())
class TestInSeriesResume:
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment