Commit 707b1cf9 authored by di68kap's avatar di68kap
Browse files

- DHParser/ bugfix for finding reentry position

parent 0eb0ec81
......@@ -171,7 +171,7 @@ def reentry_point(rest: StringView, rules: ResumeList, comment_regex) -> int:
k, length = search_func(search_rule)
while a < b <= k:
a, b = next_comment()
while a <= k < b:
while a <= k + length < b:
k, length = search_func(search_rule, k + max(length, 1))
while a < b <= k:
a, b = next_comment()
......@@ -577,7 +577,7 @@ PARSER_PLACEHOLDER = Parser()
def mixin_comment(whitespace: str, comment: str) -> str:
Returns a regular expression that merges comment and whitespace
Returns a regular expression pattern that merges comment and whitespace
regexps. Thus comments can occur wherever whitespace is allowed
and will be skipped just as implicit whitespace.
......@@ -590,6 +590,33 @@ def mixin_comment(whitespace: str, comment: str) -> str:
return whitespace
def non_empty(whitespace: str) -> str:
    r"""Returns a regular expression pattern that matches only if the
    regular expression pattern `whitespace` matches AND the match is
    not empty.

    If `whitespace` does not match the empty string '' in the first
    place, it is returned unaltered. Otherwise it is wrapped as
    ``(?=(.|\n))`` + `whitespace` + ``(?!\1)``: the lookahead requires
    (and captures) at least one following character, and the negative
    lookahead requires that the character after the match differs from
    that captured character.

    WARNING: `non_empty` does not work for regular expressions the
    matched strings of which can be followed by a symbol that can also
    occur at the start of the regular expression.

    In particular, it does not work for fixed-size regular expressions,
    that is, / / or /   / or /\t/ won't work, but / */ or /\s*/ or /\s+/
    do work. There is no test for this. Fixed-size regular expressions
    run through `non_empty` will not match any more if they are applied
    to the beginning or the middle of a sequence of whitespace!!!

    NOTE: the wrapped pattern uses the numbered backreference ``\1``.
    It therefore refers to the first capturing group of whatever regular
    expression the result finally becomes part of.

    :param whitespace: a regular expression pattern
    :return: new regular expression pattern that does not match the
        empty string '' any more.
    """
    if re.match(whitespace, ''):
        # `whitespace` can match '', so guard it with the two lookaheads.
        return r'(?:(?=(.|\n))' + whitespace + r'(?!\1))'
    return whitespace
class UnknownParserError(KeyError):
"""UnknownParserError is raised if a Grammar object is called with a
parser that does not exist or if in the course of parsing a parser
......@@ -644,16 +644,21 @@ class TestCustomizedResumeParsing:
@whitespace = /\s*/
@comment = /(?:\/\*(?:.|\n)*?\*\/)/ # c-style comments
document = ~ { word }
@ word_resume = /(?:(?:\s\~)|(?:\~(?<=\s)))(?=.)|$/
# @ word_resume = /(?:(?:\s\~)|(?:\~(?<=\s)))(?=.)|$/
@word_resume = /(?=(.|\n))\~(?!\1)(?=.)|$/
# @ word_resume = /\~(?=.)|$/
word = !EOF §/\w+/ ~
EOF = !/./
doc1 = """word no*word /* comment */ word"""
grammar = grammar_provider(grammar_specification)()
doc1 = """word no*word /* comment */ word"""
st = grammar(doc1)
# print(next(st.pick(reverse = True)))
assert st.children and st.children[-1].tag_name == 'word'
# TODO: provide test case
doc2 = """word no*word/* comment */word"""
st = grammar(doc2)
assert st.children and st.children[-1].tag_name == 'word'
# print(st.as_sxpr())
class TestInSeriesResume:
......@@ -32,14 +32,24 @@ from DHParser.log import is_logging, log_ST, log_parsing_history
from DHParser.error import Error, is_error
from DHParser.parse import ParserError, Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, AllOf, SomeOf, \
UnknownParserError, MetaParser, GrammarError, EMPTY_NODE
UnknownParserError, MetaParser, EMPTY_NODE
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, DHPARSER_IMPORTS
from DHParser.dsl import grammar_provider, CompilationError
from DHParser.dsl import grammar_provider
from DHParser.syntaxtree import Node, parse_sxpr
from DHParser.stringview import StringView
class TestWhitespace:
# TODO: add test cases here
def test_whitespace_comment_mangling(self):
def test_non_emptify_version(self):
class TestParserError:
def test_parser_error_str(self):
pe = ParserError(Node('TAG', 'test').with_pos(0), StringView('Beispiel'), None, True)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment