Commit 98a81aab authored by di68kap's avatar di68kap
Browse files

- parse.py: bugfix: reentry_point() did not work with string-resume expressions

parent f7652de1
......@@ -39,7 +39,7 @@ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, DropWhitespa
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
unrepr, compile_python_object, DHPARSER_PARENTDIR
unrepr, compile_python_object, DHPARSER_PARENTDIR, NEVER_MATCH_PATTERN
from DHParser.transform import TransformationFunc, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_whitespace, remove_empty, \
remove_tokens, flatten, forbid, assert_content
......@@ -99,7 +99,8 @@ from DHParser import start_logging, suspend_logging, resume_logging, is_filename
replace_content, replace_content_by, forbid, assert_content, remove_infix_operator, \\
error_on, recompile_grammar, left_associative, lean_left, set_config_value, \\
get_config_value, XML_SERIALIZATION, SXPRESSION_SERIALIZATION, COMPACT_SERIALIZATION, \\
JSON_SERIALIZATION, access_thread_locals, access_presets, finalize_presets, ErrorCode
JSON_SERIALIZATION, access_thread_locals, access_presets, finalize_presets, ErrorCode, \\
RX_NEVER_MATCH
'''.format(dhparser_parentdir=DHPARSER_PARENTDIR)
......@@ -541,6 +542,7 @@ class EBNFCompiler(Compiler):
compiled texts.)
"""
COMMENT_KEYWORD = "COMMENT__"
COMMENT_RX_KEYWORD = "comment_rx__"
WHITESPACE_KEYWORD = "WSP_RE__"
RAW_WS_KEYWORD = "WHITESPACE__"
WHITESPACE_PARSER_KEYWORD = "wsp__"
......@@ -548,8 +550,9 @@ class EBNFCompiler(Compiler):
RESUME_RULES_KEYWORD = "resume_rules__"
SKIP_RULES_SUFFIX = '_skip__'
ERR_MSG_SUFFIX = '_err_msg__'
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD,
RESUME_RULES_KEYWORD, ERR_MSG_SUFFIX}
RESERVED_SYMBOLS = {COMMENT_KEYWORD, COMMENT_RX_KEYWORD, WHITESPACE_KEYWORD, RAW_WS_KEYWORD,
WHITESPACE_PARSER_KEYWORD, DROP_WHITESPACE_PARSER_KEYWORD,
RESUME_RULES_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneous AST transformation."
PREFIX_TABLE = {'§': 'Required',
......@@ -780,6 +783,8 @@ class EBNFCompiler(Compiler):
("mixin_comment(whitespace=" + self.RAW_WS_KEYWORD
+ ", comment=" + self.COMMENT_KEYWORD + ")")))
definitions.append((self.RAW_WS_KEYWORD, "r'{}'".format(self.directives.whitespace)))
comment_rx = "re.compile(COMMENT__)" if self.directives.comment else "RX_NEVER_MATCH"
definitions.append((self.COMMENT_RX_KEYWORD, comment_rx))
definitions.append((self.COMMENT_KEYWORD, "r'{}'".format(self.directives.comment)))
# prepare and add resume-rules
......
......@@ -42,7 +42,8 @@ from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, FrozenNode, RootNode, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_TAG, ResultType
from DHParser.toolkit import sane_parser_name, escape_control_characters, re, cython
from DHParser.toolkit import sane_parser_name, escape_control_characters, re, cython, \
RX_NEVER_MATCH
__all__ = ('Parser',
......@@ -123,7 +124,7 @@ ResumeList = List[Union[str, Any]] # list of strings or regular expressions
def reentry_point(rest: StringView, rules: ResumeList, comment_regex) -> int:
"""
Finds the point where parsing should resume after a ParserError has been caught.
Makes sure that this reentry-point does not lie inside a comment.
The algorithm makes sure that this reentry-point does not lie inside a comment.
Args:
rest: The rest of the parsed text or, in other words, the point where
a ParserError was thrown.
......@@ -152,7 +153,7 @@ def reentry_point(rest: StringView, rules: ResumeList, comment_regex) -> int:
def str_search(s, start: int = 0) -> Tuple[int, int]:
nonlocal rest
return rest.find(s), len(rule)
return rest.find(s, start), len(rule)
def rx_search(rx, start: int = 0) -> Tuple[int, int]:
nonlocal rest
......@@ -551,9 +552,6 @@ PARSER_PLACEHOLDER = Parser()
#
########################################################################
RX_NEVER_MATCH = re.compile(r'..(?<=^)')
def mixin_comment(whitespace: str, comment: str) -> str:
"""
Returns a regular expression that merges comment and whitespace
......@@ -705,6 +703,8 @@ class Grammar:
comment_rx__: The compiled regular expression for comments. If no
comments have been defined, it defaults to RX_NEVER_MATCH.
This instance-attribute will only be defined if a class-attribute
with the same name does not already exist!
start_parser__: During parsing, the parser with which the parsing process
was started (see method `__call__`) or `None` if no parsing process
......@@ -846,8 +846,13 @@ class Grammar:
def __init__(self, root: Parser = None) -> None:
self.all_parsers__ = set() # type: Set[Parser]
self.comment_rx__ = re.compile(self.COMMENT__) \
if hasattr(self, 'COMMENT__') and self.COMMENT__ else RX_NEVER_MATCH
# add compiled regular expression for comments, if it does not already exist
if not hasattr(self, 'comment_rx__'):
self.comment_rx__ = re.compile(self.COMMENT__) \
if hasattr(self, 'COMMENT__') and self.COMMENT__ else RX_NEVER_MATCH
else:
assert ((self.COMMENT__ and self.COMMENT__ == self.comment_rx__.pattern)
or (not self.COMMENT__ and self.comment_rx__ == RX_NEVER_MATCH))
self.start_parser__ = None # type: Optional[Parser]
self._dirty_flag__ = False # type: bool
self.history_tracking__ = False # type: bool
......
......@@ -52,6 +52,8 @@ except ImportError:
__all__ = ('typing',
'cython',
'cython_optimized',
'NEVER_MATCH_PATTERN',
'RX_NEVER_MATCH',
're_find',
'escape_re',
'escape_control_characters',
......@@ -102,6 +104,10 @@ DHPARSER_PARENTDIR = os.path.dirname(DHPARSER_DIR.rstrip('/'))
#######################################################################
NEVER_MATCH_PATTERN = r'..(?<=^)'
RX_NEVER_MATCH = re.compile(NEVER_MATCH_PATTERN)
def re_find(s, r, pos=0, endpos=9223372036854775807):
"""
Returns the match of the first occurrence of the regular expression
......
......@@ -785,24 +785,33 @@ class TestReentryAfterError:
block_A = "a" §"b" "c"
block_B = "x" "y" "z"
"""
def mini_suite(grammar):
tree = grammar('abc/*x*/xyz')
assert not tree.errors
tree = grammar('abDxyz')
mandatory_cont = (Error.MANDATORY_CONTINUATION, Error.MANDATORY_CONTINUATION_AT_EOF)
assert len(tree.errors) == 1 and tree.errors[0].code in mandatory_cont
tree = grammar('abD/*x*/xyz')
assert len(tree.errors) == 1 and tree.errors[0].code in mandatory_cont
tree = grammar('aD /*x*/ c /* a */ /*x*/xyz')
assert len(tree.errors) == 1 and tree.errors[0].code in mandatory_cont
# test regex-defined resume rule
grammar = grammar_provider(lang)()
tree = grammar('abc/*x*/xyz')
assert not tree.errors
tree = grammar('abDxyz')
mandatory_cont = (Error.MANDATORY_CONTINUATION, Error.MANDATORY_CONTINUATION_AT_EOF)
assert len(tree.errors) == 1 and tree.errors[0].code in mandatory_cont
tree = grammar('abD/*x*/xyz')
assert len(tree.errors) == 1 and tree.errors[0].code in mandatory_cont
tree = grammar('aD /*x*/ c /* a */ /*x*/xyz')
assert len(tree.errors) == 1 and tree.errors[0].code in mandatory_cont
mini_suite(grammar)
# test string-defined resume rule
alt_lang = lang.replace('@ block_A_resume = /x/',
'@ block_A_resume = "x"')
grammar = grammar_provider(alt_lang)()
mini_suite(grammar)
class TestConfiguredErrorMessages:
def test_configured_error_message(self):
lang = """
document = series | /.*/
@series_error = "a badly configured error message {5}"
series = "X" | head §"C" "D"
series = /X/ | head §"C" "D"
head = "A" "B"
"""
parser = grammar_provider(lang)()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment