Commit 7a89b7e0 authored by eckhart's avatar eckhart
Browse files

- bugfixes resuming

parent e48d213b
......@@ -182,7 +182,7 @@ def reentry_point(rest: StringView,
def entry_point(search_func, search_rule) -> int:
a, b = next_comment()
k, length = search_func(search_rule)
while a < b <= k:
while a < b <= k + length:
a, b = next_comment()
# find next as long as start or end point of resume regex are inside a comment
while (a < k < b) or (a < k + length < b):
......@@ -1075,6 +1075,23 @@ class Grammar:
pass
def __deepcopy__(self, memo):
    """Return a new object of the same class that carries over the
    configuration of this one.

    The duplicate is created by calling ``self.__class__`` with
    ``self.root_parser__`` as its sole argument. Afterwards the settings
    named in the loop below are assigned from this object to the
    duplicate, one after the other. The ``memo`` argument of the
    deep-copy protocol is accepted, but it is not used here.

    Because the constructor is called with exactly one positional
    argument, a derived class that changes the signature of ``__init__``
    must replace this method (i.e. override it without calling the
    implementation of the superclass).
    """
    clone = self.__class__(self.root_parser__)
    # Copy the settings in a fixed order; the values are assigned as they
    # are (no nested copying takes place).
    for setting in ('history_tracking__',
                    'resume_notices__',
                    'flatten_tree__',
                    'left_recursion_depth__',
                    'max_parser_dropouts__',
                    'reentry_search_window__'):
        setattr(clone, setting, getattr(self, setting))
    return clone
def __init__(self, root: Parser = None) -> None:
self.all_parsers__ = set() # type: Set[Parser]
# add compiled regular expression for comments, if it does not already exist
......
......@@ -26,6 +26,7 @@ functions that are very generic.
import ast
import bisect
import functools
import hashlib
import io
import multiprocessing
......@@ -508,6 +509,7 @@ def compile_python_object(python_src: str, catch_obj_regex="DSL") -> Any:
@cython.locals(i=cython.int)
@functools.lru_cache()
def linebreaks(text: Union[StringView, str]) -> List[int]:
"""
Returns a list of indices of all line breaks in the text.
......
......@@ -49,7 +49,8 @@ def trace_history(self: Parser, text: StringView) -> Tuple[Optional[Node], Strin
grammar.most_recent_error__ = None
errors = [mre.error] # type: List[Error]
text_ = grammar.document__[mre.error.pos:]
lc = line_col(grammar.document_lbreaks__, self.grammar.document_length__ - len(text))
lc = line_col(grammar.document_lbreaks__, mre.error.pos)
resume_pos = self.grammar.document_length__ - len(text)
target = text if len(text) <= 10 else text[:7] + '...'
resumers = [grammar.call_stack__[-1][0]]
......@@ -61,14 +62,13 @@ def trace_history(self: Parser, text: StringView) -> Tuple[Optional[Node], Strin
if mre.first_throw:
notice = Error( # resume notice
'Resuming from parser "{}" with parser "{}" at line {}, column {}: {}'
.format(mre.node.tag_name, resumer, *lc, repr(target)),
mre.error.pos, Error.RESUME_NOTICE)
'Resuming from parser "{}" at position {}:{} with parser "{}": {}'
.format(mre.node.tag_name, *lc, resumer, repr(target)),
resume_pos, Error.RESUME_NOTICE)
else:
notice = Error( # skip notice
'Skipping within parser {} to line {}, column {}: {}'
.format(resumer, *lc, repr(target)),
mre.error.pos, Error.RESUME_NOTICE)
'Skipping from position {}:{} within parser {}: {}'
.format(*lc, resumer, repr(target)), resume_pos, Error.RESUME_NOTICE)
if grammar.resume_notices__:
grammar.tree__.add_error(mre.node, notice)
errors.append(notice)
......
# EBNF-Grammar in EBNF
# Test code with errors. All places marked by a "$" should yield an error
@ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
@ comment = /#.*(?:\n|$)/
@ whitespace = /\s*/
@ literalws = right
@ anonymous = pure_elem, EOF
@ drop = whitespace, EOF # do not include these even in the concrete syntax tree
@ drop = whitespace, EOF
# re-entry-rules for resuming after parsing-error
......@@ -29,45 +29,45 @@ directive = "@" §symbol "="
#: components
expression = sequence { :OR~ sequence }
sequence = ["§"] ( interleave | lookaround ) # "§" means all following terms mandatory
sequence = ["§"] ( interleave | lookaround )
{ :AND~ ["§"] ( interleave | lookaround ) }
interleave = difference { "°" ["§"] difference }
lookaround = flowmarker § (oneormore | pure_elem)
difference = term ["-" § (oneormore ; pure_elem)]
term = oneormore | repetition | option | pure_elem
difference = term ["-" § (oneormore $ pure_elem)] # <- ERROR
term = oneormore | repetition | option | pure_elem # resuming expected here
#: elements
pure_elem = element § !/[?*+]/ # element strictly without a suffix
element = [retrieveop] symbol !DEF # negative lookahead to be sure it's not a definition
pure_elem = element § !/[?*+]/
element = [retrieveop] symbol !DEF
| literal
| plaintext
| regexp
| whitespace
| group;
| group$ # <- ERROR
#: flow-operators
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
| "<-!" | "<-&" # '<-' negative lookbehind, '<-&' positive lookbehind
retr$ieveop = "::" | ":?" | ":" # '::' pop, ':?' optional pop, ':' retrieve
flowmarker = "!" | "&" # resuming expected here
| "<-!" | "<-&"
retr$ieveop = "::" | ":?" | ":"
#: groups
group = "(" §expression ")"
oneormore = "{" expression "}+" | element "+"
repetition = "{" §expressi$$$on "}" | element "*"
option = "[" §expression "]" | element "?"
repetition = "{" §expressi$on "}" | element "*" # <- ERROR
option = "[" §expression "]" | element "?" # resuming expected here
#: leaf-elements
symbol = /(?!\d)\w+/~ # e.g. expression, term, parameter_list
$literals = { literal }+ # string chaining, only allowed in directives!
literal = /"(?:(?<!\\)\\"|[^"])*?"/~ # e.g. "(", '+', 'while'
| /'(?:(?<!\\)\\'|[^'])*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:(?<!\\)\\`|[^`])*?`/~ # like literal but does not eat whitespace
regexp = /\/(?:(?<!\\)\\(?:\/)|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace
symbol = /(?!\d)\w+/~
$literals = { literal }+ # <- ERROR
literal = /"(?:(?<!\\)\\"|[^"])*?"/~ # resuming expected here
| /'(?:(?<!\\)\\'|[^'])*?'/~
plaintext = /`(?:(?<!\\)\\`|[^`])*?`/~
regexp = /\/(?:(?<!\\)\\(?:\/)|[^\/])*?\//~
whitespace = /~/~
#: delimiters
......@@ -76,5 +76,5 @@ OR = `|`
AND = `,` | ``
ENDL = `;` | ``
EOF = !/./ [:?DEF] [:?OR] [:?AND] [:?ENDL] # [:?DEF], [:?OR], ... clear stack by eating stored value
EOF = !/./ [:?DEF] [:?OR] [:?AND] [:?ENDL]
......@@ -19,6 +19,7 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import copy
import os
import sys
from functools import partial
......@@ -30,14 +31,14 @@ sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from DHParser.configuration import get_config_value, set_config_value
from DHParser.toolkit import compile_python_object, re
from DHParser.log import is_logging, log_ST, log_parsing_history
from DHParser.error import Error, is_error
from DHParser.error import Error, is_error, adjust_error_locations
from DHParser.parse import ParserError, Parser, Grammar, Forward, TKN, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative, \
Interleave, UnknownParserError, MetaParser, Token, EMPTY_NODE, Capture, Drop, Whitespace, \
GrammarError
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
compile_ebnf, DHPARSER_IMPORTS
parse_ebnf, DHPARSER_IMPORTS
from DHParser.dsl import grammar_provider
from DHParser.syntaxtree import Node, parse_sxpr
from DHParser.stringview import StringView
......@@ -930,6 +931,88 @@ class TestBorderlineCases:
assert not cst.error_flag
# Test fixture: an EBNF grammar (written in EBNF) that contains deliberately
# misplaced "$" characters. test_bigfattest() parses this text and compares
# the (line, column) of every reported error with a fixed list of positions,
# so any change to the text inside the string can shift those positions.
EBNF_with_Errors = r"""# Test code with errors. All places marked by a "$" should yield and error
@ comment = /#.*(?:\n|$)/
@ whitespace = /\s*/
@ literalws = right
@ anonymous = pure_elem, EOF
@ drop = whitespace, EOF
# re-entry-rules for resuming after parsing-error
@ definition_resume = /\n\s*(?=@|\w+\w*\s*=)/
@ directive_resume = /\n\s*(?=@|\w+\w*\s*=)/
# specialized error messages for certain cases
@ definition_error = /,/, 'Delimiter "," not expected in definition!\nEither this was meant to '
'be a directive and the directive symbol @ is missing\nor the error is '
'due to inconsistent use of the comma as a delimiter\nfor the elements '
'of a sequence.'
#: top-level
syntax = [~//] { definition | directive } §EOF
definition = symbol §:DEF~ expression :ENDL~
directive = "@" §symbol "="
(regexp | literals | symbol)
{ "," (regexp | literals | symbol) }
#: components
expression = sequence { :OR~ sequence }
sequence = ["§"] ( interleave | lookaround )
{ :AND~ ["§"] ( interleave | lookaround ) }
interleave = difference { "°" ["§"] difference }
lookaround = flowmarker § (oneormore | pure_elem)
difference = term ["-" § (oneormore $ pure_elem)] # <- ERROR
term = oneormore | repetition | option | pure_elem # resuming expected her
#: elements
pure_elem = element § !/[?*+]/
element = [retrieveop] symbol !DEF
| literal
| plaintext
| regexp
| whitespace
| group$ # <- ERROR
#: flow-operators
flowmarker = "!" | "&" # resuming expected her
| "<-!" | "<-&"
retr$ieveop = "::" | ":?" | ":"
#: groups
group = "(" §expression ")"
oneormore = "{" expression "}+" | element "+"
repetition = "{" §expressi$on "}" | element "*" # <- ERROR
option = "[" §expression "]" | element "?" # resuming expected here
#: leaf-elements
symbol = /(?!\d)\w+/~
$literals = { literal }+ # <- ERROR
literal = /"(?:(?<!\\)\\"|[^"])*?"/~ # resuming expected her
| /'(?:(?<!\\)\\'|[^'])*?'/~
plaintext = /`(?:(?<!\\)\\`|[^`])*?`/~
regexp = /\/(?:(?<!\\)\\(?:\/)|[^\/])*?\//~
whitespace = /~/~
#: delimiters
DEF = `=` | `:=` | `::=`
OR = `|`
AND = `,` | ``
ENDL = `;` | ``
EOF = !/./ [:?DEF] [:?OR] [:?AND] [:?ENDL]
"""
class TestReentryAfterError:
def setup(self):
lang = """
......@@ -1072,6 +1155,18 @@ class TestReentryAfterError:
cst = gr(test_case)
assert any(err.code == Error.MANDATORY_CONTINUATION for err in cst.errors)
def test_bigfattest(self):
    """Parse the error-ridden EBNF fixture and check where errors are reported.

    A deep copy of the grammar returned by ``get_ebnf_grammar()`` is handed
    to ``resume_notices_on()`` and then applied to ``EBNF_with_Errors``.
    After ``adjust_error_locations()`` has been run on the resulting errors,
    the (line, column) pairs of ``errors_sorted`` must match the expected
    list exactly, including its order.
    """
    grammar = copy.deepcopy(get_ebnf_grammar())
    resume_notices_on(grammar)
    syntax_tree = grammar(EBNF_with_Errors)
    adjust_error_locations(syntax_tree.errors, EBNF_with_Errors)
    expected = [(36, 37), (37, 1), (47, 19), (51, 1), (53, 5),
                (57, 1), (59, 27), (60, 1), (65, 1), (66, 1)]
    found = [(err.line, err.column) for err in syntax_tree.errors_sorted]
    assert found == expected
class TestConfiguredErrorMessages:
def test_configured_error_message(self):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment