Commit 1f55ce46 authored by eckhart's avatar eckhart
Browse files

parse.py: class MandatoryNary: dropped skip_rules and err_msgs; now,...

parse.py: class MandatoryNary: dropped skip_rules and err_msgs; now, self.grammar.skip_rules__ and self.grammar.error_messages__ are used directly
parent ade1ec7c
...@@ -174,11 +174,14 @@ class ParserError(Exception): ...@@ -174,11 +174,14 @@ class ParserError(Exception):
return pe return pe
ResumeList = List[Union[RxPatternType, str, Callable]] # list of strings or regular expressions PatternMatchType = Union[RxPatternType, str, Callable]
ErrorMessagesType = List[Tuple[PatternMatchType, str]]
ResumeList = List[PatternMatchType] # list of strings, regular expressions or callables
ReentryPointAlgorithm = Callable[[StringView, int, int], Tuple[int, int]] ReentryPointAlgorithm = Callable[[StringView, int, int], Tuple[int, int]]
# (text, start point, end point) => (reentry point, match length) # (text, start point, end point) => (reentry point, match length)
# A return value of (-1, x) means that no reentry point before the end of the document was found # A return value of (-1, x) means that no reentry point before the end of the document was found
@cython.returns(cython.int) @cython.returns(cython.int)
@cython.locals(upper_limit=cython.int, closest_match=cython.int, pos=cython.int) @cython.locals(upper_limit=cython.int, closest_match=cython.int, pos=cython.int)
def reentry_point(rest: StringView, def reentry_point(rest: StringView,
...@@ -927,6 +930,19 @@ class Grammar: ...@@ -927,6 +930,19 @@ class Grammar:
that act as rules to find the reentry point if a ParserError was that act as rules to find the reentry point if a ParserError was
thrown during the execution of the parser with the respective name. thrown during the execution of the parser with the respective name.
skip_rules__: A mapping of parser names to a list of regular expressions
that act as rules to find the reentry point if a ParserError was
thrown during the execution of the parser with the respective name.
error_messages__: A mapping of parser names to a list of tuples of regular
expressions and error messages. If a mandatory violation error occurs on
a specific symbol (i.e. parser name) and any of the regular expressions
matches, the error message of the first matching expression is used
instead of the generic mandatory violation error message. This
allows answering typical kinds of errors (say, putting a comma ","
where a semi-colon ";" is expected) with more informative error
messages.
anonymous__: A regular expression to identify names of parsers that are anonymous__: A regular expression to identify names of parsers that are
assigned to class fields but shall nevertheless yield anonymous assigned to class fields but shall nevertheless yield anonymous
nodes (i.e. nodes the tag name of which starts with a colon ":" nodes (i.e. nodes the tag name of which starts with a colon ":"
...@@ -1018,6 +1034,8 @@ class Grammar: ...@@ -1018,6 +1034,8 @@ class Grammar:
(resulting in a maximum recursion depth reached error) when (resulting in a maximum recursion depth reached error) when
the grammar definition contains left recursions. the grammar definition contains left recursions.
associated_symbol_cache__: A cache for the associated_symbol()-method.
# mirrored class attributes: # mirrored class attributes:
static_analysis_pending__: A pointer to the class attribute of the same name. static_analysis_pending__: A pointer to the class attribute of the same name.
...@@ -1090,10 +1108,12 @@ class Grammar: ...@@ -1090,10 +1108,12 @@ class Grammar:
has been encountered. Default is 10.000 characters. has been encountered. Default is 10.000 characters.
""" """
python_src__ = '' # type: str python_src__ = '' # type: str
root__ = PARSER_PLACEHOLDER # type: Parser root__ = PARSER_PLACEHOLDER # type: Parser
# root__ must be overwritten with the root-parser by grammar subclass # root__ must be overwritten with the root-parser by grammar subclass
parser_initialization__ = ["pending"] # type: List[str] parser_initialization__ = ["pending"] # type: List[str]
resume_rules__ = dict() # type: Dict[str, ResumeList] resume_rules__ = dict() # type: Dict[str, ResumeList]
skip_rules__ = dict() # type: Dict[str, ResumeList]
error_messages__ = dict() # type: Dict[str, ErrorMessagesType]
anonymous__ = RX_NEVER_MATCH # type: RxPatternType anonymous__ = RX_NEVER_MATCH # type: RxPatternType
# some default values # some default values
COMMENT__ = r'' # type: str # r'#.*(?:\n|$)' COMMENT__ = r'' # type: str # r'#.*(?:\n|$)'
...@@ -1183,14 +1203,15 @@ class Grammar: ...@@ -1183,14 +1203,15 @@ class Grammar:
assert ((self.__class__.COMMENT__ assert ((self.__class__.COMMENT__
and self.__class__.COMMENT__ == self.comment_rx__.pattern) and self.__class__.COMMENT__ == self.comment_rx__.pattern)
or (not self.__class__.COMMENT__ and self.comment_rx__ == RX_NEVER_MATCH)) or (not self.__class__.COMMENT__ and self.comment_rx__ == RX_NEVER_MATCH))
self.start_parser__ = None # type: Optional[Parser] self.start_parser__ = None # type: Optional[Parser]
self._dirty_flag__ = False # type: bool self._dirty_flag__ = False # type: bool
self.left_recursion__ = get_config_value('left_recursion') # type: bool self.left_recursion__ = get_config_value('left_recursion') # type: bool
self.history_tracking__ = get_config_value('history_tracking') # type: bool self.history_tracking__ = get_config_value('history_tracking') # type: bool
self.resume_notices__ = get_config_value('resume_notices') # type: bool self.resume_notices__ = get_config_value('resume_notices') # type: bool
self.flatten_tree__ = get_config_value('flatten_tree') # type: bool self.flatten_tree__ = get_config_value('flatten_tree') # type: bool
self.max_parser_dropouts__ = get_config_value('max_parser_dropouts') # type: int self.max_parser_dropouts__ = get_config_value('max_parser_dropouts') # type: int
self.reentry_search_window__ = get_config_value('reentry_search_window') # type: int self.reentry_search_window__ = get_config_value('reentry_search_window') # type: int
self.associated_symbol_cache__ = dict() # type: Dict[Parser, Parser]
self._reset__() self._reset__()
# prepare parsers in the class, first # prepare parsers in the class, first
...@@ -1534,7 +1555,8 @@ class Grammar: ...@@ -1534,7 +1555,8 @@ class Grammar:
>>> gr.associated_symbol(anonymous_re).pname >>> gr.associated_symbol(anonymous_re).pname
'word' 'word'
""" """
symbol = None # type: Optional[Parser] symbol = self.associated_symbol_cache__.get(parser, None) # type: Optional[Parser]
if symbol: return symbol
def find_symbol_for_parser(context: List[Parser]) -> Optional[bool]: def find_symbol_for_parser(context: List[Parser]) -> Optional[bool]:
nonlocal symbol, parser nonlocal symbol, parser
...@@ -1547,11 +1569,14 @@ class Grammar: ...@@ -1547,11 +1569,14 @@ class Grammar:
return False # continue searching return False # continue searching
if parser.pname: if parser.pname:
return parser symbol = parser
self.root_parser__.apply(find_symbol_for_parser) else:
if symbol is None: self.root_parser__.apply(find_symbol_for_parser)
raise AttributeError('Parser %s (%i) is not contained in Grammar!' if symbol is None:
% (str(parser), id(parser))) raise AttributeError('Parser %s (%i) is not contained in Grammar!'
% (str(parser), id(parser)))
self.associated_symbol_cache__[parser] = symbol
return symbol return symbol
...@@ -2296,7 +2321,6 @@ class Counted(UnaryParser): ...@@ -2296,7 +2321,6 @@ class Counted(UnaryParser):
return errors return errors
MessagesType = List[Tuple[Union[str, RxPatternType, Callable], str]]
NO_MANDATORY = 2**30 NO_MANDATORY = 2**30
...@@ -2320,7 +2344,7 @@ class MandatoryNary(NaryParser): ...@@ -2320,7 +2344,7 @@ class MandatoryNary(NaryParser):
""" """
def __init__(self, *parsers: Parser, def __init__(self, *parsers: Parser,
mandatory: int = NO_MANDATORY, mandatory: int = NO_MANDATORY,
err_msgs: MessagesType = [], err_msgs: ErrorMessagesType = [],
skip: ResumeList = []) -> None: skip: ResumeList = []) -> None:
super(MandatoryNary, self).__init__(*parsers) super(MandatoryNary, self).__init__(*parsers)
length = len(self.parsers) length = len(self.parsers)
...@@ -2328,24 +2352,23 @@ class MandatoryNary(NaryParser): ...@@ -2328,24 +2352,23 @@ class MandatoryNary(NaryParser):
mandatory += length mandatory += length
self.mandatory = mandatory # type: int self.mandatory = mandatory # type: int
self.err_msgs = err_msgs # type: MessagesType
self.skip = skip # type: ResumeList
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
parsers = copy.deepcopy(self.parsers, memo) parsers = copy.deepcopy(self.parsers, memo)
duplicate = self.__class__(*parsers, mandatory=self.mandatory, duplicate = self.__class__(*parsers, mandatory=self.mandatory)
err_msgs=self.err_msgs, skip=self.skip)
copy_parser_base_attrs(self, duplicate) copy_parser_base_attrs(self, duplicate)
return duplicate return duplicate
@cython.returns(cython.int) @cython.returns(cython.int)
def get_reentry_point(self, text_: StringView) -> int: def get_reentry_point(self, text_: StringView) -> int:
"""Returns a reentry-point determined by the skip-list in `self.skip`. """Returns a reentry-point determined by the associated skip-list in
If no reentry-point was found or the skip-list ist empty, -1 is returned. `self.grammar.skip_rules__`. If no reentry-point was found or the
skip-list is empty, -1 is returned.
""" """
if self.skip: skip = self.grammar.skip_rules__.get(self.grammar.associated_symbol(self).pname, [])
if skip:
gr = self._grammar gr = self._grammar
return reentry_point(text_, self.skip, gr.comment_rx__, gr.reentry_search_window__) return reentry_point(text_, skip, gr.comment_rx__, gr.reentry_search_window__)
return -1 return -1
@cython.locals(i=cython.int, location=cython.int) @cython.locals(i=cython.int, location=cython.int)
...@@ -2380,7 +2403,9 @@ class MandatoryNary(NaryParser): ...@@ -2380,7 +2403,9 @@ class MandatoryNary(NaryParser):
location = grammar.document_length__ - len(text_) location = grammar.document_length__ - len(text_)
err_node = Node(ZOMBIE_TAG, text_[:i]).with_pos(location) err_node = Node(ZOMBIE_TAG, text_[:i]).with_pos(location)
found = text_[:10].replace('\n', '\\n ') + '...' found = text_[:10].replace('\n', '\\n ') + '...'
for search, message in self.err_msgs: sym = self.grammar.associated_symbol(self).pname
err_msgs = self.grammar.error_messages__.get(sym, [])
for search, message in err_msgs:
is_func = callable(search) # search rule is a function: StringView -> bool is_func = callable(search) # search rule is a function: StringView -> bool
is_str = isinstance(search, str) # search rule is a simple string is_str = isinstance(search, str) # search rule is a simple string
is_rxs = not is_func and not is_str # search rule is a regular expression is_rxs = not is_func and not is_str # search rule is a regular expression
...@@ -2417,12 +2442,13 @@ class MandatoryNary(NaryParser): ...@@ -2417,12 +2442,13 @@ class MandatoryNary(NaryParser):
errors = super().static_analysis() errors = super().static_analysis()
msg = [] msg = []
length = len(self.parsers) length = len(self.parsers)
if self.mandatory == NO_MANDATORY and self.err_msgs: sym = self.grammar.associated_symbol(self).pname
msg.append('Custom error messages require that parameter "mandatory" is set!') # if self.mandatory == NO_MANDATORY and sym in self.grammar.error_messages__:
elif self.mandatory == NO_MANDATORY and self.skip: # msg.append('Custom error messages require that parameter "mandatory" is set!')
msg.append('Search expressions for skipping text require parameter ' # elif self.mandatory == NO_MANDATORY and sym in self.grammar.skip_rules__:
'"mandatory" to be set!') # msg.append('Search expressions for skipping text require parameter '
elif length == 0: # '"mandatory" to be set!')
if length == 0:
msg.append('Number of elements %i is below minimum length of 1' % length) msg.append('Number of elements %i is below minimum length of 1' % length)
elif length >= NO_MANDATORY: elif length >= NO_MANDATORY:
msg.append('Number of elements %i of series exceeds maximum length of %i' msg.append('Number of elements %i of series exceeds maximum length of %i'
...@@ -2710,7 +2736,7 @@ class Interleave(MandatoryNary): ...@@ -2710,7 +2736,7 @@ class Interleave(MandatoryNary):
def __init__(self, *parsers: Parser, def __init__(self, *parsers: Parser,
mandatory: int = NO_MANDATORY, mandatory: int = NO_MANDATORY,
err_msgs: MessagesType = [], err_msgs: ErrorMessagesType = [],
skip: ResumeList = [], skip: ResumeList = [],
repetitions: Sequence[Tuple[int, int]] = ()) -> None: repetitions: Sequence[Tuple[int, int]] = ()) -> None:
super(Interleave, self).__init__( super(Interleave, self).__init__(
...@@ -2726,7 +2752,6 @@ class Interleave(MandatoryNary): ...@@ -2726,7 +2752,6 @@ class Interleave(MandatoryNary):
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
parsers = copy.deepcopy(self.parsers, memo) parsers = copy.deepcopy(self.parsers, memo)
duplicate = self.__class__(*parsers, mandatory=self.mandatory, duplicate = self.__class__(*parsers, mandatory=self.mandatory,
err_msgs=self.err_msgs, skip=self.skip,
repetitions=self.repetitions) repetitions=self.repetitions)
copy_parser_base_attrs(self, duplicate) copy_parser_base_attrs(self, duplicate)
return duplicate return duplicate
......
...@@ -739,7 +739,7 @@ class TestErrorCustomizationErrors: ...@@ -739,7 +739,7 @@ class TestErrorCustomizationErrors:
lang2 = '\n'.join(l2) lang2 = '\n'.join(l2)
assert lang2.find('@mitte_') < 0 assert lang2.find('@mitte_') < 0
result, messages, ast = compile_ebnf(lang2) result, messages, ast = compile_ebnf(lang2)
assert not messages assert not messages, str(messages)
l3 = [zeile for zeile in l2 if not zeile.lstrip().startswith('mitte')] l3 = [zeile for zeile in l2 if not zeile.lstrip().startswith('mitte')]
lang3 = '\n'.join(l3).replace('mitte', '(`M` §"ITTE")') lang3 = '\n'.join(l3).replace('mitte', '(`M` §"ITTE")')
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment