Commit 7cd2ece0 authored by di68kap's avatar di68kap
Browse files

parse.py: Alternative: Errors like "A" | "AB" are now caught by static checks, already

parent 5763b9b9
......@@ -170,7 +170,7 @@ class EBNFGrammar(Grammar):
RE_CORE = RegExp('(?:(?<!\\\\)\\\\(?:/)|[^/])*')
regex_heuristics = Alternative(RegExp('[^ ]'), RegExp('[^/\\n*?+\\\\]*[*?+\\\\][^/\\n]/'))
literal_heuristics = Alternative(RegExp('~?\\s*"(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^"]*)*"'), RegExp("~?\\s*'(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^']*)*'"), RegExp('~?\\s*`(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^`]*)*`'), RegExp('~?\\s*´(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^´]*)*´'), RegExp('~?\\s*/(?:[\\\\]\\]|[^\\]]|[^\\\\]\\[[^/]*)*/'))
char_range_heuristics = NegativeLookahead(Alternative(RegExp('[\\n\\t ]'), Series(dwsp__, literal_heuristics), Series(Option(Alternative(Token(":"), Token("::"), Token(":?"))), SYM_REGEX, RegExp('\\s*\\]'))))
char_range_heuristics = NegativeLookahead(Alternative(RegExp('[\\n\\t ]'), Series(dwsp__, literal_heuristics), Series(Option(Alternative(Token("::"), Token(":?"), Token(":"))), SYM_REGEX, RegExp('\\s*\\]'))))
CH_LEADIN = Capture(Alternative(Token("0x"), Token("#x")))
RE_LEADOUT = Capture(Token("/"))
RE_LEADIN = Capture(Alternative(Series(Token("/"), Lookahead(regex_heuristics)), Token("^/")))
......
......@@ -1178,10 +1178,12 @@ class Grammar:
def __str__(self) -> str:
"""Return the name of this object's class."""
return self.__class__.__name__
def __getitem__(self, key):
try:
return self.__dict__[key]
except KeyError:
p = getattr(self, key, None)
parser_template = getattr(self.__class__, key, None)
if parser_template:
# add parser to grammar object on the fly...
......@@ -1191,6 +1193,7 @@ class Grammar:
return self[key]
raise UnknownParserError('Unknown parser "%s" !' % key)
def __contains__(self, key) -> bool:
"""Return True if `key` is an entry of the instance `__dict__` or is
otherwise reachable as an attribute of this object (`hasattr`)."""
return key in self.__dict__ or hasattr(self, key)
......@@ -2530,8 +2533,10 @@ class Alternative(NaryParser):
st = self.grammar(fixed_start, self.parsers[k], complete_match=False)
if not st.errors and len(st) >= 1:
errors.append(self.static_error(
"Pre-empted Alternative."
))
"Parser-specification Error in " + self.location_info()
+ "\nAlternative %i will never be reached, because its starting-"
'string "%s" is already captured by earlier alternative %i !'
% (i + 1, fixed_start, k + 1), BAD_ORDER_OF_ALTERNATIVES))
return errors or None
......
......@@ -34,7 +34,7 @@ from typing import Callable, cast, Iterator, Sequence, List, Set, Union, \
from DHParser.configuration import SERIALIZATIONS, XML_SERIALIZATION, \
SXPRESSION_SERIALIZATION, COMPACT_SERIALIZATION, JSON_SERIALIZATION, \
SMART_SERIALIZATION, get_config_value
from DHParser.error import Error, ErrorCode, ERROR
from DHParser.error import Error, ErrorCode, ERROR, PARSER_STOPPED_BEFORE_END
from DHParser.stringview import StringView # , real_indices
from DHParser.toolkit import re, cython, linebreaks, line_col
......@@ -234,7 +234,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
uninitialized.
Examples:
TODO: Add some exmpales here!
TODO: Add some examples here!
Attributes and Properties:
tag_name (str): The name of the node, which is either its
......@@ -1458,7 +1458,10 @@ class RootNode(Node):
def __init__(self, node: Optional[Node] = None):
super().__init__('__not_yet_ready__', '')
self.clear_errors()
self.errors = [] # type: List[Error]
self.error_nodes = dict() # type: Dict[int, List[Error]] # id(node) -> error list
self.error_positions = dict() # type: Dict[int, Set[int]] # pos -> set of id(node)
self.error_flag = 0
if node is not None:
self.swallow(node)
# customization for XML-Representation
......@@ -1472,10 +1475,11 @@ class RootNode(Node):
Removes all error messages. This can be used to keep the error messages
of different subsequent phases of tree-processing separate.
"""
self.errors = [] # type: List[Error]
self.error_nodes = dict() # type: Dict[int, List[Error]] # id(node) -> error list
self.error_positions = dict() # type: Dict[int, Set[int]] # pos -> set of id(node)
self.error_flag = 0
raise NotImplementedError
# self.errors = [] # type: List[Error]
# self.error_nodes = dict() # type: Dict[int, List[Error]] # id(node) -> error list
# self.error_positions = dict() # type: Dict[int, Set[int]] # pos -> set of id(node)
# self.error_flag = 0
def __str__(self):
errors = self.errors_sorted
......@@ -1524,8 +1528,12 @@ class RootNode(Node):
It is possible to add errors to a RootNode object, before it
has actually swallowed the root of the syntax tree.
"""
if self.tag_name != '__not_yet_ready__':
raise AssertionError('RootNode.swallow() has already been called!')
if node is None:
self.tag_name = EMPTY_PTYPE
self.tag_name = ZOMBIE_TAG
self.with_pos(0)
self.new_error(self, 'Parser did not match!', PARSER_STOPPED_BEFORE_END)
return self
self._result = node._result
self.children = node.children
......@@ -1621,6 +1629,19 @@ class RootNode(Node):
self.errors.sort(key=lambda e: e.pos)
return self.errors
def did_match(self) -> bool:
"""Returns True, if the parser that has generated this tree did
match, False otherwise. Depending on whether the Grammar-object
that generated the syntax tree was called with `complete_match=True`
or not, this requires either the complete document to have been
matched or only the beginning.
Note: If the parser did match, this does not mean that it must
have matched without errors. It simply means that no
PARSER_STOPPED_BEFORE_END-error has occurred.
A tree whose tag name is still '__not_yet_ready__' is always
reported as not matched."""
# Both conditions must hold: the tag name is no longer the
# '__not_yet_ready__' placeholder, and none of the collected errors
# carries the PARSER_STOPPED_BEFORE_END code.
return self.tag_name != '__not_yet_ready__' \
and not any(e.code == PARSER_STOPPED_BEFORE_END for e in self.errors)
def customized_XML(self):
"""
Returns a customized XML representation of the tree.
......
......@@ -148,7 +148,7 @@ CH_LEADIN = `0x` | `#x`
char_range_heuristics = ! ( /[\n\t ]/
| ~ literal_heuristics
| [`:`|`::`|`:?`] SYM_REGEX /\s*\]/ )
| [`::`|`:?`|`:`] SYM_REGEX /\s*\]/ )
literal_heuristics = /~?\s*"(?:[\\]\]|[^\]]|[^\\]\[[^"]*)*"/
| /~?\s*'(?:[\\]\]|[^\]]|[^\\]\[[^']*)*'/
| /~?\s*`(?:[\\]\]|[^\]]|[^\\]\[[^`]*)*`/
......
......@@ -256,19 +256,26 @@ class TestFlowControl:
t2 = "All word and not play makes Jack a dull boy END\n"
def test_lookbehind(self):
"""Build a small grammar whose `doc_end` parser uses `Lookbehind`,
parse `self.t1` and `self.t2` with it, and then check `did_match()`
when `self.t2` is parsed with individual named parsers and
`complete_match=False`."""
# NOTE(review): `ws` and `end` are assigned again two lines further
# down (there with a `pname`); this looks like the pre- and post-change
# lines of a diff shown together -- confirm against the real file.
ws = RegExp(r'\s*')
end = RegExp("END")
ws = RegExp(r'\s*'); ws.pname = "ws"
end = RegExp("END"); end.pname = "end"
doc_end = Lookbehind(RegExp('\\s*?\\n')) + end
# NOTE(review): `word` is likewise assigned twice in a row -- confirm.
word = RegExp(r'\w+')
word = RegExp(r'\w+'); word.pname = "word"
sequence = OneOrMore(NegativeLookahead(end) + word + ws)
document = ws + sequence + doc_end + ws
parser = Grammar(document)
# t1 is expected to parse without the error flag being set ...
cst = parser(self.t1)
assert not cst.error_flag, cst.as_sxpr()
# ... whereas t2 is expected to set the error flag.
cst = parser(self.t2)
assert cst.error_flag, cst.as_sxpr()
# The sub-parsers are fetched by the names given via `pname` above.
cst = parser(self.t2, parser['ws'], complete_match=False)
assert cst.did_match() and len(cst) == 0 and not cst.errors
cst = parser(self.t2, parser['word'], complete_match=False)
assert cst.did_match() and cst.content == "All" and not cst.errors
cst = parser(self.t2, parser['end'], complete_match=False)
assert not cst.did_match()
def test_lookbehind_indirect(self):
class LookbehindTestGrammar(Grammar):
parser_initialization__ = ["upon instantiation"]
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment