Commit bae74d44 authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- Merged Required-Operator into Series-Parser; changed terminology from "Required" to "mandatory"

parent e9b10f1f
......@@ -34,10 +34,16 @@ __all__ = ('Error',
class Error:
__slots__ = ['message', 'level', 'code', 'pos', 'line', 'column']
# error levels
WARNING = 1
ERROR = 1000
HIGHEST = ERROR
# error codes
MANDATORY_CONTINUATION = 1001
def __init__(self, message: str, level: int=ERROR, code: Hashable=0):
self.message = message
assert level >= 0
......
......@@ -78,7 +78,7 @@ from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, s
StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, TOKEN_PTYPE, \
ZOMBIE_PARSER
from DHParser.error import Error, is_error, has_errors, line_col, error_messages
from DHParser.error import Error, is_error, has_errors, line_col
from DHParser.toolkit import load_if_file
__all__ = ('PreprocessorFunc',
......@@ -1394,44 +1394,81 @@ class Series(NaryOperator):
EBNF-Notation: `... ...` (sequence of parsers separated by a blank or new line)
EBNF-Example: `series = letter letter_or_digit`
"""
RX_ARGUMENT = re.compile(r'\s(\S)')
NOPE = 1000
def __init__(self, *parsers: Parser, name: str = '') -> None:
def __init__(self, *parsers: Parser, mandatory: int = NOPE, name: str = '') -> None:
super(Series, self).__init__(*parsers, name=name)
assert len(self.parsers) >= 1
L = len(self.parsers)
assert 1 <= L < Series.NOPE, 'Length %i of series exceeds maximum length of %i' \
% (L, Series.NOPE)
if mandatory < 0: mandatory += L
assert 0 <= mandatory < L or mandatory == Series.NOPE
self.mandatory = mandatory
def __deepcopy__(self, memo):
parsers = copy.deepcopy(self.parsers, memo)
return self.__class__(*parsers, mandatory=self.mandatory, name=self.name)
def __call__(self, text: StringView) -> Tuple[Node, StringView]:
results = () # type: Tuple[Node, ...]
text_ = text # type: StringView
pos = 0
for parser in self.parsers:
node, text_ = parser(text_)
if not node:
return None, text
if pos < self.mandatory:
return None, text
else:
m = text.search(Series.RX_ARGUMENT)
i = max(1, text.index(m.regs[1][0])) if m else 1
node = Node(self, text[:i])
text_ = text[i:]
node.add_error('%s expected; "%s" found!' % (str(parser), text[:10]),
code=Error.MANDATORY_CONTINUATION)
results += (node,)
if node.error_flag:
break
pos += 1
assert len(results) <= len(self.parsers)
return Node(self, results), text_
def __repr__(self):
return " ".join(parser.repr for parser in self.parsers)
return " ".join([parser.repr for parser in self.parsers[:self.mandatory]]
+ (['§'] if self.mandatory != Series.NOPE else [])
+ [parser.repr for parser in self.parsers[self.mandatory:]])
# The following operator definitions add syntactical sugar, so one can write:
# `RE('\d+') + Optional(RE('\.\d+)` instead of `Series(RE('\d+'), Optional(RE('\.\d+))`
@staticmethod
def combined_mandatory(left, right):
left_mandatory, left_length = (left.mandatory, len(left.parsers)) \
if isinstance(left, Series) else (Series.NOPE, 1)
if left_mandatory != Series.NOPE:
return left_mandatory
right_mandatory = right.mandatory if isinstance(right, Series) else Series.NOPE
if right_mandatory != Series.NOPE:
return right_mandatory + left_length
return Series.NOPE
def __add__(self, other: Parser) -> 'Series':
other_parsers = cast('Series', other).parsers if isinstance(other, Series) \
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
return Series(*(self.parsers + other_parsers))
return Series(*(self.parsers + other_parsers),
mandatory=self.combined_mandatory(self, other))
def __radd__(self, other: Parser) -> 'Series':
other_parsers = cast('Series', other).parsers if isinstance(other, Series) \
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
return Series(*(other_parsers + self.parsers))
return Series(*(other_parsers + self.parsers),
mandatory=self.combined_mandatory(other, self))
def __iadd__(self, other: Parser) -> 'Series':
other_parsers = cast('Series', other).parsers if isinstance(other, Series) \
else cast(Tuple[Parser, ...], (other,)) # type: Tuple[Parser, ...]
self.parsers += other_parsers
self.mandatory = self.combined_mandatory(self, other)
return self
......@@ -1526,9 +1563,8 @@ class Required(FlowOperator):
i = max(1, text.index(m.regs[1][0])) if m else 1
node = Node(self, text[:i])
text_ = text[i:]
# assert False, "*"+text[:i]+"*"
node.add_error('%s expected; "%s" found!' %
(str(self.parser), text[:10]))
node.add_error('%s expected; "%s" found!' % (str(self.parser), text[:10]),
code=Error.MANDATORY_CONTINUATION)
return node, text_
def __repr__(self):
......@@ -1561,19 +1597,6 @@ class NegativeLookahead(Lookahead):
return not bool_value
# def iter_right_branch(node) -> Iterator[Node]:
# """
# Iterates over the right branch of `node` starting with node itself.
# Iteration is stopped if either there are no child nodes any more or
# if the parser of a node is a Lookahead parser. (Reason is: Since
# lookahead nodes do not advance the parser, it does not make sense
# to look back to them.)
# """
# while node:
# yield node # for well-formed EBNF code
# node = node.children[-1] if node.children else None
class Lookbehind(FlowOperator):
"""EXPERIMENTAL!!!"""
def __init__(self, parser: Parser, name: str = '') -> None:
......
......@@ -25,8 +25,9 @@ from functools import partial
sys.path.extend(['../', './'])
from DHParser.toolkit import is_logging, logging, compile_python_object, StringView
from DHParser.error import Error
from DHParser.parser import compile_source, Retrieve, Grammar, Forward, Token, ZeroOrMore, RE, \
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series
RegExp, Lookbehind, NegativeLookahead, OneOrMore, Series, Alternative
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from DHParser.dsl import grammar_provider, DHPARSER_IMPORTS
......@@ -250,6 +251,75 @@ class TestGrammar:
assert not CST.error_flag, CST.as_sxpr()
class TestSeries:
def test_non_mandatory(self):
lang = """
document = series | /.*/
series = "A" "B" "C" "D"
"""
parser = grammar_provider(lang)()
st = parser("ABCD");
assert not st.error_flag
st = parser("A_CD");
assert not st.error_flag
st = parser("AB_D");
assert not st.error_flag
def test_mandatory(self):
"""Test for the §-operator. The Series-parser should raise an
error for any non-match that occurs after the mandatory-operator.
"""
lang = """
document = series | /.*/
series = "A" "B" §"C" "D"
"""
parser = grammar_provider(lang)()
st = parser("ABCD");
assert not st.error_flag
st = parser("A_CD");
assert not st.error_flag
st = parser("AB_D");
assert st.error_flag
assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION
st = parser("ABC_");
assert st.error_flag
assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION
def test_series_composition(self):
TA, TB, TC, TD, TE = (Token(b) for b in "ABCDE")
s1 = Series(TA, TB, TC, mandatory=2)
s2 = Series(TD, TE)
combined = Alternative(s1 + s2, RegExp('.*'))
parser = Grammar(combined)
st = parser("ABCDE");
assert not st.error_flag
st = parser("A_CDE");
assert not st.error_flag
st = parser("AB_DE");
assert st.error_flag
assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION
st = parser("ABC_E");
assert st.error_flag
assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION
combined = Alternative(s2 + s1, RegExp('.*'))
parser = Grammar(combined)
st = parser("DEABC");
assert not st.error_flag
st = parser("_EABC");
assert not st.error_flag
st = parser("D_ABC");
assert not st.error_flag
st = parser("DE_BC");
assert not st.error_flag
st = parser("DEA_C");
assert not st.error_flag
st = parser("DEAB_");
assert st.error_flag
assert st.collect_errors()[0].code == Error.MANDATORY_CONTINUATION
class TestPopRetrieve:
mini_language = """
document = { text | codeblock }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment