Commit 66927bcd authored by eckhart

- syntaxtree.py: content and structure now properties of Node objects + switched str() and .content semantics
parent 175bc030
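
For orientation, the gist of the change: Node.content and Node.structure are now read-only properties instead of methods, and error messages are reported by str(node) rather than by content. The following sketch is illustrative only; the import path and the example grammar are assumptions drawn from the docstring examples further down in this diff, not part of the commit itself.

# Illustrative sketch of the changed Node API (assumed import path, example grammar from the docstrings below).
from DHParser.parser import Grammar, RE

number = RE(r'\d+') + RE(r'\.') + RE(r'\d+') | RE(r'\d+')
result = Grammar(number)("3.1416")

print(result.content)    # property access, was result.content() -> '3.1416'
print(result.structure)  # property access, was result.structure() -> single-line S-expression
print(str(result))       # str() now embeds ' <<< Error ... >>> ' markers when errors occurred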
@@ -546,7 +546,7 @@ class Grammar:
>>> number = RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')
>>> number_parser = Grammar(number)
- >>> number_parser("3.1416").content()
+ >>> number_parser("3.1416").content
'3.1416'
Collecting the parsers that define a grammar in a descendant class of
@@ -1126,7 +1126,7 @@ class RegExp(Parser):
Example:
>>> word = RegExp(r'\w+')
- >>> Grammar(word)("Haus").content()
+ >>> Grammar(word)("Haus").content
'Haus'
EBNF-Notation: `/ ... /`
@@ -1182,11 +1182,11 @@ class RE(Parser):
>>> word = RE(r'\w+', wR=r'\s*')
>>> parser = Grammar(word)
>>> result = parser('Haus ')
- >>> result.content()
+ >>> result.content
'Haus '
- >>> result.structure()
+ >>> result.structure
'(:RE (:RegExp "Haus") (:Whitespace " "))'
- >>> parser(' Haus').content()
+ >>> str(parser(' Haus'))
' <<< Error on " Haus" | Parser did not match! Invalid source file?\n Most advanced: None\n Last match: None; >>> '
EBNF-Notation: `/ ... /~` or `~/ ... /` or `~/ ... /~`
@@ -1361,11 +1361,11 @@ class Option(UnaryOperator):
Examples:
>>> number = Option(Token('-')) + RegExp(r'\d+') + Option(RegExp(r'\.\d+'))
- >>> Grammar(number)('3.14159').content()
+ >>> Grammar(number)('3.14159').content
'3.14159'
- >>> Grammar(number)('3.14159').structure()
+ >>> Grammar(number)('3.14159').structure
'(:Series (:Option) (:RegExp "3") (:Option (:RegExp ".14159")))'
- >>> Grammar(number)('-1').content()
+ >>> Grammar(number)('-1').content
'-1'
EBNF-Notation: `[ ... ]`
@@ -1401,9 +1401,9 @@ class ZeroOrMore(Option):
Examples:
>>> sentence = ZeroOrMore(RE(r'\w+,?')) + Token('.')
- >>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content()
+ >>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content
'Wo viel der Weisheit, da auch viel des Grämens.'
- >>> Grammar(sentence)('.').content() # an empty sentence also matches
+ >>> Grammar(sentence)('.').content # an empty sentence also matches
'.'
EBNF-Notation: `{ ... }`
@@ -1436,9 +1436,9 @@ class OneOrMore(UnaryOperator):
Examples:
>>> sentence = OneOrMore(RE(r'\w+,?')) + Token('.')
- >>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content()
+ >>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content
'Wo viel der Weisheit, da auch viel des Grämens.'
- >>> Grammar(sentence)('.').content() # an empty sentence also matches
+ >>> str(Grammar(sentence)('.')) # an empty sentence also matches
' <<< Error on "." | Parser did not match! Invalid source file?\n Most advanced: None\n Last match: None; >>> '
EBNF-Notation: `{ ... }+`
@@ -1479,9 +1479,9 @@ class Series(NaryOperator):
Example:
>>> variable_name = RegExp('(?!\d)\w') + RE('\w*')
- >>> Grammar(variable_name)('variable_1').content()
+ >>> Grammar(variable_name)('variable_1').content
'variable_1'
- >>> Grammar(variable_name)('1_variable').content()
+ >>> str(Grammar(variable_name)('1_variable'))
' <<< Error on "1_variable" | Parser did not match! Invalid source file?\n Most advanced: None\n Last match: None; >>> '
EBNF-Notation: `... ...` (sequence of parsers separated by a blank or new line)
@@ -1583,12 +1583,12 @@ class Alternative(NaryOperator):
# the order of the sub-expression matters!
>>> number = RE('\d+') | RE('\d+') + RE('\.') + RE('\d+')
>>> Grammar(number)("3.1416").content()
>>> str(Grammar(number)("3.1416"))
'3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> '
# the most selective expression should be put first:
>>> number = RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')
>>> Grammar(number)("3.1416").content()
>>> Grammar(number)("3.1416").content
'3.1416'
EBNF-Notation: `... | ...`
@@ -1645,9 +1645,9 @@ class AllOf(NaryOperator):
Example:
>>> prefixes = AllOf(Token("A"), Token("B"))
- >>> Grammar(prefixes)('A B').content()
+ >>> Grammar(prefixes)('A B').content
'A B'
- >>> Grammar(prefixes)('B A').content()
+ >>> Grammar(prefixes)('B A').content
'B A'
EBNF-Notation: `<... ...>` (sequence of parsers enclosed by angular brackets)
@@ -1694,11 +1694,11 @@ class SomeOf(NaryOperator):
Example:
>>> prefixes = SomeOf(Token("A"), Token("B"))
- >>> Grammar(prefixes)('A B').content()
+ >>> Grammar(prefixes)('A B').content
'A B'
- >>> Grammar(prefixes)('B A').content()
+ >>> Grammar(prefixes)('B A').content
'B A'
- >>> Grammar(prefixes)('B').content()
+ >>> Grammar(prefixes)('B').content
'B'
EBNF-Notation: `<... ...>` (sequence of parsers enclosed by angular brackets)
@@ -1869,7 +1869,7 @@ class Capture(UnaryOperator):
if node:
assert self.name, """Tried to apply an unnamed capture-parser!"""
stack = self.grammar.variables__.setdefault(self.name, [])
- stack.append(str(node))
+ stack.append(node.content)
self.grammar.push_rollback__(len(text), lambda: stack.pop())
# caching will be blocked by parser guard (see way above),
# because it would prevent recapturing of rolled back captures
@@ -161,7 +161,7 @@ ZOMBIE_PARSER = ZombieParser()
ChildrenType = Tuple['Node', ...]
NoChildren = cast(ChildrenType, ()) # type: ChildrenType
- StrictResultType = Union[ChildrenType, StringView, str]
+ StrictResultType = Union[ChildrenType, str]
ResultType = Union[ChildrenType, 'Node', StringView, str, None]
@@ -233,7 +233,7 @@ class Node(collections.abc.Sized):
# Assignment to self.result initializes the attributes _result, children and _len
# The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes
if leafhint:
- self._result = result # type: StrictResultType
+ self._result = str(result) # type: StrictResultType
self.children = NoChildren # type: ChildrenType
self._len = -1 # type: int # lazy evaluation
else:
@@ -244,11 +244,11 @@
def __str__(self):
- if self.children:
-     return "".join(str(child) for child in self.children)
- elif isinstance(self._result, StringView):
-     self.result = str(self._result)
- return self._result
+ s = "".join(str(child) for child in self.children) if self.children else self.result
+ if self._errors:
+     return ' <<< Error on "%s" | %s >>> ' % \
+         (s, '; '.join(e.message for e in self._errors))
+ return s
def __repr__(self):
@@ -308,6 +308,7 @@ class Node(collections.abc.Sized):
"""
return self._result
@result.setter
def result(self, result: ResultType):
# # made obsolete by static type checking with mypy
@@ -330,7 +331,7 @@
self.error_flag = max(child.error_flag for child in self.children)
else:
self.children = NoChildren
- self._result = result
+ self._result = str(result)
# # shorter but slower:
# self._result = (result,) if isinstance(result, Node) else result or '' # type: StrictResultType
# self.children = cast(ChildrenType, self._result) \
@@ -339,6 +340,27 @@
# self.error_flag = max(self.error_flag,
# max(child.error_flag for child in self.children)) # type: bool
+ @property
+ def content(self) -> str:
+     """
+     Returns content as string, inserting error messages where
+     errors occurred.
+     """
+     if self.children:
+         return "".join(child.content for child in self.children)
+     return self._result
+
+ @property
+ def structure(self) -> str:
+     """
+     Return structure (and content) as S-expression on a single line
+     without any line breaks.
+     """
+     return flatten_sxpr(self.as_sxpr(showerrors=False))
@property
def pos(self) -> int:
"""Returns the position of the Node's content in the source text."""
@@ -362,7 +384,7 @@
@property
def errors(self) -> List[Error]:
"""
- Returns the errors that occured at this Node,
+ Returns the errors that occurred at this Node,
not including any errors from child nodes.
"""
return self._errors.copy()
@@ -464,7 +486,7 @@
return head + '\n'.join([tab + data_fn(s) for s in res.split('\n')]) + tail
- def as_sxpr(self, src: str = None, compact: bool = False) -> str:
+ def as_sxpr(self, src: str = None, compact: bool = False, showerrors: bool = True) -> str:
"""
Returns content as S-expression, i.e. in lisp-like form.
@@ -487,7 +509,7 @@
txt += " '(pos %i " % node.pos # + " %i %i)" % line_col(src, node.pos)
# if node.error_flag: # just for debugging error collecting
# txt += " HAS ERRORS"
- if node.errors:
+ if showerrors and node.errors:
txt += " '(err '(%s))" % ' '.join(str(err).replace('"', r'\"')
for err in node.errors)
return txt + '\n'
@@ -505,7 +527,7 @@
return self._tree_repr(' ', opening, closing, pretty, density=density)
- def as_xml(self, src: str = None) -> str:
+ def as_xml(self, src: str = None, showerrors: bool = True) -> str:
"""
Returns content as XML-tree.
@@ -521,7 +543,7 @@
# s += ' pos="%i"' % node.pos
if src:
txt += ' line="%i" col="%i"' % line_col(src, node.pos)
- if node.errors:
+ if showerrors and node.errors:
txt += ' err="%s"' % ''.join(str(err).replace('"', r'\"') for err in node.errors)
return txt + ">\n"
@@ -532,25 +554,6 @@
return self._tree_repr(' ', opening, closing, density=1)
- def structure(self) -> str:
-     """
-     Return structure (and content) as S-expression on a single line
-     without any line breaks.
-     """
-     return flatten_sxpr(self.as_sxpr())
-
- def content(self) -> str:
-     """
-     Returns content as string, inserting error messages where
-     errors occurred.
-     """
-     s = "".join(child.content() for child in self.children) if self.children \
-         else str(self.result)
-     return (' <<< Error on "%s" | %s >>> '
-         % (s, '; '.join(e.message for e in self._errors))) if self._errors else s
def find(self, match_function: Callable) -> Iterator['Node']:
"""
Finds nodes in the tree that match a specific criterion.
@@ -437,7 +437,7 @@ def collapse(context: List[Node]):
string representation of the node.
"""
node = context[-1]
- node.result = str(node)
+ node.result = node.content
@transformation_factory
@@ -516,7 +516,7 @@ def is_one_of(context: List[Node], tag_name_set: AbstractSet[str]) -> bool:
def has_content(context: List[Node], regexp: str) -> bool:
"""Checks a node's content against a regular expression."""
- return bool(re.match(regexp, str(context[-1])))
+ return bool(re.match(regexp, context[-1].content))
@transformation_factory(Callable)
@@ -624,7 +624,7 @@ def assert_content(context: List[Node], regexp: str):
node = context[-1]
if not has_content(context, regexp):
node.add_error('Element "%s" violates %s on %s' %
-     (node.parser.name, str(regexp), str(node)))
+     (node.parser.name, str(regexp), node.content))
@transformation_factory
@@ -401,13 +401,13 @@ class TestAllSome:
def test_all(self):
ebnf = 'prefix = <"A" "B">'
grammar = grammar_provider(ebnf)()
- assert grammar('B A').content() == 'B A'
+ assert grammar('B A').content == 'B A'
def test_some(self):
ebnf = 'prefix = <"A" | "B">'
grammar = grammar_provider(ebnf)()
- assert grammar('B A').content() == 'B A'
- assert grammar('B').content() == 'B'
+ assert grammar('B A').content == 'B A'
+ assert grammar('B').content == 'B'
if __name__ == "__main__":
@@ -313,11 +313,11 @@ class TestAllOfSomeOf:
def test_allOf_order(self):
"""Test that parsers of an AllOf-List can match in arbitrary order."""
prefixes = AllOf(Token("A"), Token("B"))
- assert Grammar(prefixes)('A B').content() == 'A B'
- assert Grammar(prefixes)('B A').content() == 'B A'
+ assert Grammar(prefixes)('A B').content == 'A B'
+ assert Grammar(prefixes)('B A').content == 'B A'
# aternative Form
prefixes = AllOf(Series(Token("B"), Token("A")))
- assert Grammar(prefixes)('A B').content() == 'A B'
+ assert Grammar(prefixes)('A B').content == 'A B'
def test_allOf_completeness(self):
"""Test that an error is raised if not all parsers of an AllOf-List
@@ -329,28 +329,28 @@ class TestAllOfSomeOf:
"""Test that one and the same parser may be listed several times
and must be matched several times accordingly."""
prefixes = AllOf(Token("A"), Token("B"), Token("A"))
- assert Grammar(prefixes)('A A B').content() == 'A A B'
- assert Grammar(prefixes)('A B A').content() == 'A B A'
- assert Grammar(prefixes)('B A A').content() == 'B A A'
+ assert Grammar(prefixes)('A A B').content == 'A A B'
+ assert Grammar(prefixes)('A B A').content == 'A B A'
+ assert Grammar(prefixes)('B A A').content == 'B A A'
assert Grammar(prefixes)('A B B').error_flag
def test_someOf_order(self):
"""Test that parsers of an AllOf-List can match in arbitrary order."""
prefixes = SomeOf(Token("A"), Token("B"))
- assert Grammar(prefixes)('A B').content() == 'A B'
- assert Grammar(prefixes)('B A').content() == 'B A'
+ assert Grammar(prefixes)('A B').content == 'A B'
+ assert Grammar(prefixes)('B A').content == 'B A'
# aternative Form
prefixes = SomeOf(Alternative(Token("B"), Token("A")))
- assert Grammar(prefixes)('A B').content() == 'A B'
- assert Grammar(prefixes)('B').content() == 'B'
+ assert Grammar(prefixes)('A B').content == 'A B'
+ assert Grammar(prefixes)('B').content == 'B'
def test_someOf_redundance(self):
"""Test that one and the same parser may be listed several times
and must be matched several times accordingly."""
prefixes = SomeOf(Token("A"), Token("B"), Token("A"))
- assert Grammar(prefixes)('A A B').content() == 'A A B'
- assert Grammar(prefixes)('A B A').content() == 'A B A'
- assert Grammar(prefixes)('B A A').content() == 'B A A'
+ assert Grammar(prefixes)('A A B').content == 'A A B'
+ assert Grammar(prefixes)('A B A').content == 'A B A'
+ assert Grammar(prefixes)('B A A').content == 'B A A'
assert Grammar(prefixes)('A B B').error_flag
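
A side note on the serializers touched above: as_sxpr() and as_xml() gain a showerrors flag, and the new structure property calls as_sxpr(showerrors=False) so that the S-expression stays free of error annotations. A minimal usage sketch, again with an assumed import path and the RE example from the docstrings above:

# Sketch only; import path and example grammar are assumptions for illustration.
from DHParser.parser import Grammar, RE

word = RE(r'\w+', wR=r'\s*')
node = Grammar(word)('Haus ')

print(node.as_sxpr())                  # error annotations included (default showerrors=True)
print(node.as_sxpr(showerrors=False))  # error annotations suppressed
print(node.structure)                  # flattened, single-line form without error annotations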