
Commit 66927bcd authored by eckhart


- syntaxtree.py: context and structure now properties of Node objects + switched str() and .context semantics
parent 175bc030
@@ -546,7 +546,7 @@ class Grammar:
     >>> number = RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')
     >>> number_parser = Grammar(number)
-    >>> number_parser("3.1416").content()
+    >>> number_parser("3.1416").content
     '3.1416'
     Collecting the parsers that define a grammar in a descendant class of

@@ -1126,7 +1126,7 @@ class RegExp(Parser):
     Example:
     >>> word = RegExp(r'\w+')
-    >>> Grammar(word)("Haus").content()
+    >>> Grammar(word)("Haus").content
     'Haus'
     EBNF-Notation: `/ ... /`

@@ -1182,11 +1182,11 @@ class RE(Parser):
     >>> word = RE(r'\w+', wR=r'\s*')
     >>> parser = Grammar(word)
     >>> result = parser('Haus ')
-    >>> result.content()
+    >>> result.content
     'Haus '
-    >>> result.structure()
+    >>> result.structure
     '(:RE (:RegExp "Haus") (:Whitespace " "))'
-    >>> parser(' Haus').content()
+    >>> str(parser(' Haus'))
     ' <<< Error on " Haus" | Parser did not match! Invalid source file?\n Most advanced: None\n Last match: None; >>> '
     EBNF-Notation: `/ ... /~` or `~/ ... /` or `~/ ... /~`

@@ -1361,11 +1361,11 @@ class Option(UnaryOperator):
     Examples:
     >>> number = Option(Token('-')) + RegExp(r'\d+') + Option(RegExp(r'\.\d+'))
-    >>> Grammar(number)('3.14159').content()
+    >>> Grammar(number)('3.14159').content
     '3.14159'
-    >>> Grammar(number)('3.14159').structure()
+    >>> Grammar(number)('3.14159').structure
     '(:Series (:Option) (:RegExp "3") (:Option (:RegExp ".14159")))'
-    >>> Grammar(number)('-1').content()
+    >>> Grammar(number)('-1').content
     '-1'
     EBNF-Notation: `[ ... ]`

@@ -1401,9 +1401,9 @@ class ZeroOrMore(Option):
     Examples:
     >>> sentence = ZeroOrMore(RE(r'\w+,?')) + Token('.')
-    >>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content()
+    >>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content
     'Wo viel der Weisheit, da auch viel des Grämens.'
-    >>> Grammar(sentence)('.').content()  # an empty sentence also matches
+    >>> Grammar(sentence)('.').content  # an empty sentence also matches
     '.'
     EBNF-Notation: `{ ... }`

@@ -1436,9 +1436,9 @@ class OneOrMore(UnaryOperator):
     Examples:
     >>> sentence = OneOrMore(RE(r'\w+,?')) + Token('.')
-    >>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content()
+    >>> Grammar(sentence)('Wo viel der Weisheit, da auch viel des Grämens.').content
     'Wo viel der Weisheit, da auch viel des Grämens.'
-    >>> Grammar(sentence)('.').content()  # an empty sentence also matches
+    >>> str(Grammar(sentence)('.'))  # an empty sentence also matches
     ' <<< Error on "." | Parser did not match! Invalid source file?\n Most advanced: None\n Last match: None; >>> '
     EBNF-Notation: `{ ... }+`

@@ -1479,9 +1479,9 @@ class Series(NaryOperator):
     Example:
     >>> variable_name = RegExp('(?!\d)\w') + RE('\w*')
-    >>> Grammar(variable_name)('variable_1').content()
+    >>> Grammar(variable_name)('variable_1').content
     'variable_1'
-    >>> Grammar(variable_name)('1_variable').content()
+    >>> str(Grammar(variable_name)('1_variable'))
     ' <<< Error on "1_variable" | Parser did not match! Invalid source file?\n Most advanced: None\n Last match: None; >>> '
     EBNF-Notation: `... ...` (sequence of parsers separated by a blank or new line)

@@ -1583,12 +1583,12 @@ class Alternative(NaryOperator):
     # the order of the sub-expression matters!
     >>> number = RE('\d+') | RE('\d+') + RE('\.') + RE('\d+')
-    >>> Grammar(number)("3.1416").content()
+    >>> str(Grammar(number)("3.1416"))
     '3 <<< Error on ".141" | Parser stopped before end! trying to recover... >>> '

     # the most selective expression should be put first:
     >>> number = RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')
-    >>> Grammar(number)("3.1416").content()
+    >>> Grammar(number)("3.1416").content
     '3.1416'
     EBNF-Notation: `... | ...`

@@ -1645,9 +1645,9 @@ class AllOf(NaryOperator):
     Example:
     >>> prefixes = AllOf(Token("A"), Token("B"))
-    >>> Grammar(prefixes)('A B').content()
+    >>> Grammar(prefixes)('A B').content
     'A B'
-    >>> Grammar(prefixes)('B A').content()
+    >>> Grammar(prefixes)('B A').content
     'B A'
     EBNF-Notation: `<... ...>` (sequence of parsers enclosed by angular brackets)

@@ -1694,11 +1694,11 @@ class SomeOf(NaryOperator):
     Example:
     >>> prefixes = SomeOf(Token("A"), Token("B"))
-    >>> Grammar(prefixes)('A B').content()
+    >>> Grammar(prefixes)('A B').content
     'A B'
-    >>> Grammar(prefixes)('B A').content()
+    >>> Grammar(prefixes)('B A').content
     'B A'
-    >>> Grammar(prefixes)('B').content()
+    >>> Grammar(prefixes)('B').content
     'B'
     EBNF-Notation: `<... ...>` (sequence of parsers enclosed by angular brackets)

@@ -1869,7 +1869,7 @@ class Capture(UnaryOperator):
         if node:
             assert self.name, """Tried to apply an unnamed capture-parser!"""
             stack = self.grammar.variables__.setdefault(self.name, [])
-            stack.append(str(node))
+            stack.append(node.content)
             self.grammar.push_rollback__(len(text), lambda: stack.pop())
             # caching will be blocked by parser guard (see way above),
             # because it would prevent recapturing of rolled back captures
......
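Note on the hunks above: only the doctests and the Capture parser change here; the underlying switch is that plain text is now read through the .content and .structure properties, while str() returns the error-annotated rendering. A minimal usage sketch mirroring those doctests (the import path is an assumption, not something this diff shows):

    # Sketch only: reproduces the calling convention shown in the doctests above;
    # the import path is assumed, not confirmed by this diff.
    from DHParser.parser import Grammar, RE

    word = RE(r'\w+', wR=r'\s*')      # regexp with optional trailing whitespace
    parser = Grammar(word)

    result = parser('Haus ')
    print(result.content)             # -> 'Haus '   (property access, no call parentheses)
    print(result.structure)           # -> '(:RE (:RegExp "Haus") (:Whitespace " "))'

    failed = parser(' Haus')          # leading whitespace is not matched
    print(str(failed))                # error text is now embedded by str(), not by .content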
@@ -161,7 +161,7 @@ ZOMBIE_PARSER = ZombieParser()
 ChildrenType = Tuple['Node', ...]
 NoChildren = cast(ChildrenType, ())  # type: ChildrenType
-StrictResultType = Union[ChildrenType, StringView, str]
+StrictResultType = Union[ChildrenType, str]
 ResultType = Union[ChildrenType, 'Node', StringView, str, None]

@@ -233,7 +233,7 @@ class Node(collections.abc.Sized):
         # Assignment to self.result initializes the attributes _result, children and _len
         # The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes
         if leafhint:
-            self._result = result  # type: StrictResultType
+            self._result = str(result)  # type: StrictResultType
             self.children = NoChildren  # type: ChildrenType
             self._len = -1  # type: int  # lazy evaluation
         else:

@@ -244,11 +244,11 @@ class Node(collections.abc.Sized):
     def __str__(self):
-        if self.children:
-            return "".join(str(child) for child in self.children)
-        elif isinstance(self._result, StringView):
-            self.result = str(self._result)
-        return self._result
+        s = "".join(str(child) for child in self.children) if self.children else self.result
+        if self._errors:
+            return ' <<< Error on "%s" | %s >>> ' % \
+                   (s, '; '.join(e.message for e in self._errors))
+        return s

     def __repr__(self):

@@ -308,6 +308,7 @@ class Node(collections.abc.Sized):
         """
         return self._result
+
     @result.setter
     def result(self, result: ResultType):
         # # made obsolete by static type checking with mypy

@@ -330,7 +331,7 @@ class Node(collections.abc.Sized):
             self.error_flag = max(child.error_flag for child in self.children)
         else:
             self.children = NoChildren
-            self._result = result
+            self._result = str(result)
         # # shorter but slower:
         # self._result = (result,) if isinstance(result, Node) else result or ''  # type: StrictResultType
         # self.children = cast(ChildrenType, self._result) \

@@ -339,6 +340,27 @@ class Node(collections.abc.Sized):
         # self.error_flag = max(self.error_flag,
         #     max(child.error_flag for child in self.children))  # type: bool

+    @property
+    def content(self) -> str:
+        """
+        Returns content as string, inserting error messages where
+        errors occurred.
+        """
+        if self.children:
+            return "".join(child.content for child in self.children)
+        return self._result
+
+    @property
+    def structure(self) -> str:
+        """
+        Return structure (and content) as S-expression on a single line
+        without any line breaks.
+        """
+        return flatten_sxpr(self.as_sxpr(showerrors=False))
+
     @property
     def pos(self) -> int:
         """Returns the position of the Node's content in the source text."""

@@ -362,7 +384,7 @@ class Node(collections.abc.Sized):
     @property
     def errors(self) -> List[Error]:
         """
-        Returns the errors that occured at this Node,
+        Returns the errors that occurred at this Node,
         not including any errors from child nodes.
         """
         return self._errors.copy()

@@ -464,7 +486,7 @@ class Node(collections.abc.Sized):
         return head + '\n'.join([tab + data_fn(s) for s in res.split('\n')]) + tail

-    def as_sxpr(self, src: str = None, compact: bool = False) -> str:
+    def as_sxpr(self, src: str = None, compact: bool = False, showerrors: bool = True) -> str:
         """
         Returns content as S-expression, i.e. in lisp-like form.

@@ -487,7 +509,7 @@ class Node(collections.abc.Sized):
                 txt += " '(pos %i " % node.pos  # + " %i %i)" % line_col(src, node.pos)
             # if node.error_flag:  # just for debugging error collecting
             #     txt += " HAS ERRORS"
-            if node.errors:
+            if showerrors and node.errors:
                 txt += " '(err '(%s))" % ' '.join(str(err).replace('"', r'\"')
                                                   for err in node.errors)
             return txt + '\n'

@@ -505,7 +527,7 @@ class Node(collections.abc.Sized):
         return self._tree_repr(' ', opening, closing, pretty, density=density)

-    def as_xml(self, src: str = None) -> str:
+    def as_xml(self, src: str = None, showerrors: bool = True) -> str:
         """
         Returns content as XML-tree.

@@ -521,7 +543,7 @@ class Node(collections.abc.Sized):
             # s += ' pos="%i"' % node.pos
             if src:
                 txt += ' line="%i" col="%i"' % line_col(src, node.pos)
-            if node.errors:
+            if showerrors and node.errors:
                 txt += ' err="%s"' % ''.join(str(err).replace('"', r'\"') for err in node.errors)
             return txt + ">\n"

@@ -532,25 +554,6 @@ class Node(collections.abc.Sized):
         return self._tree_repr(' ', opening, closing, density=1)

-    def structure(self) -> str:
-        """
-        Return structure (and content) as S-expression on a single line
-        without any line breaks.
-        """
-        return flatten_sxpr(self.as_sxpr())
-
-    def content(self) -> str:
-        """
-        Returns content as string, inserting error messages where
-        errors occurred.
-        """
-        s = "".join(child.content() for child in self.children) if self.children \
-            else str(self.result)
-        return (' <<< Error on "%s" | %s >>> '
-                % (s, '; '.join(e.message for e in self._errors))) if self._errors else s
-
     def find(self, match_function: Callable) -> Iterator['Node']:
         """
         Finds nodes in the tree that match a specific criterion.
......
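Note on the syntaxtree.py hunks above: .content and .structure move from methods to read-only properties, and error interpolation moves into __str__. The following self-contained sketch (deliberately not DHParser's real Node class) illustrates that division of labour:

    # Toy stand-in for a parse-tree node; illustration of the new design only.
    class MiniNode:
        def __init__(self, tag, result, errors=()):
            self.tag = tag
            self.children = result if isinstance(result, tuple) else ()
            self._result = result if self.children else str(result)
            self._errors = list(errors)

        @property
        def content(self):
            # plain text of the subtree, never decorated with error messages
            if self.children:
                return "".join(child.content for child in self.children)
            return self._result

        @property
        def structure(self):
            # one-line S-expression, analogous to flatten_sxpr(self.as_sxpr(...))
            if self.children:
                return "(%s %s)" % (self.tag, " ".join(c.structure for c in self.children))
            return '(%s "%s")' % (self.tag, self._result)

        def __str__(self):
            # only str() interpolates error messages, as in the new Node.__str__
            s = "".join(str(c) for c in self.children) if self.children else self._result
            if self._errors:
                return ' <<< Error on "%s" | %s >>> ' % (s, '; '.join(self._errors))
            return s

    tree = MiniNode(':RE', (MiniNode(':RegExp', 'Haus'), MiniNode(':Whitespace', ' ')))
    assert tree.content == 'Haus '
    assert tree.structure == '(:RE (:RegExp "Haus") (:Whitespace " "))'

This split keeps .content safe to feed into regexes and comparisons, because error markup can no longer leak into it.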
@@ -437,7 +437,7 @@ def collapse(context: List[Node]):
     string representation of the node.
     """
     node = context[-1]
-    node.result = str(node)
+    node.result = node.content


 @transformation_factory

@@ -516,7 +516,7 @@ def is_one_of(context: List[Node], tag_name_set: AbstractSet[str]) -> bool:
 def has_content(context: List[Node], regexp: str) -> bool:
     """Checks a node's content against a regular expression."""
-    return bool(re.match(regexp, str(context[-1])))
+    return bool(re.match(regexp, context[-1].content))


 @transformation_factory(Callable)

@@ -624,7 +624,7 @@ def assert_content(context: List[Node], regexp: str):
     node = context[-1]
     if not has_content(context, regexp):
         node.add_error('Element "%s" violates %s on %s' %
-                       (node.parser.name, str(regexp), str(node)))
+                       (node.parser.name, str(regexp), node.content))


 @transformation_factory
......
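Note on the transform.py hunks above: once str() embeds error markup, every content check has to go through .content instead. A self-contained sketch of the rationale (simplified signature, not the actual helper from transform.py):

    import re

    class FakeNode:
        """Minimal stand-in for a leaf node after this commit; illustration only."""
        def __init__(self, text, errors=()):
            self.content = text            # bare text, no error markup
            self._errors = list(errors)
        def __str__(self):
            if self._errors:
                return ' <<< Error on "%s" | %s >>> ' % (self.content, '; '.join(self._errors))
            return self.content

    def has_content(node, regexp):
        # mirrors the changed helper: match against .content, not str(node)
        return bool(re.match(regexp, node.content))

    bad = FakeNode('3', errors=['Parser stopped before end!'])
    assert has_content(bad, r'\d+')           # matches the bare content '3'
    assert not re.match(r'\d+', str(bad))     # str() now starts with ' <<< Error ...'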
@@ -401,13 +401,13 @@ class TestAllSome:
     def test_all(self):
         ebnf = 'prefix = <"A" "B">'
         grammar = grammar_provider(ebnf)()
-        assert grammar('B A').content() == 'B A'
+        assert grammar('B A').content == 'B A'

     def test_some(self):
         ebnf = 'prefix = <"A" | "B">'
         grammar = grammar_provider(ebnf)()
-        assert grammar('B A').content() == 'B A'
-        assert grammar('B').content() == 'B'
+        assert grammar('B A').content == 'B A'
+        assert grammar('B').content == 'B'


 if __name__ == "__main__":
......
@@ -313,11 +313,11 @@ class TestAllOfSomeOf:
     def test_allOf_order(self):
         """Test that parsers of an AllOf-List can match in arbitrary order."""
         prefixes = AllOf(Token("A"), Token("B"))
-        assert Grammar(prefixes)('A B').content() == 'A B'
-        assert Grammar(prefixes)('B A').content() == 'B A'
+        assert Grammar(prefixes)('A B').content == 'A B'
+        assert Grammar(prefixes)('B A').content == 'B A'
         # aternative Form
         prefixes = AllOf(Series(Token("B"), Token("A")))
-        assert Grammar(prefixes)('A B').content() == 'A B'
+        assert Grammar(prefixes)('A B').content == 'A B'

     def test_allOf_completeness(self):
         """Test that an error is raised if not all parsers of an AllOf-List

@@ -329,28 +329,28 @@ class TestAllOfSomeOf:
         """Test that one and the same parser may be listed several times
         and must be matched several times accordingly."""
         prefixes = AllOf(Token("A"), Token("B"), Token("A"))
-        assert Grammar(prefixes)('A A B').content() == 'A A B'
-        assert Grammar(prefixes)('A B A').content() == 'A B A'
-        assert Grammar(prefixes)('B A A').content() == 'B A A'
+        assert Grammar(prefixes)('A A B').content == 'A A B'
+        assert Grammar(prefixes)('A B A').content == 'A B A'
+        assert Grammar(prefixes)('B A A').content == 'B A A'
         assert Grammar(prefixes)('A B B').error_flag

     def test_someOf_order(self):
         """Test that parsers of an AllOf-List can match in arbitrary order."""
         prefixes = SomeOf(Token("A"), Token("B"))
-        assert Grammar(prefixes)('A B').content() == 'A B'
-        assert Grammar(prefixes)('B A').content() == 'B A'
+        assert Grammar(prefixes)('A B').content == 'A B'
+        assert Grammar(prefixes)('B A').content == 'B A'
         # aternative Form
         prefixes = SomeOf(Alternative(Token("B"), Token("A")))
-        assert Grammar(prefixes)('A B').content() == 'A B'
-        assert Grammar(prefixes)('B').content() == 'B'
+        assert Grammar(prefixes)('A B').content == 'A B'
+        assert Grammar(prefixes)('B').content == 'B'

     def test_someOf_redundance(self):
         """Test that one and the same parser may be listed several times
         and must be matched several times accordingly."""
         prefixes = SomeOf(Token("A"), Token("B"), Token("A"))
-        assert Grammar(prefixes)('A A B').content() == 'A A B'
-        assert Grammar(prefixes)('A B A').content() == 'A B A'
-        assert Grammar(prefixes)('B A A').content() == 'B A A'
+        assert Grammar(prefixes)('A A B').content == 'A A B'
+        assert Grammar(prefixes)('A B A').content == 'A B A'
+        assert Grammar(prefixes)('B A A').content == 'B A A'
         assert Grammar(prefixes)('A B B').error_flag
......
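Note on the test changes above: for downstream code the migration is mechanical, dropping the call parentheses and switching error inspection to str() or error_flag. A short sketch assuming the names used in these tests (import path assumed; the failing input is only an illustrative guess):

    from DHParser.parser import Grammar, Token, AllOf   # import path assumed

    prefixes = AllOf(Token("A"), Token("B"))

    tree = Grammar(prefixes)('B A')
    assert tree.content == 'B A'          # was: tree.content() == 'B A'

    failed = Grammar(prefixes)('A A')     # the mandatory 'B' is missing (assumed failure case)
    assert failed.error_flag              # failures are flagged ...
    print(str(failed))                    # ... and str() carries the error message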