In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 60800f1c authored by Eckhart Arnold

- parsers.py: support for rolling back of discarded capture and pop operations

parent 2721ad5f
......@@ -24,7 +24,10 @@ try:
import regex as re
except ImportError:
import re
from .typing import Any, cast, Tuple, Union
try:
from typing import Any, cast, Tuple, Union
except ImportError:
from .typing34 import Any, cast, Tuple, Union
from DHParser.ebnf import EBNFTransformer, EBNFCompiler, grammar_changed, \
get_ebnf_scanner, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
......@@ -74,7 +77,7 @@ except ImportError:
from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
last_value, counterpart, accumulate, ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
......
......@@ -23,11 +23,14 @@ try:
import regex as re
except ImportError:
import re
from .typing import Callable, Dict, List, Set, Tuple
try:
from typing import Callable, Dict, List, Set, Tuple
except ImportError:
from .typing34 import Callable, Dict, List, Set, Tuple
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \
replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \
......@@ -121,26 +124,26 @@ class EBNFGrammar(Grammar):
wspL__ = ''
wspR__ = WSP__
EOF = NegativeLookahead(RE('.', wR=''))
list_ = Sequence(RE('\\w+'), ZeroOrMore(Sequence(Token(","), RE('\\w+'))))
list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
regexp = RE('~?/(?:[^/]|(?<=\\\\)/)*/~?')
literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
symbol = RE('(?!\\d)\\w+')
option = Sequence(Token("["), expression, Required(Token("]")))
repetition = Sequence(Token("{"), expression, Required(Token("}")))
oneormore = Sequence(Token("{"), expression, Token("}+"))
regexchain = Sequence(Token("<"), expression, Required(Token(">")))
group = Sequence(Token("("), expression, Required(Token(")")))
option = Series(Token("["), expression, Required(Token("]")))
repetition = Series(Token("{"), expression, Required(Token("}")))
oneormore = Series(Token("{"), expression, Token("}+"))
regexchain = Series(Token("<"), expression, Required(Token(">")))
group = Series(Token("("), expression, Required(Token(")")))
retrieveop = Alternative(Token("::"), Token(":"))
flowmarker = Alternative(Token("!"), Token("&"), Token("§"), Token("-!"), Token("-&"))
factor = Alternative(Sequence(Optional(flowmarker), Optional(retrieveop), symbol, NegativeLookahead(Token("="))),
Sequence(Optional(flowmarker), literal), Sequence(Optional(flowmarker), regexp),
Sequence(Optional(flowmarker), group), Sequence(Optional(flowmarker), regexchain),
Sequence(Optional(flowmarker), oneormore), repetition, option)
factor = Alternative(Series(Optional(flowmarker), Optional(retrieveop), symbol, NegativeLookahead(Token("="))),
Series(Optional(flowmarker), literal), Series(Optional(flowmarker), regexp),
Series(Optional(flowmarker), group), Series(Optional(flowmarker), regexchain),
Series(Optional(flowmarker), oneormore), repetition, option)
term = OneOrMore(factor)
expression.set(Sequence(term, ZeroOrMore(Sequence(Token("|"), term))))
directive = Sequence(Token("@"), Required(symbol), Required(Token("=")), Alternative(regexp, literal, list_))
definition = Sequence(symbol, Required(Token("=")), expression)
syntax = Sequence(Optional(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
directive = Series(Token("@"), Required(symbol), Required(Token("=")), Alternative(regexp, literal, list_))
definition = Series(symbol, Required(Token("=")), expression)
syntax = Series(Optional(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
root__ = syntax
......@@ -601,7 +604,7 @@ class EBNFCompiler(Compiler):
return self.non_terminal(node, 'Alternative')
def on_term(self, node) -> str:
return self.non_terminal(node, 'Sequence')
return self.non_terminal(node, 'Series')
def on_factor(self, node: Node) -> str:
assert node.children
......
This diff is collapsed.
......@@ -27,8 +27,12 @@ try:
import regex as re
except ImportError:
import re
from .typing import AbstractSet, Any, ByteString, Callable, cast, Container, Iterator, List, \
NamedTuple, Sequence, Union, Text, Tuple
try:
from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
Iterator, List, NamedTuple, Sequence, Union, Text, Tuple
except ImportError:
from .typing34 import AbstractSet, Any, ByteString, Callable, cast, Container, Dict, \
Iterator, List, NamedTuple, Sequence, Union, Text, Tuple
from DHParser.toolkit import log_dir, expand_table, line_col, smart_list
......@@ -273,9 +277,9 @@ class Node:
def show(self) -> str:
"""Returns content as string, inserting error messages where
errors ocurred.
errors occurred.
"""
s = "".join(child.show_errors() for child in self.children) if self.children \
s = "".join(child.show() for child in self.children) if self.children \
else str(self.result)
return (' <<< Error on "%s" | %s >>> ' % (s, '; '.join(self._errors))) if self._errors else s
......@@ -389,7 +393,7 @@ class Node:
"""
for child in self.children:
child.propagate_error_flags()
self.error_flag |= child.error_flag
self.error_flag = self.error_flag or child.error_flag
def collect_errors(self, clear_errors=False) -> List[Error]:
"""
......@@ -605,7 +609,7 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
# with a single value
table = {name: smart_list(call) for name, call in list(processing_table.items())}
table = expand_table(table)
cache = {}
cache = {} # type: Dict[str, List[Callable]]
def traverse_recursive(node):
if node.children:
......
......@@ -38,7 +38,10 @@ try:
import regex as re
except ImportError:
import re
from .typing import List, Tuple
try:
from typing import List, Tuple
except ImportError:
from .typing34 import List, Tuple
__all__ = ['logging',
......
......@@ -50,8 +50,8 @@ class ArithmeticGrammar(ParserRoot):
constant.set(Sequence("constant", digit, ZeroOrMore(None, digit)))
variable.set(Alternative("variable", Token("x", wspcR=wspc__), Token("y", wspcR=wspc__), Token("z", wspcR=wspc__)))
factor = Alternative("factor", constant, variable, Sequence(None, Token("(", wspcR=wspc__), expression, Token(")", wspcR=wspc__)))
term = Sequence("term", factor, ZeroOrMore(None, Sequence(None, Alternative(None, Token("*", wspcR=wspc__), Token("/", wspcR=wspc__)), factor)))
expression.set(Sequence("expression", term, ZeroOrMore(None, Sequence(None, Alternative(None, Token("+", wspcR=wspc__), Token("-", wspcR=wspc__)), term))))
term = Sequence("term", factor, ZeroOrMore(None, Series(None, Alternative(None, Token("*", wspcR=wspc__), Token("/", wspcR=wspc__)), factor)))
expression.set(Sequence("expression", term, ZeroOrMore(None, Series(None, Alternative(None, Token("+", wspcR=wspc__), Token("-", wspcR=wspc__)), term))))
root__ = expression
"""
......@@ -96,7 +96,7 @@ class EBNFGrammar(ParserRoot):
option = Sequence("option", Token("[", wspcR=wspc__), expression, Token("]", wspcR=wspc__))
factor = Alternative("factor", symbol, literal, regexp, option, repetition, group)
term = Sequence("term", factor, ZeroOrMore(None, factor))
expression.set(Sequence("expression", term, ZeroOrMore(None, Sequence(None, Token("|", wspcR=wspc__), term))))
expression.set(Sequence("expression", term, ZeroOrMore(None, Series(None, Token("|", wspcR=wspc__), term))))
production = Sequence("production", symbol, Token("=", wspcR=wspc__), expression, Token(".", wspcR=wspc__))
syntax = ZeroOrMore("syntax", production)
root__ = syntax
......
......@@ -15,7 +15,7 @@ try:
except ImportError:
import re
from DHParser.parsers import Grammar, Compiler, Alternative, Required, Token, \
Optional, OneOrMore, Sequence, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source
Optional, OneOrMore, Series, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source
from DHParser.syntaxtree import traverse, reduce_single_child, replace_by_single_child, no_transformation, \
remove_expendables, remove_tokens, flatten, \
WHITESPACE_KEYWORD, TOKEN_KEYWORD
......@@ -157,39 +157,39 @@ class MLWGrammar(Grammar):
WORT_GROSS = RE('[A-ZÄÖÜ][a-zäöüß]+', wL='')
WORT = RE('[A-ZÄÖÜ]?[a-zäöüß]+', wL='')
NAMENS_ABKÜRZUNG = RE('[A-ZÄÖÜÁÀ]\\.', wR='', wL='')
Name = Sequence(WORT, ZeroOrMore(Alternative(WORT, NAMENS_ABKÜRZUNG)))
Autorinfo = Sequence(Alternative(Token("AUTORIN"), Token("AUTOR")), Name)
Zusatz = Sequence(Token("ZUSATZ"), RE('\\s*.*', wR='', wL=''), TRENNER)
EinBeleg = Sequence(OneOrMore(Sequence(NegativeLookahead(Sequence(Optional(LEER), Alternative(Token("*"), Token("BEDEUTUNG"), Token("AUTOR"), Token("NAME"), Token("ZUSATZ")))), RE('\\s*.*\\s*', wR='', wL=''))), Optional(Zusatz))
Belege = Sequence(Token("BELEGE"), Optional(LEER), ZeroOrMore(Sequence(Token("*"), EinBeleg)))
DeutscheBedeutung = Sequence(Token("DEU"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
LateinischeBedeutung = Sequence(Token("LAT"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
Interpretamente = Sequence(LateinischeBedeutung, Optional(LEER), Required(DeutscheBedeutung), Optional(LEER))
Bedeutungskategorie = Sequence(RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''), Optional(LEER))
Bedeutung = Sequence(Alternative(Interpretamente, Bedeutungskategorie), Optional(Belege))
BedeutungsPosition = OneOrMore(Sequence(Token("BEDEUTUNG"), Optional(LEER), Required(Bedeutung)))
Name = Series(WORT, ZeroOrMore(Alternative(WORT, NAMENS_ABKÜRZUNG)))
Autorinfo = Series(Alternative(Token("AUTORIN"), Token("AUTOR")), Name)
Zusatz = Series(Token("ZUSATZ"), RE('\\s*.*', wR='', wL=''), TRENNER)
EinBeleg = Series(OneOrMore(Series(NegativeLookahead(Series(Optional(LEER), Alternative(Token("*"), Token("BEDEUTUNG"), Token("AUTOR"), Token("NAME"), Token("ZUSATZ")))), RE('\\s*.*\\s*', wR='', wL=''))), Optional(Zusatz))
Belege = Series(Token("BELEGE"), Optional(LEER), ZeroOrMore(Series(Token("*"), EinBeleg)))
DeutscheBedeutung = Series(Token("DEU"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
LateinischeBedeutung = Series(Token("LAT"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
Interpretamente = Series(LateinischeBedeutung, Optional(LEER), Required(DeutscheBedeutung), Optional(LEER))
Bedeutungskategorie = Series(RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''), Optional(LEER))
Bedeutung = Series(Alternative(Interpretamente, Bedeutungskategorie), Optional(Belege))
BedeutungsPosition = OneOrMore(Series(Token("BEDEUTUNG"), Optional(LEER), Required(Bedeutung)))
VerweisZiel = RE('<\\w+>')
Verweis = RE('\\w+')
Beleg = Verweis
Schreibweise = Alternative(Token("vizreg-"), Token("festregel(a)"), Token("fezdregl(a)"), Token("fat-"))
SWVariante = Sequence(Schreibweise, Token(":"), Beleg)
SWVariante = Series(Schreibweise, Token(":"), Beleg)
SWTyp = Alternative(Token("script."), Token("script. fat-"))
SchreibweisenPosition = Sequence(Token("SCHREIBWEISE"), Optional(LEER), Required(SWTyp), Token(":"), Optional(LEER), Required(SWVariante), ZeroOrMore(Sequence(TRENNER, SWVariante)), Optional(LEER))
SchreibweisenPosition = Series(Token("SCHREIBWEISE"), Optional(LEER), Required(SWTyp), Token(":"), Optional(LEER), Required(SWVariante), ZeroOrMore(Series(TRENNER, SWVariante)), Optional(LEER))
ArtikelKopf = SchreibweisenPosition
_genus = Alternative(Token("maskulinum"), Token("m."), Token("femininum"), Token("f."), Token("neutrum"), Token("n."))
Flexion = RE('-?[a-z]+', wL='')
Flexionen = Sequence(Flexion, ZeroOrMore(Sequence(Token(","), Required(Flexion))))
GVariante = Sequence(Flexionen, Optional(_genus), Token(":"), Beleg)
GrammatikVarianten = Sequence(TRENNER, GVariante)
Flexionen = Series(Flexion, ZeroOrMore(Series(Token(","), Required(Flexion))))
GVariante = Series(Flexionen, Optional(_genus), Token(":"), Beleg)
GrammatikVarianten = Series(TRENNER, GVariante)
_wortart = Alternative(Token("nomen"), Token("n."), Token("verb"), Token("v."), Token("adverb"), Token("adv."), Token("adjektiv"), Token("adj."))
GrammatikPosition = Sequence(Token("GRAMMATIK"), Optional(LEER), Required(_wortart), Required(TRENNER), Required(Flexionen), Optional(_genus), ZeroOrMore(GrammatikVarianten), Optional(TRENNER))
LVZusatz = Sequence(Token("ZUSATZ"), Token("sim."))
GrammatikPosition = Series(Token("GRAMMATIK"), Optional(LEER), Required(_wortart), Required(TRENNER), Required(Flexionen), Optional(_genus), ZeroOrMore(GrammatikVarianten), Optional(TRENNER))
LVZusatz = Series(Token("ZUSATZ"), Token("sim."))
LVariante = RE('(?:[a-z]|-)+')
LemmaVarianten = Sequence(Token("VARIANTEN"), Optional(LEER), Required(LVariante), ZeroOrMore(Sequence(TRENNER, LVariante)), Optional(Sequence(TRENNER, LVZusatz)), Optional(TRENNER))
LemmaVarianten = Series(Token("VARIANTEN"), Optional(LEER), Required(LVariante), ZeroOrMore(Series(TRENNER, LVariante)), Optional(Series(TRENNER, LVZusatz)), Optional(TRENNER))
_tll = Token("*")
Lemma = Sequence(Optional(_tll), WORT_KLEIN, Optional(LEER))
LemmaPosition = Sequence(Token("LEMMA"), Required(Lemma), Optional(LemmaVarianten), Required(GrammatikPosition))
Artikel = Sequence(Optional(LEER), Required(LemmaPosition), Optional(ArtikelKopf), Required(BedeutungsPosition), Required(Autorinfo), Optional(LEER), DATEI_ENDE)
Lemma = Series(Optional(_tll), WORT_KLEIN, Optional(LEER))
LemmaPosition = Series(Token("LEMMA"), Required(Lemma), Optional(LemmaVarianten), Required(GrammatikPosition))
Artikel = Series(Optional(LEER), Required(LemmaPosition), Optional(ArtikelKopf), Required(BedeutungsPosition), Required(Autorinfo), Optional(LEER), DATEI_ENDE)
root__ = Artikel
......
This diff is collapsed.
This diff is collapsed.
......@@ -136,17 +136,17 @@ class TestGrammar:
# checks whether pos values in the parsing result and in the
# history record have been initialized
with logging("LOGS"):
parser = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
parser("no_file_name*")
for record in parser.history:
grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
grammar("no_file_name*")
for record in grammar.history__:
assert not record.node or record.node.pos >= 0
def test_select_parsing(self):
parser = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
parser("wort", "WORT")
parser("eine Zeile", "textzeile")
parser("kein Haupt", "haupt")
parser("so ist es richtig", "haupt")
grammar = compile_python_object(DHPARSER_IMPORTS + self.pyparser, '\w+Grammar$')()
grammar("wort", "WORT")
grammar("eine Zeile", "textzeile")
grammar("kein Haupt", "haupt")
grammar("so ist es richtig", "haupt")
class TestPopRetrieve:
......
Markdown is supported
0% loaded or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment