Commit ed754af9 authored by Eckhart Arnold

- mypy-test and some type errors corrected

parent 2ce46062
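
Most of the diff below renames the parser combinator Optional to Option. The commit message mentions only mypy and corrected type errors; a plausible motivation, consistent with the annotations added further down (e.g. begin: Optional[int] in StringView.__init__), is that the old class name shadowed typing.Optional once type hints came into play. A minimal, self-contained sketch of that clash (toy classes, not DHParser's real code):

    from typing import Optional          # needed once annotations are checked by mypy

    class Optional:                      # a combinator class of the same name ...
        """Toy stand-in for a parser that always matches, even on empty input."""

    # ... silently shadows typing.Optional in this module, so an annotation such as
    #     def __init__(self, text: str, begin: Optional[int] = 0) -> None: ...
    # would now refer to the parser class instead of the typing construct.
    # Renaming the combinator removes the ambiguity:

    class Option:
        """The same combinator under a name that cannot collide with typing.Optional."""

    print(Optional is not Option)        # True; typing.Optional is shadowed above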
@@ -78,7 +78,7 @@ except ImportError:
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
-Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
+Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, TRUE_CONDITION, \
@@ -31,7 +31,7 @@ except ImportError:
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parser import Grammar, mixin_comment, nil_preprocessor, Forward, RE, NegativeLookahead, \
-Alternative, Series, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
+Alternative, Series, Option, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
PreprocessorFunc
from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, Node, TransformationFunc
from DHParser.transform import TransformationDict, traverse, remove_brackets, \
@@ -132,15 +132,15 @@ class EBNFGrammar(Grammar):
group = Series(Token("("), expression, Required(Token(")")))
retrieveop = Alternative(Token("::"), Token(":"))
flowmarker = Alternative(Token("!"), Token("&"), Token("§"), Token("-!"), Token("-&"))
-factor = Alternative(Series(Optional(flowmarker), Optional(retrieveop), symbol, NegativeLookahead(Token("="))),
-Series(Optional(flowmarker), literal), Series(Optional(flowmarker), regexp),
-Series(Optional(flowmarker), group), Series(Optional(flowmarker), oneormore),
+factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
+Series(Option(flowmarker), literal), Series(Option(flowmarker), regexp),
+Series(Option(flowmarker), group), Series(Option(flowmarker), oneormore),
repetition, option)
term = OneOrMore(factor)
expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
directive = Series(Token("@"), Required(symbol), Required(Token("=")), Alternative(regexp, literal, list_))
definition = Series(symbol, Required(Token("=")), expression)
-syntax = Series(Optional(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
+syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
root__ = syntax
@@ -784,7 +784,7 @@ class EBNFCompiler(Compiler):
def on_option(self, node) -> str:
-return self.non_terminal(node, 'Optional')
+return self.non_terminal(node, 'Option')
def on_repetition(self, node) -> str:
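
For context, a hedged sketch of what this hook now emits: the EBNF option notation [...] is compiled to the renamed Option combinator. The EBNF line is quoted from the Option docstring further down; the generated Python is an approximation and assumes DHParser's combinators are importable:

    # EBNF source:   number = ["-"] /\d+/ [ /\.\d+/ ]
    # Roughly the combinators generated for it after this change:
    from DHParser.parser import Option, RegExp, Series, Token

    number = Series(Option(Token("-")), RegExp(r'\d+'), Option(RegExp(r'\.\d+')))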
@@ -97,7 +97,7 @@ __all__ = ('PreprocessorFunc',
# 'UnaryOperator',
# 'NaryOperator',
'Synonym',
-'Optional',
+'Option',
'ZeroOrMore',
'OneOrMore',
'Series',
@@ -355,7 +355,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
representing the root of the concrete syntax tree resulting from the
match as well as the substring `text[i:]` where i is the length of
matched text (which can be zero in the case of parsers like
-`ZeroOrMore` or `Optional`). If `i > 0` then the parser has "moved
+`ZeroOrMore` or `Option`). If `i > 0` then the parser has "moved
forward".
If the parser does not match it returns `(None, text). **Note** that
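
The docstring above states the central contract of every parser: it either consumes a (possibly empty) prefix and returns a node plus the remaining text, or it returns (None, text). A toy illustration of that contract with plain strings (simplified stand-ins, not DHParser's actual classes):

    def zero_or_more_digits(text: str):
        """Always matches, like ZeroOrMore or Option; the matched prefix may be empty."""
        i = 0
        while i < len(text) and text[i].isdigit():
            i += 1
        return text[:i], text[i:]          # ("node" content, remaining text)

    print(zero_or_more_digits("42abc"))    # ('42', 'abc') -- the parser has "moved forward"
    print(zero_or_more_digits("abc"))      # ('', 'abc')   -- matched, but i == 0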
@@ -1229,7 +1229,7 @@ class NaryOperator(Parser):
parser.apply(func)
-class Optional(UnaryOperator):
+class Option(UnaryOperator):
"""
Parser `Optional` always matches, even if its child-parser
did not match.
@@ -1243,7 +1243,7 @@ class Optional(UnaryOperator):
left it.
Examples:
->>> number = Optional(Token('-')) + RegExp(r'\d+') + Optional(RegExp(r'\.\d+'))
+>>> number = Option(Token('-')) + RegExp(r'\d+') + Option(RegExp(r'\.\d+'))
>>> Grammar(number)('3.14159').content()
'3.14159'
>>> Grammar(number)('3.14159').structure()
@@ -1255,9 +1255,9 @@ class Optional(UnaryOperator):
EBNF-Example: `number = ["-"] /\d+/ [ /\.\d+/ ]
"""
def __init__(self, parser: Parser, name: str = '') -> None:
-super(Optional, self).__init__(parser, name)
+super(Option, self).__init__(parser, name)
# assert isinstance(parser, Parser)
-assert not isinstance(parser, Optional), \
+assert not isinstance(parser, Option), \
"Redundant nesting of options: %s(%s)" % \
(str(name), str(parser.name))
assert not isinstance(parser, Required), \
@@ -1275,10 +1275,10 @@ class Optional(UnaryOperator):
and not self.parser.name else self.parser.repr) + ']'
-class ZeroOrMore(Optional):
+class ZeroOrMore(Option):
"""
`ZeroOrMore` applies a parser repeatedly as long as this parser
-matches. Like `Optional` the `ZeroOrMore` parser always matches. In
+matches. Like `Option` the `ZeroOrMore` parser always matches. In
case of zero repetitions, the empty match `((), text)` is returned.
Examples:
@@ -1310,8 +1310,8 @@ class ZeroOrMore(Optional):
class OneOrMore(UnaryOperator):
def __init__(self, parser: Parser, name: str = '') -> None:
super(OneOrMore, self).__init__(parser, name)
-assert not isinstance(parser, Optional), \
-"Use ZeroOrMore instead of nesting OneOrMore and Optional: " \
+assert not isinstance(parser, Option), \
+"Use ZeroOrMore instead of nesting OneOrMore and Option: " \
"%s(%s)" % (str(name), str(parser.name))
def __call__(self, text: StringView) -> Tuple[Node, StringView]:
@@ -1397,7 +1397,7 @@ class Alternative(NaryOperator):
super(Alternative, self).__init__(*parsers, name=name)
assert len(self.parsers) >= 1
# only the last alternative may be optional. Could this be checked at compile time?
-assert all(not isinstance(p, Optional) for p in self.parsers[:-1])
+assert all(not isinstance(p, Option) for p in self.parsers[:-1])
self.been_here = dict() # type: Dict[int, int]
def __call__(self, text: StringView) -> Tuple[Node, StringView]:
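
The assertion above enforces the rule stated in the comment: only the last alternative may be optional. A small toy model (illustrative only, not DHParser's implementation) shows why an Option anywhere but last makes the following alternatives unreachable:

    def literal(t):
        return lambda s: (t, s[len(t):]) if s.startswith(t) else None

    def option(p):                       # like Option: never fails, may match empty
        return lambda s: p(s) or ('', s)

    def alternative(*parsers):           # like Alternative: first successful parser wins
        def parse(s):
            for p in parsers:
                result = p(s)
                if result is not None:
                    return result
            return None
        return parse

    bad = alternative(option(literal("a")), literal("b"))
    print(bad("b"))                      # ('', 'b') -- the Option matched empty, "b" was never tried
    good = alternative(literal("b"), option(literal("a")))
    print(good("b"))                     # ('b', '')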
@@ -1446,7 +1446,7 @@ class FlowOperator(UnaryOperator):
class Required(FlowOperator):
-# Add constructor that checks for logical errors, like `Required(Optional(...))` constructs ?
+# Add constructor that checks for logical errors, like `Required(Option(...))` constructs ?
RX_ARGUMENT = re.compile(r'\s(\S)')
def __call__(self, text: StringView) -> Tuple[Node, StringView]:
@@ -169,6 +169,8 @@ class StringView(collections.abc.Sized):
def __init__(self, text: str, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
self.text = text # type: str
+self.begin = 0 # type: int
+self.end = 0 # type: int
self.begin, self.end = StringView.real_indices(begin, end, len(text))
self.len = max(self.end - self.begin, 0)
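
The two added lines follow a common pattern for the comment-style annotations used throughout this commit: declare each attribute with a definite type before it is reassigned from a helper, so its type does not hinge on what mypy infers for that helper. A self-contained sketch of the pattern (a hypothetical class with a made-up real_indices, not the real StringView):

    from typing import Optional, Tuple

    class Span:
        def __init__(self, begin: Optional[int] = 0, end: Optional[int] = None) -> None:
            # Comment annotations fix the attribute types up front ...
            self.begin = 0  # type: int
            self.end = 0    # type: int
            # ... before the tuple re-assignment below overwrites the values.
            self.begin, self.end = Span.real_indices(begin, end, 10)

        @staticmethod
        def real_indices(begin, end, length) -> Tuple[int, int]:
            # made-up clamping logic, only so that the example runs
            b = 0 if begin is None else max(begin, 0)
            e = length if end is None else min(max(end, 0), length)
            return b, e

    print(Span(2).begin, Span(2).end)    # 2 10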
@@ -218,7 +220,7 @@ def sv_match(regex, sv: StringView):
return regex.match(sv.text, pos=sv.begin, endpos=sv.end)
-def sv_index(absolute_index: Union[int, Iterable], sv: StringView) -> int:
+def sv_index(absolute_index: int, sv: StringView) -> int:
"""
Converts the an index into string watched by a StringView object
to an index relativ to the string view object, e.g.:
@@ -232,7 +234,7 @@ def sv_index(absolute_index: Union[int, Iterable], sv: StringView) -> int:
return absolute_index - sv.begin
-def sv_indices(absolute_indices: Iterable[int], sv: StringView) -> Tuple[int]:
+def sv_indices(absolute_indices: Iterable[int], sv: StringView) -> Tuple[int, ...]:
"""Converts the an index into string watched by a StringView object
to an index relativ to the string view object. See also: `sv_index()`
"""
@@ -191,7 +191,7 @@ code = compile(parser_py, '<string>', 'exec')
module_vars = globals()
-name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Optional', 'mixin_comment',
+name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Option', 'mixin_comment',
'ZeroOrMore', 'OneOrMore', 'Sequence', 'Alternative', 'Forward',
'NegativeLookahead', 'PositiveLookahead', 'PreprocessorToken', 'Grammar'}}
exec(code, name_space)
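
The lines above show the test's pattern for loading a freshly generated parser: compile the generated source, then exec it in a namespace pre-populated with exactly the combinators the generated code may reference, which is why the whitelist now has to say 'Option' instead of 'Optional'. A stripped-down, self-contained sketch of the same mechanism with stand-in source code:

    parser_py = "class Grammar:\n    root = answer\n"   # stand-in for generated code
    code = compile(parser_py, '<string>', 'exec')

    name_space = {'answer': 42}                         # whitelist of permitted names
    exec(code, name_space)

    Grammar = name_space['Grammar']
    print(Grammar.root)                                 # 42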
@@ -269,7 +269,7 @@ code = compile(parser_py, '<string>', 'exec')
module_vars = globals()
-name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Optional', 'mixin_comment',
+name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Option', 'mixin_comment',
'ZeroOrMore', 'Sequence', 'Alternative', 'Forward'}}
exec(code, name_space)
parser = name_space['Grammar']
@@ -17,7 +17,7 @@ except ImportError:
import re
from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
Required, Token, Synonym, \
-Optional, NegativeLookbehind, OneOrMore, RegExp, Series, RE, Capture, \
+Option, NegativeLookbehind, OneOrMore, RegExp, Series, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc, TransformationDict, \
Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
@@ -230,11 +230,11 @@ class LaTeXGrammar(Grammar):
EOF = RegExp('(?!.)')
BACKSLASH = RegExp('[\\\\]')
LB = RegExp('\\s*?\\n|$')
-NEW_LINE = Series(RegExp('[ \\t]*'), Optional(RegExp(COMMENT__)), RegExp('\\n'))
+NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
GAP = RE('[ \\t]*(?:\\n[ \\t]*)+\\n')
WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
-PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Optional(WSPC))
-LFF = Series(NEW_LINE, Optional(WSPC))
+PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
+LFF = Series(NEW_LINE, Option(WSPC))
LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
INTEGER = RE('\\d+')
@@ -250,15 +250,15 @@ class LaTeXGrammar(Grammar):
no_command = Alternative(Token("\\begin{"), Token("\\end"), Series(BACKSLASH, structural))
text = Series(TEXTCHUNK, ZeroOrMore(Series(RE(''), TEXTCHUNK)))
block = Series(RegExp('{'), RE(''), ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RE(''))), Required(RegExp('}')))
-cfg_text = ZeroOrMore(Alternative(Series(Optional(RE('')), text), CMDNAME, SPECIAL))
+cfg_text = ZeroOrMore(Alternative(Series(Option(RE('')), text), CMDNAME, SPECIAL))
config = Series(Token("["), cfg_text, Required(Token("]")))
cline = Series(Token("\\cline{"), INTEGER, Token("-"), INTEGER, Token("}"))
hline = Token("\\hline")
multicolumn = Series(Token("\\multicolumn"), Token("{"), INTEGER, Token("}"), tabular_config, block_of_paragraphs)
caption = Series(Token("\\caption"), block)
-includegraphics = Series(Token("\\includegraphics"), Optional(config), block)
+includegraphics = Series(Token("\\includegraphics"), Option(config), block)
footnote = Series(Token("\\footnote"), block_of_paragraphs)
-generic_command = Series(NegativeLookahead(no_command), CMDNAME, Optional(Series(Optional(Series(RE(''), config)), RE(''), block)))
+generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RE(''), config)), RE(''), block)))
text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
known_command = Alternative(footnote, includegraphics, caption, multicolumn, hline, cline)
command = Alternative(known_command, text_command, generic_command)
@@ -273,8 +273,8 @@ class LaTeXGrammar(Grammar):
line_element = Alternative(text, block, inline_environment, command)
text_element.set(Alternative(line_element, LINEFEED))
paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RE(''))))
-sequence = OneOrMore(Series(Alternative(paragraph, block_environment), Optional(PARSEP)))
-block_of_paragraphs.set(Series(Token("{"), Optional(sequence), Required(Token("}"))))
+sequence = OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP)))
+block_of_paragraphs.set(Series(Token("{"), Option(sequence), Required(Token("}"))))
tabular_config.set(Series(Token("{"), RE('[lcr|]+'), Required(Token("}"))))
tabular_cell = ZeroOrMore(Series(line_element, RE('')))
tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Token("&"), Alternative(multicolumn, tabular_cell))), Token("\\\\"), Alternative(hline, ZeroOrMore(cline)))
@@ -282,31 +282,31 @@ class LaTeXGrammar(Grammar):
verbatim = Series(Token("\\begin{verbatim}"), sequence, Required(Token("\\end{verbatim}")))
quotation = Alternative(Series(Token("\\begin{quotation}"), sequence, Required(Token("\\end{quotation}"))), Series(Token("\\begin{quote}"), sequence, Required(Token("\\end{quote}"))))
figure = Series(Token("\\begin{figure}"), sequence, Required(Token("\\end{figure}")))
-item = Series(Token("\\item"), Optional(WSPC), sequence)
-enumerate = Series(Token("\\begin{enumerate}"), Optional(WSPC), ZeroOrMore(item), Required(Token("\\end{enumerate}")))
-itemize = Series(Token("\\begin{itemize}"), Optional(WSPC), ZeroOrMore(item), Required(Token("\\end{itemize}")))
+item = Series(Token("\\item"), Option(WSPC), sequence)
+enumerate = Series(Token("\\begin{enumerate}"), Option(WSPC), ZeroOrMore(item), Required(Token("\\end{enumerate}")))
+itemize = Series(Token("\\begin{itemize}"), Option(WSPC), ZeroOrMore(item), Required(Token("\\end{itemize}")))
end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
generic_block = Series(begin_generic_block, sequence, Required(end_generic_block))
known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
block_environment.set(Alternative(known_environment, generic_block))
-Index = Series(Token("\\printindex"), Optional(WSPC))
-Bibliography = Series(Token("\\bibliography"), block, Optional(WSPC))
-SubParagraph = Series(Token("\\subparagraph"), block, Optional(WSPC), Optional(sequence))
-SubParagraphs = OneOrMore(Series(SubParagraph, Optional(WSPC)))
-Paragraph = Series(Token("\\paragraph"), block, Optional(WSPC), ZeroOrMore(Alternative(sequence, SubParagraphs)))
-Paragraphs = OneOrMore(Series(Paragraph, Optional(WSPC)))
-SubSubSection = Series(Token("\\subsubsection"), block, Optional(WSPC), ZeroOrMore(Alternative(sequence, Paragraphs)))
-SubSubSections = OneOrMore(Series(SubSubSection, Optional(WSPC)))
-SubSection = Series(Token("\\subsection"), block, Optional(WSPC), ZeroOrMore(Alternative(sequence, SubSubSections)))
-SubSections = OneOrMore(Series(SubSection, Optional(WSPC)))
-Section = Series(Token("\\section"), block, Optional(WSPC), ZeroOrMore(Alternative(sequence, SubSections)))
-Sections = OneOrMore(Series(Section, Optional(WSPC)))
-Chapter = Series(Token("\\chapter"), block, Optional(WSPC), ZeroOrMore(Alternative(sequence, Sections)))
-Chapters = OneOrMore(Series(Chapter, Optional(WSPC)))
+Index = Series(Token("\\printindex"), Option(WSPC))
+Bibliography = Series(Token("\\bibliography"), block, Option(WSPC))
+SubParagraph = Series(Token("\\subparagraph"), block, Option(WSPC), Option(sequence))
+SubParagraphs = OneOrMore(Series(SubParagraph, Option(WSPC)))
+Paragraph = Series(Token("\\paragraph"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubParagraphs)))
+Paragraphs = OneOrMore(Series(Paragraph, Option(WSPC)))
+SubSubSection = Series(Token("\\subsubsection"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, Paragraphs)))
+SubSubSections = OneOrMore(Series(SubSubSection, Option(WSPC)))
+SubSection = Series(Token("\\subsection"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubSubSections)))
+SubSections = OneOrMore(Series(SubSection, Option(WSPC)))
+Section = Series(Token("\\section"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, SubSections)))
+Sections = OneOrMore(Series(Section, Option(WSPC)))
+Chapter = Series(Token("\\chapter"), block, Option(WSPC), ZeroOrMore(Alternative(sequence, Sections)))
+Chapters = OneOrMore(Series(Chapter, Option(WSPC)))
frontpages = Synonym(sequence)
-document = Series(Optional(WSPC), Token("\\begin{document}"), Optional(WSPC), frontpages, Optional(WSPC), Alternative(Chapters, Sections), Optional(WSPC), Optional(Bibliography), Optional(Index), Optional(WSPC), Token("\\end{document}"), Optional(WSPC), Required(EOF))
-preamble = OneOrMore(Series(Optional(WSPC), command))
+document = Series(Option(WSPC), Token("\\begin{document}"), Option(WSPC), frontpages, Option(WSPC), Alternative(Chapters, Sections), Option(WSPC), Option(Bibliography), Option(Index), Option(WSPC), Token("\\end{document}"), Option(WSPC), Required(EOF))
+preamble = OneOrMore(Series(Option(WSPC), command))
latexdoc = Series(preamble, document)
root__ = latexdoc
@@ -15,7 +15,7 @@ try:
except ImportError:
import re
from DHParser.parser import Grammar, Compiler, Alternative, Required, Token, \
-Optional, OneOrMore, Series, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source
+Option, OneOrMore, Series, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source
from DHParser.syntaxtree import traverse, reduce_single_child, replace_by_single_child, no_transformation, \
remove_expendables, remove_tokens, flatten, \
WHITESPACE_KEYWORD, TOKEN_KEYWORD
@@ -160,36 +160,36 @@ class MLWGrammar(Grammar):
Name = Series(WORT, ZeroOrMore(Alternative(WORT, NAMENS_ABKÜRZUNG)))
Autorinfo = Series(Alternative(Token("AUTORIN"), Token("AUTOR")), Name)
Zusatz = Series(Token("ZUSATZ"), RE('\\s*.*', wR='', wL=''), TRENNER)
-EinBeleg = Series(OneOrMore(Series(NegativeLookahead(Series(Optional(LEER), Alternative(Token("*"), Token("BEDEUTUNG"), Token("AUTOR"), Token("NAME"), Token("ZUSATZ")))), RE('\\s*.*\\s*', wR='', wL=''))), Optional(Zusatz))
-Belege = Series(Token("BELEGE"), Optional(LEER), ZeroOrMore(Series(Token("*"), EinBeleg)))
+EinBeleg = Series(OneOrMore(Series(NegativeLookahead(Series(Option(LEER), Alternative(Token("*"), Token("BEDEUTUNG"), Token("AUTOR"), Token("NAME"), Token("ZUSATZ")))), RE('\\s*.*\\s*', wR='', wL=''))), Option(Zusatz))
+Belege = Series(Token("BELEGE"), Option(LEER), ZeroOrMore(Series(Token("*"), EinBeleg)))
DeutscheBedeutung = Series(Token("DEU"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
LateinischeBedeutung = Series(Token("LAT"), RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''))
-Interpretamente = Series(LateinischeBedeutung, Optional(LEER), Required(DeutscheBedeutung), Optional(LEER))
-Bedeutungskategorie = Series(RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''), Optional(LEER))
-Bedeutung = Series(Alternative(Interpretamente, Bedeutungskategorie), Optional(Belege))
-BedeutungsPosition = OneOrMore(Series(Token("BEDEUTUNG"), Optional(LEER), Required(Bedeutung)))
+Interpretamente = Series(LateinischeBedeutung, Option(LEER), Required(DeutscheBedeutung), Option(LEER))
+Bedeutungskategorie = Series(RE('(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+', wL=''), Option(LEER))
+Bedeutung = Series(Alternative(Interpretamente, Bedeutungskategorie), Option(Belege))
+BedeutungsPosition = OneOrMore(Series(Token("BEDEUTUNG"), Option(LEER), Required(Bedeutung)))
VerweisZiel = RE('<\\w+>')
Verweis = RE('\\w+')
Beleg = Verweis
Schreibweise = Alternative(Token("vizreg-"), Token("festregel(a)"), Token("fezdregl(a)"), Token("fat-"))
SWVariante = Series(Schreibweise, Token(":"), Beleg)
SWTyp = Alternative(Token("script."), Token("script. fat-"))
-SchreibweisenPosition = Series(Token("SCHREIBWEISE"), Optional(LEER), Required(SWTyp), Token(":"), Optional(LEER), Required(SWVariante), ZeroOrMore(Series(TRENNER, SWVariante)), Optional(LEER))
+SchreibweisenPosition = Series(Token("SCHREIBWEISE"), Option(LEER), Required(SWTyp), Token(":"), Option(LEER), Required(SWVariante), ZeroOrMore(Series(TRENNER, SWVariante)), Option(LEER))
ArtikelKopf = SchreibweisenPosition
_genus = Alternative(Token("maskulinum"), Token("m."), Token("femininum"), Token("f."), Token("neutrum"), Token("n."))
Flexion = RE('-?[a-z]+', wL='')
Flexionen = Series(Flexion, ZeroOrMore(Series(Token(","), Required(Flexion))))
-GVariante = Series(Flexionen, Optional(_genus), Token(":"), Beleg)
+GVariante = Series(Flexionen, Option(_genus), Token(":"), Beleg)
GrammatikVarianten = Series(TRENNER, GVariante)
_wortart = Alternative(Token("nomen"), Token("n."), Token("verb"), Token("v."), Token("adverb"), Token("adv."), Token("adjektiv"), Token("adj."))
-GrammatikPosition = Series(Token("GRAMMATIK"), Optional(LEER), Required(_wortart), Required(TRENNER), Required(Flexionen), Optional(_genus), ZeroOrMore(GrammatikVarianten), Optional(TRENNER))
+GrammatikPosition = Series(Token("GRAMMATIK"), Option(LEER), Required(_wortart), Required(TRENNER), Required(Flexionen), Option(_genus), ZeroOrMore(GrammatikVarianten), Option(TRENNER))
LVZusatz = Series(Token("ZUSATZ"), Token("sim."))
LVariante = RE('(?:[a-z]|-)+')
-LemmaVarianten = Series(Token("VARIANTEN"), Optional(LEER), Required(LVariante), ZeroOrMore(Series(TRENNER, LVariante)), Optional(Series(TRENNER, LVZusatz)), Optional(TRENNER))
+LemmaVarianten = Series(Token("VARIANTEN"), Option(LEER), Required(LVariante), ZeroOrMore(Series(TRENNER, LVariante)), Option(Series(TRENNER, LVZusatz)), Option(TRENNER))
_tll = Token("*")
-Lemma = Series(Optional(_tll), WORT_KLEIN, Optional(LEER))
-LemmaPosition = Series(Token("LEMMA"), Required(Lemma), Optional(LemmaVarianten), Required(GrammatikPosition))
-Artikel = Series(Optional(LEER), Required(LemmaPosition), Optional(ArtikelKopf), Required(BedeutungsPosition), Required(Autorinfo), Optional(LEER), DATEI_ENDE)
+Lemma = Series(Option(_tll), WORT_KLEIN, Option(LEER))
+LemmaPosition = Series(Token("LEMMA"), Required(Lemma), Option(LemmaVarianten), Required(GrammatikPosition))
+Artikel = Series(Option(LEER), Required(LemmaPosition), Option(ArtikelKopf), Required(BedeutungsPosition), Required(Autorinfo), Option(LEER), DATEI_ENDE)
root__ = Artikel
@@ -19,7 +19,7 @@ except ImportError:
import re
from DHParser.toolkit import logging, is_filename
from DHParser.parser import Grammar, Compiler, Required, Token, \
-Optional, OneOrMore, Series, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source, \
+Option, OneOrMore, Series, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc, Synonym
from DHParser.syntaxtree import Node, traverse, remove_last, \
reduce_single_child, replace_by_single_child, remove_tokens, flatten, is_whitespace, collapse, replace_content, \
@@ -105,13 +105,13 @@ class LyrikGrammar(Grammar):
namenfolge = OneOrMore(NAME)
wortfolge = OneOrMore(WORT)
jahr = Synonym(JAHRESZAHL)
-ort = Series(wortfolge, Optional(verknüpfung))
-untertitel = Series(wortfolge, Optional(verknüpfung))
-werk = Series(wortfolge, Optional(Series(Token("."), Required(untertitel))), Optional(verknüpfung))
-autor = Series(namenfolge, Optional(verknüpfung))
-bibliographisches = Series(autor, Required(Token(",")), Optional(NZ), werk, Required(Token(",")), Optional(NZ), ort,
-Required(Token(",")), Optional(NZ), jahr, Required(Token(".")))
-gedicht = Series(bibliographisches, OneOrMore(LEERZEILE), Optional(serie), Required(titel), Required(text),
+ort = Series(wortfolge, Option(verknüpfung))
+untertitel = Series(wortfolge, Option(verknüpfung))
+werk = Series(wortfolge, Option(Series(Token("."), Required(untertitel))), Option(verknüpfung))
+autor = Series(namenfolge, Option(verknüpfung))
+bibliographisches = Series(autor, Required(Token(",")), Option(NZ), werk, Required(Token(",")), Option(NZ), ort,
+Required(Token(",")), Option(NZ), jahr, Required(Token(".")))
+gedicht = Series(bibliographisches, OneOrMore(LEERZEILE), Option(serie), Required(titel), Required(text),
RE('\\s*', wR=''), Required(ENDE))
root__ = gedicht