Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates, GitLab will be unavailable for a few minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
df506660
Commit
df506660
authored
Mar 08, 2017
by
Eckhart Arnold
Browse files
some small amendments
parent
c75f9148
Changes
2
Hide whitespace changes
Inline
Side-by-side
ParserCombinators.py
View file @
df506660
...
@@ -471,19 +471,18 @@ class ParserHeadquarter:
...
@@ -471,19 +471,18 @@ class ParserHeadquarter:
return
return
cdict
=
cls
.
__dict__
cdict
=
cls
.
__dict__
for
entry
in
cdict
:
for
entry
in
cdict
:
if
sane_parser_name
(
entry
):
if
sane_parser_name
(
entry
):
# implies isinstance(parser, Parser) qua convention
parser
=
cdict
[
entry
]
parser
=
cdict
[
entry
]
if
isinstance
(
parser
,
Parser
):
assert
isinstance
(
parser
,
Parser
)
# print(type(parser), parser.name, entry)
if
isinstance
(
parser
,
Forward
):
if
isinstance
(
parser
,
Forward
):
assert
not
parser
.
name
or
parser
.
name
==
entry
assert
not
parser
.
name
or
parser
.
name
==
entry
if
parser
.
name
and
parser
.
name
!=
entry
:
if
parser
.
name
and
parser
.
name
!=
entry
:
raise
ValueError
((
"Parser named %s should not be "
raise
ValueError
((
"Parser named %s should not be "
" assigned to field with different name: %s"
" assigned to field with different name: %s"
%
(
parser
.
name
,
entry
)))
%
(
parser
.
name
,
entry
)))
parser
.
parser
.
name
=
entry
parser
.
parser
.
name
=
entry
else
:
else
:
parser
.
name
=
entry
parser
.
name
=
entry
cls
.
parser_initialization__
=
"done"
cls
.
parser_initialization__
=
"done"
def
__init__
(
self
):
def
__init__
(
self
):
...
@@ -500,7 +499,9 @@ class ParserHeadquarter:
...
@@ -500,7 +499,9 @@ class ParserHeadquarter:
"""Adds the copy of the parser object to this instance of ParserHeadquarter.
"""Adds the copy of the parser object to this instance of ParserHeadquarter.
"""
"""
# print(parser.name)
# print(parser.name)
if
sane_parser_name
(
parser
.
name
):
# overwrite class variable with instance variable
if
sane_parser_name
(
parser
.
name
):
# implies isinstance(parser, Parser) qua convention
assert
isinstance
(
parser
,
Parser
)
# overwrite class variable with instance variable!!!
setattr
(
self
,
parser
.
name
,
parser
)
setattr
(
self
,
parser
.
name
,
parser
)
parser
.
headquarter
=
self
parser
.
headquarter
=
self
...
@@ -661,7 +662,7 @@ class RegExp(Parser):
...
@@ -661,7 +662,7 @@ class RegExp(Parser):
class
RE
(
Parser
):
class
RE
(
Parser
):
def
__init__
(
self
,
regexp
,
wL
=
''
,
wR
=
''
,
name
=
None
):
def
__init__
(
self
,
regexp
,
wL
=
None
,
wR
=
None
,
name
=
None
):
super
(
RE
,
self
).
__init__
(
name
)
super
(
RE
,
self
).
__init__
(
name
)
self
.
wL
=
RegExp
(
wL
,
WHITESPACE_KEYWORD
)
if
wL
else
''
self
.
wL
=
RegExp
(
wL
,
WHITESPACE_KEYWORD
)
if
wL
else
''
self
.
wR
=
RegExp
(
wR
,
WHITESPACE_KEYWORD
)
if
wR
else
''
self
.
wR
=
RegExp
(
wR
,
WHITESPACE_KEYWORD
)
if
wR
else
''
...
...
examples/MLW/MLW_compiler.py
View file @
df506660
...
@@ -100,7 +100,7 @@ class MLWGrammar(ParserHeadquarter):
...
@@ -100,7 +100,7 @@ class MLWGrammar(ParserHeadquarter):
DATEI_ENDE = !/./
DATEI_ENDE = !/./
NIEMALS = /(?!.)/
NIEMALS = /(?!.)/
"""
"""
source_hash__
=
"
7a55cb4440d934ce0300c8610a3b4c33
"
source_hash__
=
"
460019891fffc4dbf8d8e8573f5f699c
"
parser_initialization__
=
"upon instatiation"
parser_initialization__
=
"upon instatiation"
wsp__
=
mixin_comment
(
whitespace
=
r
'\s*'
,
comment
=
r
'#.*(?:\n|$)'
)
wsp__
=
mixin_comment
(
whitespace
=
r
'\s*'
,
comment
=
r
'#.*(?:\n|$)'
)
NIEMALS
=
RE
(
'(?!.)'
)
NIEMALS
=
RE
(
'(?!.)'
)
...
@@ -112,37 +112,37 @@ class MLWGrammar(ParserHeadquarter):
...
@@ -112,37 +112,37 @@ class MLWGrammar(ParserHeadquarter):
WORT_GROSS
=
RE
(
'[A-ZÄÖÜ][a-zäöüß]+'
,
wR
=
wsp__
)
WORT_GROSS
=
RE
(
'[A-ZÄÖÜ][a-zäöüß]+'
,
wR
=
wsp__
)
WORT
=
RE
(
'[A-ZÄÖÜ]?[a-zäöüß]+'
,
wR
=
wsp__
)
WORT
=
RE
(
'[A-ZÄÖÜ]?[a-zäöüß]+'
,
wR
=
wsp__
)
Name
=
Sequence
(
WORT
,
ZeroOrMore
(
Alternative
(
WORT
,
RE
(
'[A-ZÄÖÜÁÀ]
\\
.'
))))
Name
=
Sequence
(
WORT
,
ZeroOrMore
(
Alternative
(
WORT
,
RE
(
'[A-ZÄÖÜÁÀ]
\\
.'
))))
Autorinfo
=
Sequence
(
Alternative
(
Token
(
"AUTORIN"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"AUTOR"
,
w
R
=
wsp__
,
w
L
=
wsp__
)),
Name
)
Autorinfo
=
Sequence
(
Alternative
(
Token
(
"AUTORIN"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"AUTOR"
,
w
L
=
wsp__
,
w
R
=
wsp__
)),
Name
)
Zusatz
=
Sequence
(
Token
(
"ZUSATZ"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
RE
(
'
\\
s?.*'
))
Zusatz
=
Sequence
(
Token
(
"ZUSATZ"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
RE
(
'
\\
s?.*'
))
EinBeleg
=
Sequence
(
OneOrMore
(
Sequence
(
NegativeLookahead
(
Sequence
(
RE
(
'
\\
s*'
),
Alternative
(
Token
(
"*"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"BEDEUTUNG"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"AUTOR"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"NAME"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"ZUSATZ"
,
w
R
=
wsp__
,
w
L
=
wsp__
)))),
RE
(
'
\\
s?.*'
))),
Optional
(
Zusatz
))
EinBeleg
=
Sequence
(
OneOrMore
(
Sequence
(
NegativeLookahead
(
Sequence
(
RE
(
'
\\
s*'
),
Alternative
(
Token
(
"*"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"BEDEUTUNG"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"AUTOR"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"NAME"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"ZUSATZ"
,
w
L
=
wsp__
,
w
R
=
wsp__
)))),
RE
(
'
\\
s?.*'
))),
Optional
(
Zusatz
))
Belege
=
Sequence
(
Token
(
"BELEGE"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
ZeroOrMore
(
Sequence
(
Token
(
"*"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
EinBeleg
)))
Belege
=
Sequence
(
Token
(
"BELEGE"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
ZeroOrMore
(
Sequence
(
Token
(
"*"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
EinBeleg
)))
DeutscheBedeutung
=
Sequence
(
Token
(
"DEU"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
RE
(
'(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+'
,
wR
=
wsp__
))
DeutscheBedeutung
=
Sequence
(
Token
(
"DEU"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
RE
(
'(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+'
,
wR
=
wsp__
))
LateinischeBedeutung
=
Sequence
(
Token
(
"LAT"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
RE
(
'(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+'
,
wR
=
wsp__
))
LateinischeBedeutung
=
Sequence
(
Token
(
"LAT"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
RE
(
'(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+'
,
wR
=
wsp__
))
Interpretamente
=
Sequence
(
LateinischeBedeutung
,
DeutscheBedeutung
,
Optional
(
Belege
))
Interpretamente
=
Sequence
(
LateinischeBedeutung
,
DeutscheBedeutung
,
Optional
(
Belege
))
Bedeutungskategorie
=
RE
(
'(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+'
,
wR
=
wsp__
)
Bedeutungskategorie
=
RE
(
'(?:(?![A-ZÄÖÜ][A-ZÄÖÜ]).)+'
,
wR
=
wsp__
)
Bedeutung
=
Alternative
(
Interpretamente
,
Bedeutungskategorie
)
Bedeutung
=
Alternative
(
Interpretamente
,
Bedeutungskategorie
)
BedeutungsPosition
=
OneOrMore
(
Sequence
(
Token
(
"BEDEUTUNG"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Bedeutung
))
BedeutungsPosition
=
OneOrMore
(
Sequence
(
Token
(
"BEDEUTUNG"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Bedeutung
))
VerweisZiel
=
RE
(
'<
\\
w+>'
,
wR
=
wsp__
,
wL
=
wsp__
)
VerweisZiel
=
RE
(
'<
\\
w+>'
,
wR
=
wsp__
,
wL
=
wsp__
)
Verweis
=
RE
(
'>>
\\
w+'
,
wR
=
wsp__
,
wL
=
wsp__
)
Verweis
=
RE
(
'>>
\\
w+'
,
wR
=
wsp__
,
wL
=
wsp__
)
Beleg
=
Verweis
Beleg
=
Verweis
Schreibweise
=
Alternative
(
Token
(
"vizreg-"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"festregel(a)"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"fezdregl(a)"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"fat-"
,
w
R
=
wsp__
,
w
L
=
wsp__
))
Schreibweise
=
Alternative
(
Token
(
"vizreg-"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"festregel(a)"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"fezdregl(a)"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"fat-"
,
w
L
=
wsp__
,
w
R
=
wsp__
))
SWVariante
=
Sequence
(
Schreibweise
,
Token
(
":"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Beleg
)
SWVariante
=
Sequence
(
Schreibweise
,
Token
(
":"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Beleg
)
SWTyp
=
Alternative
(
Token
(
"script."
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"script. fat-"
,
w
R
=
wsp__
,
w
L
=
wsp__
))
SWTyp
=
Alternative
(
Token
(
"script."
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"script. fat-"
,
w
L
=
wsp__
,
w
R
=
wsp__
))
SchreibweisenPosition
=
Sequence
(
Token
(
"SCHREIBWEISE"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Required
(
SWTyp
),
Token
(
":"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Required
(
SWVariante
),
ZeroOrMore
(
Sequence
(
Token
(
","
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Required
(
SWVariante
))))
SchreibweisenPosition
=
Sequence
(
Token
(
"SCHREIBWEISE"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Required
(
SWTyp
),
Token
(
":"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Required
(
SWVariante
),
ZeroOrMore
(
Sequence
(
Token
(
","
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Required
(
SWVariante
))))
ArtikelKopf
=
SchreibweisenPosition
ArtikelKopf
=
SchreibweisenPosition
_genus
=
Alternative
(
Token
(
"maskulinum"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"m."
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"femininum"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"f."
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"neutrum"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"n."
,
w
R
=
wsp__
,
w
L
=
wsp__
))
_genus
=
Alternative
(
Token
(
"maskulinum"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"m."
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"femininum"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"f."
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"neutrum"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"n."
,
w
L
=
wsp__
,
w
R
=
wsp__
))
Flexion
=
RE
(
'-?[a-z]+'
,
wR
=
wsp__
)
Flexion
=
RE
(
'-?[a-z]+'
,
wR
=
wsp__
)
Flexionen
=
Sequence
(
Flexion
,
ZeroOrMore
(
Sequence
(
Token
(
","
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Required
(
Flexion
))))
Flexionen
=
Sequence
(
Flexion
,
ZeroOrMore
(
Sequence
(
Token
(
","
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Required
(
Flexion
))))
GVariante
=
Sequence
(
Flexionen
,
Optional
(
_genus
),
Token
(
":"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Beleg
)
GVariante
=
Sequence
(
Flexionen
,
Optional
(
_genus
),
Token
(
":"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Beleg
)
GrammatikVarianten
=
Sequence
(
Token
(
";"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Required
(
GVariante
))
GrammatikVarianten
=
Sequence
(
Token
(
";"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Required
(
GVariante
))
_wortart
=
Alternative
(
Token
(
"nomen"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"n."
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"verb"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"v."
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"adverb"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"adv."
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"adjektiv"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"adj."
,
w
R
=
wsp__
,
w
L
=
wsp__
))
_wortart
=
Alternative
(
Token
(
"nomen"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"n."
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"verb"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"v."
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"adverb"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"adv."
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"adjektiv"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"adj."
,
w
L
=
wsp__
,
w
R
=
wsp__
))
GrammatikPosition
=
Sequence
(
Token
(
"GRAMMATIK"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Required
(
_wortart
),
Required
(
Token
(
";"
,
w
R
=
wsp__
,
w
L
=
wsp__
)),
Required
(
Flexionen
),
Optional
(
_genus
),
ZeroOrMore
(
GrammatikVarianten
),
Optional
(
Alternative
(
Token
(
";"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Token
(
"."
,
w
R
=
wsp__
,
w
L
=
wsp__
))))
GrammatikPosition
=
Sequence
(
Token
(
"GRAMMATIK"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Required
(
_wortart
),
Required
(
Token
(
";"
,
w
L
=
wsp__
,
w
R
=
wsp__
)),
Required
(
Flexionen
),
Optional
(
_genus
),
ZeroOrMore
(
GrammatikVarianten
),
Optional
(
Alternative
(
Token
(
";"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Token
(
"."
,
w
L
=
wsp__
,
w
R
=
wsp__
))))
LVZusatz
=
Token
(
"sim."
,
w
R
=
wsp__
,
w
L
=
wsp__
)
LVZusatz
=
Token
(
"sim."
,
w
L
=
wsp__
,
w
R
=
wsp__
)
LVariante
=
RE
(
'(?:[a-z]|-)+'
,
wR
=
wsp__
,
wL
=
wsp__
)
LVariante
=
RE
(
'(?:[a-z]|-)+'
,
wR
=
wsp__
,
wL
=
wsp__
)
LemmaVarianten
=
Sequence
(
Token
(
"VARIANTEN"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Required
(
LVariante
),
ZeroOrMore
(
Sequence
(
Token
(
","
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Required
(
LVariante
))),
Optional
(
Sequence
(
Token
(
";"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Required
(
LVZusatz
))))
LemmaVarianten
=
Sequence
(
Token
(
"VARIANTEN"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Required
(
LVariante
),
ZeroOrMore
(
Sequence
(
Token
(
","
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Required
(
LVariante
))),
Optional
(
Sequence
(
Token
(
";"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Required
(
LVZusatz
))))
_tll
=
Token
(
"*"
,
w
R
=
wsp__
,
w
L
=
wsp__
)
_tll
=
Token
(
"*"
,
w
L
=
wsp__
,
w
R
=
wsp__
)
Lemma
=
Sequence
(
Optional
(
_tll
),
WORT_KLEIN
)
Lemma
=
Sequence
(
Optional
(
_tll
),
WORT_KLEIN
)
LemmaPosition
=
Sequence
(
Token
(
"LEMMA"
,
w
R
=
wsp__
,
w
L
=
wsp__
),
Required
(
Lemma
),
Optional
(
LemmaVarianten
),
Required
(
GrammatikPosition
))
LemmaPosition
=
Sequence
(
Token
(
"LEMMA"
,
w
L
=
wsp__
,
w
R
=
wsp__
),
Required
(
Lemma
),
Optional
(
LemmaVarianten
),
Required
(
GrammatikPosition
))
Artikel
=
Sequence
(
Optional
(
LEER
),
Required
(
LemmaPosition
),
Optional
(
ArtikelKopf
),
Required
(
BedeutungsPosition
),
Required
(
Autorinfo
),
Optional
(
LEER
),
DATEI_ENDE
)
Artikel
=
Sequence
(
Optional
(
LEER
),
Required
(
LemmaPosition
),
Optional
(
ArtikelKopf
),
Required
(
BedeutungsPosition
),
Required
(
Autorinfo
),
Optional
(
LEER
),
DATEI_ENDE
)
root__
=
Artikel
root__
=
Artikel
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment