Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
abb84c1c
Commit
abb84c1c
authored
Mar 10, 2017
by
Eckhart Arnold
Browse files
sync-commit: work on MLW...
parent
82e16eb0
Changes
5
Show whitespace changes
Inline
Side-by-side
ParserCombinators.py
View file @
abb84c1c
...
@@ -1150,24 +1150,16 @@ def flatten(node):
...
@@ -1150,24 +1150,16 @@ def flatten(node):
node
.
result
=
tuple
(
new_result
)
node
.
result
=
tuple
(
new_result
)
def
remove_tokens
(
node
,
tokens
):
def
remove_tokens
(
node
,
tokens
=
{}
):
"""Reomoves any among a particular set of tokens from the immediate
"""Reomoves any among a particular set of tokens from the immediate
descendants of a node.
descendants of a node.
"""
"""
if
node
.
children
:
if
node
.
children
:
if
tokens
:
node
.
result
=
tuple
(
child
for
child
in
node
.
result
node
.
result
=
tuple
(
child
for
child
in
node
.
result
if
child
.
parser
.
name
!=
TOKEN_KEYWORD
or
if
child
.
parser
.
name
!=
TOKEN_KEYWORD
or
child
.
result
not
in
tokens
)
child
.
result
not
in
tokens
)
else
:
def
remove_all_tokens
(
node
):
"""Removes all tokens from the immediate descendants of a node.
:param node: the node from which children that represent tokens
shall be removed
:return: the node with all children that are tokens removed
"""
if
node
.
children
:
node
.
result
=
tuple
(
child
for
child
in
node
.
result
node
.
result
=
tuple
(
child
for
child
in
node
.
result
if
child
.
parser
.
name
!=
TOKEN_KEYWORD
)
if
child
.
parser
.
name
!=
TOKEN_KEYWORD
)
...
...
examples/MLW/MLW_compiler.py
View file @
abb84c1c
...
@@ -100,7 +100,7 @@ class MLWGrammar(ParserHeadquarter):
...
@@ -100,7 +100,7 @@ class MLWGrammar(ParserHeadquarter):
DATEI_ENDE = !/./
DATEI_ENDE = !/./
NIEMALS = /(?!.)/
NIEMALS = /(?!.)/
"""
"""
source_hash__
=
"
d7afa7bb0037ee25c0cabfa6e5a956c6
"
source_hash__
=
"
2c3456ee74172407cbe1f15e3649b41f
"
parser_initialization__
=
"upon instatiation"
parser_initialization__
=
"upon instatiation"
wsp__
=
mixin_comment
(
whitespace
=
r
'\s*'
,
comment
=
r
'#.*(?:\n|$)'
)
wsp__
=
mixin_comment
(
whitespace
=
r
'\s*'
,
comment
=
r
'#.*(?:\n|$)'
)
wspL__
=
wsp__
wspL__
=
wsp__
...
@@ -151,17 +151,26 @@ class MLWGrammar(ParserHeadquarter):
...
@@ -151,17 +151,26 @@ class MLWGrammar(ParserHeadquarter):
### DON'T EDIT OR REMOVE THIS LINE ###
### DON'T EDIT OR REMOVE THIS LINE ###
def
test
(
node
):
if
node
.
parser
.
name
==
"WORT_KLEIN"
:
assert
False
,
node
.
as_sexpr
()
node
=
remove_expendables
(
node
)
node
=
reduce_single_child
(
node
)
assert
False
,
node
.
parser
.
name
return
node
def
test
(
node
):
def
test
(
node
):
print
(
node
.
as_sexpr
())
print
(
node
.
as_sexpr
())
return
node
def
join_strings
(
node
,
delimiter
=
'
\n
'
):
new_result
=
[]
n
=
0
while
n
<
len
(
node
.
result
):
nd
=
node
.
result
[
n
]
if
not
nd
.
children
:
a
=
n
n
+=
1
while
n
<
len
(
node
.
result
)
and
not
node
.
result
[
n
].
children
:
n
+=
1
nd
.
result
=
delimiter
.
join
((
r
.
result
for
r
in
node
.
result
[
a
:
n
]))
new_result
.
append
(
nd
)
node
.
result
=
tuple
(
new_result
)
print
(
node
.
as_sexpr
())
MLWTransTable
=
{
MLWTransTable
=
{
# AST Transformations for the MLW-grammar
# AST Transformations for the MLW-grammar
...
@@ -174,7 +183,7 @@ MLWTransTable = {
...
@@ -174,7 +183,7 @@ MLWTransTable = {
"LemmaVarianten"
:
"LemmaVarianten"
:
[
partial
(
remove_tokens
,
tokens
=
{
'VARIANTEN'
}),
flatten
,
[
partial
(
remove_tokens
,
tokens
=
{
'VARIANTEN'
}),
flatten
,
partial
(
remove_tokens
,
tokens
=
{
','
,
';'
})],
partial
(
remove_tokens
,
tokens
=
{
','
,
';'
})],
"LVariante, LVZusatz, Schreibweise"
:
"LVariante, LVZusatz, Schreibweise
, Name
"
:
[
remove_expendables
,
reduce_single_child
],
[
remove_expendables
,
reduce_single_child
],
"SWVariante"
:
"SWVariante"
:
[
remove_expendables
,
partial
(
remove_tokens
,
tokens
=
{
':'
})],
[
remove_expendables
,
partial
(
remove_tokens
,
tokens
=
{
':'
})],
...
@@ -197,10 +206,12 @@ MLWTransTable = {
...
@@ -197,10 +206,12 @@ MLWTransTable = {
"Bedeutung"
:
no_transformation
,
"Bedeutung"
:
no_transformation
,
"Bedeutungskategorie"
:
no_transformation
,
"Bedeutungskategorie"
:
no_transformation
,
"Interpretamente"
:
no_transformation
,
"Interpretamente"
:
no_transformation
,
"LateinischeBedeutung"
:
no_transformation
,
"LateinischeBedeutung, DeutscheBedeutung"
:
"DeutscheBedeutung"
:
no_transformation
,
[
remove_expendables
,
remove_tokens
,
reduce_single_child
],
"Belege"
:
no_transformation
,
"Belege"
:
"EinBeleg"
:
no_transformation
,
[
flatten
,
remove_tokens
],
"EinBeleg"
:
[
flatten
],
# remove_expendables], # join_strings],
"Beleg"
:
no_transformation
,
"Beleg"
:
no_transformation
,
"VerweisZiel"
:
no_transformation
,
"VerweisZiel"
:
no_transformation
,
"WORT, WORT_KLEIN, WORT_GROSS, GROSSSCHRIFT"
:
"WORT, WORT_KLEIN, WORT_GROSS, GROSSSCHRIFT"
:
...
...
examples/MLW/compile_MLW-grammar.py
View file @
abb84c1c
...
@@ -21,7 +21,7 @@ limitations under the License.
...
@@ -21,7 +21,7 @@ limitations under the License.
import
os
import
os
import
sys
import
sys
sys
.
path
.
append
(
os
.
path
.
abspath
(
'../'
))
sys
.
path
.
append
(
os
.
path
.
abspath
(
'../
../
'
))
from
ParserCombinators
import
run_compiler
from
ParserCombinators
import
run_compiler
errors
=
run_compiler
(
"MLW.ebnf"
)
errors
=
run_compiler
(
"MLW.ebnf"
)
if
errors
:
if
errors
:
...
...
examples/MLW/samples/compile_MLW-entry.py
View file @
abb84c1c
...
@@ -21,7 +21,7 @@ limitations under the License.
...
@@ -21,7 +21,7 @@ limitations under the License.
import
os
import
os
import
sys
import
sys
sys
.
path
.
append
(
os
.
path
.
abspath
(
'../'
))
sys
.
path
.
append
(
os
.
path
.
abspath
(
'../
../../
'
))
import
ParserCombinators
import
ParserCombinators
from
ParserCombinators
import
run_compiler
,
has_source_changed
from
ParserCombinators
import
run_compiler
,
has_source_changed
...
...
examples/MLW/samples/fascitergula.xml
View file @
abb84c1c
...
@@ -110,40 +110,17 @@
...
@@ -110,40 +110,17 @@
<Bedeutung>
<Bedeutung>
<Interpretamente>
<Interpretamente>
<LateinischeBedeutung>
<LateinischeBedeutung>
<token__>
LAT
</token__>
<RegExp>
pannus, faciale, sudarium
pannus, faciale, sudarium
</RegExp>
</LateinischeBedeutung>
</LateinischeBedeutung>
<DeutscheBedeutung>
<DeutscheBedeutung>
<token__>
DEU
</token__>
<RegExp>
Gesichts-, Schweißtuch [usu liturg.; de re v. p. 32, 63]
Gesichts-, Schweißtuch [usu liturg.; de re v. p. 32, 63]
</RegExp>
</DeutscheBedeutung>
</DeutscheBedeutung>
<Belege>
<Belege>
<token__>
BELEGE
</token__>
<ZeroOrMore>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<EinBeleg>
<RegExp>
<RegExp>
Catal. thes. Germ. 28,11 (post 851) -um III.
Catal. thes. Germ. 28,11 (post 851) -um III.
</RegExp>
</RegExp>
</EinBeleg>
</EinBeleg>
</Sequence>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<EinBeleg>
<OneOrMore>
<OneOrMore>
<RegExp>
<RegExp>
...
@@ -154,21 +131,11 @@
...
@@ -154,21 +131,11 @@
</RegExp>
</RegExp>
</OneOrMore>
</OneOrMore>
</EinBeleg>
</EinBeleg>
</Sequence>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<EinBeleg>
<RegExp>
<RegExp>
Catal. thes. Germ. 18,7 "-eterculi viginti quatuor".
Catal. thes. Germ. 18,7 "-eterculi viginti quatuor".
</RegExp>
</RegExp>
</EinBeleg>
</EinBeleg>
</Sequence>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<EinBeleg>
<OneOrMore>
<OneOrMore>
<RegExp>
<RegExp>
...
@@ -179,11 +146,6 @@
...
@@ -179,11 +146,6 @@
</RegExp>
</RegExp>
</OneOrMore>
</OneOrMore>
</EinBeleg>
</EinBeleg>
</Sequence>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<EinBeleg>
<OneOrMore>
<OneOrMore>
<RegExp>
<RegExp>
...
@@ -205,8 +167,6 @@
...
@@ -205,8 +167,6 @@
</RegExp>
</RegExp>
</Zusatz>
</Zusatz>
</EinBeleg>
</EinBeleg>
</Sequence>
</ZeroOrMore>
</Belege>
</Belege>
</Interpretamente>
</Interpretamente>
</Bedeutung>
</Bedeutung>
...
@@ -218,40 +178,17 @@
...
@@ -218,40 +178,17 @@
<Bedeutung>
<Bedeutung>
<Interpretamente>
<Interpretamente>
<LateinischeBedeutung>
<LateinischeBedeutung>
<token__>
LAT
</token__>
<RegExp>
capital, rica
capital, rica
</RegExp>
</LateinischeBedeutung>
</LateinischeBedeutung>
<DeutscheBedeutung>
<DeutscheBedeutung>
<token__>
DEU
</token__>
<RegExp>
Kopftuch
Kopftuch
</RegExp>
</DeutscheBedeutung>
</DeutscheBedeutung>
<Belege>
<Belege>
<token__>
BELEGE
</token__>
<ZeroOrMore>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<EinBeleg>
<RegExp>
<RegExp>
Transl. Libor. I 32 raptis feminarum -is (fa[s]citergiis var. l.).
Transl. Libor. I 32 raptis feminarum -is (fa[s]citergiis var. l.).
</RegExp>
</RegExp>
</EinBeleg>
</EinBeleg>
</Sequence>
<Sequence>
<token__>
*
</token__>
<EinBeleg>
<EinBeleg>
<OneOrMore>
<OneOrMore>
<RegExp>
<RegExp>
...
@@ -262,8 +199,6 @@
...
@@ -262,8 +199,6 @@
</RegExp>
</RegExp>
</OneOrMore>
</OneOrMore>
</EinBeleg>
</EinBeleg>
</Sequence>
</ZeroOrMore>
</Belege>
</Belege>
</Interpretamente>
</Interpretamente>
</Bedeutung>
</Bedeutung>
...
@@ -274,9 +209,7 @@
...
@@ -274,9 +209,7 @@
AUTORIN
AUTORIN
</token__>
</token__>
<Name>
<Name>
<WORT>
Weber
Weber
</WORT>
</Name>
</Name>
</Autorinfo>
</Autorinfo>
</Artikel>
</Artikel>
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment