Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
badw-it
DHParser
Commits
8e3b05a7
Commit
8e3b05a7
authored
Jan 11, 2018
by
di68kap
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- transform.py: weitere Transformationsregeln
- MLW: AST-Transformations
parent
e64661d8
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
396 additions
and
190 deletions
+396
-190
DHParser/syntaxtree.py
DHParser/syntaxtree.py
+22
-14
DHParser/toolkit.py
DHParser/toolkit.py
+1
-1
DHParser/transform.py
DHParser/transform.py
+127
-10
examples/MLW/Beispiele/facitergula/fascitergula.mlw
examples/MLW/Beispiele/facitergula/fascitergula.mlw
+1
-1
examples/MLW/MLW.ebnf
examples/MLW/MLW.ebnf
+5
-3
examples/MLW/MLWCompiler.py
examples/MLW/MLWCompiler.py
+64
-27
examples/MLW/grammar_tests/REPORT/02_test_lemmaposition.md
examples/MLW/grammar_tests/REPORT/02_test_lemmaposition.md
+47
-133
examples/MLW/verarbeite_Beispiele.py
examples/MLW/verarbeite_Beispiele.py
+1
-1
test/test_transform.py
test/test_transform.py
+128
-0
No files found.
DHParser/syntaxtree.py
View file @
8e3b05a7
...
...
@@ -616,20 +616,25 @@ def mock_syntax_tree(sxpr):
that does not match an opening bracket matched earlier within the same
package."""
s
=
s
.
strip
()
while
s
[
0
]
!=
')'
:
if
s
[
0
]
!=
'('
:
raise
ValueError
(
'"(" expected, not '
+
s
[:
10
])
# assert s[0] == '(', s
level
=
1
k
=
1
while
level
>
0
:
if
s
[
k
]
==
'('
:
level
+=
1
elif
s
[
k
]
==
')'
:
level
-=
1
k
+=
1
yield
s
[:
k
]
s
=
s
[
k
:].
strip
()
try
:
while
s
[
0
]
!=
')'
:
if
s
[
0
]
!=
'('
:
raise
ValueError
(
'"(" expected, not '
+
s
[:
10
])
# assert s[0] == '(', s
level
=
1
k
=
1
while
level
>
0
:
if
s
[
k
]
==
'('
:
level
+=
1
elif
s
[
k
]
==
')'
:
level
-=
1
k
+=
1
yield
s
[:
k
]
s
=
s
[
k
:].
strip
()
except
IndexError
:
errmsg
=
(
'Malformed S-expression. Unprocessed part: "%s"'
%
s
)
if
s
\
else
'Malformed S-expression. Closing bracket(s) ")" missing.'
raise
AssertionError
(
errmsg
)
sxpr
=
sxpr
.
strip
()
if
sxpr
[
0
]
!=
'('
:
...
...
@@ -637,6 +642,9 @@ def mock_syntax_tree(sxpr):
# assert sxpr[0] == '(', sxpr
sxpr
=
sxpr
[
1
:].
strip
()
match
=
re
.
match
(
r
'[\w:]+'
,
sxpr
)
if
match
is
None
:
raise
AssertionError
(
'Malformed S-expression Node-tagname or identifier expected, '
'not "%s"'
%
sxpr
[:
40
].
replace
(
'
\n
'
,
''
))
name
,
class_name
=
(
sxpr
[:
match
.
end
()].
split
(
':'
)
+
[
''
])[:
2
]
sxpr
=
sxpr
[
match
.
end
():].
strip
()
if
sxpr
[
0
]
==
'('
:
...
...
DHParser/toolkit.py
View file @
8e3b05a7
...
...
@@ -130,7 +130,7 @@ def logging(dirname="LOGS"):
save
=
LOGGING
except
NameError
:
save
=
""
LOGGING
=
dirname
LOGGING
=
dirname
or
""
yield
LOGGING
=
save
...
...
DHParser/transform.py
View file @
8e3b05a7
...
...
@@ -44,6 +44,7 @@ __all__ = ('TransformationDict',
'merge_children'
,
'replace_content'
,
'apply_if'
,
'traverse_locally'
,
'is_anonymous'
,
'is_whitespace'
,
'is_empty'
,
...
...
@@ -51,6 +52,14 @@ __all__ = ('TransformationDict',
'is_token'
,
'is_one_of'
,
'has_content'
,
'lstrip'
,
'rstrip'
,
'strip'
,
'keep_children'
,
'keep_children_if'
,
'keep_tokens'
,
'keep_nodes'
,
'keep_content'
,
'remove_children_if'
,
'remove_nodes'
,
'remove_content'
,
...
...
@@ -63,7 +72,6 @@ __all__ = ('TransformationDict',
'remove_infix_operator'
,
'remove_single_child'
,
'remove_tokens'
,
'keep_children'
,
'flatten'
,
'forbid'
,
'require'
,
...
...
@@ -508,10 +516,60 @@ def is_expendable(context: List[Node]) -> bool:
return
is_empty
(
context
)
or
is_whitespace
(
context
)
@transformation_factory(Callable)
def lstrip(context: List[Node], condition: Callable = is_expendable):
    """Recursively strip leading child-nodes that satisfy `condition`.

    The node that is processed is the last node of `context`. Before the
    leading children of that node are examined, its first child is
    stripped recursively. `condition` is always called with a context,
    i.e. `context` extended by the child in question.
    """
    node = context[-1]
    while node.children:
        # Clean up the first child from the inside before judging it.
        lstrip(context + [node.children[0]], condition)
        children = node.children
        total = len(children)
        dropped = 0
        while dropped < total and condition(context + [children[dropped]]):
            dropped += 1
        if dropped == 0:
            # Nothing more to strip on this level.
            break
        node.result = children[dropped:]
@transformation_factory(Callable)
def rstrip(context: List[Node], condition: Callable = is_expendable):
    """Recursively removes all trailing child-nodes that fulfill a given condition.

    The node that is processed is the last node of `context`. Before the
    trailing children of that node are examined, its last child is
    stripped recursively. `condition` is always called with a context,
    i.e. `context` extended by the child in question.
    """
    node = context[-1]
    i, L = 0, len(node.children)
    # Repeat as long as the previous pass removed something (i < L) and
    # there are still children left to look at.
    while i < L and node.children:
        rstrip(context + [node.children[-1]], condition)
        L = len(node.children)
        i = L
        while i > 0 and condition(context + [node.children[i - 1]]):
            i -= 1
        if i < L:
            node.result = node.children[:i]
@transformation_factory(Callable)
def strip(context: List[Node], condition: Callable = is_expendable) -> None:
    """Removes leading and trailing child-nodes that fulfill a given condition.

    Applies `lstrip` and then `rstrip` to the same context. Nothing is
    returned; the last node of `context` is changed by those two calls.
    """
    lstrip(context, condition)
    rstrip(context, condition)
@transformation_factory(AbstractSet[str])
def is_token(context: List[Node], tokens: AbstractSet[str] = frozenset()) -> bool:
    """Checks whether the last node in the context has `ptype == TOKEN_PTYPE`
    and its content without leading or trailing whitespace child-nodes
    matches one of the given tokens. If no tokens are given, any token is a match.
    """
    def stripped(nd: Node) -> str:
        """Returns the content of `nd` without its leading and trailing
        whitespace child-nodes; for a node without children the plain
        content is returned."""
        # assert node.parser.ptype == TOKEN_PTYPE
        if nd.children:
            i, k = 0, len(nd.children)
            # Compare the children's parser types, not the child nodes
            # themselves, with the whitespace type.
            while i < len(nd.children) and nd.children[i].parser.ptype == WHITESPACE_PTYPE:
                i += 1
            while k > 0 and nd.children[k - 1].parser.ptype == WHITESPACE_PTYPE:
                k -= 1
            # Use the parameter `nd`, not the enclosing function's `node`.
            return "".join(child.content for child in nd.children[i:k])
        return nd.content

    node = context[-1]
    return node.parser.ptype == TOKEN_PTYPE and (not tokens or stripped(node) in tokens)
@
transformation_factory
(
AbstractSet
[
str
])
...
...
@@ -526,13 +584,44 @@ def has_content(context: List[Node], regexp: str) -> bool:
return
bool
(
re
.
match
(
regexp
,
context
[
-
1
].
content
))
@transformation_factory(AbstractSet[str])
def has_parent(context: List[Node], tag_name_set: AbstractSet[str]) -> bool:
    """Checks whether a node with one of the given tag names appears somewhere
    in the context before the last node in the context.

    All ancestors are inspected, including the first entry of the context.
    Returns False if the context consists of the last node only.
    """
    # context[:-1] covers every ancestor; the previous index loop
    # `range(2, len(context))` stopped one short and skipped context[0].
    return any(ancestor.tag_name in tag_name_set
               for ancestor in reversed(context[:-1]))
@transformation_factory(Callable)
def apply_if(context: List[Node], transformation: Callable, condition: Callable):
    """Applies a transformation only if a certain condition is met.

    `condition` is called with the complete context (not with the bare
    last node) and `transformation` is called with the same context if
    the condition returned a true value.
    """
    if condition(context):
        transformation(context)
# @transformation_factory(List[Callable])
# def apply_to_child(context: List[Node], transformations: List[Callable], condition: Callable):
#     """Applies a list of transformations to those children that meet a specific condition."""
# node = context[-1]
# for child in node.children:
# context.append(child)
# if condition(context):
# for transform in transformations:
# transform(context)
# context.pop()
@transformation_factory(Dict)
def traverse_locally(context: List[Node],
                     processing_table: Dict,              # actually: ProcessingTableType
                     key_func: Callable = key_tag_name):  # actually: KeyFunc
    """Runs `traverse` on the last node of the context.

    The last node of `context` is passed to `traverse` together with
    `processing_table` and `key_func`, so that the given table can be
    applied locally, starting from that node, rather than to the whole
    tree.
    """
    subtree_root = context[-1]
    traverse(subtree_root, processing_table, key_func)
@
transformation_factory
(
slice
)
def
keep_children
(
context
:
List
[
Node
],
section
:
slice
=
slice
(
None
)):
...
...
@@ -543,7 +632,35 @@ def keep_children(context: List[Node], section: slice = slice(None)):
@transformation_factory(Callable)
def keep_children_if(context: List[Node], condition: Callable):
    """Keeps only those children for which `condition()` returns `True`;
    all other children are removed.

    `condition` is called with `context` extended by the respective child.
    A node without children is left untouched.
    """
    node = context[-1]
    if node.children:
        node.result = tuple(c for c in node.children if condition(context + [c]))
@transformation_factory
def keep_tokens(context: List[Node], tokens: AbstractSet[str] = frozenset()):
    """Keeps only those immediate descendants of a node that are tokens
    from a particular set of tokens; all other children are removed.
    If ``tokens`` is the empty set, every token is kept, because
    `is_token` matches any token in that case."""
    keep_children_if(context, partial(is_token, tokens=tokens))
@transformation_factory
def keep_nodes(context: List[Node], tag_names: AbstractSet[str]):
    """Keeps children by tag name: only those children for which
    `is_one_of` holds with the given set of tag names are retained;
    all other children are removed."""
    keep_children_if(context, partial(is_one_of, tag_name_set=tag_names))
@transformation_factory
def keep_content(context: List[Node], regexp: str):
    """Keeps children depending on their string value: only those children
    for which `has_content` holds with the given regular expression are
    retained; all other children are removed."""
    keep_children_if(context, partial(has_content, regexp=regexp))
@
transformation_factory
(
Callable
)
def
remove_children_if
(
context
:
List
[
Node
],
condition
:
Callable
):
"""Removes all children for which `condition()` returns `True`."""
node
=
context
[
-
1
]
if
node
.
children
:
...
...
@@ -576,16 +693,16 @@ def remove_children_if(context: List[Node], condition: Callable): # , section:
remove_whitespace
=
remove_children_if
(
is_whitespace
)
# partial(remove_children_if, condition=is_whitespace)
remove_empty
=
remove_children_if
(
is_empty
)
remove_expendables
=
remove_children_if
(
is_expendable
)
# partial(remove_children_if, condition=is_expendable)
remove_first
=
apply_if
(
keep_children
(
slice
(
1
,
None
)),
lambda
nd
:
len
(
nd
.
children
)
>
1
)
remove_last
=
apply_if
(
keep_children
(
slice
(
None
,
-
1
)),
lambda
nd
:
len
(
nd
.
children
)
>
1
)
remove_brackets
=
apply_if
(
keep_children
(
slice
(
1
,
-
1
)),
lambda
nd
:
len
(
nd
.
children
)
>=
2
)
remove_first
=
apply_if
(
keep_children
(
slice
(
1
,
None
)),
lambda
ctx
:
len
(
ctx
[
-
1
]
.
children
)
>
1
)
remove_last
=
apply_if
(
keep_children
(
slice
(
None
,
-
1
)),
lambda
ctx
:
len
(
ctx
[
-
1
]
.
children
)
>
1
)
remove_brackets
=
apply_if
(
keep_children
(
slice
(
1
,
-
1
)),
lambda
ctx
:
len
(
ctx
[
-
1
]
.
children
)
>=
2
)
remove_infix_operator
=
keep_children
(
slice
(
0
,
None
,
2
))
remove_single_child
=
apply_if
(
keep_children
(
slice
(
0
)),
lambda
nd
:
len
(
nd
.
children
)
==
1
)
remove_single_child
=
apply_if
(
keep_children
(
slice
(
0
)),
lambda
ctx
:
len
(
ctx
[
-
1
]
.
children
)
==
1
)
@transformation_factory
def remove_tokens(context: List[Node], tokens: AbstractSet[str] = frozenset()):
    """Removes any among a particular set of tokens from the immediate
    descendants of a node. If ``tokens`` is the empty set, all tokens
    are removed."""
    remove_children_if(context, partial(is_token, tokens=tokens))
...
...
examples/MLW/Beispiele/facitergula/fascitergula.mlw
View file @
8e3b05a7
...
...
@@ -22,7 +22,7 @@ SCHREIBWEISE
BEDEUTUNG
LAT pannus, faciale, sudarium
DEU Gesichts-, Schweißtuch {usu liturg.; de re v. {=> eintrag/ibi_X}}
DEU Gesichts-
tuch
, Schweißtuch {usu liturg.;
;
de re v. {=> eintrag/ibi_X}}
* Catal.: thes. Germ.; 28,11 (post 851) "-um III".
* Form.: Sangall.; {#ibi_2} 39 p. 421,16
...
...
examples/MLW/MLW.ebnf
View file @
8e3b05a7
...
...
@@ -36,7 +36,7 @@ LemmaWort = LAT_WORT
LemmaVarianten = LemmaVariante { [";" | ","] [ZW] LemmaVariante } [ ABS Zusatz ]
LemmaVariante = LAT_WORT [Zusatz]
LemmaVariante = LAT_WORT [Zusatz]
# Ist eine Lemma immer ein einzelnes Wort?
## GRAMMATIK-POSITION ##
...
...
@@ -113,8 +113,10 @@ Bedeutungskategorie = { EINZEILER [LZ] [Zusatz] [LZ] } §":"
Interpretamente = LateinischeBedeutung (LZ | " " | "--") §DeutscheBedeutung [":"]
LateinischeBedeutung = LAT [ZW] LateinischerAusdruck { "," LateinischerAusdruck }
DeutscheBedeutung = DEU [ZW] DeutscherAusdruck { "," DeutscherAusdruck }
LateinischerAusdruck = { <(LAT_WORT | "(" { LAT_WORT }+ ")") [Zusatz]> }+
DeutscherAusdruck = { <(DEU_WORT | "(" { DEU_WORT }+ ")") [Zusatz]> }+
LateinischerAusdruck = { <LateinischesWort [Zusatz]> }+
DeutscherAusdruck = { <DeutschesWort [Zusatz]> }+
LateinischesWort = (LAT_WORT | "(" { LAT_WORT }+ ")")
DeutschesWort = (DEU_WORT | "(" { DEU_WORT }+ ")")
LAT = "LATEINISCH" | "LAT"
DEU = "DEUTSCH" | "DEU"
...
...
examples/MLW/MLWCompiler.py
View file @
8e3b05a7
...
...
@@ -24,8 +24,10 @@ from DHParser import logging, is_filename, load_if_file, \
traverse
,
remove_children_if
,
merge_children
,
is_anonymous
,
\
reduce_single_child
,
replace_by_single_child
,
replace_or_reduce
,
remove_whitespace
,
\
remove_expendables
,
remove_empty
,
remove_tokens
,
flatten
,
is_whitespace
,
\
is_empty
,
is_expendable
,
collapse
,
replace_content
,
remove_nodes
,
remove_content
,
remove_brackets
,
replace_parser
,
\
keep_children
,
is_one_of
,
has_content
,
apply_if
,
remove_first
,
remove_last
is_empty
,
is_expendable
,
collapse
,
replace_content
,
remove_nodes
,
remove_content
,
\
remove_brackets
,
replace_parser
,
traverse_locally
,
remove_nodes
,
\
keep_children
,
is_one_of
,
has_content
,
apply_if
,
remove_first
,
remove_last
,
\
lstrip
,
rstrip
,
strip
,
keep_nodes
#######################################################################
...
...
@@ -88,7 +90,7 @@ class MLWGrammar(Grammar):
LemmaVarianten = LemmaVariante { [";" | ","] [ZW] LemmaVariante } [ ABS Zusatz ]
LemmaVariante = LAT_WORT [Zusatz]
LemmaVariante = LAT_WORT [Zusatz]
# Ist eine Lemma immer ein einzelnes Wort?
## GRAMMATIK-POSITION ##
...
...
@@ -165,8 +167,10 @@ class MLWGrammar(Grammar):
Interpretamente = LateinischeBedeutung (LZ | " " | "--") §DeutscheBedeutung [":"]
LateinischeBedeutung = LAT [ZW] LateinischerAusdruck { "," LateinischerAusdruck }
DeutscheBedeutung = DEU [ZW] DeutscherAusdruck { "," DeutscherAusdruck }
LateinischerAusdruck = { <(LAT_WORT | "(" { LAT_WORT }+ ")") [Zusatz]> }+
DeutscherAusdruck = { <(DEU_WORT | "(" { DEU_WORT }+ ")") [Zusatz]> }+
LateinischerAusdruck = { <LateinischesWort [Zusatz]> }+
DeutscherAusdruck = { <DeutschesWort [Zusatz]> }+
LateinischesWort = (LAT_WORT | "(" { LAT_WORT }+ ")")
DeutschesWort = (DEU_WORT | "(" { DEU_WORT }+ ")")
LAT = "LATEINISCH" | "LAT"
DEU = "DEUTSCH" | "DEU"
...
...
@@ -305,7 +309,7 @@ class MLWGrammar(Grammar):
flexion
=
Forward
()
genus
=
Forward
()
wortart
=
Forward
()
source_hash__
=
"
a01b075b877de8bc46f92fa3b3e5b028
"
source_hash__
=
"
ded96803a4eb4164ea8d2cf18924172b
"
parser_initialization__
=
"upon instantiation"
COMMENT__
=
r
'(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)'
WHITESPACE__
=
r
'[\t ]*'
...
...
@@ -388,8 +392,10 @@ class MLWGrammar(Grammar):
GRI
=
Alternative
(
Token
(
"GRIECHISCH"
),
Token
(
"GRIECH"
),
Token
(
"GRIE"
),
Token
(
"GRI"
))
DEU
=
Alternative
(
Token
(
"DEUTSCH"
),
Token
(
"DEU"
))
LAT
=
Alternative
(
Token
(
"LATEINISCH"
),
Token
(
"LAT"
))
DeutscherAusdruck
=
OneOrMore
(
AllOf
(
Alternative
(
DEU_WORT
,
Series
(
Token
(
"("
),
OneOrMore
(
DEU_WORT
),
Token
(
")"
))),
Option
(
Zusatz
)))
LateinischerAusdruck
=
OneOrMore
(
AllOf
(
Alternative
(
LAT_WORT
,
Series
(
Token
(
"("
),
OneOrMore
(
LAT_WORT
),
Token
(
")"
))),
Option
(
Zusatz
)))
DeutschesWort
=
Alternative
(
DEU_WORT
,
Series
(
Token
(
"("
),
OneOrMore
(
DEU_WORT
),
Token
(
")"
)))
LateinischesWort
=
Alternative
(
LAT_WORT
,
Series
(
Token
(
"("
),
OneOrMore
(
LAT_WORT
),
Token
(
")"
)))
DeutscherAusdruck
=
OneOrMore
(
AllOf
(
DeutschesWort
,
Option
(
Zusatz
)))
LateinischerAusdruck
=
OneOrMore
(
AllOf
(
LateinischesWort
,
Option
(
Zusatz
)))
DeutscheBedeutung
=
Series
(
DEU
,
Option
(
ZW
),
DeutscherAusdruck
,
ZeroOrMore
(
Series
(
Token
(
","
),
DeutscherAusdruck
)))
LateinischeBedeutung
=
Series
(
LAT
,
Option
(
ZW
),
LateinischerAusdruck
,
ZeroOrMore
(
Series
(
Token
(
","
),
LateinischerAusdruck
)))
Interpretamente
=
Series
(
LateinischeBedeutung
,
Alternative
(
LZ
,
Token
(
" "
),
Token
(
"--"
)),
DeutscheBedeutung
,
Option
(
Token
(
":"
)),
mandatory
=
2
)
...
...
@@ -454,54 +460,84 @@ def get_grammar() -> MLWGrammar:
#
#######################################################################
LemmaVariante_table
=
{
"LAT_WORT, DEU_WORT"
:
[
remove_whitespace
,
reduce_single_child
],
"Zusatz"
:
[
reduce_single_child
]
}
MLW_AST_transformation_table
=
{
# AST Transformations for the MLW-grammar
"+"
:
[
remove_empty
,
remove_
tokens
,
remove_
nodes
(
'ZWW'
,
'LZ'
,
'DPP'
,
'COMMENT__'
,
'ABS'
,
'SEM'
)],
"+"
:
[
remove_empty
,
remove_
nodes
(
'ZWW'
,
'LZ'
,
'DPP'
,
'COMMENT__'
,
'ABS'
,
'SEM'
)
,
remove_
tokens
(
","
,
"{"
,
"}"
,
"=>"
)],
"Autor"
:
[
reduce_single_child
],
"Artikel"
:
[],
"LemmaPosition"
:
[],
"LemmaPosition"
:
[
remove_first
],
"Lemma"
:
[],
"klassisch"
:
[],
"gesichert"
:
[],
"LemmaVarianten"
:
[],
"LemmaWort"
:
[],
"klassisch"
:
[
reduce_single_child
],
"gesichert"
:
[
reduce_single_child
],
"LemmaVariante"
:
[
reduce_single_child
,
traverse_locally
(
LemmaVariante_table
)],
"LemmaVarianten"
:
[
flatten
,
remove_nodes
(
"ZW"
)],
"LemmaWort"
:
[
reduce_single_child
],
"LemmaZusatz"
:
[],
"lzs_typ"
:
[],
"GrammatikPosition"
:
[],
"GrammatikPosition"
:
[
remove_first
,
flatten
],
"wortart"
:
[
replace_or_reduce
],
"GrammatikVarianten"
:
[],
"flexion"
:
[],
"FLEX"
:
[],
"deklination"
:
[],
"konjugation"
:
[],
"FLEX"
:
[
remove_whitespace
,
reduce_single_child
],
"genus"
:
[
replace_or_reduce
],
"EtymologiePosition"
:
[],
"EtymologieVarianten"
:
[],
"EtymologieVariante"
:
[],
"ArtikelKopf"
:
[
replace_by_single_child
],
"SchreibweisenPosition
"
:
[
],
"SchreibweisenPosition
, StrukturPosition, VerwechselungsPosition"
:
[
remove_first
],
"SWTyp"
:
[
replace_or_reduce
],
"SWVariante"
:
[],
"Schreibweise"
:
[
replace_by_single_child
],
"BedeutungsPosition"
:
[],
"Kategorie"
:
[],
"Varianten"
:
[
flatten
],
"Variante"
:
[],
"Gegenstand"
:
[
reduce_single_child
],
"Besonderheit"
:
[
reduce_single_child
],
"BedeutungsPosition"
:
[
flatten
,
remove_tokens
(
"BEDEUTUNG"
)],
"Bedeutung"
:
[],
"U1Bedeutung, U2Bedeutung, U3Bedeutung, U4Bedeutung, U5Bedeutung"
:
[
remove_first
,
flatten
],
"Bedeutungskategorie"
:
[],
"Beleg"
:
[],
"BelegText"
:
[
partial
(
strip
,
condition
=
lambda
context
:
is_expendable
(
context
)
or
has_content
(
context
,
'[".]'
)),
reduce_single_child
],
"BelegStelle"
:
[
flatten
],
"Interpretamente"
:
[],
"LateinischeBedeutung"
:
[],
"DeutscheBedeutung"
:
[],
"Belege"
:
[],
"LateinischeBedeutung"
:
[
remove_nodes
(
"LAT"
),
flatten
],
"DeutscheBedeutung"
:
[
remove_nodes
(
"DEU"
),
flatten
],
"LateinischerAusdruck"
:
[
flatten
,
reduce_single_child
],
"DeutscherAusdruck"
:
[
flatten
,
reduce_single_child
],
"LateinischesWort, DeutschesWort"
:
[
strip
,
collapse
],
"Belege"
:
[
flatten
,
remove_tokens
(
"*"
)],
"Beleg"
:
[],
"EinBeleg"
:
[],
"Zusatz"
:
[],
"ArtikelVerfasser"
:
[],
"Zitat"
:
[
flatten
,
remove_nodes
(
"ZW"
)],
"Zusatz"
:
[
reduce_single_child
,
flatten
,
remove_tokens
(
";;"
,
";"
)],
"ArtikelVerfasser"
:
[
remove_first
],
"Stellenverzeichnis"
:
[
remove_first
],
"Verweisliste"
:
[
flatten
,
remove_tokens
(
"*"
)],
"Stellenverweis"
:
[
flatten
],
"Name"
:
[],
"Stelle"
:
[
collapse
],
"SW_LAT"
:
[
replace_or_reduce
],
"SW_DEU"
:
[
replace_or_reduce
],
"SW_GRIECH"
:
[
replace_or_reduce
],
"Beleg"
:
[
replace_by_single_child
],
"Verweis"
:
[],
"Verweis"
:
[
remove_tokens
(
"=>"
)],
"VerweisZiel"
:
[],
"Anker"
:
[
remove_tokens
(
"#"
),
reduce_single_child
],
"Werk"
:
[
reduce_single_child
],
"ZielName"
:
[
replace_by_single_child
],
"URL"
:
[
flatten
,
keep_nodes
(
'protokoll'
,
'domäne'
,
'pfad'
,
'ziel'
)],
"NAMENS_ABKÜRZUNG"
:
[],
"NAME"
:
[],
"DEU_WORT"
:
[
reduce_single_child
],
...
...
@@ -512,6 +548,7 @@ MLW_AST_transformation_table = {
"GROSSSCHRIFT"
:
[],
"GROSSFOLGE"
:
[],
"BUCHSTABENFOLGE"
:
[],
"EINZEILER, FREITEXT, MEHRZEILER"
:
[
strip
,
collapse
],
"ZEICHENFOLGE"
:
[],
"TR"
:
[
replace_or_reduce
],
"ABS"
:
[
replace_or_reduce
],
...
...
@@ -526,7 +563,7 @@ MLW_AST_transformation_table = {
"KOMMENTARZEILEN"
:
[],
"DATEI_ENDE"
:
[],
"NIEMALS"
:
[],
":Token"
:
[],
":Token"
:
[
remove_whitespace
,
reduce_single_child
],
"RE"
:
reduce_single_child
,
"*"
:
replace_by_single_child
}
...
...
examples/MLW/grammar_tests/REPORT/02_test_lemmaposition.md
View file @
8e3b05a7
...
...
@@ -13,9 +13,7 @@ Match-test "1"
### AST
(Lemma
(LemmaWort
(LAT_WORT
"facitergula"
)
"facitergula"
)
)
...
...
@@ -41,49 +39,17 @@ Match-test "1"
### AST
(LemmaVarianten
(L
AT_WORT
(L
emmaVariante
"fascitergula"
)
(:ZeroOrMore
(:Series
(ZW
(ZEILENSPRUNG
(:RegExp
""
""
)
)
)
(LAT_WORT
"facietergula"
)
)
(:Series
(ZW
(ZEILENSPRUNG
(:RegExp
""
""
)
)
)
(LAT_WORT
"facistergula"
)
)
(:Series
(ZW
(ZEILENSPRUNG
(:RegExp
""
""
)
)
)
(LAT_WORT
"farcutergula"
)
)
(LemmaVariante
"facietergula"
)
(LemmaVariante
"facistergula"
)
(LemmaVariante
"farcutergula"
)
)
...
...
@@ -95,7 +61,7 @@ Match-test "2"
### AST
(LemmaVarianten
(L
AT_WORT
(L
emmaVariante
"fascitergula"
)
)
...
...
@@ -108,7 +74,7 @@ Match-test "3"
### AST
(LemmaVarianten
(L
AT_WORT
(L
emmaVariante
(:RegExp
"fascitergula"
)
...
...
@@ -118,17 +84,10 @@ Match-test "3"
)
(LemmaVariante
(LAT_WORT
(:RegExp
"facietergula"
)
(:Whitespace
" "
)
"facietergula"
)
(Zusatz
(DEU_WORT
"sim."
)
"sim."
)
)
)
...
...
@@ -141,27 +100,18 @@ Match-test "4"
### AST
(LemmaVarianten
(L
AT_WORT
(L
emmaVariante
"fascitergula"
)
(:ZeroOrMore
(LemmaVariante
"facietergula"
)
(LemmaVariante
(LAT_WORT
"faci
e
tergula"
"fa
s
ci
s
tergula"
)
(LemmaVariante
(LAT_WORT
(:RegExp
"fascistergula"
)
(:Whitespace
" "
)
)
(Zusatz
(DEU_WORT
"sim."
)
)
(Zusatz
"sim."
)
)
)
...
...
@@ -195,79 +145,43 @@ Match-test "1"
(LemmaPosition
(Lemma
(LemmaWort
(LAT_WORT
"facitergula"
)
"facitergula"
)
)
(LemmaVarianten
(L
AT_WORT