Commit 1422279c authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- transformations for unknown parser names now issue a warning

parent e3ce4b20
...@@ -26,12 +26,12 @@ from DHParser.ebnf import EBNFCompiler, grammar_changed, \ ...@@ -26,12 +26,12 @@ from DHParser.ebnf import EBNFCompiler, grammar_changed, \
get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \ get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
from DHParser.error import Error, is_error, has_errors, only_errors from DHParser.error import Error, is_error, has_errors, only_errors
from DHParser.log import logging
from DHParser.parse import Grammar, Compiler, compile_source from DHParser.parse import Grammar, Compiler, compile_source
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc from DHParser.syntaxtree import Node, TransformationFunc
from DHParser.toolkit import load_if_file, is_python_code, compile_python_object, \ from DHParser.toolkit import load_if_file, is_python_code, compile_python_object, \
re re
from DHParser.log import logging
__all__ = ('DHPARSER_IMPORTS', __all__ = ('DHPARSER_IMPORTS',
'GrammarError', 'GrammarError',
...@@ -84,7 +84,8 @@ from DHParser import logging, is_filename, load_if_file, \\ ...@@ -84,7 +84,8 @@ from DHParser import logging, is_filename, load_if_file, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\ remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\ is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, \\ remove_nodes, remove_content, remove_brackets, replace_parser, \\
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip
''' '''
...@@ -296,7 +297,7 @@ def load_compiler_suite(compiler_suite: str) -> \ ...@@ -296,7 +297,7 @@ def load_compiler_suite(compiler_suite: str) -> \
Tuple[PreprocessorFactoryFunc, ParserFactoryFunc, Tuple[PreprocessorFactoryFunc, ParserFactoryFunc,
TransformerFactoryFunc, CompilerFactoryFunc]: TransformerFactoryFunc, CompilerFactoryFunc]:
""" """
Extracts a compiler suite from file or string ``compiler suite`` Extracts a compiler suite from file or string `compiler_suite`
and returns it as a tuple (preprocessor, parser, ast, compiler). and returns it as a tuple (preprocessor, parser, ast, compiler).
Returns: Returns:
...@@ -442,6 +443,10 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It ...@@ -442,6 +443,10 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
source = f.read() source = f.read()
sections = RX_SECTION_MARKER.split(source) sections = RX_SECTION_MARKER.split(source)
intro, imports, preprocessor, parser, ast, compiler, outro = sections intro, imports, preprocessor, parser, ast, compiler, outro = sections
# TODO: Verify transformation table
ast_trans_table = compile_python_object(DHPARSER_IMPORTS + ast,
r'(?:\w+_)?AST_transformation_table$')
messages.extend(ebnf_compiler.verify_transformation_table(ast_trans_table))
except (PermissionError, FileNotFoundError, IOError) as error: except (PermissionError, FileNotFoundError, IOError) as error:
intro, imports, preprocessor, parser, ast, compiler, outro = '', '', '', '', '', '', '' intro, imports, preprocessor, parser, ast, compiler, outro = '', '', '', '', '', '', ''
except ValueError as error: except ValueError as error:
......
...@@ -27,7 +27,7 @@ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, RE, \ ...@@ -27,7 +27,7 @@ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, RE, \
Compiler Compiler
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc, WHITESPACE_PTYPE, TOKEN_PTYPE from DHParser.syntaxtree import Node, TransformationFunc, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table
from DHParser.transform import traverse, remove_brackets, \ from DHParser.transform import traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \ reduce_single_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, remove_infix_operator remove_tokens, flatten, forbid, assert_content, remove_infix_operator
...@@ -475,6 +475,18 @@ class EBNFCompiler(Compiler): ...@@ -475,6 +475,18 @@ class EBNFCompiler(Compiler):
compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)] compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(compiler) return '\n'.join(compiler)
def verify_transformation_table(self, transtable):
    """Check a transformation table against the compiled grammar.

    Collects one Error (warning code UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE)
    for every key of ``transtable`` that is neither a rule defined in the
    grammar, nor a wildcard entry ('*', '+', '~'), nor an anonymous-parser
    key (starting with ':').

    Must only be called after compilation (hence the dirty-flag assertion,
    since ``self.rules`` is filled during compilation).

    :param transtable: the AST-transformation table to verify.
    :return: a list of Error objects, empty if no unknown symbols were found.
    """
    assert self._dirty_flag
    known_symbols = self.rules.keys()
    # expand_table splits comma-separated compound keys into single entries
    candidates = set(expand_table(transtable).keys()) - {'*', '+', '~'}
    unknown = [sym for sym in candidates
               if sym not in known_symbols and not sym.startswith(":")]
    return [Error(('Symbol "%s" is not defined in grammar %s but appears in '
                   'the transformation table!') % (sym, self.grammar_name),
                  Error.UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE)
            for sym in unknown]
def assemble_parser(self, definitions: List[Tuple[str, str]], root_node: Node) -> str: def assemble_parser(self, definitions: List[Tuple[str, str]], root_node: Node) -> str:
""" """
......
...@@ -46,6 +46,8 @@ class Error: ...@@ -46,6 +46,8 @@ class Error:
REDEFINED_DIRECTIVE_WARNING = 101 REDEFINED_DIRECTIVE_WARNING = 101
REDECLARED_TOKEN_WARNING = 102 REDECLARED_TOKEN_WARNING = 102
UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE = 601
# error codes # error codes
MANDATORY_CONTINUATION = 1001 MANDATORY_CONTINUATION = 1001
......
...@@ -61,18 +61,18 @@ https://epsil.github.io/gll/ ...@@ -61,18 +61,18 @@ https://epsil.github.io/gll/
import copy import copy
import os import os
from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional
from DHParser.error import Error, is_error, linebreaks, adjust_error_locations from DHParser.error import Error, is_error, linebreaks, adjust_error_locations
from DHParser.log import is_logging, logfile_basename, HistoryRecord, log_ST, \
log_parsing_history
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, \
PreprocessorFunc, with_source_mapping, strip_tokens
from DHParser.stringview import StringView, EMPTY_STRING_VIEW from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \ from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_PARSER TOKEN_PTYPE, ZOMBIE_PARSER
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, \
PreprocessorFunc, with_source_mapping, strip_tokens
from DHParser.toolkit import sane_parser_name, \ from DHParser.toolkit import sane_parser_name, \
escape_control_characters, load_if_file, re escape_control_characters, load_if_file, re
from DHParser.log import log_dir, is_logging, logfile_basename, HistoryRecord, log_ST, \
log_parsing_history
from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional
__all__ = ('Parser', __all__ = ('Parser',
'UnknownParserError', 'UnknownParserError',
...@@ -1931,11 +1931,11 @@ class Compiler: ...@@ -1931,11 +1931,11 @@ class Compiler:
def __init__(self, grammar_name="", grammar_source=""): def __init__(self, grammar_name="", grammar_source=""):
self._reset() self._reset()
self._dirty_flag = False
self.set_grammar_name(grammar_name, grammar_source) self.set_grammar_name(grammar_name, grammar_source)
def _reset(self): def _reset(self):
self.context = [] # type: List[Node] self.context = [] # type: List[Node]
self._dirty_flag = False
def __call__(self, node: Node) -> Any: def __call__(self, node: Node) -> Any:
""" """
...@@ -1947,8 +1947,7 @@ class Compiler: ...@@ -1947,8 +1947,7 @@ class Compiler:
""" """
if self._dirty_flag: if self._dirty_flag:
self._reset() self._reset()
else: self._dirty_flag = True
self._dirty_flag = True
result = self.compile(node) result = self.compile(node)
self.propagate_error_flags(node, lazy=True) self.propagate_error_flags(node, lazy=True)
return result return result
......
...@@ -47,7 +47,7 @@ except ImportError: ...@@ -47,7 +47,7 @@ except ImportError:
import DHParser.foreign_typing as typing import DHParser.foreign_typing as typing
sys.modules['typing'] = typing # make it possible to import from typing sys.modules['typing'] = typing # make it possible to import from typing
from typing import Any, Iterable, Sequence, Set, Union, cast from typing import Any, Iterable, Sequence, Set, Union, Dict, cast
__all__ = ('escape_re', __all__ = ('escape_re',
'escape_control_characters', 'escape_control_characters',
...@@ -267,7 +267,7 @@ def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]: ...@@ -267,7 +267,7 @@ def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]:
return [arg] return [arg]
def expand_table(compact_table): def expand_table(compact_table: Dict) -> Dict:
"""Expands a table by separating keywords that are tuples or strings """Expands a table by separating keywords that are tuples or strings
containing comma separated words into single keyword entries with containing comma separated words into single keyword entries with
the same values. Returns the expanded table. the same values. Returns the expanded table.
......
...@@ -27,7 +27,6 @@ sys.path.extend(['../../', '../', './']) ...@@ -27,7 +27,6 @@ sys.path.extend(['../../', '../', './'])
from DHParser import dsl from DHParser import dsl
from DHParser import testing from DHParser import testing
from DHParser import toolkit
# print(dir(dsl)) # print(dir(dsl))
...@@ -42,7 +41,7 @@ with DHParser.log.logging(False): ...@@ -42,7 +41,7 @@ with DHParser.log.logging(False):
from LaTeXCompiler import get_grammar, get_transformer from LaTeXCompiler import get_grammar, get_transformer
with DHParser.log.logging(True): with DHParser.log.logging(False):
error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer, error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer,
fn_patterns=['*_test_*.ini'], fn_patterns=['*_test_*.ini'],
report=True, verbose=True) report=True, verbose=True)
......
...@@ -91,13 +91,14 @@ VerwechselungsPosition = ZWW "VERWECHSELBAR" Position ...@@ -91,13 +91,14 @@ VerwechselungsPosition = ZWW "VERWECHSELBAR" Position
## ARTIKELKOPF POSITIONEN ## ## ARTIKELKOPF POSITIONEN ##
Position = [LZ] §Kategorien Position = [LZ] §(Kategorien | Besonderheiten)
Kategorien = Kategorie { ZWW Kategorie } Kategorien = Kategorie { ZWW Kategorie }
Kategorie = Besonderheit §DPP [LZ] ( Varianten | Kategorien ) Kategorie = Beschreibung DPP [LZ] Besonderheiten
Besonderheit = EINZEILER Besonderheiten = Besonderheit { ZWW Besonderheit }
Besonderheit = Beschreibung DPP [LZ] Varianten
Varianten = Variante { ZWW Variante } Varianten = Variante { ZWW Variante }
Variante = !KATEGORIENZEILE Gegenstand DPP Belege Variante = !KATEGORIENZEILE Beschreibung DPP Belege
Gegenstand = EINZEILER Beschreibung = EINZEILER
#### BEDEUTUNGS-POSITION ##################################################### #### BEDEUTUNGS-POSITION #####################################################
...@@ -179,10 +180,7 @@ BelegStelle = [<Anker | Zusatz>] (Stelle [[ZW] BelegText] | Verweis) [[ZW] ...@@ -179,10 +180,7 @@ BelegStelle = [<Anker | Zusatz>] (Stelle [[ZW] BelegText] | Verweis) [[ZW]
BelegText = /"/ { MEHRZEILER | Anker | Zusatz } §/"/~ ["."] BelegText = /"/ { MEHRZEILER | Anker | Zusatz } §/"/~ ["."]
AutorWerk = EINZEILER AutorWerk = EINZEILER
Werk = EINZEILER
Stelle = EINZEILER Stelle = EINZEILER
Datierung = EINZEILER
Edition = EINZEILER
#### VERWEISE (LINKS) ######################################################## #### VERWEISE (LINKS) ########################################################
......
...@@ -145,13 +145,14 @@ class MLWGrammar(Grammar): ...@@ -145,13 +145,14 @@ class MLWGrammar(Grammar):
## ARTIKELKOPF POSITIONEN ## ## ARTIKELKOPF POSITIONEN ##
Position = [LZ] §Kategorien Position = [LZ] §(Kategorien | Besonderheiten)
Kategorien = Kategorie { ZWW Kategorie } Kategorien = Kategorie { ZWW Kategorie }
Kategorie = Besonderheit §DPP [LZ] ( Varianten | Kategorien ) Kategorie = Beschreibung DPP [LZ] Besonderheiten
Besonderheit = EINZEILER Besonderheiten = Besonderheit { ZWW Besonderheit }
Besonderheit = Beschreibung DPP [LZ] Varianten
Varianten = Variante { ZWW Variante } Varianten = Variante { ZWW Variante }
Variante = !KATEGORIENZEILE Gegenstand DPP Belege Variante = !KATEGORIENZEILE Beschreibung DPP Belege
Gegenstand = EINZEILER Beschreibung = EINZEILER
#### BEDEUTUNGS-POSITION ##################################################### #### BEDEUTUNGS-POSITION #####################################################
...@@ -233,10 +234,7 @@ class MLWGrammar(Grammar): ...@@ -233,10 +234,7 @@ class MLWGrammar(Grammar):
BelegText = /"/ { MEHRZEILER | Anker | Zusatz } §/"/~ ["."] BelegText = /"/ { MEHRZEILER | Anker | Zusatz } §/"/~ ["."]
AutorWerk = EINZEILER AutorWerk = EINZEILER
Werk = EINZEILER
Stelle = EINZEILER Stelle = EINZEILER
Datierung = EINZEILER
Edition = EINZEILER
#### VERWEISE (LINKS) ######################################################## #### VERWEISE (LINKS) ########################################################
...@@ -308,7 +306,6 @@ class MLWGrammar(Grammar): ...@@ -308,7 +306,6 @@ class MLWGrammar(Grammar):
DEU_WORT = Forward() DEU_WORT = Forward()
FREITEXT = Forward() FREITEXT = Forward()
GROSSSCHRIFT = Forward() GROSSSCHRIFT = Forward()
Kategorien = Forward()
LZ = Forward() LZ = Forward()
LemmaWort = Forward() LemmaWort = Forward()
ROEMISCHE_ZAHL = Forward() ROEMISCHE_ZAHL = Forward()
...@@ -321,7 +318,7 @@ class MLWGrammar(Grammar): ...@@ -321,7 +318,7 @@ class MLWGrammar(Grammar):
flexion = Forward() flexion = Forward()
genus = Forward() genus = Forward()
wortart = Forward() wortart = Forward()
source_hash__ = "59b9cf3ee2f5a4bb0c8396422e102f32" source_hash__ = "17e7d9c6b771eb2fa259912b687f8677"
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)' COMMENT__ = r'(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)'
WHITESPACE__ = r'[\t ]*' WHITESPACE__ = r'[\t ]*'
...@@ -375,10 +372,7 @@ class MLWGrammar(Grammar): ...@@ -375,10 +372,7 @@ class MLWGrammar(Grammar):
Anker = Series(Token("{"), Token("@"), ziel, Token("}"), mandatory=2) Anker = Series(Token("{"), Token("@"), ziel, Token("}"), mandatory=2)
VerweisKern = Series(Token("=>"), Alternative(Series(alias, Token("|"), Alternative(Token("-"), URL)), URL), mandatory=1) VerweisKern = Series(Token("=>"), Alternative(Series(alias, Token("|"), Alternative(Token("-"), URL)), URL), mandatory=1)
Verweis = Series(Token("{"), VerweisKern, Token("}")) Verweis = Series(Token("{"), VerweisKern, Token("}"))
Edition = Synonym(EINZEILER)
Datierung = Synonym(EINZEILER)
Stelle = Synonym(EINZEILER) Stelle = Synonym(EINZEILER)
Werk = Synonym(EINZEILER)
AutorWerk = Synonym(EINZEILER) AutorWerk = Synonym(EINZEILER)
BelegText = Series(RegExp('"'), ZeroOrMore(Alternative(MEHRZEILER, Anker, Zusatz)), RE('"'), Option(Token(".")), mandatory=2) BelegText = Series(RegExp('"'), ZeroOrMore(Alternative(MEHRZEILER, Anker, Zusatz)), RE('"'), Option(Token(".")), mandatory=2)
BelegStelle = Series(Option(SomeOf(Anker, Zusatz)), Alternative(Series(Stelle, Option(Series(Option(ZW), BelegText))), Verweis), Option(Series(Option(ZW), Zusatz))) BelegStelle = Series(Option(SomeOf(Anker, Zusatz)), Alternative(Series(Stelle, Option(Series(Option(ZW), BelegText))), Verweis), Option(Series(Option(ZW), Zusatz)))
...@@ -427,13 +421,14 @@ class MLWGrammar(Grammar): ...@@ -427,13 +421,14 @@ class MLWGrammar(Grammar):
U2Bedeutung = OneOrMore(Series(ZWW, Alternative(Token("UU_BEDEUTUNG"), Token("UNTER_UNTER_BEDEUTUNG")), Option(LZ), Bedeutung, Option(U3Bedeutung), mandatory=3)) U2Bedeutung = OneOrMore(Series(ZWW, Alternative(Token("UU_BEDEUTUNG"), Token("UNTER_UNTER_BEDEUTUNG")), Option(LZ), Bedeutung, Option(U3Bedeutung), mandatory=3))
U1Bedeutung = OneOrMore(Series(ZWW, Alternative(Token("U_BEDEUTUNG"), Token("UNTER_BEDEUTUNG")), Option(LZ), Bedeutung, Option(U2Bedeutung), mandatory=3)) U1Bedeutung = OneOrMore(Series(ZWW, Alternative(Token("U_BEDEUTUNG"), Token("UNTER_BEDEUTUNG")), Option(LZ), Bedeutung, Option(U2Bedeutung), mandatory=3))
BedeutungsPosition = OneOrMore(Series(ZWW, Token("BEDEUTUNG"), Option(LZ), Bedeutung, Option(U1Bedeutung), mandatory=3)) BedeutungsPosition = OneOrMore(Series(ZWW, Token("BEDEUTUNG"), Option(LZ), Bedeutung, Option(U1Bedeutung), mandatory=3))
Gegenstand = Synonym(EINZEILER) Beschreibung = Synonym(EINZEILER)
Variante = Series(NegativeLookahead(KATEGORIENZEILE), Gegenstand, DPP, Belege) Variante = Series(NegativeLookahead(KATEGORIENZEILE), Beschreibung, DPP, Belege)
Varianten = Series(Variante, ZeroOrMore(Series(ZWW, Variante))) Varianten = Series(Variante, ZeroOrMore(Series(ZWW, Variante)))
Besonderheit = Synonym(EINZEILER) Besonderheit = Series(Beschreibung, DPP, Option(LZ), Varianten)
Kategorie = Series(Besonderheit, DPP, Option(LZ), Alternative(Varianten, Kategorien), mandatory=1) Besonderheiten = Series(Besonderheit, ZeroOrMore(Series(ZWW, Besonderheit)))
Kategorien.set(Series(Kategorie, ZeroOrMore(Series(ZWW, Kategorie)))) Kategorie = Series(Beschreibung, DPP, Option(LZ), Besonderheiten)
Position = Series(Option(LZ), Kategorien, mandatory=1) Kategorien = Series(Kategorie, ZeroOrMore(Series(ZWW, Kategorie)))
Position = Series(Option(LZ), Alternative(Kategorien, Besonderheiten), mandatory=1)
VerwechselungsPosition = Series(ZWW, Token("VERWECHSELBAR"), Position) VerwechselungsPosition = Series(ZWW, Token("VERWECHSELBAR"), Position)
MetrikPosition = Series(ZWW, Token("METRIK"), Position) MetrikPosition = Series(ZWW, Token("METRIK"), Position)
GebrauchsPosition = Series(ZWW, Token("GEBRAUCH"), Position) GebrauchsPosition = Series(ZWW, Token("GEBRAUCH"), Position)
...@@ -500,7 +495,7 @@ MLW_AST_transformation_table = { ...@@ -500,7 +495,7 @@ MLW_AST_transformation_table = {
# AST Transformations for the MLW-grammar # AST Transformations for the MLW-grammar
"+": [remove_anonymous_empty, remove_nodes('ZWW', 'ZW', 'LZ', 'DPP', 'COMMENT__', 'ABS', 'SEM', 'TR'), "+": [remove_anonymous_empty, remove_nodes('ZWW', 'ZW', 'LZ', 'DPP', 'COMMENT__', 'ABS', 'SEM', 'TR'),
remove_tokens(":")], remove_tokens(":")],
"Autor": [reduce_single_child], "AutorWerk": [reduce_single_child],
"Artikel": [], "Artikel": [],
"LemmaPosition": [], "LemmaPosition": [],
"Lemma": [], "Lemma": [],
...@@ -509,11 +504,8 @@ MLW_AST_transformation_table = { ...@@ -509,11 +504,8 @@ MLW_AST_transformation_table = {
"LemmaWort": [reduce_single_child], "LemmaWort": [reduce_single_child],
"LemmaVariante": [reduce_single_child, traverse_locally(LemmaVariante_table)], "LemmaVariante": [reduce_single_child, traverse_locally(LemmaVariante_table)],
"LemmaVarianten": [flatten], "LemmaVarianten": [flatten],
"LemmaZusatz": [],
"lzs_typ": [],
"GrammatikPosition": [flatten], "GrammatikPosition": [flatten],
"wortart": [replace_or_reduce], "wortart": [replace_or_reduce],
"GrammatikVarianten": [],
"flexion": [], "flexion": [],
"deklination": [], "deklination": [],
"konjugation": [], "konjugation": [],
...@@ -522,19 +514,15 @@ MLW_AST_transformation_table = { ...@@ -522,19 +514,15 @@ MLW_AST_transformation_table = {
"nomen, verb, adverb, adjektiv, praeposition": [content_from_parser_name], "nomen, verb, adverb, adjektiv, praeposition": [content_from_parser_name],
"maskulinum, femininum, neutrum": [content_from_parser_name], "maskulinum, femininum, neutrum": [content_from_parser_name],
"EtymologiePosition": [], "EtymologiePosition": [],
"EtymologieVarianten": [],
"EtymologieVariante": [], "EtymologieVariante": [],
"ArtikelKopf": [replace_by_single_child], "ArtikelKopf": [replace_by_single_child],
"SchreibweisenPosition, StrukturPosition, VerwechselungsPosition, GebrauchsPosition": "SchreibweisenPosition, StrukturPosition, VerwechselungsPosition, GebrauchsPosition":
[], [],
"SWTyp": [replace_or_reduce],
"SWVariante": [],
"Schreibweise": [replace_by_single_child],
"Kategorien": [flatten], "Kategorien": [flatten],
"Kategorie": [], "Kategorie": [],
"Varianten": [flatten], "Varianten": [flatten],
"Variante": [], "Variante": [],
"Gegenstand": [reduce_single_child], "Beschreibung": [reduce_single_child],
"Besonderheit": [reduce_single_child], "Besonderheit": [reduce_single_child],
"BedeutungsPosition": [flatten, remove_tokens("BEDEUTUNG")], "BedeutungsPosition": [flatten, remove_tokens("BEDEUTUNG")],
"Bedeutung": [], "Bedeutung": [],
...@@ -554,7 +542,6 @@ MLW_AST_transformation_table = { ...@@ -554,7 +542,6 @@ MLW_AST_transformation_table = {
"LateinischesWort, DeutschesWort": [strip, collapse], "LateinischesWort, DeutschesWort": [strip, collapse],
"Belege": [flatten], "Belege": [flatten],
"Beleg": [], "Beleg": [],
"EinBeleg": [],
"Zitat": [flatten], "Zitat": [flatten],
"Zusatz": [reduce_single_child, flatten], "Zusatz": [reduce_single_child, flatten],
"ArtikelVerfasser": [], "ArtikelVerfasser": [],
...@@ -564,15 +551,10 @@ MLW_AST_transformation_table = { ...@@ -564,15 +551,10 @@ MLW_AST_transformation_table = {
"GebrauchsHinweis, PlurSingHinweis": [remove_whitespace, reduce_single_child], "GebrauchsHinweis, PlurSingHinweis": [remove_whitespace, reduce_single_child],
"Name": [collapse], "Name": [collapse],
"Stelle": [collapse], "Stelle": [collapse],
"SW_LAT": [replace_or_reduce],
"SW_DEU": [replace_or_reduce],
"SW_GRIECH": [replace_or_reduce],
"Verweis": [], "Verweis": [],
"VerweisKern": [flatten], "VerweisKern": [flatten],
"pfad, ziel": [reduce_single_child], # [apply_if(replace_content(lambda s: ''), has_parent("URL"))], "pfad, ziel": [reduce_single_child], # [apply_if(replace_content(lambda s: ''), has_parent("URL"))],
"Anker": [reduce_single_child], "Anker": [reduce_single_child],
"Werk": [reduce_single_child],
"ZielName": [replace_by_single_child],
"URL": [flatten, keep_nodes('protokoll', 'domäne', 'pfad', 'ziel'), replace_by_single_child], "URL": [flatten, keep_nodes('protokoll', 'domäne', 'pfad', 'ziel'), replace_by_single_child],
"NAMENS_ABKÜRZUNG": [], "NAMENS_ABKÜRZUNG": [],
"NAME": [], "NAME": [],
...@@ -580,9 +562,7 @@ MLW_AST_transformation_table = { ...@@ -580,9 +562,7 @@ MLW_AST_transformation_table = {
"DEU_GROSS": [reduce_single_child], "DEU_GROSS": [reduce_single_child],
"DEU_KLEIN": [reduce_single_child], "DEU_KLEIN": [reduce_single_child],
"LAT_WORT": [reduce_single_child], "LAT_WORT": [reduce_single_child],
"LAT_WORT_TEIL": [],
"GROSSSCHRIFT": [], "GROSSSCHRIFT": [],
"GROSSFOLGE": [],
"BUCHSTABENFOLGE": [], "BUCHSTABENFOLGE": [],
"EINZEILER, FREITEXT, MEHRZEILER": [strip, collapse], "EINZEILER, FREITEXT, MEHRZEILER": [strip, collapse],
"ZEICHENFOLGE": [], "ZEICHENFOLGE": [],
...@@ -592,7 +572,6 @@ MLW_AST_transformation_table = { ...@@ -592,7 +572,6 @@ MLW_AST_transformation_table = {
"ZW": [], "ZW": [],
"ZWW": [], "ZWW": [],
"LÜCKE": [], "LÜCKE": [],
"LEERRAUM": [],
"LEERZEILE": [], "LEERZEILE": [],
"RZS": [], "RZS": [],
"ZEILENSPRUNG": [], "ZEILENSPRUNG": [],
...@@ -600,7 +579,7 @@ MLW_AST_transformation_table = { ...@@ -600,7 +579,7 @@ MLW_AST_transformation_table = {
"DATEI_ENDE": [], "DATEI_ENDE": [],
"NIEMALS": [], "NIEMALS": [],
":Token": [remove_whitespace, reduce_single_child], ":Token": [remove_whitespace, reduce_single_child],
"RE": reduce_single_child, ":RE": reduce_single_child,
"*": replace_by_single_child "*": replace_by_single_child
} }
......
...@@ -35,17 +35,12 @@ M1: """kategorie1: ...@@ -35,17 +35,12 @@ M1: """kategorie1:
4 : """ 4 : """
SCHREIBWEISE SCHREIBWEISE
script.: script.:
hym-: {=> v. ibi. 1}
em-: Chron.: Fred.; 2,35sqq. capit. p. 43. 2,36 p. 60,10.
ym-: Chart.: Sangall.; A 194.
impir-: {=> v. ibi. 2}
script. form:
hym-: {=> v. ibi. 1} hym-: {=> v. ibi. 1}
em-: Chron.: Fred.; 2,35sqq. capit. p. 43. 2,36 p. 60,10. em-: Chron.: Fred.; 2,35sqq. capit. p. 43. 2,36 p. 60,10.
ym-: Chart.: Sangall.; A 194. ym-: Chart.: Sangall.; A 194.
impir-: {=> v. ibi. 2}""" impir-: {=> v. ibi. 2}"""
[match:Besonderheit] [match:Beschreibung]
1: """script.""" 1: """script."""
2: """script. fat-""" 2: """script. fat-"""
3: """festregel(a)""" 3: """festregel(a)"""
......
...@@ -60,7 +60,7 @@ Match-test "M1" ...@@ -60,7 +60,7 @@ Match-test "M1"
"" ""
"" ""
) )
(:Whitespace (:RE
" " " "
) )
) )
...@@ -80,7 +80,7 @@ Match-test "M2" ...@@ -80,7 +80,7 @@ Match-test "M2"
"" ""
"" ""
) )
(:Whitespace (:RE
" // Kommentar" " // Kommentar"
) )
) )
...@@ -102,7 +102,7 @@ Match-test "M3" ...@@ -102,7 +102,7 @@ Match-test "M3"
"" ""
"" ""
) )
(:Whitespace (:RE
" " " "
) )
) )
...@@ -124,7 +124,7 @@ Match-test "M4" ...@@ -124,7 +124,7 @@ Match-test "M4"
"" ""
"" ""
) )