Commit 1422279c authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- transformations for unknown parser names now issue a warning

parent e3ce4b20
......@@ -26,12 +26,12 @@ from DHParser.ebnf import EBNFCompiler, grammar_changed, \
get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
from DHParser.error import Error, is_error, has_errors, only_errors
from DHParser.log import logging
from DHParser.parse import Grammar, Compiler, compile_source
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc
from DHParser.toolkit import load_if_file, is_python_code, compile_python_object, \
re
from DHParser.log import logging
__all__ = ('DHPARSER_IMPORTS',
'GrammarError',
......@@ -84,7 +84,8 @@ from DHParser import logging, is_filename, load_if_file, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, \\
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip
'''
......@@ -296,7 +297,7 @@ def load_compiler_suite(compiler_suite: str) -> \
Tuple[PreprocessorFactoryFunc, ParserFactoryFunc,
TransformerFactoryFunc, CompilerFactoryFunc]:
"""
Extracts a compiler suite from file or string ``compiler suite``
Extracts a compiler suite from file or string `compiler_suite`
and returns it as a tuple (preprocessor, parser, ast, compiler).
Returns:
......@@ -442,6 +443,10 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
source = f.read()
sections = RX_SECTION_MARKER.split(source)
intro, imports, preprocessor, parser, ast, compiler, outro = sections
# TODO: Verify transformation table
ast_trans_table = compile_python_object(DHPARSER_IMPORTS + ast,
r'(?:\w+_)?AST_transformation_table$')
messages.extend(ebnf_compiler.verify_transformation_table(ast_trans_table))
except (PermissionError, FileNotFoundError, IOError) as error:
intro, imports, preprocessor, parser, ast, compiler, outro = '', '', '', '', '', '', ''
except ValueError as error:
......
......@@ -27,7 +27,7 @@ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, RE, \
Compiler
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table
from DHParser.transform import traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, remove_infix_operator
......@@ -475,6 +475,18 @@ class EBNFCompiler(Compiler):
compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(compiler)
def verify_transformation_table(self, transtable):
    """Checks a transformation table for entries that do not correspond
    to any symbol defined in the compiled grammar.

    Args:
        transtable: an AST-transformation table (dict mapping symbol
            names or comma-separated name lists to transformation lists).

    Returns:
        A list of ``Error`` objects (one warning per unknown symbol).
    """
    # Verification requires that the grammar has already been compiled,
    # i.e. that self.rules has been filled.
    assert self._dirty_flag
    # Wildcard keys are generic table entries, not grammar symbols.
    wildcards = {'*', '+', '~'}
    entries = set(expand_table(transtable).keys()) - wildcards
    defined = self.rules.keys()
    warnings = []
    for name in entries:
        # Keys beginning with ":" address anonymous parser types rather
        # than grammar symbols and are therefore always legitimate.
        if name not in defined and not name.startswith(":"):
            warnings.append(Error(('Symbol "%s" is not defined in grammar %s but appears in '
                                   'the transformation table!') % (name, self.grammar_name),
                                  Error.UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE))
    return warnings
def assemble_parser(self, definitions: List[Tuple[str, str]], root_node: Node) -> str:
"""
......
......@@ -46,6 +46,8 @@ class Error:
REDEFINED_DIRECTIVE_WARNING = 101
REDECLARED_TOKEN_WARNING = 102
UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE = 601
# error codes
MANDATORY_CONTINUATION = 1001
......
......@@ -61,18 +61,18 @@ https://epsil.github.io/gll/
import copy
import os
from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional
from DHParser.error import Error, is_error, linebreaks, adjust_error_locations
from DHParser.log import is_logging, logfile_basename, HistoryRecord, log_ST, \
log_parsing_history
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, \
PreprocessorFunc, with_source_mapping, strip_tokens
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_PARSER
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, \
PreprocessorFunc, with_source_mapping, strip_tokens
from DHParser.toolkit import sane_parser_name, \
escape_control_characters, load_if_file, re
from DHParser.log import log_dir, is_logging, logfile_basename, HistoryRecord, log_ST, \
log_parsing_history
from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional
__all__ = ('Parser',
'UnknownParserError',
......@@ -1931,11 +1931,11 @@ class Compiler:
def __init__(self, grammar_name="", grammar_source=""):
# Initialize the compiler for a particular grammar. `grammar_name` and
# `grammar_source` identify the grammar this compiler object serves.
self._reset()
# NOTE(review): the merged diff view shows `_dirty_flag = False` both here
# and in _reset(); this line appears to be the one removed by this commit
# (the flag initialization was moved into _reset) — confirm against the
# post-commit file.
self._dirty_flag = False
self.set_grammar_name(grammar_name, grammar_source)
def _reset(self):
# Clear per-compilation state so the same compiler object can be reused
# for several compilation runs.
self.context = [] # type: List[Node]
# Mark the compiler as clean; __call__ sets/uses this flag to decide
# whether a reset is needed before compiling.
self._dirty_flag = False
def __call__(self, node: Node) -> Any:
"""
......@@ -1947,8 +1947,7 @@ class Compiler:
"""
if self._dirty_flag:
self._reset()
else:
self._dirty_flag = True
self._dirty_flag = True
result = self.compile(node)
self.propagate_error_flags(node, lazy=True)
return result
......
......@@ -47,7 +47,7 @@ except ImportError:
import DHParser.foreign_typing as typing
sys.modules['typing'] = typing # make it possible to import from typing
from typing import Any, Iterable, Sequence, Set, Union, cast
from typing import Any, Iterable, Sequence, Set, Union, Dict, cast
__all__ = ('escape_re',
'escape_control_characters',
......@@ -267,7 +267,7 @@ def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]:
return [arg]
def expand_table(compact_table):
def expand_table(compact_table: Dict) -> Dict:
"""Expands a table by separating keywords that are tuples or strings
containing comma separated words into single keyword entries with
the same values. Returns the expanded table.
......
......@@ -27,7 +27,6 @@ sys.path.extend(['../../', '../', './'])
from DHParser import dsl
from DHParser import testing
from DHParser import toolkit
# print(dir(dsl))
......@@ -42,7 +41,7 @@ with DHParser.log.logging(False):
from LaTeXCompiler import get_grammar, get_transformer
with DHParser.log.logging(True):
with DHParser.log.logging(False):
error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer,
fn_patterns=['*_test_*.ini'],
report=True, verbose=True)
......
......@@ -91,13 +91,14 @@ VerwechselungsPosition = ZWW "VERWECHSELBAR" Position
## ARTIKELKOPF POSITIONEN ##
Position = [LZ] §Kategorien
Position = [LZ] §(Kategorien | Besonderheiten)
Kategorien = Kategorie { ZWW Kategorie }
Kategorie = Besonderheit §DPP [LZ] ( Varianten | Kategorien )
Besonderheit = EINZEILER
Kategorie = Beschreibung DPP [LZ] Besonderheiten
Besonderheiten = Besonderheit { ZWW Besonderheit }
Besonderheit = Beschreibung DPP [LZ] Varianten
Varianten = Variante { ZWW Variante }
Variante = !KATEGORIENZEILE Gegenstand DPP Belege
Gegenstand = EINZEILER
Variante = !KATEGORIENZEILE Beschreibung DPP Belege
Beschreibung = EINZEILER
#### BEDEUTUNGS-POSITION #####################################################
......@@ -179,10 +180,7 @@ BelegStelle = [<Anker | Zusatz>] (Stelle [[ZW] BelegText] | Verweis) [[ZW]
BelegText = /"/ { MEHRZEILER | Anker | Zusatz } §/"/~ ["."]
AutorWerk = EINZEILER
Werk = EINZEILER
Stelle = EINZEILER
Datierung = EINZEILER
Edition = EINZEILER
#### VERWEISE (LINKS) ########################################################
......
......@@ -145,13 +145,14 @@ class MLWGrammar(Grammar):
## ARTIKELKOPF POSITIONEN ##
Position = [LZ] §Kategorien
Position = [LZ] §(Kategorien | Besonderheiten)
Kategorien = Kategorie { ZWW Kategorie }
Kategorie = Besonderheit §DPP [LZ] ( Varianten | Kategorien )
Besonderheit = EINZEILER
Kategorie = Beschreibung DPP [LZ] Besonderheiten
Besonderheiten = Besonderheit { ZWW Besonderheit }
Besonderheit = Beschreibung DPP [LZ] Varianten
Varianten = Variante { ZWW Variante }
Variante = !KATEGORIENZEILE Gegenstand DPP Belege
Gegenstand = EINZEILER
Variante = !KATEGORIENZEILE Beschreibung DPP Belege
Beschreibung = EINZEILER
#### BEDEUTUNGS-POSITION #####################################################
......@@ -233,10 +234,7 @@ class MLWGrammar(Grammar):
BelegText = /"/ { MEHRZEILER | Anker | Zusatz } §/"/~ ["."]
AutorWerk = EINZEILER
Werk = EINZEILER
Stelle = EINZEILER
Datierung = EINZEILER
Edition = EINZEILER
#### VERWEISE (LINKS) ########################################################
......@@ -308,7 +306,6 @@ class MLWGrammar(Grammar):
DEU_WORT = Forward()
FREITEXT = Forward()
GROSSSCHRIFT = Forward()
Kategorien = Forward()
LZ = Forward()
LemmaWort = Forward()
ROEMISCHE_ZAHL = Forward()
......@@ -321,7 +318,7 @@ class MLWGrammar(Grammar):
flexion = Forward()
genus = Forward()
wortart = Forward()
source_hash__ = "59b9cf3ee2f5a4bb0c8396422e102f32"
source_hash__ = "17e7d9c6b771eb2fa259912b687f8677"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)'
WHITESPACE__ = r'[\t ]*'
......@@ -375,10 +372,7 @@ class MLWGrammar(Grammar):
Anker = Series(Token("{"), Token("@"), ziel, Token("}"), mandatory=2)
VerweisKern = Series(Token("=>"), Alternative(Series(alias, Token("|"), Alternative(Token("-"), URL)), URL), mandatory=1)
Verweis = Series(Token("{"), VerweisKern, Token("}"))
Edition = Synonym(EINZEILER)
Datierung = Synonym(EINZEILER)
Stelle = Synonym(EINZEILER)
Werk = Synonym(EINZEILER)
AutorWerk = Synonym(EINZEILER)
BelegText = Series(RegExp('"'), ZeroOrMore(Alternative(MEHRZEILER, Anker, Zusatz)), RE('"'), Option(Token(".")), mandatory=2)
BelegStelle = Series(Option(SomeOf(Anker, Zusatz)), Alternative(Series(Stelle, Option(Series(Option(ZW), BelegText))), Verweis), Option(Series(Option(ZW), Zusatz)))
......@@ -427,13 +421,14 @@ class MLWGrammar(Grammar):
U2Bedeutung = OneOrMore(Series(ZWW, Alternative(Token("UU_BEDEUTUNG"), Token("UNTER_UNTER_BEDEUTUNG")), Option(LZ), Bedeutung, Option(U3Bedeutung), mandatory=3))
U1Bedeutung = OneOrMore(Series(ZWW, Alternative(Token("U_BEDEUTUNG"), Token("UNTER_BEDEUTUNG")), Option(LZ), Bedeutung, Option(U2Bedeutung), mandatory=3))
BedeutungsPosition = OneOrMore(Series(ZWW, Token("BEDEUTUNG"), Option(LZ), Bedeutung, Option(U1Bedeutung), mandatory=3))
Gegenstand = Synonym(EINZEILER)
Variante = Series(NegativeLookahead(KATEGORIENZEILE), Gegenstand, DPP, Belege)
Beschreibung = Synonym(EINZEILER)
Variante = Series(NegativeLookahead(KATEGORIENZEILE), Beschreibung, DPP, Belege)
Varianten = Series(Variante, ZeroOrMore(Series(ZWW, Variante)))
Besonderheit = Synonym(EINZEILER)
Kategorie = Series(Besonderheit, DPP, Option(LZ), Alternative(Varianten, Kategorien), mandatory=1)
Kategorien.set(Series(Kategorie, ZeroOrMore(Series(ZWW, Kategorie))))
Position = Series(Option(LZ), Kategorien, mandatory=1)
Besonderheit = Series(Beschreibung, DPP, Option(LZ), Varianten)
Besonderheiten = Series(Besonderheit, ZeroOrMore(Series(ZWW, Besonderheit)))
Kategorie = Series(Beschreibung, DPP, Option(LZ), Besonderheiten)
Kategorien = Series(Kategorie, ZeroOrMore(Series(ZWW, Kategorie)))
Position = Series(Option(LZ), Alternative(Kategorien, Besonderheiten), mandatory=1)
VerwechselungsPosition = Series(ZWW, Token("VERWECHSELBAR"), Position)
MetrikPosition = Series(ZWW, Token("METRIK"), Position)
GebrauchsPosition = Series(ZWW, Token("GEBRAUCH"), Position)
......@@ -500,7 +495,7 @@ MLW_AST_transformation_table = {
# AST Transformations for the MLW-grammar
"+": [remove_anonymous_empty, remove_nodes('ZWW', 'ZW', 'LZ', 'DPP', 'COMMENT__', 'ABS', 'SEM', 'TR'),
remove_tokens(":")],
"Autor": [reduce_single_child],
"AutorWerk": [reduce_single_child],
"Artikel": [],
"LemmaPosition": [],
"Lemma": [],
......@@ -509,11 +504,8 @@ MLW_AST_transformation_table = {
"LemmaWort": [reduce_single_child],
"LemmaVariante": [reduce_single_child, traverse_locally(LemmaVariante_table)],
"LemmaVarianten": [flatten],
"LemmaZusatz": [],
"lzs_typ": [],
"GrammatikPosition": [flatten],
"wortart": [replace_or_reduce],
"GrammatikVarianten": [],
"flexion": [],
"deklination": [],
"konjugation": [],
......@@ -522,19 +514,15 @@ MLW_AST_transformation_table = {
"nomen, verb, adverb, adjektiv, praeposition": [content_from_parser_name],
"maskulinum, femininum, neutrum": [content_from_parser_name],
"EtymologiePosition": [],
"EtymologieVarianten": [],
"EtymologieVariante": [],
"ArtikelKopf": [replace_by_single_child],
"SchreibweisenPosition, StrukturPosition, VerwechselungsPosition, GebrauchsPosition":
[],
"SWTyp": [replace_or_reduce],
"SWVariante": [],
"Schreibweise": [replace_by_single_child],
"Kategorien": [flatten],
"Kategorie": [],
"Varianten": [flatten],
"Variante": [],
"Gegenstand": [reduce_single_child],
"Beschreibung": [reduce_single_child],
"Besonderheit": [reduce_single_child],
"BedeutungsPosition": [flatten, remove_tokens("BEDEUTUNG")],
"Bedeutung": [],
......@@ -554,7 +542,6 @@ MLW_AST_transformation_table = {
"LateinischesWort, DeutschesWort": [strip, collapse],
"Belege": [flatten],
"Beleg": [],
"EinBeleg": [],
"Zitat": [flatten],
"Zusatz": [reduce_single_child, flatten],
"ArtikelVerfasser": [],
......@@ -564,15 +551,10 @@ MLW_AST_transformation_table = {
"GebrauchsHinweis, PlurSingHinweis": [remove_whitespace, reduce_single_child],
"Name": [collapse],
"Stelle": [collapse],
"SW_LAT": [replace_or_reduce],
"SW_DEU": [replace_or_reduce],
"SW_GRIECH": [replace_or_reduce],
"Verweis": [],
"VerweisKern": [flatten],
"pfad, ziel": [reduce_single_child], # [apply_if(replace_content(lambda s: ''), has_parent("URL"))],
"Anker": [reduce_single_child],
"Werk": [reduce_single_child],
"ZielName": [replace_by_single_child],
"URL": [flatten, keep_nodes('protokoll', 'domäne', 'pfad', 'ziel'), replace_by_single_child],
"NAMENS_ABKÜRZUNG": [],
"NAME": [],
......@@ -580,9 +562,7 @@ MLW_AST_transformation_table = {
"DEU_GROSS": [reduce_single_child],
"DEU_KLEIN": [reduce_single_child],
"LAT_WORT": [reduce_single_child],
"LAT_WORT_TEIL": [],
"GROSSSCHRIFT": [],
"GROSSFOLGE": [],
"BUCHSTABENFOLGE": [],
"EINZEILER, FREITEXT, MEHRZEILER": [strip, collapse],
"ZEICHENFOLGE": [],
......@@ -592,7 +572,6 @@ MLW_AST_transformation_table = {
"ZW": [],
"ZWW": [],
"LÜCKE": [],
"LEERRAUM": [],
"LEERZEILE": [],
"RZS": [],
"ZEILENSPRUNG": [],
......@@ -600,7 +579,7 @@ MLW_AST_transformation_table = {
"DATEI_ENDE": [],
"NIEMALS": [],
":Token": [remove_whitespace, reduce_single_child],
"RE": reduce_single_child,
":RE": reduce_single_child,
"*": replace_by_single_child
}
......
......@@ -35,17 +35,12 @@ M1: """kategorie1:
4 : """
SCHREIBWEISE
script.:
hym-: {=> v. ibi. 1}
em-: Chron.: Fred.; 2,35sqq. capit. p. 43. 2,36 p. 60,10.
ym-: Chart.: Sangall.; A 194.
impir-: {=> v. ibi. 2}
script. form:
hym-: {=> v. ibi. 1}
em-: Chron.: Fred.; 2,35sqq. capit. p. 43. 2,36 p. 60,10.
ym-: Chart.: Sangall.; A 194.
impir-: {=> v. ibi. 2}"""
[match:Besonderheit]
[match:Beschreibung]
1: """script."""
2: """script. fat-"""
3: """festregel(a)"""
......
......@@ -60,7 +60,7 @@ Match-test "M1"
""
""
)
(:Whitespace
(:RE
" "
)
)
......@@ -80,7 +80,7 @@ Match-test "M2"
""
""
)
(:Whitespace
(:RE
" // Kommentar"
)
)
......@@ -102,7 +102,7 @@ Match-test "M3"
""
""
)
(:Whitespace
(:RE
" "
)
)
......@@ -124,7 +124,7 @@ Match-test "M4"
""
""
)
(:Whitespace
(:RE
" /* Kommentar"
""
" Kommentar fortsetzung */"
......@@ -171,7 +171,7 @@ Match-test "M1"
""
""
)
(:Whitespace
(:RE
" "
)
)
......@@ -226,7 +226,7 @@ Match-test "M3"
""
""
)
(:Whitespace
(:RE
" "
)
)
......@@ -300,7 +300,7 @@ Match-test "M5"
""
""
)
(:Whitespace
(:RE
" // Kommentar"
)
)
......
......@@ -27,7 +27,6 @@ sys.path.extend(['../../', '../', './'])
from DHParser import dsl
from DHParser import testing
from DHParser import toolkit
if not dsl.recompile_grammar('MLW.ebnf', force=True): # recompiles Grammar only if it has changed
with open('MLW_ebnf_ERRORS.txt') as f:
......@@ -36,7 +35,7 @@ if not dsl.recompile_grammar('MLW.ebnf', force=True): # recompiles Grammar only
from MLWCompiler import get_grammar, get_transformer
with DHParser.log.logging(True):
with DHParser.log.logging(False):
error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer,
fn_patterns=['*_test*'],
verbose=True)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment