Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit 1422279c authored by Eckhart Arnold
Browse files

- transformations for unknown parser names now issue a warning

parent e3ce4b20
......@@ -26,12 +26,12 @@ from DHParser.ebnf import EBNFCompiler, grammar_changed, \
get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
from DHParser.error import Error, is_error, has_errors, only_errors
from DHParser.log import logging
from DHParser.parse import Grammar, Compiler, compile_source
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc
from DHParser.toolkit import load_if_file, is_python_code, compile_python_object, \
re
from DHParser.log import logging
__all__ = ('DHPARSER_IMPORTS',
'GrammarError',
......@@ -84,7 +84,8 @@ from DHParser import logging, is_filename, load_if_file, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, \\
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip
'''
......@@ -296,7 +297,7 @@ def load_compiler_suite(compiler_suite: str) -> \
Tuple[PreprocessorFactoryFunc, ParserFactoryFunc,
TransformerFactoryFunc, CompilerFactoryFunc]:
"""
Extracts a compiler suite from file or string ``compiler suite``
Extracts a compiler suite from file or string `compiler_suite`
and returns it as a tuple (preprocessor, parser, ast, compiler).
Returns:
......@@ -442,6 +443,10 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
source = f.read()
sections = RX_SECTION_MARKER.split(source)
intro, imports, preprocessor, parser, ast, compiler, outro = sections
# TODO: Verify transformation table
ast_trans_table = compile_python_object(DHPARSER_IMPORTS + ast,
r'(?:\w+_)?AST_transformation_table$')
messages.extend(ebnf_compiler.verify_transformation_table(ast_trans_table))
except (PermissionError, FileNotFoundError, IOError) as error:
intro, imports, preprocessor, parser, ast, compiler, outro = '', '', '', '', '', '', ''
except ValueError as error:
......
......@@ -27,7 +27,7 @@ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, RE, \
Compiler
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table
from DHParser.transform import traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, remove_infix_operator
......@@ -475,6 +475,18 @@ class EBNFCompiler(Compiler):
compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(compiler)
def verify_transformation_table(self, transtable):
    """Checks that every symbol mentioned in ``transtable`` is actually
    defined in the compiled grammar.

    Entries that are wildcards ('*', '+', '~') or anonymous-parser names
    (starting with ':') are exempt from the check.

    Returns:
        A list of Error objects, one per transformation-table entry that
        does not correspond to a grammar symbol.
    """
    # The compiler must have run at least once so that self.rules is filled.
    assert self._dirty_flag
    wildcards = {'*', '+', '~'}
    known_symbols = set(self.rules.keys())
    messages = []
    for key in expand_table(transtable):
        if key in wildcards or key.startswith(":") or key in known_symbols:
            continue
        messages.append(
            Error(('Symbol "%s" is not defined in grammar %s but appears in '
                   'the transformation table!') % (key, self.grammar_name),
                  Error.UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE))
    return messages
def assemble_parser(self, definitions: List[Tuple[str, str]], root_node: Node) -> str:
"""
......
......@@ -46,6 +46,8 @@ class Error:
REDEFINED_DIRECTIVE_WARNING = 101
REDECLARED_TOKEN_WARNING = 102
UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE = 601
# error codes
MANDATORY_CONTINUATION = 1001
......
......@@ -61,18 +61,18 @@ https://epsil.github.io/gll/
import copy
import os
from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional
from DHParser.error import Error, is_error, linebreaks, adjust_error_locations
from DHParser.log import is_logging, logfile_basename, HistoryRecord, log_ST, \
log_parsing_history
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, \
PreprocessorFunc, with_source_mapping, strip_tokens
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_PARSER
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, \
PreprocessorFunc, with_source_mapping, strip_tokens
from DHParser.toolkit import sane_parser_name, \
escape_control_characters, load_if_file, re
from DHParser.log import log_dir, is_logging, logfile_basename, HistoryRecord, log_ST, \
log_parsing_history
from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional
__all__ = ('Parser',
'UnknownParserError',
......@@ -1931,11 +1931,11 @@ class Compiler:
def __init__(self, grammar_name="", grammar_source=""):
self._reset()
self._dirty_flag = False
self.set_grammar_name(grammar_name, grammar_source)
def _reset(self):
self.context = [] # type: List[Node]
self._dirty_flag = False
def __call__(self, node: Node) -> Any:
"""
......@@ -1947,8 +1947,7 @@ class Compiler:
"""
if self._dirty_flag:
self._reset()
else:
self._dirty_flag = True
self._dirty_flag = True
result = self.compile(node)
self.propagate_error_flags(node, lazy=True)
return result
......
......@@ -47,7 +47,7 @@ except ImportError:
import DHParser.foreign_typing as typing
sys.modules['typing'] = typing # make it possible to import from typing
from typing import Any, Iterable, Sequence, Set, Union, cast
from typing import Any, Iterable, Sequence, Set, Union, Dict, cast
__all__ = ('escape_re',
'escape_control_characters',
......@@ -267,7 +267,7 @@ def smart_list(arg: Union[str, Iterable, Any]) -> Union[Sequence, Set]:
return [arg]
def expand_table(compact_table):
def expand_table(compact_table: Dict) -> Dict:
"""Expands a table by separating keywords that are tuples or strings
containing comma separated words into single keyword entries with
the same values. Returns the expanded table.
......
......@@ -27,7 +27,6 @@ sys.path.extend(['../../', '../', './'])
from DHParser import dsl
from DHParser import testing
from DHParser import toolkit
# print(dir(dsl))
......@@ -42,7 +41,7 @@ with DHParser.log.logging(False):
from LaTeXCompiler import get_grammar, get_transformer
with DHParser.log.logging(True):
with DHParser.log.logging(False):
error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer,
fn_patterns=['*_test_*.ini'],
report=True, verbose=True)
......
......@@ -91,13 +91,14 @@ VerwechselungsPosition = ZWW "VERWECHSELBAR" Position
## ARTIKELKOPF POSITIONEN ##
Position = [LZ] §Kategorien
Position = [LZ] §(Kategorien | Besonderheiten)
Kategorien = Kategorie { ZWW Kategorie }
Kategorie = Besonderheit §DPP [LZ] ( Varianten | Kategorien )
Besonderheit = EINZEILER
Kategorie = Beschreibung DPP [LZ] Besonderheiten
Besonderheiten = Besonderheit { ZWW Besonderheit }
Besonderheit = Beschreibung DPP [LZ] Varianten
Varianten = Variante { ZWW Variante }
Variante = !KATEGORIENZEILE Gegenstand DPP Belege
Gegenstand = EINZEILER
Variante = !KATEGORIENZEILE Beschreibung DPP Belege
Beschreibung = EINZEILER
#### BEDEUTUNGS-POSITION #####################################################
......@@ -179,10 +180,7 @@ BelegStelle = [<Anker | Zusatz>] (Stelle [[ZW] BelegText] | Verweis) [[ZW]
BelegText = /"/ { MEHRZEILER | Anker | Zusatz } §/"/~ ["."]
AutorWerk = EINZEILER
Werk = EINZEILER
Stelle = EINZEILER
Datierung = EINZEILER
Edition = EINZEILER
#### VERWEISE (LINKS) ########################################################
......
......@@ -145,13 +145,14 @@ class MLWGrammar(Grammar):
## ARTIKELKOPF POSITIONEN ##
Position = [LZ] §Kategorien
Position = [LZ] §(Kategorien | Besonderheiten)
Kategorien = Kategorie { ZWW Kategorie }
Kategorie = Besonderheit §DPP [LZ] ( Varianten | Kategorien )
Besonderheit = EINZEILER
Kategorie = Beschreibung DPP [LZ] Besonderheiten
Besonderheiten = Besonderheit { ZWW Besonderheit }
Besonderheit = Beschreibung DPP [LZ] Varianten
Varianten = Variante { ZWW Variante }
Variante = !KATEGORIENZEILE Gegenstand DPP Belege
Gegenstand = EINZEILER
Variante = !KATEGORIENZEILE Beschreibung DPP Belege
Beschreibung = EINZEILER
#### BEDEUTUNGS-POSITION #####################################################
......@@ -233,10 +234,7 @@ class MLWGrammar(Grammar):
BelegText = /"/ { MEHRZEILER | Anker | Zusatz } §/"/~ ["."]
AutorWerk = EINZEILER
Werk = EINZEILER
Stelle = EINZEILER
Datierung = EINZEILER
Edition = EINZEILER
#### VERWEISE (LINKS) ########################################################
......@@ -308,7 +306,6 @@ class MLWGrammar(Grammar):
DEU_WORT = Forward()
FREITEXT = Forward()
GROSSSCHRIFT = Forward()
Kategorien = Forward()
LZ = Forward()
LemmaWort = Forward()
ROEMISCHE_ZAHL = Forward()
......@@ -321,7 +318,7 @@ class MLWGrammar(Grammar):
flexion = Forward()
genus = Forward()
wortart = Forward()
source_hash__ = "59b9cf3ee2f5a4bb0c8396422e102f32"
source_hash__ = "17e7d9c6b771eb2fa259912b687f8677"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?:\/\/.*)|(?:\/\*(?:.|\n)*?\*\/)'
WHITESPACE__ = r'[\t ]*'
......@@ -375,10 +372,7 @@ class MLWGrammar(Grammar):
Anker = Series(Token("{"), Token("@"), ziel, Token("}"), mandatory=2)
VerweisKern = Series(Token("=>"), Alternative(Series(alias, Token("|"), Alternative(Token("-"), URL)), URL), mandatory=1)
Verweis = Series(Token("{"), VerweisKern, Token("}"))
Edition = Synonym(EINZEILER)
Datierung = Synonym(EINZEILER)
Stelle = Synonym(EINZEILER)
Werk = Synonym(EINZEILER)
AutorWerk = Synonym(EINZEILER)
BelegText = Series(RegExp('"'), ZeroOrMore(Alternative(MEHRZEILER, Anker, Zusatz)), RE('"'), Option(Token(".")), mandatory=2)
BelegStelle = Series(Option(SomeOf(Anker, Zusatz)), Alternative(Series(Stelle, Option(Series(Option(ZW), BelegText))), Verweis), Option(Series(Option(ZW), Zusatz)))
......@@ -427,13 +421,14 @@ class MLWGrammar(Grammar):
U2Bedeutung = OneOrMore(Series(ZWW, Alternative(Token("UU_BEDEUTUNG"), Token("UNTER_UNTER_BEDEUTUNG")), Option(LZ), Bedeutung, Option(U3Bedeutung), mandatory=3))
U1Bedeutung = OneOrMore(Series(ZWW, Alternative(Token("U_BEDEUTUNG"), Token("UNTER_BEDEUTUNG")), Option(LZ), Bedeutung, Option(U2Bedeutung), mandatory=3))
BedeutungsPosition = OneOrMore(Series(ZWW, Token("BEDEUTUNG"), Option(LZ), Bedeutung, Option(U1Bedeutung), mandatory=3))
Gegenstand = Synonym(EINZEILER)
Variante = Series(NegativeLookahead(KATEGORIENZEILE), Gegenstand, DPP, Belege)
Beschreibung = Synonym(EINZEILER)
Variante = Series(NegativeLookahead(KATEGORIENZEILE), Beschreibung, DPP, Belege)
Varianten = Series(Variante, ZeroOrMore(Series(ZWW, Variante)))
Besonderheit = Synonym(EINZEILER)
Kategorie = Series(Besonderheit, DPP, Option(LZ), Alternative(Varianten, Kategorien), mandatory=1)
Kategorien.set(Series(Kategorie, ZeroOrMore(Series(ZWW, Kategorie))))
Position = Series(Option(LZ), Kategorien, mandatory=1)
Besonderheit = Series(Beschreibung, DPP, Option(LZ), Varianten)
Besonderheiten = Series(Besonderheit, ZeroOrMore(Series(ZWW, Besonderheit)))
Kategorie = Series(Beschreibung, DPP, Option(LZ), Besonderheiten)
Kategorien = Series(Kategorie, ZeroOrMore(Series(ZWW, Kategorie)))
Position = Series(Option(LZ), Alternative(Kategorien, Besonderheiten), mandatory=1)
VerwechselungsPosition = Series(ZWW, Token("VERWECHSELBAR"), Position)
MetrikPosition = Series(ZWW, Token("METRIK"), Position)
GebrauchsPosition = Series(ZWW, Token("GEBRAUCH"), Position)
......@@ -500,7 +495,7 @@ MLW_AST_transformation_table = {
# AST Transformations for the MLW-grammar
"+": [remove_anonymous_empty, remove_nodes('ZWW', 'ZW', 'LZ', 'DPP', 'COMMENT__', 'ABS', 'SEM', 'TR'),
remove_tokens(":")],
"Autor": [reduce_single_child],
"AutorWerk": [reduce_single_child],
"Artikel": [],
"LemmaPosition": [],
"Lemma": [],
......@@ -509,11 +504,8 @@ MLW_AST_transformation_table = {
"LemmaWort": [reduce_single_child],
"LemmaVariante": [reduce_single_child, traverse_locally(LemmaVariante_table)],
"LemmaVarianten": [flatten],
"LemmaZusatz": [],
"lzs_typ": [],
"GrammatikPosition": [flatten],
"wortart": [replace_or_reduce],
"GrammatikVarianten": [],
"flexion": [],
"deklination": [],
"konjugation": [],
......@@ -522,19 +514,15 @@ MLW_AST_transformation_table = {
"nomen, verb, adverb, adjektiv, praeposition": [content_from_parser_name],
"maskulinum, femininum, neutrum": [content_from_parser_name],
"EtymologiePosition": [],
"EtymologieVarianten": [],
"EtymologieVariante": [],
"ArtikelKopf": [replace_by_single_child],
"SchreibweisenPosition, StrukturPosition, VerwechselungsPosition, GebrauchsPosition":
[],
"SWTyp": [replace_or_reduce],
"SWVariante": [],
"Schreibweise": [replace_by_single_child],
"Kategorien": [flatten],
"Kategorie": [],
"Varianten": [flatten],
"Variante": [],
"Gegenstand": [reduce_single_child],
"Beschreibung": [reduce_single_child],
"Besonderheit": [reduce_single_child],
"BedeutungsPosition": [flatten, remove_tokens("BEDEUTUNG")],
"Bedeutung": [],
......@@ -554,7 +542,6 @@ MLW_AST_transformation_table = {
"LateinischesWort, DeutschesWort": [strip, collapse],
"Belege": [flatten],
"Beleg": [],
"EinBeleg": [],
"Zitat": [flatten],
"Zusatz": [reduce_single_child, flatten],
"ArtikelVerfasser": [],
......@@ -564,15 +551,10 @@ MLW_AST_transformation_table = {
"GebrauchsHinweis, PlurSingHinweis": [remove_whitespace, reduce_single_child],
"Name": [collapse],
"Stelle": [collapse],
"SW_LAT": [replace_or_reduce],
"SW_DEU": [replace_or_reduce],
"SW_GRIECH": [replace_or_reduce],
"Verweis": [],
"VerweisKern": [flatten],
"pfad, ziel": [reduce_single_child], # [apply_if(replace_content(lambda s: ''), has_parent("URL"))],
"Anker": [reduce_single_child],
"Werk": [reduce_single_child],
"ZielName": [replace_by_single_child],
"URL": [flatten, keep_nodes('protokoll', 'domäne', 'pfad', 'ziel'), replace_by_single_child],
"NAMENS_ABKÜRZUNG": [],
"NAME": [],
......@@ -580,9 +562,7 @@ MLW_AST_transformation_table = {
"DEU_GROSS": [reduce_single_child],
"DEU_KLEIN": [reduce_single_child],
"LAT_WORT": [reduce_single_child],
"LAT_WORT_TEIL": [],
"GROSSSCHRIFT": [],
"GROSSFOLGE": [],
"BUCHSTABENFOLGE": [],
"EINZEILER, FREITEXT, MEHRZEILER": [strip, collapse],
"ZEICHENFOLGE": [],
......@@ -592,7 +572,6 @@ MLW_AST_transformation_table = {
"ZW": [],
"ZWW": [],
"LÜCKE": [],
"LEERRAUM": [],
"LEERZEILE": [],
"RZS": [],
"ZEILENSPRUNG": [],
......@@ -600,7 +579,7 @@ MLW_AST_transformation_table = {
"DATEI_ENDE": [],
"NIEMALS": [],
":Token": [remove_whitespace, reduce_single_child],
"RE": reduce_single_child,
":RE": reduce_single_child,
"*": replace_by_single_child
}
......
......@@ -35,17 +35,12 @@ M1: """kategorie1:
4 : """
SCHREIBWEISE
script.:
hym-: {=> v. ibi. 1}
em-: Chron.: Fred.; 2,35sqq. capit. p. 43. 2,36 p. 60,10.
ym-: Chart.: Sangall.; A 194.
impir-: {=> v. ibi. 2}
script. form:
hym-: {=> v. ibi. 1}
em-: Chron.: Fred.; 2,35sqq. capit. p. 43. 2,36 p. 60,10.
ym-: Chart.: Sangall.; A 194.
impir-: {=> v. ibi. 2}"""
[match:Besonderheit]
[match:Beschreibung]
1: """script."""
2: """script. fat-"""
3: """festregel(a)"""
......
......@@ -60,7 +60,7 @@ Match-test "M1"
""
""
)
(:Whitespace
(:RE
" "
)
)
......@@ -80,7 +80,7 @@ Match-test "M2"
""
""
)
(:Whitespace
(:RE
" // Kommentar"
)
)
......@@ -102,7 +102,7 @@ Match-test "M3"
""
""
)
(:Whitespace
(:RE
" "
)
)
......@@ -124,7 +124,7 @@ Match-test "M4"
""
""
)
(:Whitespace
(:RE
" /* Kommentar"
""
" Kommentar fortsetzung */"
......@@ -171,7 +171,7 @@ Match-test "M1"
""
""
)
(:Whitespace
(:RE
" "
)
)
......@@ -226,7 +226,7 @@ Match-test "M3"
""
""
)
(:Whitespace
(:RE
" "
)
)
......@@ -300,7 +300,7 @@ Match-test "M5"
""
""
)
(:Whitespace
(:RE
" // Kommentar"
)
)
......
......@@ -27,7 +27,6 @@ sys.path.extend(['../../', '../', './'])
from DHParser import dsl
from DHParser import testing
from DHParser import toolkit
if not dsl.recompile_grammar('MLW.ebnf', force=True): # recompiles Grammar only if it has changed
with open('MLW_ebnf_ERRORS.txt') as f:
......@@ -36,7 +35,7 @@ if not dsl.recompile_grammar('MLW.ebnf', force=True): # recompiles Grammar only
from MLWCompiler import get_grammar, get_transformer
with DHParser.log.logging(True):
with DHParser.log.logging(False):
error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer,
fn_patterns=['*_test*'],
verbose=True)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment