
Commit 2b66993d authored by Eckhart Arnold

- Better syntaxtree.mock_syntax_tree() generation: it can now read actual ASTs in S-expression form. This is useful for testing AST transformation or validation, as long as these do not call isinstance() on any parser.
parent 525ad4bc
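
A minimal sketch of the round trip this commit enables (the S-expression literal is illustrative; only mock_syntax_tree and Node.as_sexpr from this commit are used):

    from DHParser.syntaxtree import mock_syntax_tree

    # Rebuild a syntax tree from its S-expression serialization. The symbol
    # after '(' becomes the (mock) parser name of the node; quoted strings
    # or bare words become leaf content.
    expected = mock_syntax_tree('(a (b "c"))')

    # The reconstructed tree serializes back to an S-expression, so it can
    # serve as the expected value in AST-transformation tests, as long as
    # the transformation never calls isinstance() on node.parser.
    print(expected.as_sexpr())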
@@ -37,10 +37,10 @@ from .syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_sing
 __all__ = ['EBNFGrammar',
            'EBNFTransTable',
            'EBNFTransform',
            'EBNFCompilerError',
            # 'Scanner',
-           'EBNFCompiler']
+           'EBNFCompiler',
+           'grammar_changed']

 class EBNFGrammar(GrammarBase):
@@ -38,7 +38,7 @@ __all__ = ['WHITESPACE_KEYWORD',
            'ZOMBIE_PARSER',
            'Error',
            'Node',
-           'compact_sexpr',
+           'mock_syntax_tree',
            'traverse',
            'no_operation',
            'replace_by_single_child',
@@ -59,16 +59,22 @@ __all__ = ['WHITESPACE_KEYWORD',
 class MockParser:
     """
     MockParser objects can be used to reconstruct syntax trees from a
     serialized form like S-expressions or XML. Mock objects are needed,
     because Node objects require a parser object for instantiation.
     Mock objects have just enough properties to serve that purpose.

     Mock objects should not be used for anything other than
     syntax tree (re-)construction. In all other cases where a parser
     object substitute is needed, choose the singleton ZOMBIE_PARSER.
     """
     def __init__(self, name=''):
         self.name = name

     def __str__(self):
         return self.name or self.__class__.__name__

     def __call__(self, text):
         """Better call Saul ;-)"""
         return None, text

 class ZombieParser(MockParser):
     """
@@ -94,6 +100,10 @@ class ZombieParser(MockParser):
     def __deepcopy__(self, memo):
         return self

+    def __call__(self, text):
+        """Better call Saul ;-)"""
+        return None, text

 ZOMBIE_PARSER = ZombieParser()
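
As a hedged illustration of why these mock classes exist (the Node(parser, result) constructor shape is taken from mock_syntax_tree below; the leaf values are made up):

    from DHParser.syntaxtree import Node, MockParser, ZOMBIE_PARSER

    # A Node cannot be instantiated without a parser object; during tree
    # reconstruction a named MockParser stands in for the real parser.
    leaf = Node(MockParser('WORT_KLEIN'), 'facitergula')

    # For any other purpose that needs a parser substitute, the docstring
    # above says to use the ZOMBIE_PARSER singleton instead.
    placeholder = Node(ZOMBIE_PARSER, '')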
@@ -242,7 +252,7 @@ class Node:
         return head + '\n'.join([tab + dataF(s)
                                  for s in str(self.result).split('\n')]) + tail

-    def as_sexpr(self, src=None, prettyprint=True):
+    def as_sexpr(self, src=None):
         """
         Returns content as S-expression, i.e. in lisp-like form.
@@ -269,8 +279,7 @@ class Node:
                else "'%s'" % s if s.find("'") < 0 \
                else '"%s"' % s.replace('"', r'\"')
-        return self._tree_repr(' ', opening, lambda node: ')',
-                               pretty if prettyprint else lambda s: s)
+        return self._tree_repr(' ', opening, lambda node: ')', pretty)  # pretty if prettyprint else lambda s: s)

     def as_xml(self, src=None):
         """
@@ -395,7 +404,7 @@ def mock_syntax_tree(sexpr):
     >>> mock_syntax_tree("(a (b c))").as_sexpr()
     (a
         (b
-            c
+            "c"
         )
     )
     """
@@ -423,10 +432,20 @@ def mock_syntax_tree(sexpr):
     if sexpr[0] == '(':
         result = tuple(mock_syntax_tree(block) for block in next_block(sexpr))
     else:
-        m = re.match('\w+', sexpr)
-        result = sexpr[:m.end()]
-        sexpr = sexpr[m.end():].strip()
-        assert sexpr[0] == ')', sexpr
+        lines = []
+        while sexpr and sexpr[0] != ')':
+            for qm in ['"""', "'''", '"', "'"]:
+                m = re.match(qm + r'.*?' + qm, sexpr)
+                if m:
+                    i = len(qm)
+                    lines.append(sexpr[i:m.end() - i])
+                    sexpr = sexpr[m.end():].strip()
+                    break
+            else:
+                m = re.match(r'(?:(?!\)).)*', sexpr)
+                lines.append(sexpr[:m.end()])
+                sexpr = sexpr[m.end():]
+        result = "\n".join(lines)
     return Node(MockParser(name), result)
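
A short sketch of what the new quoted-string branch handles (assuming Node.result holds the reconstructed content, as the string-joining code above suggests; the input literal is illustrative):

    from DHParser.syntaxtree import mock_syntax_tree

    # Each quoted string becomes one line of a leaf's content; successive
    # strings are joined with '\n' by the while-loop above, so multi-line
    # leaf content survives serialization.
    node = mock_syntax_tree('(text "line one" "line two")')
    assert node.result == "line one\nline two"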
#!/usr/bin/python
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
from functools import partial
import sys
try:
@@ -19,10 +18,11 @@ from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \
     Lookbehind, Lookahead, Alternative, Pop, Required, Token, \
     Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \
     ZeroOrMore, Forward, NegativeLookahead, mixin_comment, full_compilation
-from DHParser.syntaxtree import Node, remove_enclosing_delimiters, remove_children_if, \
-    reduce_single_child, replace_by_single_child, remove_whitespace, TOKEN_KEYWORD, \
-    no_operation, remove_expendables, remove_tokens, flatten, WHITESPACE_KEYWORD, \
-    is_whitespace, is_expendable
+from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
+    remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
+    no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
+    WHITESPACE_KEYWORD, TOKEN_KEYWORD
 #######################################################################
 #

@@ -33,6 +33,7 @@ from DHParser.syntaxtree import Node, remove_enclosing_delimiters, remove_childr
 def MLWScanner(text):
     return text

 #######################################################################
 #
 # PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
@@ -142,7 +143,7 @@ class MLWGrammar(GrammarBase):
     DATEI_ENDE = !/./
     NIEMALS = /(?!.)/
     """
-    source_hash__ = "c286ef13eadbfca1130da7eee9f4011c"
+    source_hash__ = "9fce888d1b21b2d11a6228e0b97f9291"
     parser_initialization__ = "upon instatiation"
     COMMENT__ = r'#.*(?:\n|$)'
     WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'#.*(?:\n|$)')
@@ -220,7 +221,7 @@ def join_strings(node, delimiter='\n'):
     node.result = tuple(new_result)

-MLW_ASTTransform = {
+MLW_AST_transformation_table = {
     # AST Transformations for the MLW-grammar
     "Artikel": no_operation,
     "LemmaPosition":
@@ -268,7 +269,7 @@ MLW_ASTTransform = {
     "Autorinfo":
         [partial(remove_tokens, tokens={'AUTORIN', 'AUTOR'})],
     "WORT, WORT_KLEIN, WORT_GROSS, GROSSSCHRIFT":
         # test,
         [remove_expendables, reduce_single_child],
     "LEER": no_operation,
     "DATEI_ENDE": no_operation,
@@ -283,7 +284,8 @@ MLW_ASTTransform = {
         [remove_expendables, replace_by_single_child]
 }

-MLW_ASTPipeline = [MLW_ASTTransform]
+MLWTransform = partial(traverse, processing_table=MLW_AST_transformation_table)

 #######################################################################
 #
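
A sketch of the call pattern this change establishes: the list-of-tables pipeline is replaced by a single partially applied traversal (the syntax-tree variable is illustrative):

    from functools import partial
    from DHParser.syntaxtree import traverse

    # traverse with the table pre-bound: calling MLWTransform(tree)
    # dispatches each node by name to the operations listed in
    # MLW_AST_transformation_table.
    MLWTransform = partial(traverse, processing_table=MLW_AST_transformation_table)
    MLWTransform(concrete_syntax_tree)  # transforms the tree, presumably in place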
@@ -299,146 +301,151 @@ class MLWCompiler(CompilerBase):
         super(MLWCompiler, self).__init__()
         assert re.match('\w+\Z', grammar_name)
-    def Artikel(self, node):
+    def on_Artikel(self, node):
         return node

-    def LemmaPosition(self, node):
+    def on_LemmaPosition(self, node):
         pass

-    def Lemma(self, node):
+    def on_Lemma(self, node):
         pass

-    def _tll(self, node):
+    def on__tll(self, node):
         pass

-    def LemmaVarianten(self, node):
+    def on_LemmaVarianten(self, node):
         pass

-    def LVariante(self, node):
+    def on_LVariante(self, node):
         pass

-    def LVZusatz(self, node):
+    def on_LVZusatz(self, node):
         pass

-    def GrammatikPosition(self, node):
+    def on_GrammatikPosition(self, node):
         pass

-    def _wortart(self, node):
+    def on__wortart(self, node):
         pass

-    def GrammatikVarianten(self, node):
+    def on_GrammatikVarianten(self, node):
         pass

-    def GVariante(self, node):
+    def on_GVariante(self, node):
         pass

-    def Flexionen(self, node):
+    def on_Flexionen(self, node):
         pass

-    def Flexion(self, node):
+    def on_Flexion(self, node):
         pass

-    def _genus(self, node):
+    def on__genus(self, node):
         pass

-    def ArtikelKopf(self, node):
+    def on_ArtikelKopf(self, node):
         pass

-    def SchreibweisenPosition(self, node):
+    def on_SchreibweisenPosition(self, node):
         pass

-    def SWTyp(self, node):
+    def on_SWTyp(self, node):
         pass

-    def SWVariante(self, node):
+    def on_SWVariante(self, node):
         pass

-    def Schreibweise(self, node):
+    def on_Schreibweise(self, node):
         pass

-    def Beleg(self, node):
+    def on_Beleg(self, node):
         pass

-    def Verweis(self, node):
+    def on_Verweis(self, node):
         pass

-    def VerweisZiel(self, node):
+    def on_VerweisZiel(self, node):
         pass

-    def BedeutungsPosition(self, node):
+    def on_BedeutungsPosition(self, node):
         pass

-    def Bedeutung(self, node):
+    def on_Bedeutung(self, node):
         pass

-    def Bedeutungskategorie(self, node):
+    def on_Bedeutungskategorie(self, node):
         pass

-    def Interpretamente(self, node):
+    def on_Interpretamente(self, node):
         pass

-    def LateinischeBedeutung(self, node):
+    def on_LateinischeBedeutung(self, node):
         pass

-    def DeutscheBedeutung(self, node):
+    def on_DeutscheBedeutung(self, node):
         pass

-    def Belege(self, node):
+    def on_Belege(self, node):
         pass

-    def EinBeleg(self, node):
+    def on_EinBeleg(self, node):
         pass

-    def Zusatz(self, node):
+    def on_Zusatz(self, node):
         pass

-    def Autorinfo(self, node):
+    def on_Autorinfo(self, node):
         pass

-    def Name(self, node):
+    def on_Name(self, node):
         pass

-    def NAMENS_ABKÜRZUNG(self, node):
+    def on_NAMENS_ABKÜRZUNG(self, node):
         pass

-    def WORT(self, node):
+    def on_WORT(self, node):
         pass

-    def WORT_GROSS(self, node):
+    def on_WORT_GROSS(self, node):
         pass

-    def WORT_KLEIN(self, node):
+    def on_WORT_KLEIN(self, node):
         pass

-    def LAT_WORT(self, node):
+    def on_LAT_WORT(self, node):
         pass

-    def GROSSSCHRIFT(self, node):
+    def on_GROSSSCHRIFT(self, node):
         pass

-    def TRENNER(self, node):
+    def on_TRENNER(self, node):
         pass

-    def ZSPRUNG(self, node):
+    def on_ZSPRUNG(self, node):
         pass

-    def LEER(self, node):
+    def on_LEER(self, node):
         pass

-    def DATEI_ENDE(self, node):
+    def on_DATEI_ENDE(self, node):
         pass

-    def NIEMALS(self, node):
+    def on_NIEMALS(self, node):
         pass
 #######################################################################
 #
 # END OF DHPARSER-SECTIONS
 #
 #######################################################################

 def compile_MLW(source):
     """Compiles ``source`` and returns (result, errors, ast).
     """
     return full_compilation(source, MLWScanner,
-                            MLWGrammar(), MLW_ASTPipeline, MLWCompiler())
+                            MLWGrammar(), MLWTransform, MLWCompiler())

 if __name__ == "__main__":
     if len(sys.argv) > 1:
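
A hedged usage sketch for compile_MLW (the file name is invented; the triple return value follows the docstring above, and the branching mirrors the __main__ block):

    # Compile an MLW article; full_compilation returns the compilation
    # result, any error messages, and the (transformed) syntax tree.
    with open('fascitergula.mlw', 'r', encoding='utf-8') as f:
        result, errors, ast = compile_MLW(f.read())
    if errors:
        print(errors)
    else:
        print(result)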
@@ -450,4 +457,4 @@ if __name__ == "__main__":
         else:
             print(result)
     else:
-        print("Usage: MLW_compiler.py [FILENAME]")
\ No newline at end of file
+        print("Usage: MLW_compiler.py [FILENAME]")
<Artikel>
    <LemmaPosition>
        <TOKEN__>
            <RegExp>LEMMA</RegExp>
            <WSP__></WSP__>
        </TOKEN__>
        <Lemma>
            <Optional>
                <_tll>
                    <RegExp>*</RegExp>
                </_tll>
            </Optional>
            <WORT_KLEIN>facitergula</WORT_KLEIN>
            <LEER>
                <RegExp></RegExp>
            </LEER>
        </Lemma>
        <Optional>
            <LemmaVarianten>
                <TOKEN__>
                    <RegExp>VARIANTEN</RegExp>
                </TOKEN__>
                <LVariante>
                    <RegExp>fasc-itergula</RegExp>
                </LVariante>
                <ZeroOrMore>
                    <Sequence>
                        <TRENNER>
                            <OneOrMore>
                                <ZSPRUNG>
                                    <RegExp></RegExp>
                                    <WSP__></WSP__>
                                </ZSPRUNG>
                            </OneOrMore>
                        </TRENNER>
                        <LVariante>
                            <RegExp>fac-iet-ergula</RegExp>
                        </LVariante>
                    </Sequence>
                    <Sequence>
                        <TRENNER>
                            <OneOrMore>
                                <ZSPRUNG>
                                    <RegExp></RegExp>
                                    <WSP__></WSP__>
                                </ZSPRUNG>
                            </OneOrMore>
                        </TRENNER>
                        <LVariante>
                            <RegExp>fac-ist-ergula</RegExp>
                        </LVariante>
                    </Sequence>
                    <Sequence>
                        <TRENNER>
                            <OneOrMore>
                                <ZSPRUNG>
                                    <RegExp></RegExp>
                                    <WSP__></WSP__>
                                </ZSPRUNG>
                            </OneOrMore>
                        </TRENNER>
                        <LVariante>
                            <RegExp>fa-rcu-tergula</RegExp>
                        </LVariante>
                    </Sequence>
                </ZeroOrMore>
                <Optional>
                    <Sequence>
                        <TRENNER>
                            <OneOrMore>
                                <ZSPRUNG>
                                    <RegExp></RegExp>
                                </ZSPRUNG>
                                <ZSPRUNG>
                                    <RegExp></RegExp>
                                    <WSP__></WSP__>
                                </ZSPRUNG>
                            </OneOrMore>
                        </TRENNER>
                        <LVZusatz>
                            <TOKEN__>
                                <RegExp>ZUSATZ</RegExp>
                                <WSP__></WSP__>
                            </TOKEN__>
                            <TOKEN__>
                                <RegExp>sim.</RegExp>
                            </TOKEN__>
                        </LVZusatz>
                    </Sequence>
                </Optional>
                <Optional>