
Commit 2b66993d authored by Eckhart Arnold


- better syntaxtree.mock_syntax_tree() generation: Can now read actual ASTs in S-expression form. Useful for testing AST-transformations or -validations, as long as these do not call isinstance() on any parser.
parent 525ad4bc
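
For illustration, a minimal sketch of the kind of test this change enables. It uses only names that appear in this diff (mock_syntax_tree, traverse, compact_sexpr, reduce_single_child, no_operation); the node names and the transformation table are invented for the example:

from DHParser.syntaxtree import mock_syntax_tree, traverse, compact_sexpr, \
    reduce_single_child, no_operation

# Build an AST directly from its S-expression serialization instead of
# running a real parser; the nodes carry MockParser objects internally.
ast = mock_syntax_tree('(phrase (word (LETTERS "hello")) (word (LETTERS "world")))')

# Invented transformation table: fold the LETTERS layer into each word node.
table = {"phrase": no_operation,
         "word": [reduce_single_child],
         "LETTERS": no_operation}
traverse(ast, processing_table=table)

print(compact_sexpr(ast.as_sexpr()))   # roughly: (phrase (word "hello") (word "world"))
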
@@ -37,10 +37,10 @@ from .syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_sing
__all__ = ['EBNFGrammar',
'EBNFTransTable',
'EBNFTransform',
'EBNFCompilerError',
# 'Scanner',
'EBNFCompiler']
'EBNFCompiler',
'grammar_changed']
class EBNFGrammar(GrammarBase):
@@ -38,7 +38,7 @@ __all__ = ['WHITESPACE_KEYWORD',
'ZOMBIE_PARSER',
'Error',
'Node',
'compact_sexpr',
'mock_syntax_tree',
'traverse',
'no_operation',
'replace_by_single_child',
@@ -59,16 +59,22 @@ __all__ = ['WHITESPACE_KEYWORD',
class MockParser:
"""
MockParser objects can be used to reconstruct syntax trees from a
serialized form like S-expressions or XML. Mock objects are needed,
because Node objects require a parser object for instantiation.
Mock objects have just enough properties to serve that purpose.
Mock objects should not be used for anything other than
syntax tree (re-)construction. In all other cases where a parser
object substitute is needed, choose the singleton ZOMBIE_PARSER.
"""
def __init__(self, name=''):
self.name = name
def __str__(self):
return self.name or self.__class__.__name__
def __call__(self, text):
"""Better call Saul ;-)"""
return None, text
class ZombieParser(MockParser):
"""
@@ -94,6 +100,10 @@ class ZombieParser(MockParser):
def __deepcopy__(self, memo):
return self
def __call__(self, text):
"""Better call Saul ;-)"""
return None, text
ZOMBIE_PARSER = ZombieParser()
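
For illustration, a small hand-construction sketch of what the MockParser docstring above describes; the node names are invented and the printed serialization is only indicative:

from DHParser.syntaxtree import Node, MockParser, ZOMBIE_PARSER, compact_sexpr

# A MockParser merely supplies the tag name a Node needs for instantiation.
leaf = Node(MockParser('word'), "example")
root = Node(MockParser('phrase'), (leaf,))
print(compact_sexpr(root.as_sexpr()))   # roughly: (phrase (word "example"))

# Anywhere else that a parser substitute is needed, use the singleton instead:
placeholder = Node(ZOMBIE_PARSER, '')
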
@@ -242,7 +252,7 @@ class Node:
return head + '\n'.join([tab + dataF(s)
for s in str(self.result).split('\n')]) + tail
def as_sexpr(self, src=None, prettyprint=True):
def as_sexpr(self, src=None):
"""
Returns content as S-expression, i.e. in lisp-like form.
@@ -269,8 +279,7 @@ class Node:
else "'%s'" % s if s.find("'") < 0 \
else '"%s"' % s.replace('"', r'\"')
return self._tree_repr(' ', opening, lambda node: ')',
pretty if prettyprint else lambda s: s)
return self._tree_repr(' ', opening, lambda node: ')', pretty) # pretty if prettyprint else lambda s: s)
def as_xml(self, src=None):
"""
@@ -395,7 +404,7 @@ def mock_syntax_tree(sexpr):
>>> mock_syntax_tree("(a (b c))").as_sexpr()
(a
(b
c
"c"
)
)
"""
@@ -423,10 +432,20 @@ def mock_syntax_tree(sexpr):
if sexpr[0] == '(':
result = tuple(mock_syntax_tree(block) for block in next_block(sexpr))
else:
m = re.match('\w+', sexpr)
result = sexpr[:m.end()]
sexpr = sexpr[m.end():].strip()
assert sexpr[0] == ')', sexpr
lines = []
while sexpr and sexpr[0] != ')':
for qm in ['"""', "'''", '"', "'"]:
m = re.match(qm + r'.*?' + qm, sexpr)
if m:
i = len(qm)
lines.append(sexpr[i:m.end() - i])
sexpr = sexpr[m.end():].strip()
break
else:
m = re.match(r'(?:(?!\)).)*', sexpr)
lines.append(sexpr[:m.end()])
sexpr = sexpr[m.end():]
result = "\n".join(lines)
return Node(MockParser(name), result)
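
A brief sketch of the quoting rules that the new branch above implements, mirroring the test case further down; the exact serialization returned by as_sexpr() is only indicative:

from DHParser.syntaxtree import mock_syntax_tree, compact_sexpr

# Leaf content may be wrapped in ", ', """ or '''; unquoted tokens are still
# read verbatim up to the next closing parenthesis.
tree = mock_syntax_tree('''(a (b """c""" 'k' "l") (d e))''')
print(compact_sexpr(tree.as_sexpr()))
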
#!/usr/bin/python
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
from functools import partial
import sys
try:
@@ -19,10 +18,11 @@ from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, full_compilation
from DHParser.syntaxtree import Node, remove_enclosing_delimiters, remove_children_if, \
reduce_single_child, replace_by_single_child, remove_whitespace, TOKEN_KEYWORD, \
no_operation, remove_expendables, remove_tokens, flatten, WHITESPACE_KEYWORD, \
is_whitespace, is_expendable
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
WHITESPACE_KEYWORD, TOKEN_KEYWORD
#######################################################################
#
@@ -33,6 +33,7 @@ from DHParser.syntaxtree import Node, remove_enclosing_delimiters, remove_childr
def MLWScanner(text):
return text
#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
@@ -142,7 +143,7 @@ class MLWGrammar(GrammarBase):
DATEI_ENDE = !/./
NIEMALS = /(?!.)/
"""
source_hash__ = "c286ef13eadbfca1130da7eee9f4011c"
source_hash__ = "9fce888d1b21b2d11a6228e0b97f9291"
parser_initialization__ = "upon instatiation"
COMMENT__ = r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'#.*(?:\n|$)')
@@ -220,7 +221,7 @@ def join_strings(node, delimiter='\n'):
node.result = tuple(new_result)
MLW_ASTTransform = {
MLW_AST_transformation_table = {
# AST Transformations for the MLW-grammar
"Artikel": no_operation,
"LemmaPosition":
@@ -268,7 +269,7 @@ MLW_ASTTransform = {
"Autorinfo":
[partial(remove_tokens, tokens={'AUTORIN', 'AUTOR'})],
"WORT, WORT_KLEIN, WORT_GROSS, GROSSSCHRIFT":
# test,
# test,
[remove_expendables, reduce_single_child],
"LEER": no_operation,
"DATEI_ENDE": no_operation,
@@ -283,7 +284,8 @@ MLW_ASTTransform = {
[remove_expendables, replace_by_single_child]
}
MLW_ASTPipeline = [MLW_ASTTransform]
MLWTransform = partial(traverse, processing_table=MLW_AST_transformation_table)
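
To make the switch from a pipeline list to a single callable concrete, here is a self-contained toy version of the same partial(traverse, ...) pattern; the node names and the do-nothing table are invented for the example:

from functools import partial
from DHParser.syntaxtree import mock_syntax_tree, traverse, no_operation, compact_sexpr

# A do-nothing table, just to show the plumbing that the compilation call expects:
toy_table = {"a": no_operation, "b": no_operation}
ToyTransform = partial(traverse, processing_table=toy_table)

tree = mock_syntax_tree('(a (b "c"))')
ToyTransform(tree)                     # a one-argument callable, like MLWTransform
print(compact_sexpr(tree.as_sexpr()))  # roughly: (a (b "c"))
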
#######################################################################
#
@@ -299,146 +301,151 @@ class MLWCompiler(CompilerBase):
super(MLWCompiler, self).__init__()
assert re.match('\w+\Z', grammar_name)
def Artikel(self, node):
def on_Artikel(self, node):
return node
def LemmaPosition(self, node):
def on_LemmaPosition(self, node):
pass
def on_Lemma(self, node):
pass
def Lemma(self, node):
def on__tll(self, node):
pass
def _tll(self, node):
def on_LemmaVarianten(self, node):
pass
def LemmaVarianten(self, node):
def on_LVariante(self, node):
pass
def LVariante(self, node):
def on_LVZusatz(self, node):
pass
def LVZusatz(self, node):
def on_GrammatikPosition(self, node):
pass
def GrammatikPosition(self, node):
def on__wortart(self, node):
pass
def _wortart(self, node):
def on_GrammatikVarianten(self, node):
pass
def GrammatikVarianten(self, node):
def on_GVariante(self, node):
pass
def GVariante(self, node):
def on_Flexionen(self, node):
pass
def Flexionen(self, node):
def on_Flexion(self, node):
pass
def Flexion(self, node):
def on__genus(self, node):
pass
def _genus(self, node):
def on_ArtikelKopf(self, node):
pass
def ArtikelKopf(self, node):
def on_SchreibweisenPosition(self, node):
pass
def SchreibweisenPosition(self, node):
def on_SWTyp(self, node):
pass
def SWTyp(self, node):
def on_SWVariante(self, node):
pass
def SWVariante(self, node):
def on_Schreibweise(self, node):
pass
def Schreibweise(self, node):
def on_Beleg(self, node):
pass
def Beleg(self, node):
def on_Verweis(self, node):
pass
def Verweis(self, node):
def on_VerweisZiel(self, node):
pass
def VerweisZiel(self, node):
def on_BedeutungsPosition(self, node):
pass
def BedeutungsPosition(self, node):
def on_Bedeutung(self, node):
pass
def Bedeutung(self, node):
def on_Bedeutungskategorie(self, node):
pass
def Bedeutungskategorie(self, node):
def on_Interpretamente(self, node):
pass
def Interpretamente(self, node):
def on_LateinischeBedeutung(self, node):
pass
def LateinischeBedeutung(self, node):
def on_DeutscheBedeutung(self, node):
pass
def DeutscheBedeutung(self, node):
def on_Belege(self, node):
pass
def Belege(self, node):
def on_EinBeleg(self, node):
pass
def EinBeleg(self, node):
def on_Zusatz(self, node):
pass
def Zusatz(self, node):
def on_Autorinfo(self, node):
pass
def Autorinfo(self, node):
def on_Name(self, node):
pass
def Name(self, node):
def on_NAMENS_ABKÜRZUNG(self, node):
pass
def WORT(self, node):
def on_WORT(self, node):
pass
def WORT_GROSS(self, node):
def on_WORT_GROSS(self, node):
pass
def WORT_KLEIN(self, node):
def on_WORT_KLEIN(self, node):
pass
def LAT_WORT(self, node):
def on_LAT_WORT(self, node):
pass
def GROSSSCHRIFT(self, node):
def on_GROSSSCHRIFT(self, node):
pass
def TRENNER(self, node):
def on_TRENNER(self, node):
pass
def ZSPRUNG(self, node):
def on_ZSPRUNG(self, node):
pass
def LEER(self, node):
def on_LEER(self, node):
pass
def DATEI_ENDE(self, node):
def on_DATEI_ENDE(self, node):
pass
def NIEMALS(self, node):
def on_NIEMALS(self, node):
pass
#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################
def compile_MLW(source):
"""Compiles ``source`` and returns (result, errors, ast).
"""
return full_compilation(source, MLWScanner,
MLWGrammar(), MLW_ASTPipeline, MLWCompiler())
MLWGrammar(), MLWTransform, MLWCompiler())
if __name__ == "__main__":
if len(sys.argv) > 1:
@@ -450,4 +457,4 @@ if __name__ == "__main__":
else:
print(result)
else:
print("Usage: MLW_compiler.py [FILENAME]")
\ No newline at end of file
print("Usage: MLW_compiler.py [FILENAME]")
#!/usr/bin/python3
"""test_DHParser.py - tests of global aspects of DHParser
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from DHParser.toolkit import *
from DHParser.syntaxtree import *
from DHParser.parsers import *
from DHParser.ebnf import *
from DHParser.dsl import *
if __name__ == "__main__":
from run import runner
runner("", globals())
\ No newline at end of file
@@ -44,10 +44,23 @@ class TestSExpr:
def test_compact_sexpr(self):
assert compact_sexpr("(a\n (b\n c\n )\n)\n") == "(a (b c))"
def test_selftest_from_sexpr(self):
def test_mock_syntax_tree(self):
sexpr = '(a (b c) (d e) (f (g h)))'
tree = mock_syntax_tree(sexpr)
assert compact_sexpr(tree.as_sexpr(prettyprint=False)) == sexpr
assert compact_sexpr(tree.as_sexpr().replace('"', '')) == sexpr
# test different quotation marks
sexpr = '''(a (b """c""" 'k' "l") (d e) (f (g h)))'''
sexpr_stripped = '(a (b c k l) (d e) (f (g h)))'
tree = mock_syntax_tree(sexpr)
assert compact_sexpr(tree.as_sexpr().replace('"', '')) == sexpr_stripped
sexpr_clean = '(a (b "c" "k" "l") (d "e") (f (g "h")))'
tree = mock_syntax_tree(sexpr_clean)
assert compact_sexpr(tree.as_sexpr()) == sexpr_clean
tree = mock_syntax_tree(sexpr_stripped)
assert compact_sexpr(tree.as_sexpr()) == '(a (b "c k l") (d "e") (f (g "h")))'
class TestNode: