Starting from 2021-07-01, all LRZ GitLab users will be required to explicitly accept the GitLab Terms of Service. Please see the detailed information at https://doku.lrz.de/display/PUBLIC/GitLab and make sure that your projects conform to the requirements.

Commit 2b66993d authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- better syntaxtree.mock_syntax_tree() generation: Can now read actual ASTs in...

- better syntaxtree.mock_syntax_tree() generation: Can now read actual ASTs in S-expression form. Useful for testing of AST-transformation or -validation as long as these do not call isinstance() on any parser.
parent 525ad4bc
...@@ -37,10 +37,10 @@ from .syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_sing ...@@ -37,10 +37,10 @@ from .syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_sing
__all__ = ['EBNFGrammar', __all__ = ['EBNFGrammar',
'EBNFTransTable', 'EBNFTransform',
'EBNFCompilerError', 'EBNFCompilerError',
# 'Scanner', 'EBNFCompiler',
'EBNFCompiler'] 'grammar_changed']
class EBNFGrammar(GrammarBase): class EBNFGrammar(GrammarBase):
......
...@@ -38,7 +38,7 @@ __all__ = ['WHITESPACE_KEYWORD', ...@@ -38,7 +38,7 @@ __all__ = ['WHITESPACE_KEYWORD',
'ZOMBIE_PARSER', 'ZOMBIE_PARSER',
'Error', 'Error',
'Node', 'Node',
'compact_sexpr', 'mock_syntax_tree',
'traverse', 'traverse',
'no_operation', 'no_operation',
'replace_by_single_child', 'replace_by_single_child',
...@@ -59,16 +59,22 @@ __all__ = ['WHITESPACE_KEYWORD', ...@@ -59,16 +59,22 @@ __all__ = ['WHITESPACE_KEYWORD',
class MockParser: class MockParser:
"""
MockParser objects can be used to reconstruct syntax trees from a
serialized form like S-expressions or XML. Mock objects are needed,
because Node objects require a parser object for instantiation.
Mock objects have just enough properties to serve that purpose.
Mock objects should not be used for anything other than
syntax tree (re-)construction. In all other cases where a parser
object substitute is needed, chose the singleton ZOMBIE_PARSER.
"""
def __init__(self, name=''): def __init__(self, name=''):
self.name = name self.name = name
def __str__(self): def __str__(self):
return self.name or self.__class__.__name__ return self.name or self.__class__.__name__
def __call__(self, text):
"""Better call Saul ;-)"""
return None, text
class ZombieParser(MockParser): class ZombieParser(MockParser):
""" """
...@@ -94,6 +100,10 @@ class ZombieParser(MockParser): ...@@ -94,6 +100,10 @@ class ZombieParser(MockParser):
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
return self return self
def __call__(self, text):
"""Better call Saul ;-)"""
return None, text
ZOMBIE_PARSER = ZombieParser() ZOMBIE_PARSER = ZombieParser()
...@@ -242,7 +252,7 @@ class Node: ...@@ -242,7 +252,7 @@ class Node:
return head + '\n'.join([tab + dataF(s) return head + '\n'.join([tab + dataF(s)
for s in str(self.result).split('\n')]) + tail for s in str(self.result).split('\n')]) + tail
def as_sexpr(self, src=None, prettyprint=True): def as_sexpr(self, src=None):
""" """
Returns content as S-expression, i.e. in lisp-like form. Returns content as S-expression, i.e. in lisp-like form.
...@@ -269,8 +279,7 @@ class Node: ...@@ -269,8 +279,7 @@ class Node:
else "'%s'" % s if s.find("'") < 0 \ else "'%s'" % s if s.find("'") < 0 \
else '"%s"' % s.replace('"', r'\"') else '"%s"' % s.replace('"', r'\"')
return self._tree_repr(' ', opening, lambda node: ')', return self._tree_repr(' ', opening, lambda node: ')', pretty) # pretty if prettyprint else lambda s: s)
pretty if prettyprint else lambda s: s)
def as_xml(self, src=None): def as_xml(self, src=None):
""" """
...@@ -395,7 +404,7 @@ def mock_syntax_tree(sexpr): ...@@ -395,7 +404,7 @@ def mock_syntax_tree(sexpr):
>>> mock_syntax_tree("(a (b c))").as_sexpr() >>> mock_syntax_tree("(a (b c))").as_sexpr()
(a (a
(b (b
c "c"
) )
) )
""" """
...@@ -423,10 +432,20 @@ def mock_syntax_tree(sexpr): ...@@ -423,10 +432,20 @@ def mock_syntax_tree(sexpr):
if sexpr[0] == '(': if sexpr[0] == '(':
result = tuple(mock_syntax_tree(block) for block in next_block(sexpr)) result = tuple(mock_syntax_tree(block) for block in next_block(sexpr))
else: else:
m = re.match('\w+', sexpr) lines = []
result = sexpr[:m.end()] while sexpr and sexpr[0] != ')':
sexpr = sexpr[m.end():].strip() for qm in ['"""', "'''", '"', "'"]:
assert sexpr[0] == ')', sexpr m = re.match(qm + r'.*?' + qm, sexpr)
if m:
i = len(qm)
lines.append(sexpr[i:m.end() - i])
sexpr = sexpr[m.end():].strip()
break
else:
m = re.match(r'(?:(?!\)).)*', sexpr)
lines.append(sexpr[:m.end()])
sexpr = sexpr[m.end():]
result = "\n".join(lines)
return Node(MockParser(name), result) return Node(MockParser(name), result)
......
#!/usr/bin/python #!/usr/bin/python
####################################################################### #######################################################################
# #
# SYMBOLS SECTION - Can be edited. Changes will be preserved. # SYMBOLS SECTION - Can be edited. Changes will be preserved.
# #
####################################################################### #######################################################################
from functools import partial from functools import partial
import sys import sys
try: try:
...@@ -19,10 +18,11 @@ from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \ ...@@ -19,10 +18,11 @@ from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \ Lookbehind, Lookahead, Alternative, Pop, Required, Token, \
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \ Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, full_compilation ZeroOrMore, Forward, NegativeLookahead, mixin_comment, full_compilation
from DHParser.syntaxtree import Node, remove_enclosing_delimiters, remove_children_if, \ from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
reduce_single_child, replace_by_single_child, remove_whitespace, TOKEN_KEYWORD, \ remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
no_operation, remove_expendables, remove_tokens, flatten, WHITESPACE_KEYWORD, \ no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
is_whitespace, is_expendable WHITESPACE_KEYWORD, TOKEN_KEYWORD
####################################################################### #######################################################################
# #
...@@ -33,6 +33,7 @@ from DHParser.syntaxtree import Node, remove_enclosing_delimiters, remove_childr ...@@ -33,6 +33,7 @@ from DHParser.syntaxtree import Node, remove_enclosing_delimiters, remove_childr
def MLWScanner(text): def MLWScanner(text):
return text return text
####################################################################### #######################################################################
# #
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN! # PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
...@@ -142,7 +143,7 @@ class MLWGrammar(GrammarBase): ...@@ -142,7 +143,7 @@ class MLWGrammar(GrammarBase):
DATEI_ENDE = !/./ DATEI_ENDE = !/./
NIEMALS = /(?!.)/ NIEMALS = /(?!.)/
""" """
source_hash__ = "c286ef13eadbfca1130da7eee9f4011c" source_hash__ = "9fce888d1b21b2d11a6228e0b97f9291"
parser_initialization__ = "upon instatiation" parser_initialization__ = "upon instatiation"
COMMENT__ = r'#.*(?:\n|$)' COMMENT__ = r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'#.*(?:\n|$)') WSP__ = mixin_comment(whitespace=r'[\t ]*', comment=r'#.*(?:\n|$)')
...@@ -220,7 +221,7 @@ def join_strings(node, delimiter='\n'): ...@@ -220,7 +221,7 @@ def join_strings(node, delimiter='\n'):
node.result = tuple(new_result) node.result = tuple(new_result)
MLW_ASTTransform = { MLW_AST_transformation_table = {
# AST Transformations for the MLW-grammar # AST Transformations for the MLW-grammar
"Artikel": no_operation, "Artikel": no_operation,
"LemmaPosition": "LemmaPosition":
...@@ -268,7 +269,7 @@ MLW_ASTTransform = { ...@@ -268,7 +269,7 @@ MLW_ASTTransform = {
"Autorinfo": "Autorinfo":
[partial(remove_tokens, tokens={'AUTORIN', 'AUTOR'})], [partial(remove_tokens, tokens={'AUTORIN', 'AUTOR'})],
"WORT, WORT_KLEIN, WORT_GROSS, GROSSSCHRIFT": "WORT, WORT_KLEIN, WORT_GROSS, GROSSSCHRIFT":
# test, # test,
[remove_expendables, reduce_single_child], [remove_expendables, reduce_single_child],
"LEER": no_operation, "LEER": no_operation,
"DATEI_ENDE": no_operation, "DATEI_ENDE": no_operation,
...@@ -283,7 +284,8 @@ MLW_ASTTransform = { ...@@ -283,7 +284,8 @@ MLW_ASTTransform = {
[remove_expendables, replace_by_single_child] [remove_expendables, replace_by_single_child]
} }
MLW_ASTPipeline = [MLW_ASTTransform] MLWTransform = partial(traverse, processing_table=MLW_AST_transformation_table)
####################################################################### #######################################################################
# #
...@@ -299,146 +301,151 @@ class MLWCompiler(CompilerBase): ...@@ -299,146 +301,151 @@ class MLWCompiler(CompilerBase):
super(MLWCompiler, self).__init__() super(MLWCompiler, self).__init__()
assert re.match('\w+\Z', grammar_name) assert re.match('\w+\Z', grammar_name)
def Artikel(self, node): def on_Artikel(self, node):
return node return node
def LemmaPosition(self, node): def on_LemmaPosition(self, node):
pass
def on_Lemma(self, node):
pass pass
def Lemma(self, node): def on__tll(self, node):
pass pass
def _tll(self, node): def on_LemmaVarianten(self, node):
pass pass
def LemmaVarianten(self, node): def on_LVariante(self, node):
pass pass
def LVariante(self, node): def on_LVZusatz(self, node):
pass pass
def LVZusatz(self, node): def on_GrammatikPosition(self, node):
pass pass
def GrammatikPosition(self, node): def on__wortart(self, node):
pass pass
def _wortart(self, node): def on_GrammatikVarianten(self, node):
pass pass
def GrammatikVarianten(self, node): def on_GVariante(self, node):
pass pass
def GVariante(self, node): def on_Flexionen(self, node):
pass pass
def Flexionen(self, node): def on_Flexion(self, node):
pass pass
def Flexion(self, node): def on__genus(self, node):
pass pass
def _genus(self, node): def on_ArtikelKopf(self, node):
pass pass
def ArtikelKopf(self, node): def on_SchreibweisenPosition(self, node):
pass pass
def SchreibweisenPosition(self, node): def on_SWTyp(self, node):
pass pass
def SWTyp(self, node): def on_SWVariante(self, node):
pass pass
def SWVariante(self, node): def on_Schreibweise(self, node):
pass pass
def Schreibweise(self, node): def on_Beleg(self, node):
pass pass
def Beleg(self, node): def on_Verweis(self, node):
pass pass
def Verweis(self, node): def on_VerweisZiel(self, node):
pass pass
def VerweisZiel(self, node): def on_BedeutungsPosition(self, node):
pass pass
def BedeutungsPosition(self, node): def on_Bedeutung(self, node):
pass pass
def Bedeutung(self, node): def on_Bedeutungskategorie(self, node):
pass pass
def Bedeutungskategorie(self, node): def on_Interpretamente(self, node):
pass pass
def Interpretamente(self, node): def on_LateinischeBedeutung(self, node):
pass pass
def LateinischeBedeutung(self, node): def on_DeutscheBedeutung(self, node):
pass pass
def DeutscheBedeutung(self, node): def on_Belege(self, node):
pass pass
def Belege(self, node): def on_EinBeleg(self, node):
pass pass
def EinBeleg(self, node): def on_Zusatz(self, node):
pass pass
def Zusatz(self, node): def on_Autorinfo(self, node):
pass pass
def Autorinfo(self, node): def on_Name(self, node):
pass pass
def Name(self, node): def on_NAMENS_ABKÜRZUNG(self, node):
pass pass
def WORT(self, node): def on_WORT(self, node):
pass pass
def WORT_GROSS(self, node): def on_WORT_GROSS(self, node):
pass pass
def WORT_KLEIN(self, node): def on_WORT_KLEIN(self, node):
pass pass
def LAT_WORT(self, node): def on_LAT_WORT(self, node):
pass pass
def GROSSSCHRIFT(self, node): def on_GROSSSCHRIFT(self, node):
pass pass
def TRENNER(self, node): def on_TRENNER(self, node):
pass pass
def ZSPRUNG(self, node): def on_ZSPRUNG(self, node):
pass pass
def LEER(self, node): def on_LEER(self, node):
pass pass
def DATEI_ENDE(self, node): def on_DATEI_ENDE(self, node):
pass pass
def NIEMALS(self, node): def on_NIEMALS(self, node):
pass pass
####################################################################### #######################################################################
# #
# END OF DHPARSER-SECTIONS # END OF DHPARSER-SECTIONS
# #
####################################################################### #######################################################################
def compile_MLW(source): def compile_MLW(source):
"""Compiles ``source`` and returns (result, errors, ast). """Compiles ``source`` and returns (result, errors, ast).
""" """
return full_compilation(source, MLWScanner, return full_compilation(source, MLWScanner,
MLWGrammar(), MLW_ASTPipeline, MLWCompiler()) MLWGrammar(), MLWTransform, MLWCompiler())
if __name__ == "__main__": if __name__ == "__main__":
if len(sys.argv) > 1: if len(sys.argv) > 1:
...@@ -450,4 +457,4 @@ if __name__ == "__main__": ...@@ -450,4 +457,4 @@ if __name__ == "__main__":
else: else:
print(result) print(result)
else: else:
print("Usage: MLW_compiler.py [FILENAME]") print("Usage: MLW_compiler.py [FILENAME]")
\ No newline at end of file
This diff is collapsed.
#!/usr/bin/python3
"""test_DHParser.py - tests of global aspects of DHParser
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from DHParser.toolkit import *
from DHParser.syntaxtree import *
from DHParser.parsers import *
from DHParser.ebnf import *
from DHParser.dsl import *
if __name__ == "__main__":
from run import runner
runner("", globals())
\ No newline at end of file
...@@ -44,10 +44,23 @@ class TestSExpr: ...@@ -44,10 +44,23 @@ class TestSExpr:
def test_compact_sexpr(self): def test_compact_sexpr(self):
assert compact_sexpr("(a\n (b\n c\n )\n)\n") == "(a (b c))" assert compact_sexpr("(a\n (b\n c\n )\n)\n") == "(a (b c))"
def test_selftest_from_sexpr(self): def test_mock_syntax_tree(self):
sexpr = '(a (b c) (d e) (f (g h)))' sexpr = '(a (b c) (d e) (f (g h)))'
tree = mock_syntax_tree(sexpr) tree = mock_syntax_tree(sexpr)
assert compact_sexpr(tree.as_sexpr(prettyprint=False)) == sexpr assert compact_sexpr(tree.as_sexpr().replace('"', '')) == sexpr
# test different quotation marks
sexpr = '''(a (b """c""" 'k' "l") (d e) (f (g h)))'''
sexpr_stripped = '(a (b c k l) (d e) (f (g h)))'
tree = mock_syntax_tree(sexpr)
assert compact_sexpr(tree.as_sexpr().replace('"', '')) == sexpr_stripped
sexpr_clean = '(a (b "c" "k" "l") (d "e") (f (g "h")))'
tree = mock_syntax_tree(sexpr_clean)
assert compact_sexpr(tree.as_sexpr()) == sexpr_clean
tree = mock_syntax_tree(sexpr_stripped)
assert compact_sexpr(tree.as_sexpr()) == '(a (b "c k l") (d "e") (f (g "h")))'
class TestNode: class TestNode:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment