Commit 228c20d1 authored by eckhart

- examples/EBNF/grammar_tests: grammar tests completed

parent 8e4dc4c5
......@@ -27,7 +27,7 @@ overwriting the values in the CONFIG_PRESET dictionary.
The recommended way to use a different configuration in any custom code using
DHParser is to use the second method, i.e. to overwrite the values for which
this is desired in the CONFIG_PRESET dictionary right after the start of the
programm and before any DHParser-function is invoked.
program and before any DHParser-function is invoked.
"""
from typing import Dict, Hashable, Any
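As an illustration of the second method described above, here is a minimal sketch of overriding presets at program start; the import path DHParser.configuration is an assumption, the keys are the presets defined in this module:

from DHParser.configuration import CONFIG_PRESET  # import path is an assumption

# Override presets before the first DHParser function is invoked:
CONFIG_PRESET['flatten_tree_while_parsing'] = False
CONFIG_PRESET['cst_serialization'] = 'S-expression'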
......@@ -42,9 +42,9 @@ CONFIG_PRESET = dict() # type: Dict[Hashable, Any]
# Default value: False
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
# Flattens anonymous nodes, by removing the node and adding its childeren
# Flattens anonymous nodes, by removing the node and adding its children
# to the parent node in place of the removed node. This is a very useful
# optimization that should be truned on except for learning or teaching
# optimization that should be turned on except for learning or teaching
# purposes, in which case a concrete syntax tree that more diligently
# reflects the parser structure may be helpful.
CONFIG_PRESET['flatten_tree_while_parsing'] = True
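To make the effect of this optimization concrete, a rough sketch of the flattening step follows; this is not DHParser's actual implementation, and the Node attributes used (children, tag_name, result) are assumptions:

def flatten_anonymous(node):
    # Splice the children of anonymous child nodes (tag names starting
    # with ':') directly into the parent, dropping the anonymous node.
    new_children = []
    for child in node.children:
        if child.tag_name.startswith(':') and child.children:
            new_children.extend(child.children)
        else:
            new_children.append(child)
    node.result = tuple(new_children)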
......@@ -59,7 +59,7 @@ CONFIG_PRESET['flatten_tree_while_parsing'] = True
# # (sub-)class. This works also for parser trees that are
# # handwritten in Python using the parser classes from module
# # `parse`. It slightly slows down instantiation of Grammar
# # clasees, though.
# # classes, though.
# # 'none' - no static analysis at all (not recommended).
# # Default value: "early"
# CONFIG_PRESET['static_analysis'] = "early"
......@@ -71,7 +71,7 @@ CONFIG_PRESET['flatten_tree_while_parsing'] = True
# 'compact' - compact tree output, i.e. children are represented
# on indented lines with no opening or closing tags,
# brackets etc.
# Default values: "compact" for conctrete syntax trees and "XML" for
# Default values: "compact" for concrete syntax trees and "XML" for
# abstract syntax trees and "S-expression" for any
# other kind of tree.
CONFIG_PRESET['cst_serialization'] = "compact"
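For illustration, the same tree rendered in two of the serializations named above; this is only a sketch and assumes that parse_sxpr and the Node methods can be imported as shown:

from DHParser.syntaxtree import parse_sxpr  # import path is an assumption

tree = parse_sxpr('(term (symbol "a") (symbol "b"))')
print(tree.as_sxpr())  # S-expression serialization
print(tree.as_xml())   # XML serialization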
......
......@@ -144,7 +144,10 @@ def unit_from_config(config_str):
for i in range(1, len(lines)):
lines[i] = lines[i][indent:]
testcode = '\n'.join(lines)
unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
# unit.setdefault(symbol, OD()).setdefault(stage, OD())[testkey] = testcode
test = unit.setdefault(symbol, OD()).setdefault(stage, OD())
assert testkey not in test, "Key %s already exists in test %s:%s !" % (testkey, stage, symbol)
test[testkey] = testcode
pos = eat_comments(cfg, entry_match.span()[1])
entry_match = RX_ENTRY.match(cfg, pos)
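For orientation, a sketch of the nested structure that unit_from_config builds up (the concrete keys and test codes are made up):

unit = {
    'literal': {                              # grammar symbol under test
        'match': {'M1': '"("', 'M2': "'+'"},  # test name -> test code
        'fail':  {'F1': ' "leading blank"'},
    }
}

The assert added above guards against the same test name, say 'M1', being defined twice within one [match:...] or [fail:...] section of a test file.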
......@@ -210,7 +213,8 @@ def unit_from_file(filename):
intersection.sort()
if intersection:
errors.append("Same names %s assigned to match and fail test "
"of parser %s." % (str(intersection), parser_name))
"of parser %s." % (str(intersection), parser_name) +
" Please, use different names!")
if errors:
raise EnvironmentError("Error(s) in Testfile %s :\n" % filename
+ '\n'.join(errors))
......@@ -244,6 +248,12 @@ def get_report(test_unit):
lines = txt.split('\n')
lines[0] = ' ' + lines[0]
return "\n ".join(lines)
def flatten(serialization):
if serialization.lstrip().startswith('(') and serialization.count('\n') <= 16:
return flatten_sxpr(serialization)
return serialization
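# Illustration of the helper above (the example string is made up):
#     flatten_sxpr('(a\n  (b "x")\n  (c "y"))')  ->  '(a (b "x") (c "y"))'
# Short S-expressions are collapsed onto one line for the report; longer ones
# (more than 16 line breaks) and non-S-expression output are left untouched.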
report = []
for parser_name, tests in test_unit.items():
heading = 'Test of parser: "%s"' % parser_name
......@@ -261,10 +271,10 @@ def get_report(test_unit):
cst = tests.get('__cst__', {}).get(test_name, None)
if cst and (not ast or str(test_name).endswith('*')):
report.append('\n### CST')
report.append(indent(serialize(cst, 'cst')))
report.append(indent(flatten(serialize(cst, 'cst'))))
if ast:
report.append('\n### AST')
report.append(indent(serialize(ast, 'ast')))
report.append(indent(flatten(serialize(ast, 'ast'))))
for test_name, test_code in tests.get('fail', dict()).items():
heading = 'Fail-test "%s"' % test_name
report.append('\n%s\n%s\n' % (heading, '-' * len(heading)))
......
expression = term { ("+" | "-") term}
term = factor { ("*"|"/") factor}
factor = constant | variable | "(" expression ")"
variable = "x" | "y" | "z"
constant = digit {digit}
digit = "0" | "1" | "..." | "9"
test = digit constant variable
#!/usr/bin/python3
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
from functools import partial
import os
import sys
sys.path.extend(['../../', '../', './'])
try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, \
traverse, remove_children_if, is_anonymous, Whitespace, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip
#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def ArithmeticPreprocessor(text):
return text, lambda i: i
def get_preprocessor() -> PreprocessorFunc:
return ArithmeticPreprocessor
#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################
class ArithmeticGrammar(Grammar):
r"""Parser for an Arithmetic source file.
"""
constant = Forward()
digit = Forward()
expression = Forward()
variable = Forward()
source_hash__ = "43a6a760b591f9409b06f3c18a3b5ea5"
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wsp__ = Whitespace(WSP_RE__)
test = Series(digit, constant, variable)
digit.set(Alternative(Series(Token("0"), wsp__), Series(Token("1"), wsp__), Series(Token("..."), wsp__), Series(Token("9"), wsp__)))
constant.set(Series(digit, ZeroOrMore(digit)))
variable.set(Alternative(Series(Token("x"), wsp__), Series(Token("y"), wsp__), Series(Token("z"), wsp__)))
factor = Alternative(constant, variable, Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__)))
term = Series(factor, ZeroOrMore(Series(Alternative(Series(Token("*"), wsp__), Series(Token("/"), wsp__)), factor)))
expression.set(Series(term, ZeroOrMore(Series(Alternative(Series(Token("+"), wsp__), Series(Token("-"), wsp__)), term))))
root__ = expression
def get_grammar() -> ArithmeticGrammar:
global GLOBALS
try:
grammar = GLOBALS.Arithmetic_00000001_grammar_singleton
except AttributeError:
GLOBALS.Arithmetic_00000001_grammar_singleton = ArithmeticGrammar()
if hasattr(get_grammar, 'python_src__'):
GLOBALS.Arithmetic_00000001_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = GLOBALS.Arithmetic_00000001_grammar_singleton
return grammar
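# Minimal usage sketch (the input string is made up; note that this toy grammar
# only knows the literal digit tokens "0", "1", "..." and "9"):
#
#     parser = get_grammar()
#     syntax_tree = parser('1 * (x + 9)')
#     print(syntax_tree.as_sxpr())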
#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
Arithmetic_AST_transformation_table = {
# AST Transformations for the Arithmetic-grammar
"<": remove_empty,
"expression": [],
"term": [],
"factor": [replace_or_reduce],
"variable": [replace_or_reduce],
"constant": [],
"digit": [replace_or_reduce],
"test": [],
":_Token, :_RE": reduce_single_child,
"*": replace_by_single_child
}
def ArithmeticTransform() -> TransformationDict:
return partial(traverse, processing_table=Arithmetic_AST_transformation_table.copy())
def get_transformer() -> TransformationFunc:
global thread_local_Arithmetic_transformer_singleton
try:
transformer = thread_local_Arithmetic_transformer_singleton
except NameError:
thread_local_Arithmetic_transformer_singleton = ArithmeticTransform()
transformer = thread_local_Arithmetic_transformer_singleton
return transformer
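# Minimal usage sketch (assumes a syntax_tree obtained via get_grammar() above):
#
#     transformer = get_transformer()
#     transformer(syntax_tree)   # applies the table-driven transformations in place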
#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
class ArithmeticCompiler(Compiler):
"""Compiler for the abstract-syntax-tree of a Arithmetic source file.
"""
def on_expression(self, node):
return node
# def on_term(self, node):
# return node
# def on_factor(self, node):
# return node
# def on_variable(self, node):
# return node
# def on_constant(self, node):
# return node
# def on_digit(self, node):
# return node
# def on_test(self, node):
# return node
def get_compiler() -> ArithmeticCompiler:
global thread_local_Arithmetic_compiler_singleton
try:
compiler = thread_local_Arithmetic_compiler_singleton
except NameError:
thread_local_Arithmetic_compiler_singleton = ArithmeticCompiler()
compiler = thread_local_Arithmetic_compiler_singleton
return compiler
#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################
def compile_src(source, log_dir=''):
"""Compiles ``source`` and returns (result, errors, ast).
"""
with logging(log_dir):
compiler = get_compiler()
cname = compiler.__class__.__name__
log_file_name = os.path.basename(os.path.splitext(source)[0]) \
if is_filename(source) else cname[:cname.find('.')] + '_out'
result = compile_source(source, get_preprocessor(),
get_grammar(),
get_transformer(), compiler)
return result
if __name__ == "__main__":
if len(sys.argv) > 1:
file_name, log_dir = sys.argv[1], ''
if file_name in ['-d', '--debug'] and len(sys.argv) > 2:
file_name, log_dir = sys.argv[2], 'LOGS'
result, errors, ast = compile_src(file_name, log_dir)
if errors:
cwd = os.getcwd()
rel_path = file_name[len(cwd):] if file_name.startswith(cwd) else file_name
for error in errors:
print(rel_path + ':' + str(error))
sys.exit(1)
else:
print(result.as_xml() if isinstance(result, Node) else result)
else:
print("Usage: ArithmeticCompiler.py [FILENAME]")
Arithmetic
==========
This is a grammar for simple arithmetic calculations, a standard textbook
example for EBNF grammars.
#!/usr/bin/python3
"""recompile_grammar.py - recompiles all pdf files in the current directoy
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
sys.path.extend(['../../', '../', './'])
from DHParser import dsl
dsl.recompile_grammar('.', force=True)
......@@ -35,7 +35,7 @@ symbol = /(?!\d)\w+/~ # e.g. expression, factor, param
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
regexp = /\/(?:\\(\/)|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace
EOF = !/./
......@@ -19,14 +19,15 @@ M6: """'literal containing different quotation marks: " '"""
M7: '''"another literal containing different quotation marks: ' "'''
M8: '''"literal spanning
several lines"'''
M9*: '"\\"'
M10*: '"\"'
M11*: '"\"" '
M9: '"\\"'
M10: '"\"'
M11: '"\"" '
[fail:literal]
F1: ' "preceeding whitespace is not parsed by literal"'
F2: ''' "don't forget closing quotation marks'''
F4: '"\\" other stuff "'
F3: '"\\" other stuff "'
F4: '"\\\""' # sorry, this does not work, use a regular expression instead
[match:plaintext]
......@@ -42,25 +43,27 @@ F1: ' `preceeding whitespace not parsed by plaintext parser`'
[match:regexp]
M1: '/[A-Z][a-z]+/' # plain regex
M2: '/\w+/' # regex with backslashes
M4: '/\//' # forward slashes must be escaped
M5: '/\s*/ ' # whitespace may follow the regex-definition
M6: '/\\/ '
M7*: '/\// '
M7: '/\// ' # escaping of forward slash
M8: '/\\(\/)/' # use grouping to avoid ambiguities
[fail:regexp]
F1: ' /no preceding whitespace/'
F2: '/\\/ other stuff /'
F3: '/\\\//' # use /\\(\/)/ instead
[match:whitespace]
[match:whitespace] # whitespace denoted by '~' is considered insignificant by convention
M1: '~'
M2: '~ '
[fail:whitespace]
F1: ' ~'
[match:EOF]
[match:EOF] # End of file
M1: ''
[fail:EOF]
......
[match:group]
M1: '(a|bc|d)'
M1: '(a|bc|d)' # a simple sequence binds stronger than '|'
M2: '((a|b)(c|d))'
[ast:group]
[fail:group]
M1: (expression (symbol "a") (symbol "bc") (symbol "d"))
M2: (term (expression (symbol "a") (symbol "b")) (expression (symbol "c") (symbol "d")))
[match:unordered]
M1: '<a b c>' # all must match but in arbitrary order, e.g. b,c,a is a match, c,a not
M2: '<a|b|c>' # at least one element must match, e.g. c,a is a match
[ast:unordered]
[fail:unordered]
M1: (unordered (term (symbol "a") (symbol "b") (symbol "c")))
M2: (unordered (expression (symbol "a") (symbol "b") (symbol "c")))
[match:oneormore]
M1: '{ a }+' # matches a, aa, aaa ...
M2: '{ a | b }+' # matches b, a, ba, babbaba ...
[ast:oneormore]
M1: (oneormore (symbol "a"))
M2: (oneormore (expression (symbol "a") (symbol "b")))
[fail:oneormore]
F1: '{ this is zero or more }'
[match:repetition]
M1: '{ "hi" }'
[ast:repetition]
[fail:repetition]
M1: (repetition (literal '"hi"'))
[match:option]
M1: '[ maybe ]'
M2: '[<a|b|c>]' # unordered with any number, including zero, of elements
[ast:option]
M1: (option (symbol "maybe"))
[fail:option]
[match:flowmarker]
M1: '&'
M2: '!'
M3: '-&'
M4: '-!'
[ast:flowmarker]
[fail:flowmarker]
M1: (flowmarker "&")
M2: (flowmarker "!")
M3: (flowmarker "-&")
M4: (flowmarker "-!")
[match:retrieveop]
M1: ':'
M2: '::'
[ast:retrieveop]
[fail:retrieveop]
M1: (retrieveop ":")
M2: (retrieveop "::")
[match:factor]
M1: '& (what next)'
M2: '! (not this)'
M3: '-& (has been)'
M4: '-! (has not been)'
M5: '::pop' # read and remove last value of symbol 'pop'
M6: ':retrieve' # read, but don't remove last value of symbol 'retrieve'
[ast:factor]
M1: (factor (flowmarker "&") (term (symbol "what") (symbol "next")))
M2: (factor (flowmarker "!") (term (symbol "not") (symbol "this")))
M3: (factor (flowmarker "-&") (term (symbol "has") (symbol "been")))
M4: (factor (flowmarker "-!") (term (symbol "has") (symbol "not") (symbol "been")))
M5: (factor (retrieveop "::") (symbol "pop"))
M6: (factor (retrieveop ":") (symbol "retrieve"))
[match:expression]
M1: 'a | b'
M2: 'a b | c d'
M3: 'a (b | c) d'
[ast:expression]
[fail:expression]
M1: (expression (symbol "a") (symbol "b"))
M2: (expression (term (symbol "a") (symbol "b")) (term (symbol "c") (symbol "d")))
M3: (term (symbol "a") (expression (symbol "b") (symbol "c")) (symbol "d"))
[match:term]
M1: 'a b'
M2: 'a b c'
M3: 'a §[b] c'
M4: '§&:symbol'
[ast:term]
[fail:term]
M1: (term (symbol "a") (symbol "b"))
M2: (term (symbol "a") (symbol "b") (symbol "c"))
M3: (term (symbol "a") (:Token "§") (option (symbol "b")) (symbol "c"))
M4: (term (:Token "§") (factor (flowmarker "&") (retrieveop ":") (symbol "symbol")))
[match:factor]
M1*: 'symbol'
M2*: '"literal"'
M3*: "'literal'"
M4*: '`plaintext`'
M5*: '/regexp/'
M6*: '~'
M7*: '{ one }+'
M8*: '{ zero }'
M9*: '[option]'
M10*: '&:symbol'
M11*: '!<a b>'
[ast:factor]
M1: (symbol "symbol")
M2: (literal '"literal"')
M3: (literal "'literal'")
M4: (plaintext "`plaintext`")
M5: (regexp "/regexp/")
M6: (whitespace "~")
M7: (oneormore (symbol "one"))
M8: (repetition (symbol "zero"))
M9: (option (symbol "option"))
M10: (factor (flowmarker "&") (retrieveop ":") (symbol "symbol"))
M11: (factor (flowmarker "!") (unordered (term (symbol "a") (symbol "b"))))
[fail:factor]
F1: ':(group)' # retrieve operators only work with symbols
F2: '&{zero}' # lookahead operators don't make sense with parsers
F3: '&[option]' # that never fail
[match:syntax]
M1: """@ whitespace = /\s*/
letters = { /\w/ }"""
[ast:syntax]
[fail:syntax]
M1: (syntax (directive (symbol "whitespace") (regexp "/\s*/"))
(definition (symbol "letters") (repetition (regexp "/\w/"))))
[match:definition]
M1: 'expression = term { "|" term }'
[ast:definition]
[fail:definition]
M1: (definition (symbol "expression") (term (symbol "term")
(repetition (term (literal '"|"') (symbol "term")))))
[match:directive]
M1: '@drop = token, whitespace'
[ast:directive]
M1: (directive (symbol "drop") (symbol "token") (symbol "whitespace"))
[fail:directive]
......@@ -16,13 +16,16 @@ scriptpath = os.path.dirname(__file__)
try:
from DHParser import dsl
import DHParser.log
from DHParser import testing, create_test_templates
from DHParser import testing, create_test_templates, CONFIG_PRESET
except ModuleNotFoundError:
print('Could not import DHParser. Please adjust sys.path in file '
'"%s" manually' % __file__)
sys.exit(1)