Commit 33f3e221 authored by di68kap's avatar di68kap

- DHParser/configuration.py new module: centralizes all configuration values

parent 576a3ae7
......@@ -18,10 +18,12 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
# Flat namespace for the DHParser Package. Is this a good idea...?
from .compile import *
from .configuration import *
from .dsl import *
from .ebnf import *
# Flat namespace for the DHParser Package. Is this a good idea...?
from .error import *
from .log import *
from .parse import *
......@@ -37,4 +39,4 @@ name = "DHParser"
__author__ = "Eckhart Arnold <arnold@badw.de>"
__copyright__ = "http://www.apache.org/licenses/LICENSE-2.0"
# __all__ = ['toolkit', 'stringview', 'error', 'syntaxtree', 'preprocess', 'parse',
# 'transform', 'ebnf', 'dsl', 'testing', 'versionnumber']
# 'transform', 'ebnf', 'dsl', 'testing', 'versionnumber', 'configuration']
# configuration.py - default configuration values for DHParser
#
# Copyright 2016 by Eckhart Arnold (arnold@badw.de)
# Bavarian Academy of Sciences and Humanities (badw.de)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
"""
Module "configuration.py" defines the default configuration for DHParser.
The configuration values can be changed at runtime via the
DHParser.toolkit.get_config_value() and DHParser.toolkit.set_config_value()-
functions.
The presets can also be overwritten before(!) spawning any parsing processes by
overwriting the values in the CONFIG_PRESET dictionary.
The recommended way to use a different configuration in any custom code using
DHParser is to use the second method, i.e. to overwrite the values for which
this is desired in the CONFIG_PRESET dictionary right after the start of the
program and before any DHParser-function is invoked.
"""
from typing import Dict, Hashable, Any
__all__ = ('CONFIG_PRESET',)
# Shared dictionary of preset configuration values. Read (and copied into
# process-local state) by DHParser.toolkit.get_config_value(); override
# entries here before any parsing process is spawned.
CONFIG_PRESET = dict() # type: Dict[Hashable, Any]
# DHParser.ebnf.EBNFCompiler class adds the EBNF-grammar to the
# docstring of the generated Grammar-class
# Default value: False
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
# Flattens anonymous nodes, by removing the node and adding its children
# to the parent node in place of the removed node. This is a very useful
# optimization that should be turned on except for learning or teaching
# purposes, in which case a concrete syntax tree that more diligently
# reflects the parser structure may be helpful.
CONFIG_PRESET['flatten_tree_while_parsing'] = True
# Carries out static analysis on the parser tree before parsing starts
# to ensure its correctness. Possible values are:
# 'early' - static analysis is carried out by DHParser.ebnf.EBNFCompiler,
# already. Any errors it revealed will be located in the EBNF
# source code. This naturally only works for parsers that are
# generated from an EBNF syntax declaration.
# 'late' - static analysis is carried out when instantiating a Grammar
# (sub-)class. This works also for parser trees that are
# handwritten in Python using the parser classes from module
# `parse`. It slightly slows down instantiation of Grammar
# classes, though.
# 'none' - no static analysis at all (not recommended).
# Default value: "early"
CONFIG_PRESET['static_analysis'] = "early"
# Defines the output format for the serialization of syntax trees.
# Possible values are:
# 'XML' - output as XML
# 'S-expression' - output as S-expression, i.e. a list-like format
# 'compact' - compact tree output, i.e. children are represented
# on indented lines with no opening or closing tags,
# brackets etc.
# Default values: "compact" for concrete syntax trees and "XML" for
# abstract syntax trees and "S-expression" for any
# other kind of tree.
CONFIG_PRESET['cst_serialization'] = "compact"
CONFIG_PRESET['ast_serialization'] = "XML"
CONFIG_PRESET['default_serialization'] = "S-expression"
# Allows (coarse-grained) parallelization for running tests via the
# Python multiprocessing module
# Default value: True
CONFIG_PRESET['test_parallelization'] = True
......@@ -37,7 +37,8 @@ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace,
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, expand_table, \
GLOBALS, CONFIG_PRESET, get_config_value, unrepr, compile_python_object, typing
GLOBALS, get_config_value, unrepr, compile_python_object, typing
from DHParser.configuration import CONFIG_PRESET
from DHParser.transform import TransformationFunc, traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content
......@@ -61,18 +62,6 @@ __all__ = ('get_ebnf_preprocessor',
'CompilerFactoryFunc')
########################################################################
#
# Presets
#
########################################################################
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
# CONFIG_PRESET['static_analysis'] = "early" # do a static analysis right
# # after ebnf compilation
# already set in parse.py - config vars should probably moved to a
# a dedicated global module
########################################################################
#
# source code support
......
This diff is collapsed.
This diff is collapsed.
......@@ -39,8 +39,9 @@ from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME
from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, FrozenNode, RootNode, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_TAG, ResultType
from DHParser.toolkit import sane_parser_name, escape_control_characters, get_config_value,\
CONFIG_PRESET, re, typing, cython
from DHParser.toolkit import sane_parser_name, escape_control_characters, get_config_value, \
re, typing, cython
from DHParser.configuration import CONFIG_PRESET
from typing import Callable, cast, List, Tuple, Set, Dict, DefaultDict, Union, Optional, Any
......@@ -85,20 +86,6 @@ __all__ = ('Parser',
'Forward')
########################################################################
#
# Presets
#
########################################################################
CONFIG_PRESET['flatten_tree_while_parsing'] = True
CONFIG_PRESET['static_analysis'] = "early"
# 'early': do static analysis already when compiling and EBNF grammar, see ebnf.py
# 'late': do a static analysis, the first time a grammar class is instantiated
# 'none': no static analysis of the grammar
# TODO: move all presests to a dedicated configuration module
########################################################################
#
# Parser base class
......
......@@ -28,7 +28,7 @@ import copy
from DHParser.error import Error, ErrorCode, linebreaks, line_col
from DHParser.stringview import StringView
from DHParser.toolkit import re
from DHParser.toolkit import get_config_value, re
from typing import Callable, cast, Iterator, List, AbstractSet, Set, Union, Tuple, Optional
......@@ -40,6 +40,7 @@ __all__ = ('WHITESPACE_PTYPE',
'StrictResultType',
'ChildrenType',
'Node',
'serialize',
'FrozenNode',
'tree_sanity_check',
'RootNode',
......@@ -731,6 +732,33 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return sum(child.tree_size() for child in self.children) + 1
def serialize(node: Node, how: str='default') -> str:
    """
    Serializes the tree rooted in `node` as a string.

    :param node: the root of the tree to serialize.
    :param how: the target format. Either a concrete format name
        ('S-expression', 'XML', 'compact') or one of the indirect
        selectors 'AST', 'CST', 'default', which resolve the actual
        format from the corresponding configuration variable
        (see module `configuration.py`). Case-insensitive.
    :return: the serialized tree.
    :raises ValueError: if `how` (after resolution) names no known format.
    """
    switch = how.lower()
    # Indirect selectors map to a configuration key whose value names
    # the concrete serialization format.
    config_key = {'ast': 'ast_serialization',
                  'cst': 'cst_serialization',
                  'default': 'default_serialization'}.get(switch)
    if config_key is not None:
        switch = get_config_value(config_key).lower()
    if switch == 's-expression':
        return node.as_sxpr()
    if switch == 'xml':
        return node.as_xml()
    if switch == 'compact':
        return node.as_sxpr(compact=True)
    raise ValueError('Unknown serialization %s, %s' % (how, switch))
class FrozenNode(Node):
"""
FrozenNode is an immutable kind of Node, i.e. it must not be changed
......
......@@ -28,7 +28,6 @@ main cause of trouble when constructing a context free Grammar.
import collections
import concurrent.futures
# import configparser
import copy
import fnmatch
import inspect
......@@ -36,14 +35,14 @@ import json
import multiprocessing
import os
import sys
from typing import Dict, List, Union, cast
from DHParser.error import Error, is_error, adjust_error_locations
from DHParser.log import is_logging, clear_logs, log_parsing_history
from DHParser.parse import UnknownParserError, Parser, Lookahead
from DHParser.syntaxtree import Node, RootNode, parse_tree, flatten_sxpr, ZOMBIE_TAG
from DHParser.toolkit import load_if_file, re, typing
from DHParser.syntaxtree import Node, RootNode, parse_tree, flatten_sxpr, serialize, ZOMBIE_TAG
from DHParser.toolkit import get_config_value, set_config_value, load_if_file, re
from typing import Dict, List, Union, cast
__all__ = ('unit_from_config',
'unit_from_json',
......@@ -262,10 +261,10 @@ def get_report(test_unit):
cst = tests.get('__cst__', {}).get(test_name, None)
if cst and (not ast or str(test_name).endswith('*')):
report.append('\n### CST')
report.append(indent(cst.as_sxpr(compact=True)))
report.append(indent(serialize(cst, 'cst')))
if ast:
report.append('\n### AST')
report.append(indent(ast.as_xml()))
report.append(indent(serialize(ast, 'ast')))
for test_name, test_code in tests.get('fail', dict()).items():
heading = 'Fail-test "%s"' % test_name
report.append('\n%s\n%s\n' % (heading, '-' * len(heading)))
......@@ -408,7 +407,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
if compare:
if not compare.equals(cst):
errata.append('Concrete syntax tree test "%s" for parser "%s" failed:\n%s' %
(test_name, parser_name, cst.as_sxpr()))
(test_name, parser_name, serialize(cst, 'cst')))
if verbose:
infostr = ' cst-test "' + test_name + '" ... '
write(infostr + ("OK" if len(errata) == errflag else "FAIL"))
......@@ -502,21 +501,33 @@ def grammar_suite(directory, parser_factory, transformer_factory,
os.chdir(directory)
if is_logging():
clear_logs()
with concurrent.futures.ProcessPoolExecutor(multiprocessing.cpu_count()) as pool:
errata_futures = []
for filename in sorted(os.listdir()):
if get_config_value('test_parallelization'):
with concurrent.futures.ProcessPoolExecutor(multiprocessing.cpu_count()) as pool:
results = []
for filename in sorted(os.listdir('.')):
if any(fnmatch.fnmatch(filename, pattern) for pattern in fn_patterns):
parameters = filename, parser_factory, transformer_factory, report, verbose
results.append((filename, pool.submit(grammar_unit, *parameters)))
# grammar_unit(*parameters)
for filename, err_future in results:
try:
errata = err_future.result()
if errata:
all_errors[filename] = errata
except ValueError as e:
if not ignore_unknown_filetypes or str(e).find("Unknown") < 0:
raise e
else:
results = []
for filename in sorted(os.listdir('.')):
if any(fnmatch.fnmatch(filename, pattern) for pattern in fn_patterns):
parameters = filename, parser_factory, transformer_factory, report, verbose
errata_futures.append((filename, pool.submit(grammar_unit, *parameters)))
# grammar_unit(*parameters)
for filename, err_future in errata_futures:
try:
errata = err_future.result()
if errata:
all_errors[filename] = errata
except ValueError as e:
if not ignore_unknown_filetypes or str(e).find("Unknown") < 0:
raise e
results.append((filename, grammar_unit(*parameters)))
for filename, errata in results:
if errata:
all_errors[filename] = errata
os.chdir(save_cwd)
error_report = []
err_N = 0
......@@ -760,15 +771,21 @@ def run_path(path):
sys.path.append(path)
files = os.listdir(path)
result_futures = []
with concurrent.futures.ProcessPoolExecutor(multiprocessing.cpu_count()) as pool:
if get_config_value('test_parallelization'):
with concurrent.futures.ProcessPoolExecutor(multiprocessing.cpu_count()) as pool:
for f in files:
result_futures.append(pool.submit(run_file, f))
# run_file(f) # for testing!
for r in result_futures:
try:
_ = r.result()
except AssertionError as failure:
print(failure)
else:
for f in files:
result_futures.append(pool.submit(run_file, f))
# run_file(f) # for testing!
for r in result_futures:
try:
_ = r.result()
except AssertionError as failure:
print(failure)
run_file(f)
else:
path, fname = os.path.split(path)
sys.path.append(path)
......
......@@ -22,7 +22,6 @@ several of the the other DHParser-Modules or that are just very generic
so that they are best defined in a toolkit-module.
"""
import codecs
import hashlib
import io
import multiprocessing
......@@ -34,14 +33,8 @@ try:
except ImportError:
import re
import sys
try:
import typing
except ImportError:
import DHParser.foreign_typing as typing
sys.modules['typing'] = typing # make it possible to import from typing
from typing import Any, Iterable, Sequence, Set, Union, Dict, Hashable, cast
import typing
from typing import Any, Iterable, Sequence, Set, Union, Dict, Hashable
try:
import cython
......@@ -52,6 +45,8 @@ except ImportError:
cython_optimized = False # type: bool
import DHParser.shadow_cython as cython
from DHParser.configuration import CONFIG_PRESET
__all__ = ('typing',
'cython',
......@@ -72,7 +67,6 @@ __all__ = ('typing',
'smart_list',
'sane_parser_name',
'GLOBALS',
'CONFIG_PRESET',
'get_config_value',
'set_config_value')
......@@ -84,8 +78,6 @@ __all__ = ('typing',
#######################################################################
GLOBALS = threading.local()
CONFIG_PRESET = dict() # type: Dict[Hashable, Any]
def get_config_value(key: Hashable) -> Any:
"""
......
......@@ -16,4 +16,4 @@
# permissions and limitations under the License.
__all__ = ('__version__',)
__version__ = '0.8.5' # + '_dev' + str(os.stat(__file__).st_mtime)
__version__ = '0.8.6' # + '_dev' + str(os.stat(__file__).st_mtime)
......@@ -59,7 +59,8 @@ class ArithmeticGrammar(Grammar):
r"""Parser for an Arithmetic source file.
"""
expression = Forward()
source_hash__ = "d77842f8b59d2ec3736b21778c0c9c78"
source_hash__ = "588e988cfef8ace70244463ad9c64fc7"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'#.*'
......
......@@ -59,7 +59,7 @@ class ArithmeticGrammar(Grammar):
digit = Forward()
expression = Forward()
variable = Forward()
source_hash__ = "cf537b22b7a1a2a58c426f99f784285d"
source_hash__ = "43a6a760b591f9409b06f3c18a3b5ea5"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
......
......@@ -59,7 +59,7 @@ class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file.
"""
expression = Forward()
source_hash__ = "c454e8d67e4190759e529feb13eca0c2"
source_hash__ = "7a7c3764b7b37241534fbb65b44b219d"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
......
......@@ -57,7 +57,7 @@ class LaTeXGrammar(Grammar):
paragraph = Forward()
tabular_config = Forward()
text_element = Forward()
source_hash__ = "242fb29d844ed8eb0024286ea5b78bff"
source_hash__ = "30f9fd1ad9257035ba83975dd2f46856"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
......
......@@ -23,11 +23,15 @@ import sys
sys.path.extend(['../../', '../', './'])
from DHParser import configuration
from DHParser import dsl
import DHParser.log
from DHParser import testing
configuration.CONFIG_PRESET['test_parallelization'] = True
def recompile_grammar(grammar_src, force):
with DHParser.log.logging(False):
# recompiles Grammar only if it has changed
......
......@@ -58,7 +58,7 @@ def get_preprocessor() -> PreprocessorFunc:
class Lyrik_explicit_whitespaceGrammar(Grammar):
r"""Parser for a Lyrik_explicit_whitespace source file.
"""
source_hash__ = "bcb3cee425961a2148941b492e614bd2"
source_hash__ = "2a7f0e987e796860b804a7e162df7e7b"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
......
......@@ -67,10 +67,11 @@ class XMLGrammar(Grammar):
extSubsetDecl = Forward()
ignoreSectContents = Forward()
markupdecl = Forward()
source_hash__ = "3b6f8c0aafa133d9139684e42a30adfa"
source_hash__ = "05c24553a9c13db86136495bd1b5fed8"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
COMMENT__ = r'//'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
dwsp__ = DropWhitespace(WSP_RE__)
......
......@@ -60,10 +60,11 @@ class XMLSnippetGrammar(Grammar):
"""
Name = Forward()
element = Forward()
source_hash__ = "ef0fa6d8c7a96ee0fe2a8e209c3f2ae9"
source_hash__ = "d3c46a530b258f47d6ae47ccf8297702"
static_analysis_pending__ = False
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
COMMENT__ = r'//'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
dwsp__ = DropWhitespace(WSP_RE__)
......
......@@ -25,16 +25,23 @@ if __name__ == "__main__":
run += 1
if ret > 0:
failures += 1
print("********** FAILURE **********")
for example in example_dirs:
example_path = os.path.join(rootdir, 'examples', example)
if os.path.isdir(example_path):
save = os.getcwd()
os.chdir(example_path)
ebnf = []
for name in os.listdir(example_path):
if name.lower().endswith('.ebnf'):
ebnf.append(name)
for name in os.listdir(example_path):
if os.path.isfile(name) \
and (name == "recompile_grammar.py" or fnmatch.fnmatch(name, 'tst_*.py')):
print(os.path.join(example_path, name))
for grammar in ebnf:
check(os.system(interpreter + name + ' ' + grammar))
check(os.system(interpreter + name))
os.chdir(save)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment