
Commit 0b102654 authored by Eckhart Arnold

Merge remote-tracking branch 'mirror/master'

# Conflicts:
#	experimental/new2/new2Compiler.py
parents 1a70d257 9c105b05
DHParser Version 0.8.3 (20.8.2018)
..................................
- new transformation function collapse_if()
- restored compatibility with Python 3.4
- StepByStep Guide typos fixed
- bug fixes
DHParser Version 0.8.2 (10.7.2018)
..................................
- refactoring of module parser.py for more simplicity. RE is now
a simple (macro-style) function instead of a class of its own.
Class Token has been merged with PlainText. Possible break of
backwards compatibility with certain AST-transformation-tables in
connection with Token and RE-nodes!
DHParser Version 0.8.1 (2.7.2018)
.................................
- compatibility fixes for Python 3.7
DHParser Version 0.8.0 (24.6.2018)
..................................
- refactoring of error reporting: it is now done centrally through the
(newly introduced) syntaxtree.RootNode object, which makes it faster and easier
- "step by step" guide added to the documentation
- XML is now a first class citizen for serialization next to S-expressions:
xml serialization can now also be read with syntaxtree.parse_xml() plus
better serialization with syntaxtree.Node.as_xml()
- added example: XML-Parser
- added optional CST-reporting on a case-by-case basis to the testing
framework, using the "*"-marker to indicate which tests should add CSTs to
the report
- moved compilation support to a separate module: compile.py
- source mapping added if preprocessor changes source code
(see module preprocess.py)
- new semantics for syntaxtree.Node.__str__: now includes error messages;
use Node.content to retrieve the content without any error messages
- LaTeX-example: better AST-transformations.
DHParser Version 0.7.8 (29.11.2017)
...................................
@@ -95,7 +95,7 @@ Allow specifying parsers/nodes whose results will be dropped
right away, so that the nodes they produce do not need to be removed
during the AST-transformations (see the sketch after this list). Typical candidates would be:
1. Tokens ":Token"
1. Tokens ":_Token"
2. Whitespace ":Whitespace" (in some cases)
3. empty Nodes
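A hypothetical sketch of the idea (this is not DHParser's actual API, merely an illustration of dropping results at parse time rather than during the AST-transformation):

```python
# Hypothetical illustration only -- not DHParser's actual API. A parser is
# modeled here as a callable text -> (node, rest); wrapping it so that a
# successful result is emptied means the AST stage sees nothing to remove.
def dropped(parser):
    def parse(text):
        node, rest = parser(text)
        if node is None:        # failure is passed through unchanged
            return None, rest
        return '', rest         # success, but the matched content is discarded
    return parse
```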
@@ -143,8 +143,8 @@ parsers:
"contains" another parser without its calls being run through the
parser guard, but that records every call of the parser and its
results, e.g. to trace the `option`-parser from the ebnf-parser (see
DHParser/ebnf.py) you'd write: `option = Trace(Series(Token("["),
expression, Token("]"), mandatory=1))`
DHParser/ebnf.py) you'd write: `option = Trace(Series(_Token("["),
expression, _Token("]"), mandatory=1))`
- For the ebnf-representation a tracing-prefix could be added, say `?`,
e.g. `option = ?("[" §expression "]")` or, alternatively, `?option =
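A hypothetical sketch of such a Trace wrapper (illustrative only; parsers are modeled as callables returning a (node, rest) pair):

```python
# Hypothetical sketch, not DHParser's actual API: a wrapper that "contains"
# a parser, delegates to it, and records every call together with its result.
class Trace:
    def __init__(self, parser):
        self.parser = parser
        self.history = []                  # list of (input, result) pairs

    def __call__(self, text):
        node, rest = self.parser(text)
        self.history.append((text, node))  # record the call and its result
        return node, rest
```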
@@ -33,6 +33,7 @@ from .toolkit import *
from .transform import *
from .versionnumber import __version__
name = "DHParser"
__author__ = "Eckhart Arnold <arnold@badw.de>"
__copyright__ = "http://www.apache.org/licenses/LICENSE-2.0"
# __all__ = ['toolkit', 'stringview', 'error', 'syntaxtree', 'preprocess', 'parse',
@@ -38,7 +38,7 @@ import os
import re
from DHParser.preprocess import strip_tokens, with_source_mapping, PreprocessorFunc
from DHParser.syntaxtree import Node, RootNode
from DHParser.syntaxtree import Node, RootNode, StrictResultType
from DHParser.transform import TransformationFunc
from DHParser.parse import Grammar
from DHParser.error import adjust_error_locations, is_error, Error
@@ -150,8 +150,19 @@ class Compiler:
>>> Compiler.method_name('expression')
'on_expression'
"""
assert re.match(r'\w+$', node_name)
return 'on_' + node_name
def compile_children(self, node: Node) -> StrictResultType:
"""Compiles all children of the given node and returns the tuple
of the compiled children or the node's (potentially empty) result
in case the node does not have any children.
"""
if node.children:
return tuple(self.compile(child) for child in node.children)
else:
return node.result
def fallback_compiler(self, node: Node) -> Any:
"""This is a generic compiler function which will be called on
all those node types for which no compiler method `on_XXX` has
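For orientation, a minimal sketch of how these pieces fit together: a Compiler subclass whose `on_XXX` methods are resolved via `method_name` and which recurses through `compile_children` (the node names `expression` and `term` are illustrative, not part of this commit):

```python
# Minimal illustrative Compiler subclass. self.compile(node) dispatches to
# 'on_' + node_name if such a method exists, else to fallback_compiler().
from DHParser import Compiler

class MyCompiler(Compiler):
    def on_expression(self, node):
        # compile all children and join the partial results
        return ' '.join(str(item) for item in self.compile_children(node))

    def on_term(self, node):
        # leaf nodes are compiled to their raw content here
        return node.content
```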
@@ -76,6 +76,7 @@ dhparserdir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
DHPARSER_IMPORTS = '''
import collections
from functools import partial
import os
import sys
@@ -86,20 +87,21 @@ try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \\
from DHParser import logging, is_filename, load_if_file, MockParser, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, \\
Node, TransformationFunc, TransformationDict, \\
traverse, remove_children_if, merge_children, is_anonymous, \\
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \\
remove_children_if, move_whitespace, normalize_whitespace, is_anonymous, matches_re, \\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, is_empty, \\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \\
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \\
replace_content, replace_content_by
'''.format(dhparserdir=dhparserdir)
@@ -30,7 +30,7 @@ from functools import partial
from DHParser.compile import CompilerError, Compiler
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, RE, \
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
@@ -117,7 +117,7 @@ class EBNFGrammar(Grammar):
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
whitespace = /~/~ # implicit or default whitespace
@@ -126,38 +126,39 @@ class EBNFGrammar(Grammar):
EOF = !/./
"""
expression = Forward()
source_hash__ = "3fc9f5a340f560e847d9af0b61a68743"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
whitespace = RE('~')
regexp = RE('~?/(?:\\\\/|[^/])*?/~?')
plaintext = RE('`(?:[^"]|\\\\")*?`')
literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
symbol = RE('(?!\\d)\\w+')
option = Series(Token("["), expression, Token("]"), mandatory=1)
repetition = Series(Token("{"), expression, Token("}"), mandatory=1)
oneormore = Series(Token("{"), expression, Token("}+"))
unordered = Series(Token("<"), expression, Token(">"), mandatory=1)
group = Series(Token("("), expression, Token(")"), mandatory=1)
retrieveop = Alternative(Token("::"), Token(":"))
flowmarker = Alternative(Token("!"), Token("&"), Token("-!"), Token("-&"))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
list_ = Series(RegExp('\\w+'), wsp__, ZeroOrMore(Series(Series(Token(","), wsp__), RegExp('\\w+'), wsp__)))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
oneormore = Series(Series(Token("{"), wsp__), expression, Series(Token("}+"), wsp__))
unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__),
Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol,
NegativeLookahead(Series(Token("="), wsp__))),
Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext),
Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace),
Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group),
Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Token("§")), factor))
expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
directive = Series(Token("@"), symbol, Token("="), Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, Token("="), expression, mandatory=1)
syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__),
Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
syntax = Series(Option(Series(wsp__, RegExp(''))),
ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax
@@ -382,9 +383,9 @@ class EBNFCompiler(Compiler):
regular expressions found in the current parsing process
"""
COMMENT_KEYWORD = "COMMENT__"
WHITESPACE_KEYWORD = "WSP__"
WHITESPACE_KEYWORD = "WSP_RE__"
RAW_WS_KEYWORD = "WHITESPACE__"
WHITESPACE_PARSER_KEYWORD = "whitespace__"
WHITESPACE_PARSER_KEYWORD = "wsp__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneous AST transformation."
@@ -459,7 +460,7 @@ class EBNFCompiler(Compiler):
elif rule.startswith('Synonym'):
transformations = '[reduce_single_child]'
transtable.append(' "' + name + '": %s,' % transformations)
transtable.append(' ":Token, :RE": reduce_single_child,')
transtable.append(' ":Token": reduce_single_child,')
transtable += [' "*": replace_by_single_child', '}', '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(transtable)
@@ -511,7 +512,7 @@ class EBNFCompiler(Compiler):
if entry not in symbols and not entry.startswith(":"):
messages.append(Error(('Symbol "%s" is not defined in grammar %s but appears in '
'the transformation table!') % (entry, self.grammar_name),
Error.UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE, 0))
0, Error.UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE))
return messages
@@ -539,10 +540,10 @@ class EBNFCompiler(Compiler):
definitions.append((self.WHITESPACE_PARSER_KEYWORD,
'Whitespace(%s)' % self.WHITESPACE_KEYWORD))
definitions.append(('wspR__', self.WHITESPACE_KEYWORD
if 'right' in self.directives['literalws'] else "''"))
definitions.append(('wspL__', self.WHITESPACE_KEYWORD
if 'left' in self.directives['literalws'] else "''"))
# definitions.append(('wspR__', self.WHITESPACE_KEYWORD
# if 'right' in self.directives['literalws'] else "''"))
# definitions.append(('wspL__', self.WHITESPACE_KEYWORD
# if 'left' in self.directives['literalws'] else "''"))
definitions.append((self.WHITESPACE_KEYWORD,
("mixin_comment(whitespace=" + self.RAW_WS_KEYWORD +
", comment=" + self.COMMENT_KEYWORD + ")")))
@@ -778,7 +779,6 @@
name for the particular non-terminal.
"""
arguments = [self.compile(r) for r in node.children] + custom_args
# node.error_flag = max(node.error_flag, max(t.error_flag for t in node.children))
return parser_class + '(' + ', '.join(arguments) + ')'
@@ -921,12 +921,22 @@
def on_literal(self, node: Node) -> str:
return 'Token(' + node.content.replace('\\', r'\\') + ')'
center = 'Token(' + node.content.replace('\\', r'\\') + ')'
left = self.WHITESPACE_PARSER_KEYWORD if 'left' in self.directives['literalws'] else ''
right = self.WHITESPACE_PARSER_KEYWORD if 'right' in self.directives['literalws'] else ''
if left or right:
return 'Series(' + ", ".join(item for item in (left, center, right) if item) + ')'
return center
def on_plaintext(self, node: Node) -> str:
return 'Token(' + node.content.replace('\\', r'\\').replace('`', '"') \
+ ", wL='', wR='')"
tk = node.content.replace('\\', r'\\')
rpl = '"' if tk.find('"') < 0 else "'" if tk.find("'") < 0 else ''
if rpl:
tk = rpl + tk[1:-1] + rpl
else:
tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl
return 'Token(' + tk + ')'
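The quote-picking logic above can be traced in isolation; a standalone rendition (illustrative; the final branch re-quotes explicitly, where the code above concatenates the then-empty `rpl`):

```python
# Standalone rendition of the quote-picking logic (illustrative only):
# choose a surrounding quote character that does not occur in the token.
def requote(tk: str) -> str:
    rpl = '"' if tk.find('"') < 0 else "'" if tk.find("'") < 0 else ''
    if rpl:
        return rpl + tk[1:-1] + rpl
    # both quote kinds occur: escape double quotes and re-quote
    return '"' + tk.replace('"', '\\"')[1:-1] + '"'

assert requote('`plain`') == '"plain"'
assert requote('`say "hi"`') == "'say \"hi\"'"
```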
def on_regexp(self, node: Node) -> str:
@@ -935,7 +945,7 @@
if rx[0] == '/' and rx[-1] == '/':
parser = 'RegExp('
else:
parser = 'RE('
parser = '_RE('
if rx[:2] == '~/':
if not 'left' in self.directives['literalws']:
name = ['wL=' + self.WHITESPACE_KEYWORD] + name
@@ -961,7 +971,7 @@
def on_whitespace(self, node: Node) -> str:
return 'whitespace__'
return self.WHITESPACE_PARSER_KEYWORD
def on_list_(self, node) -> Set[str]:
@@ -41,9 +41,10 @@ import bisect
from DHParser.preprocess import SourceMapFunc
from DHParser.stringview import StringView
from DHParser.toolkit import typing
from typing import Iterable, Iterator, Union, Tuple, List, Optional
from typing import Iterable, Iterator, Union, Tuple, List, NewType
__all__ = ('Error',
__all__ = ('ErrorCode',
'Error',
'is_error',
'is_warning',
'has_errors',
@@ -53,44 +54,44 @@ __all__ = ('Error',
'adjust_error_locations')
class ErrorCode(int):
pass
class Error:
__slots__ = ['message', 'level', 'code', '_pos', 'orig_pos', 'line', 'column', '_node_keep']
# error levels
NO_ERROR = 0
MESSAGE = 1
WARNING = 10
ERROR = 1000
NO_ERROR = ErrorCode(0)
MESSAGE = ErrorCode(1)
WARNING = ErrorCode(100)
ERROR = ErrorCode(1000)
HIGHEST = ERROR
# warning codes
REDEFINED_DIRECTIVE_WARNING = 101
REDECLARED_TOKEN_WARNING = 102
REDEFINED_DIRECTIVE_WARNING = ErrorCode(101)
REDECLARED_TOKEN_WARNING = ErrorCode(102)
UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE = 601
UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE = ErrorCode(601)
# error codes
MANDATORY_CONTINUATION = 1001
MANDATORY_CONTINUATION = ErrorCode(1001)
def __init__(self, message: str, code: int = ERROR, pos: int = -1,
orig_pos: int = -1, line: int = -1, column: int = -1,
node: Optional['Node'] = None) -> None:
self.message = message
def __init__(self, message: str, pos, code: ErrorCode = ERROR,
orig_pos: int = -1, line: int = -1, column: int = -1) -> None:
assert isinstance(code, ErrorCode)
assert not isinstance(pos, ErrorCode)
assert pos >= 0
assert code >= 0
self.code = code
self.message = message
self._pos = pos
self.code = code
self.orig_pos = orig_pos
self.line = line
self.column = column
if node is not None and node._pos >= 0:
assert self._pos < 0 or self._pos == node._pos
self._pos = node._pos
self._node_keep = None # if node is not needed, if pos has been set
else:
self._node_keep = node # redundant: consider removing, see RootNode.collect_errors
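With the reordered signature, the position is now the second argument and the code must be an `ErrorCode`; an illustrative construction (message and position are invented):

```python
# Illustrative use of the new Error signature: message first, then pos,
# then an ErrorCode -- plain ints for pos/code now trip the assertions.
from DHParser.error import Error, is_error

err = Error('parser stopped before end of input', 42,
            Error.MANDATORY_CONTINUATION)
assert is_error(err.code)   # 1001 >= Error.ERROR, so this is a true error
```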
def __str__(self):
prefix = ''
@@ -104,10 +105,6 @@ class Error:
@property
def pos(self):
if self._pos < 0:
assert self._node_keep and self._node_keep.pos >= 0, "pos value not ready yet"
self._pos = self._node_keep.pos # lazy evaluation of position
self._node_keep = None # forget node to allow GC to free memory
return self._pos
@property
@@ -212,7 +209,7 @@ def adjust_error_locations(errors: List[Error],
Returns:
The list of errors. (Returning the list of errors is just syntactical
sugar. Be aware that the line, col and orig_pos attributes have been
changed in place.)
"""
line_breaks = linebreaks(original_text)
@@ -1542,7 +1542,7 @@ class _ProtocolMeta(GenericMeta):
# Every class is a subclass of the empty protocol.
return True
# Find all attributes defined in the protocol.
attrs = self._get_protocol_attrs()
for attr in attrs:
@@ -1557,11 +1557,11 @@ class _ProtocolMeta(GenericMeta):
if getattr(c, '_is_protocol', False) and c.__name__ != '_Protocol':
protocol_bases.append(c)
# Get attributes included in protocol.
attrs = set()
for base in protocol_bases:
for attr in base.__dict__.keys():
# Include attributes not defined in any non-protocol bases.
for c in self.__mro__:
if (c is not base and attr in c.__dict__ and
not getattr(c, '_is_protocol', False)):
@@ -287,7 +287,7 @@ class HistoryRecord:
@property
def stack(self) -> str:
return "->".join((p.repr if p.ptype in {':RegExp', ':PlainText'} else p.name or p.ptype)
return "->".join((p.repr if p.ptype in {':RegExp', ':Token'} else p.name or p.ptype)
for p in self.call_stack)
@property
@@ -372,7 +372,7 @@ def log_ST(syntax_tree, log_file_name):
f.write(syntax_tree.as_sxpr())
LOG_SIZE_THRESHOLD = 100000 # maximum number of history records to log
LOG_SIZE_THRESHOLD = 10000 # maximum number of history records to log
LOG_TAIL_THRESHOLD = 500 # maximum number of history records for "tail log"
@@ -185,7 +185,7 @@ def tokenized_to_original_mapping(tokenized_source: str) -> SourceMap:
positions, offsets = [0], [0]
o = 0
i = tokenized_source.find(BEGIN_TOKEN)
e = -1
e = -2
while i >= 0:
d = tokenized_source.find(TOKEN_DELIMITER, i)
e = tokenized_source.find(END_TOKEN, i)
@@ -195,7 +195,7 @@ def tokenized_to_original_mapping(tokenized_source: str) -> SourceMap:
offsets.extend([o + 1, o])
i = tokenized_source.find(BEGIN_TOKEN, e + 1)
if e + 1 < len(tokenized_source):
positions.append(len(tokenized_source))
positions.append(len(tokenized_source) + 1)
offsets.append(offsets[-1])
# post conditions
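Given the parallel `positions`/`offsets` arrays that this function builds, mapping a position back through the source map comes down to a bisect lookup; a sketch of the idea (the module's real mapping function may differ in detail):

```python
import bisect

# Sketch of a source-map lookup (illustrative; assumes positions is sorted
# and offsets[i] is the correction for the interval starting at positions[i]).
def map_pos(pos: int, positions, offsets) -> int:
    i = bisect.bisect_right(positions, pos) - 1  # last interval start <= pos
    return pos + offsets[i]
```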
@@ -44,8 +44,9 @@ from DHParser.toolkit import re, typing
from typing import Tuple
__all__ = ('unit_from_configfile',
__all__ = ('unit_from_config',
'unit_from_json',
'TEST_READERS',
'unit_from_file',
'get_report',
'grammar_unit',
@@ -93,16 +94,18 @@ RX_ENTRY = re.compile('\s*(\w+\*?)\s*:\s*(?:{value})\s*'.format(value=RE_VALUE))
RX_COMMENT = re.compile('\s*#.*\n')
def unit_from_configfile(config_filename):
def unit_from_config(config_str):
""" Reads grammar unit tests contained in a file in config file (.ini)
syntax.
Args:
config_filename (str): A config file containing Grammar unit-tests
config_str (str): A string containing a config-file with Grammar unit-tests
Returns:
A dictionary representing the unit tests.
"""
# TODO: issue a warning if the same match:xxx or fail:xxx block appears more than once
def eat_comments(txt, pos):
m = RX_COMMENT.match(txt, pos)
while m:
@@ -110,9 +113,7 @@ def unit_from_configfile(config_filename):
m = RX_COMMENT.match(txt, pos)
return pos
with open(config_filename, 'r', encoding="utf-8") as f:
cfg = f.read()
cfg = cfg.replace('\t', ' ')
cfg = config_str.replace('\t', ' ')
OD = collections.OrderedDict
unit = OD()
@@ -147,41 +148,59 @@
section_match = RX_SECTION.match(cfg, pos)
if pos != len(cfg):
raise SyntaxError('in file %s in line %i' % (config_filename, cfg[:pos].count('\n') + 1))
raise SyntaxError('in line %i' % (cfg[:pos].count('\n') + 1))
return unit
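An illustrative call, assuming the usual .ini test layout with `[match:...]` and `[fail:...]` sections (the exact section and entry syntax is defined by RX_SECTION/RX_ENTRY above; the strings here are invented):

```python
# Invented example of the .ini test format; keys and values are assumptions.
cfg = '''
[match:symbol]
1: "foo_bar"

[fail:symbol]
2: "1foo"
'''
unit = unit_from_config(cfg)
assert 'symbol' in unit
```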
def unit_from_json(json_filename):
def unit_from_json(json_str):
"""
Reads grammar unit tests from a json file.
Reads grammar unit tests from a json string.
"""
with open(json_filename, 'r', encoding='utf8') as f:
unit = json.load(f)
unit = json.loads(json_str)
for symbol in unit:
for stage in unit[symbol]:
if stage not in UNIT_STAGES:
raise ValueError('Test stage %s not in: %s' % (stage, str(UNIT_STAGES)))
return unit
# TODO: add support for yaml, cson, toml
# A dictionary associating file endings with reader functions that
# transform strings containing the file's content to a nested dictionary
# structure of test cases.
TEST_READERS = {
'.ini': unit_from_config,
'.json': unit_from_json
}
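This table makes the TODO above straightforward: supporting a new format only requires registering a `str -> dict` reader under its file ending. A hypothetical YAML hookup (PyYAML assumed; `unit_from_yaml` is not part of this commit):

```python
# Hypothetical extension, not part of this commit: a YAML test reader,
# registered inside DHParser.testing next to the .ini and .json readers.
import yaml  # assumes PyYAML is installed

def unit_from_yaml(yaml_str):
    """Reads grammar unit tests from a YAML string."""
    unit = yaml.safe_load(yaml_str)
    for symbol in unit:
        for stage in unit[symbol]:
            if stage not in UNIT_STAGES:
                raise ValueError('Test stage %s not in: %s' % (stage, str(UNIT_STAGES)))
    return unit

TEST_READERS['.yaml'] = unit_from_yaml
```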
def unit_from_file(filename):
"""
Reads a grammar unit test from a file. The format of the file is
determined by the ending of its name.
"""
if filename.endswith(".json"):
test_unit = unit_from_json(filename)
elif filename.endswith(".ini"):
test_unit = unit_from_configfile(filename)
else:
try:
reader = TEST_READERS[os.path.splitext(filename)[1].lower()]
with open(filename, 'r', encoding='utf8') as f:
data = f.read()
test_unit = reader(data)
except KeyError:
raise ValueError("Unknown unit test file type: " + filename[filename.rfind('.'):])
# Check for ambiguous Test names
errors = []
for parser_name, tests in test_unit.items():
# normalize case for test category names
keys = list(tests.keys())
for key in keys:
new_key = key.lower()
if new_key != key:
tests[new_key] = tests[key]
del tests[key]
m_names = set(tests.get('match', dict()).keys())
f_names = set(tests.get('fail', dict()).keys())
intersection = list(m_names & f_names)
@@ -196,12 +215,12 @@ def unit_from_file(filename):
return test_unit
def all_match_tests(tests):
"""Returns all match tests from ``tests``, This includes match tests
marked with an asterix for CST-output as well as unmarked match-tests.
"""
return itertools.chain(tests.get('match', dict()).items(),
tests.get('match*', dict()).items())
# def all_match_tests(tests):
# """Returns all match tests from ``tests``, This includes match tests
# marked with an asterix for CST-output as well as unmarked match-tests.
# """
# return itertools.chain(tests.get('match', dict()).items(),
# tests.get('match*', dict()).items())
def get_report(test_unit):
@@ -263,6 +282,21 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
"""
Unit tests for a grammar-parser and ast transformations.