Commit 028797e1 authored by di68kap

Merge remote-tracking branch 'origin/development' into development

parents 1beb283d 49f47560
@@ -39,7 +39,7 @@ import os
import re
from DHParser.preprocess import strip_tokens, with_source_mapping, PreprocessorFunc
from DHParser.syntaxtree import Node, RootNode, StrictResultType
from DHParser.syntaxtree import Node, RootNode, ZOMBIE_ROOTNODE, StrictResultType
from DHParser.transform import TransformationFunc
from DHParser.parse import Grammar
from DHParser.error import adjust_error_locations, is_error, Error
@@ -97,7 +97,7 @@ class Compiler:
self._reset()
def _reset(self):
self.tree = None # type: Optional[RootNode]
self.tree = ZOMBIE_ROOTNODE # type: RootNode
self.context = [] # type: List[Node]
self._dirty_flag = False
@@ -116,7 +116,7 @@ class Compiler:
result = self.compile(root)
return result
def set_grammar_name(self, grammar_name: str="", grammar_source: str=""):
def set_grammar_name(self, grammar_name: str = "", grammar_source: str = ""):
"""
Changes the grammar's name and the grammar's source.
@@ -190,9 +190,9 @@ class Compiler:
"""
elem = node.parser.name or node.parser.ptype[1:]
if not sane_parser_name(elem):
node.add_error("Reserved name '%s' not allowed as parser "
"name! " % elem + "(Any name starting with "
"'_' or '__' or ending with '__' is reserved.)")
self.tree.new_error(node, "Reserved name '%s' not allowed as parser "
"name! " % elem + "(Any name starting with "
"'_' or '__' or ending with '__' is reserved.)")
return None
else:
try:
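# A minimal sketch of the check that sane_parser_name() presumably performs,
# inferred from the error message above; the actual implementation lives in
# DHParser.toolkit and may differ:
def sane_parser_name(name: str) -> bool:
    """True unless `name` starts with '_' or ends with '__' (reserved)."""
    return bool(name) and not name.startswith('_') and not name.endswith('__')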
@@ -217,9 +217,9 @@
def compile_source(source: str,
preprocessor: Optional[PreprocessorFunc], # str -> str
parser: Grammar, # str -> Node (concrete syntax tree (CST))
transformer: TransformationFunc, # Node (CST) -> Node (abstract syntax tree (AST))
transformer: TransformationFunc, # Node (CST) -> Node (abstract ST (AST))
compiler: Compiler, # Node (AST) -> Any
preserve_ast: bool = False) -> Tuple[Any, List[Error], Node]:
preserve_ast: bool = False) -> Tuple[Optional[Any], List[Error], Optional[Node]]:
"""
Compiles a source in four stages:
1. Pre-Processing (if needed)
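# A hedged usage sketch of compile_source() with the EBNF tool-chain used
# elsewhere in this commit; the factory functions come from DHParser.ebnf,
# the top-level import of compile_source is assumed:
from DHParser import compile_source
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler

result, messages, _ = compile_source(
    'word = /\\w+/',                 # hypothetical one-rule EBNF source
    None,                            # stage 1: no preprocessing
    get_ebnf_grammar(),              # stage 2: str -> concrete syntax tree (CST)
    get_ebnf_transformer(),          # stage 3: CST -> abstract syntax tree (AST)
    get_ebnf_compiler())             # stage 4: AST -> Any (here: Python code)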
@@ -259,7 +259,7 @@ def compile_source(source: str,
source_mapping = lambda i: i
else:
source_text, source_mapping = with_source_mapping(preprocessor(original_text))
syntax_tree = parser(source_text)
syntax_tree = parser(source_text) # type: RootNode
if is_logging():
log_ST(syntax_tree, log_file_name + '.cst')
log_parsing_history(parser, log_file_name)
@@ -287,3 +287,6 @@ def compile_source(source: str,
messages = syntax_tree.collect_errors()
adjust_error_locations(messages, original_text, source_mapping)
return result, messages, ast
# TODO: Verify compiler against grammar, i.e. make sure that for all on_X()-methods, `X` is the name of a parser
@@ -101,7 +101,7 @@ from DHParser import logging, is_filename, load_if_file, MockParser, \\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens, \\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \\
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \\
replace_content, replace_content_by, recompile_grammar
replace_content, replace_content_by, error_on, recompile_grammar
'''.format(dhparserdir=dhparserdir)
@@ -133,7 +133,7 @@ if __name__ == "__main__":
'because grammar was not found at: ' + grammar_path)
if len(sys.argv) > 1:
# compile file
# compile file
file_name, log_dir = sys.argv[1], ''
if file_name in ['-d', '--debug'] and len(sys.argv) > 2:
file_name, log_dir = sys.argv[2], 'LOGS'
@@ -157,7 +157,7 @@ class DSLException(Exception):
"""
def __init__(self, errors):
assert isinstance(errors, Iterator) or isinstance(errors, list) \
or isinstance(errors, tuple)
or isinstance(errors, tuple)
self.errors = errors
def __str__(self):
@@ -209,9 +209,10 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
parser_py, messages = grammar_src, [] # type: str, List[Error]
else:
with logging(False):
parser_py, messages, _ = compile_source(
result, messages, _ = compile_source(
grammar_src, None,
get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
parser_py = cast(str, result)
if has_errors(messages):
raise GrammarError(only_errors(messages), grammar_src)
parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, r'\w+Grammar$')()
@@ -227,7 +228,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
def compileDSL(text_or_file: str,
preprocessor: PreprocessorFunc,
preprocessor: Optional[PreprocessorFunc],
dsl_grammar: Union[str, Grammar],
ast_transformation: TransformationFunc,
compiler: Compiler) -> Any:
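# Hedged usage sketch for compileDSL(); the grammar may be passed either as
# EBNF source text or as a ready-made Grammar object. All names below are
# hypothetical:
result = compileDSL('1 + 2',                       # text (or a file name)
                    None,                          # no preprocessor
                    arithmetic_ebnf,               # EBNF string or Grammar object
                    get_arithmetic_transformer(),  # hypothetical factory
                    get_arithmetic_compiler())     # hypothetical factory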
@@ -461,7 +462,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
cfactory = get_ebnf_compiler
compiler1 = cfactory()
compiler1.set_grammar_name(compiler_name, source_file)
result, messages, AST = compile_source(source, sfactory(), pfactory(), tfactory(), compiler1)
result, messages, _ = compile_source(source, sfactory(), pfactory(), tfactory(), compiler1)
if has_errors(messages):
return messages
@@ -477,17 +479,17 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
f = open(rootname + 'Compiler.py', 'r', encoding="utf-8")
source = f.read()
sections = RX_SECTION_MARKER.split(source)
intro, imports, preprocessor, parser, ast, compiler, outro = sections
# TODO: Verify transformation table
intro, imports, preprocessor, _, ast, compiler, outro = sections
ast_trans_table = compile_python_object(DHPARSER_IMPORTS + ast,
r'(?:\w+_)?AST_transformation_table$')
messages.extend(ebnf_compiler.verify_transformation_table(ast_trans_table))
except (PermissionError, FileNotFoundError, IOError) as error:
intro, imports, preprocessor, parser, ast, compiler, outro = '', '', '', '', '', '', ''
except ValueError as error:
# TODO: Verify compiler
except (PermissionError, FileNotFoundError, IOError):
intro, imports, preprocessor, _, ast, compiler, outro = '', '', '', '', '', '', ''
except ValueError:
name = '"' + rootname + 'Compiler.py"'
raise ValueError('Could not identify all required sections in ' + name +
'. Please delete or repair ' + name + ' manually!')
raise ValueError('Could not identify all required sections in ' + name
+ '. Please delete or repair ' + name + ' manually!')
finally:
if f:
f.close()
@@ -515,7 +517,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
f.write(SECTION_MARKER.format(marker=PREPROCESSOR_SECTION))
f.write(preprocessor)
f.write(SECTION_MARKER.format(marker=PARSER_SECTION))
f.write(result)
f.write(cast(str, result))
f.write(SECTION_MARKER.format(marker=AST_SECTION))
f.write(ast)
f.write(SECTION_MARKER.format(marker=COMPILER_SECTION))
@@ -558,7 +560,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
def recompile_grammar(ebnf_filename, force=False,
notify: Callable=lambda: None) -> bool:
notify: Callable = lambda: None) -> bool:
"""
Re-compiles an EBNF-grammar if necessary, that is, if either no
corresponding 'XXXXCompiler.py'-file exists or if that file is
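# Hedged usage sketch (file names hypothetical): regenerate
# 'ArithmeticCompiler.py' only if 'Arithmetic.ebnf' has changed; on failure,
# the error messages end up in 'Arithmetic_ebnf_ERRORS.txt' (cf. the
# error_file_name below):
import sys
if not recompile_grammar('Arithmetic.ebnf', force=False,
                         notify=lambda: print('recompiling ...')):
    sys.exit(1)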
@@ -581,7 +583,7 @@ def recompile_grammar(ebnf_filename, force=False,
success = success and recompile_grammar(entry, force)
return success
base, ext = os.path.splitext(ebnf_filename)
base, _ = os.path.splitext(ebnf_filename)
compiler_name = base + 'Compiler.py'
error_file_name = base + '_ebnf_ERRORS.txt'
messages = [] # type: Iterable[Error]
@@ -183,7 +183,7 @@ def grammar_changed(grammar_class, grammar_source: str) -> bool:
# grammar_class = load_compiler_suite(grammar_class)[1]
with open(grammar_class, 'r', encoding='utf8') as f:
pycode = f.read()
m = re.search('class \w*\(Grammar\)', pycode)
m = re.search(r'class \w*\(Grammar\)', pycode)
if m:
m = re.search(' source_hash__ *= *"([a-z0-9]*)"',
pycode[m.span()[1]:])
@@ -247,6 +247,7 @@ EBNF_AST_transformation_table = {
def EBNFTransform() -> TransformationFunc:
return partial(traverse, processing_table=EBNF_AST_transformation_table.copy())
def get_ebnf_transformer() -> TransformationFunc:
global thread_local_EBNF_transformer_singleton
try:
@@ -475,13 +476,13 @@ class EBNFCompiler(Compiler):
raise EBNFCompilerError('Compiler has not been run before calling '
'"gen_Compiler_Skeleton()"!')
compiler = ['class ' + self.grammar_name + 'Compiler(Compiler):',
' """Compiler for the abstract-syntax-tree of a ' +
self.grammar_name + ' source file.',
' """Compiler for the abstract-syntax-tree of a '
+ self.grammar_name + ' source file.',
' """', '',
' def __init__(self, grammar_name="' +
self.grammar_name + '", grammar_source=""):',
' super(' + self.grammar_name +
'Compiler, self).__init__(grammar_name, grammar_source)',
' def __init__(self, grammar_name="'
+ self.grammar_name + '", grammar_source=""):',
' super(' + self.grammar_name
+ 'Compiler, self).__init__(grammar_name, grammar_source)',
r" assert re.match('\w+\Z', grammar_name)", '',
' def _reset(self):',
' super()._reset()',
@@ -515,6 +516,13 @@ class EBNFCompiler(Compiler):
0, Error.UNDEFINED_SYMBOL_IN_TRANSFORMATION_TABLE))
return messages
def verify_compiler(self, compiler):
"""
Checks the compiler for on_XXXX()-methods where XXXX does not name
any parser defined in the grammar. Such an inconsistency usually
results from a typo in the compiler code.
"""
pass # TODO: add verification code here
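# One way the TODO above might be filled in; a sketch under the assumption
# that visitor methods follow the 'on_<symbol>' naming convention and that
# `rules` maps the symbols defined in the grammar (not the project's actual
# implementation):
def undefined_visitor_methods(compiler: object, rules: dict) -> list:
    """Return the names of on_XXXX()-methods for which no parser XXXX exists."""
    return [attr for attr in dir(compiler)
            if attr.startswith('on_') and attr[3:] not in rules]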
def assemble_parser(self, definitions: List[Tuple[str, str]], root_node: Node) -> str:
"""
@@ -541,8 +549,8 @@
definitions.append((self.WHITESPACE_PARSER_KEYWORD,
'Whitespace(%s)' % self.WHITESPACE_KEYWORD))
definitions.append((self.WHITESPACE_KEYWORD,
("mixin_comment(whitespace=" + self.RAW_WS_KEYWORD +
", comment=" + self.COMMENT_KEYWORD + ")")))
("mixin_comment(whitespace=" + self.RAW_WS_KEYWORD
+ ", comment=" + self.COMMENT_KEYWORD + ")")))
definitions.append((self.RAW_WS_KEYWORD, "r'{whitespace}'".format(**self.directives)))
definitions.append((self.COMMENT_KEYWORD, "r'{comment}'".format(**self.directives)))
@@ -550,11 +558,11 @@
# add EBNF grammar to the doc string of the parser class
article = 'an ' if self.grammar_name[0:1] in "AaEeIiOoUu" else 'a ' # what about 'hour', 'universe' etc.?
declarations = ['class ' + self.grammar_name +
'Grammar(Grammar):',
'r"""Parser for ' + article + self.grammar_name +
' source file' +
(', with this grammar:' if self.grammar_source else '.')]
declarations = ['class ' + self.grammar_name
+ 'Grammar(Grammar):',
'r"""Parser for ' + article + self.grammar_name
+ ' source file'
+ (', with this grammar:' if self.grammar_source else '.')]
definitions.append(('parser_initialization__', '"upon instantiation"'))
if self.grammar_source:
definitions.append(('source_hash__',
@@ -583,7 +591,7 @@
for symbol in self.symbols:
if symbol not in defined_symbols:
self.tree.new_error(self.symbols[symbol],
"Missing definition for symbol '%s'" % symbol)
"Missing definition for symbol '%s'" % symbol)
# root_node.error_flag = True
# check for unconnected rules
@@ -642,7 +650,7 @@
first = self.rules[rule][0]
if not first.errors:
self.tree.new_error(first, 'First definition of rule "%s" '
'followed by illegal redefinitions.' % rule)
'followed by illegal redefinitions.' % rule)
self.tree.new_error(node, 'A rule "%s" has already been defined earlier.' % rule)
elif rule in EBNFCompiler.RESERVED_SYMBOLS:
self.tree.new_error(node, 'Symbol "%s" is a reserved symbol.' % rule)
@@ -682,7 +690,8 @@
prefixed with the multiline-flag. Returns the regular expression string.
"""
flags = self.re_flags | {'x'} if rx.find('\n') >= 0 else self.re_flags
if flags: rx = "(?%s)%s" % ("".join(flags), rx)
if flags:
rx = "(?%s)%s" % ("".join(flags), rx)
try:
re.compile(rx)
except Exception as re_error:
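# For illustration: the "(?%s)%s" wrapping above relies on Python's inline
# flag syntax, which must appear at the start of the pattern, e.g.:
import re
pattern = "(?ix)brown \n fox"         # 'i' = ignore case, 'x' = verbose
assert re.match(pattern, "Brownfox")  # under 'x', pattern whitespace is ignored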
@@ -769,7 +778,7 @@
return ""
def non_terminal(self, node: Node, parser_class: str, custom_args: List[str]=[]) -> str:
def non_terminal(self, node: Node, parser_class: str, custom_args: List[str] = []) -> str:
"""
Compiles any non-terminal, where `parser_class` indicates the Parser class
name for the particular non-terminal.
@@ -833,7 +842,7 @@
# shift = (Node(node.parser, node.result[1].result),)
# node.result[1].result = shift + node.result[2:]
node.children[1].result = (Node(node.children[1].parser, node.children[1].result),) \
+ node.children[2:]
+ node.children[2:]
node.children[1].parser = node.parser
node.result = (node.children[0], node.children[1])
@@ -943,7 +952,7 @@
else:
parser = '_RE('
if rx[:2] == '~/':
if not 'left' in self.directives['literalws']:
if 'left' not in self.directives['literalws']:
name = ['wL=' + self.WHITESPACE_KEYWORD] + name
rx = rx[1:]
elif 'left' in self.directives['literalws']:
@@ -41,7 +41,7 @@ import bisect
from DHParser.preprocess import SourceMapFunc
from DHParser.stringview import StringView
from DHParser.toolkit import typing
from typing import Iterable, Iterator, Union, Tuple, List, NewType
from typing import Iterable, Iterator, Union, Tuple, List
__all__ = ('ErrorCode',
'Error',
@@ -200,7 +200,7 @@ def line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
def adjust_error_locations(errors: List[Error],
original_text: Union[StringView, str],
source_mapping: SourceMapFunc=lambda i: i) -> List[Error]:
source_mapping: SourceMapFunc = lambda i: i) -> List[Error]:
"""Adds (or adjusts) line and column numbers of error messages in place.
Args:
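# For illustration (hypothetical numbers): a SourceMapFunc translates a
# position in the preprocessed text back to the corresponding position in
# the original text. If a preprocessor removed a 10-character header, the
# mapping could simply shift positions:
header_len = 10
source_mapping = lambda i: i + header_len   # preprocessed pos -> original pos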
@@ -55,7 +55,7 @@ import os
from DHParser.error import line_col
from DHParser.stringview import StringView
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE
from DHParser.syntaxtree import Node
from DHParser.toolkit import is_filename, escape_control_characters, typing
from typing import List, Tuple, Union
@@ -206,12 +206,13 @@ class HistoryRecord:
COLGROUP = '<colgroup>\n<col style="width:2%"/><col style="width:2%"/><col ' \
'style="width:75%"/><col style="width:6%"/><col style="width:15%"/>\n</colgroup>'
HEADINGS = ('<tr><th>L</th><th>C</th><th>parser call sequence</th>'
'<th>success</th><th>text matched or failed</th></tr>')
HTML_LEAD_IN = ('<!DOCTYPE html>\n'
'<th>success</th><th>text matched or failed</th></tr>')
HTML_LEAD_IN = (
'<!DOCTYPE html>\n'
'<html>\n<head>\n<meta charset="utf-8"/>\n<style>\n'
'td,th {font-family:monospace; '
'border-right: thin solid grey; border-bottom: thin solid grey}\n'
'td.line, td.column {color:darkgrey}\n' # 'td.stack {}\n'
'border-right: thin solid grey; border-bottom: thin solid grey}\n'
'td.line, td.column {color:darkgrey}\n' # 'td.stack {}\n'
'td.status {font-weight:bold}\n'
'td.text {color:darkblue}\n'
'table {border-spacing: 0px; border: thin solid darkgrey; width:100%}\n'
@@ -236,7 +237,7 @@ class HistoryRecord:
def __str__(self):
return '%4i, %2i: %s; %s; "%s"' % self.as_tuple()
def as_tuple(self) -> Snapshot:
def as_tuple(self) -> 'Snapshot':
"""
Returns history record formatted as a snapshot tuple.
"""
@@ -260,10 +261,10 @@ class HistoryRecord:
if status == self.MATCH:
status = '<span class="match">' + status + '</span>'
i = stack.rfind('-&gt;')
chr = stack[i+12:i+13]
chr = stack[i + 12:i + 13]
while not chr.isidentifier() and i >= 0:
i = stack.rfind('-&gt;', 0, i)
chr = stack[i+12:i+13]
chr = stack[i + 12:i + 13]
if i >= 0:
i += 12
k = stack.find('<', i)
@@ -294,7 +295,6 @@ class HistoryRecord:
def status(self) -> str:
return self.FAIL if self.node is None else \
('"%s"' % self.err_msg()) if self.node.errors else self.MATCH
# has_errors(self.node._errors)
@property
def excerpt(self):
@@ -344,8 +344,8 @@ class HistoryRecord:
remaining = -1
result = None
for record in history:
if (record.status == HistoryRecord.MATCH and
(record.remaining < remaining or remaining < 0)):
if (record.status == HistoryRecord.MATCH
and (record.remaining < remaining or remaining < 0)):
result = record
remaining = record.remaining
return result
@@ -376,7 +376,7 @@ LOG_SIZE_THRESHOLD = 10000 # maximum number of history records to log
LOG_TAIL_THRESHOLD = 500 # maximum number of history records for "tail log"
def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> None:
def log_parsing_history(grammar, log_file_name: str = '', html: bool = True) -> None:
"""
Writes a log of the parsing history of the most recently parsed document.
@@ -415,8 +415,7 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> No
if not is_logging():
raise AssertionError("Cannot log history when logging is turned off!")
# assert self.history__, \
# "Parser did not yet run or logging was turned off when running parser!"
if not log_file_name:
name = grammar.__class__.__name__
log_file_name = name[:-7] if name.lower().endswith('grammar') else name
@@ -424,35 +423,22 @@ def log_parsing_history(grammar, log_file_name: str = '', html: bool=True) -> No
log_file_name = log_file_name[:-4]
full_history = ['<h1>Full parsing history of "%s"</h1>' % log_file_name] # type: List[str]
# match_history = ['<h1>Match history of parsing "%s"</h1>' % log_file_name] # type: List[str]
# errors_only = ['<h1>Errors when parsing "%s"</h1>' % log_file_name] # type: List[str]
if len(grammar.history__) > LOG_SIZE_THRESHOLD:
warning =('Sorry, man, %iK history records is just too many! '
'Only looking at the last %iK records.'
% (len(grammar.history__)//1000, LOG_SIZE_THRESHOLD//1000))
warning = ('Sorry, man, %iK history records is just too many! '
'Only looking at the last %iK records.'
% (len(grammar.history__) // 1000, LOG_SIZE_THRESHOLD // 1000))
html_warning = '<p><strong>' + warning + '</strong></p>'
full_history.append(html_warning)
# match_history.append(html_warning)
# errors_only.append(html_warning)
lead_in = '\n'.join(['<table>', HistoryRecord.COLGROUP, HistoryRecord.HEADINGS])
full_history.append(lead_in)
# match_history.append(lead_in)
# errors_only.append(lead_in)
for record in grammar.history__[-LOG_SIZE_THRESHOLD:]:
line = record.as_html_tr() if html else str(record)
append_line(full_history, line)
# if record.node and record.node.parser.ptype != WHITESPACE_PTYPE:
# append_line(match_history, line)
# if record.node.errors:
# append_line(errors_only, line)
write_log(full_history, log_file_name + '_full')
if len(full_history) > LOG_TAIL_THRESHOLD + 10:
heading = '<h1>Last 500 records of parsing history of "%s"</h1>' % log_file_name + lead_in
write_log([heading] + full_history[-LOG_TAIL_THRESHOLD:], log_file_name + '_full.tail')
# write_log(match_history, log_file_name + '_match')
# if (len(errors_only) > 3 or (len(grammar.history__) <= LOG_SIZE_THRESHOLD
# and len(errors_only) > 2)):
# write_log(errors_only, log_file_name + '_errors')
@@ -40,7 +40,7 @@ from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, RootNode, ParserBase, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_PARSER
from DHParser.toolkit import sane_parser_name, escape_control_characters, re, typing
from typing import Callable, cast, Dict, DefaultDict, List, Set, Tuple, Union, Optional
from typing import Callable, cast, List, Tuple, Set, Dict, DefaultDict, Union, Optional
__all__ = ('Parser',
@@ -263,7 +263,7 @@ class Parser(ParserBase):
"""
duplicate = self.__class__()
duplicate.name = self.name
duplicate.ptype = self.ptype
duplicate.ptype = self.ptype
return duplicate
def reset(self):
@@ -271,7 +271,7 @@ class Parser(ParserBase):
the `reset()`-method of the parent class must be called from the
`reset()`-method of the derived class."""
self.visited = dict() # type: Dict[int, Tuple[Optional[Node], StringView]]
self.recursion_counter = defaultdict(lambda :0) # type: DefaultDict[int, int]
self.recursion_counter = defaultdict(lambda: 0) # type: DefaultDict[int, int]
self.cycle_detection = set() # type: Set[Callable]
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
@@ -293,7 +293,10 @@ class Parser(ParserBase):
@property
def grammar(self) -> 'Grammar':
return self._grammar
if self._grammar:
return self._grammar
else:
raise AssertionError('Grammar has not yet been set!')
@grammar.setter
def grammar(self, grammar: 'Grammar'):
@@ -301,8 +304,9 @@ class Parser(ParserBase):
self._grammar = grammar
self._grammar_assigned_notifier()
else:
assert self._grammar == grammar, \
"Parser has already been assigned to a different Grammar object!"
if self._grammar != grammar:
raise AssertionError("Parser has already been assigned"
"to a different Grammar object!")
def _grammar_assigned_notifier(self):
"""A function that notifies the parser object that it has been
@@ -564,12 +568,6 @@ class Grammar:
def __init__(self, root: Parser = None) -> None:
# if not hasattr(self.__class__, 'parser_initialization__'):
# self.__class__.parser_initialization__ = "pending"
# if not hasattr(self.__class__, 'wspL__'):
# self.wspL__ = ''
# if not hasattr(self.__class__, 'wspR__'):
# self.wspR__ = ''
self.all_parsers__ = set() # type: Set[ParserBase]
self._dirty_flag__ = False # type: bool
self.history_tracking__ = False # type: bool
@@ -609,7 +607,7 @@ class Grammar:
self.document_length__ = 0 # type: int
self.document_lbreaks__ = [] # type: List[int]
# variables stored and recalled by Capture and Retrieve parsers
self.variables__ = defaultdict(lambda :[]) # type: DefaultDict[str, List[str]]
self.variables__ = defaultdict(lambda: []) # type: DefaultDict[str, List[str]]
self.rollback__ = [] # type: List[Tuple[int, Callable]]
self.last_rb__loc__ = -1 # type: int
# support for call stack tracing
@@ -650,7 +648,7 @@ class Grammar:
parser.grammar = self
def __call__(self, document: str, start_parser="root__", track_history=False) -> Node:
def __call__(self, document: str, start_parser="root__", track_history=False) -> RootNode:
"""
Parses a document with parser-combinators.
@@ -668,7 +666,7 @@ class Grammar:
RootNode: The root node of the parse tree.
"""
def tail_pos(predecessors: Union[List[Node], Tuple[Node, ...]]) -> int:
def tail_pos(predecessors: Union[List[Node], Tuple[Node, ...], None]) -> int:
"""Adds the position after the last node in the list of
predecessors to the node."""
return predecessors[-1].pos + len(predecessors[-1]) if predecessors else 0
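# Hedged usage sketch (grammar class name hypothetical): a Grammar object is
# called directly on the document and now returns a RootNode, from which the
# errors can be collected as in compile_source() above:
parser = ArithmeticGrammar()
tree = parser('1 + 2 * 3')        # start_parser defaults to "root__"
messages = tree.collect_errors()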
@@ -715,10 +713,11 @@ class Grammar:
str(HistoryRecord.last_match(self.history__)))
# Check if a Lookahead-Parser did match. Needed for testing, because
# in a test case this is not necessarily an error.
last_record = self.history__[-2] if len(self.history__) > 1 else []
last_record = self.history__[-2] if len(self.history__) > 1 else None # type: Optional[HistoryRecord]
if last_record and parser != self.root__ \
and last_record.status == HistoryRecord.MATCH \
and last_record.node.pos + len(last_record.node) >= len(self.document__) \
and last_record.node.pos \
+ len(last_record.node) >= len(self.document__) \
and any(isinstance(parser, Lookahead)
for parser in last_record.call_stack):
error_msg = 'Parser did not match except for lookahead! ' + err_info
@@ -728,12 +727,12 @@ class Grammar:
error_code = Error.PARSER_DID_NOT_MATCH
else:
stitches.append(result)
error_msg = "Parser stopped before end" + \
(("! trying to recover" +
(" but stopping history recording at this point."
if self.history_tracking__ else "..."))
if len(stitches) < MAX_DROPOUTS
else " too often! Terminating parser.")
error_msg = "Parser stopped before end" \
+ (("! trying to recover"
+ (" but stopping history recording at this point."
if self.history_tracking__ else "..."))
if len(stitches) < MAX_DROPOUTS
else " too often! Terminating parser.")
error_code = Error.PARSER_STOPPED_BEFORE_END
stitches.append(Node(None, skip).init_pos(tail_pos(stitches)))
self.tree__.new_error(stitches[-1], error_msg, error_code)
@@ -769,7 +768,8 @@ class Grammar:
self.tree__.new_error(result, error_msg, error_code)
# result.pos = 0 # calculate all positions
# result.collect_errors(self.document__)
self.tree__.swallow(result)
if result:
self.tree__.swallow(result)
return self.tree__
@@ -849,7 +849,7 @@ class PreprocessorToken(Parser):
def __deepcopy__(self, memo):
duplicate = self.__class__(self.name)
duplicate.name = self.name
duplicate.ptype = self.ptype
duplicate.ptype = self.ptype
return duplicate
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
@@ -857,19 +857,22 @@
end = text.find(END_TOKEN, 1)
if end < 0:
node = Node(self, '')
self.grammar.tree__.new_error(node,
self.grammar.tree__.new_error(
node,
'END_TOKEN delimiter missing from preprocessor token. '
'(Most likely due to a preprocessor bug!)') # type: Node
return node, text[1:]
elif end == 0:
node = Node(self, '')
self.grammar.tree__.new_error(node,
self.grammar.tree__.new_error(
node,
'Preprocessor-token cannot have zero length. '
'(Most likely due to a preprocessor bug!)')
return node, text[2:]
elif text.find(BEGIN_TOKEN, 1, end) >= 0:
node = Node(self, text[len(self.name) + 1:end])
self.grammar.tree__.new_error(node,
self.grammar.tree__.new_error(
node,
'Preprocessor-tokens must not be nested or contain '
'BEGIN_TOKEN delimiter as part of their argument. '
'(Most likely due to a preprocessor bug!)')
@@ -943,7 +946,7 @@ class RegExp(Parser):
regexp = self.regexp.pattern
duplicate = self.__class__(regexp)
duplicate.name = self.name
duplicate.ptype = self.ptype
duplicate.ptype = self.ptype
return duplicate
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
......@@ -964,7 +967,7 @@ class RegExp(Parser):