2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 694ca243 authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- potential infinite loops are now caught when running parser and an error is...

- potential infinite loops are now caught when running parser and an error is reported; bug fixes, tests
parent b5fd9558
#!/usr/bin/python3 #!/usr/bin/python3
"""DSLsupport.py - Support for domain specific notations for DHParser """dsl.py - Support for domain specific notations for DHParser
Copyright 2016 by Eckhart Arnold (arnold@badw.de) Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences an Humanities (badw.de) Bavarian Academy of Sciences an Humanities (badw.de)
...@@ -23,15 +23,15 @@ compilation of domain specific languages based on an EBNF-grammar. ...@@ -23,15 +23,15 @@ compilation of domain specific languages based on an EBNF-grammar.
import collections import collections
import os import os
try: try:
import regex as re import regex as re
except ImportError: except ImportError:
import re import re
from .__init__ import __version__ from .ebnf import EBNFGrammar, EBNF_ASTPipeline, EBNFCompiler
from .EBNFcompiler import EBNFGrammar, EBNF_ASTPipeline, EBNFCompiler from .toolkit import load_if_file, is_python_code, compile_python_object
from .toolkit import load_if_file, is_python_code, md5, compile_python_object from .parsers import GrammarBase, CompilerBase, full_compilation, nil_scanner
from .parsercombinators import GrammarBase, CompilerBase, full_compilation, nil_scanner
from .syntaxtree import Node from .syntaxtree import Node
...@@ -39,8 +39,7 @@ __all__ = ['GrammarError', ...@@ -39,8 +39,7 @@ __all__ = ['GrammarError',
'CompilationError', 'CompilationError',
'load_compiler_suite', 'load_compiler_suite',
'compileDSL', 'compileDSL',
'run_compiler', 'run_compiler']
'source_changed']
SECTION_MARKER = """\n SECTION_MARKER = """\n
...@@ -94,7 +93,7 @@ try: ...@@ -94,7 +93,7 @@ try:
except ImportError: except ImportError:
import re import re
from DHParser.toolkit import load_if_file from DHParser.toolkit import load_if_file
from DHParser.parsercombinators import GrammarBase, CompilerBase, nil_scanner, \\ from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\ Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\ Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, full_compilation ZeroOrMore, Forward, NegativeLookahead, mixin_comment, full_compilation
...@@ -109,8 +108,7 @@ DHPARSER_COMPILER = ''' ...@@ -109,8 +108,7 @@ DHPARSER_COMPILER = '''
def compile_{NAME}(source): def compile_{NAME}(source):
"""Compiles ``source`` and returns (result, errors, ast). """Compiles ``source`` and returns (result, errors, ast).
""" """
source_text = load_if_file(source) return full_compilation(source, {NAME}Scanner,
return full_compilation({NAME}Scanner(source_text),
{NAME}Grammar(), {NAME}_ASTPipeline, {NAME}Compiler()) {NAME}Grammar(), {NAME}_ASTPipeline, {NAME}Compiler())
if __name__ == "__main__": if __name__ == "__main__":
...@@ -139,10 +137,10 @@ def get_grammar_instance(grammar): ...@@ -139,10 +137,10 @@ def get_grammar_instance(grammar):
if is_python_code(grammar): if is_python_code(grammar):
parser_py, errors, AST = grammar_src, '', None parser_py, errors, AST = grammar_src, '', None
else: else:
parser_py, errors, AST = full_compilation(grammar_src, parser_py, errors, AST = full_compilation(grammar_src, None,
EBNFGrammar(), EBNF_ASTPipeline, EBNFCompiler()) EBNFGrammar(), EBNF_ASTPipeline, EBNFCompiler())
if errors: if errors:
raise GrammarError(errors, grammar_src) raise GrammarError('\n\n'.join(errors), grammar_src)
parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, '\w*Grammar$')() parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, '\w*Grammar$')()
else: else:
# assume that dsl_grammar is a ParserHQ-object or Grammar class # assume that dsl_grammar is a ParserHQ-object or Grammar class
...@@ -175,9 +173,9 @@ def load_compiler_suite(compiler_suite): ...@@ -175,9 +173,9 @@ def load_compiler_suite(compiler_suite):
else: else:
# assume source is an ebnf grammar # assume source is an ebnf grammar
parser_py, errors, AST = full_compilation( parser_py, errors, AST = full_compilation(
source, EBNFGrammar(), EBNF_ASTPipeline, EBNFCompiler()) source, None, EBNFGrammar(), EBNF_ASTPipeline, EBNFCompiler())
if errors: if errors:
raise GrammarError(errors, source) raise GrammarError('\n\n'.join(errors), source)
scanner = nil_scanner scanner = nil_scanner
ast = EBNF_ASTPipeline ast = EBNF_ASTPipeline
compiler = EBNFCompiler() compiler = EBNFCompiler()
...@@ -196,14 +194,20 @@ def compileDSL(text_or_file, dsl_grammar, ast_pipeline, compiler, ...@@ -196,14 +194,20 @@ def compileDSL(text_or_file, dsl_grammar, ast_pipeline, compiler,
assert isinstance(compiler, CompilerBase) assert isinstance(compiler, CompilerBase)
assert isinstance(ast_pipeline, collections.abc.Sequence) or isinstance(ast_pipeline, dict) assert isinstance(ast_pipeline, collections.abc.Sequence) or isinstance(ast_pipeline, dict)
parser_root, grammar_src = get_grammar_instance(dsl_grammar) parser_root, grammar_src = get_grammar_instance(dsl_grammar)
src = scanner(load_if_file(text_or_file)) src = load_if_file(text_or_file)
result, errors, AST = full_compilation(src, parser_root, ast_pipeline, compiler) result, errors, AST = full_compilation(src, scanner, parser_root, ast_pipeline, compiler)
if errors: raise CompilationError(errors, src, grammar_src, AST) if errors: raise CompilationError('\n\n'.join(errors), src, grammar_src, AST)
return result return result
def compileEBNF(ebnf_src, ebnf_grammar_obj=None, source_only=False): def compileEBNF(ebnf_src, ebnf_grammar_obj=None, source_only=False):
"""Compiles an EBNF source file into a Grammar class """Compiles an EBNF source file into a Grammar class.
Please note: This function returns a class which must be
instantiated before calling its parse()-method! Calling the method
directly from the class (which is technically possible in Python)
yields an error message complaining about a missing parameter,
the cause of which may not be obvious at first sight.
Args: Args:
ebnf_src(str): Either the file name of an EBNF grammar or ebnf_src(str): Either the file name of an EBNF grammar or
...@@ -216,7 +220,7 @@ def compileEBNF(ebnf_src, ebnf_grammar_obj=None, source_only=False): ...@@ -216,7 +220,7 @@ def compileEBNF(ebnf_src, ebnf_grammar_obj=None, source_only=False):
class is returned instead of the class itself. class is returned instead of the class itself.
Returns: Returns:
A Grammar class that can be instantiated for parsing a text A Grammar class that can be instantiated for parsing a text
which conforms to the language defined by ``ebnf_src`` which conforms to the language defined by ``ebnf_src``.
""" """
grammar = ebnf_grammar_obj or EBNFGrammar() grammar = ebnf_grammar_obj or EBNFGrammar()
grammar_src = compileDSL(ebnf_src, grammar, EBNF_ASTPipeline, EBNFCompiler()) grammar_src = compileDSL(ebnf_src, grammar, EBNF_ASTPipeline, EBNFCompiler())
...@@ -251,8 +255,7 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"): ...@@ -251,8 +255,7 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
parser = EBNFGrammar() parser = EBNFGrammar()
trans = EBNF_ASTPipeline trans = EBNF_ASTPipeline
compiler = EBNFCompiler(compiler_name, source) compiler = EBNFCompiler(compiler_name, source)
result, errors, ast = full_compilation(scanner(source), parser, result, errors, ast = full_compilation(source, scanner, parser, trans, compiler)
trans, compiler)
if errors: if errors:
return errors return errors
...@@ -266,7 +269,8 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"): ...@@ -266,7 +269,8 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
source = f.read() source = f.read()
intro, imports, scanner, parser, ast, compiler, outro = RX_SECTION_MARKER.split(source) intro, imports, scanner, parser, ast, compiler, outro = RX_SECTION_MARKER.split(source)
except (PermissionError, FileNotFoundError, IOError) as error: except (PermissionError, FileNotFoundError, IOError) as error:
intro, outro = '', '' intro = '#!/usr/bin/python'
outro = DHPARSER_COMPILER.format(NAME=compiler_name)
imports = DHPARSER_IMPORTS imports = DHPARSER_IMPORTS
scanner = compiler.gen_scanner_skeleton() scanner = compiler.gen_scanner_skeleton()
ast = compiler.gen_AST_skeleton() ast = compiler.gen_AST_skeleton()
...@@ -279,7 +283,6 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"): ...@@ -279,7 +283,6 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
try: try:
f = open(rootname + '_compiler.py', 'w', encoding="utf-8") f = open(rootname + '_compiler.py', 'w', encoding="utf-8")
f.write("#!/usr/bin/python")
f.write(intro) f.write(intro)
f.write(SECTION_MARKER.format(marker=SYMBOLS_SECTION)) f.write(SECTION_MARKER.format(marker=SYMBOLS_SECTION))
f.write(imports) f.write(imports)
...@@ -293,7 +296,6 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"): ...@@ -293,7 +296,6 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
f.write(compiler) f.write(compiler)
f.write(SECTION_MARKER.format(marker=END_SECTIONS_MARKER)) f.write(SECTION_MARKER.format(marker=END_SECTIONS_MARKER))
f.write(outro) f.write(outro)
f.write(DHPARSER_COMPILER.format(NAME=compiler_name))
except (PermissionError, FileNotFoundError, IOError) as error: except (PermissionError, FileNotFoundError, IOError) as error:
print('# Could not write file "' + rootname + '_compiler.py" because of: ' print('# Could not write file "' + rootname + '_compiler.py" because of: '
+ "\n# ".join(str(error).split('\n)'))) + "\n# ".join(str(error).split('\n)')))
...@@ -318,32 +320,3 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"): ...@@ -318,32 +320,3 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
return [] return []
def source_changed(grammar_source, grammar_class):
    """Return ``True`` if ``grammar_class`` does not reflect the latest
    changes of ``grammar_source``.

    The check compares a checksum, computed with ``md5()`` from the
    grammar text and ``__version__``, against the ``source_hash__``
    recorded for the grammar class.

    Parameters:
        grammar_source: File name or string representation of the
            grammar source.
        grammar_class: The parser class representing the grammar,
            or the file name of a compiler suite containing the grammar.

    Returns (bool):
        True, if the source text of the grammar is different from the
        source from which the grammar class was generated. Also True if
        a compiler suite file contains no class derived from
        ``GrammarBase``.
    """
    grammar = load_if_file(grammar_source)
    chksum = md5(grammar, __version__)
    if isinstance(grammar_class, str):
        # A string is treated as the file name of a compiler suite. Its
        # source text is searched for the stored hash; the suite itself
        # is not loaded.
        with open(grammar_class, 'r', encoding='utf8') as f:
            pycode = f.read()
        # Raw strings keep ``\w`` and ``\(`` from being read as (invalid)
        # string escape sequences.
        m = re.search(r'class \w*\(GrammarBase\)', pycode)
        if m:
            # Only look for the hash after the grammar class header.
            m = re.search(r' source_hash__ *= *"([a-z0-9]*)"',
                          pycode[m.end():])
            return not (m and m.group(1) == chksum)
        return True
    return chksum != grammar_class.source_hash__
#!/usr/bin/python3 #!/usr/bin/python3
"""EBNFcompiler.py - EBNF -> Python-Parser compilation for DHParser """ebnf.py - EBNF -> Python-Parser compilation for DHParser
Copyright 2016 by Eckhart Arnold (arnold@badw.de) Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences an Humanities (badw.de) Bavarian Academy of Sciences an Humanities (badw.de)
...@@ -21,6 +21,7 @@ permissions and limitations under the License. ...@@ -21,6 +21,7 @@ permissions and limitations under the License.
# import collections # import collections
import keyword import keyword
from functools import partial from functools import partial
try: try:
import regex as re import regex as re
except ImportError: except ImportError:
...@@ -28,7 +29,7 @@ except ImportError: ...@@ -28,7 +29,7 @@ except ImportError:
from .__init__ import __version__ from .__init__ import __version__
from .toolkit import load_if_file, escape_re, md5, sane_parser_name from .toolkit import load_if_file, escape_re, md5, sane_parser_name
from .parsercombinators import GrammarBase, mixin_comment, Forward, RE, NegativeLookahead, \ from .parsers import GrammarBase, mixin_comment, Forward, RE, NegativeLookahead, \
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase
from .syntaxtree import Node, remove_enclosing_delimiters, reduce_single_child, \ from .syntaxtree import Node, remove_enclosing_delimiters, reduce_single_child, \
replace_by_single_child, TOKEN_KEYWORD, remove_expendables, remove_tokens, flatten, \ replace_by_single_child, TOKEN_KEYWORD, remove_expendables, remove_tokens, flatten, \
...@@ -163,25 +164,21 @@ class EBNFCompilerError(Exception): ...@@ -163,25 +164,21 @@ class EBNFCompilerError(Exception):
pass pass
# Scanner = collections.namedtuple('Scanner',
# 'symbol instantiation_call cls_name cls')
class EBNFCompiler(CompilerBase): class EBNFCompiler(CompilerBase):
"""Generates a Parser from an abstract syntax tree of a grammar specified """Generates a Parser from an abstract syntax tree of a grammar specified
in EBNF-Notation. in EBNF-Notation.
""" """
COMMENT_KEYWORD = "COMMENT__" COMMENT_KEYWORD = "COMMENT__"
DEFAULT_WHITESPACE = r'[\t ]*'
RESERVED_SYMBOLS = {TOKEN_KEYWORD, WHITESPACE_KEYWORD, COMMENT_KEYWORD} RESERVED_SYMBOLS = {TOKEN_KEYWORD, WHITESPACE_KEYWORD, COMMENT_KEYWORD}
KNOWN_DIRECTIVES = {'comment', 'whitespace', 'tokens', 'literalws'}
VOWELS = {'A', 'E', 'I', 'O', 'U'} # what about cases like 'hour', 'universe' etc.?
AST_ERROR = "Badly structured syntax tree. " \ AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneuos AST transformation." "Potentially due to erroneuos AST transformation."
PREFIX_TABLE = [('§', 'Required'), ('&', 'Lookahead'), PREFIX_TABLE = [('§', 'Required'), ('&', 'Lookahead'),
('!', 'NegativeLookahead'), ('-&', 'Lookbehind'), ('!', 'NegativeLookahead'), ('-&', 'Lookbehind'),
('-!', 'NegativeLookbehind'), ('::', 'Pop'), ('-!', 'NegativeLookbehind'), ('::', 'Pop'),
(':', 'Retrieve')] (':', 'Retrieve')]
WHITESPACE = {'horizontal': r'[\t ]*', # default: horizontal
'linefeed': r'[ \t]*\n?(?!\s*\n)[ \t]*',
'vertical': r'\s*'}
def __init__(self, grammar_name="", source_text=""): def __init__(self, grammar_name="", source_text=""):
super(EBNFCompiler, self).__init__() super(EBNFCompiler, self).__init__()
...@@ -194,13 +191,13 @@ class EBNFCompiler(CompilerBase): ...@@ -194,13 +191,13 @@ class EBNFCompiler(CompilerBase):
self.rules = set() self.rules = set()
self.symbols = set() self.symbols = set()
self.variables = set() self.variables = set()
self.scanner_tokens = set()
self.definition_names = [] self.definition_names = []
self.recursive = set() self.recursive = set()
self.root = "" self.root = ""
self.directives = {'whitespace': self.DEFAULT_WHITESPACE, self.directives = {'whitespace': self.WHITESPACE['horizontal'],
'comment': '', 'comment': '',
'literalws': ['right']} 'literalws': ['right'],
'tokens': set()}
def gen_scanner_skeleton(self): def gen_scanner_skeleton(self):
name = self.grammar_name + "Scanner" name = self.grammar_name + "Scanner"
...@@ -263,8 +260,7 @@ class EBNFCompiler(CompilerBase): ...@@ -263,8 +260,7 @@ class EBNFCompiler(CompilerBase):
# prepare parser class header and docstring and # prepare parser class header and docstring and
# add EBNF grammar to the doc string of the parser class # add EBNF grammar to the doc string of the parser class
article = 'an ' if self.grammar_name[0:1].upper() \ article = 'an ' if self.grammar_name[0:1] in "AaEeIiOoUu" else 'a ' # what about 'hour', 'universe' etc.?
in EBNFCompiler.VOWELS else 'a '
declarations = ['class ' + self.grammar_name + declarations = ['class ' + self.grammar_name +
'Grammar(GrammarBase):', 'Grammar(GrammarBase):',
'r"""Parser for ' + article + self.grammar_name + 'r"""Parser for ' + article + self.grammar_name +
...@@ -324,7 +320,7 @@ class EBNFCompiler(CompilerBase): ...@@ -324,7 +320,7 @@ class EBNFCompiler(CompilerBase):
elif not sane_parser_name(rule): elif not sane_parser_name(rule):
node.add_error('Illegal symbol "%s". Symbols must not start or ' node.add_error('Illegal symbol "%s". Symbols must not start or '
' end with a doube underscore "__".' % rule) ' end with a doube underscore "__".' % rule)
elif rule in self.scanner_tokens: elif rule in self.directives['tokens']:
node.add_error('Symbol "%s" has already been defined as ' node.add_error('Symbol "%s" has already been defined as '
'a scanner token.' % rule) 'a scanner token.' % rule)
elif keyword.iskeyword(rule): elif keyword.iskeyword(rule):
...@@ -361,17 +357,17 @@ class EBNFCompiler(CompilerBase): ...@@ -361,17 +357,17 @@ class EBNFCompiler(CompilerBase):
def directive(self, node): def directive(self, node):
key = node.result[0].result.lower() key = node.result[0].result.lower()
assert key not in self.scanner_tokens assert key not in self.directives['tokens']
if key in {'comment', 'whitespace'}: if key in {'comment', 'whitespace'}:
if node.result[1].parser.name == "list_": if node.result[1].parser.name == "list_":
if len(node.result[1].result) != 1: if len(node.result[1].result) != 1:
node.add_error('Directive "%s" must have one, but not %i values.' % node.add_error('Directive "%s" must have one, but not %i values.' %
(key, len(node.result[1]))) (key, len(node.result[1])))
value = self.compile__(node.result[1]).pop() value = self.compile__(node.result[1]).pop()
if value in {'linefeed', 'standard'} and key == 'whitespace': if key == 'whitespace' and value in EBNFCompiler.WHITESPACE:
value = '\s*' if value == "linefeed" else self.DEFAULT_WHITESPACE value = EBNFCompiler.WHITESPACE[value] # replace whitespace-name by regex
else: else:
node.add_error('Value "%" not allowed for directive "%s".' % (value, key)) node.add_error('Value "%s" not allowed for directive "%s".' % (value, key))
else: else:
value = node.result[1].result.strip("~") value = node.result[1].result.strip("~")
if value != node.result[1].result: if value != node.result[1].result:
...@@ -382,6 +378,7 @@ class EBNFCompiler(CompilerBase): ...@@ -382,6 +378,7 @@ class EBNFCompiler(CompilerBase):
elif value[0] + value[-1] == '//': elif value[0] + value[-1] == '//':
value = self._check_rx(node, value[1:-1]) value = self._check_rx(node, value[1:-1])
self.directives[key] = value self.directives[key] = value
elif key == 'literalws': elif key == 'literalws':
value = {item.lower() for item in self.compile__(node.result[1])} value = {item.lower() for item in self.compile__(node.result[1])}
if (len(value - {'left', 'right', 'both', 'none'}) > 0 if (len(value - {'left', 'right', 'both', 'none'}) > 0
...@@ -394,11 +391,11 @@ class EBNFCompiler(CompilerBase): ...@@ -394,11 +391,11 @@ class EBNFCompiler(CompilerBase):
self.directives[key] = list(ws) self.directives[key] = list(ws)
elif key == 'tokens': elif key == 'tokens':
self.scanner_tokens |= self.compile__(node.result[1]) self.directives['tokens'] |= self.compile__(node.result[1])
else: else:
node.add_error('Unknown directive %s ! (Known ones are %s .)' % node.add_error('Unknown directive %s ! (Known ones are %s .)' %
(key, (key,
', '.join(list(EBNFCompiler.KNOWN_DIRECTIVES)))) ', '.join(list(self.directives.keys()))))
return "" return ""
def non_terminal(self, node, parser_class): def non_terminal(self, node, parser_class):
...@@ -463,7 +460,7 @@ class EBNFCompiler(CompilerBase): ...@@ -463,7 +460,7 @@ class EBNFCompiler(CompilerBase):
"AST transformation!") "AST transformation!")
def symbol(self, node): def symbol(self, node):
if node.result in self.scanner_tokens: if node.result in self.directives['tokens']:
return 'ScannerToken("' + node.result + '")' return 'ScannerToken("' + node.result + '")'
else: else:
self.symbols.add(node) self.symbols.add(node)
...@@ -472,7 +469,7 @@ class EBNFCompiler(CompilerBase): ...@@ -472,7 +469,7 @@ class EBNFCompiler(CompilerBase):
return node.result return node.result
def literal(self, node): def literal(self, node):
return 'Token(' + ', '.join([node.result]) + ')' return 'Token(' + node.result.replace('\\', r'\\') + ')' # return 'Token(' + ', '.join([node.result]) + ')' ?
def regexp(self, node): def regexp(self, node):
rx = node.result rx = node.result
...@@ -501,3 +498,34 @@ class EBNFCompiler(CompilerBase): ...@@ -501,3 +498,34 @@ class EBNFCompiler(CompilerBase):
def list_(self, node): def list_(self, node):
assert node.children assert node.children
return set(item.result.strip() for item in node.result) return set(item.result.strip() for item in node.result)
def source_changed(grammar_source, grammar_class):
    """Return ``True`` if ``grammar_class`` does not reflect the latest
    changes of ``grammar_source``.

    The check compares a checksum, computed with ``md5()`` from the
    grammar text and ``__version__``, against the ``source_hash__``
    recorded for the grammar class.

    Parameters:
        grammar_source: File name or string representation of the
            grammar source.
        grammar_class: The parser class representing the grammar,
            or the file name of a compiler suite containing the grammar.

    Returns (bool):
        True, if the source text of the grammar is different from the
        source from which the grammar class was generated. Also True if
        a compiler suite file contains no class derived from
        ``GrammarBase``.
    """
    grammar = load_if_file(grammar_source)
    chksum = md5(grammar, __version__)
    if isinstance(grammar_class, str):
        # A string is treated as the file name of a compiler suite. Its
        # source text is searched for the stored hash; the suite itself
        # is not loaded.
        with open(grammar_class, 'r', encoding='utf8') as f:
            pycode = f.read()
        # Raw strings keep ``\w`` and ``\(`` from being read as (invalid)
        # string escape sequences.
        m = re.search(r'class \w*\(GrammarBase\)', pycode)
        if m:
            # Only look for the hash after the grammar class header.
            m = re.search(r' source_hash__ *= *"([a-z0-9]*)"',
                          pycode[m.end():])
            return not (m and m.group(1) == chksum)
        return True
    return chksum != grammar_class.source_hash__
#!/usr/bin/python3 #!/usr/bin/python3
"""parsercombinators.py - parser combinators for for DHParser """parsers.py - parser combinators for for DHParser
Copyright 2016 by Eckhart Arnold (arnold@badw.de) Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences an Humanities (badw.de) Bavarian Academy of Sciences an Humanities (badw.de)
...@@ -62,7 +62,7 @@ except ImportError: ...@@ -62,7 +62,7 @@ except ImportError:
from .toolkit import IS_LOGGING, LOGS_DIR, escape_re, sane_parser_name, smart_list from .toolkit import IS_LOGGING, LOGS_DIR, escape_re, sane_parser_name, smart_list
from .syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, \ from .syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, \
traverse traverse
from DHParser.toolkit import error_messages from DHParser.toolkit import load_if_file, error_messages
__all__ = ['HistoryRecord', __all__ = ['HistoryRecord',
'Parser', 'Parser',
...@@ -168,6 +168,7 @@ def add_parser_guard(parser_func): ...@@ -168,6 +168,7 @@ def add_parser_guard(parser_func):
grammar.moving_forward = False grammar.moving_forward = False
record = HistoryRecord(grammar.call_stack.copy(), node, len(rest)) record = HistoryRecord(grammar.call_stack.copy(), node, len(rest))
grammar.history.append(record) grammar.history.append(record)
# print(record.stack, record.status, rest[:20].replace('\n', '|'))
grammar.call_stack.pop() grammar.call_stack.pop()
if node is not None: if node is not None:
...@@ -396,6 +397,27 @@ class GrammarBase: ...@@ -396,6 +397,27 @@ class GrammarBase:
write_log(errors_only, log_file_name + '_errors') write_log(errors_only, log_file_name + '_errors')
def dsl_error_msg(parser, error_str):
    """Return an error message for errors in the parser configuration,
    e.g. errors that result in infinite loops.

    Args:
        parser (Parser): The parser where the error was noticed. Note
            that this is not necessarily the parser that caused the
            error but only where the error became apparent.
        error_str (str): A short string describing the error.

    Returns:
        str: An error message including the call stack if history
            tracking has been turned on in the grammar object.
    """
    msg = ["DSL parser specification error:", error_str, "caught by parser", str(parser)]
    history = parser.grammar.history
    if history:
        # str() ensures that building the message cannot itself fail in
        # the join below if the recorded stack is not already a string.
        msg.extend(["\nCall stack:", str(history[-1].stack)])
    else:
        msg.extend(["\nEnable history tracking in Grammar object to display call stack."])
    return " ".join(msg)
######################################################################## ########################################################################
# #
# Token and Regular Expression parser classes (i.e. leaf classes) # Token and Regular Expression parser classes (i.e. leaf classes)
...@@ -517,7 +539,6 @@ class RE(Parser): ...@@ -517,7 +539,6 @@ class RE(Parser):
name: The optional name of the parser. name: The optional name of the parser.
""" """
super(RE, self).__init__(name) super(RE, self).__init__(name)
# assert wR or regexp == '.' or isinstance(self, Token)
self.wL = wL self.wL = wL
self.wR = wR self.wR = wR
self.wspLeft = RegExp(wL, WHITESPACE_KEYWORD) if wL else ZOMBIE_PARSER self.wspLeft = RegExp(wL, WHITESPACE_KEYWORD) if wL else ZOMBIE_PARSER
...@@ -649,10 +670,14 @@ class Optional(UnaryOperator): ...@@ -649,10 +670,14 @@ class Optional(UnaryOperator):
class ZeroOrMore(Optional): class ZeroOrMore(Optional):
def __call__(self, text): def __call__(self, text):
results = () results = ()
while text: n = len(text) + 1
while text and len(text) < n:
n = len(text)
node, text = self.parser(text) node, text = self.parser(text)
if not node: if not node:
break break
if len(text) == n:
node.add_error(dsl_error_msg(self, 'Infinite Loop.'))
results += (node,) results += (node,)
return Node(self, results), text return Node(self, results), text