Commit 739242bd authored by Eckhart Arnold
Browse files

synonymhandling solved + reporting of unconnected rules in EBNFCompiler from ebnf.py

parent 72799c38
......@@ -73,7 +73,7 @@ except ImportError:
import re
from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
nop_filter, counterpart_filter, accumulating_filter, ScannerFunc
......@@ -128,11 +128,12 @@ class CompilationError(Exception):
contains errors.
"""
def __init__(self, error_messages, dsl_text, dsl_grammar, AST):
def __init__(self, error_messages, dsl_text, dsl_grammar, AST, result):
self.error_messages = error_messages
self.dsl_text = dsl_text
self.dsl_grammar = dsl_grammar
self.AST = AST
self.result = result
def __str__(self):
return '\n'.join(self.error_messages)
......@@ -163,7 +164,6 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
parser_root = grammar_representation
else:
# assume ``grammar_representation`` is a grammar class and get the root object
# TODO: further case: grammar_representation is a method
parser_root = grammar_representation()
return parser_root, grammar_src
......@@ -188,7 +188,7 @@ def compileDSL(text_or_file: str,
ast_transformation, compiler)
if errors:
src = load_if_file(text_or_file)
raise CompilationError(errors, src, grammar_src, AST)
raise CompilationError(errors, src, grammar_src, AST, result)
return result
......
......@@ -16,13 +16,13 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
from collections import OrderedDict
import keyword
try:
import regex as re
except ImportError:
import re
from typing import Callable, List, Set, Tuple
from typing import Callable, Dict, List, Set, Tuple
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
......@@ -291,7 +291,7 @@ def get_compiler(grammar_name="{NAME}", grammar_source="") -> {NAME}Compiler:
class EBNFCompilerError(Exception):
"""Error raised by `EBNFCompiler` class. (Not compilation errors
in the strict sense, see `CompilationError` below)"""
in the strict sense, see `CompilationError` in module ``dsl.py``)"""
pass
......@@ -318,10 +318,11 @@ class EBNFCompiler(Compiler):
def _reset(self):
self._result = '' # type: str
self.rules = set() # type: Set[str]
self.rules = OrderedDict() # type: OrderedDict[str, List[Node]]
self.current_symbols = [] # type: List[Node]
self.symbols = {} # type: Dict[str, Node]
self.variables = set() # type: Set[str]
self.symbol_nodes = [] # type: List[Node]
self.definition_names = [] # type: List[str]
# self.definition_names = [] # type: List[str]
self.recursive = set() # type: Set[str]
self.root = "" # type: str
self.directives = {'whitespace': self.WHITESPACE['horizontal'],
......@@ -340,7 +341,7 @@ class EBNFCompiler(Compiler):
+ SCANNER_FACTORY.format(NAME=self.grammar_name)
def gen_transformer_skeleton(self) -> str:
if not self.definition_names:
if not self.rules:
raise EBNFCompilerError('Compiler must be run before calling '
'"gen_transformer_Skeleton()"!')
tt_name = self.grammar_name + '_AST_transformation_table'
......@@ -348,7 +349,7 @@ class EBNFCompiler(Compiler):
transtable = [tt_name + ' = {',
' # AST Transformations for the ' +
self.grammar_name + '-grammar']
for name in self.definition_names:
for name in self.rules:
transtable.append(' "' + name + '": no_transformation,')
transtable += [' "*": no_transformation', '}', '', tf_name +
' = partial(traverse, processing_table=%s)' % tt_name, '']
......@@ -356,7 +357,7 @@ class EBNFCompiler(Compiler):
return '\n'.join(transtable)
def gen_compiler_skeleton(self) -> str:
if not self.definition_names:
if not self.rules:
raise EBNFCompilerError('Compiler has not been run before calling '
'"gen_Compiler_Skeleton()"!')
compiler = ['class ' + self.grammar_name + 'Compiler(Compiler):',
......@@ -368,7 +369,7 @@ class EBNFCompiler(Compiler):
' super(' + self.grammar_name +
'Compiler, self).__init__(grammar_name, grammar_source)',
" assert re.match('\w+\Z', grammar_name)", '']
for name in self.definition_names:
for name in self.rules:
method_name = Compiler.derive_method_name(name)
if name == self.root:
compiler += [' def ' + method_name + '(self, node):',
......@@ -387,7 +388,6 @@ class EBNFCompiler(Compiler):
if definitions[i][0] in self.variables:
definitions[i] = (definitions[i][0], 'Capture(%s)' % definitions[i][1])
self.definition_names = [defn[0] for defn in definitions]
definitions.append(('wspR__', self.WHITESPACE_KEYWORD
if 'right' in self.directives['literalws'] else "''"))
definitions.append(('wspL__', self.WHITESPACE_KEYWORD
......@@ -417,12 +417,6 @@ class EBNFCompiler(Compiler):
declarations = declarations[:-1]
declarations.append('"""')
# add default functions for filter filters of pop or retrieve operators
# for symbol, fun in self.directives['filter']:
# declarations.append(symbol + '_filter = lambda value: value.replace("(", ")")'
# '.replace("[", "]").replace("{", "}").replace(">", "<")')
# turn definitions into declarations in reverse order
self.root = definitions[0][0] if definitions else ""
......@@ -434,11 +428,31 @@ class EBNFCompiler(Compiler):
declarations += [symbol + '.set(' + statement + ')']
else:
declarations += [symbol + ' = ' + statement]
known_symbols = self.rules | self.RESERVED_SYMBOLS
for nd in self.symbol_nodes:
if nd.result not in known_symbols:
nd.add_error("Missing production for symbol '%s'" % nd.result)
# check for symbols used but never defined
defined_symbols = set(self.rules.keys()) | self.RESERVED_SYMBOLS
for symbol in self.symbols:
if symbol not in defined_symbols:
self.symbols[symbol].add_error("Missing definition for symbol '%s'" % symbol)
root_node.error_flag = True
# check for unconnected rules
defined_symbols.difference_update(self.RESERVED_SYMBOLS)
def remove_connections(symbol):
if symbol in defined_symbols:
defined_symbols.remove(symbol)
for related in self.rules[symbol][1:]:
remove_connections(str(related))
remove_connections(self.root)
for leftover in defined_symbols:
self.rules[leftover][0].add_error(('Rule "%s" is not connected to parser '
'root "%s"') % (leftover, self.root))
# set root parser and assemble python grammar definition
if self.root and 'root__' not in self.rules:
declarations.append('root__ = ' + self.root)
declarations.append('')
......@@ -466,7 +480,7 @@ class EBNFCompiler(Compiler):
return self.assemble_parser(definitions, node)
def on_definition(self, node: Node) -> Tuple[str, str]:
rule = str(node.children[0]) # cast(str, node.children[0].result)
rule = str(node.children[0])
if rule in self.rules:
node.add_error('A rule with name "%s" has already been defined.' % rule)
elif rule in EBNFCompiler.RESERVED_SYMBOLS:
......@@ -479,13 +493,17 @@ class EBNFCompiler(Compiler):
'a scanner token.' % rule)
elif keyword.iskeyword(rule):
node.add_error('Python keyword "%s" may not be used as a symbol. '
% rule + '(This may change in the furute.)')
% rule + '(This may change in the future.)')
try:
self.rules.add(rule)
self.current_symbols = [node]
self.rules[rule] = self.current_symbols
defn = self._compile(node.children[1])
if rule in self.variables:
defn = 'Capture(%s)' % defn
self.variables.remove(rule)
elif defn.find("(") < 0:
# assume it's a synonym, like 'page = REGEX_PAGE_NR'
defn = 'Synonym(%s)' % defn
except TypeError as error:
errmsg = EBNFCompiler.AST_ERROR + " (" + str(error) + ")\n" + node.as_sexpr()
node.add_error(errmsg)
......@@ -622,21 +640,23 @@ class EBNFCompiler(Compiler):
raise EBNFCompilerError("Group nodes should have been eliminated by "
"AST transformation!")
def on_symbol(self, node: Node) -> str:
result = str(node) # ; assert result == cast(str, node.result)
if result in self.directives['tokens']:
return 'ScannerToken("' + result + '")'
def on_symbol(self, node: Node) -> str: # called only for symbols on the right hand side!
symbol = str(node) # ; assert result == cast(str, node.result)
if symbol in self.directives['tokens']:
return 'ScannerToken("' + symbol + '")'
else:
self.symbol_nodes.append(node)
if result in self.rules:
self.recursive.add(result)
return result
self.current_symbols.append(node)
if symbol not in self.symbols:
self.symbols[symbol] = node
if symbol in self.rules:
self.recursive.add(symbol)
return symbol
def on_literal(self, node) -> str:
    """Compile a literal node into the source text of a ``Token(...)`` call.

    The node is converted with ``str()`` and every backslash in that text
    is doubled before it is placed between ``Token(`` and ``)``.
    """
    # NOTE(review): the original author left an open question here whether
    # this should rather be: 'Token(' + ', '.join([node.result]) + ')'
    escaped = str(node).replace('\\', r'\\')
    return ''.join(('Token(', escaped, ')'))
def on_regexp(self, node: Node) -> str:
rx = str(node) # ; assert rx == cast(str, node.result)
rx = str(node)
name = [] # type: List[str]
if rx[:2] == '~/':
if not 'left' in self.directives['literalws']:
......
......@@ -78,8 +78,9 @@ __all__ = ['ScannerFunc',
'RE',
'Token',
'mixin_comment',
'UnaryOperator',
'NaryOperator',
# 'UnaryOperator',
# 'NaryOperator',
'Synonym',
'Optional',
'ZeroOrMore',
'OneOrMore',
......@@ -737,6 +738,24 @@ class NaryOperator(Parser):
parser.apply(func)
class Synonym(UnaryOperator):
    r"""Simply calls another parser and encapsulates the result in
    another node if that parser matches.

    This parser is needed to support synonyms in EBNF, e.g.

        jahr       = JAHRESZAHL
        JAHRESZAHL = /\d\d\d\d/

    Otherwise the first line could not be represented by any parser
    class, in which case it would be unclear whether the parser
    RE('\d\d\d\d') carries the name 'JAHRESZAHL' or 'jahr'.
    """
    # The docstring is a raw string because it contains ``\d``, which is
    # an invalid escape sequence in an ordinary string literal.

    def __call__(self, text: str) -> Tuple[Node, str]:
        """Delegate to the wrapped parser and re-wrap a successful match.

        Returns ``(Node(self, node), rest)`` when the wrapped parser
        yields a truthy node, otherwise ``(None, rest)``. In both cases
        ``rest`` is the text handed back by the wrapped parser.
        """
        node, text = self.parser(text)
        if node:
            return Node(self, node), text
        return None, text
class Optional(UnaryOperator):
def __init__(self, parser: Parser, name: str = '') -> None:
super(Optional, self).__init__(parser, name)
......@@ -1141,8 +1160,7 @@ class Compiler:
else:
compiler = self.__getattribute__(self.derive_method_name(elem))
result = compiler(node)
for child in node.children:
node.error_flag = node.error_flag or child.error_flag
node.propagate_error_flags()
return result
......
......@@ -28,7 +28,7 @@ sys.path.extend(['../', './'])
from DHParser.toolkit import is_logging
from DHParser.parsers import compile_source, Retrieve, WHITESPACE_PTYPE, nil_scanner
from DHParser.ebnf import get_ebnf_grammar, get_ebnf_transformer, EBNFTransformer, get_ebnf_compiler
from DHParser.dsl import compileEBNF, compileDSL, parser_factory
from DHParser.dsl import CompilationError, compileDSL, parser_factory
class TestDirectives:
......@@ -345,6 +345,29 @@ class TestBoundaryCases:
r = self.cp(t)
assert r
def test_unconnected_symbols(self):
    """A rule that is unreachable from the root must raise
    ``CompilationError``; the ``result`` carried by the error is still
    expected to expose both rules."""
    source = """root = /.*/
unconnected = /.*/
"""
    try:
        grammar = parser_factory(source)()
    except CompilationError as failure:
        grammar = failure.result
        for rule in ('root', 'unconnected'):
            assert grammar.__dict__[rule]
    else:
        # Reaching this branch means no error was reported at all.
        assert False, "EBNF compiler should complain about unconnected rules."
class TestSynonymDetection:
    """Checks for rules that merely refer to another rule (synonyms)."""

    def test_synonym_detection(self):
        """The synonym rule and its target must each keep their own name,
        and parsing ``'b'`` must yield an S-expression containing ``b``
        exactly twice."""
        source = """a = b
b = /b/
"""
        parser = parser_factory(source)()
        for rule in ('a', 'b'):
            found = parser[rule].name
            assert found == rule, found
        tree = parser('b')
        assert tree.as_sexpr().count('b') == 2
if __name__ == "__main__":
from DHParser.testing import runner
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment