In January 2021 we will introduce a 10 GB quota for project repositories. Higher limits for individual projects will be available on request. Please see https://doku.lrz.de/display/PUBLIC/GitLab for more information.

Commit 62c4cfe2 authored by Eckhart Arnold's avatar Eckhart Arnold

- can use operators "+" (Sequence) and "|" (Alternative) for constructing...

- can use operators "+" (Sequence) and "|" (Alternative) for constructing parsers in Python; better testing support
parent 8ff06a68
......@@ -19,7 +19,6 @@ Module ``DSLsupport`` contains various functions to support the
compilation of domain specific languages based on an EBNF-grammar.
"""
import collections
import os
try:
......@@ -27,10 +26,10 @@ try:
except ImportError:
import re
from .ebnf import EBNFGrammar, EBNFTransformer, EBNFCompiler, grammar_changed, \
from .ebnf import EBNFTransformer, grammar_changed, \
get_ebnf_scanner, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
from .toolkit import logging, load_if_file, is_python_code, compile_python_object
from .parsers import GrammarBase, CompilerBase, compile_source, nil_scanner
from .parsers import Grammar, CompilerBase, compile_source, nil_scanner
from .syntaxtree import Node
......@@ -72,7 +71,7 @@ try:
except ImportError:
import re
from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \\
from DHParser.parsers import Grammar, CompilerBase, nil_scanner, \\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
......@@ -80,7 +79,7 @@ from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \\
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \\
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \\
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \\
WHITESPACE_PTYPE, TOKEN_PTYPE
collapse, map_content, WHITESPACE_PTYPE, TOKEN_PTYPE
'''
......@@ -141,7 +140,7 @@ class CompilationError(Exception):
def grammar_instance(grammar_representation):
"""Returns a grammar object and the source code of the grammar, from
the given `grammar`-data which can be either a file name, ebnf-code,
python-code, a GrammarBase-derived grammar class or an instance of
python-code, a Grammar-derived grammar class or an instance of
such a class (i.e. a grammar object already).
"""
if isinstance(grammar_representation, str):
......@@ -155,11 +154,11 @@ def grammar_instance(grammar_representation):
get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
if errors:
raise GrammarError('\n\n'.join(errors), grammar_src)
parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, '\w*Grammar$')()
parser_root = compile_python_object(DHPARSER_IMPORTS + parser_py, '\w+Grammar$')()
else:
# assume that grammar_representation is a Grammar object or a Grammar class
grammar_src = ''
if isinstance(grammar_representation, GrammarBase):
if isinstance(grammar_representation, Grammar):
parser_root = grammar_representation
else:
# assume ``grammar_representation`` is a grammar class and get the root object
......
......@@ -16,21 +16,20 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
from functools import partial
import keyword
import os
from functools import partial
try:
import regex as re
except ImportError:
import re
from .toolkit import load_if_file, escape_re, md5, sane_parser_name
from .parsers import GrammarBase, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase, \
Capture, Retrieve
from .parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase
from .syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \
replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \
forbid, assert_content, WHITESPACE_PTYPE, key_parser_name, key_tag_name
forbid, assert_content, WHITESPACE_PTYPE, key_tag_name
from .versionnumber import __version__
......@@ -63,7 +62,7 @@ def get_ebnf_scanner():
########################################################################
class EBNFGrammar(GrammarBase):
class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file, with this grammar:
# EBNF-Grammar in EBNF
......@@ -109,7 +108,7 @@ class EBNFGrammar(GrammarBase):
"""
expression = Forward()
source_hash__ = "a410e1727fb7575e98ff8451dbf8f3bd"
parser_initialization__ = "upon instatiation"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'\s*', comment=r'#.*(?:\n|$)')
wspL__ = ''
......@@ -158,7 +157,7 @@ def grammar_changed(grammar_class, grammar_source):
# grammar_class = load_compiler_suite(grammar_class)[1]
with open(grammar_class, 'r', encoding='utf8') as f:
pycode = f.read()
m = re.search('class \w*\(GrammarBase\)', pycode)
m = re.search('class \w*\(Grammar\)', pycode)
if m:
m = re.search(' source_hash__ *= *"([a-z0-9]*)"',
pycode[m.span()[1]:])
......@@ -339,7 +338,7 @@ class EBNFCompiler(CompilerBase):
self.grammar_name + '-grammar']
for name in self.definition_names:
transtable.append(' "' + name + '": no_operation,')
transtable += [' "": no_operation', '}', '', tf_name +
transtable += [' "*": no_operation', '}', '', tf_name +
' = partial(traverse, processing_table=%s)' % tt_name, '']
transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
return '\n'.join(transtable)
......@@ -392,7 +391,7 @@ class EBNFCompiler(CompilerBase):
article = 'an ' if self.grammar_name[0:1] in "AaEeIiOoUu" else 'a ' # what about 'hour', 'universe' etc.?
declarations = ['class ' + self.grammar_name +
'Grammar(GrammarBase):',
'Grammar(Grammar):',
'r"""Parser for ' + article + self.grammar_name +
' source file' +
(', with this grammar:' if self.grammar_source else '.')]
......
......@@ -62,7 +62,7 @@ from DHParser.toolkit import load_if_file, error_messages
__all__ = ['HistoryRecord',
'Parser',
'GrammarBase',
'Grammar',
'RX_SCANNER_TOKEN',
'BEGIN_SCANNER_TOKEN',
'END_SCANNER_TOKEN',
......@@ -229,6 +229,12 @@ class Parser(metaclass=ParserMetaClass):
def __str__(self):
return self.name or self.ptype
def __add__(self, other):
return Sequence(self, other)
def __or__(self, other):
return Alternative(self, other)
@property
def grammar(self):
return self._grammar
......@@ -254,7 +260,7 @@ class Parser(metaclass=ParserMetaClass):
return True
class GrammarBase:
class Grammar:
root__ = None # should be overwritten by grammar subclass
@classmethod
......@@ -262,7 +268,7 @@ class GrammarBase:
"""Initializes the `parser.name` fields of those
Parser objects that are directly assigned to a class field with
the field's name, e.g.
class Grammar(GrammarBase):
class Grammar(Grammar):
...
symbol = RE('(?!\\d)\\w+')
After the call of this method symbol.name == "symbol"
......@@ -288,13 +294,19 @@ class GrammarBase:
parser.parser.name = entry
cls.parser_initialization__ = "done"
def __init__(self):
def __init__(self, root=None):
if not hasattr(self.__class__, 'parser_initialization__'):
self.__class__.parser_initialization__ = "pending"
if not hasattr(self.__class__, 'wspL__'):
self.wspL__ = ''
if not hasattr(self.__class__, 'wspR__'):
self.wspR__ = ''
self.all_parsers = set()
self.dirty_flag = False
self.history_tracking = False
self._reset()
self._assign_parser_names()
self.root__ = copy.deepcopy(self.__class__.root__)
self.root__ = root if root else copy.deepcopy(self.__class__.root__)
if self.wspL__:
self.wsp_left_parser__ = Whitespace(self.wspL__)
self.wsp_left_parser__.grammar = self
......@@ -322,7 +334,7 @@ class GrammarBase:
def _add_parser(self, parser):
"""Adds the copy of the classes parser object to this
particular instance of GrammarBase.
particular instance of Grammar.
"""
if parser.name:
setattr(self, parser.name, parser)
......@@ -382,7 +394,6 @@ class GrammarBase:
"""Writes a log of the parsing history of the most recently parsed
document.
"""
def prepare_line(record):
excerpt = self.document.__getitem__(slice(*record.extent))[:25].replace('\n', '\\n')
excerpt = "'%s'" % excerpt if len(excerpt) < 25 else "'%s...'" % excerpt
......@@ -758,8 +769,33 @@ class Sequence(NaryOperator):
assert len(results) <= len(self.parsers)
return Node(self, results), text_
def __add__(self, other):
return Sequence(*self.parsers, other)
def __radd__(self, other):
return Sequence(other, *self.parsers)
# def __iadd__(self, other):
# if isinstance(other, Sequence):
# self.parsers = self.parsers + other.parsers
# else:
# self.parsers = self.parsers + (other,)
# return self
class Alternative(NaryOperator):
"""Matches if at least one of several alternatives matches. Returns
the first match.
This parser represents the EBNF-operator "|" with the qualification
that both the symmetry and the ambiguity of the EBNF-or-operator
are broken by selecting the first match.
# the order of the sub-expression matters:
# the most selective expression should be put first:
"""
def __init__(self, *parsers, name=''):
super(Alternative, self).__init__(*parsers, name=name)
assert len(self.parsers) >= 1
......@@ -772,6 +808,19 @@ class Alternative(NaryOperator):
return Node(self, node), text_
return None, text
def __or__(self, other):
return Alternative(*self.parsers, other)
def __ror__(self, other):
return Alternative(other, *self.parsers)
# def __ior__(self, other):
# if isinstance(other, Sequence):
# self.parsers = self.parsers + other.parsers
# else:
# self.parsers = self.parsers + (other,)
# return self
########################################################################
#
......@@ -966,7 +1015,7 @@ class Forward(Parser):
def set(self, parser):
assert isinstance(parser, Parser)
self.name = parser.name # redundant, see GrammarBase-constructor
self.name = parser.name # redundant, see Grammar-constructor
self.parser = parser
def apply(self, func):
......
......@@ -450,12 +450,22 @@ def key_tag_name(node):
def traverse(root_node, processing_table, key_func=key_tag_name):
"""Traverses the snytax tree starting with the given ``node`` depth
first and applies the sequences of callback functions registered
first and applies the sequences of callback-functions registered
in the ``calltable``-dictionary.
Possible use cases are the transformation of a concrete syntax tree
into an abstract tree (AST) or the semantic analysis of the AST.
The most important use case is the transformation of a concrete
syntax tree into an abstract tree (AST). But it is also imaginable
to emloy tree-traversal for the semantic analysis of the AST.
In order to assign sequences of callback-functions to nodes, a
dictionary ("processing table") is used. The keys usually represent
tag names, but any other key function is possible. There exist
three special keys:
'+': always called (before any other processing function)
'*': called for those nodes for which no (other) processing
function appears in the table
'~': always called (after any other processing function)
Args:
root_node (Node): The root-node of the syntax tree to be traversed
processing_table (dict): node key -> sequence of functions that
......@@ -484,7 +494,7 @@ def traverse(root_node, processing_table, key_func=key_tag_name):
table.get(key_func(node), table.get('*', [])) + \
table.get('~', [])
# '+' always called (before any other processing function)
# '*' called for those nodes for which no (other) processing functions is in the table
# '*' called for those nodes for which no (other) processing function appears in the table
# '~' always called (after any other processing function)
for call in sequence:
call(node)
......@@ -530,12 +540,44 @@ def reduce_single_child(node):
def replace_parser(node, name, ptype=''):
"""Replaces the parser of a Node to a mock parser with the given
"""Replaces the parser of a Node with a mock parser with the given
name and pseudo-type.
"""
node.parser = MockParser(name, ptype)
def flatten(node):
    """Recursively flattens all unnamed sub-nodes, in case there is more
    than one sub-node present. Flattening means that
    wherever a node has child nodes, the child nodes are inserted in place
    of the node. In other words, all leaves of this node and its child nodes
    are collected in-order as direct children of this node.

    This is meant to achieve these kinds of structural transformation:
        (1 (+ 2) (+ 3)     -> (1 + 2 + 3)
        (1 (+ (2 + (3)))) -> (1 + 2 + 3)

    Warning: Use with care. Due to its recursive nature, flattening can
    have unexpected side-effects.
    """
    if node.children:
        new_result = []
        for child in node.children:
            # only unnamed (anonymous) nodes with children are dissolved
            if not child.parser.name and child.children:
                # redundant given the guard above, but kept as a sanity check
                assert child.children, node.as_sexpr()
                flatten(child)
                new_result.extend(child.result)
            else:
                new_result.append(child)
        node.result = tuple(new_result)
def collapse(node):
    """Collapses all sub-nodes by replacing the node's result with its
    string representation.
    """
    node.result = str(node)
# ------------------------------------------------
#
# destructive transformations:
......@@ -585,31 +627,6 @@ def remove_tokens(node, tokens=frozenset()):
remove_children_if(node, partial(is_token, token_set=tokens))
def flatten(node):
"""Recursively flattens all unnamed sub-nodes, in case there is more
than one sub-node present. Flattening means that
wherever a node has child nodes, the child nodes are inserted in place
of the node. In other words, all leaves of this node and its child nodes
are collected in-order as direct children of this node.
This is meant to achieve these kinds of structural transformation:
(1 (+ 2) (+ 3) -> (1 + 2 + 3)
(1 (+ (2 + (3)))) -> (1 + 2 + 3)
Warning: Use with care. Due to its recursive nature, flattening can
have unexpected side-effects.
"""
if node.children:
new_result = []
for child in node.children:
if not child.parser.name and child.children:
assert child.children, node.as_sexpr()
flatten(child)
new_result.extend(child.result)
else:
new_result.append(child)
node.result = tuple(new_result)
def remove_enclosing_delimiters(node):
"""Removes any enclosing delimiters from a structure (e.g. quotation marks
from a literal or braces from a group).
......@@ -619,6 +636,13 @@ def remove_enclosing_delimiters(node):
node.result = node.result[1:-1]
def map_content(node, func):
    """Replaces the content of the node with the return value of ``func``.

    Note: ``func`` is called with the node's current result (not with the
    node itself) and returns the mapped result.
    """
    node.result = func(node.result)
########################################################################
#
# syntax tree validation functions
......
......@@ -198,7 +198,11 @@ def report(test_unit):
report.append('\n### Error:')
report.append(error)
ast = tests.get('__ast__', {}).get(test_name, None)
if ast:
cst = tests.get('__cst__', {}).get(test_name, None)
if cst and (not ast or cst == ast):
report.append('\n### CST')
report.append(ast.as_sexpr())
elif ast:
report.append('\n### AST')
report.append(ast.as_sexpr())
return '\n'.join(report)
......
......@@ -51,7 +51,7 @@ https://bitbucket.org/apalala/grako
"""
# TODO: Replace copy.deepcopy() call in GrammarBase class by custom copy()-methods in the Parser classes. Is that really better?
# TODO: Replace copy.deepcopy() call in Grammar class by custom copy()-methods in the Parser classes. Is that really better?
import collections
......@@ -759,7 +759,7 @@ class GrammarBase:
"""Initializes the `parser.name` fields of those
Parser objects that are directly assigned to a class field with
the field's name, e.g.
class Grammar(GrammarBase):
class Grammar(Grammar):
...
symbol = RE('(?!\\d)\\w+')
After the call of this method symbol.name == "symbol"
......@@ -809,7 +809,7 @@ class GrammarBase:
def _add_parser(self, parser):
"""Adds the copy of the classes parser object to this
particular instance of GrammarBase.
particular instance of Grammar.
"""
setattr(self, parser.name, parser)
self.all_parsers.add(parser)
......@@ -1330,7 +1330,7 @@ class Forward(Parser):
def set(self, parser):
assert isinstance(parser, Parser)
self.name = parser.name # redundant, because of constructor of GrammarBase
self.name = parser.name # redundant, because of constructor of Grammar
self.parser = parser
def apply(self, func):
......@@ -1343,7 +1343,7 @@ PARSER_SYMBOLS = {'RegExp', 'mixin_comment', 'RE', 'Token', 'Required',
'Lookahead', 'NegativeLookahead', 'Optional',
'Lookbehind', 'NegativeLookbehind',
'ZeroOrMore', 'Sequence', 'Alternative', 'Forward',
'OneOrMore', 'GrammarBase', 'Capture', 'Retrieve',
'OneOrMore', 'Grammar', 'Capture', 'Retrieve',
'Pop'}
......@@ -1385,7 +1385,7 @@ def full_compilation(source, grammar_base, AST_transformations, compiler):
Parameters:
source (str): The input text for compilation
grammar_base (GrammarBase): The GrammarBase object
grammar_base (Grammar): The Grammar object
AST_transformations (dict): The transformation-table that
assigns AST transformation functions to parser names (see
function traverse)
......@@ -1658,7 +1658,7 @@ class EBNFCompiler(CompilerBase):
article = 'an ' if self.grammar_name[0:1].upper() \
in EBNFCompiler.VOWELS else 'a '
declarations = ['class ' + self.grammar_name +
'Grammar(GrammarBase):',
'Grammar(Grammar):',
'r"""Parser for ' + article + self.grammar_name +
' source file' +
(', with this grammar:' if self.source_text else '.')]
......@@ -1966,7 +1966,7 @@ def compile_python_object(python_src, obj_name_ending="Grammar"):
def get_grammar_instance(grammar):
"""Returns a grammar object and the source code of the grammar, from
the given `grammar`-data which can be either a file name, ebnf-code,
python-code, a GrammarBase-derived grammar class or an instance of
python-code, a Grammar-derived grammar class or an instance of
such a class (i.e. a grammar object already).
"""
if isinstance(grammar, str):
......@@ -2157,7 +2157,7 @@ def source_changed(grammar_source, grammar_class):
# grammar_class = load_compiler_suite(grammar_class)[1]
with open(grammar_class, 'r', encoding='utf8') as f:
pycode = f.read()
m = re.search('class \w*\(GrammarBase\)', pycode)
m = re.search('class \w*\(Grammar\)', pycode)
if m:
m = re.search(' source_hash__ *= *"([a-z0-9]*)"',
pycode[m.span()[1]:])
......
......@@ -193,7 +193,7 @@ code = compile(parser_py, '<string>', 'exec')
module_vars = globals()
name_space = {k: module_vars[k] for k in {'RegExp', 'RE', 'Token', 'Required', 'Optional', 'mixin_comment',
'ZeroOrMore', 'OneOrMore', 'Sequence', 'Alternative', 'Forward',
'NegativeLookahead', 'PositiveLookahead', 'ScannerToken', 'GrammarBase'}}
'NegativeLookahead', 'PositiveLookahead', 'ScannerToken', 'Grammar'}}
exec(code, name_space)
parser = name_space['Grammar']()
......
......@@ -24,7 +24,8 @@ paragraph = { partext | parblock }
partext = !blockcmd (text | PARSEP)
text = cfgtext | BRACKETS
cfgtext = TEXTCHUNK | ESCAPED | WSPC
cfgtext = word_sequence | ESCAPED | WSPC
word_sequence = { TEXTCHUNK WSPC }+
blockcmd = "\subsection" | "\section" | "\chapter" | "\subsubsection"
| "\paragraph" | "\subparagraph" | "\begin{enumerate}"
......
[match:paragraph]
1: Die Stadt Göttingen, berühmt durch ihre Würste und Universität, gehört
dem Könige von Hannover, und enthält 999 Feuerstellen, diverse
Kirchen, eine Entbindungsanstalt, eine Sternwarte, einen Karzer, eine
Bibliothek und einen Ratskeller, wo das Bier sehr gut ist. Der
vorbeifließende Bach heißt »die Leine«, und dient des Sommers zum
Baden; das Wasser ist sehr kalt und an einigen Orten so breit, daß
Lüder wirklich einen großen Anlauf nehmen mußte, als er hinübersprang.
1 : Im allgemeinen werden die Bewohner Göttingens eingeteilt in Studenten,
Professoren, Philister und Vieh; welche vier Stände doch nichts weniger
als streng geschieden sind. Der Viehstand ist der bedeutendste.
......@@ -20,5 +20,9 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
sys.path.extend(['../../', '../', './'])
from DHParser.testing import recompile_grammar
recompile_grammar('.')
......@@ -7,20 +7,17 @@
#######################################################################
from functools import partial
import sys
from functools import partial
try:
import regex as re
except ImportError:
import re
from DHParser.toolkit import load_if_file
from DHParser.parsers import GrammarBase, CompilerBase, nil_scanner, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, \
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source
from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, \
remove_children_if, reduce_single_child, replace_by_single_child, remove_whitespace, \
no_operation, remove_expendables, remove_tokens, flatten, is_whitespace, is_expendable, \
from DHParser.parsers import Grammar, CompilerBase, Alternative, Required, Token, \
Optional, OneOrMore, Sequence, RE, ZeroOrMore, NegativeLookahead, mixin_comment, compile_source
from DHParser.syntaxtree import traverse, reduce_single_child, replace_by_single_child, no_operation, \
remove_expendables, remove_tokens, flatten, \
WHITESPACE_KEYWORD, TOKEN_KEYWORD
......@@ -40,7 +37,7 @@ def MLWScanner(text):
#
#######################################################################
class MLWGrammar(GrammarBase):
class MLWGrammar(Grammar):
r"""Parser for a MLW source file, with this grammar:
# EBNF-Syntax für MLW-Artikel
......
......@@ -20,14 +20,12 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import inspect
import os
import sys
sys.path.extend(['../', './'])
from DHParser.parsers import GrammarBase, CompilerBase
from DHParser.ebnf import get_ebnf_compiler, get_ebnf_scanner, get_ebnf_transformer
from DHParser.dsl import compile_on_disk, run_compiler, compileDSL, compileEBNF, parser_factory, \
from DHParser.parsers import Grammar, CompilerBase
from DHParser.dsl import compile_on_disk, run_compiler, compileEBNF, parser_factory, \
load_compiler_suite
......@@ -90,7 +88,7 @@ class TestCompilerGeneration:
transformer = transformer()
compiler = compiler()
assert callable(scanner)
assert isinstance(parser, GrammarBase)
assert isinstance(parser, Grammar)
assert callable(transformer)
assert isinstance(compiler, CompilerBase)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment