Commit 7cdf646a authored by eckhart

- examples/ArithmeticExperimental: bugfixes

parent f3b095b7
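The change running through most of these hunks is the switch from Whitespace to DropWhitespace parsers, together with the new "@ drop = whitespace" directive: insignificant whitespace is still consumed, but no longer stored as nodes in the concrete syntax tree, which is why remove_whitespace disappears from the AST transformation table below. A minimal, self-contained sketch of the idea (toy code, not the DHParser API):

    import re

    def toy_parse(src, drop_whitespace):
        """Toy scanner: returns a flat 'concrete syntax tree' as (tag, text) pairs.
        With drop_whitespace=True, matched whitespace is consumed but not
        recorded - the effect DropWhitespace has in the generated grammars."""
        WS, TOKEN = re.compile(r'\s+'), re.compile(r'\w+|[-+*/^()]')
        nodes, pos = [], 0
        while pos < len(src):
            m = WS.match(src, pos)
            if m:
                if not drop_whitespace:
                    nodes.append((':Whitespace', m.group()))
                pos = m.end()
                continue
            m = TOKEN.match(src, pos)
            if not m:
                raise ValueError('cannot parse: ' + src[pos:])
            nodes.append((':Token', m.group()))
            pos = m.end()
        return nodes

    print(toy_parse('1 + x', drop_whitespace=False))
    # [(':Token', '1'), (':Whitespace', ' '), (':Token', '+'), (':Whitespace', ' '), (':Token', 'x')]
    print(toy_parse('1 + x', drop_whitespace=True))
    # [(':Token', '1'), (':Token', '+'), (':Token', 'x')]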
@@ -39,7 +39,6 @@ DHParser/stringview.c
imperium.html
fascitergula.html
_build
examples/Tutorial/LyrikCompiler.py
_build
_static
_templates
@@ -32,7 +32,7 @@ from typing import Callable, Dict, List, Set, Tuple, Sequence, Union, Optional,
from DHParser.compile import CompilerError, Compiler, compile_source, visitor_name
from DHParser.error import Error
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, Whitespace, \
from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, DropWhitespace, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, \
GrammarError
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
@@ -172,7 +172,7 @@ class EBNFGrammar(Grammar):
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wsp__ = Whitespace(WSP_RE__)
wsp__ = DropWhitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:(?<!\\\\)\\\\(?:/)|[^/])*?/'), wsp__)
@@ -254,7 +254,7 @@ def get_ebnf_grammar() -> EBNFGrammar:
EBNF_AST_transformation_table = {
# AST Transformations for EBNF-grammar
"<":
[remove_whitespace, remove_empty],
[remove_empty], # remove_whitespace
"syntax":
[], # otherwise '"*": replace_by_single_child' would be applied
"directive, definition":
@@ -277,8 +277,6 @@ EBNF_AST_transformation_table = {
reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE):
reduce_single_child,
# "list_":
# [flatten, remove_infix_operator],
"*":
replace_by_single_child
}
@@ -747,11 +745,12 @@ class EBNFCompiler(Compiler):
# add special fields for Grammar class
definitions.append((self.WHITESPACE_PARSER_KEYWORD,
'Whitespace(%s)' % self.WHITESPACE_KEYWORD))
if DROP_WSPC in self.directives.drop:
definitions.append((self.DROP_WHITESPACE_PARSER_KEYWORD,
'DropWhitespace(%s)' % self.WHITESPACE_KEYWORD))
else:
definitions.append((self.WHITESPACE_PARSER_KEYWORD,
'Whitespace(%s)' % self.WHITESPACE_KEYWORD))
definitions.append((self.WHITESPACE_KEYWORD,
("mixin_comment(whitespace=" + self.RAW_WS_KEYWORD
+ ", comment=" + self.COMMENT_KEYWORD + ")")))
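With this change the generated parser module contains either the dropping or the keeping whitespace parser, no longer both (the regenerated grammars further down lose their extra wsp__ line accordingly). A rough sketch of the either/or decision, using hypothetical helper names rather than the actual EBNFCompiler internals:

    def whitespace_definition(drop_directives, wsp_keyword='WSP_RE__'):
        # mirrors the branch above: only one whitespace parser definition is emitted;
        # the names dwsp__/wsp__ are those visible in the generated grammars below
        if 'whitespace' in drop_directives:
            return ('dwsp__', 'DropWhitespace(%s)' % wsp_keyword)
        return ('wsp__', 'Whitespace(%s)' % wsp_keyword)

    print(whitespace_definition({'whitespace'}))   # ('dwsp__', 'DropWhitespace(WSP_RE__)')
    print(whitespace_definition(set()))            # ('wsp__', 'Whitespace(WSP_RE__)')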
@@ -56,7 +56,7 @@ from typing import List, Tuple, Union, Optional
from DHParser.error import Error
from DHParser.stringview import StringView
from DHParser.syntaxtree import Node
from DHParser.syntaxtree import Node, ZOMBIE_TAG
from DHParser.toolkit import is_filename, escape_control_characters, GLOBALS
__all__ = ('log_dir',
@@ -129,8 +129,8 @@ def logging(dirname="LOGS"):
except AttributeError:
save = ""
GLOBALS.LOGGING = dirname or ""
if dirname and not os.path.exists(dirname):
os.mkdir(dirname)
# if dirname and not os.path.exists(dirname):
# os.mkdir(dirname)
yield
GLOBALS.LOGGING = save
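With the mkdir call commented out, logging() now only sets and restores the GLOBALS.LOGGING flag; presumably the log directory is created elsewhere or has to exist already. Callers are unchanged (see the test scripts and compile_src() below); a minimal usage sketch:

    import os
    from DHParser import logging    # also reachable as DHParser.log.logging

    if not os.path.exists('LOGS'):
        os.mkdir('LOGS')            # the context manager itself no longer does this
    with logging('LOGS'):
        pass                        # parse or compile here with history recording enabled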
@@ -291,7 +291,7 @@ class HistoryRecord:
@property
def status(self) -> str:
return self.FAIL if self.node is None else \
return self.FAIL if self.node is None or self.node.tag_name == ZOMBIE_TAG else \
('"%s"' % self.err_msg()) if self.errors else self.MATCH
@property
@@ -140,7 +140,7 @@ except ModuleNotFoundError:
def recompile_grammar(grammar_src, force):
grammar_tests_dir = os.path.join(scriptpath, 'grammar_tests')
testing.create_test_templates(grammar_src, grammar_tests_dir)
with DHParser.log.logging(LOGGING):
with DHParser.log.logging(False):
# recompiles Grammar only if it has changed
if not dsl.recompile_grammar(grammar_src, force=force,
notify=lambda: print('recompiling ' + grammar_src)):
@@ -67,7 +67,6 @@ class ArithmeticGrammar(Grammar):
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
dwsp__ = DropWhitespace(WSP_RE__)
wsp__ = Whitespace(WSP_RE__)
VARIABLE = Series(RegExp('[A-Za-z]'), dwsp__)
NUMBER = Series(RegExp('(?:0|(?:[1-9]\\d*))(?:\\.\\d+)?'), dwsp__)
NEGATIVE = RegExp('[-]')
@@ -72,7 +72,6 @@ class ArithmeticExperimentalGrammar(Grammar):
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
dwsp__ = DropWhitespace(WSP_RE__)
wsp__ = Whitespace(WSP_RE__)
VARIABLE = RegExp('[a-dj-z]')
NUMBER = RegExp('(?:0|(?:[1-9]\\d*))(?:\\.\\d+)?')
MINUS = RegExp('-')
@@ -14,6 +14,7 @@ M1: 2^4
M2: 3^4
M3: 2^-x
M4: 2^3^4
M5: x^5
[ast:pow]
@@ -42,6 +43,7 @@ M6: x
[match:tail_pow]
M1: x^5
[ast:tail_pow]
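The pow and tail_pow match tests above exercise chained exponentiation (2^3^4) and powers of variables (x^5); note that exponentiation conventionally associates to the right, as Python's ** operator does:

    print(2 ** 3 ** 4 == 2 ** (3 ** 4))   # True: x^y^z groups as x^(y^z)
    print((2 ** 3) ** 4 == 2 ** 3 ** 4)   # False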
@@ -5,26 +5,43 @@ M3: "-2.71828"
M4: "-x"
M5: "(2 + x)"
M6: "-(a * b)"
M7: "-5e(2+sin(x))(a-b)(c/d)i"
[fail:factor]
F1: "x4"
F2: "- 2"
[match:tail]
M1: "x(2+3)"
M2: "xyz"
M3: "a"
M4: "x"
M5: "x(-y)"
M6: "x^5y"
[ast:tail]
[fail:tail]
F1: "5x"
F2: "xia"
F3: "x-y"
[match:seq]
M1: "xy"
M2: "xesin(y)(a+b)"
[ast:seq]
[fail:seq]
F1: "a"
F2: "-xz"
[match:sign]
M1: "-"
M2: "+"
[ast:sign]
@@ -16,6 +16,8 @@ M13: "1 - 2 - 3 - 4"
M14: "1 - 2 * 3 - 4 + 5"
M15: "1 - 3 + 3 * 4"
M16: "a * b"
M17: "(a + b) * (a - b)"
M18: "(a+b)(a-b)"
[ast:expression]
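M18 adds the juxtaposition form "(a+b)(a-b)", i.e. multiplication written without an explicit operator, next to the explicit form in M17. As a toy illustration only (not how the grammar handles it), normalising the implicit form to the explicit one:

    import re
    # insert '*' between adjacent parenthesised groups: "(a+b)(a-b)" -> "(a+b)*(a-b)"
    print(re.sub(r'\)\s*\(', ')*(', '(a+b)(a-b)'))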
@@ -31,6 +33,8 @@ M1: "2 + x"
[match:sub]
M1: "-2 - 5"
M2: "-2 -5"
[ast:sub]
@@ -30,7 +30,7 @@ CONFIG_PRESET['test_parallelization'] = True
def recompile_grammar(grammar_src, force):
grammar_tests_dir = os.path.join(scriptpath, 'grammar_tests')
create_test_templates(grammar_src, grammar_tests_dir)
with DHParser.log.logging(LOGGING):
with DHParser.log.logging(False):
# recompiles Grammar only if it has changed
name = os.path.splitext(os.path.basename(grammar_src))[0]
if not dsl.recompile_grammar(grammar_src, force=force,
@@ -71,8 +71,8 @@ Match test "entry" for parser "entry" failed:
6:68: Error (1010): '}' ~ expected, "%E2\%80\%9" found!
6:69: Error (1040): Parser stopped before end! trying to recover but stopping history recording at this point.
7:1: Error (1020): Parser did not match!
Most advanced: 7, 1: ; MATCH; "E2\%80\%93Quine\_the..."
Last match: 7, 1: ; MATCH; "E2\%80\%93Quine\_the...";
Most advanced: 6, 68: entry->:ZeroOrMore->:Series->content->:Series->text->:Alternative->CONTENT_STRING->:Alternative->:Series->:Lookahead->/(?i)%/; MATCH; "%"
Last match: 6, 68: entry->:ZeroOrMore->:Series->content->plain_content->COMMA_TERMINATED_STRING->:Alternative->:Series->:Lookahead->/(?i)%/; MATCH; "%";
@@ -3,6 +3,7 @@
@ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
@ whitespace = /\s*/ # whitespace includes linefeed
@ literalws = right # trailing whitespace of literals will be ignored tacitly
@ drop = whitespace # do not include whitespace in concrete syntax tree
#: top-level
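Applied by hand, the comment and whitespace regexes declared by these directives behave as the inline comments describe (in particular, whitespace includes linefeeds):

    import re
    COMMENT = re.compile(r'#.*(?:\n|$)')
    WHITESPACE = re.compile(r'\s*')
    print(repr(COMMENT.match('# up to the line break\nnext').group()))   # '# up to the line break\n'
    print(repr(WHITESPACE.match('  \n  token').group()))                 # '  \n  ': the linefeed is matched too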
@@ -59,33 +59,33 @@ class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file.
"""
expression = Forward()
source_hash__ = "7a7c3764b7b37241534fbb65b44b219d"
source_hash__ = "428380dcdcff7b2a3cbed187b71d248b"
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wsp__ = Whitespace(WSP_RE__)
dwsp__ = DropWhitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:(?<!\\\\)\\\\(?:/)|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:(?<!\\\\)\\\\`|[^"])*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:(?<!\\\\)\\\\"|[^"])*?"'), wsp__), Series(RegExp("'(?:(?<!\\\\)\\\\'|[^'])*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
oneormore = Series(Series(Token("{"), wsp__), expression, Series(Token("}+"), wsp__))
unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__), Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Series(Token("="), wsp__))), Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext), Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace), Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group), Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__), Alternative(regexp, literal, symbol), ZeroOrMore(Series(Series(Token(","), wsp__), Alternative(regexp, literal, symbol))), mandatory=1)
definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
syntax = Series(Option(Series(wsp__, RegExp(''))), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
whitespace = Series(RegExp('~'), dwsp__)
regexp = Series(RegExp('/(?:(?<!\\\\)\\\\(?:/)|[^/])*?/'), dwsp__)
plaintext = Series(RegExp('`(?:(?<!\\\\)\\\\`|[^"])*?`'), dwsp__)
literal = Alternative(Series(RegExp('"(?:(?<!\\\\)\\\\"|[^"])*?"'), dwsp__), Series(RegExp("'(?:(?<!\\\\)\\\\'|[^'])*?'"), dwsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), dwsp__)
option = Series(Series(Token("["), dwsp__), expression, Series(Token("]"), dwsp__), mandatory=1)
repetition = Series(Series(Token("{"), dwsp__), expression, Series(Token("}"), dwsp__), mandatory=1)
oneormore = Series(Series(Token("{"), dwsp__), expression, Series(Token("}+"), dwsp__))
unordered = Series(Series(Token("<"), dwsp__), expression, Series(Token(">"), dwsp__), mandatory=1)
group = Series(Series(Token("("), dwsp__), expression, Series(Token(")"), dwsp__), mandatory=1)
retrieveop = Alternative(Series(Token("::"), dwsp__), Series(Token(":"), dwsp__))
flowmarker = Alternative(Series(Token("!"), dwsp__), Series(Token("&"), dwsp__), Series(Token("-!"), dwsp__), Series(Token("-&"), dwsp__))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Series(Token("="), dwsp__))), Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext), Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace), Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group), Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Series(Token("§"), dwsp__)), factor))
expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), dwsp__), term))))
directive = Series(Series(Token("@"), dwsp__), symbol, Series(Token("="), dwsp__), Alternative(regexp, literal, symbol), ZeroOrMore(Series(Series(Token(","), dwsp__), Alternative(regexp, literal, symbol))), mandatory=1)
definition = Series(symbol, Series(Token("="), dwsp__), expression, mandatory=1)
syntax = Series(Option(Series(dwsp__, RegExp(''))), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax
def get_grammar() -> EBNFGrammar:
new file: examples/Tutorial/LyrikCompiler.py
#!/usr/bin/python3
#######################################################################
#
# SYMBOLS SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
import collections
from functools import partial
import os
import sys
sys.path.extend(['../../', '../', './'])
sys.path.append(r'/home/eckhart/Entwicklung/DHParser')
try:
import regex as re
except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc, is_empty, \
Node, TransformationFunc, TransformationDict, transformation_factory, traverse, \
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_empty, remove_tokens, flatten, is_insignificant_whitespace, \
collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, remove_anonymous_tokens, \
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last, \
remove_anonymous_empty, keep_nodes, traverse_locally, strip, lstrip, rstrip, \
replace_content, replace_content_by, error_on, recompile_grammar, GLOBALS
#######################################################################
#
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def LyrikPreprocessor(text):
return text, lambda i: i
def get_preprocessor() -> PreprocessorFunc:
return LyrikPreprocessor
#######################################################################
#
# PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!
#
#######################################################################
class LyrikGrammar(Grammar):
r"""Parser for a Lyrik source file.
"""
source_hash__ = "3ff2e39f8bfc07d37ea5481d61d026bb"
static_analysis_pending__ = [True]
parser_initialization__ = ["upon instantiation"]
resume_rules__ = {}
COMMENT__ = r''
WHITESPACE__ = r'[\t ]*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wsp__ = Whitespace(WSP_RE__)
ENDE = NegativeLookahead(RegExp('.'))
JAHRESZAHL = Series(RegExp('\\d\\d\\d\\d'), wsp__)
LEERZEILE = Series(RegExp('\\n[ \\t]*(?=\\n)'), wsp__)
NZ = Series(RegExp('\\n'), wsp__)
ZEICHENFOLGE = Series(RegExp('[^ \\n<>]+'), wsp__)
NAME = Series(RegExp('\\w+\\.?'), wsp__)
WORT = Series(RegExp('\\w+'), wsp__)
vers = OneOrMore(ZEICHENFOLGE)
strophe = OneOrMore(Series(NZ, vers))
text = OneOrMore(Series(strophe, ZeroOrMore(LEERZEILE)))
zeile = OneOrMore(ZEICHENFOLGE)
titel = Series(OneOrMore(Series(NZ, zeile)), OneOrMore(LEERZEILE))
serie = Series(NegativeLookahead(Series(titel, vers, NZ, vers)), OneOrMore(Series(NZ, zeile)), OneOrMore(LEERZEILE))
ziel = Synonym(ZEICHENFOLGE)
verknüpfung = Series(Series(Token("<"), wsp__), ziel, Series(Token(">"), wsp__))
namenfolge = OneOrMore(NAME)
wortfolge = OneOrMore(WORT)
jahr = Synonym(JAHRESZAHL)
ort = Series(wortfolge, Option(verknüpfung))
untertitel = Series(wortfolge, Option(verknüpfung))
werk = Series(wortfolge, Option(Series(Series(Token("."), wsp__), untertitel, mandatory=1)), Option(verknüpfung))
autor = Series(namenfolge, Option(verknüpfung))
bibliographisches = Series(autor, Series(Token(","), wsp__), Option(NZ), werk, Series(Token(","), wsp__), Option(NZ), ort, Series(Token(","), wsp__), Option(NZ), jahr, Series(Token("."), wsp__), mandatory=1)
gedicht = Series(bibliographisches, OneOrMore(LEERZEILE), Option(serie), titel, text, RegExp('\\s*'), ENDE, mandatory=3)
root__ = gedicht
def get_grammar() -> LyrikGrammar:
global GLOBALS
try:
grammar = GLOBALS.Lyrik_00000001_grammar_singleton
except AttributeError:
GLOBALS.Lyrik_00000001_grammar_singleton = LyrikGrammar()
if hasattr(get_grammar, 'python_src__'):
GLOBALS.Lyrik_00000001_grammar_singleton.python_src__ = get_grammar.python_src__
grammar = GLOBALS.Lyrik_00000001_grammar_singleton
return grammar
#######################################################################
#
# AST SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
Lyrik_AST_transformation_table = {
# AST Transformations for the Lyrik-grammar
"<": remove_empty,
"gedicht": [],
"bibliographisches": [],
"autor": [],
"werk": [],
"untertitel": [],
"ort": [],
"jahr": [reduce_single_child],
"wortfolge": [],
"namenfolge": [],
"verknüpfung": [],
"ziel": [reduce_single_child],
"serie": [],
"titel": [],
"zeile": [],
"text": [],
"strophe": [],
"vers": [],
"WORT": [],
"NAME": [],
"ZEICHENFOLGE": [],
"NZ": [],
"LEERZEILE": [],
"JAHRESZAHL": [],
"ENDE": [],
":Token": reduce_single_child,
"*": replace_by_single_child
}
def LyrikTransform() -> TransformationFunc:
return partial(traverse, processing_table=Lyrik_AST_transformation_table.copy())
def get_transformer() -> TransformationFunc:
try:
transformer = GLOBALS.Lyrik_1_transformer_singleton
except AttributeError:
GLOBALS.Lyrik_1_transformer_singleton = LyrikTransform()
transformer = GLOBALS.Lyrik_1_transformer_singleton
return transformer
#######################################################################
#
# COMPILER SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
class LyrikCompiler(Compiler):
"""Compiler for the abstract-syntax-tree of a Lyrik source file.
"""
def __init__(self):
super(LyrikCompiler, self).__init__()
def _reset(self):
super()._reset()
# initialize your variables here, not in the constructor!
def on_gedicht(self, node):
return self.fallback_compiler(node)
# def on_bibliographisches(self, node):
# return node
# def on_autor(self, node):
# return node
# def on_werk(self, node):
# return node
# def on_untertitel(self, node):
# return node
# def on_ort(self, node):
# return node
# def on_jahr(self, node):
# return node
# def on_wortfolge(self, node):
# return node
# def on_namenfolge(self, node):
# return node
# def on_verknüpfung(self, node):
# return node
# def on_ziel(self, node):
# return node
# def on_serie(self, node):
# return node
# def on_titel(self, node):
# return node
# def on_zeile(self, node):
# return node
# def on_text(self, node):
# return node
# def on_strophe(self, node):
# return node
# def on_vers(self, node):
# return node
# def on_WORT(self, node):
# return node
# def on_NAME(self, node):
# return node
# def on_ZEICHENFOLGE(self, node):
# return node
# def on_NZ(self, node):
# return node
# def on_LEERZEILE(self, node):
# return node
# def on_JAHRESZAHL(self, node):
# return node
# def on_ENDE(self, node):
# return node
def get_compiler() -> LyrikCompiler:
try:
compiler = GLOBALS.Lyrik_1_compiler_singleton
except AttributeError:
GLOBALS.Lyrik_1_compiler_singleton = LyrikCompiler()
compiler = GLOBALS.Lyrik_1_compiler_singleton
return compiler
#######################################################################
#
# END OF DHPARSER-SECTIONS
#
#######################################################################
def compile_src(source, log_dir=''):
"""Compiles ``source`` and returns (result, errors, ast).
"""
with logging(log_dir):
compiler = get_compiler()
cname = compiler.__class__.__name__
result = compile_source(source, get_preprocessor(),
get_grammar(),
get_transformer(), compiler)
return result
if __name__ == "__main__":
# recompile grammar if needed
grammar_path = os.path.abspath(__file__).replace('Compiler.py', '.ebnf')
if os.path.exists(grammar_path):
if not recompile_grammar(grammar_path, force=False,
notify=lambda:print('recompiling ' + grammar_path)):
error_file = os.path.basename(__file__).replace('Compiler.py', '_ebnf_ERRORS.txt')
with open(error_file, encoding="utf-8") as f:
print(f.read())
sys.exit(1)
else:
print('Could not check whether grammar requires recompiling, '
'because grammar was not found at: ' + grammar_path)
if len(sys.argv) > 1:
# compile file
file_name, log_dir = sys.argv[1], ''
if file_name in ['-d', '--debug'] and len(sys.argv) > 2:
file_name, log_dir = sys.argv[2], 'LOGS'
result, errors, ast = compile_src(file_name, log_dir)
if errors:
cwd = os.getcwd()
rel_path = file_name[len(cwd):] if file_name.startswith(cwd) else file_name
for error in errors:
print(rel_path + ':' + str(error))
sys.exit(1)
else:
print(result.as_xml() if isinstance(result, Node) else result)
else:
print("Usage: LyrikCompiler.py [FILENAME]")
@@ -66,7 +66,6 @@ class Lyrik_explicit_whitespaceGrammar(Grammar):
WHITESPACE__ = r'[\t ]*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
dwsp__ = DropWhitespace(WSP_RE__)
wsp__ = Whitespace(WSP_RE__)