Commit 5b548dc6 authored by Eckhart Arnold
Browse files

tests restored

parent 9e03d65b
document = { text | codeblock }
codeblock = delimiter { text | (!:delimiter delimiter_sign) } ::delimiter
delimiter = delimiter_sign
delimiter_sign = /`+/
text = /[^`]+/
Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ebde
Absatz ohne ``` codeblock, aber
das stellt sich erst am Ende herause...
Mehrzeliger ```code block
\ No newline at end of file
Anfang ```code block `` <- keine Ende-Zeichen ! ``` Ebde
#!/usr/bin/python
"""Timing script that exposes a performance bug in the `re`-module of
the python standard-library.

Measurements taken with the same pattern as used below:

    >>> import re, timeit
    >>> rx = re.compile('(\\s*(#.*)?\\s*)*X')
    >>> print(timeit.timeit("rx.match(' # ')", number=1, globals=globals()))
    24.814577618999465
    >>> print(timeit.timeit("rx.match(' # ')", number=1, globals=globals()))
    291.2432912450022

Mind the number of repetitions in these calls: number=1 !!!
"""

import timeit

# Pattern and timed statement are shared by both measurements.
_PATTERN = '(\\s*(#.*)?\\s*)*X'
# NOTE(review): the timings quoted in the module docstring suggest that the
# subject originally contained longer whitespace runs - confirm the literal.
_STATEMENT = "rx.match(' # ')"


def _report(headline):
    """Print `headline`, then the time of a single run of the statement.

    The statement is evaluated in the module's globals, i.e. against
    whatever compiled pattern is currently bound to the global `rx`.
    """
    print(headline)
    print(timeit.timeit(_STATEMENT,
                        number=1, globals=globals()))


# The third-party `regex` module is optional; it is measured only if present.
try:
    import regex
    rx = regex.compile(_PATTERN)
    _report("The 'new' regex module:")
except ImportError:
    pass

import re
rx = re.compile(_PATTERN)
_report("The re module of the Python standard library:")
#!/usr/bin/python3
"""test_ParserCombinators.py - unit tests for module ParserCombinators
Copyright 2016 by Eckhart Arnold
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import re
import sys
sys.path.append(os.path.abspath('../'))
from ParserCombinators import EBNFGrammar, EBNFTransTable, EBNFCompiler, full_compilation, Forward, RegExp, \
Alternative, Sequence, Token, compile_python_object, compileDSL, ParserRoot
# EBNF source of a small arithmetic grammar. It is the input that
# TestEBNFCompiler.test_arithmeticEBNF hands to full_compilation().
arithmetic_EBNF = r"""
expression = term { ("+" | "-") term}
term = factor { ("*"|"/") factor}
factor = constant | variable | "(" expression ")"
variable = "x" | "y" | "z"
constant = digit {digit}
digit = "0" | "1" | "..." | "9"
test = digit constant variable
"""
# Python source that compiling arithmetic_EBNF is expected to yield.
# test_arithmeticEBNF compares it (stripped) with the compiler output after
# the docstring has been removed from that output by rem_docstring().
arithmetic_expected_result = """
class ArithmeticGrammar(ParserRoot):
constant = Forward()
digit = Forward()
expression = Forward()
variable = Forward()
wspc__ = mixin_comment(whitespace=r'\s*', comment=r'')
test = Sequence("test", digit, constant, variable)
digit.set(Alternative("digit", Token("0", wspcR=wspc__), Token("1", wspcR=wspc__), Token("...", wspcR=wspc__), Token("9", wspcR=wspc__)))
constant.set(Sequence("constant", digit, ZeroOrMore(None, digit)))
variable.set(Alternative("variable", Token("x", wspcR=wspc__), Token("y", wspcR=wspc__), Token("z", wspcR=wspc__)))
factor = Alternative("factor", constant, variable, Sequence(None, Token("(", wspcR=wspc__), expression, Token(")", wspcR=wspc__)))
term = Sequence("term", factor, ZeroOrMore(None, Sequence(None, Alternative(None, Token("*", wspcR=wspc__), Token("/", wspcR=wspc__)), factor)))
expression.set(Sequence("expression", term, ZeroOrMore(None, Sequence(None, Alternative(None, Token("+", wspcR=wspc__), Token("-", wspcR=wspc__)), term))))
root__ = expression
"""
# EBNF source describing the EBNF notation itself, including a compiler
# directive, comments and regular expressions with "~" whitespace markers.
# It is the input that TestEBNFCompiler.test_ebnfEBNF compiles.
ebnf_EBNF = r"""
# Starting comment
@ whitespace = /\s*/ # '@' means the folowing assingment is a compiler directive
syntax = { production }
production = symbol "=" expression "."
expression = term { "|" term }
term = factor { factor }
factor = symbol
| literal
| regexp # regular expressions
| option
| repetition
| group
option = "[" expression "]"
repetition = "{" expression "}"
group = "(" expression ")"
symbol = ~/\w+/~
literal = ~/"(?:[^"]|\\")*"/~
| ~/'(?:[^']|\\')*'/~
regexp = ~/~\/(?:[^\/]|(?<=\\)\/)*\/~/~
| ~/\/(?:[^\/]|(?<=\\)\/)*\//~
# trailing whitespace and comments
"""
# Python source that compiling ebnf_EBNF is expected to yield.
# test_ebnfEBNF compares it (stripped) with the compiler output after the
# docstring has been removed from that output by rem_docstring().
ebnf_expected_result = r"""
class EBNFGrammar(ParserRoot):
expression = Forward()
wspc__ = mixin_comment(whitespace=r'\s*', comment=r'')
regexp = Alternative("regexp", RE('~/(?:[^/]|(?<=\\\\)/)*/~', wspcL=wspc__, wspcR=wspc__), RE('/(?:[^/]|(?<=\\\\)/)*/', wspcL=wspc__, wspcR=wspc__))
literal = Alternative("literal", RE('"(?:[^"]|\\\\")*"', wspcL=wspc__, wspcR=wspc__), RE("'(?:[^']|\\\\')*'", wspcL=wspc__, wspcR=wspc__))
symbol = RE('\\w+', "symbol", wspcL=wspc__, wspcR=wspc__)
group = Sequence("group", Token("(", wspcR=wspc__), expression, Token(")", wspcR=wspc__))
repetition = Sequence("repetition", Token("{", wspcR=wspc__), expression, Token("}", wspcR=wspc__))
option = Sequence("option", Token("[", wspcR=wspc__), expression, Token("]", wspcR=wspc__))
factor = Alternative("factor", symbol, literal, regexp, option, repetition, group)
term = Sequence("term", factor, ZeroOrMore(None, factor))
expression.set(Sequence("expression", term, ZeroOrMore(None, Sequence(None, Token("|", wspcR=wspc__), term))))
production = Sequence("production", symbol, Token("=", wspcR=wspc__), expression, Token(".", wspcR=wspc__))
syntax = ZeroOrMore("syntax", production)
root__ = syntax
"""
class LeftRecursiveGrammar(ParserRoot):
    """formula = expr "."
    expr = expr ("+"|"-") term | term
    term = term ("*"|"/") factor | factor
    factor = /[0-9]+/
    """
    # `expr` and `term` appear as the first element of their own definitions
    # (left recursion), so both are created as Forward() placeholders and
    # receive their actual parser through set() further down.
    expr = Forward()
    term = Forward()
    factor = RegExp("factor", '[0-9]+')
    # term = term ("*"|"/") factor | factor
    term.set(Alternative("term", Sequence(None, term, Alternative(None, Token("*"), Token("/")), factor), factor))
    # expr = expr ("+"|"-") term | term
    expr.set(Alternative("expr", Sequence(None, expr, Alternative(None, Token("+"), Token("-")), term), term))
    # formula = expr "."
    formula = Sequence("formula", expr, Token("."))
    # presumably the parser that parse() starts from - confirm in ParserRoot
    root__ = formula
def rem_docstring(class_py):
    """Strip the raw triple-quoted docstring from the source text `class_py`.

    The span from the first opening delimiter up to the last closing
    delimiter that is followed by a newline and a blank is deleted. What
    remains is returned without leading and trailing whitespace.
    """
    docstring_rx = r'r"""(?:.|\n)*"""\n '
    without_docstring = re.sub(docstring_rx, '', class_py)
    return without_docstring.strip()
class TestEBNFCompiler:
    """Tests that feed EBNF sources to full_compilation() / compileDSL()
    and inspect the generated source, the messages, or the resulting parser.
    """

    def test_EBNFGrammar(self):
        """Compare str() of the EBNFGrammar root parser with itself.

        NOTE(review): both sides of the comparison are the same expression,
        so the assertion cannot distinguish a right from a wrong grammar.
        """
        assert (str(EBNFGrammar.root__) == str(EBNFGrammar.root__))

    def test_arithmeticEBNF(self):
        """Compile arithmetic_EBNF and compare the output, docstring removed,
        with arithmetic_expected_result.
        """
        result, errors, syntax_tree = full_compilation(arithmetic_EBNF, EBNFGrammar(),
                                                       EBNFTransTable, EBNFCompiler('Arithmetic'))
        assert result is not None, errors
        assert arithmetic_expected_result.strip() == rem_docstring(result)

    def test_ebnfEBNF(self):
        """Compile ebnf_EBNF without errors and compare the output, docstring
        removed, with ebnf_expected_result.
        """
        result, errors, syntax_tree = full_compilation(ebnf_EBNF, EBNFGrammar(),
                                                       EBNFTransTable, EBNFCompiler('EBNF'))
        assert not errors, str(errors)
        assert ebnf_expected_result.strip() == rem_docstring(result)

    def test_compileDSL(self):
        """Run compileDSL() on EBNF.ebnf twice and expect equal results.

        The first run passes the .ebnf file itself as second argument, the
        second run passes the result of the first run instead.
        """
        bootstrap1 = compileDSL("../examples/EBNF/EBNF.ebnf", "../examples/EBNF/EBNF.ebnf",
                                EBNFTransTable, EBNFCompiler())
        bootstrap2 = compileDSL("../examples/EBNF/EBNF.ebnf", bootstrap1, EBNFTransTable, EBNFCompiler())
        assert bootstrap1 == bootstrap2

    def test_regexErrorHandling(self):
        """A well-formed regex must compile without messages; a regex with a
        missing ")" must produce a message.
        """
        ebnf_line = r"""regexfine = ~/~\/(?:[^\/]|(?<=\\)\/)*\/~/~""" + '\n' # no errors should be raised
        result, messages, syntax_tree = full_compilation(ebnf_line, EBNFGrammar(),
                                                         EBNFTransTable, EBNFCompiler('RegExTest'))
        assert messages == "", messages
        ebnf_line = r"""regexbad = ~/\/(?:[^\/]|(?<=\\)*\//~""" + '\n' # missing ")" should be detected
        # NOTE(review): this parse result is overwritten by the next statement
        # without being inspected.
        result = EBNFGrammar().parse(ebnf_line)
        result, messages, syntax_tree = full_compilation(ebnf_line, EBNFGrammar(),
                                                         EBNFTransTable, EBNFCompiler('RegExTest'))
        assert messages != ""

    def test_multilineRegex(self):
        """Compile a grammar whose regex spans several commented lines, then
        parse 'abc+def' completely with the generated parser's `regex` rule.
        """
        mlregex = r"""
regex = /\w+ # one or more alphabetical characters including the underscore
[+] # followed by a plus sign
\w* # possibly followed by more alpha chracters/
"""
        result, messages, syntax_tree = full_compilation(mlregex, EBNFGrammar(), EBNFTransTable,
                                                         EBNFCompiler('MultilineRegexTest'))
        assert result is not None, messages
        assert not messages
        # compile_python_object() returns something callable here; calling it
        # yields the parser object whose `regex` rule is applied below.
        parser = compile_python_object(result)()
        node, rest = parser.regex('abc+def')
        assert rest == ''
        assert node.parser.component == "regex"
        assert str(node) == 'abc+def'
def test_LeftRecursion():
    """Parse a formula with LeftRecursiveGrammar and expect the syntax tree
    to render back to exactly the parsed text.
    """
    source = "5 + 3 * 4 .\n"
    tree = LeftRecursiveGrammar().parse(source)
    assert str(tree) == source
#!/usr/bin/python3
"""test_markdown.py - unit tests for the markdown showcase
Copyright 2016 by Eckhart Arnold
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import re
import sys
sys.path.append(os.path.abspath('../'))
sys.path.append(os.path.abspath('../showcases'))
class test_regexps:
    """Checks for the regular expressions defined in the Markdown grammar.

    setup() reads the grammar file and stores every definition of the form
    `symbol = /regex/` in the dictionary self.rx (symbol name -> regex
    source). Each test compiles one of these expressions with `re` and
    matches it against sample strings.
    """

    def setup(self):
        """Fill self.rx with the regex definitions found in the grammar file."""
        self.rx = dict()
        # NOTE(review): ".enbf" looks like a transposition of ".ebnf" -
        # confirm the actual file name before changing it.
        with open("../grammars/Markdown.enbf") as f:
            for stmt in f:
                m = re.match(r'\s*@?\s*(?P<symbol>\w+)\s*=\s*~?/(?P<regex>.*(?<![^\\]/))/', stmt)
                if m:
                    # Both named groups are mandatory parts of the pattern,
                    # so a successful match always provides them.
                    self.rx[m.group('symbol')] = m.group('regex')

    def test_whitespace(self):
        """WSPC matches blanks and tabs, but no line break."""
        assert 'WSPC' in self.rx, str(self.rx)
        rx = re.compile(self.rx['WSPC'])
        assert rx.match(' ').group(0) == ' '
        assert rx.match(' \t ').group(0) == ' \t '
        assert rx.match('\n') is None
        # A match object never equals a string; compare the matched text.
        # NOTE(review): the next three subjects read identically here although
        # their expectations differ; they presumably differ in the number of
        # leading blanks - restore the intended literals.
        assert rx.match(' \n').group(0) == ' '
        assert rx.match(' \n') is None
        assert rx.match(' \n').group(0) == ' '
        assert rx.match('\t\n') is None

    def test_ST(self):
        """ST matches exactly two '*' or two '_' characters."""
        assert 'ST' in self.rx, str(self.rx)
        rx = re.compile(self.rx['ST'])
        assert rx.match('**').group(0) == '**'
        assert rx.match('__').group(0) == '__'
        # "No match" means re.match() returns None; calling .group() on that
        # result would raise AttributeError instead of failing the assertion.
        assert rx.match('* ') is None
        assert rx.match('_') is None
        assert rx.match('__ ').group(0) == '__'
        assert rx.match('***') is None
        assert rx.match('____') is None

    def test_EM(self):
        """EM matches a single '*' or '_' character, but not a doubled one."""
        assert 'EM' in self.rx, str(self.rx)
        rx = re.compile(self.rx['EM'])
        assert rx.match('**') is None
        assert rx.match('__') is None
        assert rx.match('* ').group(0) == '*'
        assert rx.match('_').group(0) == '_'
        assert rx.match('__ ') is None

    def test_BT(self):
        """BT matches a backtick as well as an apostrophe."""
        # Check for the key that is actually read on the next line.
        assert 'BT' in self.rx, str(self.rx)
        rx = re.compile(self.rx['BT'])
        assert rx.match('`').group(0) == '`'
        assert rx.match("'").group(0) == "'"

    def test_chunk(self):
        """chunk matches a run of text up to the next markup character and
        does not match text that starts with a blank.
        """
        assert 'chunk' in self.rx, str(self.rx)
        rx = re.compile(self.rx['chunk'])
        assert rx.match('lore').group(0) == 'lore'
        assert rx.match('ip*sum*').group(0) == 'ip'
        assert rx.match(' ipsum') is None
# PARAGRAPH_TEST = """
# This is a paragraph.
# Here, the paragraph is being continued.
#
# This is a new paragraph.
# """
#
# CODE_TEST = """
# Code-Test
#
# def f():
#
# pass
# """
#
# FENCED_TEST = """
# Fenced Code Test
# ~~~ Info String
# def f():
#
# pass
# ~~~
# More Text
# """
#
# LIST_TEST = """
# This is a paragraph with
# two lines of text
#
# This is a paragraph.
# * First List Item
#
# A New Paragraph under the same item
# * A second Item
# A lazy line
#
# Another paragraph under the second item
#
# End of the list and a New paragraph
#
# 1. A Numbered List
#
# With several paragraphs
#
# def f():
# pass
#
# Item continued
#
# 2. Point Number 2
#
# 3. And three
#
# > 4. this is just a quote
# >
# > Example for a lazy line in a quote
# lazyline...
#
# New Paragraph.
# """
#
#
# def mark_special(text):
# return text.replace('\x1b', '<').replace('\x1c', '>')
#
#
# print(mark_special(markdown_scanner(FENCED_TEST)))
#
# sys.exit(0)
This diff is collapsed.
This diff is collapsed.
#!/bin/sh
# Remove the build artifacts of the test documents (test*.<extension>).
#
# -f keeps rm silent and successful when a pattern matches no file, so the
# script can be run repeatedly. The .dvi pattern, formerly listed twice, is
# removed only once.
rm -f test*.dvi test*.pdf test*.run.* test*.aux test*.bbl \
      test*.bcf test*.blg test*.log test*.toc
formula = expr "." .
expr = expr ("+"|"-") term | term.
term = term ("*"|"/") factor | factor.
factor = /[0-9]+/.
# example: "5 + 3 * 4"
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="utf-8"/>
<title>Markdown-Test</title>
</head>
<body>
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="utf-8"/>
<title>Markdown-Test</title>
</head>
<body>
<h1>Markdown-Link-Test</h1>
<h2>Normale Links</h2>
<p>Standard-Link-Beispiel <a href="http://eckhartarnold.de">Eckis Homepage</a> ohne Titel. Dasselbe nochmal mit Titel:
<a href="http://eckhartarnold.de" title="Ecki's Homepage">Eckis Homepage</a>.</p>
<h2>Referenz Links</h2>
<p>Dies ist ein Beispiel für einen Referenz-Link <a href="http://eckhartarnold.de" title="Ecki's Homepage">Referenz-Link 1</a>.
Dies ist ein Beispiel mit einem Leerzeichen <a href="http://eckhartarnold.de" title="Eckis Homepage">Referenz-Link 2</a>.
Noch ein Beispiel mit zwei Leerzeichen [Referenz-Link 3] [id3].
Impliziter Referenz Link <a href="http://eckhartarnold.de">Impliziter Link</a></p>
</body>
</html>
Markdown-Link-Test
====================
Normale Links
-------------
Standard-Link-Beispiel [Eckis Homepage](http://eckhartarnold.de) ohne Titel. Dasselbe nochmal mit Titel:
[Eckis Homepage](http://eckhartarnold.de "Ecki's Homepage").
Referenz Links
--------------
Dies ist ein Beispiel für einen Referenz-Link [Referenz-Link 1][id1].
Dies ist ein Beispiel mit einem Leerzeichen [Referenz-Link 2] [id2].
Noch ein Beispiel mit zwei Leerzeichen [Referenz-Link 3] [id3].
Impliziter Referenz Link [Impliziter Link][]
[id1]: http://eckhartarnold.de "Ecki's Homepage"
[id2]: http://eckhartarnold.de 'Eckis Homepage'
[id3]: http://eckhartarnold.de (Ecki's Homepage)
[Impliziter Link]: http://eckhartarnold.de
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="utf-8"/>
<title>Markdown-Test</title>
</head>
<body>
<h1>Markdown-Link-Test</h1>
<h2>Normale Links</h2>
<p>Standard-Link-Beispiel <a href="http://eckhartarnold.de">Eckis Homepage</a> ohne Titel. Dasselbe nochmal mit Titel:
<a href="http://eckhartarnold.de" title="Ecki's Homepage">Eckis Homepage</a>.</p>
<h2>Referenz Links</h2>
<p>Dies ist ein Beispiel für einen Referenz-Link <a href="http://eckhartarnold.de" title="Ecki's Homepage">Referenz-Link 1</a>.
Dies ist ein Beispiel mit einem Leerzeichen <a href="http://eckhartarnold.de" title="Eckis Homepage">Referenz-Link 2</a>.
Noch ein Beispiel mit zwei Leerzeichen [Referenz-Link 3] <a href="http://eckhartarnold.de" title="Ecki's Homepage">id3</a>.
Impliziter Referenz Link <a href="http://eckhartarnold.de">Impliziter Link</a></p></body>
</html>
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="utf-8"/>
<title>Markdown-Test</title>
</head>
<body>
<h1>Markdown-Listen-Test</h1>
<p>Einfache Liste:</p>
<ul>
<li>Punk**t 1</li>
<li>Punk**t 2</li>
</ul>
<p>Absatz Liste:</p>
<ul>
<li><p>Punkt 1</p></li>
<li><p>Punkt 2</p></li>
</ul>
<p>Gemische Liste:</p>
<ul>
<li>Punkt 1</li>
<li><p>Punkt 2</p></li>
<li><p>Punkt 3</p></li>
</ul>
<p>Absätze mit Punkten:</p>
<ol>
<li><p>Punkt 1</p>
<p>Unterliste</p>
<ul>
<li>Punkt A</li>
<li>Punkt B</li>
</ul>
<p>Unterabsatz</p></li>
<li><p>Punkt 2</p>
<p>Unterabsatz 2</p></li>
</ol>
</body>
</html>
Markdown-Listen-Test
====================
Einfache Liste:
* Punk**t 1
* Punk**t 2
Absatz Liste:
* Punkt 1
* Punkt 2
Gemische Liste:
* Punkt 1
* Punkt 2
* Punkt 3
Absätze mit Punkten:
1. Punkt 1
Unterliste
* Punkt A
* Punkt B
Unterabsatz
2. Punkt 2
Unterabsatz 2
Liste mit Absätzen
* Ein Listenpunkt
Ein Absatz unterhalb des Listenpunktes
Ein weiterer Absatz innerhalb des
Listen Punktes (2. Zeile ohne Einrückung)
Noch ein Absatz innerhalb eines Listenpunktes.
Diesmal ist die 2. Zeile eingerückt, aber
die 3. Zeile nicht mehr
Eine Liste mit unmittelbar umgebenden Absatz
* Erster Punkt
* Zweiter Punkt
Fortsetzung des zweiten Listenpunkts
Ein neuer Absatz
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="utf-8"/>
<title>Markdown-Test</title>
</head>
<body>
<h1>Markdown-Listen-Test</h1>
<p>Einfache Liste:</p>
<ul>