Commit 79f5bbdd authored by Eckhart Arnold

LaTeX.ebnf reworked

parent 8e00e0f8
......@@ -362,7 +362,8 @@ class EBNFCompiler(Compiler):
"""
COMMENT_KEYWORD = "COMMENT__"
WHITESPACE_KEYWORD = "WSP__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, COMMENT_KEYWORD}
RAW_WS_KEYWORD = "WHITESPACE__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneous AST transformation."
PREFIX_TABLE = {'§': 'Required',
......@@ -425,8 +426,7 @@ class EBNFCompiler(Compiler):
'"gen_transformer_Skeleton()"!')
tt_name = self.grammar_name + '_AST_transformation_table'
transtable = [tt_name + ' = {',
' # AST Transformations for the ' +
self.grammar_name + '-grammar']
' # AST Transformations for the ' + self.grammar_name + '-grammar']
transtable.append(' "+": remove_empty,')
for name in self.rules:
tf = '[]'
......@@ -498,9 +498,9 @@ class EBNFCompiler(Compiler):
definitions.append(('wspL__', self.WHITESPACE_KEYWORD
if 'left' in self.directives['literalws'] else "''"))
definitions.append((self.WHITESPACE_KEYWORD,
("mixin_comment(whitespace="
"r'{whitespace}', comment=r'{comment}')").
format(**self.directives)))
("mixin_comment(whitespace=" + self.RAW_WS_KEYWORD +
", comment=" + self.COMMENT_KEYWORD + ")")))
definitions.append((self.RAW_WS_KEYWORD, "r'{whitespace}'".format(**self.directives)))
definitions.append((self.COMMENT_KEYWORD, "r'{comment}'".format(**self.directives)))
# prepare parser class header and docstring and
......@@ -814,7 +814,7 @@ class EBNFCompiler(Compiler):
self.symbols[symbol] = node # remember first use of symbol
if symbol in self.rules:
self.recursive.add(symbol)
if symbol in (EBNFCompiler.WHITESPACE_KEYWORD, EBNFCompiler.COMMENT_KEYWORD):
if symbol in EBNFCompiler.RESERVED_SYMBOLS: # (EBNFCompiler.WHITESPACE_KEYWORD, EBNFCompiler.COMMENT_KEYWORD):
return "RegExp(%s)" % symbol
return symbol
......
......@@ -18,6 +18,7 @@ permissions and limitations under the License.
import collections
import configparser
import copy
import fnmatch
import inspect
import json
import os
......@@ -195,7 +196,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
# write parsing-history log only in case of test-failure
parser.log_parsing_history__("fail_%s_%s.log" % (parser_name, test_name))
if verbose:
print(infostr + "OK" if len(errata) == errflag else "FAIL")
print(infostr + ("OK" if len(errata) == errflag else "FAIL"))
# write test-report
if report:
......@@ -208,12 +209,16 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
return errata
def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown_filetypes=False,
report=True, verbose=False):
def grammar_suite(directory, parser_factory, transformer_factory,
fn_patterns=['*test*'],
ignore_unknown_filetypes=False,
report=True, verbose=True):
"""
Runs all grammar unit tests in a directory. A file is considered a test
unit, if its name matches at least one of the filename patterns in
`fn_patterns` (by default `*test*`, i.e. any name containing "test").
"""
if not isinstance(fn_patterns, collections.abc.Collection):
fn_patterns = [fn_patterns]
all_errors = collections.OrderedDict()
if verbose:
print("\nScanning test-directory: " + directory)
......@@ -221,7 +226,7 @@ def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown
os.chdir(directory)
if is_logging(): clear_logs()
for filename in sorted(os.listdir()):
if filename.lower().find("test") >= 0:
if any(fnmatch.fnmatch(filename, pattern) for pattern in fn_patterns):
try:
if verbose:
print("\nRunning grammar tests from: " + filename)
......
......@@ -40,7 +40,7 @@ EBNF_TEMPLATE = r"""-grammar
@ testing = True # testing suppresses error messages for unconnected symbols
@ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*(?:\n|$)/ # comments range from a '#'-character to the end of the line
@ comment = /#.*/ # comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive
......
......@@ -2,7 +2,7 @@
@ testing = True
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*(?:\n|$)/
@ comment = /%.*/
latexdoc = preamble document
......@@ -151,13 +151,13 @@ INTEGER = /\d+/~
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
LF = !GAP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
LFF = ~/\n?/ -&LB [ WSPC ] # at least one linefeed
LF = NEW_LINE { COMMENT__ WHITESPACE__ } # linefeed but not an empty line
LFF = NEW_LINE [ WSPC ] # at least one linefeed
PARSEP = { WHITESPACE__ COMMENT__ } GAP [WSPC] # paragraph separator
WSPC = { COMMENT__ | /\s+/ }+ # arbitrary horizontal or vertical whitespace
# WSPC = { /\s+/~ | ~/\s+/ }+ # arbitrary horizontal or vertical whitespace
PARSEP = { GAP }+ # paragraph separator
GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
NEW_LINE = /[ \t]*/ [COMMENT__] /\n/
LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code
BACKSLASH = /[\\]/
......
......@@ -51,7 +51,7 @@ class LaTeXGrammar(Grammar):
@ testing = True
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*(?:\n|$)/
@ comment = /%.*/
latexdoc = preamble document
......@@ -200,13 +200,13 @@ class LaTeXGrammar(Grammar):
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
LF = !GAP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
LFF = ~/\n?/ -&LB [ WSPC ] # at least one linefeed
LF = NEW_LINE { COMMENT__ WHITESPACE__ } # linefeed but not an empty line
LFF = NEW_LINE [ WSPC ] # at least one linefeed
PARSEP = { WHITESPACE__ COMMENT__ } GAP [WSPC] # paragraph separator
WSPC = { COMMENT__ | /\s+/ }+ # arbitrary horizontal or vertical whitespace
# WSPC = { /\s+/~ | ~/\s+/ }+ # arbitrary horizontal or vertical whitespace
PARSEP = { GAP }+ # paragraph separator
GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
NEW_LINE = /[ \t]*/ [COMMENT__] /\n/
LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code
BACKSLASH = /[\\]/
......@@ -220,20 +220,22 @@ class LaTeXGrammar(Grammar):
paragraph = Forward()
tabular_config = Forward()
text_element = Forward()
source_hash__ = "2d33db878d9e5354a05e23f48a756604"
source_hash__ = "ed181ac517b686f843e13d5783527fe3"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'%.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?', comment=r'%.*(?:\n|$)')
COMMENT__ = r'%.*'
WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
EOF = RegExp('(?!.)')
BACKSLASH = RegExp('[\\\\]')
LB = RegExp('\\s*?\\n|$')
NEW_LINE = Series(RegExp('[ \\t]*'), Optional(RegExp(COMMENT__)), RegExp('\\n'))
GAP = RE('[ \\t]*(?:\\n[ \\t]*)+\\n')
PARSEP = OneOrMore(GAP)
WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
LFF = Series(RE('\\n?', wR='', wL=WSP__), Lookbehind(LB), Optional(WSPC))
LF = Series(NegativeLookahead(GAP), RegExp('[ \\t]*\\n[ \\t]*'))
PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Optional(WSPC))
LFF = Series(NEW_LINE, Optional(WSPC))
LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
INTEGER = RE('\\d+')
NAME = Capture(RE('\\w+'))
......
[match:LB]
1: """
"""
[match:GAP]
1: """
"""
2: """
% Comment
"""
3: """
"""
[fail:GAP]
1: """
"""
2: """
% Comment
% Comment
"""
[match:PARSEP]
1: """
"""
2: """
% Comment
"""
3: """
"""
4: """
% Comment
% Comment
"""
5: """ % Comment
% Comment
% Comment"""
[fail:PARSEP]
1: " "
2: """
"""
3: """
% Comment"""
4: """ % Comment
% Comment
% Comment"""
[match:WSPC]
1: " "
2: " % Comment"
3: " "
4: "% Comment"
5: """% Comment
"""
6: """
% Comment
% Comment
"""
7: """
"""
[fail:WSPC]
1: "X"
[match:LFF]
1: """
"""
2: """
% Comment"""
3: """
% Comment
"""
4: """
"""
[fail:LFF]
1: " "
[match:LF]
1: """
"""
2: """
% Comment"""
3: """
% Comment
% Comment
"""
[fail:LF]
1: """
"""
......@@ -25,6 +25,7 @@ sys.path.extend(['../../', '../', './'])
import DHParser.dsl
from DHParser import testing
from DHParser import toolkit
if not DHParser.dsl.recompile_grammar('LaTeX.ebnf', force=False): # recompiles Grammar only if it has changed
print('\nErrors while recompiling "LaTeX.ebnf":\n--------------------------------------\n\n')
......@@ -35,8 +36,9 @@ if not DHParser.dsl.recompile_grammar('LaTeX.ebnf', force=False): # recompiles
from LaTeXCompiler import get_grammar, get_transformer
with toolkit.logging(True):
error_report = testing.grammar_suite('grammar_tests', get_grammar,
get_transformer, report=True, verbose=True)
error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer,
fn_patterns=['*_test_*.ini'],
report=True, verbose=True)
if error_report:
print('\n')
print(error_report)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment