Commit 79f5bbdd authored by Eckhart Arnold
Browse files

LaTeX.ebnf reworked

parent 8e00e0f8
...@@ -362,7 +362,8 @@ class EBNFCompiler(Compiler): ...@@ -362,7 +362,8 @@ class EBNFCompiler(Compiler):
""" """
COMMENT_KEYWORD = "COMMENT__" COMMENT_KEYWORD = "COMMENT__"
WHITESPACE_KEYWORD = "WSP__" WHITESPACE_KEYWORD = "WSP__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, COMMENT_KEYWORD} RAW_WS_KEYWORD = "WHITESPACE__"
RESERVED_SYMBOLS = {WHITESPACE_KEYWORD, RAW_WS_KEYWORD, COMMENT_KEYWORD}
AST_ERROR = "Badly structured syntax tree. " \ AST_ERROR = "Badly structured syntax tree. " \
"Potentially due to erroneous AST transformation." "Potentially due to erroneous AST transformation."
PREFIX_TABLE = {'§': 'Required', PREFIX_TABLE = {'§': 'Required',
...@@ -425,8 +426,7 @@ class EBNFCompiler(Compiler): ...@@ -425,8 +426,7 @@ class EBNFCompiler(Compiler):
'"gen_transformer_Skeleton()"!') '"gen_transformer_Skeleton()"!')
tt_name = self.grammar_name + '_AST_transformation_table' tt_name = self.grammar_name + '_AST_transformation_table'
transtable = [tt_name + ' = {', transtable = [tt_name + ' = {',
' # AST Transformations for the ' + ' # AST Transformations for the ' + self.grammar_name + '-grammar']
self.grammar_name + '-grammar']
transtable.append(' "+": remove_empty,') transtable.append(' "+": remove_empty,')
for name in self.rules: for name in self.rules:
tf = '[]' tf = '[]'
...@@ -498,9 +498,9 @@ class EBNFCompiler(Compiler): ...@@ -498,9 +498,9 @@ class EBNFCompiler(Compiler):
definitions.append(('wspL__', self.WHITESPACE_KEYWORD definitions.append(('wspL__', self.WHITESPACE_KEYWORD
if 'left' in self.directives['literalws'] else "''")) if 'left' in self.directives['literalws'] else "''"))
definitions.append((self.WHITESPACE_KEYWORD, definitions.append((self.WHITESPACE_KEYWORD,
("mixin_comment(whitespace=" ("mixin_comment(whitespace=" + self.RAW_WS_KEYWORD +
"r'{whitespace}', comment=r'{comment}')"). ", comment=" + self.COMMENT_KEYWORD + ")")))
format(**self.directives))) definitions.append((self.RAW_WS_KEYWORD, "r'{whitespace}'".format(**self.directives)))
definitions.append((self.COMMENT_KEYWORD, "r'{comment}'".format(**self.directives))) definitions.append((self.COMMENT_KEYWORD, "r'{comment}'".format(**self.directives)))
# prepare parser class header and docstring and # prepare parser class header and docstring and
...@@ -814,7 +814,7 @@ class EBNFCompiler(Compiler): ...@@ -814,7 +814,7 @@ class EBNFCompiler(Compiler):
self.symbols[symbol] = node # remember first use of symbol self.symbols[symbol] = node # remember first use of symbol
if symbol in self.rules: if symbol in self.rules:
self.recursive.add(symbol) self.recursive.add(symbol)
if symbol in (EBNFCompiler.WHITESPACE_KEYWORD, EBNFCompiler.COMMENT_KEYWORD): if symbol in EBNFCompiler.RESERVED_SYMBOLS: # (EBNFCompiler.WHITESPACE_KEYWORD, EBNFCompiler.COMMENT_KEYWORD):
return "RegExp(%s)" % symbol return "RegExp(%s)" % symbol
return symbol return symbol
......
...@@ -18,6 +18,7 @@ permissions and limitations under the License. ...@@ -18,6 +18,7 @@ permissions and limitations under the License.
import collections import collections
import configparser import configparser
import copy import copy
import fnmatch
import inspect import inspect
import json import json
import os import os
...@@ -195,7 +196,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve ...@@ -195,7 +196,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
# write parsing-history log only in case of test-failure # write parsing-history log only in case of test-failure
parser.log_parsing_history__("fail_%s_%s.log" % (parser_name, test_name)) parser.log_parsing_history__("fail_%s_%s.log" % (parser_name, test_name))
if verbose: if verbose:
print(infostr + "OK" if len(errata) == errflag else "FAIL") print(infostr + ("OK" if len(errata) == errflag else "FAIL"))
# write test-report # write test-report
if report: if report:
...@@ -208,12 +209,16 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve ...@@ -208,12 +209,16 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
return errata return errata
def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown_filetypes=False, def grammar_suite(directory, parser_factory, transformer_factory,
report=True, verbose=False): fn_patterns=['*test*'],
ignore_unknown_filetypes=False,
report=True, verbose=True):
""" """
Runs all grammar unit tests in a directory. A file is considered a test Runs all grammar unit tests in a directory. A file is considered a test
unit, if it has the word "test" in its name. unit, if it has the word "test" in its name.
""" """
if not isinstance(fn_patterns, collections.abc.Collection):
fn_patterns = [fn_patterns]
all_errors = collections.OrderedDict() all_errors = collections.OrderedDict()
if verbose: if verbose:
print("\nScanning test-directory: " + directory) print("\nScanning test-directory: " + directory)
...@@ -221,7 +226,7 @@ def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown ...@@ -221,7 +226,7 @@ def grammar_suite(directory, parser_factory, transformer_factory, ignore_unknown
os.chdir(directory) os.chdir(directory)
if is_logging(): clear_logs() if is_logging(): clear_logs()
for filename in sorted(os.listdir()): for filename in sorted(os.listdir()):
if filename.lower().find("test") >= 0: if any(fnmatch.fnmatch(filename, pattern) for pattern in fn_patterns):
try: try:
if verbose: if verbose:
print("\nRunning grammar tests from: " + filename) print("\nRunning grammar tests from: " + filename)
......
...@@ -40,7 +40,7 @@ EBNF_TEMPLATE = r"""-grammar ...@@ -40,7 +40,7 @@ EBNF_TEMPLATE = r"""-grammar
@ testing = True # testing supresses error messages for unconnected symbols @ testing = True # testing supresses error messages for unconnected symbols
@ whitespace = vertical # implicit whitespace, includes any number of line feeds @ whitespace = vertical # implicit whitespace, includes any number of line feeds
@ literalws = right # literals have implicit whitespace on the right hand side @ literalws = right # literals have implicit whitespace on the right hand side
@ comment = /#.*(?:\n|$)/ # comments range from a '#'-character to the end of the line @ comment = /#.*/ # comments range from a '#'-character to the end of the line
@ ignorecase = False # literals and regular expressions are case-sensitive @ ignorecase = False # literals and regular expressions are case-sensitive
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
@ testing = True @ testing = True
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed @ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*(?:\n|$)/ @ comment = /%.*/
latexdoc = preamble document latexdoc = preamble document
...@@ -151,13 +151,13 @@ INTEGER = /\d+/~ ...@@ -151,13 +151,13 @@ INTEGER = /\d+/~
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace, TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters # linefeed and special characters
LF = !GAP /[ \t]*\n[ \t]*/ # linefeed but not an empty line LF = NEW_LINE { COMMENT__ WHITESPACE__ } # linefeed but not an empty line
LFF = ~/\n?/ -&LB [ WSPC ] # at least one linefeed LFF = NEW_LINE [ WSPC ] # at least one linefeed
PARSEP = { WHITESPACE__ COMMENT__ } GAP [WSPC] # paragraph separator
WSPC = { COMMENT__ | /\s+/ }+ # arbitrary horizontal or vertical whitespace WSPC = { COMMENT__ | /\s+/ }+ # arbitrary horizontal or vertical whitespace
# WSPC = { /\s+/~ | ~/\s+/ }+ # arbitrary horizontal or vertical whitespace
PARSEP = { GAP }+ # paragraph separator
GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e. GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed # [whitespace] linefeed [whitespace] linefeed
NEW_LINE = /[ \t]*/ [COMMENT__] /\n/
LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code # beginning of text marker '$' added for test code
BACKSLASH = /[\\]/ BACKSLASH = /[\\]/
......
...@@ -51,7 +51,7 @@ class LaTeXGrammar(Grammar): ...@@ -51,7 +51,7 @@ class LaTeXGrammar(Grammar):
@ testing = True @ testing = True
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed @ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*(?:\n|$)/ @ comment = /%.*/
latexdoc = preamble document latexdoc = preamble document
...@@ -200,13 +200,13 @@ class LaTeXGrammar(Grammar): ...@@ -200,13 +200,13 @@ class LaTeXGrammar(Grammar):
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace, TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters # linefeed and special characters
LF = !GAP /[ \t]*\n[ \t]*/ # linefeed but not an empty line LF = NEW_LINE { COMMENT__ WHITESPACE__ } # linefeed but not an empty line
LFF = ~/\n?/ -&LB [ WSPC ] # at least one linefeed LFF = NEW_LINE [ WSPC ] # at least one linefeed
PARSEP = { WHITESPACE__ COMMENT__ } GAP [WSPC] # paragraph separator
WSPC = { COMMENT__ | /\s+/ }+ # arbitrary horizontal or vertical whitespace WSPC = { COMMENT__ | /\s+/ }+ # arbitrary horizontal or vertical whitespace
# WSPC = { /\s+/~ | ~/\s+/ }+ # arbitrary horizontal or vertical whitespace
PARSEP = { GAP }+ # paragraph separator
GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e. GAP = /[ \t]*(?:\n[ \t]*)+\n/~ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed # [whitespace] linefeed [whitespace] linefeed
NEW_LINE = /[ \t]*/ [COMMENT__] /\n/
LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator LB = /\s*?\n|$/ # backwards line break for Lookbehind-Operator
# beginning of text marker '$' added for test code # beginning of text marker '$' added for test code
BACKSLASH = /[\\]/ BACKSLASH = /[\\]/
...@@ -220,20 +220,22 @@ class LaTeXGrammar(Grammar): ...@@ -220,20 +220,22 @@ class LaTeXGrammar(Grammar):
paragraph = Forward() paragraph = Forward()
tabular_config = Forward() tabular_config = Forward()
text_element = Forward() text_element = Forward()
source_hash__ = "2d33db878d9e5354a05e23f48a756604" source_hash__ = "ed181ac517b686f843e13d5783527fe3"
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'%.*(?:\n|$)' COMMENT__ = r'%.*'
WSP__ = mixin_comment(whitespace=r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?', comment=r'%.*(?:\n|$)') WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = '' wspL__ = ''
wspR__ = WSP__ wspR__ = WSP__
EOF = RegExp('(?!.)') EOF = RegExp('(?!.)')
BACKSLASH = RegExp('[\\\\]') BACKSLASH = RegExp('[\\\\]')
LB = RegExp('\\s*?\\n|$') LB = RegExp('\\s*?\\n|$')
NEW_LINE = Series(RegExp('[ \\t]*'), Optional(RegExp(COMMENT__)), RegExp('\\n'))
GAP = RE('[ \\t]*(?:\\n[ \\t]*)+\\n') GAP = RE('[ \\t]*(?:\\n[ \\t]*)+\\n')
PARSEP = OneOrMore(GAP)
WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+'))) WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
LFF = Series(RE('\\n?', wR='', wL=WSP__), Lookbehind(LB), Optional(WSPC)) PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Optional(WSPC))
LF = Series(NegativeLookahead(GAP), RegExp('[ \\t]*\\n[ \\t]*')) LFF = Series(NEW_LINE, Optional(WSPC))
LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+') TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
INTEGER = RE('\\d+') INTEGER = RE('\\d+')
NAME = Capture(RE('\\w+')) NAME = Capture(RE('\\w+'))
......
[match:LB]
1: """
"""
[match:GAP]
1: """
"""
2: """
% Comment
"""
3: """
"""
[fail:GAP]
1: """
"""
2: """
% Comment
% Comment
"""
[match:PARSEP]
1: """
"""
2: """
% Comment
"""
3: """
"""
4: """
% Comment
% Comment
"""
5: """ % Comment
% Comment
% Comment"""
[fail:PARSEP]
1: " "
2: """
"""
3: """
% Comment"""
4: """ % Comment
% Comment
% Comment"""
[match:WSPC]
1: " "
2: " % Comment"
3: " "
4: "% Comment"
5: """% Comment
"""
6: """
% Comment
% Comment
"""
7: """
"""
[fail:WSPC]
1: "X"
[match:LFF]
1: """
"""
2: """
% Comment"""
3: """
% Comment
"""
4: """
"""
[fail:LFF]
1: " "
[match:LF]
1: """
"""
2: """
% Comment"""
3: """
% Comment
% Comment
"""
[fail:LF]
1: """
"""
...@@ -25,6 +25,7 @@ sys.path.extend(['../../', '../', './']) ...@@ -25,6 +25,7 @@ sys.path.extend(['../../', '../', './'])
import DHParser.dsl import DHParser.dsl
from DHParser import testing from DHParser import testing
from DHParser import toolkit
if not DHParser.dsl.recompile_grammar('LaTeX.ebnf', force=False): # recompiles Grammar only if it has changed if not DHParser.dsl.recompile_grammar('LaTeX.ebnf', force=False): # recompiles Grammar only if it has changed
print('\nErrors while recompiling "LaTeX.ebnf":\n--------------------------------------\n\n') print('\nErrors while recompiling "LaTeX.ebnf":\n--------------------------------------\n\n')
...@@ -35,8 +36,9 @@ if not DHParser.dsl.recompile_grammar('LaTeX.ebnf', force=False): # recompiles ...@@ -35,8 +36,9 @@ if not DHParser.dsl.recompile_grammar('LaTeX.ebnf', force=False): # recompiles
from LaTeXCompiler import get_grammar, get_transformer from LaTeXCompiler import get_grammar, get_transformer
with toolkit.logging(True): with toolkit.logging(True):
error_report = testing.grammar_suite('grammar_tests', get_grammar, error_report = testing.grammar_suite('grammar_tests', get_grammar, get_transformer,
get_transformer, report=True, verbose=True) fn_patterns=['*_test_*.ini'],
report=True, verbose=True)
if error_report: if error_report:
print('\n') print('\n')
print(error_report) print(error_report)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment