Commit 71dca11d authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

- Mandatory operator finished and tested

parent 89b64a8c
......@@ -409,8 +409,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
A (potentially empty) list of error or warning messages.
"""
filepath = os.path.normpath(source_file)
# with open(source_file, encoding="utf-8") as f:
# source = f.read()
with open(source_file, encoding="utf-8") as f:
source = f.read()
rootname = os.path.splitext(filepath)[0]
compiler_name = os.path.basename(rootname)
if compiler_suite:
......@@ -422,7 +422,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
cfactory = get_ebnf_compiler
compiler1 = cfactory()
compiler1.set_grammar_name(compiler_name, source_file)
result, messages, ast = compile_source(source_file, sfactory(), pfactory(), tfactory(), compiler1)
result, messages, ast = compile_source(source, sfactory(), pfactory(), tfactory(), compiler1)
if has_errors(messages):
return messages
......
......@@ -30,9 +30,9 @@ except ImportError:
from .typing34 import Callable, Dict, List, Set, Tuple, Union
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
from DHParser.parser import Grammar, mixin_comment, nil_preprocessor, Forward, RE, NegativeLookahead, \
Alternative, Series, Option, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
PreprocessorFunc
from DHParser.parser import Grammar, mixin_comment, nil_preprocessor, Forward, RegExp, RE, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, \
Compiler, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.error import Error
from DHParser.transform import traverse, remove_brackets, \
......@@ -73,6 +73,77 @@ def get_ebnf_preprocessor() -> PreprocessorFunc:
########################################################################
# class EBNFGrammar(Grammar):
# r"""Parser for an EBNF source file, with this grammar:
#
# # EBNF-Grammar in EBNF
#
# @ comment = /#.*(?:\n|$)/ # comments start with '#' and eat all chars up to and including '\n'
# @ whitespace = /\s*/ # whitespace includes linefeed
# @ literalws = right # trailing whitespace of literals will be ignored tacitly
#
# syntax = [~//] { definition | directive } §EOF
# definition = symbol §"=" expression
# directive = "@" §symbol §"=" ( regexp | literal | list_ )
#
# expression = term { "|" term }
# term = { factor }+
# factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
# | [flowmarker] literal
# | [flowmarker] regexp
# | [flowmarker] group
# | [flowmarker] oneormore
# | repetition
# | option
#
# flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&' positive lookahead, '§' required
# "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
# retrieveop = "::" | ":" # '::' pop, ':' retrieve
#
# group = "(" expression §")"
# oneormore = "{" expression "}+"
# repetition = "{" expression §"}"
# option = "[" expression §"]"
#
# symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
# literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
# | /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
# regexp = /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# # '~' is a whitespace-marker, if present leading or trailing
# # whitespace of a regular expression will be ignored tacitly.
# list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# #                                                      # BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
# EOF = !/./
# """
# expression = Forward()
# source_hash__ = "a410e1727fb7575e98ff8451dbf8f3bd"
# parser_initialization__ = "upon instantiation"
# COMMENT__ = r'#.*(?:\n|$)'
# WSP__ = mixin_comment(whitespace=r'\s*', comment=r'#.*(?:\n|$)')
# wspL__ = ''
# wspR__ = WSP__
# EOF = NegativeLookahead(RE('.', wR=''))
# list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
# regexp = RE(r'~?/(?:\\/|[^/])*?/~?') # RE('~?/(?:[^/]|(?<=\\\\)/)*/~?')
# literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
# symbol = RE('(?!\\d)\\w+')
# option = Series(Token("["), expression, Required(Token("]")))
# repetition = Series(Token("{"), expression, Required(Token("}")))
# oneormore = Series(Token("{"), expression, Token("}+"))
# group = Series(Token("("), expression, Required(Token(")")))
# retrieveop = Alternative(Token("::"), Token(":"))
# flowmarker = Alternative(Token("!"), Token("&"), Token("§"), Token("-!"), Token("-&"))
# factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
# Series(Option(flowmarker), literal), Series(Option(flowmarker), regexp),
# Series(Option(flowmarker), group), Series(Option(flowmarker), oneormore),
# repetition, option)
# term = OneOrMore(factor)
# expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
# directive = Series(Token("@"), Required(symbol), Required(Token("=")), Alternative(regexp, literal, list_))
# definition = Series(symbol, Required(Token("=")), expression)
# syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
# root__ = syntax
class EBNFGrammar(Grammar):
r"""Parser for an EBNF source file, with this grammar:
......@@ -84,10 +155,10 @@ class EBNFGrammar(Grammar):
syntax = [~//] { definition | directive } §EOF
definition = symbol §"=" expression
directive = "@" §symbol §"=" ( regexp | literal | list_ )
directive = "@" §symbol "=" ( regexp | literal | list_ )
expression = term { "|" term }
term = { factor }+
term = { ["§"] factor }+ # "§" means all following factors mandatory
factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be sure it's not a definition
| [flowmarker] literal
| [flowmarker] regexp
......@@ -96,8 +167,8 @@ class EBNFGrammar(Grammar):
| repetition
| option
flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&' positive lookahead, '§' required
"-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
| "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
......@@ -108,7 +179,7 @@ class EBNFGrammar(Grammar):
symbol = /(?!\d)\w+/~ # e.g. expression, factor, parameter_list
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
regexp = /~?\/(?:[^\/]|(?<=\\)\/)*\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
......@@ -116,32 +187,39 @@ class EBNFGrammar(Grammar):
EOF = !/./
"""
expression = Forward()
source_hash__ = "a410e1727fb7575e98ff8451dbf8f3bd"
source_hash__ = "a131abc5259738631000cda90d2fc65b"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'\s*', comment=r'#.*(?:\n|$)')
WHITESPACE__ = r'\s*'
WSP__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
EOF = NegativeLookahead(RE('.', wR=''))
list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'))))
regexp = RE(r'~?/(?:\\/|[^/])*?/~?') # RE('~?/(?:[^/]|(?<=\\\\)/)*/~?')
EOF = NegativeLookahead(RegExp('.'))
list_ = Series(RE('\\w+'), ZeroOrMore(Series(Token(","), RE('\\w+'), mandatory=1000)),
mandatory=1000)
regexp = RE('~?/(?:\\\\/|[^/])*?/~?')
literal = Alternative(RE('"(?:[^"]|\\\\")*?"'), RE("'(?:[^']|\\\\')*?'"))
symbol = RE('(?!\\d)\\w+')
option = Series(Token("["), expression, Required(Token("]")))
repetition = Series(Token("{"), expression, Required(Token("}")))
oneormore = Series(Token("{"), expression, Token("}+"))
group = Series(Token("("), expression, Required(Token(")")))
option = Series(Token("["), expression, Token("]"), mandatory=2)
repetition = Series(Token("{"), expression, Token("}"), mandatory=2)
oneormore = Series(Token("{"), expression, Token("}+"), mandatory=1000)
group = Series(Token("("), expression, Token(")"), mandatory=2)
retrieveop = Alternative(Token("::"), Token(":"))
flowmarker = Alternative(Token("!"), Token("&"), Token("§"), Token("-!"), Token("-&"))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("="))),
Series(Option(flowmarker), literal), Series(Option(flowmarker), regexp),
Series(Option(flowmarker), group), Series(Option(flowmarker), oneormore),
repetition, option)
term = OneOrMore(factor)
expression.set(Series(term, ZeroOrMore(Series(Token("|"), term))))
directive = Series(Token("@"), Required(symbol), Required(Token("=")), Alternative(regexp, literal, list_))
definition = Series(symbol, Required(Token("=")), expression)
syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), Required(EOF))
flowmarker = Alternative(Token("!"), Token("&"), Token("-!"), Token("-&"))
factor = Alternative(
Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Token("=")),
mandatory=1000), Series(Option(flowmarker), literal, mandatory=1000),
Series(Option(flowmarker), regexp, mandatory=1000),
Series(Option(flowmarker), group, mandatory=1000),
Series(Option(flowmarker), oneormore, mandatory=1000), repetition, option)
term = OneOrMore(Series(Option(Token("§")), factor, mandatory=1000))
expression.set(
Series(term, ZeroOrMore(Series(Token("|"), term, mandatory=1000)), mandatory=1000))
directive = Series(Token("@"), symbol, Token("="), Alternative(regexp, literal, list_),
mandatory=1)
definition = Series(symbol, Token("="), expression, mandatory=1)
syntax = Series(Option(RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)),
EOF, mandatory=2)
root__ = syntax
......@@ -583,7 +661,7 @@ class EBNFCompiler(Compiler):
else:
assert nd.parser.name == "directive", nd.as_sxpr()
self.compile(nd)
node.error_flag = max(node.error_flag, nd.error_flag)
node.error_flag = max(node.error_flag, nd.error_flag)
self.definitions.update(definitions)
return self.assemble_parser(definitions, node)
......@@ -715,6 +793,7 @@ class EBNFCompiler(Compiler):
name for the particular non-terminal.
"""
arguments = [self.compile(r) for r in node.children] + custom_args
node.error_flag = max(node.error_flag, max(t.error_flag for t in node.children))
return parser_class + '(' + ', '.join(arguments) + ')'
......@@ -731,10 +810,10 @@ class EBNFCompiler(Compiler):
mandatory_marker.append(i)
if i == 0:
nd.add_error('First item of a series should not be mandatory.',
code=Error.WARNING)
Error.WARNING)
elif len(mandatory_marker) > 1:
nd.add_error('One mandatory marker (§) sufficient to declare the '
'rest of the series as mandatory.', code=Error.WARNING)
'rest of the series as mandatory.', Error.WARNING)
else:
filtered_children.append(nd)
i += 1
......
......@@ -27,8 +27,7 @@ __all__ = ('Error',
'has_errors',
'only_errors',
'linebreaks',
'line_col',
'error_messages')
'line_col')
class Error:
......@@ -44,14 +43,15 @@ class Error:
MANDATORY_CONTINUATION = 1001
def __init__(self, message: str, level: int=ERROR, code: Hashable=0):
def __init__(self, message: str, level: int = ERROR, code: Hashable = 0,
pos: int = -1, line: int = -1, column: int = -1):
self.message = message
assert level >= 0
self.level = level or Error.ERROR
self.code = code
self.pos = -1
self.line = -1
self.column = -1
self.pos = pos
self.line = line
self.column = column
def __str__(self):
prefix = ''
......@@ -59,6 +59,10 @@ class Error:
prefix = "line: %3i, column: %2i, " % (self.line, self.column)
return prefix + "%s: %s" % (self.level_str, self.message)
def __repr__(self):
return 'Error("%s", %i, %s, %i, %i, %i)' \
% (self.message, self.level, repr(self.code), self.pos, self.line, self.column)
@property
def level_str(self):
return "Warning" if is_warning(self.level) else "Error"
......@@ -124,21 +128,20 @@ def _line_col(lbreaks: List[int], pos: int) -> Tuple[int, int]:
column = pos - lbreaks[line - 1]
return line, column
def error_messages(source_text, errors) -> List[str]:
    """Returns the sequence or iterator of error objects as an iterator
    of error messages with line and column numbers at the beginning.
Args:
source_text (str): The source text on which the errors occurred.
(Needed in order to determine the line and column numbers.)
errors (list): The list of errors as returned by the method
``collect_errors()`` of a Node object
Returns:
a list that contains all error messages in string form. Each
string starts with "line: [Line-No], column: [Column-No]
"""
for err in errors:
if err.pos >= 0 and err.line <= 0:
err.line, err.column = line_col(source_text, err.pos)
return [str(err) for err in sorted(errors, key=lambda err: err.pos)]
# def error_messages(source_text:str, errors: List[Error]) -> List[str]:
# """Adds line, column information for error messages, if the position
# is given.
#
# Args:
# source_text (str): The source text on which the errors occurred.
# (Needed in order to determine the line and column numbers.)
# errors (list): The list of errors as returned by the method
# ``collect_errors()`` of a Node object
# Returns:
# The same list of error messages, which now contain line and
# column numbers.
# """
# for err in errors:
# if err.pos >= 0 and err.line <= 0:
# err.line, err.column = line_col(source_text, err.pos)
# return errors
......@@ -690,9 +690,9 @@ class Grammar:
for entry, parser in cdict.items():
if isinstance(parser, Parser) and sane_parser_name(entry):
if not parser.name:
parser.name = entry
if (isinstance(parser, Forward) and (not parser.parser.name)):
parser.parser.name = entry
parser._name = entry
if (isinstance(parser, Forward) and (not parser.parser._name)):
parser.parser._name = entry
cls.parser_initialization__ = "done"
......@@ -843,7 +843,7 @@ class Grammar:
stitches[-1].add_error(error_msg)
if self.history_tracking__:
# some parsers may have matched and left history records with nodes != None.
# Because these are not connected to the stiched root node, their pos
# Because these are not connected to the stitched root node, their pos-
# properties will not be initialized by setting the root node's pos property
# to zero. Therefore, their pos properties need to be initialized here
for record in self.history__:
......@@ -869,7 +869,7 @@ class Grammar:
else:
result.add_error(error_str)
result.pos = 0 # calculate all positions
result.finalize_errors(self.document__)
# result.collect_errors(self.document__)
return result
......@@ -1426,9 +1426,10 @@ class Series(NaryOperator):
text_ = text[i:]
node.add_error('%s expected; "%s" found!' % (str(parser), text[:10]),
code=Error.MANDATORY_CONTINUATION)
return node, text_
results += (node,)
if node.error_flag:
break
# if node.error_flag:
# break
pos += 1
assert len(results) <= len(self.parsers)
return Node(self, results), text_
......@@ -1976,14 +1977,14 @@ def compile_source(source: str,
# likely that error list gets littered with compile error messages
result = None
ef = syntax_tree.error_flag
messages = syntax_tree.collect_errors(clear_errors=True)
messages = syntax_tree.collect_errors(source_text, clear_errors=True)
if not is_error(ef):
transformer(syntax_tree)
ef = max(ef, syntax_tree.error_flag)
messages.extend(syntax_tree.collect_errors(clear_errors=True))
messages.extend(syntax_tree.collect_errors(source_text, clear_errors=True))
if is_logging(): syntax_tree.log(log_file_name + '.ast')
if not is_error(syntax_tree.error_flag):
result = compiler(syntax_tree)
messages.extend(syntax_tree.collect_errors())
messages.extend(syntax_tree.collect_errors(source_text))
syntax_tree.error_flag = max(syntax_tree.error_flag, ef)
return result, messages, syntax_tree
......@@ -60,7 +60,7 @@ class ParserBase:
for instantiation.
"""
def __init__(self, name=''): # , pbases=frozenset()):
self.name = name # type: str
self._name = name # type: str
self._ptype = ':' + self.__class__.__name__ # type: str
def __repr__(self):
......@@ -69,6 +69,10 @@ class ParserBase:
def __str__(self):
return self.name + (' = ' if self.name else '') + repr(self)
@property
def name(self):
return self._name
@property
def ptype(self) -> str:
return self._ptype
......@@ -94,8 +98,7 @@ class MockParser(ParserBase):
"""
def __init__(self, name='', ptype=''): # , pbases=frozenset()):
assert not ptype or ptype[0] == ':'
super(MockParser, self).__init__(name)
self.name = name
super().__init__(name)
self._ptype = ptype or ':' + self.__class__.__name__
......@@ -303,44 +306,41 @@ class Node(collections.abc.Sized):
def errors(self) -> List[Error]:
return self._errors.copy()
def add_error(self, message: str, level: int= Error.ERROR, code: Hashable=0) -> 'Node':
def add_error(self, message: str, level: int = Error.ERROR, code: Hashable = 0) -> 'Node':
self._errors.append(Error(message, level, code))
self.error_flag = max(self.error_flag, self._errors[-1].level)
return self
def _finalize_errors(self, lbreaks: List[int]):
if self.error_flag:
for err in self._errors:
assert err.pos >= 0
err.line, err.column = line_col(lbreaks, err.pos)
for child in self.children:
child._finalize_errors(lbreaks)
def finalize_errors(self, source_text: Union[StringView, str]):
"""Recursively adds line- and column-numbers to all error objects.
"""
if self.error_flag:
lbreaks = linebreaks(source_text)
self._finalize_errors(lbreaks)
def collect_errors(self, clear_errors=False) -> List[Error]:
def collect_errors(self, document: Union[StringView, str] = '', clear_errors=False) -> List[
Error]:
"""
Recursively adds line- and column-numbers to all error objects.
Returns all errors of this node or any child node in the form
of a set of tuples (position, error_message), where position
is always relative to this node.
"""
errors = self.errors
if clear_errors:
self._errors = []
self.error_flag = 0
if self.children:
for child in self.children:
errors.extend(child.collect_errors(clear_errors))
return errors
if self.error_flag:
lbreaks = linebreaks(document) if document else []
return self._collect_errors(lbreaks, clear_errors)
else:
return []
def _collect_errors(self, lbreaks: List[int] = [], clear_errors=False) -> List[Error]:
if self.error_flag:
errors = self.errors
if lbreaks:
for err in errors:
err.pos = self.pos
err.line, err.column = line_col(lbreaks, err.pos)
if clear_errors:
self._errors = []
self.error_flag = 0
if self.children:
for child in self.children:
errors.extend(child._collect_errors(lbreaks, clear_errors))
return errors
else:
return []
def _tree_repr(self, tab, openF, closeF, dataF=identity, density=0) -> str:
......@@ -408,7 +408,7 @@ class Node(collections.abc.Sized):
s = lB + node.tag_name
# s += " '(pos %i)" % node.pos
if src:
s += " '(pos %i " % node.pos + " %i %i)" % line_col(src, node.pos)
s += " '(pos %i " % node.pos # + " %i %i)" % line_col(src, node.pos)
if node.errors:
s += " '(err '(%s))" % ' '.join(str(err).replace('"', r'\"')
for err in node.errors)
......
......@@ -29,7 +29,7 @@ except ImportError:
from DHParser.toolkit import is_logging, clear_logs
from DHParser.syntaxtree import mock_syntax_tree, flatten_sxpr
from DHParser.error import is_error, error_messages
from DHParser.error import is_error
__all__ = ('unit_from_configfile',
'unit_from_json',
......@@ -78,7 +78,7 @@ def unit_from_json(json_filename):
for symbol in unit:
for stage in unit[symbol]:
if stage not in UNIT_STAGES:
raise ValueError('Test stage %s not in: ' % (stage, str(UNIT_STAGES)))
raise ValueError('Test stage %s not in: %s' % (stage, str(UNIT_STAGES)))
return unit
# TODO: add support for yaml, cson, toml
......@@ -163,8 +163,8 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
if is_error(cst.error_flag):
errata.append('Match test "%s" for parser "%s" failed:\n\tExpr.: %s\n\n\t%s\n\n' %
(test_name, parser_name, '\n\t'.join(test_code.split('\n')),
'\n\t'.join(m.replace('\n', '\n\t\t') for m in
error_messages(test_code, cst.collect_errors()))))
'\n\t'.join(str(m).replace('\n', '\n\t\t') for m in
cst.collect_errors(test_code))))
tests.setdefault('__err__', {})[test_name] = errata[-1]
# write parsing-history log only in case of failure!
parser.log_parsing_history__("match_%s_%s.log" % (parser_name, test_name))
......
......@@ -275,11 +275,13 @@ def TRUE_CONDITION(context: List[Node]) -> bool:
def replace_child(node: Node):
assert len(node.children) == 1
if not node.children[0].parser.name:
node.children[0].parser.name = node.parser.name
node.parser = node.children[0].parser
node._errors.extend(node.children[0]._errors)
node.result = node.children[0].result
child = node.children[0]
if not child.parser.name:
child.parser = MockParser(node.parser.name, child.parser.ptype)
# parser names must not be overwritten, else: child.parser.name = node.parser.name
node.parser = child.parser
node._errors.extend(child._errors)
node.result = child.result
def reduce_child(node: Node):
......
......@@ -19,7 +19,7 @@ factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be
| option
flowmarker = "!" | "&" # '!' negative lookahead, '&' positive lookahead
"-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
| "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
......
......@@ -18,8 +18,8 @@ factor = [flowmarker] [retrieveop] symbol !"=" # negative lookahead to be
| repetition
| option
flowmarker = "!" | "&" | "§" | # '!' negative lookahead, '&' positive lookahead, '§' required
"-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
flowmarker = "!" | "&" | "§" # '!' negative lookahead, '&' positive lookahead, '§' required
| "-!" | "-&" # '-' negative lookbehind, '-&' positive lookbehind
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
......
# LaTeX-Grammar for DHParser
# preamble
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*/
########################################################################
#
# outer document structure
......
......@@ -16,7 +16,7 @@ try:
except ImportError:
import re
from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
Required, Token, Synonym, \
Token, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Series, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc, TransformationDict, \
......@@ -49,10 +49,10 @@ class LaTeXGrammar(Grammar):
# LaTeX-Grammar for DHParser
# preamble
@ whitespace = /[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?/ # optional whitespace, including at most one linefeed
@ comment = /%.*/
########################################################################
#
# outer document structure
......@@ -228,7 +228,7 @@ class LaTeXGrammar(Grammar):
paragraph = Forward()
tabular_config = Forward()
text_element = Forward()
source_hash__ = "37585004123d6b80ecf8f67217b43479"
source_hash__ = "6f0e961d68f21a54a6e4b1fb01fe17bf"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'%.*'
WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
......@@ -238,12 +238,15 @@ class LaTeXGrammar(Grammar):
EOF = RegExp('(?!.)')
BACKSLASH = RegExp('[\\\\]')
LB = RegExp('\\s*?\\n|$')
NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'), mandatory=1000)
GAP = RE('[ \\t]*(?:\\n[ \\t]*)+\\n')
WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
LFF = Series(NEW_LINE, Option(WSPC))
LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__), mandatory=1000)),
GAP, Option(WSPC), mandatory=1000)
LFF = Series(NEW_LINE, Option(WSPC), mandatory=1000)
LF = Series(NEW_LINE,