
Commit 813bebe5 authored by Eckhart Arnold

- bugfixes

parent 821cb67c
@@ -81,7 +81,7 @@ from DHParser import logging, is_filename, load_if_file, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, \
traverse, remove_children_if, \
traverse, remove_children_if, join, \
reduce_single_child, replace_by_single_child, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
@@ -132,7 +132,7 @@ PreprocessorFunc = Union[Callable[[str], str], partial]
LEFT_RECURSION_DEPTH = 8 # type: int
# because of python's recursion depth limit, this value ought not to be
# set too high. PyPy allows higher values than CPython
MAX_DROPOUTS = 5 # type: int
MAX_DROPOUTS = 3 # type: int
# stop trying to recover parsing after so many errors
@@ -231,7 +231,8 @@ def add_parser_guard(parser_func):
# in case of left recursion, the first recursive step that
# matches will store its result in the cache
parser.visited[location] = (node, rest)
grammar.last_node__ = node # store last node for Lookbehind parser
# store last non-empty node for Lookbehind parser
if len(rest) < location: grammar.last_node__ = node
parser.recursion_counter[location] -= 1
@@ -293,6 +294,15 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
2. *Anonymous parsers* where the name-field just contains the empty
string. AST-transformation of Anonymous parsers can be hooked
only to their class name, and not to the individual parser.
Parser objects are callable and parsing is done by calling a parser
object with the text to parse. If the parser matches it returns
a tuple consisting of a node representing the root of the concrete
syntax tree resulting from the match as well as the substring
`text[i:]` where i is the length of matched text (which can be
zero in the case of parsers like `ZeroOrMore` or `Optional`).
If `i > 0` then the parser has "moved forward". If the parser does
not match it returns `(None, text)`.
"""
ApplyFunc = Callable[['Parser'], None]
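A minimal sketch of this calling convention, assuming `p` is a parser that has already been connected to a Grammar object:

    node, rest = p(text)
    matched = node is not None                         # (None, text) signals a failed match
    moved_forward = matched and len(rest) < len(text)  # False for zero-length matches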
@@ -304,15 +314,27 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
self.reset()
def __deepcopy__(self, memo):
"""Deepcopy method of the parser. Upon instantiation of a Grammar-
object, parsers will be deep-copied to the Grammar object. If a
derived parser-class changes the signature of the constructor,
the `__deepcopy__`-method must be replaced (i.e. overridden without
calling the same method from the superclass) by the derived class.
"""
return self.__class__(self.name)
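For illustration, a hedged sketch of what this rule demands: a derived parser whose constructor takes an additional argument (the class below is hypothetical, not part of DHParser) must pass that argument along in its own `__deepcopy__`-method instead of relying on the superclass version:

    import copy

    class BoundedRepeat(Parser):  # hypothetical derived class with an extra argument
        def __init__(self, parser, max_repetitions, name=''):
            super(BoundedRepeat, self).__init__(name)
            self.parser = parser
            self.max_repetitions = max_repetitions

        def __deepcopy__(self, memo):
            # copy the child parser and hand all constructor arguments through
            parser = copy.deepcopy(self.parser, memo)
            return self.__class__(parser, self.max_repetitions, self.name)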
def reset(self):
"""Initializes or resets any parser variables. If overwritten,
the `reset()`-method of the parent class must be called from the
`reset()`-method of the derived class."""
self.visited = dict() # type: Dict[int, Tuple[Node, str]]
self.recursion_counter = dict() # type: Dict[int, int]
self.cycle_detection = set() # type: Set[Callable]
return self
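A hedged sketch of a conforming override (the subclass and its state variable are hypothetical):

    class CountingParser(Parser):  # hypothetical derived class
        def reset(self):
            super(CountingParser, self).reset()  # resets visited, recursion_counter, ...
            self.match_count = 0                 # subclass-specific state
            return self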
def __call__(self, text: str) -> Tuple[Node, str]:
"""Applies the parser to the given `text` and returns a node with
the results or None as well as the text at the position right behind
the matching string."""
return None, text # default behaviour: don't match
def __add__(self, other: 'Parser') -> 'Series':
@@ -332,10 +354,12 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
@grammar.setter
def grammar(self, grammar: 'Grammar'):
assert self._grammar is None or self._grammar == grammar, \
"Parser has already been assigned to a Grammar object!"
self._grammar = grammar
self._grammar_assigned_notifier()
if self._grammar is None:
self._grammar = grammar
self._grammar_assigned_notifier()
else:
assert self._grammar == grammar, \
"Parser has already been assigned to a different Grammar object!"
def _grammar_assigned_notifier(self):
"""A function that notifies the parser object that it has been
@@ -345,7 +369,7 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
def apply(self, func: ApplyFunc):
"""
Applies function `func(parser)` recursively to this parser and all
descendants of the tree of parsers. The same function can never
descendant parsers if any exist. The same function can never
be applied twice between calls of the ``reset()``-method!
"""
if func in self.cycle_detection:
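For example, `apply()` can be used to collect the names of all parsers reachable from a given parser (a sketch; `root_parser` stands for any Parser object):

    names = []
    root_parser.apply(lambda p: names.append(p.name))
    # calling apply() again with the same function object is a no-op
    # until reset() has been called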
@@ -387,7 +411,7 @@ class Grammar:
>>> number = RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')
>>> number_parser = Grammar(number)
>>> number_parser("3.1416").show()
>>> number_parser("3.1416").content()
'3.1416'
Collecting the parsers that define a grammar in a descendant class of
@@ -518,7 +542,7 @@ class Grammar:
# parsers not connected to the root object will be copied later
# on demand (see Grammar.__getitem__()). Usually, the need to
# do so only arises during testing.
self.root__ = root if root else copy.deepcopy(self.__class__.root__)
self.root__ = copy.deepcopy(root) if root else copy.deepcopy(self.__class__.root__)
if self.wspL__:
self.wsp_left_parser__ = Whitespace(self.wspL__) # type: ParserBase
@@ -556,7 +580,7 @@ class Grammar:
self.rollback__ = [] # type: List[Tuple[int, Callable]]
self.last_rb__loc__ = -1 # type: int
# previously parsed node, needed by Lookbehind parser
self.last_node__ = None # type: Node
self.last_node__ = Node(ZOMBIE_PARSER, '') # type: Node
# support for call stack tracing
self.call_stack__ = [] # type: List[Parser]
# snapshots of call stacks
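Seeding `last_node__` with an empty zombie node instead of `None` is an instance of the null-object pattern: consumers such as `Lookbehind.condition()` can stringify and match against the last node without a `None`-check. A simplified sketch of the idea:

    last_node = Node(ZOMBIE_PARSER, '')    # placeholder node, never None
    match = regexp.match(str(last_node))   # safe even before anything was parsed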
@@ -807,13 +831,20 @@ class PreprocessorToken(Parser):
class RegExp(Parser):
"""
Regular expression parser.
"""Regular expression parser.
The RegExp-parser parses text that matches a regular expression.
RegExp can also be considered the "atomic parser", because all
other parsers delegate part of the parsing job to other parsers,
but do not match text directly.
Example:
>>> word = RegExp(r'\w+')
>>> Grammar(word)("Haus").content()
'Haus'
EBNF-Notation: `/ ... /`
EBNF-Example: `word = /\w+/`
"""
def __init__(self, regexp, name: str = '') -> None:
@@ -856,6 +887,21 @@ class RE(Parser):
string, e.g. use r'\s*' or r'[\t ]+', but not r'\s+'. If the
respective parameters in the constructor are set to ``None`` the
default whitespace expression from the Grammar object will be used.
Example (allowing whitespace on the right-hand side, but not on
the left-hand side of a regular expression):
>>> word = RE(r'\w+', wR=r'\s*')
>>> parser = Grammar(word)
>>> result = parser('Haus ')
>>> result.content()
'Haus '
>>> result.structure()
'(:RE (:RegExp "Haus") (:Whitespace " "))'
>>> parser(' Haus').content()
' <<< Error on " Haus" | Parser did not match! Invalid source file? >>> '
EBNF-Notation: `/ ... /~` or `~/ ... /` or `~/ ... /~`
EBNF-Example: `word = /\w+/~`
"""
def __init__(self, regexp, wL=None, wR=None, name=''):
"""Constructor for class RE.
@@ -1004,6 +1050,30 @@ class NaryOperator(Parser):
class Optional(UnaryOperator):
"""
Parser `Optional` always matches, even if its child-parser
did not match.
If the child-parser did not match, `Optional` returns a node
with no content and does not move forward in the text.
If the child-parser did match, `Optional` returns a node
with the node returned by the child-parser as its single
child and the text at the position where the child-parser
left it.
Examples:
>>> number = Optional(Token('-')) + RegExp(r'\d+') + Optional(RegExp(r'\.\d+'))
>>> Grammar(number)('3.14159').content()
'3.14159'
>>> Grammar(number)('3.14159').structure()
'(:Series (:Optional) (:RegExp "3") (:Optional (:RegExp ".14159")))'
>>> Grammar(number)('-1').content()
'-1'
EBNF-Notation: `[ ... ]`
EBNF-Example: `number = ["-"] /\d+/ [ /\.\d+/ ]`
"""
def __init__(self, parser: Parser, name: str = '') -> None:
super(Optional, self).__init__(parser, name)
# assert isinstance(parser, Parser)
@@ -1024,6 +1094,7 @@ class Optional(UnaryOperator):
return '[' + (self.parser.repr[1:-1] if isinstance(self.parser, Alternative)
and not self.parser.name else self.parser.repr) + ']'
class ZeroOrMore(Optional):
def __call__(self, text: str) -> Tuple[Node, str]:
results = () # type: Tuple[Node, ...]
@@ -1120,12 +1191,12 @@ class Alternative(NaryOperator):
# the order of the sub-expression matters!
>>> number = RE('\d+') | RE('\d+') + RE('\.') + RE('\d+')
>>> Grammar(number)("3.1416").show()
>>> Grammar(number)("3.1416").content()
'3 <<< Error on ".1416" | Parser stopped before end! trying to recover... >>> '
# the most selective expression should be put first:
>>> number = RE('\d+') + RE('\.') + RE('\d+') | RE('\d+')
>>> Grammar(number)("3.1416").show()
>>> Grammar(number)("3.1416").content()
'3.1416'
"""
@@ -1246,7 +1317,6 @@ class Lookbehind(FlowOperator):
assert isinstance(p, RegExp), str(type(p))
self.regexp = p.main.regexp if isinstance(p, RE) else p.regexp
super(Lookbehind, self).__init__(parser, name)
print("WARNING: Lookbehind Operator is experimental!")
def __call__(self, text: str) -> Tuple[Node, str]:
if self.sign(self.condition()):
@@ -1262,7 +1332,10 @@ class Lookbehind(FlowOperator):
def condition(self):
node = self.grammar.last_node__
return node and self.regexp.match(str(node))
assert node is not None # can be removed
s = str(node)
assert s or node.parser.name == '__ZOMBIE__', str(node.parser)
return self.regexp.match(s)
class NegativeLookbehind(Lookbehind):
@@ -132,6 +132,17 @@ StrictResultType = Union[ChildrenType, str]
ResultType = Union[ChildrenType, 'Node', str, None]
def oneliner_sxpr(sxpr: str) -> str:
"""Returns S-expression `sxpr` as a one liner without unnecessary
whitespace.
Example:
>>> oneliner_sxpr('(a\\n (b\\n c\\n )\\n)\\n')
'(a (b c))'
"""
return re.sub('\s(?=\))', '', re.sub('\s+', ' ', sxpr)).strip()
class Node:
"""
Represents a node in the concrete or abstract syntax tree.
@@ -259,13 +270,34 @@ class Node:
def errors(self) -> List[Error]:
return [Error(self.pos, err) for err in self._errors]
def show(self) -> str:
"""Returns content as string, inserting error messages where
errors occurred.
def add_error(self, error_str) -> 'Node':
self._errors.append(error_str)
self.error_flag = True
return self
def propagate_error_flags(self) -> None:
"""Recursively propagates error flags set on child nodes to its
parents. This can be used if errors are added to descendant
nodes after syntaxtree construction, i.e. in the compile phase.
"""
s = "".join(child.show() for child in self.children) if self.children \
else str(self.result)
return (' <<< Error on "%s" | %s >>> ' % (s, '; '.join(self._errors))) if self._errors else s
for child in self.children:
child.propagate_error_flags()
self.error_flag = self.error_flag or child.error_flag
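A sketch of the intended use during the compile phase (the node names are hypothetical):

    some_descendant.add_error("undefined command")  # flags only this node
    tree.propagate_error_flags()                    # now tree.error_flag is set, too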
def collect_errors(self, clear_errors=False) -> List[Error]:
"""
Returns all errors of this node or any child node in the form
of a set of tuples (position, error_message), where position
is always relative to this node.
"""
errors = self.errors
if clear_errors:
self._errors = []
self.error_flag = False
if self.children:
for child in self.children:
errors.extend(child.collect_errors(clear_errors))
return errors
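Typical usage after parsing (a sketch; `tree` stands for the root node returned by a Grammar object):

    for error in tree.collect_errors(clear_errors=True):
        print(error)   # Error tuples with positions relative to `tree`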
def _tree_repr(self, tab, openF, closeF, dataF=identity, density=0) -> str:
"""
@@ -363,39 +395,20 @@ class Node:
return self._tree_repr(' ', opening, closing, density=1)
def add_error(self, error_str) -> 'Node':
self._errors.append(error_str)
self.error_flag = True
return self
def propagate_error_flags(self) -> None:
"""Recursively propagates error flags set on child nodes to its
parents. This can be used if errors are added to descendant
nodes after syntaxtree construction, i.e. in the compile phase.
"""
for child in self.children:
child.propagate_error_flags()
self.error_flag = self.error_flag or child.error_flag
def structure(self) -> str:
"""Return structure (and content) as S-expression on a single line
without any line breaks."""
return oneliner_sxpr(self.as_sxpr())
def collect_errors(self, clear_errors=False) -> List[Error]:
def content(self) -> str:
"""
Returns all errors of this node or any child node in the form
of a set of tuples (position, error_message), where position
is always relative to this node.
Returns content as string, inserting error messages where
errors occurred.
"""
errors = self.errors
if clear_errors:
self._errors = []
self.error_flag = False
if self.children:
for child in self.children:
errors.extend(child.collect_errors(clear_errors))
return errors
def log(self, log_file_name):
st_file_name = log_file_name
with open(os.path.join(log_dir(), st_file_name), "w", encoding="utf-8") as f:
f.write(self.as_sxpr())
s = "".join(child.content() for child in self.children) if self.children \
else str(self.result)
return (
' <<< Error on "%s" | %s >>> ' % (s, '; '.join(self._errors))) if self._errors else s
def find(self, match_function: Callable) -> Iterator['Node']:
"""Finds nodes in the tree that match a specific criterion.
@@ -458,6 +471,11 @@ class Node:
# return self.result,
# return nav(path.split('/'))
def log(self, log_file_name):
st_file_name = log_file_name
with open(os.path.join(log_dir(), st_file_name), "w", encoding="utf-8") as f:
f.write(self.as_sxpr())
def mock_syntax_tree(sxpr):
"""
@@ -511,17 +529,6 @@ def mock_syntax_tree(sxpr):
return Node(MockParser(name, ':' + class_name), result)
def compact_sxpr(s) -> str:
"""Returns S-expression ``s`` as a one liner without unnecessary
whitespace.
Example:
>>> compact_sxpr('(a\\n (b\\n c\\n )\\n)\\n')
'(a (b c))'
"""
return re.sub('\s(?=\))', '', re.sub('\s+', ' ', s)).strip()
TransformationFunc = Union[Callable[[Node], Any], partial]
@@ -28,7 +28,7 @@ except ImportError:
from DHParser import error_messages
from DHParser.toolkit import is_logging
from DHParser.syntaxtree import mock_syntax_tree, compact_sxpr
from DHParser.syntaxtree import mock_syntax_tree, oneliner_sxpr
__all__ = ('unit_from_configfile',
'unit_from_json',
@@ -171,8 +171,8 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
errata.append('Abstract syntax tree test "%s" for parser "%s" failed:'
'\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s'
% (test_name, parser_name, '\n\t'.join(test_code.split('\n')),
compact_sxpr(compare.as_sxpr()),
compact_sxpr(ast.as_sxpr())))
oneliner_sxpr(compare.as_sxpr()),
oneliner_sxpr(ast.as_sxpr())))
tests.setdefault('__err__', {})[test_name] = errata[-1]
if verbose:
print(infostr + ("OK" if len(errata) == errflag else "FAIL"))
@@ -57,8 +57,8 @@ block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | table | quotation
| verbatim
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&SUCC_LB begin_environment &PRED_LB
end_generic_block = -&SUCC_LB end_environment &PRED_LB
begin_generic_block = -&SUCC_LB begin_environment -&SUCC_LB
end_generic_block = -&SUCC_LB end_environment -&SUCC_LB
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
@@ -86,8 +86,8 @@ text_elements = command | text | block | inline_environment
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_inline_env { text_elements }+ §end_inline_env
begin_inline_env = (-!SUCC_LB begin_environment) | (begin_environment !PRED_LB)
end_inline_env = (-!SUCC_LB end_environment) | (end_environment !PRED_LB)
begin_inline_env = (-!SUCC_LB begin_environment) | (begin_environment -!SUCC_LB)
end_inline_env = (-!SUCC_LB end_environment) | (end_environment -!SUCC_LB)
begin_environment = "\begin{" §NAME §"}"
end_environment = "\end{" §::NAME §"}"
@@ -144,7 +144,7 @@ WSPC = /[ \t]+/ # (horizontal) whitespace
LF = !PARSEP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
EOF = !/./
EOF = /(?!.)/
SUCC_LB = /(?:.*\n)+\s*$/ # linebreak succeeding an arbitrary chunk of text
PRED_LB = /\s*?\n/ # linebreak preceding any text
SUCC_LB = /(?!.)|(?:.*\n)+\s*$/ # linebreak succeeding an arbitrary chunk of text
# PRED_LB = /\s*(?!.)|\s*?\n/ # linebreak preceding any text
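Replacing `!/./` by `/(?!.)/` moves the negative lookahead from the parser level into the regular expression itself; both succeed only where no character that `.` matches follows (note that without the DOTALL flag a bare `.` does not match linefeeds). A quick illustration with Python's re module:

    >>> import re
    >>> bool(re.match(r'(?!.)', ''))
    True
    >>> bool(re.match(r'(?!.)', 'text'))
    False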
@@ -15,29 +15,29 @@ try:
import regex as re
except ImportError:
import re
from DHParser.toolkit import logging, is_filename
from DHParser.parser import Grammar, Compiler, Alternative, Pop, Required, Token, Synonym, \
Optional, OneOrMore, Series, RE, Capture, \
from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
Required, Token, Synonym, \
Optional, NegativeLookbehind, OneOrMore, RegExp, Series, RE, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc
from DHParser.syntaxtree import traverse, remove_brackets, reduce_single_child, replace_by_single_child, \
remove_expendables, flatten, join, \
collapse, replace_content, TransformationFunc, \
remove_empty
PreprocessorFunc, \
Node, TransformationFunc, \
traverse, join, \
reduce_single_child, replace_by_single_child, remove_expendables, remove_empty, flatten, \
collapse, replace_content, remove_brackets
#######################################################################
#
# SCANNER SECTION - Can be edited. Changes will be preserved.
# PREPROCESSOR SECTION - Can be edited. Changes will be preserved.
#
#######################################################################
def LaTeXScanner(text):
def LaTeXPreprocessor(text):
return text
def get_scanner() -> PreprocessorFunc:
return LaTeXScanner
def get_preprocessor() -> PreprocessorFunc:
return LaTeXPreprocessor
#######################################################################
@@ -104,12 +104,12 @@ class LaTeXGrammar(Grammar):
#### block environments ####
# TODO: ambiguity between generic block environments and generic inline environments
block_environment = known_environment | generic_environment
block_environment = known_environment | generic_block
known_environment = itemize | enumerate | figure | table | quotation
| verbatim
generic_environment = begin_environment sequence §end_environment
generic_block = begin_generic_block sequence §end_generic_block
begin_generic_block = -&SUCC_LB begin_environment -&SUCC_LB
end_generic_block = -&SUCC_LB end_environment -&SUCC_LB
itemize = "\begin{itemize}" [PARSEP] { item } §"\end{itemize}"
enumerate = "\begin{enumerate}" [PARSEP] {item } §"\end{enumerate}"
@@ -136,7 +136,9 @@ class LaTeXGrammar(Grammar):
inline_environment = known_inline_env | generic_inline_env
known_inline_env = inline_math
generic_inline_env = begin_environment { text_elements }+ §end_environment
generic_inline_env = begin_inline_env { text_elements }+ §end_inline_env
begin_inline_env = (-!SUCC_LB begin_environment) | (begin_environment -!SUCC_LB)
end_inline_env = (-!SUCC_LB end_environment) | (end_environment -!SUCC_LB)
begin_environment = "\begin{" §NAME §"}"
end_environment = "\end{" §::NAME §"}"
@@ -190,41 +192,45 @@ class LaTeXGrammar(Grammar):
TEXTCHUNK = /[^\\%$&\{\}\[\]\s\n]+/ # some piece of text excluding whitespace,
# linefeed and special characters
WSPC = /[ \t]+/ # (horizontal) whitespace
LF = !PARSEP /[ \t]*\n[ \t]*/ # LF but not an empty line
LF = !PARSEP /[ \t]*\n[ \t]*/ # linefeed but not an empty line
PARSEP = /[ \t]*(?:\n[ \t]*)+\n[ \t]*/ # at least one empty line, i.e.
# [whitespace] linefeed [whitespace] linefeed
EOF = !/./
EOF = /(?!.)/
SUCC_LB = /(?!.)|(?:.*\n)+\s*$/ # linebreak succeeding an arbitrary chunk of text
# PRED_LB = /\s*(?!.)|\s*?\n/ # linebreak preceding any text
"""
block_environment = Forward()
block_of_paragraphs = Forward()
text_elements = Forward()
source_hash__ = "9a8cba2b425d276af78e141d7dda162c"
source_hash__ = "eb91cd592f8a8c60a796ba705a121b72"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'%.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?', comment=r'%.*(?:\n|$)')
wspL__ = ''
wspR__ = WSP__
EOF = NegativeLookahead(RE('.', wR=''))
PARSEP = RE('[ \\t]*(?:\\n[ \\t]*)+\\n[ \\t]*', wR='')
LF = Series(NegativeLookahead(PARSEP), RE('[ \\t]*\\n[ \\t]*', wR=''))
WSPC = RE('[ \\t]+', wR='')
TEXTCHUNK = RE('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+', wR='')
BRACKETS = RE('[\\[\\]]', wR='')
ESCAPED = RE('\\\\[%$&_/]', wR='')
SUCC_LB = RegExp('(?!.)|(?:.*\\n)+\\s*$')
EOF = RegExp('(?!.)')
PARSEP = RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n[ \\t]*')
LF = Series(NegativeLookahead(PARSEP), RegExp('[ \\t]*\\n[ \\t]*'))
WSPC = RegExp('[ \\t]+')
TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
BRACKETS = RegExp('[\\[\\]]')
ESCAPED = RegExp('\\\\[%$&_/]')
MATH = RE('[\\w_^{}[\\]]*')
NAME = Capture(RE('\\w+'))
CMDNAME = RE('\\\\(?:(?!_)\\w)+')
structural = Alternative(Token("subsection"), Token("section"), Token("chapter"), Token("subsubsection"),
Token("paragraph"), Token("subparagraph"), Token("item"))
blockcmd = Series(RE('[\\\\]', wR=''), Alternative(Series(Alternative(Token("begin{"), Token("end{")),
Alternative(Token("enumerate"), Token("itemize"),
Token("figure"), Token("quote"),
Token("quotation"), Token("tabular")),
Token("}")), structural))
structural = Alternative(Token("subsection"), Token("section"), Token("chapter"),
Token("subsubsection"), Token("paragraph"), Token("subparagraph"),
Token("item"))
blockcmd = Series(RegExp('[\\\\]'), Alternative(
Series(Alternative(Token("begin{"), Token("end{")),
Alternative(Token("enumerate"), Token("itemize"), Token("figure"), Token("quote"),
Token("quotation"), Token("tabular")), Token("}")), structural))
word_sequence = OneOrMore(Series(TEXTCHUNK, RE('')))
cfgtext = OneOrMore(Alternative(word_sequence, Series(ESCAPED, RE(''))))
text = OneOrMore(Alternative(cfgtext, Series(BRACKETS, RE(''))))
block = Series(RE('{', wR=''), ZeroOrMore(text_elements), Required(RE('}', wR='')))
block = Series(RegExp('{'), ZeroOrMore(text_elements), Required(RegExp('}')))
config = Series(Token("["), cfgtext, Required(Token("]")))
caption = Series(Token("\\caption"), block)
includegraphics = Series(Token("\\includegraphics"), config, block)
@@ -235,13 +241,18 @@ class LaTeXGrammar(Grammar):
inline_math = Series(Token("$"), MATH, Token("$"))
end_environment = Series(Token("\\end{"), Required(Pop(NAME)), Required(Token("}")))
begin_environment = Series(Token("\\begin{"), Required(NAME), Required(Token("}")))
generic_inline_env = Series(begin_environment, OneOrMore(text_elements), Required(end_environment))
end_inline_env = Alternative(Series(NegativeLookbehind(SUCC_LB), end_environment),
Series(end_environment, NegativeLookbehind(SUCC_LB)))
begin_inline_env = Alternative(Series(NegativeLookbehind(SUCC_LB), begin_environment),
Series(begin_environment, NegativeLookbehind(SUCC_LB)))
generic_inline_env = Series(begin_inline_env, OneOrMore(text_elements),
Required(end_inline_env))
known_inline_env = Synonym(inline_math)
inline_environment = Alternative(known_inline_env, generic_inline_env)
text_elements.set(Alternative(command, text, block, inline_environment))
paragraph = OneOrMore(Series(NegativeLookahead(blockcmd), text_elements, RE('')))
sequence = OneOrMore(Series(Alternative(paragraph, block_environment), Optional(PARSEP)))
block_of_paragraphs.set(Series(RE('{', wR=''), sequence, Required(RE('}', wR=''))))
block_of_paragraphs.set(Series(RegExp('{'), sequence, Required(RegExp('}'))))
table_config = Series(Token("{"), RE('[lcr|]+'), Token("}"))
table = Series(Token("\\begin{tabular}"), table_config, sequence, Token("\\end{tabular}"))
verbatim = Series(Token("\\begin{verbatim}"), sequence, Token("\\end{verbatim}"))
@@ -251,9 +262,11 @@ class LaTeXGrammar(Grammar):
enumerate = Series(Token("\\begin{enumerate}"), Optional(PARSEP), ZeroOrMore(item),
Required(Token("\\end{enumerate}")))
itemize = Series(Token("\\begin{itemize}"), Optional(PARSEP), ZeroOrMore(item), Required(Token("\\end{itemize}")))
generic_environment = Series(begin_environment, sequence, Required(end_environment))
end_generic_block = Series(Lookbehind(SUCC_LB), end_environment, Lookbehind(SUCC_LB))
begin_generic_block = Series(Lookbehind(SUCC_LB), begin_environment, Lookbehind(SUCC_LB))
generic_block = Series(begin_generic_block, sequence, Required(end_generic_block))
known_environment = Alternative(itemize, enumerate, figure, table, quotation, verbatim)
block_environment.set(Alternative(known_environment, generic_environment))
block_environment.set(Alternative(known_environment, generic_block))
Index = Series(Token("\\printindex"), Optional(PARSEP))
Bibliography = Series(Token("\\bibliography"), block, Optional(PARSEP))
SubParagraph = Series(Token("\\subparagpaph"), block, Optional(PARSEP), ZeroOrMore(sequence))
@@ -369,7 +382,7 @@ class LaTeXCompiler(Compiler):
assert re.match('\w+\Z', grammar_name)
def on_latexdoc(self, node):
return node.as_sexpr()
return node
def on_preamble(self, node):
pass
@@ -377,10 +390,91 @@ class LaTeXCompiler(Compiler):
def on_document(self, node):
pass
def on_blockenv(self, node):
def on_frontpages(self, node):
pass
def on_Chapters(self, node):
pass
def on_Chapter(self, node):
pass
def on_Sections(self, node):
pass
def on_Section(self, node):
pass
def on_SubSections(self, node):
pass
def on_SubSection(self, node):
pass
def on_SubSubSections(self, node):
pass