
Commit e48bd3c2 authored by di68kap

- Adapted the examples to the refactoring

parent 65158426
@@ -97,7 +97,8 @@ __all__ = ('TransformationDict',
'assert_content',
'error_on',
'warn_on',
'assert_has_children')
'assert_has_children',
'peek')
TransformationProc = Callable[[List[Node]], None]
@@ -409,19 +410,21 @@ def is_token(context: List[Node], tokens: AbstractSet[str] = frozenset()) -> boo
whitespace-tokens will be ignored. In case an empty set of tokens is passed,
any token is a match.
"""
def stripped(nd: Node) -> str:
"""Removes leading and trailing whitespace-nodes from content."""
# assert node.parser.ptype == TOKEN_PTYPE
if nd.children:
i, k = 0, len(nd.children)
while i < len(nd.children) and nd.children[i].parser.ptype == WHITESPACE_PTYPE:
i += 1
while k > 0 and nd.children[k - 1].parser.ptype == WHITESPACE_PTYPE:
k -= 1
return "".join(child.content for child in node.children[i:k])
return nd.content
# def stripped(nd: Node) -> str:
# """Removes leading and trailing whitespace-nodes from content."""
# # assert node.parser.ptype == TOKEN_PTYPE
# if nd.children:
# i, k = 0, len(nd.children)
# while i < len(nd.children) and nd.children[i].parser.ptype == WHITESPACE_PTYPE:
# i += 1
# while k > 0 and nd.children[k - 1].parser.ptype == WHITESPACE_PTYPE:
# k -= 1
# return "".join(child.content for child in node.children[i:k])
# return nd.content
# node = context[-1]
# return node.parser.ptype == TOKEN_PTYPE and (not tokens or stripped(node) in tokens)
node = context[-1]
return node.parser.ptype == TOKEN_PTYPE and (not tokens or stripped(node) in tokens)
return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.content in tokens)
@transformation_factory(collections.abc.Set)
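For orientation, a condition like is_token is typically plugged into a removal transformation inside an AST transformation table. A minimal sketch, assuming remove_children_if from DHParser.transform takes its condition as a parameter named condition (the table and the rule are illustrative, not part of this commit):

from functools import partial

# Sketch: drop '+' and '-' token children of "expression" nodes.
# remove_children_if and is_token are assumed to come from
# DHParser.transform; the table itself is made up for illustration.
illustrative_table = {
    "expression": [flatten,
                   partial(remove_children_if,
                           condition=partial(is_token, tokens={'+', '-'}))],
    "*": replace_by_single_child,
}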
@@ -983,3 +986,8 @@ def forbid(context: List[Node], child_tags: AbstractSet[str]):
if child.tag_name in child_tags:
context[0].new_error(node, 'Element "%s" cannot be nested inside "%s".' %
(child.parser.name, node.parser.name))
def peek(context: List[Node]):
"""For debugging: Prints the last node in the context as S-expression."""
print(context[-1].as_sxpr())
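A quick usage sketch: temporarily wiring peek into a transformation table prints every matching node as an S-expression during traversal, which is handy while debugging a grammar (the table entry is illustrative):

# Sketch: print every "entry" node while the tree is being transformed.
debug_table = {
    "entry": [peek, reduce_single_child],
}
traverse(syntax_tree, debug_table)  # assumes a parsed `syntax_tree` Node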
@@ -16,12 +16,12 @@ except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, \
Lookbehind, Lookahead, Alternative, Pop, _Token, Synonym, AllOf, SomeOf, Unordered, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, \
traverse, remove_children_if, merge_children, is_anonymous, \
traverse, remove_children_if, merge_children, is_anonymous, Whitespace, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
@@ -64,20 +64,21 @@ class ArithmeticGrammar(Grammar):
digit = Forward()
expression = Forward()
variable = Forward()
source_hash__ = "3064cea87c9ceb59ade35566a31c3d75"
source_hash__ = "385a94a70cb629d46a13e15305692667"
parser_initialization__ = "upon instantiation"
COMMENT__ = r''
WHITESPACE__ = r'[\t ]*'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
test = Series(digit, constant, variable)
digit.set(Alternative(_Token("0"), _Token("1"), _Token("..."), _Token("9")))
digit.set(Alternative(Series(Token("0"), wsp__), Series(Token("1"), wsp__), Series(Token("..."), wsp__), Series(Token("9"), wsp__)))
constant.set(Series(digit, ZeroOrMore(digit)))
variable.set(Alternative(_Token("x"), _Token("y"), _Token("z")))
factor = Alternative(constant, variable, Series(_Token("("), expression, _Token(")")))
term = Series(factor, ZeroOrMore(Series(Alternative(_Token("*"), _Token("/")), factor)))
expression.set(Series(term, ZeroOrMore(Series(Alternative(_Token("+"), _Token("-")), term))))
variable.set(Alternative(Series(Token("x"), wsp__), Series(Token("y"), wsp__), Series(Token("z"), wsp__)))
factor = Alternative(constant, variable, Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__)))
term = Series(factor, ZeroOrMore(Series(Alternative(Series(Token("*"), wsp__), Series(Token("/"), wsp__)), factor)))
expression.set(Series(term, ZeroOrMore(Series(Alternative(Series(Token("+"), wsp__), Series(Token("-"), wsp__)), term))))
root__ = expression
def get_grammar() -> ArithmeticGrammar:
......
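As a quick sanity check, the regenerated parser can be exercised directly, since Grammar instances are callable with a source string. A minimal sketch (the input is illustrative; note that this toy grammar only knows the literal digit tokens "0", "1", "...", "9"):

# Sketch: parse a small expression with the regenerated grammar.
grammar = get_grammar()
syntax_tree = grammar('1 * x + 0')
print(syntax_tree.as_sxpr())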
#!/usr/bin/python3
"""recompile_grammar.py - recompiles all pdf files in the current directoy
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
sys.path.extend(['../../', '../', './'])
from DHParser import dsl
dsl.recompile_grammar('.', force=True)
@@ -19,8 +19,8 @@ sys.path.extend(['../../', '../', './'])
from DHParser import is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, \
Lookbehind, Lookahead, Alternative, Pop, Required, _Token, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationDict, Whitespace, \
@@ -106,29 +106,29 @@ class BibTeXGrammar(Grammar):
CONTENT_STRING = { /[^{}%]+/ | /(?=%)/~ }+
"""
text = Forward()
source_hash__ = "5ce8838ebbb255548cf3e14cd90bae6d"
source_hash__ = "534895885bfdddb19785f5d943b356a7"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?i)%.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), _RE('(?i)(?=%)')))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), _RE('(?i)(?=%)')))
NO_BLANK_STRING = _RE('(?i)[^ \\t\\n,%]+')
WORD_ = _RE('(?i)\\w+')
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
NO_BLANK_STRING = Series(RegExp('(?i)[^ \\t\\n,%]+'), wsp__)
WORD_ = Series(RegExp('(?i)\\w+'), wsp__)
WORD = RegExp('(?i)\\w+')
text.set(ZeroOrMore(Alternative(CONTENT_STRING, Series(_Token("{"), text, _Token("}")))))
text.set(ZeroOrMore(Alternative(CONTENT_STRING, Series(Series(Token("{"), wsp__), text, Series(Token("}"), wsp__)))))
plain_content = Synonym(COMMA_TERMINATED_STRING)
content = Alternative(Series(_Token("{"), text, _Token("}")), plain_content)
content = Alternative(Series(Series(Token("{"), wsp__), text, Series(Token("}"), wsp__)), plain_content)
field = Synonym(WORD_)
key = Synonym(NO_BLANK_STRING)
type = Synonym(WORD)
entry = Series(RegExp('(?i)@'), type, _Token("{"), key, ZeroOrMore(Series(_Token(","), field, _Token("="), content, mandatory=2)), _Token("}"), mandatory=5)
comment = Series(_Token("@Comment{"), text, _Token("}"), mandatory=2)
entry = Series(RegExp('(?i)@'), type, Series(Token("{"), wsp__), key, ZeroOrMore(Series(Series(Token(","), wsp__), field, Series(Token("="), wsp__), content, mandatory=2)), Series(Token("}"), wsp__), mandatory=5)
comment = Series(Series(Token("@Comment{"), wsp__), text, Series(Token("}"), wsp__), mandatory=2)
pre_code = ZeroOrMore(Alternative(RegExp('(?i)[^"%]+'), RegExp('(?i)%.*\\n')))
preamble = Series(_Token("@Preamble{"), RegExp('(?i)"'), pre_code, _RE('(?i)"'), _Token("}"), mandatory=4)
preamble = Series(Series(Token("@Preamble{"), wsp__), RegExp('(?i)"'), pre_code, RegExp('(?i)"'), wsp__, Series(Token("}"), wsp__), mandatory=5)
bibliography = ZeroOrMore(Alternative(preamble, comment, entry))
root__ = bibliography
......
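The change running through all of these grammar files follows a single pattern: the old wrappers _Token and _RE, which consumed trailing whitespace implicitly, are replaced by explicit combinations of Token/RegExp with a Whitespace parser. A sketch of the equivalence (the names open_brace and word are illustrative):

wsp__ = Whitespace(WSP_RE__)             # WSP_RE__ mixes whitespace with comments
open_brace = Series(Token("{"), wsp__)   # was: _Token("{")
word = Series(RegExp('\\w+'), wsp__)     # was: _RE('\\w+')

Making the whitespace explicit keeps the generated parsers free of hidden behavior, at the cost of more verbose rule definitions.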
@@ -12,17 +12,17 @@ Match-test "simple"
### AST
(content
  (:Token
    "{"
  )
  (text
    (CONTENT_STRING
      "Edward N. Zalta"
    )
  )
  (:Token
    "}"
  )
)
Match-test "nested_braces"
@@ -33,28 +33,28 @@ Match-test "nested_braces"
### AST
(content
  (:Token
    "{"
  )
  (text
    (CONTENT_STRING
      "\url"
    )
    (:Series
      (:Token
        "{"
      )
      (text
        (CONTENT_STRING
          "https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/"
        )
      )
      (:Token
        "}"
      )
    )
  )
  (:Token
    "}"
  )
)
\ No newline at end of file
@@ -19,8 +19,8 @@ except ImportError:
import re
from DHParser import is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, \
Lookbehind, Lookahead, Alternative, Pop, Required, _Token, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, Whitespace, \
@@ -29,7 +29,8 @@ from DHParser import is_filename, load_if_file, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
forbid, assert_content, remove_infix_operator
from DHParser.log import logging
@@ -92,43 +93,41 @@ class EBNFGrammar(Grammar):
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
whitespace = /~/~ # implicit or default whitespace
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace
list_ = /\w+/~ { "," /\w+/~ } # comma-separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE; see CommonMark/markdown.py for an example
EOF = !/./
"""
expression = Forward()
source_hash__ = "d807c57c29ef6c674abe1addfce146c4"
source_hash__ = "97b616756462a59e1f5162a95ae84c5f"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
list_ = Series(_RE('\\w+'), ZeroOrMore(Series(_Token(","), _RE('\\w+'))))
whitespace = _RE('~')
regexp = _RE('~?/(?:\\\\/|[^/])*?/~?')
plaintext = _RE('`(?:[^"]|\\\\")*?`')
literal = Alternative(_RE('"(?:[^"]|\\\\")*?"'), _RE("'(?:[^']|\\\\')*?'"))
symbol = _RE('(?!\\d)\\w+')
option = Series(_Token("["), expression, _Token("]"), mandatory=1)
repetition = Series(_Token("{"), expression, _Token("}"), mandatory=1)
oneormore = Series(_Token("{"), expression, _Token("}+"))
unordered = Series(_Token("<"), expression, _Token(">"), mandatory=1)
group = Series(_Token("("), expression, _Token(")"), mandatory=1)
retrieveop = Alternative(_Token("::"), _Token(":"))
flowmarker = Alternative(_Token("!"), _Token("&"), _Token("-!"), _Token("-&"))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(_Token("="))), Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext), Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace), Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group), Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(_Token("§")), factor))
expression.set(Series(term, ZeroOrMore(Series(_Token("|"), term))))
directive = Series(_Token("@"), symbol, _Token("="), Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, _Token("="), expression, mandatory=1)
syntax = Series(Option(_RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
list_ = Series(RegExp('\\w+'), wsp__, ZeroOrMore(Series(Series(Token(","), wsp__), RegExp('\\w+'), wsp__)))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
oneormore = Series(Series(Token("{"), wsp__), expression, Series(Token("}+"), wsp__))
unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__), Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Series(Token("="), wsp__))), Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext), Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace), Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group), Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__), Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
syntax = Series(Option(Series(wsp__, RegExp(''))), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax
def get_grammar() -> EBNFGrammar:
@@ -148,27 +147,35 @@ def get_grammar() -> EBNFGrammar:
#######################################################################
EBNF_AST_transformation_table = {
# AST Transformations for the EBNF-grammar
"+": remove_empty,
"syntax": [],
"definition": [],
"directive": [],
"expression": [],
"term": [],
"factor": [replace_or_reduce],
"flowmarker": [replace_or_reduce],
"retrieveop": [replace_or_reduce],
"group": [],
"oneormore": [],
"repetition": [],
"option": [],
"symbol": [],
"literal": [replace_or_reduce],
"regexp": [],
"list_": [],
"EOF": [],
":_Token, :_RE": reduce_single_child,
"*": replace_by_single_child
# AST Transformations for EBNF-grammar
"+":
remove_expendables,
"syntax":
[], # otherwise '"*": replace_by_single_child' would be applied
"directive, definition":
remove_tokens('@', '='),
"expression":
[replace_by_single_child, flatten, remove_tokens('|')], # remove_infix_operator],
"term":
[replace_by_single_child, flatten], # supports both idioms:
# "{ factor }+" and "factor { factor }"
"factor, flowmarker, retrieveop":
replace_by_single_child,
"group":
[remove_brackets, replace_by_single_child],
"unordered":
remove_brackets,
"oneormore, repetition, option":
[reduce_single_child, remove_brackets,
forbid('repetition', 'option', 'oneormore'), assert_content(r'(?!§)(?:.|\n)*')],
"symbol, literal, regexp":
reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE):
reduce_single_child,
"list_":
[flatten, remove_infix_operator],
"*":
replace_by_single_child
}
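A minimal sketch of how such a table is applied (the function name is illustrative; DHParser's generated compile scripts wrap this call in a transformer factory):

# Sketch: apply the table above to a freshly parsed syntax tree in place.
def transform_ebnf_ast(syntax_tree: Node):
    traverse(syntax_tree, EBNF_AST_transformation_table)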
......
@@ -17,8 +17,8 @@ try:
except ImportError:
import re
from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
_Token, Synonym, Whitespace, \
Option, NegativeLookbehind, OneOrMore, RegExp, Series, _RE, Capture, \
Synonym, Whitespace, Token, \
Option, NegativeLookbehind, OneOrMore, RegExp, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc, TransformationDict, \
Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
@@ -243,44 +243,44 @@ class LaTeXGrammar(Grammar):
WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
EOF = RegExp('(?!.)')
BACKSLASH = RegExp('[\\\\]')
LB = RegExp('\\s*?\\n|$')
NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
GAP = _RE('[ \\t]*(?:\\n[ \\t]*)+\\n')
GAP = Series(RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n'), wsp__)
WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
LFF = Series(NEW_LINE, Option(WSPC))
LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
INTEGER = _RE('\\d+')
NAME = Capture(_RE('\\w+'))
INTEGER = Series(RegExp('\\d+'), wsp__)
NAME = Capture(Series(RegExp('\\w+'), wsp__))
LINEFEED = RegExp('[\\\\][\\\\]')
BRACKETS = RegExp('[\\[\\]]')
SPECIAL = RegExp('[$&_\\\\\\\\/]')
ESCAPED = RegExp('\\\\[%$&_/{}]')
TXTCOMMAND = RegExp('\\\\text\\w+')
CMDNAME = _RE('\\\\(?:(?!_)\\w)+')
structural = Alternative(_Token("subsection"), _Token("section"), _Token("chapter"), _Token("subsubsection"), _Token("paragraph"), _Token("subparagraph"), _Token("item"))
blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(_Token("begin{"), _Token("end{")), Alternative(_Token("enumerate"), _Token("itemize"), _Token("figure"), _Token("quote"), _Token("quotation"), _Token("tabular")), _Token("}")), structural, begin_generic_block, end_generic_block))
no_command = Alternative(_Token("\\begin{"), _Token("\\end"), Series(BACKSLASH, structural))
text = Series(TEXTCHUNK, ZeroOrMore(Series(_RE(''), TEXTCHUNK)))
block = Series(RegExp('{'), _RE(''), ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, _RE(''))), RegExp('}'), mandatory=3)
cfg_text = ZeroOrMore(Alternative(Series(Option(_RE('')), text), CMDNAME, SPECIAL))
config = Series(_Token("["), cfg_text, _Token("]"), mandatory=2)
pdfinfo = Series(_Token("\\pdfinfo"), block)
documentclass = Series(_Token("\\documentclass"), Option(config), block)
cline = Series(_Token("\\cline{"), INTEGER, _Token("-"), INTEGER, _Token("}"))
hline = _Token("\\hline")
multicolumn = Series(_Token("\\multicolumn"), _Token("{"), INTEGER, _Token("}"), tabular_config, block_of_paragraphs)
caption = Series(_Token("\\caption"), block)
includegraphics = Series(_Token("\\includegraphics"), Option(config), block)
footnote = Series(_Token("\\footnote"), block_of_paragraphs)
citep = Series(Alternative(_Token("\\citep"), _Token("\\cite")), Option(config), block)
citet = Series(_Token("\\citet"), Option(config), block)
generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(_RE(''), config)), _RE(''), block)))
CMDNAME = Series(RegExp('\\\\(?:(?!_)\\w)+'), wsp__)
structural = Alternative(Series(Token("subsection"), wsp__), Series(Token("section"), wsp__), Series(Token("chapter"), wsp__), Series(Token("subsubsection"), wsp__), Series(Token("paragraph"), wsp__), Series(Token("subparagraph"), wsp__), Series(Token("item"), wsp__))
blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(Series(Token("begin{"), wsp__), Series(Token("end{"), wsp__)), Alternative(Series(Token("enumerate"), wsp__), Series(Token("itemize"), wsp__), Series(Token("figure"), wsp__), Series(Token("quote"), wsp__), Series(Token("quotation"), wsp__), Series(Token("tabular"), wsp__)), Series(Token("}"), wsp__)), structural, begin_generic_block, end_generic_block))
no_command = Alternative(Series(Token("\\begin{"), wsp__), Series(Token("\\end"), wsp__), Series(BACKSLASH, structural))
text = Series(TEXTCHUNK, ZeroOrMore(Series(RegExp(''), wsp__, TEXTCHUNK)))
block = Series(RegExp('{'), RegExp(''), wsp__, ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)), RegExp('}'), mandatory=4)
cfg_text = ZeroOrMore(Alternative(Series(Option(Series(RegExp(''), wsp__)), text), CMDNAME, SPECIAL))
config = Series(Series(Token("["), wsp__), cfg_text, Series(Token("]"), wsp__), mandatory=2)
pdfinfo = Series(Series(Token("\\pdfinfo"), wsp__), block)
documentclass = Series(Series(Token("\\documentclass"), wsp__), Option(config), block)
cline = Series(Series(Token("\\cline{"), wsp__), INTEGER, Series(Token("-"), wsp__), INTEGER, Series(Token("}"), wsp__))
hline = Series(Token("\\hline"), wsp__)
multicolumn = Series(Series(Token("\\multicolumn"), wsp__), Series(Token("{"), wsp__), INTEGER, Series(Token("}"), wsp__), tabular_config, block_of_paragraphs)
caption = Series(Series(Token("\\caption"), wsp__), block)
includegraphics = Series(Series(Token("\\includegraphics"), wsp__), Option(config), block)
footnote = Series(Series(Token("\\footnote"), wsp__), block_of_paragraphs)
citep = Series(Alternative(Series(Token("\\citep"), wsp__), Series(Token("\\cite"), wsp__)), Option(config), block)
citet = Series(Series(Token("\\citet"), wsp__), Option(config), block)
generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RegExp(''), wsp__, config)), RegExp(''), wsp__, block)))
text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
known_command = Alternative(citet, citep, footnote, includegraphics, caption, multicolumn, hline, cline, documentclass, pdfinfo)
command = Alternative(known_command, text_command, generic_command)
@@ -289,46 +289,46 @@ class LaTeXGrammar(Grammar):
begin_environment = Series(RegExp('\\\\begin{'), NAME, RegExp('}'), mandatory=1)
end_inline_env = Synonym(end_environment)
begin_inline_env = Alternative(Series(NegativeLookbehind(LB), begin_environment), Series(begin_environment, NegativeLookahead(LFF)))
generic_inline_env = Series(begin_inline_env, _RE(''), paragraph, end_inline_env, mandatory=3)
generic_inline_env = Series(begin_inline_env, RegExp(''), wsp__, paragraph, end_inline_env, mandatory=4)
known_inline_env = Synonym(inline_math)
inline_environment = Alternative(known_inline_env, generic_inline_env)
line_element = Alternative(text, block, inline_environment, command)
text_element.set(Alternative(line_element, LINEFEED))
paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, _RE(''))))
paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)))
sequence = Series(Option(WSPC), OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP))))
block_of_paragraphs.set(Series(_Token("{"), Option(sequence), _Token("}"), mandatory=2))
tabular_config.set(Series(_Token("{"), _RE('[lcr|]+'), _Token("}"), mandatory=2))
tabular_cell = ZeroOrMore(Series(line_element, _RE('')))
tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(_Token("&"), Alternative(multicolumn, tabular_cell))), _Token("\\\\"), Alternative(hline, ZeroOrMore(cline)))
tabular = Series(_Token("\\begin{tabular}"), tabular_config, ZeroOrMore(tabular_row), _Token("\\end{tabular}"), mandatory=3)
verbatim = Series(_Token("\\begin{verbatim}"), sequence, _Token("\\end{verbatim}"), mandatory=2)
quotation = Alternative(Series(_Token("\\begin{quotation}"), sequence, _Token("\\end{quotation}"), mandatory=2), Series(_Token("\\begin{quote}"), sequence, _Token("\\end{quote}"), mandatory=2))
figure = Series(_Token("\\begin{figure}"), sequence, _Token("\\end{figure}"), mandatory=2)
item = Series(_Token("\\item"), sequence)
enumerate = Series(_Token("\\begin{enumerate}"), Option(WSPC), ZeroOrMore(item), _Token("\\end{enumerate}"), mandatory=3)
itemize = Series(_Token("\\begin{itemize}"), Option(WSPC), ZeroOrMore(item), _Token("\\end{itemize}"), mandatory=3)
block_of_paragraphs.set(Series(Series(Token("{"), wsp__), Option(sequence), Series(Token("}"), wsp__), mandatory=2))
tabular_config.set(Series(Series(Token("{"), wsp__), RegExp('[lcr|]+'), wsp__, Series(Token("}"), wsp__), mandatory=3))
tabular_cell = ZeroOrMore(Series(line_element, RegExp(''), wsp__))
tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Series(Token("&"), wsp__), Alternative(multicolumn, tabular_cell))), Series(Token("\\\\"), wsp__), Alternative(hline, ZeroOrMore(cline)))
tabular = Series(Series(Token("\\begin{tabular}"), wsp__), tabular_config, ZeroOrMore(tabular_row), Series(Token("\\end{tabular}"), wsp__), mandatory=3)
verbatim = Series(Series(Token("\\begin{verbatim}"), wsp__), sequence, Series(Token("\\end{verbatim}"), wsp__), mandatory=2)
quotation = Alternative(Series(Series(Token("\\begin{quotation}"), wsp__), sequence, Series(Token("\\end{quotation}"), wsp__), mandatory=2), Series(Series(Token("\\begin{quote}"), wsp__), sequence, Series(Token("\\end{quote}"), wsp__), mandatory=2))
figure = Series(Series(Token("\\begin{figure}"), wsp__), sequence, Series(Token("\\end{figure}"), wsp__), mandatory=2)
item = Series(Series(Token("\\item"), wsp__), sequence)
enumerate = Series(Series(Token("\\begin{enumerate}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{enumerate}"), wsp__), mandatory=3)
itemize = Series(Series(Token("\\begin{itemize}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{itemize}"), wsp__), mandatory=3)
end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
generic_block = Series(begin_generic_block, sequence, end_generic_block, mandatory=2)
known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
block_environment.set(Alternative(known_environment, generic_block))
heading = Synonym(block)
Index = Series(Option(WSPC), _Token("\\printindex"))
Bibliography = Series(Option(WSPC), _Token("\\bibliography"), heading)
SubParagraph = Series(_Token("\\subparagraph"), heading, Option(sequence))
Index = Series(Option(WSPC), Series(Token("\\printindex"), wsp__))
Bibliography = Series(Option(WSPC), Series(Token("\\bibliography"), wsp__), heading)
SubParagraph = Series(Series(Token("\\subparagraph"), wsp__), heading, Option(sequence))
SubParagraphs = OneOrMore(Series(Option(WSPC), SubParagraph))
Paragraph = Series(_Token("\\paragraph"), heading, ZeroOrMore(Alternative(sequence, SubParagraphs)))
Paragraph = Series(Series(Token("\\paragraph"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubParagraphs)))
Paragraphs = OneOrMore(Series(Option(WSPC), Paragraph))
SubSubSection = Series(_Token("\\subsubsection"), heading, ZeroOrMore(Alternative(sequence, Paragraphs)))