Commit e48bd3c2 authored by di68kap's avatar di68kap
Browse files

- Beispiele an Refactoring angepasst

parent 65158426
......@@ -97,7 +97,8 @@ __all__ = ('TransformationDict',
'assert_content',
'error_on',
'warn_on',
'assert_has_children')
'assert_has_children',
'peek')
TransformationProc = Callable[[List[Node]], None]
......@@ -409,19 +410,21 @@ def is_token(context: List[Node], tokens: AbstractSet[str] = frozenset()) -> boo
whitespace-tokens will be ignored. In case an empty set of tokens is passed,
any token is a match.
"""
def stripped(nd: Node) -> str:
"""Removes leading and trailing whitespace-nodes from content."""
# assert node.parser.ptype == TOKEN_PTYPE
if nd.children:
i, k = 0, len(nd.children)
while i < len(nd.children) and nd.children[i].parser.ptype == WHITESPACE_PTYPE:
i += 1
while k > 0 and nd.children[k - 1].parser.ptype == WHITESPACE_PTYPE:
k -= 1
return "".join(child.content for child in node.children[i:k])
return nd.content
# def stripped(nd: Node) -> str:
# """Removes leading and trailing whitespace-nodes from content."""
# # assert node.parser.ptype == TOKEN_PTYPE
# if nd.children:
# i, k = 0, len(nd.children)
# while i < len(nd.children) and nd.children[i].parser.ptype == WHITESPACE_PTYPE:
# i += 1
# while k > 0 and nd.children[k - 1].parser.ptype == WHITESPACE_PTYPE:
# k -= 1
# return "".join(child.content for child in node.children[i:k])
# return nd.content
# node = context[-1]
# return node.parser.ptype == TOKEN_PTYPE and (not tokens or stripped(node) in tokens)
node = context[-1]
return node.parser.ptype == TOKEN_PTYPE and (not tokens or stripped(node) in tokens)
return node.parser.ptype == TOKEN_PTYPE and (not tokens or node.content in tokens)
@transformation_factory(collections.abc.Set)
......@@ -983,3 +986,8 @@ def forbid(context: List[Node], child_tags: AbstractSet[str]):
if child.tag_name in child_tags:
context[0].new_error(node, 'Element "%s" cannot be nested inside "%s".' %
(child.parser.name, node.parser.name))
def peek(context: List[Node]):
    """Debugging aid: print the last node of the context as an S-expression."""
    topmost = context[-1]
    print(topmost.as_sxpr())
......@@ -16,12 +16,12 @@ except ImportError:
import re
from DHParser import logging, is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, PreprocessorToken, \
Lookbehind, Lookahead, Alternative, Pop, _Token, Synonym, AllOf, SomeOf, Unordered, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, \
traverse, remove_children_if, merge_children, is_anonymous, \
traverse, remove_children_if, merge_children, is_anonymous, Whitespace, \
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
......@@ -64,20 +64,21 @@ class ArithmeticGrammar(Grammar):
digit = Forward()
expression = Forward()
variable = Forward()
source_hash__ = "3064cea87c9ceb59ade35566a31c3d75"
source_hash__ = "385a94a70cb629d46a13e15305692667"
parser_initialization__ = "upon instantiation"
COMMENT__ = r''
WHITESPACE__ = r'[\t ]*'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
test = Series(digit, constant, variable)
digit.set(Alternative(_Token("0"), _Token("1"), _Token("..."), _Token("9")))
digit.set(Alternative(Series(Token("0"), wsp__), Series(Token("1"), wsp__), Series(Token("..."), wsp__), Series(Token("9"), wsp__)))
constant.set(Series(digit, ZeroOrMore(digit)))
variable.set(Alternative(_Token("x"), _Token("y"), _Token("z")))
factor = Alternative(constant, variable, Series(_Token("("), expression, _Token(")")))
term = Series(factor, ZeroOrMore(Series(Alternative(_Token("*"), _Token("/")), factor)))
expression.set(Series(term, ZeroOrMore(Series(Alternative(_Token("+"), _Token("-")), term))))
variable.set(Alternative(Series(Token("x"), wsp__), Series(Token("y"), wsp__), Series(Token("z"), wsp__)))
factor = Alternative(constant, variable, Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__)))
term = Series(factor, ZeroOrMore(Series(Alternative(Series(Token("*"), wsp__), Series(Token("/"), wsp__)), factor)))
expression.set(Series(term, ZeroOrMore(Series(Alternative(Series(Token("+"), wsp__), Series(Token("-"), wsp__)), term))))
root__ = expression
def get_grammar() -> ArithmeticGrammar:
......
#!/usr/bin/python3
"""recompile_grammar.py - recompiles all grammar files in the current directory
Author: Eckhart Arnold <arnold@badw.de>
Copyright 2017 Bavarian Academy of Sciences and Humanities
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
# Make the DHParser package importable when this script is run from inside
# an example directory of the repository checkout (not an installed package).
sys.path.extend(['../../', '../', './'])
from DHParser import dsl
# Recompile every grammar in the current directory; force=True regenerates
# the parsers even if the compiled versions appear to be up to date.
dsl.recompile_grammar('.', force=True)
......@@ -19,8 +19,8 @@ sys.path.extend(['../../', '../', './'])
from DHParser import is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, \
Lookbehind, Lookahead, Alternative, Pop, Required, _Token, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationDict, Whitespace, \
......@@ -106,29 +106,29 @@ class BibTeXGrammar(Grammar):
CONTENT_STRING = { /[^{}%]+/ | /(?=%)/~ }+
"""
text = Forward()
source_hash__ = "5ce8838ebbb255548cf3e14cd90bae6d"
source_hash__ = "534895885bfdddb19785f5d943b356a7"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'(?i)%.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), _RE('(?i)(?=%)')))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), _RE('(?i)(?=%)')))
NO_BLANK_STRING = _RE('(?i)[^ \\t\\n,%]+')
WORD_ = _RE('(?i)\\w+')
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
CONTENT_STRING = OneOrMore(Alternative(RegExp('(?i)[^{}%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
COMMA_TERMINATED_STRING = ZeroOrMore(Alternative(RegExp('(?i)[^,%]+'), Series(RegExp('(?i)(?=%)'), wsp__)))
NO_BLANK_STRING = Series(RegExp('(?i)[^ \\t\\n,%]+'), wsp__)
WORD_ = Series(RegExp('(?i)\\w+'), wsp__)
WORD = RegExp('(?i)\\w+')
text.set(ZeroOrMore(Alternative(CONTENT_STRING, Series(_Token("{"), text, _Token("}")))))
text.set(ZeroOrMore(Alternative(CONTENT_STRING, Series(Series(Token("{"), wsp__), text, Series(Token("}"), wsp__)))))
plain_content = Synonym(COMMA_TERMINATED_STRING)
content = Alternative(Series(_Token("{"), text, _Token("}")), plain_content)
content = Alternative(Series(Series(Token("{"), wsp__), text, Series(Token("}"), wsp__)), plain_content)
field = Synonym(WORD_)
key = Synonym(NO_BLANK_STRING)
type = Synonym(WORD)
entry = Series(RegExp('(?i)@'), type, _Token("{"), key, ZeroOrMore(Series(_Token(","), field, _Token("="), content, mandatory=2)), _Token("}"), mandatory=5)
comment = Series(_Token("@Comment{"), text, _Token("}"), mandatory=2)
entry = Series(RegExp('(?i)@'), type, Series(Token("{"), wsp__), key, ZeroOrMore(Series(Series(Token(","), wsp__), field, Series(Token("="), wsp__), content, mandatory=2)), Series(Token("}"), wsp__), mandatory=5)
comment = Series(Series(Token("@Comment{"), wsp__), text, Series(Token("}"), wsp__), mandatory=2)
pre_code = ZeroOrMore(Alternative(RegExp('(?i)[^"%]+'), RegExp('(?i)%.*\\n')))
preamble = Series(_Token("@Preamble{"), RegExp('(?i)"'), pre_code, _RE('(?i)"'), _Token("}"), mandatory=4)
preamble = Series(Series(Token("@Preamble{"), wsp__), RegExp('(?i)"'), pre_code, RegExp('(?i)"'), wsp__, Series(Token("}"), wsp__), mandatory=5)
bibliography = ZeroOrMore(Alternative(preamble, comment, entry))
root__ = bibliography
......
......@@ -12,17 +12,17 @@ Match-test "simple"
### AST
(content
(:Token
"{"
)
(text
(CONTENT_STRING
"Edward N. Zalta"
)
)
(:Token
"}"
(:Token
"{"
)
(text
(CONTENT_STRING
"Edward N. Zalta"
)
)
(:Token
"}"
)
)
Match-test "nested_braces"
......@@ -33,28 +33,28 @@ Match-test "nested_braces"
### AST
(content
(:Token
"{"
(:Token
"{"
)
(text
(CONTENT_STRING
"\url"
)
(text
(:Series
(:Token
"{"
)
(text
(CONTENT_STRING
"\url"
"https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/"
)
(:Series
(:Token
"{"
)
(text
(CONTENT_STRING
"https://plato.stanford.edu/archives/fall2013/entries/thomas-kuhn/"
)
)
(:Token
"}"
)
)
)
(:Token
)
(:Token
"}"
)
)
)
(:Token
"}"
)
)
\ No newline at end of file
......@@ -19,8 +19,8 @@ except ImportError:
import re
from DHParser import is_filename, load_if_file, \
Grammar, Compiler, nil_preprocessor, \
Lookbehind, Lookahead, Alternative, Pop, Required, _Token, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, _RE, Capture, \
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym, \
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
last_value, counterpart, accumulate, PreprocessorFunc, \
Node, TransformationFunc, TransformationDict, Whitespace, \
......@@ -29,7 +29,8 @@ from DHParser import is_filename, load_if_file, \
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace, \
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE, \
remove_nodes, remove_content, remove_brackets, replace_parser, \
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last
keep_children, is_one_of, has_content, apply_if, remove_first, remove_last, \
forbid, assert_content, remove_infix_operator
from DHParser.log import logging
......@@ -92,43 +93,41 @@ class EBNFGrammar(Grammar):
literal = /"(?:[^"]|\\")*?"/~ # e.g. "(", '+', 'while'
| /'(?:[^']|\\')*?'/~ # whitespace following literals will be ignored tacitly.
plaintext = /`(?:[^"]|\\")*?`/~ # like literal but does not eat whitespace
regexp = /~?\/(?:\\\/|[^\/])*?\/~?/~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
# '~' is a whitespace-marker, if present leading or trailing
# whitespace of a regular expression will be ignored tacitly.
whitespace = /~/~ # implicit or default whitespace
regexp = /\/(?:\\\/|[^\/])*?\//~ # e.g. /\w+/, ~/#.*(?:\n|$)/~
whitespace = /~/~ # insignificant whitespace
list_ = /\w+/~ { "," /\w+/~ } # comma separated list of symbols, e.g. BEGIN_LIST, END_LIST,
# BEGIN_QUOTE, END_QUOTE ; see CommonMark/markdown.py for an example
EOF = !/./
"""
expression = Forward()
source_hash__ = "d807c57c29ef6c674abe1addfce146c4"
source_hash__ = "97b616756462a59e1f5162a95ae84c5f"
parser_initialization__ = "upon instantiation"
COMMENT__ = r'#.*(?:\n|$)'
WHITESPACE__ = r'\s*'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
EOF = NegativeLookahead(RegExp('.'))
list_ = Series(_RE('\\w+'), ZeroOrMore(Series(_Token(","), _RE('\\w+'))))
whitespace = _RE('~')
regexp = _RE('~?/(?:\\\\/|[^/])*?/~?')
plaintext = _RE('`(?:[^"]|\\\\")*?`')
literal = Alternative(_RE('"(?:[^"]|\\\\")*?"'), _RE("'(?:[^']|\\\\')*?'"))
symbol = _RE('(?!\\d)\\w+')
option = Series(_Token("["), expression, _Token("]"), mandatory=1)
repetition = Series(_Token("{"), expression, _Token("}"), mandatory=1)
oneormore = Series(_Token("{"), expression, _Token("}+"))
unordered = Series(_Token("<"), expression, _Token(">"), mandatory=1)
group = Series(_Token("("), expression, _Token(")"), mandatory=1)
retrieveop = Alternative(_Token("::"), _Token(":"))
flowmarker = Alternative(_Token("!"), _Token("&"), _Token("-!"), _Token("-&"))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(_Token("="))), Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext), Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace), Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group), Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(_Token("§")), factor))
expression.set(Series(term, ZeroOrMore(Series(_Token("|"), term))))
directive = Series(_Token("@"), symbol, _Token("="), Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, _Token("="), expression, mandatory=1)
syntax = Series(Option(_RE('', wR='', wL=WSP__)), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
list_ = Series(RegExp('\\w+'), wsp__, ZeroOrMore(Series(Series(Token(","), wsp__), RegExp('\\w+'), wsp__)))
whitespace = Series(RegExp('~'), wsp__)
regexp = Series(RegExp('/(?:\\\\/|[^/])*?/'), wsp__)
plaintext = Series(RegExp('`(?:[^"]|\\\\")*?`'), wsp__)
literal = Alternative(Series(RegExp('"(?:[^"]|\\\\")*?"'), wsp__), Series(RegExp("'(?:[^']|\\\\')*?'"), wsp__))
symbol = Series(RegExp('(?!\\d)\\w+'), wsp__)
option = Series(Series(Token("["), wsp__), expression, Series(Token("]"), wsp__), mandatory=1)
repetition = Series(Series(Token("{"), wsp__), expression, Series(Token("}"), wsp__), mandatory=1)
oneormore = Series(Series(Token("{"), wsp__), expression, Series(Token("}+"), wsp__))
unordered = Series(Series(Token("<"), wsp__), expression, Series(Token(">"), wsp__), mandatory=1)
group = Series(Series(Token("("), wsp__), expression, Series(Token(")"), wsp__), mandatory=1)
retrieveop = Alternative(Series(Token("::"), wsp__), Series(Token(":"), wsp__))
flowmarker = Alternative(Series(Token("!"), wsp__), Series(Token("&"), wsp__), Series(Token("-!"), wsp__), Series(Token("-&"), wsp__))
factor = Alternative(Series(Option(flowmarker), Option(retrieveop), symbol, NegativeLookahead(Series(Token("="), wsp__))), Series(Option(flowmarker), literal), Series(Option(flowmarker), plaintext), Series(Option(flowmarker), regexp), Series(Option(flowmarker), whitespace), Series(Option(flowmarker), oneormore), Series(Option(flowmarker), group), Series(Option(flowmarker), unordered), repetition, option)
term = OneOrMore(Series(Option(Series(Token("§"), wsp__)), factor))
expression.set(Series(term, ZeroOrMore(Series(Series(Token("|"), wsp__), term))))
directive = Series(Series(Token("@"), wsp__), symbol, Series(Token("="), wsp__), Alternative(regexp, literal, list_), mandatory=1)
definition = Series(symbol, Series(Token("="), wsp__), expression, mandatory=1)
syntax = Series(Option(Series(wsp__, RegExp(''))), ZeroOrMore(Alternative(definition, directive)), EOF, mandatory=2)
root__ = syntax
def get_grammar() -> EBNFGrammar:
......@@ -148,27 +147,35 @@ def get_grammar() -> EBNFGrammar:
#######################################################################
EBNF_AST_transformation_table = {
# AST Transformations for the EBNF-grammar
"+": remove_empty,
"syntax": [],
"definition": [],
"directive": [],
"expression": [],
"term": [],
"factor": [replace_or_reduce],
"flowmarker": [replace_or_reduce],
"retrieveop": [replace_or_reduce],
"group": [],
"oneormore": [],
"repetition": [],
"option": [],
"symbol": [],
"literal": [replace_or_reduce],
"regexp": [],
"list_": [],
"EOF": [],
":_Token, :_RE": reduce_single_child,
"*": replace_by_single_child
# AST Transformations for EBNF-grammar
"+":
remove_expendables,
"syntax":
[], # otherwise '"*": replace_by_single_child' would be applied
"directive, definition":
remove_tokens('@', '='),
"expression":
[replace_by_single_child, flatten, remove_tokens('|')], # remove_infix_operator],
"term":
[replace_by_single_child, flatten], # supports both idioms:
# "{ factor }+" and "factor { factor }"
"factor, flowmarker, retrieveop":
replace_by_single_child,
"group":
[remove_brackets, replace_by_single_child],
"unordered":
remove_brackets,
"oneormore, repetition, option":
[reduce_single_child, remove_brackets,
forbid('repetition', 'option', 'oneormore'), assert_content(r'(?!§)(?:.|\n)*')],
"symbol, literal, regexp":
reduce_single_child,
(TOKEN_PTYPE, WHITESPACE_PTYPE):
reduce_single_child,
"list_":
[flatten, remove_infix_operator],
"*":
replace_by_single_child
}
......
......@@ -17,8 +17,8 @@ try:
except ImportError:
import re
from DHParser import is_filename, Grammar, Compiler, Lookbehind, Alternative, Pop, \
_Token, Synonym, Whitespace, \
Option, NegativeLookbehind, OneOrMore, RegExp, Series, _RE, Capture, \
Synonym, Whitespace, Token, \
Option, NegativeLookbehind, OneOrMore, RegExp, Series, Capture, \
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \
PreprocessorFunc, TransformationDict, \
Node, TransformationFunc, traverse, remove_children_if, is_anonymous, \
......@@ -243,44 +243,44 @@ class LaTeXGrammar(Grammar):
WHITESPACE__ = r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?'
WSP_RE__ = mixin_comment(whitespace=WHITESPACE__, comment=COMMENT__)
wspL__ = ''
wspR__ = WSP__
whitespace__ = Whitespace(WSP__)
wspR__ = WSP_RE__
wsp__ = Whitespace(WSP_RE__)
EOF = RegExp('(?!.)')
BACKSLASH = RegExp('[\\\\]')
LB = RegExp('\\s*?\\n|$')
NEW_LINE = Series(RegExp('[ \\t]*'), Option(RegExp(COMMENT__)), RegExp('\\n'))
GAP = _RE('[ \\t]*(?:\\n[ \\t]*)+\\n')
GAP = Series(RegExp('[ \\t]*(?:\\n[ \\t]*)+\\n'), wsp__)
WSPC = OneOrMore(Alternative(RegExp(COMMENT__), RegExp('\\s+')))
PARSEP = Series(ZeroOrMore(Series(RegExp(WHITESPACE__), RegExp(COMMENT__))), GAP, Option(WSPC))
LFF = Series(NEW_LINE, Option(WSPC))
LF = Series(NEW_LINE, ZeroOrMore(Series(RegExp(COMMENT__), RegExp(WHITESPACE__))))
TEXTCHUNK = RegExp('[^\\\\%$&\\{\\}\\[\\]\\s\\n]+')
INTEGER = _RE('\\d+')
NAME = Capture(_RE('\\w+'))
INTEGER = Series(RegExp('\\d+'), wsp__)
NAME = Capture(Series(RegExp('\\w+'), wsp__))
LINEFEED = RegExp('[\\\\][\\\\]')
BRACKETS = RegExp('[\\[\\]]')
SPECIAL = RegExp('[$&_\\\\\\\\/]')
ESCAPED = RegExp('\\\\[%$&_/{}]')
TXTCOMMAND = RegExp('\\\\text\\w+')
CMDNAME = _RE('\\\\(?:(?!_)\\w)+')
structural = Alternative(_Token("subsection"), _Token("section"), _Token("chapter"), _Token("subsubsection"), _Token("paragraph"), _Token("subparagraph"), _Token("item"))
blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(_Token("begin{"), _Token("end{")), Alternative(_Token("enumerate"), _Token("itemize"), _Token("figure"), _Token("quote"), _Token("quotation"), _Token("tabular")), _Token("}")), structural, begin_generic_block, end_generic_block))
no_command = Alternative(_Token("\\begin{"), _Token("\\end"), Series(BACKSLASH, structural))
text = Series(TEXTCHUNK, ZeroOrMore(Series(_RE(''), TEXTCHUNK)))
block = Series(RegExp('{'), _RE(''), ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, _RE(''))), RegExp('}'), mandatory=3)
cfg_text = ZeroOrMore(Alternative(Series(Option(_RE('')), text), CMDNAME, SPECIAL))
config = Series(_Token("["), cfg_text, _Token("]"), mandatory=2)
pdfinfo = Series(_Token("\\pdfinfo"), block)
documentclass = Series(_Token("\\documentclass"), Option(config), block)
cline = Series(_Token("\\cline{"), INTEGER, _Token("-"), INTEGER, _Token("}"))
hline = _Token("\\hline")
multicolumn = Series(_Token("\\multicolumn"), _Token("{"), INTEGER, _Token("}"), tabular_config, block_of_paragraphs)
caption = Series(_Token("\\caption"), block)
includegraphics = Series(_Token("\\includegraphics"), Option(config), block)
footnote = Series(_Token("\\footnote"), block_of_paragraphs)
citep = Series(Alternative(_Token("\\citep"), _Token("\\cite")), Option(config), block)
citet = Series(_Token("\\citet"), Option(config), block)
generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(_RE(''), config)), _RE(''), block)))
CMDNAME = Series(RegExp('\\\\(?:(?!_)\\w)+'), wsp__)
structural = Alternative(Series(Token("subsection"), wsp__), Series(Token("section"), wsp__), Series(Token("chapter"), wsp__), Series(Token("subsubsection"), wsp__), Series(Token("paragraph"), wsp__), Series(Token("subparagraph"), wsp__), Series(Token("item"), wsp__))
blockcmd = Series(BACKSLASH, Alternative(Series(Alternative(Series(Token("begin{"), wsp__), Series(Token("end{"), wsp__)), Alternative(Series(Token("enumerate"), wsp__), Series(Token("itemize"), wsp__), Series(Token("figure"), wsp__), Series(Token("quote"), wsp__), Series(Token("quotation"), wsp__), Series(Token("tabular"), wsp__)), Series(Token("}"), wsp__)), structural, begin_generic_block, end_generic_block))
no_command = Alternative(Series(Token("\\begin{"), wsp__), Series(Token("\\end"), wsp__), Series(BACKSLASH, structural))
text = Series(TEXTCHUNK, ZeroOrMore(Series(RegExp(''), wsp__, TEXTCHUNK)))
block = Series(RegExp('{'), RegExp(''), wsp__, ZeroOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)), RegExp('}'), mandatory=4)
cfg_text = ZeroOrMore(Alternative(Series(Option(Series(RegExp(''), wsp__)), text), CMDNAME, SPECIAL))
config = Series(Series(Token("["), wsp__), cfg_text, Series(Token("]"), wsp__), mandatory=2)
pdfinfo = Series(Series(Token("\\pdfinfo"), wsp__), block)
documentclass = Series(Series(Token("\\documentclass"), wsp__), Option(config), block)
cline = Series(Series(Token("\\cline{"), wsp__), INTEGER, Series(Token("-"), wsp__), INTEGER, Series(Token("}"), wsp__))
hline = Series(Token("\\hline"), wsp__)
multicolumn = Series(Series(Token("\\multicolumn"), wsp__), Series(Token("{"), wsp__), INTEGER, Series(Token("}"), wsp__), tabular_config, block_of_paragraphs)
caption = Series(Series(Token("\\caption"), wsp__), block)
includegraphics = Series(Series(Token("\\includegraphics"), wsp__), Option(config), block)
footnote = Series(Series(Token("\\footnote"), wsp__), block_of_paragraphs)
citep = Series(Alternative(Series(Token("\\citep"), wsp__), Series(Token("\\cite"), wsp__)), Option(config), block)
citet = Series(Series(Token("\\citet"), wsp__), Option(config), block)
generic_command = Series(NegativeLookahead(no_command), CMDNAME, Option(Series(Option(Series(RegExp(''), wsp__, config)), RegExp(''), wsp__, block)))
text_command = Alternative(TXTCOMMAND, ESCAPED, BRACKETS)
known_command = Alternative(citet, citep, footnote, includegraphics, caption, multicolumn, hline, cline, documentclass, pdfinfo)
command = Alternative(known_command, text_command, generic_command)
......@@ -289,46 +289,46 @@ class LaTeXGrammar(Grammar):
begin_environment = Series(RegExp('\\\\begin{'), NAME, RegExp('}'), mandatory=1)
end_inline_env = Synonym(end_environment)
begin_inline_env = Alternative(Series(NegativeLookbehind(LB), begin_environment), Series(begin_environment, NegativeLookahead(LFF)))
generic_inline_env = Series(begin_inline_env, _RE(''), paragraph, end_inline_env, mandatory=3)
generic_inline_env = Series(begin_inline_env, RegExp(''), wsp__, paragraph, end_inline_env, mandatory=4)
known_inline_env = Synonym(inline_math)
inline_environment = Alternative(known_inline_env, generic_inline_env)
line_element = Alternative(text, block, inline_environment, command)
text_element.set(Alternative(line_element, LINEFEED))
paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, _RE(''))))
paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), text_element, RegExp(''), wsp__)))
sequence = Series(Option(WSPC), OneOrMore(Series(Alternative(paragraph, block_environment), Option(PARSEP))))
block_of_paragraphs.set(Series(_Token("{"), Option(sequence), _Token("}"), mandatory=2))
tabular_config.set(Series(_Token("{"), _RE('[lcr|]+'), _Token("}"), mandatory=2))
tabular_cell = ZeroOrMore(Series(line_element, _RE('')))
tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(_Token("&"), Alternative(multicolumn, tabular_cell))), _Token("\\\\"), Alternative(hline, ZeroOrMore(cline)))
tabular = Series(_Token("\\begin{tabular}"), tabular_config, ZeroOrMore(tabular_row), _Token("\\end{tabular}"), mandatory=3)
verbatim = Series(_Token("\\begin{verbatim}"), sequence, _Token("\\end{verbatim}"), mandatory=2)
quotation = Alternative(Series(_Token("\\begin{quotation}"), sequence, _Token("\\end{quotation}"), mandatory=2), Series(_Token("\\begin{quote}"), sequence, _Token("\\end{quote}"), mandatory=2))
figure = Series(_Token("\\begin{figure}"), sequence, _Token("\\end{figure}"), mandatory=2)
item = Series(_Token("\\item"), sequence)
enumerate = Series(_Token("\\begin{enumerate}"), Option(WSPC), ZeroOrMore(item), _Token("\\end{enumerate}"), mandatory=3)
itemize = Series(_Token("\\begin{itemize}"), Option(WSPC), ZeroOrMore(item), _Token("\\end{itemize}"), mandatory=3)
block_of_paragraphs.set(Series(Series(Token("{"), wsp__), Option(sequence), Series(Token("}"), wsp__), mandatory=2))
tabular_config.set(Series(Series(Token("{"), wsp__), RegExp('[lcr|]+'), wsp__, Series(Token("}"), wsp__), mandatory=3))
tabular_cell = ZeroOrMore(Series(line_element, RegExp(''), wsp__))
tabular_row = Series(Alternative(multicolumn, tabular_cell), ZeroOrMore(Series(Series(Token("&"), wsp__), Alternative(multicolumn, tabular_cell))), Series(Token("\\\\"), wsp__), Alternative(hline, ZeroOrMore(cline)))
tabular = Series(Series(Token("\\begin{tabular}"), wsp__), tabular_config, ZeroOrMore(tabular_row), Series(Token("\\end{tabular}"), wsp__), mandatory=3)
verbatim = Series(Series(Token("\\begin{verbatim}"), wsp__), sequence, Series(Token("\\end{verbatim}"), wsp__), mandatory=2)
quotation = Alternative(Series(Series(Token("\\begin{quotation}"), wsp__), sequence, Series(Token("\\end{quotation}"), wsp__), mandatory=2), Series(Series(Token("\\begin{quote}"), wsp__), sequence, Series(Token("\\end{quote}"), wsp__), mandatory=2))
figure = Series(Series(Token("\\begin{figure}"), wsp__), sequence, Series(Token("\\end{figure}"), wsp__), mandatory=2)
item = Series(Series(Token("\\item"), wsp__), sequence)
enumerate = Series(Series(Token("\\begin{enumerate}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{enumerate}"), wsp__), mandatory=3)
itemize = Series(Series(Token("\\begin{itemize}"), wsp__), Option(WSPC), ZeroOrMore(item), Series(Token("\\end{itemize}"), wsp__), mandatory=3)
end_generic_block.set(Series(Lookbehind(LB), end_environment, LFF))
begin_generic_block.set(Series(Lookbehind(LB), begin_environment, LFF))
generic_block = Series(begin_generic_block, sequence, end_generic_block, mandatory=2)
known_environment = Alternative(itemize, enumerate, figure, tabular, quotation, verbatim)
block_environment.set(Alternative(known_environment, generic_block))
heading = Synonym(block)
Index = Series(Option(WSPC), _Token("\\printindex"))
Bibliography = Series(Option(WSPC), _Token("\\bibliography"), heading)
SubParagraph = Series(_Token("\\subparagraph"), heading, Option(sequence))
Index = Series(Option(WSPC), Series(Token("\\printindex"), wsp__))
Bibliography = Series(Option(WSPC), Series(Token("\\bibliography"), wsp__), heading)
SubParagraph = Series(Series(Token("\\subparagraph"), wsp__), heading, Option(sequence))
SubParagraphs = OneOrMore(Series(Option(WSPC), SubParagraph))
Paragraph = Series(_Token("\\paragraph"), heading, ZeroOrMore(Alternative(sequence, SubParagraphs)))
Paragraph = Series(Series(Token("\\paragraph"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubParagraphs)))
Paragraphs = OneOrMore(Series(Option(WSPC), Paragraph))
SubSubSection = Series(_Token("\\subsubsection"), heading, ZeroOrMore(Alternative(sequence, Paragraphs)))
SubSubSection = Series(Series(Token("\\subsubsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, Paragraphs)))
SubSubSections = OneOrMore(Series(Option(WSPC), SubSubSection))
SubSection = Series(_Token("\\subsection"), heading, ZeroOrMore(Alternative(sequence, SubSubSections)))
SubSection = Series(Series(Token("\\subsection"), wsp__), heading, ZeroOrMore(Alternative(sequence, SubSubSections)))
SubSections = OneOrMore(Series(Option(WSPC), SubSection))