2.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit 5ec9ebc8 authored by Eckhart Arnold
Browse files

- LaTeX tables can now be parsed (or so it seems...)

parent 3fd3a2dc
...@@ -156,7 +156,9 @@ class HistoryRecord: ...@@ -156,7 +156,9 @@ class HistoryRecord:
FAIL = "FAIL" FAIL = "FAIL"
def __init__(self, call_stack: List['Parser'], node: Node, remaining: int) -> None: def __init__(self, call_stack: List['Parser'], node: Node, remaining: int) -> None:
self.call_stack = call_stack # type: List['Parser'] # copy call stack, dropping uninformative Forward-Parsers
self.call_stack = [p for p in call_stack if p.ptype != ":Forward"]
# type: List['Parser']
self.node = node # type: Node self.node = node # type: Node
self.remaining = remaining # type: int self.remaining = remaining # type: int
document = call_stack[-1].grammar.document__ if call_stack else '' document = call_stack[-1].grammar.document__ if call_stack else ''
...@@ -184,7 +186,6 @@ class HistoryRecord: ...@@ -184,7 +186,6 @@ class HistoryRecord:
return (slice(-self.remaining - self.node.len, -self.remaining) if self.node return (slice(-self.remaining - self.node.len, -self.remaining) if self.node
else slice(-self.remaining, None)) else slice(-self.remaining, None))
@staticmethod @staticmethod
def last_match(history: List['HistoryRecord']) -> Optional['HistoryRecord']: def last_match(history: List['HistoryRecord']) -> Optional['HistoryRecord']:
""" """
...@@ -280,7 +281,7 @@ def add_parser_guard(parser_func): ...@@ -280,7 +281,7 @@ def add_parser_guard(parser_func):
if grammar.history_tracking__: if grammar.history_tracking__:
# don't track returning parsers except in case an error has occurred # don't track returning parsers except in case an error has occurred
if grammar.moving_forward__ or (node and node._errors): if grammar.moving_forward__ or (node and node._errors):
record = HistoryRecord(grammar.call_stack__.copy(), node, len(rest)) record = HistoryRecord(grammar.call_stack__, node, len(rest))
grammar.history__.append(record) grammar.history__.append(record)
# print(record.stack, record.status, rest[:20].replace('\n', '|')) # print(record.stack, record.status, rest[:20].replace('\n', '|'))
grammar.call_stack__.pop() grammar.call_stack__.pop()
...@@ -1365,7 +1366,8 @@ class Alternative(NaryOperator): ...@@ -1365,7 +1366,8 @@ class Alternative(NaryOperator):
def __init__(self, *parsers: Parser, name: str = '') -> None: def __init__(self, *parsers: Parser, name: str = '') -> None:
super(Alternative, self).__init__(*parsers, name=name) super(Alternative, self).__init__(*parsers, name=name)
assert len(self.parsers) >= 1 assert len(self.parsers) >= 1
assert all(not isinstance(p, Optional) for p in self.parsers) # only the last alternative may be optional. Could this be checked at compile time?
assert all(not isinstance(p, Optional) for p in self.parsers[:-1])
self.been_here = dict() # type: Dict[int, int] self.been_here = dict() # type: Dict[int, int]
def __call__(self, text: str) -> Tuple[Node, str]: def __call__(self, text: str) -> Tuple[Node, str]:
......
...@@ -69,13 +69,14 @@ quotation = ("\begin{quotation}" sequence §"\end{quotation}") ...@@ -69,13 +69,14 @@ quotation = ("\begin{quotation}" sequence §"\end{quotation}")
| ("\begin{quote}" sequence §"\end{quote}") | ("\begin{quote}" sequence §"\end{quote}")
verbatim = "\begin{verbatim}" sequence §"\end{verbatim}" verbatim = "\begin{verbatim}" sequence §"\end{verbatim}"
tabular = "\begin{tabular}" tabular_config { tabular_row } §"\end{tabular}" tabular = "\begin{tabular}" tabular_config { tabular_row } §"\end{tabular}"
tabular_row = [multicolumn | tabular_cell] { "&" [multicolumn | tabular_cell] } "\\" [ hline | cline ] tabular_row = (multicolumn | tabular_cell) { "&" (multicolumn | tabular_cell) }
tabular_cell = { text_element //~ }+ "\\" ( hline | { cline } )
tabular_cell = { text_element //~ }
tabular_config = "{" /[lcr|]+/~ §"}" tabular_config = "{" /[lcr|]+/~ §"}"
#### paragraphs and sequences of paragraphs #### #### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/~ [sequence] §/}/ block_of_paragraphs = "{" [sequence] §"}"
sequence = { (paragraph | block_environment ) [PARSEP] }+ sequence = { (paragraph | block_environment ) [PARSEP] }+
paragraph = { !blockcmd (text_element | LINEFEED) //~ }+ paragraph = { !blockcmd (text_element | LINEFEED) //~ }+
text_element = text | block | inline_environment | command text_element = text | block | inline_environment | command
......
...@@ -118,13 +118,14 @@ class LaTeXGrammar(Grammar): ...@@ -118,13 +118,14 @@ class LaTeXGrammar(Grammar):
| ("\begin{quote}" sequence §"\end{quote}") | ("\begin{quote}" sequence §"\end{quote}")
verbatim = "\begin{verbatim}" sequence §"\end{verbatim}" verbatim = "\begin{verbatim}" sequence §"\end{verbatim}"
tabular = "\begin{tabular}" tabular_config { tabular_row } §"\end{tabular}" tabular = "\begin{tabular}" tabular_config { tabular_row } §"\end{tabular}"
tabular_row = [multicolumn | tabular_cell] { "&" [multicolumn | tabular_cell] } "\\" [ hline | cline ] tabular_row = (multicolumn | tabular_cell) { "&" (multicolumn | tabular_cell) }
tabular_cell = { text_element //~ }+ "\\" ( hline | { cline } )
tabular_cell = { text_element //~ }
tabular_config = "{" /[lcr|]+/~ §"}" tabular_config = "{" /[lcr|]+/~ §"}"
#### paragraphs and sequences of paragraphs #### #### paragraphs and sequences of paragraphs ####
block_of_paragraphs = /{/~ [sequence] §/}/ block_of_paragraphs = "{" [sequence] §"}"
sequence = { (paragraph | block_environment ) [PARSEP] }+ sequence = { (paragraph | block_environment ) [PARSEP] }+
paragraph = { !blockcmd (text_element | LINEFEED) //~ }+ paragraph = { !blockcmd (text_element | LINEFEED) //~ }+
text_element = text | block | inline_environment | command text_element = text | block | inline_environment | command
...@@ -218,7 +219,7 @@ class LaTeXGrammar(Grammar): ...@@ -218,7 +219,7 @@ class LaTeXGrammar(Grammar):
paragraph = Forward() paragraph = Forward()
tabular_config = Forward() tabular_config = Forward()
text_element = Forward() text_element = Forward()
source_hash__ = "4003d206b4ecbd76dd8be99df87af4c0" source_hash__ = "e493869bdc02eb835ec3ce1ebfb0a4ea"
parser_initialization__ = "upon instantiation" parser_initialization__ = "upon instantiation"
COMMENT__ = r'%.*(?:\n|$)' COMMENT__ = r'%.*(?:\n|$)'
WSP__ = mixin_comment(whitespace=r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?', comment=r'%.*(?:\n|$)') WSP__ = mixin_comment(whitespace=r'[ \t]*(?:\n(?![ \t]*\n)[ \t]*)?', comment=r'%.*(?:\n|$)')
...@@ -269,10 +270,12 @@ class LaTeXGrammar(Grammar): ...@@ -269,10 +270,12 @@ class LaTeXGrammar(Grammar):
text_element.set(Alternative(text, block, inline_environment, command)) text_element.set(Alternative(text, block, inline_environment, command))
paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), Alternative(text_element, LINEFEED), RE('')))) paragraph.set(OneOrMore(Series(NegativeLookahead(blockcmd), Alternative(text_element, LINEFEED), RE(''))))
sequence = OneOrMore(Series(Alternative(paragraph, block_environment), Optional(PARSEP))) sequence = OneOrMore(Series(Alternative(paragraph, block_environment), Optional(PARSEP)))
block_of_paragraphs.set(Series(RE('{'), Optional(sequence), Required(RegExp('}')))) block_of_paragraphs.set(Series(Token("{"), Optional(sequence), Required(Token("}"))))
tabular_config.set(Series(Token("{"), RE('[lcr|]+'), Required(Token("}")))) tabular_config.set(Series(Token("{"), RE('[lcr|]+'), Required(Token("}"))))
tabular_cell = OneOrMore(Series(text_element, RE(''))) tabular_cell = ZeroOrMore(Series(text_element, RE('')))
tabular_row = Series(Optional(Alternative(multicolumn, tabular_cell)), ZeroOrMore(Series(Token("&"), Optional(Alternative(multicolumn, tabular_cell)))), Token("\\\\"), Optional(Alternative(hline, cline))) tabular_row = Series(Alternative(multicolumn, tabular_cell),
ZeroOrMore(Series(Token("&"), Alternative(multicolumn, tabular_cell))),
Token("\\\\"), Alternative(hline, ZeroOrMore(cline)))
tabular = Series(Token("\\begin{tabular}"), tabular_config, ZeroOrMore(tabular_row), Required(Token("\\end{tabular}"))) tabular = Series(Token("\\begin{tabular}"), tabular_config, ZeroOrMore(tabular_row), Required(Token("\\end{tabular}")))
verbatim = Series(Token("\\begin{verbatim}"), sequence, Required(Token("\\end{verbatim}"))) verbatim = Series(Token("\\begin{verbatim}"), sequence, Required(Token("\\end{verbatim}")))
quotation = Alternative(Series(Token("\\begin{quotation}"), sequence, Required(Token("\\end{quotation}"))), Series(Token("\\begin{quote}"), sequence, Required(Token("\\end{quote}")))) quotation = Alternative(Series(Token("\\begin{quotation}"), sequence, Required(Token("\\end{quotation}"))), Series(Token("\\begin{quote}"), sequence, Required(Token("\\end{quote}"))))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment