Commit fab4160e authored by eckhart's avatar eckhart
Browse files

- sync commit

parent f09ba4fd
...@@ -1068,7 +1068,7 @@ class PreprocessorToken(Parser): ...@@ -1068,7 +1068,7 @@ class PreprocessorToken(Parser):
def __init__(self, token: str) -> None: def __init__(self, token: str) -> None:
assert token and token.isupper() assert token and token.isupper()
assert RX_TOKEN_NAME.match(token) assert RX_TOKEN_NAME.match(token)
super(PreprocessorToken, self).__init__(token) super().__init__(token)
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]: def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
if text[0:1] == BEGIN_TOKEN: if text[0:1] == BEGIN_TOKEN:
...@@ -1095,6 +1095,30 @@ class PreprocessorToken(Parser): ...@@ -1095,6 +1095,30 @@ class PreprocessorToken(Parser):
return None, text return None, text
class PlainText(Parser):
    """
    Parser that matches one fixed, literal string at the very beginning
    of the text it is called with.

    On a match, a node holding the literal and the text that follows
    the literal are returned; otherwise ``None`` and the unchanged text.

    Example:
    >>> while_token = PlainText("while")
    >>> Grammar(while_token)("while").content
    'while'
    """

    def __init__(self, text: str, name: str = '') -> None:
        """Store the literal ``text`` to match; ``name`` is handed to the base class."""
        super().__init__(name)
        self.text = text
        # Cached once so that `__call__` does not recompute the length per match.
        self.textlen = len(self.text)

    def __deepcopy__(self, memo):
        """Return a new instance of the same class built from `text` and `name`.

        ``memo`` is accepted for the deep-copy protocol but not used.
        """
        cls = self.__class__
        return cls(self.text, self.name)

    def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
        """Try to match the stored literal at the start of ``text``.

        Returns:
            ``(node, rest)`` if ``text`` starts with the literal, where
            ``rest`` is ``text`` with the literal sliced off the front;
            ``(None, text)`` otherwise.
        """
        if not text.startswith(self.text):
            return None, text
        # NOTE(review): the third positional argument `True` is passed through
        # unchanged; presumably it flags the node as a leaf - confirm against Node.
        node = Node(self, self.text, True)
        return node, text[self.textlen:]
class RegExp(Parser): class RegExp(Parser):
r""" r"""
Regular expression parser. Regular expression parser.
...@@ -1114,7 +1138,7 @@ class RegExp(Parser): ...@@ -1114,7 +1138,7 @@ class RegExp(Parser):
""" """
def __init__(self, regexp, name: str = '') -> None: def __init__(self, regexp, name: str = '') -> None:
super(RegExp, self).__init__(name) super().__init__(name)
self.regexp = re.compile(regexp) if isinstance(regexp, str) else regexp self.regexp = re.compile(regexp) if isinstance(regexp, str) else regexp
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
...@@ -1123,7 +1147,7 @@ class RegExp(Parser): ...@@ -1123,7 +1147,7 @@ class RegExp(Parser):
regexp = copy.deepcopy(self.regexp, memo) regexp = copy.deepcopy(self.regexp, memo)
except TypeError: except TypeError:
regexp = self.regexp.pattern regexp = self.regexp.pattern
return RegExp(regexp, self.name) return self.__class__(regexp, self.name)
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]: def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
match = text.match(self.regexp) match = text.match(self.regexp)
...@@ -1179,7 +1203,7 @@ class RE(Parser): ...@@ -1179,7 +1203,7 @@ class RE(Parser):
EBNF-Example: `word = /\w+/~` EBNF-Example: `word = /\w+/~`
""" """
def __init__(self, regexp, wL=None, wR=None, name=''): def __init__(self, regexp, wL=None, wR=None, name: str='') -> None:
r"""Constructor for class RE. r"""Constructor for class RE.
Args: Args:
...@@ -1195,12 +1219,12 @@ class RE(Parser): ...@@ -1195,12 +1219,12 @@ class RE(Parser):
See above. See above.
name: The optional name of the parser. name: The optional name of the parser.
""" """
super(RE, self).__init__(name) super().__init__(name)
self.rx_wsl = wL self.rx_wsl = wL
self.rx_wsr = wR self.rx_wsr = wR
self.wsp_left = Whitespace(wL) if wL else ZOMBIE_PARSER self.wsp_left = Whitespace(wL) if wL else ZOMBIE_PARSER
self.wsp_right = Whitespace(wR) if wR else ZOMBIE_PARSER self.wsp_right = Whitespace(wR) if wR else ZOMBIE_PARSER
self.main = RegExp(regexp) self.main = self.create_main_parser(regexp)
def __deepcopy__(self, memo={}): def __deepcopy__(self, memo={}):
try: try:
...@@ -1216,8 +1240,7 @@ class RE(Parser): ...@@ -1216,8 +1240,7 @@ class RE(Parser):
main, txt = self.main(txt) main, txt = self.main(txt)
if main: if main:
wsr, txt = self.wsp_right(txt) wsr, txt = self.wsp_right(txt)
result = tuple(nd for nd in (wsl, main, wsr) result = tuple(nd for nd in (wsl, main, wsr) if nd)
if nd and nd.result != '')
return Node(self, result), txt return Node(self, result), txt
return None, text return None, text
...@@ -1244,6 +1267,10 @@ class RE(Parser): ...@@ -1244,6 +1267,10 @@ class RE(Parser):
return True return True
return False return False
def create_main_parser(self, arg) -> Parser:
    """Factory hook for the main parser of this compound parser.

    This default implementation wraps ``arg`` in a ``RegExp`` parser.
    Can be overridden to supply a different kind of main parser.
    """
    main_parser = RegExp(arg)
    return main_parser
class Token(RE): class Token(RE):
""" """
...@@ -1259,7 +1286,7 @@ class Token(RE): ...@@ -1259,7 +1286,7 @@ class Token(RE):
def __init__(self, token: str, wL=None, wR=None, name: str = '') -> None: def __init__(self, token: str, wL=None, wR=None, name: str = '') -> None:
self.token = token self.token = token
super(Token, self).__init__(escape_re(token), wL, wR, name) super().__init__(token, wL, wR, name)
def __deepcopy__(self, memo={}): def __deepcopy__(self, memo={}):
return self.__class__(self.token, self.rx_wsl, self.rx_wsr, self.name) return self.__class__(self.token, self.rx_wsl, self.rx_wsr, self.name)
...@@ -1267,6 +1294,9 @@ class Token(RE): ...@@ -1267,6 +1294,9 @@ class Token(RE):
def __repr__(self): def __repr__(self):
return '"%s"' % self.token if self.token.find('"') < 0 else "'%s'" % self.token return '"%s"' % self.token if self.token.find('"') < 0 else "'%s'" % self.token
def create_main_parser(self, arg) -> Parser:
    """Return a ``PlainText`` parser for ``arg`` as the main parser."""
    main_parser = PlainText(arg)
    return main_parser
######################################################################## ########################################################################
# #
...@@ -1316,7 +1346,7 @@ class NaryOperator(Parser): ...@@ -1316,7 +1346,7 @@ class NaryOperator(Parser):
""" """
def __init__(self, *parsers: Parser, name: str = '') -> None: def __init__(self, *parsers: Parser, name: str = '') -> None:
super(NaryOperator, self).__init__(name) super().__init__(name)
assert all([isinstance(parser, Parser) for parser in parsers]), str(parsers) assert all([isinstance(parser, Parser) for parser in parsers]), str(parsers)
self.parsers = parsers # type: Tuple[Parser, ...] self.parsers = parsers # type: Tuple[Parser, ...]
...@@ -1359,7 +1389,7 @@ class Option(UnaryOperator): ...@@ -1359,7 +1389,7 @@ class Option(UnaryOperator):
""" """
def __init__(self, parser: Parser, name: str = '') -> None: def __init__(self, parser: Parser, name: str = '') -> None:
super(Option, self).__init__(parser, name) super().__init__(parser, name)
# assert isinstance(parser, Parser) # assert isinstance(parser, Parser)
assert not isinstance(parser, Option), \ assert not isinstance(parser, Option), \
"Redundant nesting of options: %s(%s)" % (str(name), str(parser.name)) "Redundant nesting of options: %s(%s)" % (str(name), str(parser.name))
...@@ -1431,7 +1461,7 @@ class OneOrMore(UnaryOperator): ...@@ -1431,7 +1461,7 @@ class OneOrMore(UnaryOperator):
""" """
def __init__(self, parser: Parser, name: str = '') -> None: def __init__(self, parser: Parser, name: str = '') -> None:
super(OneOrMore, self).__init__(parser, name) super().__init__(parser, name)
assert not isinstance(parser, Option), \ assert not isinstance(parser, Option), \
"Use ZeroOrMore instead of nesting OneOrMore and Option: " \ "Use ZeroOrMore instead of nesting OneOrMore and Option: " \
"%s(%s)" % (str(name), str(parser.name)) "%s(%s)" % (str(name), str(parser.name))
...@@ -1476,7 +1506,7 @@ class Series(NaryOperator): ...@@ -1476,7 +1506,7 @@ class Series(NaryOperator):
NOPE = 1000 NOPE = 1000
def __init__(self, *parsers: Parser, mandatory: int = NOPE, name: str = '') -> None: def __init__(self, *parsers: Parser, mandatory: int = NOPE, name: str = '') -> None:
super(Series, self).__init__(*parsers, name=name) super().__init__(*parsers, name=name)
length = len(self.parsers) length = len(self.parsers)
assert 1 <= length < Series.NOPE, \ assert 1 <= length < Series.NOPE, \
'Length %i of series exceeds maximum length of %i' % (length, Series.NOPE) 'Length %i of series exceeds maximum length of %i' % (length, Series.NOPE)
...@@ -1581,7 +1611,7 @@ class Alternative(NaryOperator): ...@@ -1581,7 +1611,7 @@ class Alternative(NaryOperator):
""" """
def __init__(self, *parsers: Parser, name: str = '') -> None: def __init__(self, *parsers: Parser, name: str = '') -> None:
super(Alternative, self).__init__(*parsers, name=name) super().__init__(*parsers, name=name)
assert len(self.parsers) >= 1 assert len(self.parsers) >= 1
# only the last alternative may be optional. Could this be checked at compile time? # only the last alternative may be optional. Could this be checked at compile time?
assert all(not isinstance(p, Option) for p in self.parsers[:-1]), \ assert all(not isinstance(p, Option) for p in self.parsers[:-1]), \
......
General TODO-List General TODO-List
----------------- -----------------
- Position Handling: `Node._pos` and `Node._len` should be set by
parser guard to allow for early dropping of nodes. (Should speed
up tree-traversal later)
- Position handling should provide for position shifts during preprocessing
...@@ -36,13 +36,16 @@ Match-test "1" ...@@ -36,13 +36,16 @@ Match-test "1"
### CST ### AST
(GAP (GAP
(:RegExp (:RegExp
"" ""
"" ""
"" ""
) )
(:Whitespace
" "
)
) )
Match-test "2" Match-test "2"
...@@ -76,7 +79,7 @@ Match-test "3" ...@@ -76,7 +79,7 @@ Match-test "3"
### CST ### AST
(GAP (GAP
(:RegExp (:RegExp
"" ""
...@@ -84,6 +87,9 @@ Match-test "3" ...@@ -84,6 +87,9 @@ Match-test "3"
"" ""
"" ""
) )
(:Whitespace
" "
)
) )
Fail-test "10" Fail-test "10"
......
...@@ -82,6 +82,9 @@ Match-test "3" ...@@ -82,6 +82,9 @@ Match-test "3"
(text (text
"footnote" "footnote"
) )
(:Whitespace
" "
)
) )
) )
...@@ -160,12 +163,21 @@ Match-test "7" ...@@ -160,12 +163,21 @@ Match-test "7"
### AST ### AST
(block (block
(:Whitespace
" "
)
(generic_command (generic_command
(CMDNAME (CMDNAME
"\em" "\em"
) )
) )
(:Whitespace
" "
)
(text (text
"block" "block"
) )
(:Whitespace
" "
)
) )
\ No newline at end of file
...@@ -19,6 +19,9 @@ Match-test "1" ...@@ -19,6 +19,9 @@ Match-test "1"
"Professoren, Philister und Vieh; welche vier Stände doch nichts weniger" "Professoren, Philister und Vieh; welche vier Stände doch nichts weniger"
"als streng geschieden sind. Der Viehstand ist der bedeutendste." "als streng geschieden sind. Der Viehstand ist der bedeutendste."
) )
(:Whitespace
" "
)
) )
Match-test "2" Match-test "2"
...@@ -37,14 +40,23 @@ Match-test "2" ...@@ -37,14 +40,23 @@ Match-test "2"
" " " "
) )
(block (block
(:Whitespace
" "
)
(generic_command (generic_command
(CMDNAME (CMDNAME
"\em" "\em"
) )
) )
(:Whitespace
" "
)
(text (text
"inline blocks" "inline blocks"
) )
(:Whitespace
" "
)
) )
(:Whitespace (:Whitespace
" " " "
...@@ -59,8 +71,19 @@ Match-test "2" ...@@ -59,8 +71,19 @@ Match-test "2"
(CMDNAME (CMDNAME
"\emph" "\emph"
) )
(text (:Whitespace
"inline commands" " "
)
(block
(:Whitespace
" "
)
(text
"inline commands"
)
(:Whitespace
" "
)
) )
) )
(:Whitespace (:Whitespace
...@@ -84,6 +107,9 @@ Match-test "2" ...@@ -84,6 +107,9 @@ Match-test "2"
(text (text
"characters." "characters."
) )
(:Whitespace
" "
)
) )
Match-test "3" Match-test "3"
...@@ -101,6 +127,9 @@ Match-test "3" ...@@ -101,6 +127,9 @@ Match-test "3"
"Therefore," "Therefore,"
"this line still belongs to the same paragraph." "this line still belongs to the same paragraph."
) )
(:Whitespace
" "
)
) )
Match-test "4" Match-test "4"
...@@ -123,6 +152,9 @@ Match-test "4" ...@@ -123,6 +152,9 @@ Match-test "4"
"Comment lines do not break paragraphs." "Comment lines do not break paragraphs."
"in sequence." "in sequence."
) )
(:Whitespace
" "
)
) )
Match-test "5" Match-test "5"
...@@ -142,14 +174,23 @@ Match-test "5" ...@@ -142,14 +174,23 @@ Match-test "5"
" " " "
) )
(block (block
(:Whitespace
" "
)
(generic_command (generic_command
(CMDNAME (CMDNAME
"\em" "\em"
) )
) )
(:Whitespace
" "
)
(text (text
"emphasized" "emphasized"
) )
(:Whitespace
" "
)
) )
(:Whitespace (:Whitespace
" " " "
...@@ -161,14 +202,23 @@ Match-test "5" ...@@ -161,14 +202,23 @@ Match-test "5"
" " " "
) )
(block (block
(:Whitespace
" "
)
(generic_command (generic_command
(CMDNAME (CMDNAME
"\bf" "\bf"
) )
) )
(:Whitespace
" "
)
(text (text
"bold" "bold"
) )
(:Whitespace
" "
)
) )
(:Whitespace (:Whitespace
" " " "
...@@ -210,18 +260,33 @@ Match-test "5" ...@@ -210,18 +260,33 @@ Match-test "5"
" " " "
) )
(block (block
(:Whitespace
" "
)
(generic_command (generic_command
(CMDNAME (CMDNAME
"\large" "\large"
) )
) )
(:Whitespace
" "
)
(text (text
"large" "large"
) )
(:Whitespace
" "
)
)
(:Whitespace
" "
) )
(text (text
"." "."
) )
(:Whitespace
" "
)
) )
Match-test "6" Match-test "6"
...@@ -239,11 +304,17 @@ Match-test "6" ...@@ -239,11 +304,17 @@ Match-test "6"
" " " "
) )
(block (block
(:Whitespace
" "
)
(generic_command (generic_command
(CMDNAME (CMDNAME
"\xy" "\xy"
) )
) )
(:Whitespace
" "
)
(text (text
"unknown blocks" "unknown blocks"
) )
...@@ -251,9 +322,15 @@ Match-test "6" ...@@ -251,9 +322,15 @@ Match-test "6"
" " " "
) )
) )
(:Whitespace
" "
)
(text (text