16.12.2021, 9:00 - 11:00: Due to updates GitLab may be unavailable for some minutes between 09:00 and 11:00.

Commit d18f157c authored by Eckhart Arnold
Browse files

- preprocessing tests + some bug fixes

parent f2162cfb
...@@ -18,19 +18,20 @@ implied. See the License for the specific language governing ...@@ -18,19 +18,20 @@ implied. See the License for the specific language governing
permissions and limitations under the License. permissions and limitations under the License.
""" """
# Flat namespace for the DHParser Package. Is this a good idea...?
from .error import *
from .dsl import * from .dsl import *
from .ebnf import * from .ebnf import *
from .parsers import * # Flat namespace for the DHParser Package. Is this a good idea...?
from .error import *
from .parse import *
from .preprocess import *
from .stringview import * from .stringview import *
from .syntaxtree import * from .syntaxtree import *
from .testing import *
from .toolkit import * from .toolkit import *
from .transform import * from .transform import *
from .testing import *
from .versionnumber import __version__ from .versionnumber import __version__
__author__ = "Eckhart Arnold <arnold@badw.de>" __author__ = "Eckhart Arnold <arnold@badw.de>"
__copyright__ = "http://www.apache.org/licenses/LICENSE-2.0" __copyright__ = "http://www.apache.org/licenses/LICENSE-2.0"
# __all__ = ['toolkit', 'stringview', 'error', 'syntaxtree', 'parser', 'transform', 'ebnf', 'dsl', 'testing', # __all__ = ['toolkit', 'stringview', 'error', 'syntaxtree', 'preprocess', 'parse',
# 'versionnumber'] # flat namespace # 'transform', 'ebnf', 'dsl', 'testing', 'versionnumber']
...@@ -20,18 +20,20 @@ compilation of domain specific languages based on an EBNF-grammar. ...@@ -20,18 +20,20 @@ compilation of domain specific languages based on an EBNF-grammar.
""" """
import os import os
from typing import Any, cast, List, Tuple, Union, Iterator, Iterable
from DHParser.ebnf import EBNFCompiler, grammar_changed, \ from DHParser.ebnf import EBNFCompiler, grammar_changed, \
get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \ get_ebnf_preprocessor, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc PreprocessorFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
from DHParser.error import Error, is_error, has_errors, only_errors from DHParser.error import Error, is_error, has_errors, only_errors
from DHParser.parsers import Grammar, Compiler, compile_source, nil_preprocessor, PreprocessorFunc from DHParser.parse import Grammar, Compiler, compile_source
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc from DHParser.syntaxtree import Node, TransformationFunc
from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object, \ from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object, \
re, typing re
from typing import Any, cast, List, Tuple, Union, Iterator, Iterable
__all__ = ('GrammarError', __all__ = ('DHPARSER_IMPORTS',
'GrammarError',
'CompilationError', 'CompilationError',
'load_compiler_suite', 'load_compiler_suite',
'compileDSL', 'compileDSL',
...@@ -70,7 +72,7 @@ try: ...@@ -70,7 +72,7 @@ try:
except ImportError: except ImportError:
import re import re
from DHParser import logging, is_filename, load_if_file, \\ from DHParser import logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, \\ Grammar, Compiler, nil_preprocessor, PreprocessorToken, \\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\ Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered, \\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\ Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\ ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
...@@ -495,14 +497,15 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It ...@@ -495,14 +497,15 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
+ "\n# ".join(str(error).split('\n)'))) + "\n# ".join(str(error).split('\n)')))
print(result) print(result)
finally: finally:
if f: f.close() if f:
f.close()
return messages return messages
def recompile_grammar(ebnf_filename, force=False) -> bool: def recompile_grammar(ebnf_filename, force=False) -> bool:
""" """
Recompiles an ebnf-grammar if necessary, that is, if either no Re-compiles an EBNF-grammar if necessary, that is, if either no
corresponding 'XXXXCompiler.py'-file exists or if that file is corresponding 'XXXXCompiler.py'-file exists or if that file is
outdated. outdated.
......
...@@ -19,18 +19,19 @@ permissions and limitations under the License. ...@@ -19,18 +19,19 @@ permissions and limitations under the License.
import keyword import keyword
from collections import OrderedDict from collections import OrderedDict
from functools import partial from functools import partial
from typing import Callable, Dict, List, Set, Tuple
from DHParser.error import Error from DHParser.error import Error
from DHParser.parsers import Grammar, mixin_comment, nil_preprocessor, Forward, RegExp, RE, \ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, RE, \
NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, \ NegativeLookahead, Alternative, Series, Option, OneOrMore, ZeroOrMore, Token, \
Compiler, PreprocessorFunc Compiler
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, TransformationFunc, WHITESPACE_PTYPE, TOKEN_PTYPE from DHParser.syntaxtree import Node, TransformationFunc, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re, typing from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name, re
from DHParser.transform import traverse, remove_brackets, \ from DHParser.transform import traverse, remove_brackets, \
reduce_single_child, replace_by_single_child, remove_expendables, \ reduce_single_child, replace_by_single_child, remove_expendables, \
remove_tokens, flatten, forbid, assert_content, remove_infix_operator remove_tokens, flatten, forbid, assert_content, remove_infix_operator
from DHParser.versionnumber import __version__ from DHParser.versionnumber import __version__
from typing import Callable, Dict, List, Set, Tuple
__all__ = ('get_ebnf_preprocessor', __all__ = ('get_ebnf_preprocessor',
'get_ebnf_grammar', 'get_ebnf_grammar',
...@@ -332,7 +333,7 @@ class EBNFCompiler(Compiler): ...@@ -332,7 +333,7 @@ class EBNFCompiler(Compiler):
`alternative = a | b` `alternative = a | b`
Now `[str(node) for node in self.rules['alternative']]` Now `[node.content for node in self.rules['alternative']]`
yields `['alternative = a | b', 'a', 'b']` yields `['alternative = a | b', 'a', 'b']`
symbols: A mapping of symbol names to their first usage (not symbols: A mapping of symbol names to their first usage (not
...@@ -597,7 +598,7 @@ class EBNFCompiler(Compiler): ...@@ -597,7 +598,7 @@ class EBNFCompiler(Compiler):
def on_definition(self, node: Node) -> Tuple[str, str]: def on_definition(self, node: Node) -> Tuple[str, str]:
rule = str(node.children[0]) rule = node.children[0].content
if rule in self.rules: if rule in self.rules:
first = self.rules[rule][0] first = self.rules[rule][0]
if not first._errors: if not first._errors:
...@@ -652,7 +653,7 @@ class EBNFCompiler(Compiler): ...@@ -652,7 +653,7 @@ class EBNFCompiler(Compiler):
def on_directive(self, node: Node) -> str: def on_directive(self, node: Node) -> str:
key = str(node.children[0]).lower() key = node.children[0].content.lower()
assert key not in self.directives['tokens'] assert key not in self.directives['tokens']
if key not in self.REPEATABLE_DIRECTIVES: if key not in self.REPEATABLE_DIRECTIVES:
...@@ -674,8 +675,9 @@ class EBNFCompiler(Compiler): ...@@ -674,8 +675,9 @@ class EBNFCompiler(Compiler):
else: else:
node.add_error('Value "%s" not allowed for directive "%s".' % (value, key)) node.add_error('Value "%s" not allowed for directive "%s".' % (value, key))
else: else:
value = str(node.children[1]).strip("~") # cast(str, node.children[1].result).strip("~") value = node.children[1].content.strip("~") # cast(str, node.children[
if value != str(node.children[1]): # cast(str, node.children[1].result): # 1].result).strip("~")
if value != node.children[1].content: # cast(str, node.children[1].result):
node.add_error("Whitespace marker '~' not allowed in definition of " node.add_error("Whitespace marker '~' not allowed in definition of "
"%s regular expression." % key) "%s regular expression." % key)
if value[0] + value[-1] in {'""', "''"}: if value[0] + value[-1] in {'""', "''"}:
...@@ -688,11 +690,11 @@ class EBNFCompiler(Compiler): ...@@ -688,11 +690,11 @@ class EBNFCompiler(Compiler):
self.directives[key] = value self.directives[key] = value
elif key == 'ignorecase': elif key == 'ignorecase':
if str(node.children[1]).lower() not in {"off", "false", "no"}: if node.children[1].content.lower() not in {"off", "false", "no"}:
self.re_flags.add('i') self.re_flags.add('i')
# elif key == 'testing': # elif key == 'testing':
# value = str(node.children[1]) # value = node.children[1].content
# self.directives['testing'] = value.lower() not in {"off", "false", "no"} # self.directives['testing'] = value.lower() not in {"off", "false", "no"}
elif key == 'literalws': elif key == 'literalws':
...@@ -708,7 +710,7 @@ class EBNFCompiler(Compiler): ...@@ -708,7 +710,7 @@ class EBNFCompiler(Compiler):
elif key in {'tokens', 'preprocessor_tokens'}: elif key in {'tokens', 'preprocessor_tokens'}:
tokens = self.compile(node.children[1]) tokens = self.compile(node.children[1])
redeclared = self.directives['tokes'] & tokens redeclared = self.directives['tokens'] & tokens
if redeclared: if redeclared:
node.add_error('Tokens %s have already been declared earlier. ' node.add_error('Tokens %s have already been declared earlier. '
% str(redeclared) + 'Later declaration will be ignored', % str(redeclared) + 'Later declaration will be ignored',
...@@ -752,7 +754,7 @@ class EBNFCompiler(Compiler): ...@@ -752,7 +754,7 @@ class EBNFCompiler(Compiler):
filtered_children = [] filtered_children = []
i = 0 i = 0
for nd in node.children: for nd in node.children:
if nd.parser.ptype == TOKEN_PTYPE and str(nd) == "§": if nd.parser.ptype == TOKEN_PTYPE and nd.content == "§":
mandatory_marker.append(i) mandatory_marker.append(i)
if i == 0: if i == 0:
nd.add_error('First item of a series should not be mandatory.', nd.add_error('First item of a series should not be mandatory.',
...@@ -774,7 +776,7 @@ class EBNFCompiler(Compiler): ...@@ -774,7 +776,7 @@ class EBNFCompiler(Compiler):
def on_factor(self, node: Node) -> str: def on_factor(self, node: Node) -> str:
assert node.children assert node.children
assert len(node.children) >= 2, node.as_sxpr() assert len(node.children) >= 2, node.as_sxpr()
prefix = str(node.children[0]) # cast(str, node.children[0].result) prefix = node.children[0].content
custom_args = [] # type: List[str] custom_args = [] # type: List[str]
if prefix in {'::', ':'}: if prefix in {'::', ':'}:
...@@ -806,15 +808,15 @@ class EBNFCompiler(Compiler): ...@@ -806,15 +808,15 @@ class EBNFCompiler(Compiler):
if len(nd.children) >= 1: if len(nd.children) >= 1:
nd = nd.children[0] nd = nd.children[0]
while nd.parser.name == "symbol": while nd.parser.name == "symbol":
symlist = self.rules.get(str(nd), []) symlist = self.rules.get(nd.content, [])
if len(symlist) == 2: if len(symlist) == 2:
nd = symlist[1] nd = symlist[1]
else: else:
if len(symlist) == 1: if len(symlist) == 1:
nd = symlist[0].children[1] nd = symlist[0].children[1]
break break
if (nd.parser.name != "regexp" or str(nd)[:1] != '/' if (nd.parser.name != "regexp" or nd.content[:1] != '/'
or str(nd)[-1:] != '/'): or nd.content[-1:] != '/'):
node.add_error("Lookbehind-parser can only be used with plain RegExp-" node.add_error("Lookbehind-parser can only be used with plain RegExp-"
"parsers, not with: " + nd.parser.name + nd.parser.ptype) "parsers, not with: " + nd.parser.name + nd.parser.ptype)
...@@ -838,10 +840,6 @@ class EBNFCompiler(Compiler): ...@@ -838,10 +840,6 @@ class EBNFCompiler(Compiler):
return self.non_terminal(node, 'OneOrMore') return self.non_terminal(node, 'OneOrMore')
def on_regexchain(self, node) -> str:
    """Handler for `regexchain` nodes.

    Always raises ``EBNFCompilerError`` with the message
    "Not yet implemented!"; `node` is not inspected and nothing is returned.
    """
    raise EBNFCompilerError("Not yet implemented!")
def on_group(self, node) -> str: def on_group(self, node) -> str:
raise EBNFCompilerError("Group nodes should have been eliminated by " raise EBNFCompilerError("Group nodes should have been eliminated by "
"AST transformation!") "AST transformation!")
...@@ -851,7 +849,7 @@ class EBNFCompiler(Compiler): ...@@ -851,7 +849,7 @@ class EBNFCompiler(Compiler):
assert len(node.children) == 1 assert len(node.children) == 1
nd = node.children[0] nd = node.children[0]
for child in nd.children: for child in nd.children:
if child.parser.ptype == TOKEN_PTYPE and str(nd) == "§": if child.parser.ptype == TOKEN_PTYPE and nd.content == "§":
node.add_error("Unordered parser lists cannot contain mandatory (§) items.") node.add_error("Unordered parser lists cannot contain mandatory (§) items.")
args = ', '.join(self.compile(child) for child in nd.children) args = ', '.join(self.compile(child) for child in nd.children)
if nd.parser.name == "term": if nd.parser.name == "term":
...@@ -863,7 +861,7 @@ class EBNFCompiler(Compiler): ...@@ -863,7 +861,7 @@ class EBNFCompiler(Compiler):
return "" return ""
def on_symbol(self, node: Node) -> str: # called only for symbols on the right hand side! def on_symbol(self, node: Node) -> str: # called only for symbols on the right hand side!
symbol = str(node) # ; assert result == cast(str, node.result) symbol = node.content # ; assert result == cast(str, node.result)
if symbol in self.directives['tokens']: if symbol in self.directives['tokens']:
return 'PreprocessorToken("' + symbol + '")' return 'PreprocessorToken("' + symbol + '")'
else: else:
...@@ -878,11 +876,12 @@ class EBNFCompiler(Compiler): ...@@ -878,11 +876,12 @@ class EBNFCompiler(Compiler):
def on_literal(self, node) -> str: def on_literal(self, node) -> str:
return 'Token(' + str(node).replace('\\', r'\\') + ')' # return 'Token(' + ', '.merge_children([node.result]) + ')' ? return 'Token(' + node.content.replace('\\', r'\\') + ')' # return 'Token(' + ',
# '.merge_children([node.result]) + ')' ?
def on_regexp(self, node: Node) -> str: def on_regexp(self, node: Node) -> str:
rx = str(node) rx = node.content
name = [] # type: List[str] name = [] # type: List[str]
if rx[0] == '/' and rx[-1] == '/': if rx[0] == '/' and rx[-1] == '/':
parser = 'RegExp(' parser = 'RegExp('
......
...@@ -18,11 +18,9 @@ permissions and limitations under the License. ...@@ -18,11 +18,9 @@ permissions and limitations under the License.
import bisect import bisect
import functools import functools
from typing import Iterable, Iterator, Union, Tuple, List
from DHParser.stringview import StringView from DHParser.stringview import StringView
from DHParser.toolkit import typing
from typing import Hashable, Iterable, Iterator, Union, Tuple, List
__all__ = ('Error', __all__ = ('Error',
'is_error', 'is_error',
...@@ -71,10 +69,16 @@ class Error: ...@@ -71,10 +69,16 @@ class Error:
@property @property
def level_str(self): def level_str(self):
"""Returns a string representation of the error level, e.g. "warning". """Returns a string representation of the error level, e.g. "warning"."""
"""
return "Warning" if is_warning(self.code) else "Error" return "Warning" if is_warning(self.code) else "Error"
def visualize(self, document: str) -> str:
    """Show the line of the document in which the error occurred, followed
    by a second line with a caret (``^``) under the error position.

    :param document: the complete text that ``self.pos`` refers to.
    :returns: the affected line, a newline, ``self.pos - start`` spaces,
        the caret and a final newline.
    """
    # Start of the line: one past the last line break before the position
    # (rfind yields -1 on the first line, so start becomes 0).
    start = document.rfind('\n', 0, self.pos) + 1
    stop = document.find('\n', self.pos)
    if stop < 0:
        # No line break after the position: the error is on the last line
        # and the document does not end with a newline. Slicing with -1
        # would cut off the last character of that line.
        stop = len(document)
    return document[start:stop] + '\n' + ' ' * (self.pos - start) + '^\n'
def is_warning(code: int) -> bool: def is_warning(code: int) -> bool:
"""Returns True, if error is merely a warning.""" """Returns True, if error is merely a warning."""
......
...@@ -59,26 +59,20 @@ import collections ...@@ -59,26 +59,20 @@ import collections
import copy import copy
import html import html
import os import os
from functools import partial
from DHParser.error import Error, is_error, has_errors, linebreaks, line_col from DHParser.error import Error, is_error, has_errors, linebreaks, line_col
from DHParser.stringview import StringView, EMPTY_STRING_VIEW from DHParser.stringview import StringView, EMPTY_STRING_VIEW
from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \ from DHParser.syntaxtree import Node, TransformationFunc, ParserBase, WHITESPACE_PTYPE, \
TOKEN_PTYPE, ZOMBIE_PARSER TOKEN_PTYPE, ZOMBIE_PARSER
from DHParser.preprocess import BEGIN_TOKEN, END_TOKEN, RX_TOKEN_NAME, \
PreprocessorFunc
from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name, \ from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name, \
load_if_file, re, typing escape_control_characters, load_if_file, re, typing
from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional from typing import Any, Callable, cast, Dict, List, Set, Tuple, Union, Optional
__all__ = ('HistoryRecord',
__all__ = ('PreprocessorFunc',
'HistoryRecord',
'Parser', 'Parser',
'Grammar', 'Grammar',
'RX_PREPROCESSOR_TOKEN',
'BEGIN_TOKEN',
'END_TOKEN',
'make_token',
'nil_preprocessor',
'PreprocessorToken', 'PreprocessorToken',
'RegExp', 'RegExp',
'RE', 'RE',
...@@ -117,9 +111,6 @@ __all__ = ('PreprocessorFunc', ...@@ -117,9 +111,6 @@ __all__ = ('PreprocessorFunc',
######################################################################## ########################################################################
PreprocessorFunc = Union[Callable[[str], str], partial]
LEFT_RECURSION_DEPTH = 8 # type: int LEFT_RECURSION_DEPTH = 8 # type: int
# because of python's recursion depth limit, this value ought not to be # because of python's recursion depth limit, this value ought not to be
# set too high. PyPy allows higher values than CPython # set too high. PyPy allows higher values than CPython
...@@ -242,7 +233,7 @@ class HistoryRecord: ...@@ -242,7 +233,7 @@ class HistoryRecord:
def excerpt(self): def excerpt(self):
length = len(self.node) if self.node else len(self.text) length = len(self.node) if self.node else len(self.text)
excerpt = str(self.node)[:min(length, 20)] if self.node else self.text[:20] excerpt = str(self.node)[:min(length, 20)] if self.node else self.text[:20]
excerpt = excerpt.replace('\n', '\\n') excerpt = escape_control_characters(excerpt)
if length > 20: if length > 20:
excerpt += '...' excerpt += '...'
return excerpt return excerpt
...@@ -1007,27 +998,28 @@ class Grammar: ...@@ -1007,27 +998,28 @@ class Grammar:
if html and len(log) % 100 == 0: if html and len(log) % 100 == 0:
log.append('\n</table>\n<table>\n' + HistoryRecord.COLGROUP) log.append('\n</table>\n<table>\n' + HistoryRecord.COLGROUP)
if is_logging(): if not is_logging():
assert self.history__, \ raise AssertionError("Cannot log history when logging is turned off!")
"Parser did not yet run or logging was turned off when running parser!" assert self.history__, \
if not log_file_name: "Parser did not yet run or logging was turned off when running parser!"
name = self.__class__.__name__ if not log_file_name:
log_file_name = name[:-7] if name.lower().endswith('grammar') else name name = self.__class__.__name__
elif log_file_name.lower().endswith('.log'): log_file_name = name[:-7] if name.lower().endswith('grammar') else name
log_file_name = log_file_name[:-4] elif log_file_name.lower().endswith('.log'):
full_history, match_history, errors_only = [], [], [] log_file_name = log_file_name[:-4]
for record in self.history__: full_history, match_history, errors_only = [], [], []
line = record.as_html_tr() if html else str(record) for record in self.history__:
append_line(full_history, line) line = record.as_html_tr() if html else str(record)
if record.node and record.node.parser.ptype != WHITESPACE_PTYPE: append_line(full_history, line)
append_line(match_history, line) if record.node and record.node.parser.ptype != WHITESPACE_PTYPE:
if record.node.error_flag: append_line(match_history, line)
append_line(errors_only, line) if record.node.error_flag:
write_log(full_history, log_file_name + '_full') append_line(errors_only, line)
if len(full_history) > 250: write_log(full_history, log_file_name + '_full')
write_log(full_history[-200:], log_file_name + '_full.tail') if len(full_history) > 250:
write_log(match_history, log_file_name + '_match') write_log(full_history[-200:], log_file_name + '_full.tail')
write_log(errors_only, log_file_name + '_errors') write_log(match_history, log_file_name + '_match')
write_log(errors_only, log_file_name + '_errors')
def dsl_error_msg(parser: Parser, error_str: str) -> str: def dsl_error_msg(parser: Parser, error_str: str) -> str:
...@@ -1059,31 +1051,6 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str: ...@@ -1059,31 +1051,6 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
######################################################################## ########################################################################
# Pattern for preprocessor token names: one or more word characters.
RX_PREPROCESSOR_TOKEN = re.compile(r'\w+')
# Control characters that delimit a preprocessor token in the source text.
BEGIN_TOKEN = '\x1b'
END_TOKEN = '\x1c'


def make_token(token: str, argument: str = '') -> str:
    """
    Turns the ``token`` and ``argument`` into a special token that
    will be caught by the `PreprocessorToken`-parser.

    This function is a support function that should be used by
    preprocessors to inject preprocessor tokens into the source text.

    :param token: the token name; must consist entirely of characters
        matched by ``RX_PREPROCESSOR_TOKEN``.
    :param argument: optional payload; must not contain ``BEGIN_TOKEN``
        or ``END_TOKEN``.
    :returns: ``BEGIN_TOKEN + token + argument + END_TOKEN``
    :raises AssertionError: if ``token`` or ``argument`` violate the
        constraints above.
    """
    # fullmatch, not match: a prefix-only check would accept names such as
    # "A\x1cB", whose embedded delimiter would end the token prematurely.
    assert RX_PREPROCESSOR_TOKEN.fullmatch(token)
    assert argument.find(BEGIN_TOKEN) < 0
    assert argument.find(END_TOKEN) < 0
    return BEGIN_TOKEN + token + argument + END_TOKEN
def nil_preprocessor(text: str) -> str:
    """Identity preprocessor: hands the given text back unchanged."""
    return text
class PreprocessorToken(Parser): class PreprocessorToken(Parser):
""" """
Parses tokens that have been inserted by a preprocessor. Parses tokens that have been inserted by a preprocessor.
...@@ -1097,7 +1064,7 @@ class PreprocessorToken(Parser): ...@@ -1097,7 +1064,7 @@ class PreprocessorToken(Parser):
def __init__(self, token: str) -> None: def __init__(self, token: str) -> None:
assert token and token.isupper() assert token and token.isupper()
assert RX_PREPROCESSOR_TOKEN.match(token) assert RX_TOKEN_NAME.match(token)
super(PreprocessorToken, self).__init__(token) super(PreprocessorToken, self).__init__(token)
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]: def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
...@@ -1121,8 +1088,7 @@ class PreprocessorToken(Parser): ...@@ -1121,8 +1088,7 @@ class PreprocessorToken(Parser):
'(Most likely due to a preprocessor bug!)') '(Most likely due to a preprocessor bug!)')
return node, text[end:] return node, text[end:]
if text[1:len(self.name) + 1] == self.name: if text[1:len(self.name) + 1] == self.name:
return Node(self, text[len(self.name) + 1:end]), \ return Node(self, text[len(self.name) + 2:end]), text[end + 1:]
text[end + 1:]
return None, text return None, text
...@@ -1157,15 +1123,21 @@ class RegExp(Parser): ...@@ -1157,15 +1123,21 @@ class RegExp(Parser):
return RegExp(regexp, self.name) return RegExp(regexp, self.name)
def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]: def __call__(self, text: StringView) -> Tuple[Optional[Node], StringView]:
if text[0:1] != BEGIN_TOKEN: # ESC starts a preprocessor token. match = text.match(self.regexp)
match = text.match(self.regexp) if match:
if match: capture = match.group(0)
end = text.index(match.end()) end = text.index(match.end())
return Node(self, match.group(0), True), text[end:] # regular expression must never match preprocessor-tokens!
# TODO: Find a better solution here, e.g. static checking/re-mangling at compile time
i = capture.find(BEGIN_TOKEN)
if i >= 0:
capture = capture[:i]
end = i
return Node(self, capture, True), text[end:]