Commit da4a9441 authored by eckhart's avatar eckhart

drop directive extended: not yet tested

parent 56362aa1
......@@ -282,6 +282,13 @@ CONFIG_PRESET['static_analysis'] = "none"
# Default value: False
CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
# Default value for the regular expression by which identifiers for
# parsers that yield anonymous nodes are distinguished from identifiers
# for parsers that yield named nodes. The default value is a regular
# expression that catches names with a leading underscore.
# Default value: '_'
CONFIG_PRESET['default_anonymous_regexp'] = r'_'
########################################################################
#
......
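For orientation, a minimal standalone sketch (not part of this commit) of what the new `default_anonymous_regexp` preset is meant to express: symbol names matching the pattern are treated as identifiers of parsers that yield anonymous nodes, and the default `'_'` catches names with a leading underscore. The symbol names below are made up for illustration.

```python
import re

# Hypothetical illustration of the preset's intent: re.match anchors at
# the start of the string, so the pattern '_' matches exactly those
# identifiers that begin with an underscore.
anonymous_regexp = re.compile(r'_')

for symbol in ('_linefeed', '_EOF', 'document', 'WS'):
    kind = 'anonymous' if anonymous_regexp.match(symbol) else 'named'
    print('%-10s -> %s' % (symbol, kind))
# _linefeed and _EOF count as anonymous; document and WS as named.
```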
......@@ -83,8 +83,8 @@ try:
except ImportError:
import re
from DHParser import start_logging, suspend_logging, resume_logging, is_filename, load_if_file, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, DropRegExp, \\
Lookbehind, Lookahead, Alternative, Pop, Token, DropToken, Synonym, AllOf, SomeOf, \\
Grammar, Compiler, nil_preprocessor, PreprocessorToken, Whitespace, Drop, \\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, \\
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture, \\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source, \\
grammar_changed, last_value, counterpart, PreprocessorFunc, is_empty, remove_if, \\
......@@ -382,7 +382,8 @@ WHITESPACE_TYPES = {'horizontal': r'[\t ]*', # default: horizontal
DROP_TOKEN = 'token'
DROP_WSPC = 'whitespace'
DROP_VALUES = {DROP_TOKEN, DROP_WSPC}
DROP_REGEXP = 'regexp'
DROP_VALUES = {DROP_TOKEN, DROP_WSPC, DROP_REGEXP}
# Representation of Python code or, rather, something that will be output as Python code
ReprType = Union[str, unrepr]
......@@ -429,7 +430,10 @@ class EBNFDirectives:
the failing parser has returned.
drop: A set that may contain the elements `DROP_TOKEN` and
`DROP_WSP'
`DROP_WSPC`, `DROP_REGEXP` or the name of any symbol
of an anonymous parser (e.g. '_linefeed'), the results
of which will already be dropped during the parsing
process.
super_ws(property): Cache for the "super whitespace" which
is a regular expression that merges whitespace and
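A hypothetical grammar fragment (symbol names invented, not taken from this commit) showing how the extended directive described in the docstring above could be used: besides the special values `whitespace`, `token` and now `regexp`, the `@ drop` directive may list anonymous symbols whose results are to be dropped during parsing.

```python
# Hypothetical EBNF source for illustration only; the directive values
# mirror DROP_TOKEN, DROP_WSPC, DROP_REGEXP plus one anonymous symbol.
grammar_source = r'''
    @ drop     = whitespace, token, _linefeed
    document   = { line }
    line       = /[^\n]*/ _linefeed
    _linefeed  = /\n/
'''
```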
......@@ -553,6 +557,13 @@ class EBNFCompiler(Compiler):
re_flags: A set of regular expression flags to be added to all
regular expressions found in the current parsing process
anonymous_regexp: A regular expression to identify symbols that stand
for parsers that shall yield anonymous nodes. The pattern of
the regular expression is configured in configuration.py but
can also be set by a directive. The default value is a regular
expression that catches names with a leading underscore.
See also `parser.Grammar.anonymous__`
grammar_name: The name of the grammar to be compiled
grammar_source: The source code of the grammar to be compiled.
......@@ -611,6 +622,7 @@ class EBNFCompiler(Compiler):
self.defined_directives = set() # type: Set[str]
self.consumed_custom_errors = set() # type: Set[str]
self.consumed_skip_rules = set() # type: Set[str]
self.anonymous_regexp = re.compile(get_config_value('default_anonymous_regexp'))
self.grammar_id += 1
......@@ -807,7 +819,7 @@ class EBNFCompiler(Compiler):
if DROP_WSPC in self.directives.drop or DROP_TOKEN in self.directives.drop:
definitions.append((EBNFCompiler.DROP_WHITESPACE_PARSER_KEYWORD,
'DropRegExp(%s)' % EBNFCompiler.WHITESPACE_KEYWORD))
'Drop(RegExp(%s))' % EBNFCompiler.WHITESPACE_KEYWORD))
definitions.append((EBNFCompiler.WHITESPACE_PARSER_KEYWORD,
'Whitespace(%s)' % EBNFCompiler.WHITESPACE_KEYWORD))
definitions.append((EBNFCompiler.WHITESPACE_KEYWORD,
......@@ -905,6 +917,8 @@ class EBNFCompiler(Compiler):
+ ('. Grammar:' if self.grammar_source and show_source else '.')]
definitions.append(('parser_initialization__', '["upon instantiation"]'))
definitions.append(('static_analysis_pending__', '[True]'))
definitions.append(('anonymous__',
're.compile(' + repr(self.anonymous_regexp.pattern) + ')'))
if self.grammar_source:
definitions.append(('source_hash__',
'"%s"' % md5(self.grammar_source, __version__)))
......@@ -1030,6 +1044,8 @@ class EBNFCompiler(Compiler):
elif defn.find("(") < 0:
# assume it's a synonym, like 'page = REGEX_PAGE_NR'
defn = 'Synonym(%s)' % defn
if rule in self.directives['drop'] and rule not in DROP_VALUES:
defn = 'Drop(%s)' % defn # TODO: Recursively drop all contained parsers for optimization
except TypeError as error:
from traceback import extract_tb
trace = str(extract_tb(error.__traceback__)[-1])
......@@ -1073,12 +1089,19 @@ class EBNFCompiler(Compiler):
self.directives[key] = value
elif key == 'drop':
check_argnum(2)
if len(node.children) <= 1:
self.tree.new_error(node, 'Directive "@ drop" requires at least one argument!')
for child in node.children[1:]:
content = child.content
node_type = content.lower()
assert node_type in DROP_VALUES, content
self.directives[key].add(node_type)
if self.anonymous_regexp.match(content):
self.directives[key].add(content)
elif content.lower() in DROP_VALUES:
self.directives[key].add(content.lower())
else:
self.tree.new_error(
node, 'Illegal value "%s" for Directive "@ drop"! Should be one of %s '
'or a string matching %s' % (content, str(DROP_VALUES),
self.anonymous_regexp.pattern))
elif key == 'ignorecase':
check_argnum()
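The following standalone sketch restates the argument check above outside the compiler (helper name and the use of an exception instead of `self.tree.new_error` are assumptions): a `@ drop` argument is accepted if it names an anonymous symbol matching the anonymous regexp, or if it is one of the special values in `DROP_VALUES`; anything else is reported as an error.

```python
import re

DROP_VALUES = {'token', 'whitespace', 'regexp'}
anonymous_regexp = re.compile(r'_')   # default pattern from configuration.py

def check_drop_argument(content: str) -> str:
    """Return the canonical form of a valid '@ drop' argument or raise."""
    if anonymous_regexp.match(content):
        return content                 # an anonymous symbol, e.g. '_linefeed'
    if content.lower() in DROP_VALUES:
        return content.lower()         # a special value, case-insensitive
    raise ValueError(
        'Illegal value "%s" for directive "@ drop"! Should be one of %s '
        'or a string matching %s' % (content, DROP_VALUES, anonymous_regexp.pattern))

check_drop_argument('Whitespace')   # -> 'whitespace'
check_drop_argument('_linefeed')    # -> '_linefeed'
```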
......@@ -1236,9 +1259,13 @@ class EBNFCompiler(Compiler):
if arg.tag_name != 'symbol':
self.tree.new_error(node, ('Retrieve Operator "%s" requires a symbol, '
'and not a %s.') % (prefix, arg.tag_name))
return str(arg.result)
if str(arg) in self.directives.filter:
custom_args = ['rfilter=%s' % self.directives.filter[str(arg)]]
return arg.content
elif self.anonymous_regexp.match(arg.content):
self.tree.new_error(node, ('Retrieve Operator "%s" does not work with '
'anonymous parsers like "%s"') % (prefix, arg.content))
return arg.content
if arg.content in self.directives.filter:
custom_args = ['rfilter=%s' % self.directives.filter[arg.content]]
self.variables.add(str(arg)) # cast(str, arg.result)
elif len(node.children) > 2:
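As a side note on the new error branch above: retrieve and pop operators match a value that was captured earlier under the symbol's name, which presupposes a named result; a parser whose name matches the anonymous pattern has its results anonymized or dropped, so there is nothing to retrieve. A simplified standalone sketch of the check (helper name assumed):

```python
import re

anonymous_regexp = re.compile(r'_')

def check_retrieval_target(prefix: str, symbol: str) -> None:
    # Retrieval only makes sense for named symbols whose captured values
    # survive in the tree; anonymous symbols are therefore rejected.
    if anonymous_regexp.match(symbol):
        raise ValueError('Retrieve Operator "%s" does not work with anonymous '
                         'parsers like "%s"' % (prefix, symbol))

check_retrieval_target('::', 'tagname')      # fine
# check_retrieval_target('::', '_linefeed')  # would raise ValueError
```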
......@@ -1334,10 +1361,15 @@ class EBNFCompiler(Compiler):
return symbol
def TOKEN_PARSER(self):
def TOKEN_PARSER(self, token):
if DROP_TOKEN in self.directives.drop and self.context[-2].tag_name != "definition":
return 'DropToken('
return 'Token('
return 'Drop(Token(' + token + '))'
return 'Token(' + token + ')'
def REGEXP_PARSER(self, regexp):
if DROP_REGEXP in self.directives.drop and self.context[-2].tag_name != "definition":
return 'Drop(RegExp(' + regexp + '))'
return 'RegExp(' + regexp + ')'
def WSPC_PARSER(self, force_drop=False):
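A condensed, standalone restatement of the two emit helpers changed above (the conditions are flattened into boolean parameters for the sketch): instead of the former `DropToken`/`DropRegExp` parser classes, the generated code now wraps the leaf parser in `Drop(...)` whenever the corresponding value is in the drop set and the parser does not sit directly at definition level.

```python
def token_parser(token: str, drop_token: bool, at_definition: bool) -> str:
    # Emit the Python source for a token parser, optionally Drop-wrapped.
    if drop_token and not at_definition:
        return 'Drop(Token(' + token + '))'
    return 'Token(' + token + ')'

def regexp_parser(regexp: str, drop_regexp: bool, at_definition: bool) -> str:
    # Emit the Python source for a regexp parser, optionally Drop-wrapped.
    if drop_regexp and not at_definition:
        return 'Drop(RegExp(' + regexp + '))'
    return 'RegExp(' + regexp + ')'

print(token_parser('"while"', drop_token=True, at_definition=False))
# -> Drop(Token("while"))
print(regexp_parser('r"\\n"', drop_regexp=False, at_definition=False))
# -> RegExp(r"\n")
```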
......@@ -1348,7 +1380,7 @@ class EBNFCompiler(Compiler):
return 'wsp__'
def on_literal(self, node: Node) -> str:
center = self.TOKEN_PARSER() + node.content.replace('\\', r'\\') + ')'
center = self.TOKEN_PARSER(node.content.replace('\\', r'\\'))
force = DROP_TOKEN in self.directives.drop
left = self.WSPC_PARSER(force) if 'left' in self.directives.literalws else ''
right = self.WSPC_PARSER(force) if 'right' in self.directives.literalws else ''
......@@ -1364,14 +1396,13 @@ class EBNFCompiler(Compiler):
tk = rpl + tk[1:-1] + rpl
else:
tk = rpl + tk.replace('"', '\\"')[1:-1] + rpl
return self.TOKEN_PARSER() + tk + ')'
return self.TOKEN_PARSER(tk)
def on_regexp(self, node: Node) -> str:
rx = node.content
name = [] # type: List[str]
assert rx[0] == '/' and rx[-1] == '/'
parser = 'RegExp('
try:
arg = repr(self._check_rx(node, rx[1:-1].replace(r'\/', '/')))
except AttributeError as error:
......@@ -1381,7 +1412,7 @@ class EBNFCompiler(Compiler):
% (EBNFCompiler.AST_ERROR, str(error), trace, node.as_sxpr())
self.tree.new_error(node, errmsg)
return '"' + errmsg + '"'
return parser + ', '.join([arg] + name) + ')'
return self.REGEXP_PARSER(', '.join([arg] + name))
def on_whitespace(self, node: Node) -> str:
......
......@@ -924,7 +924,7 @@ class Grammar:
for entry, parser in cdict.items():
if isinstance(parser, Parser) and sane_parser_name(entry):
anonymous = True if cls.anonymous__.match(entry) else False
assert anonymous or not parser.drop_content
assert anonymous or not parser.drop_content, entry
if isinstance(parser, Forward):
if not cast(Forward, parser).parser.pname:
cast(Forward, parser).parser.pname = entry
......
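Finally, a minimal standalone sketch of the invariant whose assertion is strengthened above (the classes and names are stand-ins): every class-level parser that drops its content must be anonymous, i.e. its attribute name must match the grammar's `anonymous__` pattern; adding the entry to the assertion makes the offending symbol visible when the check fails.

```python
import re

class StubParser:
    """Stand-in for DHParser's Parser, reduced to the relevant flag."""
    def __init__(self, drop_content: bool = False):
        self.drop_content = drop_content

anonymous__ = re.compile(r'_')
cdict = {'_linefeed': StubParser(drop_content=True),    # anonymous, may drop
         'word': StubParser(drop_content=False)}        # named, must keep

for entry, parser in cdict.items():
    anonymous = bool(anonymous__.match(entry))
    assert anonymous or not parser.drop_content, entry
```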