Commit f65adbb2 authored by eckhart's avatar eckhart
Browse files

ebnf.py: mode setting for ebnf-parser added

parent 404df517
......@@ -256,10 +256,10 @@ CONFIG_PRESET['resume_notices'] = False
# Default values: "compact" for concrete syntax trees and "XML" for abstract
# syntax trees and "S-expression" for any other kind of tree.
XML_SERIALIZATION = "XML"
SXPRESSION_SERIALIZATION = "S-expression"
COMPACT_SERIALIZATION = "compact"
SMART_SERIALIZATION = "smart"
JSON_SERIALIZATION = "json"
SMART_SERIALIZATION = "smart"
COMPACT_SERIALIZATION = "compact"
SXPRESSION_SERIALIZATION = "S-expression"
SERIALIZATIONS = frozenset({XML_SERIALIZATION,
SXPRESSION_SERIALIZATION,
......@@ -331,6 +331,36 @@ CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
CONFIG_PRESET['default_anonymous_regexp'] = r'..(?<=^)'
# Default value for the brand of EBNF that DHParser accepts
# 'classic' - relatively closest to the ISO-standard, i.e. uses [] and {}
# for optional and zero or more elements, respectively. Does not allow
# the ?, +, * suffixes. Allows the specification of character-ranges
# within square brackets only with the ordinal unicode numbers,
# not with the characters themselves, i.e. [0x41-0x5A]
# 'regex-like' - similar to regular expression syntax, allows ?, +, *
# suffixes for optional, one or more repetitions, zero or more
# repetitions, but not {} or []. Allows character-ranges within
# square brackets in any form.
# 'peg-like' - like regex-like, but uses / instead of | for the
# alternative-parser. Does not allow regular expressions between
# slashes, i.e. / ... /, within the EBNF-code!
# 'strict' - allows both classic and regex-like syntax to be mixed, but
# allows character ranges within square brackets with ordinal values,
# only. Uses | as delimiter for alternatives.
# 'heuristic' - the most liberal mode, allows about everything. However,
# because it employs heuristics to distinguish ambiguous cases, it
# may lead to unexpected errors and require the user to resolve the
# ambiguities
# NOTE(review): the `mode` setter of EBNFGrammar (ebnf.py) handles
# 'classic' and 'strict' in one and the same branch - confirm that
# this is intended.
EBNF_CLASSIC_SYNTAX = "classic"
EBNF_ANY_SYNTAX_STRICT = "strict"
EBNF_ANY_SYNTAX_HEURISTICAL = "heuristic"
EBNF_REGULAR_EXPRESSION_SYNTAX = "regex-like"
EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX = "peg-like"
# 'strict' is the preset syntax variant
CONFIG_PRESET['syntax_variant'] = EBNF_ANY_SYNTAX_STRICT
########################################################################
#
# compiler server configuration
......
......@@ -31,16 +31,18 @@ import os
from typing import Callable, Dict, List, Set, Tuple, Sequence, Union, Optional, Any
from DHParser.compile import CompilerError, Compiler, ResultTuple, compile_source, visitor_name
from DHParser.configuration import access_thread_locals, get_config_value
from DHParser.configuration import access_thread_locals, get_config_value, \
EBNF_ANY_SYNTAX_HEURISTICAL, EBNF_ANY_SYNTAX_STRICT, EBNF_CLASSIC_SYNTAX, \
EBNF_REGULAR_EXPRESSION_SYNTAX, EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX
from DHParser.error import Error, AMBIGUOUS_ERROR_HANDLING, WARNING, REDECLARED_TOKEN_WARNING,\
REDEFINED_DIRECTIVE, UNUSED_ERROR_HANDLING_WARNING, INAPPROPRIATE_SYMBOL_FOR_DIRECTIVE, \
DIRECTIVE_FOR_NONEXISTANT_SYMBOL, UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING
from DHParser.parse import Grammar, mixin_comment, mixin_nonempty, Forward, RegExp, Drop, \
Lookahead, NegativeLookahead, Alternative, Series, Option, ZeroOrMore, OneOrMore, Token, \
Capture, Retrieve, Pop, optional_last_value, GrammarError, Whitespace, INFINITE, \
matching_bracket
from DHParser.parse import Parser, Grammar, mixin_comment, mixin_nonempty, Forward, RegExp, \
Drop, Lookahead, NegativeLookahead, Alternative, Series, Option, ZeroOrMore, OneOrMore, \
Token, Capture, Retrieve, Pop, optional_last_value, GrammarError, Whitespace, Always, Never, \
INFINITE, matching_bracket, ParseFunc
from DHParser.preprocess import nil_preprocessor, PreprocessorFunc
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE
from DHParser.syntaxtree import Node, WHITESPACE_PTYPE, TOKEN_PTYPE, EMPTY_NODE
from DHParser.toolkit import load_if_file, escape_re, escape_control_characters, md5, \
sane_parser_name, re, expand_table, unrepr, compile_python_object, DHPARSER_PARENTDIR, \
RX_NEVER_MATCH
......@@ -216,6 +218,70 @@ class EBNFGrammar(Grammar):
syntax = Series(Option(Series(dwsp__, RegExp(''))), ZeroOrMore(Alternative(definition, directive)), EOF)
root__ = syntax
# Keep references to the original parse-functions of the three parsers
# whose `_parse` is exchanged by the `mode` setter, so that the setter
# can install them again later.
# NOTE(review): the third name is spelled `..._parserfunc__` while the
# other two use `..._parsefunc__`; the setter uses the same spelling.
free_char_parsefunc__ = free_char._parse
char_range_heuristics_parsefunc__ = char_range_heuristics._parse
regex_heuristics_parserfunc__ = regex_heuristics._parse
@property
def mode(self) -> str:
    """Returns the name of the EBNF syntax variant that the grammar is
    currently configured for.

    The variant is not stored, but derived from the parse-functions
    that are presently installed in the parsers `free_char`,
    `regex_heuristics` and `char_range_heuristics` (in this order).
    Each of them is classified by the qualified name of its `_parse`
    as 'never', 'always' or 'custom'. If the resulting combination
    does not correspond to any known variant, "undefined" is returned.
    """
    def classify(parser: Parser) -> str:
        # anything that is neither Never._parse nor Always._parse
        # counts as a custom parse-function
        qualname = parser._parse.__qualname__
        return {'Never._parse': 'never',
                'Always._parse': 'always'}.get(qualname, 'custom')

    fingerprint = tuple(classify(parser) for parser in (
        self.free_char,
        self.regex_heuristics,
        self.char_range_heuristics,
    ))
    variants = {
        ('custom', 'custom', 'custom'): EBNF_ANY_SYNTAX_HEURISTICAL,
        ('never', 'always', 'always'): EBNF_ANY_SYNTAX_STRICT,  # or EBNF_CLASSIC_SYNTAX
        ('custom', 'never', 'always'): EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX,
        ('custom', 'always', 'always'): EBNF_REGULAR_EXPRESSION_SYNTAX,
    }
    return variants.get(fingerprint, "undefined")
@mode.setter
def mode(self, mode: str):
    """Configures the grammar for the EBNF syntax variant `mode`.

    This is done by exchanging the parse-functions of the parsers
    `free_char`, `regex_heuristics` and `char_range_heuristics`
    either with `Always._parse`, `Never._parse` or with the saved
    original parse-function of the respective parser.

    :param mode: one of the EBNF_..._SYNTAX names; the classic and
        the strict variant are configured identically.
    :raises ValueError: if `mode` is not one of the known variants.
    """
    def install(parser: Parser, func: ParseFunc):
        bound = func.__get__(parser, type(parser))  # bind func to parser
        # update the proxy as well, but only if it presently equals
        # the parser's own _parse
        if parser._parse == parser._parse_proxy:
            parser._parse_proxy = bound
        parser._parse = bound

    always = Always._parse
    never = Never._parse

    # plan: parse-functions for (free_char, regex_heuristics,
    # char_range_heuristics), in that order
    if mode == EBNF_ANY_SYNTAX_HEURISTICAL:
        plan = (self.free_char_parsefunc__,
                self.regex_heuristics_parserfunc__,
                self.char_range_heuristics_parsefunc__)
    elif mode in (EBNF_ANY_SYNTAX_STRICT, EBNF_CLASSIC_SYNTAX):
        plan = (never, always, always)
    elif mode == EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX:
        plan = (self.free_char_parsefunc__, never, always)
    elif mode == EBNF_REGULAR_EXPRESSION_SYNTAX:
        plan = (self.free_char_parsefunc__, always, always)
    else:
        known_modes = (
            EBNF_ANY_SYNTAX_HEURISTICAL,
            EBNF_ANY_SYNTAX_STRICT,
            EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX,
            EBNF_REGULAR_EXPRESSION_SYNTAX,
            EBNF_CLASSIC_SYNTAX,
        )
        raise ValueError('Mode must be one of: ' + ', '.join(known_modes))

    targets = (self.free_char, self.regex_heuristics, self.char_range_heuristics)
    for parser, func in zip(targets, plan):
        install(parser, func)
def grammar_changed(grammar_class, grammar_source: str) -> bool:
"""
......@@ -254,10 +320,11 @@ def get_ebnf_grammar() -> EBNFGrammar:
THREAD_LOCALS = access_thread_locals()
try:
grammar = THREAD_LOCALS.ebnf_grammar_singleton
return grammar
except AttributeError:
THREAD_LOCALS.ebnf_grammar_singleton = EBNFGrammar()
return THREAD_LOCALS.ebnf_grammar_singleton
grammar = THREAD_LOCALS.ebnf_grammar_singleton
grammar.mode = get_config_value('syntax_variant')
return grammar
def parse_ebnf(ebnf: str) -> Node:
......
......@@ -337,7 +337,7 @@ class Parser:
self.tag_name = self.ptype # type: str
self.cycle_detection = set() # type: Set[ApplyFunc]
# this indirection is required for Cython-compatibility
self._parse_proxy = self._parse # type: ParseFunc
self._parse_proxy = self._parse # type: ParseFunc
# self.proxied = None # type: Optional[ParseFunc]
try:
self._grammar = GRAMMAR_PLACEHOLDER # type: Grammar
......@@ -559,11 +559,9 @@ class Parser:
return None
def set_proxy(self, proxy: Optional[ParseFunc]):
"""Sets a proxy that replaces the _parse()-method. The original
parse-method is copied to the `proxied`-filed of the Parser object and
can be called by the proxy. Call `set_proxy` with `None` to remove
a previously set proxy. Typical use case is the installation of a
tracing debugger. See module `trace`.
"""Sets a proxy that replaces the _parse()-method. Call `set_proxy`
with `None` to remove a previously set proxy. Typical use case is
the installation of a tracing debugger. See module `trace`.
"""
if proxy is None:
self._parse_proxy = self._parse
......
......@@ -30,6 +30,9 @@ sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from DHParser.toolkit import compile_python_object, re, DHPARSER_PARENTDIR
from DHParser.preprocess import nil_preprocessor
from DHParser import compile_source
from DHParser.configuration import access_thread_locals, get_config_value, \
EBNF_ANY_SYNTAX_HEURISTICAL, EBNF_ANY_SYNTAX_STRICT, EBNF_CLASSIC_SYNTAX, \
EBNF_REGULAR_EXPRESSION_SYNTAX, EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX
from DHParser.error import has_errors, Error, PARSER_DID_NOT_MATCH, MANDATORY_CONTINUATION, \
REDEFINED_DIRECTIVE, UNUSED_ERROR_HANDLING_WARNING, AMBIGUOUS_ERROR_HANDLING
from DHParser.syntaxtree import WHITESPACE_PTYPE
......@@ -878,6 +881,48 @@ class TestSyntaxExtensions:
assert st.errors and any(e.code == PARSER_DID_NOT_MATCH for e in st.errors)
class TestModeSetting:
    """Tests for switching the EBNF-grammar between its syntax variants
    via the `mode` property."""

    # Raw string, because the grammar contains the regular expression
    # escapes \w and \s, which are invalid escape sequences in an
    # ordinary string literal.
    testdoc = r"""# hey, you
doc = sequence | re | char | char_range | char_range2 | multiple1 | multiple2 | multiple3 | multiple4
sequence = '</' Name S? '>'
re = /abc*/
char = #x32 # shell-style comment
char_range = [#xDFF88-#xEEFF00] /*
C-style comment
*/ char_range2 = [-'()+,./:=?;!*#@$_%]
multiple1 = `a` * 3
multiple2 = 4 * `b`
multiple3 = `c`{3}
multiple4 = `d`{2,5}
Name = /\w+/
S = /\s*/
"""

    def test_setmode_getmode(self):
        """Every mode that has been set must be reported by the getter."""
        gr = get_ebnf_grammar()
        gr.mode = EBNF_ANY_SYNTAX_STRICT
        assert gr.mode == EBNF_ANY_SYNTAX_STRICT
        gr.mode = EBNF_REGULAR_EXPRESSION_SYNTAX
        assert gr.mode == EBNF_REGULAR_EXPRESSION_SYNTAX
        gr.mode = EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX
        assert gr.mode == EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX
        gr.mode = EBNF_ANY_SYNTAX_HEURISTICAL
        assert gr.mode == EBNF_ANY_SYNTAX_HEURISTICAL
        # the classic variant is expected to be reported as the
        # strict variant by the getter
        gr.mode = EBNF_CLASSIC_SYNTAX
        assert gr.mode == EBNF_ANY_SYNTAX_STRICT

    def test_heuristic_mode(self):
        """The test-document mixes syntax styles: It is expected to yield
        errors in strict mode, but none in heuristic mode."""
        gr = get_ebnf_grammar()
        gr.mode = EBNF_ANY_SYNTAX_STRICT
        st = gr(self.testdoc)
        assert st.errors
        gr.mode = EBNF_ANY_SYNTAX_HEURISTICAL
        st = gr(self.testdoc)
        assert not st.errors
if __name__ == "__main__":
    # When executed as a script, hand all names of this module over to
    # DHParser's test runner; the empty string is passed as first argument.
    from DHParser.testing import runner
    runner("", globals())
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment