Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
The container registry cleanup task is now completed and the registry can be used normally.
Open sidebar
badw-it
DHParser
Commits
f65adbb2
Commit
f65adbb2
authored
Apr 13, 2020
by
eckhart
Browse files
ebnf.py: mode setting for ebnf-parser added
parent
404df517
Changes
4
Hide whitespace changes
Inline
Side-by-side
DHParser/configuration.py
View file @
f65adbb2
...
...
@@ -256,10 +256,10 @@ CONFIG_PRESET['resume_notices'] = False
# Default values: "compact" for concrete syntax trees and "XML" for abstract
# syntax trees and "S-expression" for any other kind of tree.
XML_SERIALIZATION
=
"XML"
SXPRESSION_SERIALIZATION
=
"S-expression"
COMPACT_SERIALIZATION
=
"compact"
SMART_SERIALIZATION
=
"smart"
JSON_SERIALIZATION
=
"json"
SMART_SERIALIZATION
=
"smart"
COMPACT_SERIALIZATION
=
"compact"
SXPRESSION_SERIALIZATION
=
"S-expression"
SERIALIZATIONS
=
frozenset
({
XML_SERIALIZATION
,
SXPRESSION_SERIALIZATION
,
...
...
@@ -331,6 +331,36 @@ CONFIG_PRESET['add_grammar_source_to_parser_docstring'] = False
CONFIG_PRESET
[
'default_anonymous_regexp'
]
=
r
'..(?<=^)'
# Default value for the brand of EBNF that DHParser accepts
# 'classic' - relatively closest to the ISO-standard, i.e. uses [] and {}
# for optional and zero or more elements, respectively. Does not allow
# the ?, +, * suffixes. Allows the specification of character-ranges
# within square brackets only with the ordinal unicode numbers,
# not with the characters themselves, e.g. [0x41-0x5A]
# 'regex-like' - similar to regular expression syntax, allows ?, +, *
# suffixes for optional, one or more repetitions, zero or more
# repetitions, but not {} or []. Allows character-ranges within
# square brackets in any form.
# 'peg-like' - like regex-like, but uses / instead of | for the
# alternative-parser. Does not allow regular expressions delimited by
# slashes, i.e. / ... /, within the EBNF-code!
# 'strict' - allows both classic and regex-like syntax to be mixed, but
# allows character ranges within square brackets with ordinal values,
# only. Uses | as delimiter for alternatives.
# 'heuristic' - the most liberal mode, allows about everything. However,
# because it employs heuristics to distinguish ambiguous cases, it
# may lead to unexpected errors and require the user to resolve the
# ambiguities
# Identifiers of the EBNF syntax variants described in the comment above.
EBNF_CLASSIC_SYNTAX = "classic"
EBNF_ANY_SYNTAX_STRICT = "strict"
EBNF_ANY_SYNTAX_HEURISTICAL = "heuristic"
EBNF_REGULAR_EXPRESSION_SYNTAX = "regex-like"
EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX = "peg-like"

# Preset for the 'syntax_variant' configuration key: the "strict" variant.
CONFIG_PRESET['syntax_variant'] = EBNF_ANY_SYNTAX_STRICT
########################################################################
#
# compiler server configuration
...
...
DHParser/ebnf.py
View file @
f65adbb2
...
...
@@ -31,16 +31,18 @@ import os
from
typing
import
Callable
,
Dict
,
List
,
Set
,
Tuple
,
Sequence
,
Union
,
Optional
,
Any
from
DHParser.compile
import
CompilerError
,
Compiler
,
ResultTuple
,
compile_source
,
visitor_name
from
DHParser.configuration
import
access_thread_locals
,
get_config_value
from
DHParser.configuration
import
access_thread_locals
,
get_config_value
,
\
EBNF_ANY_SYNTAX_HEURISTICAL
,
EBNF_ANY_SYNTAX_STRICT
,
EBNF_CLASSIC_SYNTAX
,
\
EBNF_REGULAR_EXPRESSION_SYNTAX
,
EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX
from
DHParser.error
import
Error
,
AMBIGUOUS_ERROR_HANDLING
,
WARNING
,
REDECLARED_TOKEN_WARNING
,
\
REDEFINED_DIRECTIVE
,
UNUSED_ERROR_HANDLING_WARNING
,
INAPPROPRIATE_SYMBOL_FOR_DIRECTIVE
,
\
DIRECTIVE_FOR_NONEXISTANT_SYMBOL
,
UNDEFINED_SYMBOL_IN_TRANSTABLE_WARNING
from
DHParser.parse
import
Grammar
,
mixin_comment
,
mixin_nonempty
,
Forward
,
RegExp
,
Drop
,
\
Lookahead
,
NegativeLookahead
,
Alternative
,
Series
,
Option
,
ZeroOrMore
,
OneOrMore
,
Token
,
\
Capture
,
Retrieve
,
Pop
,
optional_last_value
,
GrammarError
,
Whitespace
,
INFINITE
,
\
matching_bracket
from
DHParser.parse
import
Parser
,
Grammar
,
mixin_comment
,
mixin_nonempty
,
Forward
,
RegExp
,
\
Drop
,
Lookahead
,
NegativeLookahead
,
Alternative
,
Series
,
Option
,
ZeroOrMore
,
OneOrMore
,
\
Token
,
Capture
,
Retrieve
,
Pop
,
optional_last_value
,
GrammarError
,
Whitespace
,
Always
,
Never
,
\
INFINITE
,
matching_bracket
,
ParseFunc
from
DHParser.preprocess
import
nil_preprocessor
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.syntaxtree
import
Node
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
EMPTY_NODE
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
escape_control_characters
,
md5
,
\
sane_parser_name
,
re
,
expand_table
,
unrepr
,
compile_python_object
,
DHPARSER_PARENTDIR
,
\
RX_NEVER_MATCH
...
...
@@ -216,6 +218,70 @@ class EBNFGrammar(Grammar):
syntax
=
Series
(
Option
(
Series
(
dwsp__
,
RegExp
(
''
))),
ZeroOrMore
(
Alternative
(
definition
,
directive
)),
EOF
)
root__
=
syntax
# Keep references to the original `_parse` functions of the three heuristic
# parsers, so that the `mode` setter can re-install them after another mode
# has replaced them.
free_char_parsefunc__ = free_char._parse
char_range_heuristics_parsefunc__ = char_range_heuristics._parse
# NOTE(review): spelled "parserfunc" (with an "r"), unlike its two siblings;
# the `mode` setter refers to this attribute with exactly this spelling.
regex_heuristics_parserfunc__ = regex_heuristics._parse
@property
def mode(self) -> str:
    """Report the EBNF syntax variant this grammar is currently set to.

    The variant is not stored anywhere; it is derived from the parse
    functions presently installed on the parsers `free_char`,
    `regex_heuristics` and `char_range_heuristics` (in that order).

    :returns: one of the `EBNF_..._SYNTAX` constants, or the string
        "undefined" if the combination of installed parse functions does
        not correspond to any known variant. `EBNF_CLASSIC_SYNTAX` is never
        returned, because it yields the same combination as
        `EBNF_ANY_SYNTAX_STRICT`.
    """
    # qualified name of an installed parse function -> its category;
    # anything not listed here counts as 'custom'
    category_by_qualname = {
        'Never._parse': 'never',
        'Always._parse': 'always',
    }

    def categorize(parser: Parser) -> str:
        return category_by_qualname.get(parser._parse.__qualname__, 'custom')

    fingerprint = tuple(
        categorize(parser)
        for parser in (self.free_char,
                       self.regex_heuristics,
                       self.char_range_heuristics))

    variant_by_fingerprint = {
        ('custom', 'custom', 'custom'): EBNF_ANY_SYNTAX_HEURISTICAL,
        ('never', 'always', 'always'): EBNF_ANY_SYNTAX_STRICT,  # or EBNF_CLASSIC_SYNTAX
        ('custom', 'never', 'always'): EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX,
        ('custom', 'always', 'always'): EBNF_REGULAR_EXPRESSION_SYNTAX,
    }
    return variant_by_fingerprint.get(fingerprint, "undefined")
@mode.setter
def mode(self, mode: str):
    """Switch the grammar to the EBNF syntax variant named by `mode`.

    Switching is done by exchanging the parse functions of the parsers
    `free_char`, `regex_heuristics` and `char_range_heuristics`: each one
    receives either its own saved original parse function, or the parse
    function of `Always` or `Never`.

    :param mode: one of `EBNF_ANY_SYNTAX_HEURISTICAL`,
        `EBNF_ANY_SYNTAX_STRICT`, `EBNF_CLASSIC_SYNTAX`,
        `EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX` or
        `EBNF_REGULAR_EXPRESSION_SYNTAX`. The classic and the strict
        variant install the same parse functions.
    :raises ValueError: if `mode` is none of the values listed above. In
        that case no parser is modified.
    """
    def install(parser: Parser, parse_func: ParseFunc):
        # Bind parse_func to the parser, so that it can serve as its method.
        # NOTE(review): if parse_func is an already-bound method, `__get__`
        # presumably hands it back unchanged, i.e. still bound to the object
        # it was taken from -- confirm that this is intended for the saved
        # `..._parsefunc__` attributes.
        bound = parse_func.__get__(parser, type(parser))
        # Replace the proxy only if it is not occupied by something other
        # than the parser's own parse method.
        if parser._parse == parser._parse_proxy:
            parser._parse_proxy = bound
        parser._parse = bound

    match_always = Always._parse
    match_never = Never._parse

    # Parse functions for (free_char, regex_heuristics, char_range_heuristics)
    if mode == EBNF_ANY_SYNTAX_HEURISTICAL:
        chosen = (self.free_char_parsefunc__,
                  self.regex_heuristics_parserfunc__,
                  self.char_range_heuristics_parsefunc__)
    elif mode in (EBNF_ANY_SYNTAX_STRICT, EBNF_CLASSIC_SYNTAX):
        chosen = (match_never, match_always, match_always)
    elif mode == EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX:
        chosen = (self.free_char_parsefunc__, match_never, match_always)
    elif mode == EBNF_REGULAR_EXPRESSION_SYNTAX:
        chosen = (self.free_char_parsefunc__, match_always, match_always)
    else:
        known_modes = (EBNF_ANY_SYNTAX_HEURISTICAL,
                       EBNF_ANY_SYNTAX_STRICT,
                       EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX,
                       EBNF_REGULAR_EXPRESSION_SYNTAX,
                       EBNF_CLASSIC_SYNTAX)
        raise ValueError('Mode must be one of: ' + ', '.join(known_modes))

    targets = (self.free_char, self.regex_heuristics, self.char_range_heuristics)
    for parser, parse_func in zip(targets, chosen):
        install(parser, parse_func)
def
grammar_changed
(
grammar_class
,
grammar_source
:
str
)
->
bool
:
"""
...
...
@@ -254,10 +320,11 @@ def get_ebnf_grammar() -> EBNFGrammar:
THREAD_LOCALS
=
access_thread_locals
()
try
:
grammar
=
THREAD_LOCALS
.
ebnf_grammar_singleton
return
grammar
except
AttributeError
:
THREAD_LOCALS
.
ebnf_grammar_singleton
=
EBNFGrammar
()
return
THREAD_LOCALS
.
ebnf_grammar_singleton
grammar
=
THREAD_LOCALS
.
ebnf_grammar_singleton
grammar
.
mode
=
get_config_value
(
'syntax_variant'
)
return
grammar
def
parse_ebnf
(
ebnf
:
str
)
->
Node
:
...
...
DHParser/parse.py
View file @
f65adbb2
...
...
@@ -337,7 +337,7 @@ class Parser:
self
.
tag_name
=
self
.
ptype
# type: str
self
.
cycle_detection
=
set
()
# type: Set[ApplyFunc]
# this indirection is required for Cython-compatibility
self
.
_parse_proxy
=
self
.
_parse
# type: ParseFunc
self
.
_parse_proxy
=
self
.
_parse
# type: ParseFunc
# self.proxied = None # type: Optional[ParseFunc]
try
:
self
.
_grammar
=
GRAMMAR_PLACEHOLDER
# type: Grammar
...
...
@@ -559,11 +559,9 @@ class Parser:
return
None
def
set_proxy
(
self
,
proxy
:
Optional
[
ParseFunc
]):
"""Sets a proxy that replaces the _parse()-method. The original
parse-method is copied to the `proxied`-filed of the Parser object and
can be called by the proxy. Call `set_proxy` with `None` to remove
a previously set proxy. Typical use case is the installation of a
tracing debugger. See module `trace`.
"""Sets a proxy that replaces the _parse()-method. Call `set_proxy`
with `None` to remove a previously set proxy. Typical use case is
the installation of a tracing debugger. See module `trace`.
"""
if
proxy
is
None
:
self
.
_parse_proxy
=
self
.
_parse
...
...
test/test_ebnf.py
View file @
f65adbb2
...
...
@@ -30,6 +30,9 @@ sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from
DHParser.toolkit
import
compile_python_object
,
re
,
DHPARSER_PARENTDIR
from
DHParser.preprocess
import
nil_preprocessor
from
DHParser
import
compile_source
from
DHParser.configuration
import
access_thread_locals
,
get_config_value
,
\
EBNF_ANY_SYNTAX_HEURISTICAL
,
EBNF_ANY_SYNTAX_STRICT
,
EBNF_CLASSIC_SYNTAX
,
\
EBNF_REGULAR_EXPRESSION_SYNTAX
,
EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX
from
DHParser.error
import
has_errors
,
Error
,
PARSER_DID_NOT_MATCH
,
MANDATORY_CONTINUATION
,
\
REDEFINED_DIRECTIVE
,
UNUSED_ERROR_HANDLING_WARNING
,
AMBIGUOUS_ERROR_HANDLING
from
DHParser.syntaxtree
import
WHITESPACE_PTYPE
...
...
@@ -878,6 +881,48 @@ class TestSyntaxExtensions:
assert
st
.
errors
and
any
(
e
.
code
==
PARSER_DID_NOT_MATCH
for
e
in
st
.
errors
)
class TestModeSetting:
    """Tests for switching the EBNF-grammar between its syntax variants via
    the `mode` property of the grammar object."""

    # Raw string literal: the grammar contains the regular expressions /\w+/
    # and /\s*/, and "\w" / "\s" are invalid escape sequences in a non-raw
    # string (they trigger a warning on recent Python versions). The value of
    # the string is the same either way.
    # The last alternative of `doc` now reads `multiple4`, matching the rule
    # defined further below (it was misspelled "mutliple4").
    testdoc = r"""# hey, you
doc = sequence | re | char | char_range | char_range2 | multiple1 | multiple2 | multiple3 | multiple4
sequence = '</' Name S? '>'
re = /abc*/
char = #x32 # shell-style comment
char_range = [#xDFF88-#xEEFF00] /*
C-style comment
*/ char_range2 = [-'()+,./:=?;!*#@$_%]
multiple1 = `a` * 3
multiple2 = 4 * `b`
multiple3 = `c`{3}
multiple4 = `d`{2,5}
Name = /\w+/
S = /\s*/
"""

    def test_setmode_getmode(self):
        """Setting a mode and reading it back yields the same mode, with one
        exception: the classic mode reads back as the strict mode, because
        both install the same parse functions and the getter cannot tell
        them apart."""
        gr = get_ebnf_grammar()
        expectations = (
            (EBNF_ANY_SYNTAX_STRICT, EBNF_ANY_SYNTAX_STRICT),
            (EBNF_REGULAR_EXPRESSION_SYNTAX, EBNF_REGULAR_EXPRESSION_SYNTAX),
            (EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX,
             EBNF_PARSING_EXPRESSION_GRAMMAR_SYNTAX),
            (EBNF_ANY_SYNTAX_HEURISTICAL, EBNF_ANY_SYNTAX_HEURISTICAL),
            (EBNF_CLASSIC_SYNTAX, EBNF_ANY_SYNTAX_STRICT),
        )
        for requested, reported in expectations:
            gr.mode = requested
            assert gr.mode == reported, \
                "mode %r was reported as %r" % (requested, gr.mode)

    def test_heuristic_mode(self):
        """The test document mixes syntax styles: parsing it must produce
        errors in strict mode, but none in heuristic mode."""
        gr = get_ebnf_grammar()

        gr.mode = EBNF_ANY_SYNTAX_STRICT
        st = gr(self.testdoc)
        assert st.errors

        gr.mode = EBNF_ANY_SYNTAX_HEURISTICAL
        st = gr(self.testdoc)
        assert not st.errors
# When executed as a script, hand this module's global namespace to
# DHParser's test runner. The first argument is passed as an empty string.
if __name__ == "__main__":
    from DHParser.testing import runner
    runner("", globals())
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment