badw-it / DHParser

Commit c74091ca, authored Jun 10, 2017 by Eckhart Arnold
Parent: 4589c6b6

- added type annotations for better documentation and mypy type checks
Showing 9 changed files with 358 additions and 303 deletions (+358 / -303).
DHParser/dsl.py                             +38   -28
DHParser/ebnf.py                            +101  -87
DHParser/parsers.py                         +101  -83
DHParser/syntaxtree.py                      +51   -38
DHParser/toolkit.py                         +14   -12
OLDSTUFF/ParserCombinators_obsolete.py      +4    -4
examples/MLW/OLDSTUFF/MLW_compiler.py       +2    -2
examples/Tutorial/LyrikCompiler.py          +45   -47
test/test_dsl.py                            +2    -2
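The annotations added by this commit follow the signature style shown throughout the hunks below. A minimal sketch of that style (the function name and body are invented, not code from the repository); the commented-out call is the kind of mistake mypy would flag once the annotations are in place:

    from typing import Any

    def run_compiler_sketch(text_or_file: str, compiler_suite: str) -> Any:
        """Parameter and return annotations in the style used throughout this commit."""
        return text_or_file + compiler_suite

    # run_compiler_sketch("grammar.ebnf", 42)   # flagged by mypy: argument 2 is not a str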
DHParser/dsl.py
@@ -20,17 +20,18 @@ compilation of domain specific languages based on an EBNF-grammar.
 """
 import os
 try:
     import regex as re
 except ImportError:
     import re
+from typing import Any, Tuple, cast

-from .ebnf import EBNFTransformer, grammar_changed, \
-    get_ebnf_scanner, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler
-from .toolkit import logging, load_if_file, is_python_code, compile_python_object
-from .parsers import Grammar, CompilerBase, compile_source, nil_scanner
-from .syntaxtree import Node
+from DHParser.ebnf import EBNFTransformer, EBNFCompiler, grammar_changed, \
+    get_ebnf_scanner, get_ebnf_grammar, get_ebnf_transformer, get_ebnf_compiler, \
+    ScannerFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc
+from DHParser.toolkit import logging, load_if_file, is_python_code, compile_python_object
+from DHParser.parsers import Grammar, Compiler, compile_source, nil_scanner, ScannerFunc
+from DHParser.syntaxtree import Node, TransformerFunc

 __all__ = ['GrammarError',
@@ -71,7 +72,7 @@ try:
 except ImportError:
     import re
 from DHParser.toolkit import logging, is_filename, load_if_file
-from DHParser.parsers import Grammar, CompilerBase, nil_scanner, \\
+from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
     Lookbehind, Lookahead, Alternative, Pop, Required, Token, \\
     Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Sequence, RE, Capture, \\
     ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source, \\
@@ -137,7 +138,7 @@ class CompilationError(Exception):
         return '\n'.join(self.error_messages)


-def grammar_instance(grammar_representation):
+def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
     """Returns a grammar object and the source code of the grammar, from
     the given `grammar`-data which can be either a file name, ebnf-code,
     python-code, a Grammar-derived grammar class or an instance of
@@ -167,7 +168,11 @@ def grammar_instance(grammar_representation):
     return parser_root, grammar_src


-def compileDSL(text_or_file, scanner, dsl_grammar, ast_transformation, compiler):
+def compileDSL(text_or_file: str,
+               scanner: ScannerFunc,
+               dsl_grammar: Grammar,
+               ast_transformation: TransformerFunc,
+               compiler: Compiler) -> Any:
     """Compiles a text in a domain specific language (DSL) with an
     EBNF-specified grammar. Returns the compiled text or raises a
     compilation error.
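With the new signature, the expected type of each argument is explicit. A sketch of a call that satisfies it, using the EBNF factory functions imported at the top of this module (the one-rule grammar string is hypothetical):

    from DHParser.dsl import compileDSL
    from DHParser.ebnf import get_ebnf_scanner, get_ebnf_grammar, \
        get_ebnf_transformer, get_ebnf_compiler

    result = compileDSL("word = /\\w+/",          # hypothetical EBNF source text
                        get_ebnf_scanner(),       # ScannerFunc
                        get_ebnf_grammar(),       # Grammar
                        get_ebnf_transformer(),   # TransformerFunc
                        get_ebnf_compiler())      # Compiler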
@@ -176,10 +181,10 @@ def compileDSL(text_or_file, scanner, dsl_grammar, ast_transformation, compiler)
         CompilationError if any errors occurred during compilation
     """
     assert isinstance(text_or_file, str)
-    assert isinstance(compiler, CompilerBase)
+    assert isinstance(compiler, Compiler)

-    parser_root, grammar_src = grammar_instance(dsl_grammar)
-    result, errors, AST = compile_source(text_or_file, scanner, parser_root,
+    parser, grammar_src = grammar_instance(dsl_grammar)
+    result, errors, AST = compile_source(text_or_file, scanner, parser,
                                           ast_transformation, compiler)
     if errors:
         src = load_if_file(text_or_file)
@@ -187,7 +192,7 @@ def compileDSL(text_or_file, scanner, dsl_grammar, ast_transformation, compiler)
     return result


-def raw_compileEBNF(ebnf_src, branding="DSL"):
+def raw_compileEBNF(ebnf_src: str, branding="DSL") -> EBNFCompiler:
     """Compiles an EBNF grammar file and returns the compiler object
     that was used and which can now be queried for the result as well
     as skeleton code for scanner, transformer and compiler objects.
@@ -208,7 +213,7 @@ def raw_compileEBNF(ebnf_src, branding="DSL"):
     return compiler


-def compileEBNF(ebnf_src, branding="DSL"):
+def compileEBNF(ebnf_src: str, branding="DSL") -> str:
     """Compiles an EBNF source file and returns the source code of a
     compiler suite with skeletons for scanner, transformer and
     compiler.
@@ -234,7 +239,7 @@ def compileEBNF(ebnf_src, branding="DSL"):
     return '\n'.join(src)


-def parser_factory(ebnf_src, branding="DSL"):
+def parser_factory(ebnf_src: str, branding="DSL") -> Grammar:
     """Compiles an EBNF grammar and returns a grammar-parser factory
     function for that grammar.
@@ -253,7 +258,8 @@ def parser_factory(ebnf_src, branding="DSL"):
     return compile_python_object(DHPARSER_IMPORTS + grammar_src, 'get_(?:\w+_)?grammar$')


-def load_compiler_suite(compiler_suite):
+def load_compiler_suite(compiler_suite: str) -> \
+        Tuple[ScannerFactoryFunc, ParserFactoryFunc, TransformerFactoryFunc, CompilerFactoryFunc]:
     """Extracts a compiler suite from file or string ``compiler suite``
     and returns it as a tuple (scanner, parser, ast, compiler).
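The return annotation now documents that the four tuple elements are zero-argument factories rather than the objects themselves. A sketch of the intended consumption, mirroring run_compiler() further down (the wrapper name is invented):

    from DHParser.dsl import load_compiler_suite, compileDSL

    def compile_with_suite(text: str, suite_path: str):
        scanner, parser, ast, compiler = load_compiler_suite(suite_path)
        # each element is a factory; calling it yields the actual object
        return compileDSL(text, scanner(), parser(), ast(), compiler())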
@@ -282,13 +288,14 @@ def load_compiler_suite(compiler_suite):
     if errors:
         raise GrammarError('\n\n'.join(errors), source)
     scanner = get_ebnf_scanner
     parser = get_ebnf_grammar
     ast = get_ebnf_transformer
     compiler = compile_python_object(imports + compiler_py, 'get_(?:\w+_)?compiler$')
     return scanner, parser, ast, compiler


-def is_outdated(compiler_suite, grammar_source):
+def is_outdated(compiler_suite: str, grammar_source: str) -> bool:
     """Returns ``True`` if the ``compile_suite`` needs to be updated.
     An update is needed, if either the grammar in the compieler suite
@@ -313,7 +320,7 @@ def is_outdated(compiler_suite, grammar_source):
         return True


-def run_compiler(text_or_file, compiler_suite):
+def run_compiler(text_or_file: str, compiler_suite: str) -> Any:
     """Compiles a source with a given compiler suite.

     Args:
@@ -336,7 +343,7 @@ def run_compiler(text_or_file, compiler_suite):
     return compileDSL(text_or_file, scanner(), parser(), ast(), compiler())


-def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
+def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
     """Compiles the a source file with a given compiler and writes the
     result to a file.
@@ -373,18 +380,20 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
     rootname = os.path.splitext(filepath)[0]
     compiler_name = os.path.basename(rootname)
     if compiler_suite:
-        scanner, parser, trans, cfactory = load_compiler_suite(compiler_suite)
+        sfactory, pfactory, tfactory, cfactory = load_compiler_suite(compiler_suite)
     else:
-        scanner = get_ebnf_scanner
-        parser = get_ebnf_grammar
-        trans = get_ebnf_transformer
+        sfactory = get_ebnf_scanner
+        pfactory = get_ebnf_grammar
+        tfactory = get_ebnf_transformer
         cfactory = get_ebnf_compiler
-    compiler1 = cfactory(compiler_name, source_file)
-    result, errors, ast = compile_source(source_file, scanner(), parser(), trans(), compiler1)
+    compiler1 = cfactory()
+    compiler1.set_grammar_name(compiler_name, source_file)
+    result, errors, ast = compile_source(source_file, sfactory(), pfactory(), tfactory(), compiler1)
     if errors:
         return errors
     elif cfactory == get_ebnf_compiler:  # trans == get_ebnf_transformer or trans == EBNFTransformer: # either an EBNF- or no compiler suite given
+        ebnf_compiler = cast(EBNFCompiler, compiler1)
         global SECTION_MARKER, RX_SECTION_MARKER, SCANNER_SECTION, PARSER_SECTION, \
             AST_SECTION, COMPILER_SECTION, END_SECTIONS_MARKER, RX_WHITESPACE, \
             DHPARSER_MAIN, DHPARSER_IMPORTS
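The new ebnf_compiler = cast(EBNFCompiler, compiler1) line only narrows the type for the checker; compile_source() is annotated in terms of the base Compiler class, while the skeleton-generating methods used further down exist only on EBNFCompiler. A small illustration of what cast() does and does not do (the helper name is invented):

    from typing import cast
    from DHParser.parsers import Compiler
    from DHParser.ebnf import EBNFCompiler

    def narrow(compiler: Compiler) -> EBNFCompiler:
        # cast() performs no runtime conversion or check; it only tells mypy to
        # treat the value as an EBNFCompiler, so calls such as
        # gen_scanner_skeleton() type-check.  The caller must guarantee the
        # value really has that type.
        return cast(EBNFCompiler, compiler)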
@@ -412,11 +421,11 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
         if RX_WHITESPACE.fullmatch(imports):
             imports = DHPARSER_IMPORTS
         if RX_WHITESPACE.fullmatch(scanner):
-            scanner = compiler1.gen_scanner_skeleton()
+            scanner = ebnf_compiler.gen_scanner_skeleton()
         if RX_WHITESPACE.fullmatch(ast):
-            ast = compiler1.gen_transformer_skeleton()
+            ast = ebnf_compiler.gen_transformer_skeleton()
         if RX_WHITESPACE.fullmatch(compiler):
-            compiler = compiler1.gen_compiler_skeleton()
+            compiler = ebnf_compiler.gen_compiler_skeleton()
         try:
             f = open(rootname + 'Compiler.py', 'w', encoding="utf-8")
@@ -441,6 +450,7 @@ def compile_on_disk(source_file, compiler_suite="", extension=".xml"):
             if f:
                 f.close()
     else:
         f = None
         try:
             f = open(rootname + extension, 'w', encoding="utf-8")
             if isinstance(result, Node):
DHParser/ebnf.py
@@ -18,19 +18,20 @@ permissions and limitations under the License.
 import keyword
 from functools import partial
 try:
     import regex as re
 except ImportError:
     import re
+from typing import Callable, cast, List, Set, Tuple

-from .toolkit import load_if_file, escape_re, md5, sane_parser_name
-from .parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
-    Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, CompilerBase
-from .syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \
+from DHParser.toolkit import load_if_file, escape_re, md5, sane_parser_name
+from DHParser.parsers import Grammar, mixin_comment, nil_scanner, Forward, RE, NegativeLookahead, \
+    Alternative, Sequence, Optional, Required, OneOrMore, ZeroOrMore, Token, Compiler, \
+    ScannerFunc
+from DHParser.syntaxtree import Node, traverse, remove_enclosing_delimiters, reduce_single_child, \
     replace_by_single_child, TOKEN_PTYPE, remove_expendables, remove_tokens, flatten, \
-    forbid, assert_content, WHITESPACE_PTYPE, key_tag_name
-from .versionnumber import __version__
+    forbid, assert_content, WHITESPACE_PTYPE, key_tag_name, TransformerFunc
+from DHParser.versionnumber import __version__

 __all__ = ['get_ebnf_scanner',
@@ -41,7 +42,11 @@ __all__ = ['get_ebnf_scanner',
            'EBNFTransformer',
            'EBNFCompilerError',
            'EBNFCompiler',
-           'grammar_changed']
+           'grammar_changed',
+           'ScannerFactoryFunc',
+           'ParserFactoryFunc',
+           'TransformerFactoryFunc',
+           'CompilerFactoryFunc']


########################################################################
@@ -51,7 +56,7 @@ __all__ = ['get_ebnf_scanner',
 ########################################################################


-def get_ebnf_scanner():
+def get_ebnf_scanner() -> ScannerFunc:
     return nil_scanner
@@ -137,7 +142,7 @@ class EBNFGrammar(Grammar):
     root__ = syntax


-def grammar_changed(grammar_class, grammar_source):
+def grammar_changed(grammar_class, grammar_source: str) -> bool:
     """Returns ``True`` if ``grammar_class`` does not reflect the latest
     changes of ``grammar_source``
@@ -168,7 +173,7 @@ def grammar_changed(grammar_class, grammar_source):
     return chksum != grammar_class.source_hash__


-def get_ebnf_grammar():
+def get_ebnf_grammar() -> EBNFGrammar:
     global thread_local_ebnf_grammar_singleton
     try:
         grammar = thread_local_ebnf_grammar_singleton
@@ -223,13 +228,13 @@ EBNF_validation_table = {
 }


-def EBNFTransformer(syntax_tree):
+def EBNFTransformer(syntax_tree: Node):
     for processing_table, key_func in [(EBNF_transformation_table, key_tag_name),
                                        (EBNF_validation_table, key_tag_name)]:
         traverse(syntax_tree, processing_table, key_func)


-def get_ebnf_transformer():
+def get_ebnf_transformer() -> TransformerFunc:
     return EBNFTransformer
@@ -239,6 +244,13 @@ def get_ebnf_transformer():
 #
 ########################################################################


+ScannerFactoryFunc = Callable[[], ScannerFunc]
+ParserFactoryFunc = Callable[[], Grammar]
+TransformerFactoryFunc = Callable[[], TransformerFunc]
+CompilerFactoryFunc = Callable[[], Compiler]


 SCANNER_FACTORY = '''
 def get_scanner():
     return {NAME}Scanner
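These Callable aliases give the zero-argument factory protocol a name that dsl.py (e.g. the return type of load_compiler_suite) and generated compiler suites can share. An illustrative type comment showing how a concrete factory is checked against one of them:

    from DHParser.ebnf import ParserFactoryFunc, get_ebnf_grammar

    ebnf_parser_factory = get_ebnf_grammar   # type: ParserFactoryFunc
    grammar = ebnf_parser_factory()          # mypy infers a Grammar object here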
@@ -283,7 +295,7 @@ class EBNFCompilerError(Exception):
     pass


-class EBNFCompiler(CompilerBase):
+class EBNFCompiler(Compiler):
     """Generates a Parser from an abstract syntax tree of a grammar specified
     in EBNF-Notation.
     """
@@ -305,13 +317,13 @@ class EBNFCompiler(CompilerBase):
         self._reset()

     def _reset(self):
-        self._result = None
-        self.rules = set()
-        self.variables = set()
-        self.symbol_nodes = []
-        self.definition_names = []
-        self.recursive = set()
-        self.root = ""
+        self._result = ''           # type: str
+        self.rules = set()          # type: Set[str]
+        self.variables = set()      # type: Set[str]
+        self.symbol_nodes = []      # type: List[Node]
+        self.definition_names = []  # type: List[str]
+        self.recursive = set()      # type: Set[str]
+        self.root = ""              # type: str
         self.directives = {'whitespace': self.WHITESPACE['horizontal'],
                            'comment': '',
                            'literalws': ['right'],
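The attribute types are recorded as PEP 484 type comments rather than variable annotations, which keeps the module importable on interpreters that predate the PEP 526 annotation syntax (introduced in Python 3.6) while still informing mypy. A minimal illustration with invented names:

    from typing import List, Set

    class ResetSketch:
        def __init__(self) -> None:
            # type comments are read by the checker but ignored at runtime
            self.rules = set()   # type: Set[str]
            self.names = []      # type: List[str]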
@@ -319,15 +331,15 @@ class EBNFCompiler(CompilerBase):
                            'filter': dict()}  # alt. 'retrieve_filter'

     @property
-    def result(self):
+    def result(self) -> str:
         return self._result

-    def gen_scanner_skeleton(self):
+    def gen_scanner_skeleton(self) -> str:
         name = self.grammar_name + "Scanner"
         return "def %s(text):\n    return text\n" % name \
                + SCANNER_FACTORY.format(NAME=self.grammar_name)

-    def gen_transformer_skeleton(self):
+    def gen_transformer_skeleton(self) -> str:
         if not self.definition_names:
             raise EBNFCompilerError('Compiler must be run before calling '
                                     '"gen_transformer_Skeleton()"!')
@@ -343,11 +355,11 @@ class EBNFCompiler(CompilerBase):
         transtable += [TRANSFORMER_FACTORY.format(NAME=self.grammar_name)]
         return '\n'.join(transtable)

-    def gen_compiler_skeleton(self):
+    def gen_compiler_skeleton(self) -> str:
         if not self.definition_names:
             raise EBNFCompilerError('Compiler has not been run before calling '
                                     '"gen_Compiler_Skeleton()"!')
-        compiler = ['class ' + self.grammar_name + 'Compiler(CompilerBase):',
+        compiler = ['class ' + self.grammar_name + 'Compiler(Compiler):',
                    '    """Compiler for the abstract-syntax-tree of a ' + self.grammar_name + ' source file.',
                    '    """',
                    '',
@@ -357,23 +369,23 @@ class EBNFCompiler(CompilerBase):
                    'Compiler, self).__init__(grammar_name, grammar_source)',
                    "        assert re.match('\w+\Z', grammar_name)", '']
         for name in self.definition_names:
-            method_name = CompilerBase.derive_method_name(name)
+            method_name = Compiler.derive_method_name(name)
             if name == self.root:
-                compiler += ['    def ' + method_name + '(self, node):',
+                compiler += ['    def ' + method_name + '(self, node: Node) -> str:',
                              '        return node', '']
             else:
-                compiler += ['    def ' + method_name + '(self, node):',
+                compiler += ['    def ' + method_name + '(self, node: Node) -> str:',
                              '        pass', '']
         compiler += [COMPILER_FACTORY.format(NAME=self.grammar_name)]
         return '\n'.join(compiler)

-    def assemble_parser(self, definitions, root_node):
+    def assemble_parser(self, definitions: List[Tuple[str, str]], root_node: Node) -> str:
         # fix capture of variables that have been defined before usage [sic!]
         if self.variables:
             for i in range(len(definitions)):
                 if definitions[i][0] in self.variables:
-                    definitions[i] = (definitions[i][0], 'Capture(%s)' % definitions[1])
+                    definitions[i] = (definitions[i][0], 'Capture(%s)' % definitions[i][1])
         self.definition_names = [defn[0] for defn in definitions]
         definitions.append(('wspR__', self.WHITESPACE_KEYWORD
@@ -434,27 +446,27 @@ class EBNFCompiler(CompilerBase):
                         + GRAMMAR_FACTORY.format(NAME=self.grammar_name)
         return self._result

-    def on_syntax(self, node):
+    def on_syntax(self, node: Node) -> str:
         self._reset()
         definitions = []

         # drop the wrapping sequence node
-        if len(node.children) == 1 and not node.result[0].parser.name:
-            node = node.result[0]
+        if len(node.children) == 1 and not node.children[0].parser.name:
+            node = node.children[0]

         # compile definitions and directives and collect definitions
-        for nd in node.result:
+        for nd in node.children:
             if nd.parser.name == "definition":
                 definitions.append(self._compile(nd))
             else:
                 assert nd.parser.name == "directive", nd.as_sexpr()
                 self._compile(nd)
-            node.error_flag |= nd.error_flag
+            node.error_flag = node.error_flag or nd.error_flag

         return self.assemble_parser(definitions, node)

-    def on_definition(self, node):
-        rule = node.result[0].result
+    def on_definition(self, node: Node) -> Tuple[str, str]:
+        rule = cast(str, node.children[0].result)
         if rule in self.rules:
             node.add_error('A rule with name "%s" has already been defined.' % rule)
         elif rule in EBNFCompiler.RESERVED_SYMBOLS:
@@ -470,7 +482,7 @@ class EBNFCompiler(CompilerBase):
                            % rule + '(This may change in the furute.)')
         try:
             self.rules.add(rule)
-            defn = self._compile(node.result[1])
+            defn = self._compile(node.children[1])
             if rule in self.variables:
                 defn = 'Capture(%s)' % defn
                 self.variables.remove(rule)
@@ -481,7 +493,7 @@ class EBNFCompiler(CompilerBase):
         return rule, defn

     @staticmethod
-    def _check_rx(node, rx):
+    def _check_rx(node: Node, rx: str) -> str:
         """Checks whether the string `rx` represents a valid regular
         expression. Makes sure that multiline regular expressions are
         prepended by the multiline-flag. Returns the regular expression string.
@@ -494,22 +506,22 @@ class EBNFCompiler(CompilerBase):
                             (repr(rx), str(re_error)))
         return rx

-    def on_directive(self, node):
-        key = node.result[0].result.lower()
+    def on_directive(self, node: Node) -> str:
+        key = cast(str, node.children[0].result).lower()
         assert key not in self.directives['tokens']
         if key in {'comment', 'whitespace'}:
-            if node.result[1].parser.name == "list_":
-                if len(node.result[1].result) != 1:
+            if node.children[1].parser.name == "list_":
+                if len(node.children[1].result) != 1:
                     node.add_error('Directive "%s" must have one, but not %i values.' %
-                                   (key, len(node.result[1])))
-                value = self._compile(node.result[1]).pop()
+                                   (key, len(node.children[1].result)))
+                value = self._compile(node.children[1]).pop()
                 if key == 'whitespace' and value in EBNFCompiler.WHITESPACE:
                     value = EBNFCompiler.WHITESPACE[value]  # replace whitespace-name by regex
                 else:
                     node.add_error('Value "%s" not allowed for directive "%s".' % (value, key))
             else:
-                value = node.result[1].result.strip("~")
-                if value != node.result[1].result:
+                value = cast(str, node.children[1].result).strip("~")
+                if value != cast(str, node.children[1].result):
                     node.add_error("Whitespace marker '~' not allowed in definition of "
                                    "%s regular expression." % key)
                 if value[0] + value[-1] in {'""', "''"}:
@@ -522,7 +534,7 @@ class EBNFCompiler(CompilerBase):
             self.directives[key] = value

         elif key == 'literalws':
-            value = {item.lower() for item in self._compile(node.result[1])}
+            value = {item.lower() for item in self._compile(node.children[1])}
            if (len(value - {'left', 'right', 'both', 'none'}) > 0
                    or ('none' in value and len(value) > 1)):
                node.add_error('Directive "literalws" allows the values '
@@ -533,10 +545,10 @@ class EBNFCompiler(CompilerBase):
             self.directives[key] = list(ws)

         elif key in {'tokens', 'scanner_tokens'}:
-            self.directives['tokens'] |= self._compile(node.result[1])
+            self.directives['tokens'] |= self._compile(node.children[1])

         elif key.endswith('_filter'):
-            filter_set = self._compile(node.result[1])
+            filter_set = self._compile(node.children[1])
             if not isinstance(filter_set, set) or len(filter_set) != 1:
                 node.add_error('Directive "%s" accepts exactly on symbol, not %s'
                                % (key, str(filter_set)))
@@ -548,82 +560,84 @@ class EBNFCompiler(CompilerBase):
                            ', '.join(list(self.directives.keys()))))
         return ""

-    def non_terminal(self, node, parser_class, custom_args=[]):
+    def non_terminal(self, node: Node, parser_class: str, custom_args: List[str]=[]) -> str:
         """Compiles any non-terminal, where `parser_class` indicates the Parser class
         name for the particular non-terminal.
         """
-        arguments = [self._compile(r) for r in node.result] + custom_args
+        arguments = [self._compile(r) for r in node.children] + custom_args
         return parser_class + '(' + ', '.join(arguments) + ')'

-    def on_expression(self, node):
+    def on_expression(self, node) -> str:
         return self.non_terminal(node, 'Alternative')

-    def on_term(self, node):
+    def on_term(self, node) -> str:
         return self.non_terminal(node, 'Sequence')

-    def on_factor(self, node):
+    def on_factor(self, node: Node) -> str:
         assert node.children
-        assert len(node.result) >= 2, node.as_sexpr()
-        prefix = node.result[0].result
-        custom_args = []
+        assert len(node.children) >= 2, node.as_sexpr()
+        prefix = cast(str, node.children[0].result)
+        custom_args = []  # type: List[str]

         if prefix in {'::', ':'}:
-            assert len(node.result) == 2
-            arg = node.result[-1]
+            assert len(node.children) == 2
+            arg = node.children[-1]
             if arg.parser.name != 'symbol':
                 node.add_error(('Retrieve Operator "%s" requires a symbol, '
                                 'and not a %s.') % (prefix, str(arg.parser)))
                 return str(arg.result)
             if str(arg) in self.directives['filter']:
                 custom_args = ['retrieve_filter=%s' % self.directives['filter'][str(arg)]]
-            self.variables.add(arg.result)
+            self.variables.add(cast(str, arg.result))

-        elif len(node.result) > 2:
+        elif len(node.children) > 2:
             # shift = (Node(node.parser, node.result[1].result),)
             # node.result[1].result = shift + node.result[2:]
-            node.result[1].result = (Node(node.result[1].parser, node.result[1].result),) \
-                                    + node.result[2:]
-            node.result[1].parser = node.parser
-            node.result = (node.result[0], node.result[1])
+            node.children[1].result = (Node(node.children[1].parser, node.children[1].result),) \
+                                      + node.children[2:]
+            node.children[1].parser = node.parser
+            node.result = (node.children[0], node.children[1])

-        node.result = node.result[1:]
+        node.result = node.children[1:]
         try:
             parser_class = self.PREFIX_TABLE[prefix]
             return self.non_terminal(node, parser_class, custom_args)
         except KeyError:
             node.add_error('Unknown prefix "%s".' % prefix)
         return ""

-    def on_option(self, node):
+    def on_option(self, node) -> str:
         return self.non_terminal(node, 'Optional')

-    def on_repetition(self, node):
+    def on_repetition(self, node) -> str:
         return self.non_terminal(node, 'ZeroOrMore')

-    def on_oneormore(self, node):
+    def on_oneormore(self, node) -> str:
         return self.non_terminal(node, 'OneOrMore')

-    def on_regexchain(self, node):
+    def on_regexchain(self, node) -> str:
         raise EBNFCompilerError("Not yet implemented!")

-    def on_group(self, node):
+    def on_group(self, node) -> str:
         raise EBNFCompilerError("Group nodes should have been eliminated by "
                                 "AST transformation!")

-    def on_symbol(self, node):
-        if node.result in self.directives['tokens']:
-            return 'ScannerToken("' + node.result + '")'
+    def on_symbol(self, node: Node) -> str:
+        result = cast(str, node.result)
+        if result in self.directives['tokens']:
+            return 'ScannerToken("' + result + '")'
         else:
             self.symbol_nodes.append(node)
-            if node.result in self.rules:
-                self.recursive.add(node.result)
-            return node.result
+            if result in self.rules:
+                self.recursive.add(result)
+            return result

-    def on_literal(self, node):
-        return 'Token(' + node.result.replace('\\', r'\\') + ')'
+    def on_literal(self, node) -> str:
+        return 'Token(' + cast(str, node.result).replace('\\', r'\\') + ')'
         # return 'Token(' + ', '.join([node.result]) + ')' ?

-    def on_regexp(self, node):
-        rx = node.result
-        name = []
+    def on_regexp(self, node: Node) -> str:
+        rx = cast(str, node.result)
+        name = []   # type: List[str]
         if rx[:2] == '~/':
             if not 'left' in self.directives['literalws']:
                 name = ['wL=' + self.WHITESPACE_KEYWORD] + name
@@ -645,12 +659,12 @@ class EBNFCompiler(CompilerBase):
             return '"' + errmsg + '"'
         return 'RE(' + ', '.join([arg] + name) + ')'

-    def on_list_(self, node):
+    def on_list_(self, node) -> Set[str]:
         assert node.children
-        return set(item.result.strip() for item in node.result)
+        return set(item.result.strip() for item in node.children)


-def get_ebnf_compiler(grammar_name="", grammar_source=""):
+def get_ebnf_compiler(grammar_name="", grammar_source="") -> EBNFCompiler:
     global thread_local_ebnf_compiler_singleton
     try:
         compiler = thread_local_ebnf_compiler_singleton
DHParser/parsers.py
@@ -50,17 +50,21 @@ https://bitbucket.org/apalala/grako
 import copy
 from functools import partial
 import os
 try:
     import regex as re
 except ImportError:
     import re
+from typing import Any, Callable, Dict, Iterator, List, Set, Tuple, Union

-from .toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name
-from .syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Node
+from DHParser.toolkit import is_logging, log_dir, logfile_basename, escape_re, sane_parser_name
+from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Node, \
+    TransformerFunc
+from DHParser.toolkit import load_if_file, error_messages

-__all__ = ['HistoryRecord',
+__all__ = ['ScannerFunc',
+           'HistoryRecord',
            'Parser',