badw-it / DHParser · Commits

Commit c7b50f80, authored Apr 02, 2017 by Eckhart Arnold

- various refactorings

parent: 4b26772d
Changes: 6 files
DSLsupport.py

```diff
@@ -21,8 +21,8 @@ Module ``DSLsupport`` contains various functions to support the
 compilation of domain specific languages based on an EBNF-grammar.
 """
 from functools import partial
 import os
 try:
     import regex as re
 except ImportError:
```
```diff
@@ -30,11 +30,19 @@ except ImportError:
 from EBNFcompiler import EBNFGrammar, EBNFCompiler, EBNFTransTable, load_if_file, md5
 from logging import LOGGING
 from parser import PARSER_SYMBOLS, COMPILER_SYMBOLS, GrammarBase, CompilerBase, \
     full_compilation, nil_scanner
 from syntaxtree import AST_SYMBOLS, Node
 from parser import *
 from syntaxtree import *
 from version import __version__

 __all__ = ['GrammarError',
            'CompilationError',
            'load_compiler_suite',
            'compileDSL',
            'run_compiler',
            'source_changed']

 SECTION_MARKER = """\n
 #######################################################################
 #
```
```diff
@@ -142,6 +150,9 @@ def get_grammar_instance(grammar):
 def load_compiler_suite(compiler_suite):
     """Extracts a compiler suite from file or string ``compiler_suite``
     and returns it as a tuple (scanner, parser, ast, compiler).
     """
     global RX_SECTION_MARKER
     assert isinstance(compiler_suite, str)
     source = load_if_file(compiler_suite)
```
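Since ``load_compiler_suite`` returns the four stages as a tuple, a caller can unpack them directly. A minimal usage sketch (the file name is hypothetical):

```python
# Hypothetical usage of load_compiler_suite(); 'arithmetic_compiler.py' is a
# made-up name for a previously generated compiler suite.
scanner, parser, ast, compiler = load_compiler_suite('arithmetic_compiler.py')
```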
```diff
@@ -189,24 +200,24 @@ def run_compiler(source_file, compiler_suite="", extension=".xml"):
     """Compiles a source file with a given compiler and writes the
     result to a file.

     If no ``compiler_suite`` is given, it is assumed that the source
     file is an EBNF grammar. In this case the result will be a Python
     script containing a parser for that grammar as well as the
     skeletons for a scanner, AST transformation table, and compiler.
     If the Python script already exists, only the parser name in the
     script will be updated. (For this to work, the different names
     need to be delimited by section marker blocks.) `run_compiler()`
     returns a list of error messages or an empty list if no errors
     occurred.
     """
-    def import_block(module, symbols):
+    def import_block(python_module, symbols):
         """Generates a Python ``import`` statement that imports all
         symbols in ``symbols`` (set or other container) from
-        module ``module``."""
+        python_module ``python_module``."""
         symlist = list(symbols)
         grouped = [symlist[i:i + 4] for i in range(0, len(symlist), 4)]
-        return ("\nfrom " + module + " import "
+        return ("\nfrom " + python_module + " import "
                 + ', \\\n    '.join(', '.join(g) for g in grouped) + '\n\n')

     filepath = os.path.normpath(source_file)
```
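The docstring above implies a simple call pattern; judging from `dhparser.py` further down, errors come back as a list rather than being raised. A hedged sketch (the grammar file name is made up):

```python
# Hypothetical invocation of run_compiler(); 'arithmetic.ebnf' is a made-up
# grammar file. On success, a parser script is written next to the source.
errors = run_compiler('arithmetic.ebnf')
if errors:
    print('\n'.join(errors))
```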
EBNFcompiler.py

```diff
@@ -18,29 +18,27 @@ implied. See the License for the specific language governing
 permissions and limitations under the License.
 """
-import collections
+# import collections
 import hashlib
 import keyword
 from functools import partial
 try:
     import regex as re
 except ImportError:
     import re
 from parser import mixin_comment, RE, Token, Required, NegativeLookahead, Optional, ZeroOrMore, \
     Sequence, Alternative, Forward, OneOrMore, GrammarBase, CompilerBase, escape_re, \
     sane_parser_name
 from syntaxtree import replace_by_single_child, reduce_single_child, remove_expendables, \
     flatten, remove_tokens, remove_brackets, TOKEN_KEYWORD, WHITESPACE_KEYWORD, Node
 from parser import *
 from syntaxtree import *
 from version import __version__


 ########################################################################
 #
 # EBNF-Grammar-Compiler
 #
 ########################################################################

 __all__ = ['EBNFGrammar',
            'EBNFTransTable',
            'load_if_file',
            'EBNFCompilerError',
            # 'Scanner',
            'md5',
            'EBNFCompiler']


 class EBNFGrammar(GrammarBase):
```
```diff
@@ -159,8 +157,8 @@ class EBNFCompilerError(Exception):
     pass


-Scanner = collections.namedtuple('Scanner', 'symbol instantiation_call cls_name cls')
+# Scanner = collections.namedtuple('Scanner',
+#                                  'symbol instantiation_call cls_name cls')


 def md5(*txt):
```
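Given the signature ``md5(*txt)`` and the ``hashlib`` import above, the helper presumably folds several source strings into one digest (useful for detecting whether a grammar has changed). A minimal sketch of such a helper, assuming that behavior rather than quoting the actual implementation:

```python
import hashlib

def md5(*txt):
    """Combine the md5 digests of several strings into one hex digest
    (a sketch of the presumed behavior, not DHParser's actual code)."""
    digest = hashlib.md5()
    for t in txt:
        digest.update(t.encode('utf8'))
    return digest.hexdigest()
```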
```diff
@@ -254,10 +252,10 @@ class EBNFCompiler(CompilerBase):
                          (definitions[1], definitions[0]))
         self.definition_names = [defn[0] for defn in definitions]
-        definitions.append(('wspR__', WHITESPACE_KEYWORD \
-            if 'right' in self.directives['literalws'] else "''"))
-        definitions.append(('wspL__', WHITESPACE_KEYWORD \
-            if 'left' in self.directives['literalws'] else "''"))
+        definitions.append(('wspR__', WHITESPACE_KEYWORD
+            if 'right' in self.directives['literalws'] else "''"))
+        definitions.append(('wspL__', WHITESPACE_KEYWORD
+            if 'left' in self.directives['literalws'] else "''"))
         definitions.append((WHITESPACE_KEYWORD,
                             ("mixin_comment(whitespace="
                              "r'{whitespace}', comment=r'{comment}')").
```
```diff
@@ -346,7 +344,7 @@ class EBNFCompiler(CompilerBase):
             errmsg = EBNFCompiler.AST_ERROR + " (" + str(error) + ")\n" + node.as_sexpr()
             node.add_error(errmsg)
             rule, defn = rule + ':error', '"' + errmsg + '"'
-        return (rule, defn)
+        return rule, defn

     @staticmethod
     def _check_rx(node, rx):
```
```diff
@@ -377,7 +375,7 @@ class EBNFCompiler(CompilerBase):
         elif key == 'literalws':
             value = {item.lower() for item in self.compile__(node.result[1])}
             if (len(value - {'left', 'right', 'both', 'none'}) > 0
                     or ('none' in value and len(value) > 1)):
                 node.add_error('Directive "literalws" allows the values '
                                '`left`, `right`, `both` or `none`, '
                                'but not `%s`' % ", ".join(value))
```
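The check above encodes two rules: every given value must be one of the four known ones, and `none` excludes all others. A quick illustration with hypothetical directive values:

```python
# Hypothetical values for the 'literalws' directive, run through the same test:
for value in ({'left'}, {'left', 'nonsense'}, {'none', 'right'}):
    bad = (len(value - {'left', 'right', 'both', 'none'}) > 0
           or ('none' in value and len(value) > 1))
    print(sorted(value), '-> error' if bad else '-> ok')
# ['left'] -> ok
# ['left', 'nonsense'] -> error
# ['none', 'right'] -> error
```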
```diff
@@ -473,7 +471,7 @@ class EBNFCompiler(CompilerBase):
         elif 'left' in self.directives['literalws']:
             name = ["wL=''"] + name
         if rx[-2:] == '/~':
-            if not 'right' in self.directives['literalws']:
+            if 'right' not in self.directives['literalws']:
                 name = ['wR=' + WHITESPACE_KEYWORD] + name
             rx = rx[:-1]
         elif 'right' in self.directives['literalws']:
```
dhparser.py

```diff
@@ -69,7 +69,7 @@ if __name__ == "__main__":
     if len(sys.argv) > 1:
         _errors = run_compiler(sys.argv[1],
                                sys.argv[2] if len(sys.argv) > 2 else "")
-        if (_errors):
+        if _errors:
             print(_errors)
             sys.exit(1)
     else:
```
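Judging from this ``__main__`` block, the script is presumably invoked as ``python dhparser.py <source_file> [<compiler_suite>]`` (argument names inferred, not documented here): with one argument it compiles an EBNF grammar, with two it applies the given compiler suite, and on errors it prints them and exits with status 1.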
logging.py

```diff
@@ -30,6 +30,10 @@ already exists.
 import os

 __all__ = ['LOGGING', 'LOGS_DIR']

 LOGGING: str = "LOGS"  # LOGGING = "" turns logging off!
```
parser.py

```diff
@@ -16,22 +16,94 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 implied. See the License for the specific language governing
 permissions and limitations under the License.

 Module ``parsers.py`` contains a number of classes that together
 make up parser combinators for left-recursive grammars. For each
 element of the extended Backus-Naur-Form as well as for a regular
 expression token a class is defined. The set of classes can be used to
 define a parser for (ambiguous) left-recursive grammars.

 References and Acknowledgements:

 Dominikus Herzberg: Objekt-orientierte Parser-Kombinatoren in Python,
 Blog-Post, September 18th, 2008 on denkspuren. gedanken, ideen,
 anregungen und links rund um informatik-themen, URL:
 http://denkspuren.blogspot.de/2008/09/objekt-orientierte-parser-kombinatoren.html

 Dominikus Herzberg: Eine einfache Grammatik für LaTeX, Blog-Post,
 September 18th, 2008 on denkspuren. gedanken, ideen, anregungen und
 links rund um informatik-themen, URL:
 http://denkspuren.blogspot.de/2008/09/eine-einfache-grammatik-fr-latex.html

 Dominikus Herzberg: Uniform Syntax, Blog-Post, February 27th, 2007 on
 denkspuren. gedanken, ideen, anregungen und links rund um
 informatik-themen, URL:
 http://denkspuren.blogspot.de/2007/02/uniform-syntax.html

 Richard A. Frost, Rahmatullah Hafiz and Paul Callaghan: Parser
 Combinators for Ambiguous Left-Recursive Grammars, in: P. Hudak and
 D.S. Warren (Eds.): PADL 2008, LNCS 4902, pp. 167–181, Springer-Verlag
 Berlin Heidelberg 2008.

 Juancarlo Añez: grako, a PEG parser generator in Python,
 https://bitbucket.org/apalala/grako
 """
 import copy
 import os
 try:
     import regex as re
 except ImportError:
     import re
 from logging import LOGGING, LOGS_DIR
-from syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, error_messages, \
-    ASTTransform
+from syntaxtree import WHITESPACE_KEYWORD, TOKEN_KEYWORD, ZOMBIE_PARSER, Node, \
+    error_messages, ASTTransform

 __all__ = ['HistoryRecord',
            'Parser',
            'GrammarBase',
            'RX_SCANNER_TOKEN',
            'BEGIN_SCANNER_TOKEN',
            'END_SCANNER_TOKEN',
            'make_token',
            'nil_scanner',
            'ScannerToken',
            'RegExp',
            'RE',
            'escape_re',
            'Token',
            'mixin_comment',
            'UnaryOperator',
            'NaryOperator',
            'Optional',
            'ZeroOrMore',
            'OneOrMore',
            'Sequence',
            'Alternative',
            'FlowOperator',
            'Required',
            'Lookahead',
            'NegativeLookahead',
            'Lookbehind',
            'NegativeLookbehind',
            'Capture',
            'Retrieve',
            'Pop',
            'Forward',
            'PARSER_SYMBOLS',
            'sane_parser_name',
            'CompilerBase',
            'full_compilation',
            'COMPILER_SYMBOLS']

 LEFT_RECURSION_DEPTH = 10  # because of Python's recursion depth limit, this
                            # value ought not to be set too high
 MAX_DROPOUTS = 25  # stop trying to recover parsing after so many errors
```
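The module docstring advertises support for left-recursive grammars, and `LEFT_RECURSION_DEPTH` hints at how: recursion on the same input position is cut off after a fixed depth. Below is a minimal, self-contained sketch of that idea (not DHParser's actual algorithm) for the directly left-recursive rule `expr = expr "+" num | num`:

```python
LEFT_RECURSION_DEPTH = 10  # same idea as the constant above

def parse_num(text, pos):
    """Parse a decimal number; return (value, end_position) or None."""
    end = pos
    while end < len(text) and text[end].isdigit():
        end += 1
    return (int(text[pos:end]), end) if end > pos else None

def parse_expr(text, pos=0, depth=0):
    """expr = expr "+" num | num -- naive descent would recurse forever on
    the first alternative, so it is abandoned beyond a fixed depth."""
    if depth <= LEFT_RECURSION_DEPTH:
        result = parse_expr(text, pos, depth + 1)  # left-recursive branch
        if result is not None:
            value, end = result
            if text[end:end + 1] == '+':
                rest = parse_num(text, end + 1)
                if rest is not None:
                    return value + rest[0], rest[1]
            return result
    return parse_num(text, pos)  # fallback branch: a bare number

print(parse_expr("1+2+3"))  # (6, 5)
```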
```diff
@@ -169,7 +241,7 @@ class Parser(metaclass=ParserMetaClass):
     def apply(self, func):
         """Applies function `func(parser)` recursively to this parser and all
-        descendendants of the tree of parsers. The same function can never
+        descendants of the tree of parsers. The same function can never
         be applied twice between calls of the ``reset()``-method!
         """
         if func in self.cycle_detection:
```
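The guard shown above is the heart of the method: a grammar's parser graph can be cyclic (via `Forward`), so `apply` records each function in `cycle_detection` to make the traversal terminate. A hedged sketch of the presumable pattern (`sub_parsers()` is a made-up accessor, not DHParser's API):

```python
def apply(self, func):
    """Sketch of the cycle-safe traversal described in the docstring;
    not DHParser's actual code."""
    if func in self.cycle_detection:
        return False                   # already applied since last reset()
    self.cycle_detection.add(func)
    for parser in self.sub_parsers():  # hypothetical accessor for children
        parser.apply(func)
    func(self)
    return True
```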
```diff
@@ -320,6 +392,7 @@ class GrammarBase:
             write_log(errors_only, '_errors')


 ########################################################################
 #
 # Token and Regular Expression parser classes (i.e. leaf classes)
@@ -327,7 +400,6 @@ class GrammarBase:
 ########################################################################

 RX_SCANNER_TOKEN = re.compile('\w+')
 BEGIN_SCANNER_TOKEN = '\x1b'
 END_SCANNER_TOKEN = '\x1c'
```
```diff
@@ -347,7 +419,8 @@ def make_token(token, argument=''):
     return BEGIN_SCANNER_TOKEN + token + argument + END_SCANNER_TOKEN


-nil_scanner = lambda text: text
+def nil_scanner(text):
+    return text


 class ScannerToken(Parser):
```
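The `nil_scanner` rewrite from a lambda to a `def` follows PEP 8's advice against binding lambdas to names. As for `make_token`, the `return` statement above shows that a scanner token is just the token name (plus an optional argument) wrapped in the two control characters, e.g. (hypothetical token name):

```python
make_token('INDENT', '    ')  # == '\x1b' + 'INDENT' + '    ' + '\x1c'
```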
```diff
@@ -355,7 +428,7 @@ class ScannerToken(Parser):
         assert isinstance(scanner_token, str) and scanner_token and \
             scanner_token.isupper()
         assert RX_SCANNER_TOKEN.match(scanner_token)
-        super(ScannerToken, self).__init__(scanner_token, name=TOKEN_KEYWORD)
+        super(ScannerToken, self).__init__(scanner_token)

     def __call__(self, text):
         if text[0:1] == BEGIN_SCANNER_TOKEN:
```
```diff
@@ -400,8 +473,7 @@ class RegExp(Parser):
         duplicate.regexp = self.regexp
         duplicate.grammar = self.grammar
         duplicate.visited = copy.deepcopy(self.visited, memo)
         duplicate.recursion_counter = copy.deepcopy(self.recursion_counter, memo)
-        duplicate.recursion_counter = copy.deepcopy(self.recursion_counter, memo)
         return duplicate

     def __call__(self, text):
```
syntaxtree.py

```diff
@@ -27,6 +27,31 @@ from typing import NamedTuple
 from logging import LOGGING, LOGS_DIR

 __all__ = ['WHITESPACE_KEYWORD',
            'TOKEN_KEYWORD',
            'line_col',
            'ZOMBIE_PARSER',
            'Error',
            'Node',
            'error_messages',
            'ASTTransform',
            'no_transformation',
            'replace_by_single_child',
            'reduce_single_child',
            'is_whitespace',
            'is_empty',
            'is_expendable',
            'is_token',
            'remove_children_if',
            'remove_whitespace',
            'remove_expendables',
            'remove_tokens',
            'flatten',
            'remove_brackets',
            'AST_SYMBOLS']

 WHITESPACE_KEYWORD = 'WSP__'
 TOKEN_KEYWORD = 'TOKEN__'
```
```diff
@@ -373,10 +398,8 @@ def ASTTransform(node, transtable):
     """
     # normalize transformation entries by turning single transformations
     # into lists with a single item
     table = {name: transformation if isinstance(transformation, collections.abc.Sequence)
              else [transformation]
              for name, transformation in list(transtable.items())}
     table = expand_table(table)

     def recursive_ASTTransform(nd):
```
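The normalization step means a transformation table may freely mix single callables and lists of callables. An illustrative (hypothetical) table using transformations named in this commit:

```python
# Both entry styles are accepted; the comprehension above turns the first
# into [replace_by_single_child] before expand_table() is applied.
transtable = {
    'expression': replace_by_single_child,   # single transformation
    'term': [flatten, reduce_single_child],  # list of transformations
}
```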
```diff
@@ -456,7 +479,7 @@ def is_expendable(node):
     return is_empty(node) or is_whitespace(node)  # or is_scanner_token(node)


-def is_token(node, token_set={}):
+def is_token(node, token_set=frozenset()):
     return node.parser.name == TOKEN_KEYWORD and (not token_set or node.result in token_set)
```
```diff
@@ -472,7 +495,7 @@ remove_whitespace = partial(remove_children_if, condition=is_whitespace)
 remove_expendables = partial(remove_children_if, condition=is_expendable)


-def remove_tokens(node, tokens=set()):
+def remove_tokens(node, tokens=frozenset()):
     """Removes any among a particular set of tokens from the immediate
     descendants of a node. If ``tokens`` is the empty set, all tokens
     are removed.
```
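A likely motive for the `set()` to `frozenset()` default swaps (a general Python point, not stated in the commit message): default values are evaluated once at definition time, so a mutable default is shared across all calls, and the old `{}` in `is_token` was actually an empty dict, not a set. A quick demonstration of the sharing pitfall:

```python
def bad(x, acc=set()):
    acc.add(x)     # mutates the one default object shared by every call
    return acc

print(bad(1))  # {1}
print(bad(2))  # {1, 2}  <- state from the first call leaked in
```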