Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
db24cec1
Commit
db24cec1
authored
Jul 15, 2017
by
Eckhart Arnold
Browse files
- refactoring: Scanner now named Preprocessor
parent
e2d7ea45
Changes
11
Expand all
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
db24cec1
...
...
@@ -30,10 +30,10 @@ except ImportError:
from
.typing34
import
Any
,
cast
,
Tuple
,
Union
from
DHParser.ebnf
import
EBNFTransformer
,
EBNFCompiler
,
grammar_changed
,
\
get_ebnf_
scanne
r
,
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
,
\
Scanne
rFactoryFunc
,
ParserFactoryFunc
,
TransformerFactoryFunc
,
CompilerFactoryFunc
get_ebnf_
preprocesso
r
,
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
,
\
Preprocesso
rFactoryFunc
,
ParserFactoryFunc
,
TransformerFactoryFunc
,
CompilerFactoryFunc
from
DHParser.toolkit
import
logging
,
load_if_file
,
is_python_code
,
compile_python_object
from
DHParser.parsers
import
Grammar
,
Compiler
,
compile_source
,
nil_
scanner
,
Scanne
rFunc
from
DHParser.parsers
import
Grammar
,
Compiler
,
compile_source
,
nil_
preprocessor
,
Preprocesso
rFunc
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
...
...
@@ -59,7 +59,7 @@ RX_SECTION_MARKER = re.compile(SECTION_MARKER.format(marker=r'.*?SECTION.*?'))
RX_WHITESPACE
=
re
.
compile
(
'\s*'
)
SYMBOLS_SECTION
=
"SYMBOLS SECTION - Can be edited. Changes will be preserved."
SCANNE
R_SECTION
=
"
SCANNE
R SECTION - Can be edited. Changes will be preserved."
PREPROCESSO
R_SECTION
=
"
PREPROCESSO
R SECTION - Can be edited. Changes will be preserved."
PARSER_SECTION
=
"PARSER SECTION - Don't edit! CHANGES WILL BE OVERWRITTEN!"
AST_SECTION
=
"AST SECTION - Can be edited. Changes will be preserved."
COMPILER_SECTION
=
"COMPILER SECTION - Can be edited. Changes will be preserved."
...
...
@@ -75,11 +75,11 @@ try:
except ImportError:
import re
from DHParser.toolkit import logging, is_filename, load_if_file
from DHParser.parsers import Grammar, Compiler, nil_
scanne
r,
\\
from DHParser.parsers import Grammar, Compiler, nil_
preprocesso
r,
\\
Lookbehind, Lookahead, Alternative, Pop, Required, Token, Synonym,
\\
Optional, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture,
\\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source,
\\
last_value, counterpart, accumulate,
Scanne
rFunc
last_value, counterpart, accumulate,
Preprocesso
rFunc
from DHParser.syntaxtree import Node, traverse, remove_children_if,
\\
reduce_single_child, replace_by_single_child, remove_whitespace,
\\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace,
\\
...
...
@@ -98,7 +98,7 @@ def compile_src(source):
cname = compiler.__class__.__name__
log_file_name = os.path.basename(os.path.splitext(source)[0])
\\
if is_filename(source) < 0 else cname[:cname.find('.')] + '_out'
result = compile_source(source, get_
scanne
r(),
result = compile_source(source, get_
preprocesso
r(),
get_grammar(),
get_transformer(), compiler)
return result
...
...
@@ -176,7 +176,7 @@ def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
def
compileDSL
(
text_or_file
:
str
,
scanner
:
Scanne
rFunc
,
preprocessor
:
Preprocesso
rFunc
,
dsl_grammar
:
Union
[
str
,
Grammar
],
ast_transformation
:
TransformationFunc
,
compiler
:
Compiler
)
->
Any
:
...
...
@@ -192,7 +192,7 @@ def compileDSL(text_or_file: str,
assert
isinstance
(
compiler
,
Compiler
)
parser
,
grammar_src
=
grammar_instance
(
dsl_grammar
)
result
,
errors
,
AST
=
compile_source
(
text_or_file
,
scanne
r
,
parser
,
result
,
errors
,
AST
=
compile_source
(
text_or_file
,
preprocesso
r
,
parser
,
ast_transformation
,
compiler
)
if
errors
:
src
=
load_if_file
(
text_or_file
)
...
...
@@ -204,7 +204,7 @@ def raw_compileEBNF(ebnf_src: str, branding="DSL") -> EBNFCompiler:
"""
Compiles an EBNF grammar file and returns the compiler object
that was used and which can now be queried for the result as well
as skeleton code for
scanne
r, transformer and compiler objects.
as skeleton code for
preprocesso
r, transformer and compiler objects.
Args:
ebnf_src(str): Either the file name of an EBNF grammar or
...
...
@@ -218,14 +218,14 @@ def raw_compileEBNF(ebnf_src: str, branding="DSL") -> EBNFCompiler:
"""
grammar
=
get_ebnf_grammar
()
compiler
=
get_ebnf_compiler
(
branding
,
ebnf_src
)
compileDSL
(
ebnf_src
,
nil_
scanne
r
,
grammar
,
EBNFTransformer
,
compiler
)
compileDSL
(
ebnf_src
,
nil_
preprocesso
r
,
grammar
,
EBNFTransformer
,
compiler
)
return
compiler
def
compileEBNF
(
ebnf_src
:
str
,
branding
=
"DSL"
)
->
str
:
"""
Compiles an EBNF source file and returns the source code of a
compiler suite with skeletons for
scanne
r, transformer and
compiler suite with skeletons for
preprocesso
r, transformer and
compiler.
Args:
...
...
@@ -241,7 +241,7 @@ def compileEBNF(ebnf_src: str, branding="DSL") -> str:
compiler
=
raw_compileEBNF
(
ebnf_src
,
branding
)
src
=
[
"#/usr/bin/python
\n
"
,
SECTION_MARKER
.
format
(
marker
=
SYMBOLS_SECTION
),
DHPARSER_IMPORTS
,
SECTION_MARKER
.
format
(
marker
=
SCANNE
R_SECTION
),
compiler
.
gen_
scanne
r_skeleton
(),
SECTION_MARKER
.
format
(
marker
=
PREPROCESSO
R_SECTION
),
compiler
.
gen_
preprocesso
r_skeleton
(),
SECTION_MARKER
.
format
(
marker
=
PARSER_SECTION
),
compiler
.
result
,
SECTION_MARKER
.
format
(
marker
=
AST_SECTION
),
compiler
.
gen_transformer_skeleton
(),
SECTION_MARKER
.
format
(
marker
=
COMPILER_SECTION
),
compiler
.
gen_compiler_skeleton
(),
...
...
@@ -264,32 +264,32 @@ def parser_factory(ebnf_src: str, branding="DSL") -> Grammar:
A factory function for a grammar-parser for texts in the
language defined by ``ebnf_src``.
"""
grammar_src
=
compileDSL
(
ebnf_src
,
nil_
scanne
r
,
get_ebnf_grammar
(),
grammar_src
=
compileDSL
(
ebnf_src
,
nil_
preprocesso
r
,
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
(
branding
))
return
compile_python_object
(
DHPARSER_IMPORTS
+
grammar_src
,
'get_(?:\w+_)?grammar$'
)
def
load_compiler_suite
(
compiler_suite
:
str
)
->
\
Tuple
[
Scanne
rFactoryFunc
,
ParserFactoryFunc
,
TransformerFactoryFunc
,
CompilerFactoryFunc
]:
Tuple
[
Preprocesso
rFactoryFunc
,
ParserFactoryFunc
,
TransformerFactoryFunc
,
CompilerFactoryFunc
]:
"""
Extracts a compiler suite from file or string ``compiler_suite``
and returns it as a tuple (
scanne
r, parser, ast, compiler).
and returns it as a tuple (
preprocesso
r, parser, ast, compiler).
Returns:
4-tuple (
scanne
r function, parser class, ast transformer function, compiler class)
4-tuple (
preprocesso
r function, parser class, ast transformer function, compiler class)
"""
global
RX_SECTION_MARKER
assert
isinstance
(
compiler_suite
,
str
)
source
=
load_if_file
(
compiler_suite
)
if
is_python_code
(
compiler_suite
):
try
:
intro
,
imports
,
scanne
r_py
,
parser_py
,
ast_py
,
compiler_py
,
outro
=
\
intro
,
imports
,
preprocesso
r_py
,
parser_py
,
ast_py
,
compiler_py
,
outro
=
\
RX_SECTION_MARKER
.
split
(
source
)
except
ValueError
as
error
:
raise
AssertionError
(
'File "'
+
compiler_suite
+
'" seems to be corrupted. '
'Please delete or repair file manually.'
)
# TODO: Compile in one step and pick parts from namespace later ?
scanne
r
=
compile_python_object
(
imports
+
scanne
r_py
,
'get_(?:\w+_)?
scanne
r$'
)
preprocesso
r
=
compile_python_object
(
imports
+
preprocesso
r_py
,
'get_(?:\w+_)?
preprocesso
r$'
)
parser
=
compile_python_object
(
imports
+
parser_py
,
'get_(?:\w+_)?grammar$'
)
ast
=
compile_python_object
(
imports
+
ast_py
,
'get_(?:\w+_)?transformer$'
)
else
:
...
...
@@ -299,12 +299,12 @@ def load_compiler_suite(compiler_suite: str) -> \
get_ebnf_grammar
(),
get_ebnf_transformer
(),
get_ebnf_compiler
())
if
errors
:
raise
GrammarError
(
'
\n\n
'
.
join
(
errors
),
source
)
scanne
r
=
get_ebnf_
scanne
r
preprocesso
r
=
get_ebnf_
preprocesso
r
parser
=
get_ebnf_grammar
ast
=
get_ebnf_transformer
compiler
=
compile_python_object
(
imports
+
compiler_py
,
'get_(?:\w+_)?compiler$'
)
return
scanne
r
,
parser
,
ast
,
compiler
return
preprocesso
r
,
parser
,
ast
,
compiler
def
is_outdated
(
compiler_suite
:
str
,
grammar_source
:
str
)
->
bool
:
...
...
@@ -327,7 +327,7 @@ def is_outdated(compiler_suite: str, grammar_source: str) -> bool:
True, if ``compiler_suite`` seems to be out of date.
"""
try
:
scanne
r
,
grammar
,
ast
,
compiler
=
load_compiler_suite
(
compiler_suite
)
preprocesso
r
,
grammar
,
ast
,
compiler
=
load_compiler_suite
(
compiler_suite
)
return
grammar_changed
(
grammar
(),
grammar_source
)
except
ValueError
:
return
True
...
...
@@ -352,8 +352,8 @@ def run_compiler(text_or_file: str, compiler_suite: str) -> Any:
Raises:
CompilerError
"""
scanne
r
,
parser
,
ast
,
compiler
=
load_compiler_suite
(
compiler_suite
)
return
compileDSL
(
text_or_file
,
scanne
r
(),
parser
(),
ast
(),
compiler
())
preprocesso
r
,
parser
,
ast
,
compiler
=
load_compiler_suite
(
compiler_suite
)
return
compileDSL
(
text_or_file
,
preprocesso
r
(),
parser
(),
ast
(),
compiler
())
def
compile_on_disk
(
source_file
:
str
,
compiler_suite
=
""
,
extension
=
".xml"
):
...
...
@@ -364,7 +364,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
If no ``compiler_suite`` is given it is assumed that the source
file is an EBNF grammar. In this case the result will be a Python
script containing a parser for that grammar as well as the
skeletons for a
scanne
r, AST transformation table, and compiler.
skeletons for a
preprocesso
r, AST transformation table, and compiler.
If the Python script already exists only the parser name in the
script will be updated. (For this to work, the different names
need to be delimited by section marker blocks.). `compile_on_disk()`
...
...
@@ -396,7 +396,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
if
compiler_suite
:
sfactory
,
pfactory
,
tfactory
,
cfactory
=
load_compiler_suite
(
compiler_suite
)
else
:
sfactory
=
get_ebnf_
scanne
r
sfactory
=
get_ebnf_
preprocesso
r
pfactory
=
get_ebnf_grammar
tfactory
=
get_ebnf_transformer
cfactory
=
get_ebnf_compiler
...
...
@@ -408,7 +408,7 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
elif
cfactory
==
get_ebnf_compiler
:
# trans == get_ebnf_transformer or trans == EBNFTransformer: # either an EBNF- or no compiler suite given
ebnf_compiler
=
cast
(
EBNFCompiler
,
compiler1
)
global
SECTION_MARKER
,
RX_SECTION_MARKER
,
SCANNE
R_SECTION
,
PARSER_SECTION
,
\
global
SECTION_MARKER
,
RX_SECTION_MARKER
,
PREPROCESSO
R_SECTION
,
PARSER_SECTION
,
\
AST_SECTION
,
COMPILER_SECTION
,
END_SECTIONS_MARKER
,
RX_WHITESPACE
,
\
DHPARSER_MAIN
,
DHPARSER_IMPORTS
f
=
None
...
...
@@ -416,9 +416,9 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
f
=
open
(
rootname
+
'Compiler.py'
,
'r'
,
encoding
=
"utf-8"
)
source
=
f
.
read
()
sections
=
RX_SECTION_MARKER
.
split
(
source
)
intro
,
imports
,
scanne
r
,
parser
,
ast
,
compiler
,
outro
=
sections
intro
,
imports
,
preprocesso
r
,
parser
,
ast
,
compiler
,
outro
=
sections
except
(
PermissionError
,
FileNotFoundError
,
IOError
)
as
error
:
intro
,
imports
,
scanne
r
,
parser
,
ast
,
compiler
,
outro
=
''
,
''
,
''
,
''
,
''
,
''
,
''
intro
,
imports
,
preprocesso
r
,
parser
,
ast
,
compiler
,
outro
=
''
,
''
,
''
,
''
,
''
,
''
,
''
except
ValueError
as
error
:
name
=
'"'
+
rootname
+
'Compiler.py"'
raise
ValueError
(
'Could not identify all required sections in '
+
name
+
...
...
@@ -434,8 +434,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
outro
=
DHPARSER_MAIN
.
format
(
NAME
=
compiler_name
)
if
RX_WHITESPACE
.
fullmatch
(
imports
):
imports
=
DHPARSER_IMPORTS
if
RX_WHITESPACE
.
fullmatch
(
scanne
r
):
scanne
r
=
ebnf_compiler
.
gen_
scanne
r_skeleton
()
if
RX_WHITESPACE
.
fullmatch
(
preprocesso
r
):
preprocesso
r
=
ebnf_compiler
.
gen_
preprocesso
r_skeleton
()
if
RX_WHITESPACE
.
fullmatch
(
ast
):
ast
=
ebnf_compiler
.
gen_transformer_skeleton
()
if
RX_WHITESPACE
.
fullmatch
(
compiler
):
...
...
@@ -446,8 +446,8 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
f
.
write
(
intro
)
f
.
write
(
SECTION_MARKER
.
format
(
marker
=
SYMBOLS_SECTION
))
f
.
write
(
imports
)
f
.
write
(
SECTION_MARKER
.
format
(
marker
=
SCANNE
R_SECTION
))
f
.
write
(
scanne
r
)
f
.
write
(
SECTION_MARKER
.
format
(
marker
=
PREPROCESSO
R_SECTION
))
f
.
write
(
preprocesso
r
)
f
.
write
(
SECTION_MARKER
.
format
(
marker
=
PARSER_SECTION
))
f
.
write
(
result
)
f
.
write
(
SECTION_MARKER
.
format
(
marker
=
AST_SECTION
))
...
...
DHParser/ebnf.py
View file @
db24cec1
...
...
@@ -29,17 +29,16 @@ except ImportError:
from
.typing34
import
Callable
,
Dict
,
List
,
Set
,
Tuple
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
from
DHParser.parsers
import
Grammar
,
mixin_comment
,
nil_
scanne
r
,
Forward
,
RE
,
NegativeLookahead
,
\
from
DHParser.parsers
import
Grammar
,
mixin_comment
,
nil_
preprocesso
r
,
Forward
,
RE
,
NegativeLookahead
,
\
Alternative
,
Series
,
Optional
,
Required
,
OneOrMore
,
ZeroOrMore
,
Token
,
Compiler
,
\
Scanne
rFunc
Preprocesso
rFunc
from
DHParser.syntaxtree
import
Node
,
traverse
,
remove_brackets
,
\
reduce_single_child
,
replace_by_single_child
,
TOKEN_PTYPE
,
remove_expendables
,
\
remove_tokens
,
flatten
,
forbid
,
assert_content
,
WHITESPACE_PTYPE
,
key_tag_name
,
\
TransformationFunc
from
DHParser.versionnumber
import
__version__
__all__
=
[
'get_ebnf_scanner'
,
__all__
=
[
'get_ebnf_preprocessor'
,
'get_ebnf_grammar'
,
'get_ebnf_transformer'
,
'get_ebnf_compiler'
,
...
...
@@ -48,7 +47,7 @@ __all__ = ['get_ebnf_scanner',
'EBNFCompilerError'
,
'EBNFCompiler'
,
'grammar_changed'
,
'
Scanne
rFactoryFunc'
,
'
Preprocesso
rFactoryFunc'
,
'ParserFactoryFunc'
,
'TransformerFactoryFunc'
,
'CompilerFactoryFunc'
]
...
...
@@ -61,8 +60,8 @@ __all__ = ['get_ebnf_scanner',
########################################################################
def
get_ebnf_
scanner
()
->
Scanne
rFunc
:
return
nil_
scanne
r
def
get_ebnf_
preprocessor
()
->
Preprocesso
rFunc
:
return
nil_
preprocesso
r
########################################################################
...
...
@@ -247,15 +246,14 @@ def get_ebnf_transformer() -> TransformationFunc:
########################################################################
Scanne
rFactoryFunc
=
Callable
[[],
Scanne
rFunc
]
Preprocesso
rFactoryFunc
=
Callable
[[],
Preprocesso
rFunc
]
ParserFactoryFunc
=
Callable
[[],
Grammar
]
TransformerFactoryFunc
=
Callable
[[],
TransformationFunc
]
CompilerFactoryFunc
=
Callable
[[],
Compiler
]
SCANNER_FACTORY
=
'''
def get_scanner() -> ScannerFunc:
return {NAME}Scanner
PREPROCESSOR_FACTORY
=
'''
def get_preprocessor() -> PreprocessorFunc:
return {NAME}Preprocessor
'''
...
...
@@ -335,21 +333,20 @@ class EBNFCompiler(Compiler):
self
.
directives
=
{
'whitespace'
:
self
.
WHITESPACE
[
'horizontal'
],
'comment'
:
''
,
'literalws'
:
[
'right'
],
'tokens'
:
set
(),
# alt. '
scanne
r_tokens'
'filter'
:
dict
(),
# alt. 'filter'
'testing'
:
False
}
'tokens'
:
set
(),
# alt. '
preprocesso
r_tokens'
'filter'
:
dict
(),
# alt. 'filter'
'testing'
:
False
}
@
property
def
result
(
self
)
->
str
:
return
self
.
_result
# methods for generating skeleton code for preprocessor, transformer, and compiler
# methods for generating skeleton code for scanner, transformer, and compiler
def
gen_scanner_skeleton
(
self
)
->
str
:
name
=
self
.
grammar_name
+
"Scanner"
def
gen_preprocessor_skeleton
(
self
)
->
str
:
name
=
self
.
grammar_name
+
"Preprocessor"
return
"def %s(text):
\n
return text
\n
"
%
name
\
+
SCANNE
R_FACTORY
.
format
(
NAME
=
self
.
grammar_name
)
+
PREPROCESSO
R_FACTORY
.
format
(
NAME
=
self
.
grammar_name
)
def
gen_transformer_skeleton
(
self
)
->
str
:
...
...
@@ -515,7 +512,7 @@ class EBNFCompiler(Compiler):
' end with a doube underscore "__".'
%
rule
)
elif
rule
in
self
.
directives
[
'tokens'
]:
node
.
add_error
(
'Symbol "%s" has already been defined as '
'a
scanne
r token.'
%
rule
)
'a
preprocesso
r token.'
%
rule
)
elif
keyword
.
iskeyword
(
rule
):
node
.
add_error
(
'Python keyword "%s" may not be used as a symbol. '
%
rule
+
'(This may change in the future.)'
)
...
...
@@ -595,7 +592,7 @@ class EBNFCompiler(Compiler):
else
{}
if
'none'
in
value
else
value
self
.
directives
[
key
]
=
list
(
ws
)
elif
key
in
{
'tokens'
,
'
scanne
r_tokens'
}:
elif
key
in
{
'tokens'
,
'
preprocesso
r_tokens'
}:
self
.
directives
[
'tokens'
]
|=
self
.
compile
(
node
.
children
[
1
])
elif
key
.
endswith
(
'_filter'
):
...
...
@@ -687,7 +684,7 @@ class EBNFCompiler(Compiler):
def
on_symbol
(
self
,
node
:
Node
)
->
str
:
# called only for symbols on the right hand side!
symbol
=
str
(
node
)
# ; assert result == cast(str, node.result)
if
symbol
in
self
.
directives
[
'tokens'
]:
return
'
Scanne
rToken("'
+
symbol
+
'")'
return
'
Preprocesso
rToken("'
+
symbol
+
'")'
else
:
self
.
current_symbols
.
append
(
node
)
if
symbol
not
in
self
.
symbols
:
...
...
DHParser/parsers.py
View file @
db24cec1
...
...
@@ -73,17 +73,16 @@ from DHParser.syntaxtree import WHITESPACE_PTYPE, TOKEN_PTYPE, ZOMBIE_PARSER, Pa
Node
,
TransformationFunc
from
DHParser.toolkit
import
load_if_file
,
error_messages
__all__
=
[
'ScannerFunc'
,
__all__
=
[
'PreprocessorFunc'
,
'HistoryRecord'
,
'Parser'
,
'Grammar'
,
'RX_
SCANNE
R_TOKEN'
,
'BEGIN_
SCANNER_
TOKEN'
,
'END_
SCANNER_
TOKEN'
,
'RX_
PREPROCESSO
R_TOKEN'
,
'BEGIN_TOKEN'
,
'END_TOKEN'
,
'make_token'
,
'nil_
scanne
r'
,
'
Scanne
rToken'
,
'nil_
preprocesso
r'
,
'
Preprocesso
rToken'
,
'RegExp'
,
'RE'
,
'Token'
,
...
...
@@ -121,7 +120,7 @@ __all__ = ['ScannerFunc',
########################################################################
Scanne
rFunc
=
Union
[
Callable
[[
str
],
str
],
partial
]
Preprocesso
rFunc
=
Union
[
Callable
[[
str
],
str
],
partial
]
LEFT_RECURSION_DEPTH
=
20
if
platform
.
python_implementation
()
==
"PyPy"
\
...
...
@@ -610,66 +609,65 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
########################################################################
RX_
SCANNE
R_TOKEN
=
re
.
compile
(
'\w+'
)
BEGIN_
SCANNER_
TOKEN
=
'
\x1b
'
END_
SCANNER_
TOKEN
=
'
\x1c
'
RX_
PREPROCESSO
R_TOKEN
=
re
.
compile
(
'\w+'
)
BEGIN_TOKEN
=
'
\x1b
'
END_TOKEN
=
'
\x1c
'
def
make_token
(
token
:
str
,
argument
:
str
=
''
)
->
str
:
"""
Turns the ``token`` and ``argument`` into a special token that
will be caught by the `
Scanne
rToken`-parser.
will be caught by the `
Preprocesso
rToken`-parser.
This function is a support function that should be used by
scanners
to inject scanne
r tokens into the source text.
This function is a support function that should be used by
preprocessors to inject preprocesso
r tokens into the source text.
"""
assert
RX_
SCANNE
R_TOKEN
.
match
(
token
)
assert
argument
.
find
(
BEGIN_
SCANNER_
TOKEN
)
<
0
assert
argument
.
find
(
END_
SCANNER_
TOKEN
)
<
0
assert
RX_
PREPROCESSO
R_TOKEN
.
match
(
token
)
assert
argument
.
find
(
BEGIN_TOKEN
)
<
0
assert
argument
.
find
(
END_TOKEN
)
<
0
return
BEGIN_
SCANNER_
TOKEN
+
token
+
argument
+
END_
SCANNER_
TOKEN
return
BEGIN_TOKEN
+
token
+
argument
+
END_TOKEN
def
nil_
scanne
r
(
text
:
str
)
->
str
:
def
nil_
preprocesso
r
(
text
:
str
)
->
str
:
return
text
class
Scanne
rToken
(
Parser
):
class
Preprocesso
rToken
(
Parser
):
"""
Parses tokens that have been inserted by a
Scanne
r.
Parses tokens that have been inserted by a
preprocesso
r.
Scanne
rs can generate Tokens with the ``make_token``-function.
Preprocesso
rs can generate Tokens with the ``make_token``-function.
These tokens start and end with magic characters that can only be
matched by the ScannerToken Parser. Scanner tokens can be used to
insert BEGIN - END delimiters at the beginning or ending of an
indented block. Otherwise indented blocks are difficult to handle
with parsing expression grammars.
matched by the PreprocessorToken Parser. Such tokens can be used to
insert BEGIN - END delimiters at the beginning or ending of a
quoted block, for example.
"""
def
__init__
(
self
,
scanner_
token
:
str
)
->
None
:
assert
scanner_
token
and
scanner_
token
.
isupper
()
assert
RX_
SCANNE
R_TOKEN
.
match
(
scanner_
token
)
super
(
Scanne
rToken
,
self
).
__init__
(
scanner_
token
)
def
__init__
(
self
,
token
:
str
)
->
None
:
assert
token
and
token
.
isupper
()
assert
RX_
PREPROCESSO
R_TOKEN
.
match
(
token
)
super
(
Preprocesso
rToken
,
self
).
__init__
(
token
)
def
__call__
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
if
text
[
0
:
1
]
==
BEGIN_
SCANNER_
TOKEN
:
end
=
text
.
find
(
END_
SCANNER_
TOKEN
,
1
)
if
text
[
0
:
1
]
==
BEGIN_TOKEN
:
end
=
text
.
find
(
END_TOKEN
,
1
)
if
end
<
0
:
node
=
Node
(
self
,
''
).
add_error
(
'END_
SCANNER_
TOKEN delimiter missing from
scanne
r token. '
'(Most likely due to a
scanne
r bug!)'
)
# type: Node
'END_TOKEN delimiter missing from
preprocesso
r token. '
'(Most likely due to a
preprocesso
r bug!)'
)
# type: Node
return
node
,
text
[
1
:]
elif
end
==
0
:
node
=
Node
(
self
,
''
).
add_error
(
'
Scanner
token cannot have zero length. '
'(Most likely due to a
scanne
r bug!)'
)
'
Preprocessor-
token cannot have zero length. '
'(Most likely due to a
preprocesso
r bug!)'
)
return
node
,
text
[
2
:]
elif
text
.
find
(
BEGIN_
SCANNER_
TOKEN
,
1
,
end
)
>=
0
:
elif
text
.
find
(
BEGIN_TOKEN
,
1
,
end
)
>=
0
:
node
=
Node
(
self
,
text
[
len
(
self
.
name
)
+
1
:
end
])
node
.
add_error
(
'
Scanner
tokens must not be nested or contain '
'BEGIN_
SCANNER_
TOKEN delimiter as part of their argument. '
'(Most likely due to a
scanne
r bug!)'
)
'
Preprocessor-
tokens must not be nested or contain '
'BEGIN_TOKEN delimiter as part of their argument. '
'(Most likely due to a
preprocesso
r bug!)'
)
return
node
,
text
[
end
:]
if
text
[
1
:
len
(
self
.
name
)
+
1
]
==
self
.
name
:
return
Node
(
self
,
text
[
len
(
self
.
name
)
+
1
:
end
]),
\
...
...
@@ -700,7 +698,7 @@ class RegExp(Parser):
return
RegExp
(
regexp
,
self
.
name
)
def
__call__
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
match
=
text
[
0
:
1
]
!=
BEGIN_
SCANNER_
TOKEN
and
self
.
regexp
.
match
(
text
)
# ESC starts a
scanne
r token.
match
=
text
[
0
:
1
]
!=
BEGIN_TOKEN
and
self
.
regexp
.
match
(
text
)
# ESC starts a
preprocesso
r token.
if
match
:
end
=
match
.
end
()
return
Node
(
self
,
text
[:
end
]),
text
[
end
:]
...
...
@@ -1400,7 +1398,7 @@ class Compiler:
def
compile_source
(
source
:
str
,
scanner
:
Scanne
rFunc
,
# str -> str
preprocessor
:
Preprocesso
rFunc
,
# str -> str
parser
:
Grammar
,
# str -> Node (concrete syntax tree (CST))
transformer
:
TransformationFunc
,
# Node -> Node (abstract syntax tree (AST))
compiler
:
Compiler
):
# Node (AST) -> Any
...
...
@@ -1416,8 +1414,8 @@ def compile_source(source: str,
Args:
source (str): The input text for compilation or the name of a
file containing the input text.
scanne
r (function): text -> text. A
scanne
r function
or None,
if no scanne
r is needed.
preprocesso
r (function): text -> text. A
preprocesso
r function
or None, if no preprocesso
r is needed.
parser (function): A parsing function or grammar class
transformer (function): A transformation function that takes
the root-node of the concrete syntax tree as an argument and
...
...
@@ -1435,8 +1433,8 @@ def compile_source(source: str,
"""
source_text
=
load_if_file
(
source
)
log_file_name
=
logfile_basename
(
source
,
compiler
)
if
scanne
r
is
not
None
:
source_text
=
scanne
r
(
source_text
)
if
preprocesso
r
is
not
None
:
source_text
=
preprocesso
r
(
source_text
)
syntax_tree
=
parser
(
source_text
)
if
is_logging
():
syntax_tree
.
log
(
log_file_name
+
'.cst'
)
...
...
DHParser/syntaxtree.py
View file @
db24cec1
...
...
@@ -98,6 +98,7 @@ class ParserBase:
def
repr
(
self
)
->
str
:
return
self
.
name
if
self
.
name
else
repr
(
self
)
class
MockParser
(
ParserBase
):
"""
MockParser objects can be used to reconstruct syntax trees from a
...
...
@@ -583,7 +584,7 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
"""Traverses the syntax tree starting with the given ``node`` depth
first and applies the sequences of callback-functions registered
in the ``calltable``-dictionary.
The most important use case is the transformation of a concrete
syntax tree into an abstract tree (AST). But it is also imaginable
to employ tree-traversal for the semantic analysis of the AST.
...
...
@@ -598,16 +599,16 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
'~': always called (after any other processing function)
Args:
root_node (Node): The root-node of the syntax tree to be traversed
root_node (Node): The root-node of the syntax tree to be traversed
processing_table (dict): node key -> sequence of functions that
will be applied to matching nodes in order. This dictionary
is interpreted as a ``compact_table``. See
is interpreted as a ``compact_table``. See
``toolkit.expand_table`` or ``EBNFCompiler.EBNFTransTable``
key_func (function): A mapping key_func(node) -> keystr. The default
key_func yields node.parser.name.
Example:
table = { "term": [replace_by_single_child, flatten],
table = { "term": [replace_by_single_child, flatten],
"factor, flowmarker, retrieveop": replace_by_single_child }
traverse(node, table)
"""
...
...
@@ -656,19 +657,6 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
# ------------------------------------------------
@
transformation_factory
def
replace_parser
(
node
,
name
:
str
):
"""Replaces the parser of a Node with a mock parser with the given
name.
Parameters:
name(str): "NAME:PTYPE" of the surrogate. The ptype is optional
node(Node): The node where the parser shall be replaced
"""
name
,
ptype
=
(
name
.
split
(
':'
)
+
[
''
])[:
2
]
node
.
parser
=
MockParser
(
name
,
ptype
)
def
replace_by_single_child
(
node
):
"""Remove single branch node, replacing it by its immediate descendant.
(In case the descendant's name is empty (i.e. anonymous) the
...
...
@@ -691,6 +679,19 @@ def reduce_single_child(node):
node
.
result
=
node
.
result
[
0
].
result
@
transformation_factory
def
replace_parser
(
node
,
name
:
str
):
"""Replaces the parser of a Node with a mock parser with the given
name.
Parameters:
name(str): "NAME:PTYPE" of the surrogate. The ptype is optional
node(Node): The node where the parser shall be replaced
"""
name
,
ptype
=
(
name
.
split
(
':'
)
+
[
''
])[:
2
]
node
.
parser
=
MockParser
(
name
,
ptype
)
@
transformation_factory
(
Callable
)
def
flatten
(
node
,
condition
=
lambda
node
:
not
node
.
parser
.
name
,
recursive
=
True
):
"""Flattens all children, that fulfil the given `condition`
...
...
DHParser/toolkit.py
View file @
db24cec1
...
...
@@ -30,6 +30,7 @@ the directory exists and raises an error if a file with the same name
already exists.
"""
import
codecs
import
collections
import
contextlib
import
hashlib
...
...
@@ -38,6 +39,7 @@ try:
import
regex
as
re
except
ImportError
:
import
re
import
sys
try
:
from
typing
import
Any
,
List
,
Tuple
except
ImportError
:
...
...
@@ -389,3 +391,13 @@ def compile_python_object(python_src, catch_obj_regex=""):
return
namespace
[
matches
[
0
]]
if
matches
else
None
else
:
return
namespace
try
:
if
sys
.
stdout
.
encoding
.
upper
()
!=
"UTF-8"
:
# make sure that `print()` does not raise an error on
# non-ASCII characters:
sys
.
stdout
=
codecs
.
getwriter
(
"utf-8"
)(
sys
.
stdout
.
detach
())
except
AttributeError
:
# somebody has already taken care of this !?
pass
Introduction.md
View file @
db24cec1
This diff is collapsed.
Click to expand it.
dhparser.py
View file @
db24cec1
...
...
@@ -18,7 +18,7 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
# TODO: This is still a stub...