Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
bfc79c2e
Commit
bfc79c2e
authored
Dec 27, 2017
by
eckhart
Browse files
Merge branch 'source_maps' of
https://gitlab.lrz.de/badw-it/DHParser
into source_maps
parents
ea9bda2e
309c7376
Changes
22
Show whitespace changes
Inline
Side-by-side
DHParser/__init__.py
View file @
bfc79c2e
...
@@ -18,19 +18,20 @@ implied. See the License for the specific language governing
...
@@ -18,19 +18,20 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
permissions and limitations under the License.
"""
"""
# Flat namespace for the DHParser Package. Is this a good idea...?
from
.error
import
*
from
.dsl
import
*
from
.dsl
import
*
from
.ebnf
import
*
from
.ebnf
import
*
from
.parsers
import
*
# Flat namespace for the DHParser Package. Is this a good idea...?
from
.error
import
*
from
.parse
import
*
from
.preprocess
import
*
from
.stringview
import
*
from
.stringview
import
*
from
.syntaxtree
import
*
from
.syntaxtree
import
*
from
.testing
import
*
from
.toolkit
import
*
from
.toolkit
import
*
from
.transform
import
*
from
.transform
import
*
from
.testing
import
*
from
.versionnumber
import
__version__
from
.versionnumber
import
__version__
__author__
=
"Eckhart Arnold <arnold@badw.de>"
__author__
=
"Eckhart Arnold <arnold@badw.de>"
__copyright__
=
"http://www.apache.org/licenses/LICENSE-2.0"
__copyright__
=
"http://www.apache.org/licenses/LICENSE-2.0"
# __all__ = ['toolkit', 'stringview', 'error', 'syntaxtree', 'p
arser', 'transform', 'ebnf', 'dsl', 'testing
',
# __all__ = ['toolkit', 'stringview', 'error', 'syntaxtree', 'p
reprocess', 'parse
',
# '
versionnumber'] # flat namespace
# '
transform', 'ebnf', 'dsl', 'testing', 'versionnumber']
DHParser/dsl.py
View file @
bfc79c2e
...
@@ -20,18 +20,20 @@ compilation of domain specific languages based on an EBNF-grammar.
...
@@ -20,18 +20,20 @@ compilation of domain specific languages based on an EBNF-grammar.
"""
"""
import
os
import
os
from
typing
import
Any
,
cast
,
List
,
Tuple
,
Union
,
Iterator
,
Iterable
from
DHParser.ebnf
import
EBNFCompiler
,
grammar_changed
,
\
from
DHParser.ebnf
import
EBNFCompiler
,
grammar_changed
,
\
get_ebnf_preprocessor
,
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
,
\
get_ebnf_preprocessor
,
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
,
\
PreprocessorFactoryFunc
,
ParserFactoryFunc
,
TransformerFactoryFunc
,
CompilerFactoryFunc
PreprocessorFactoryFunc
,
ParserFactoryFunc
,
TransformerFactoryFunc
,
CompilerFactoryFunc
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
only_errors
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
only_errors
from
DHParser.parsers
import
Grammar
,
Compiler
,
compile_source
,
nil_preprocessor
,
PreprocessorFunc
from
DHParser.parse
import
Grammar
,
Compiler
,
compile_source
from
DHParser.preprocess
import
nil_preprocessor
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
from
DHParser.toolkit
import
logging
,
load_if_file
,
is_python_code
,
compile_python_object
,
\
from
DHParser.toolkit
import
logging
,
load_if_file
,
is_python_code
,
compile_python_object
,
\
re
,
typing
re
from
typing
import
Any
,
cast
,
List
,
Tuple
,
Union
,
Iterator
,
Iterable
__all__
=
(
'GrammarError'
,
__all__
=
(
'DHPARSER_IMPORTS'
,
'GrammarError'
,
'CompilationError'
,
'CompilationError'
,
'load_compiler_suite'
,
'load_compiler_suite'
,
'compileDSL'
,
'compileDSL'
,
...
@@ -70,7 +72,7 @@ try:
...
@@ -70,7 +72,7 @@ try:
except ImportError:
except ImportError:
import re
import re
from DHParser import logging, is_filename, load_if_file,
\\
from DHParser import logging, is_filename, load_if_file,
\\
Grammar, Compiler, nil_preprocessor,
\\
Grammar, Compiler, nil_preprocessor,
PreprocessorToken,
\\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered,
\\
Lookbehind, Lookahead, Alternative, Pop, Token, Synonym, AllOf, SomeOf, Unordered,
\\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture,
\\
Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, RE, Capture,
\\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source,
\\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source,
\\
...
@@ -495,14 +497,15 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
...
@@ -495,14 +497,15 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> It
+
"
\n
# "
.
join
(
str
(
error
).
split
(
'
\n
)'
)))
+
"
\n
# "
.
join
(
str
(
error
).
split
(
'
\n
)'
)))
print
(
result
)
print
(
result
)
finally
:
finally
:
if
f
:
f
.
close
()
if
f
:
f
.
close
()
return
messages
return
messages
def
recompile_grammar
(
ebnf_filename
,
force
=
False
)
->
bool
:
def
recompile_grammar
(
ebnf_filename
,
force
=
False
)
->
bool
:
"""
"""
Recompiles an
ebnf
-grammar if necessary, that is, if either no
Re
-
compiles an
EBNF
-grammar if necessary, that is, if either no
corresponding 'XXXXCompiler.py'-file exists or if that file is
corresponding 'XXXXCompiler.py'-file exists or if that file is
outdated.
outdated.
...
...
DHParser/ebnf.py
View file @
bfc79c2e
...
@@ -19,18 +19,19 @@ permissions and limitations under the License.
...
@@ -19,18 +19,19 @@ permissions and limitations under the License.
import
keyword
import
keyword
from
collections
import
OrderedDict
from
collections
import
OrderedDict
from
functools
import
partial
from
functools
import
partial
from
typing
import
Callable
,
Dict
,
List
,
Set
,
Tuple
from
DHParser.error
import
Error
from
DHParser.error
import
Error
from
DHParser.parse
rs
import
Grammar
,
mixin_comment
,
nil_preprocessor
,
Forward
,
RegExp
,
RE
,
\
from
DHParser.parse
import
Grammar
,
mixin_comment
,
Forward
,
RegExp
,
RE
,
\
NegativeLookahead
,
Alternative
,
Series
,
Option
,
OneOrMore
,
ZeroOrMore
,
Token
,
\
NegativeLookahead
,
Alternative
,
Series
,
Option
,
OneOrMore
,
ZeroOrMore
,
Token
,
\
Compiler
,
PreprocessorFunc
Compiler
from
DHParser.preprocess
import
nil_preprocessor
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
,
re
,
typing
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
,
re
from
DHParser.transform
import
traverse
,
remove_brackets
,
\
from
DHParser.transform
import
traverse
,
remove_brackets
,
\
reduce_single_child
,
replace_by_single_child
,
remove_expendables
,
\
reduce_single_child
,
replace_by_single_child
,
remove_expendables
,
\
remove_tokens
,
flatten
,
forbid
,
assert_content
,
remove_infix_operator
remove_tokens
,
flatten
,
forbid
,
assert_content
,
remove_infix_operator
from
DHParser.versionnumber
import
__version__
from
DHParser.versionnumber
import
__version__
from
typing
import
Callable
,
Dict
,
List
,
Set
,
Tuple
__all__
=
(
'get_ebnf_preprocessor'
,
__all__
=
(
'get_ebnf_preprocessor'
,
'get_ebnf_grammar'
,
'get_ebnf_grammar'
,
...
@@ -332,7 +333,7 @@ class EBNFCompiler(Compiler):
...
@@ -332,7 +333,7 @@ class EBNFCompiler(Compiler):
`alternative = a | b`
`alternative = a | b`
Now `[
str(node)
for node in self.rules['alternative']]`
Now `[
node.content
for node in self.rules['alternative']]`
yields `['alternative = a | b', 'a', 'b']`
yields `['alternative = a | b', 'a', 'b']`
symbols: A mapping of symbol names to their first usage (not
symbols: A mapping of symbol names to their first usage (not
...
@@ -597,7 +598,7 @@ class EBNFCompiler(Compiler):
...
@@ -597,7 +598,7 @@ class EBNFCompiler(Compiler):
def
on_definition
(
self
,
node
:
Node
)
->
Tuple
[
str
,
str
]:
def
on_definition
(
self
,
node
:
Node
)
->
Tuple
[
str
,
str
]:
rule
=
str
(
node
.
children
[
0
]
)
rule
=
node
.
children
[
0
]
.
content
if
rule
in
self
.
rules
:
if
rule
in
self
.
rules
:
first
=
self
.
rules
[
rule
][
0
]
first
=
self
.
rules
[
rule
][
0
]
if
not
first
.
_errors
:
if
not
first
.
_errors
:
...
@@ -652,7 +653,7 @@ class EBNFCompiler(Compiler):
...
@@ -652,7 +653,7 @@ class EBNFCompiler(Compiler):
def
on_directive
(
self
,
node
:
Node
)
->
str
:
def
on_directive
(
self
,
node
:
Node
)
->
str
:
key
=
str
(
node
.
children
[
0
]
)
.
lower
()
key
=
node
.
children
[
0
]
.
content
.
lower
()
assert
key
not
in
self
.
directives
[
'tokens'
]
assert
key
not
in
self
.
directives
[
'tokens'
]
if
key
not
in
self
.
REPEATABLE_DIRECTIVES
:
if
key
not
in
self
.
REPEATABLE_DIRECTIVES
:
...
@@ -674,8 +675,9 @@ class EBNFCompiler(Compiler):
...
@@ -674,8 +675,9 @@ class EBNFCompiler(Compiler):
else
:
else
:
node
.
add_error
(
'Value "%s" not allowed for directive "%s".'
%
(
value
,
key
))
node
.
add_error
(
'Value "%s" not allowed for directive "%s".'
%
(
value
,
key
))
else
:
else
:
value
=
str
(
node
.
children
[
1
]).
strip
(
"~"
)
# cast(str, node.children[1].result).strip("~")
value
=
node
.
children
[
1
].
content
.
strip
(
"~"
)
# cast(str, node.children[
if
value
!=
str
(
node
.
children
[
1
]):
# cast(str, node.children[1].result):
# 1].result).strip("~")
if
value
!=
node
.
children
[
1
].
content
:
# cast(str, node.children[1].result):
node
.
add_error
(
"Whitespace marker '~' not allowed in definition of "
node
.
add_error
(
"Whitespace marker '~' not allowed in definition of "
"%s regular expression."
%
key
)
"%s regular expression."
%
key
)
if
value
[
0
]
+
value
[
-
1
]
in
{
'""'
,
"''"
}:
if
value
[
0
]
+
value
[
-
1
]
in
{
'""'
,
"''"
}:
...
@@ -688,11 +690,11 @@ class EBNFCompiler(Compiler):
...
@@ -688,11 +690,11 @@ class EBNFCompiler(Compiler):
self
.
directives
[
key
]
=
value
self
.
directives
[
key
]
=
value
elif
key
==
'ignorecase'
:
elif
key
==
'ignorecase'
:
if
str
(
node
.
children
[
1
]
)
.
lower
()
not
in
{
"off"
,
"false"
,
"no"
}:
if
node
.
children
[
1
]
.
content
.
lower
()
not
in
{
"off"
,
"false"
,
"no"
}:
self
.
re_flags
.
add
(
'i'
)
self
.
re_flags
.
add
(
'i'
)
# elif key == 'testing':
# elif key == 'testing':
# value =
str(
node.children[1]
)
# value = node.children[1]
.content
# self.directives['testing'] = value.lower() not in {"off", "false", "no"}
# self.directives['testing'] = value.lower() not in {"off", "false", "no"}
elif
key
==
'literalws'
:
elif
key
==
'literalws'
:
...
@@ -708,7 +710,7 @@ class EBNFCompiler(Compiler):
...
@@ -708,7 +710,7 @@ class EBNFCompiler(Compiler):
elif
key
in
{
'tokens'
,
'preprocessor_tokens'
}:
elif
key
in
{
'tokens'
,
'preprocessor_tokens'
}:
tokens
=
self
.
compile
(
node
.
children
[
1
])
tokens
=
self
.
compile
(
node
.
children
[
1
])
redeclared
=
self
.
directives
[
'tokes'
]
&
tokens
redeclared
=
self
.
directives
[
'toke
n
s'
]
&
tokens
if
redeclared
:
if
redeclared
:
node
.
add_error
(
'Tokens %s have already been declared earlier. '
node
.
add_error
(
'Tokens %s have already been declared earlier. '
%
str
(
redeclared
)
+
'Later declaration will be ignored'
,
%
str
(
redeclared
)
+
'Later declaration will be ignored'
,
...
@@ -752,7 +754,7 @@ class EBNFCompiler(Compiler):
...
@@ -752,7 +754,7 @@ class EBNFCompiler(Compiler):
filtered_children
=
[]
filtered_children
=
[]
i
=
0
i
=
0
for
nd
in
node
.
children
:
for
nd
in
node
.
children
:
if
nd
.
parser
.
ptype
==
TOKEN_PTYPE
and
str
(
nd
)
==
"§"
:
if
nd
.
parser
.
ptype
==
TOKEN_PTYPE
and
nd
.
content
==
"§"
:
mandatory_marker
.
append
(
i
)
mandatory_marker
.
append
(
i
)
if
i
==
0
:
if
i
==
0
:
nd
.
add_error
(
'First item of a series should not be mandatory.'
,
nd
.
add_error
(
'First item of a series should not be mandatory.'
,
...
@@ -774,7 +776,7 @@ class EBNFCompiler(Compiler):
...
@@ -774,7 +776,7 @@ class EBNFCompiler(Compiler):
def
on_factor
(
self
,
node
:
Node
)
->
str
:
def
on_factor
(
self
,
node
:
Node
)
->
str
:
assert
node
.
children
assert
node
.
children
assert
len
(
node
.
children
)
>=
2
,
node
.
as_sxpr
()
assert
len
(
node
.
children
)
>=
2
,
node
.
as_sxpr
()
prefix
=
str
(
node
.
children
[
0
]
)
# cast(str, node.children[0].result)
prefix
=
node
.
children
[
0
]
.
content
custom_args
=
[]
# type: List[str]
custom_args
=
[]
# type: List[str]
if
prefix
in
{
'::'
,
':'
}:
if
prefix
in
{
'::'
,
':'
}:
...
@@ -806,15 +808,15 @@ class EBNFCompiler(Compiler):
...
@@ -806,15 +808,15 @@ class EBNFCompiler(Compiler):
if
len
(
nd
.
children
)
>=
1
:
if
len
(
nd
.
children
)
>=
1
:
nd
=
nd
.
children
[
0
]
nd
=
nd
.
children
[
0
]
while
nd
.
parser
.
name
==
"symbol"
:
while
nd
.
parser
.
name
==
"symbol"
:
symlist
=
self
.
rules
.
get
(
str
(
nd
)
,
[])
symlist
=
self
.
rules
.
get
(
nd
.
content
,
[])
if
len
(
symlist
)
==
2
:
if
len
(
symlist
)
==
2
:
nd
=
symlist
[
1
]
nd
=
symlist
[
1
]
else
:
else
:
if
len
(
symlist
)
==
1
:
if
len
(
symlist
)
==
1
:
nd
=
symlist
[
0
].
children
[
1
]
nd
=
symlist
[
0
].
children
[
1
]
break
break
if
(
nd
.
parser
.
name
!=
"regexp"
or
str
(
nd
)
[:
1
]
!=
'/'
if
(
nd
.
parser
.
name
!=
"regexp"
or
nd
.
content
[:
1
]
!=
'/'
or
str
(
nd
)
[
-
1
:]
!=
'/'
):
or
nd
.
content
[
-
1
:]
!=
'/'
):
node
.
add_error
(
"Lookbehind-parser can only be used with plain RegExp-"
node
.
add_error
(
"Lookbehind-parser can only be used with plain RegExp-"
"parsers, not with: "
+
nd
.
parser
.
name
+
nd
.
parser
.
ptype
)
"parsers, not with: "
+
nd
.
parser
.
name
+
nd
.
parser
.
ptype
)
...
@@ -838,10 +840,6 @@ class EBNFCompiler(Compiler):
...
@@ -838,10 +840,6 @@ class EBNFCompiler(Compiler):
return
self
.
non_terminal
(
node
,
'OneOrMore'
)
return
self
.
non_terminal
(
node
,
'OneOrMore'
)
def
on_regexchain
(
self
,
node
)
->
str
:
raise
EBNFCompilerError
(
"Not yet implemented!"
)
def
on_group
(
self
,
node
)
->
str
:
def
on_group
(
self
,
node
)
->
str
:
raise
EBNFCompilerError
(
"Group nodes should have been eliminated by "
raise
EBNFCompilerError
(
"Group nodes should have been eliminated by "
"AST transformation!"
)
"AST transformation!"
)
...
@@ -851,7 +849,7 @@ class EBNFCompiler(Compiler):
...
@@ -851,7 +849,7 @@ class EBNFCompiler(Compiler):
assert
len
(
node
.
children
)
==
1
assert
len
(
node
.
children
)
==
1
nd
=
node
.
children
[
0
]
nd
=
node
.
children
[
0
]
for
child
in
nd
.
children
:
for
child
in
nd
.
children
:
if
child
.
parser
.
ptype
==
TOKEN_PTYPE
and
str
(
nd
)
==
"§"
:
if
child
.
parser
.
ptype
==
TOKEN_PTYPE
and
nd
.
content
==
"§"
:
node
.
add_error
(
"Unordered parser lists cannot contain mandatory (§) items."
)
node
.
add_error
(
"Unordered parser lists cannot contain mandatory (§) items."
)
args
=
', '
.
join
(
self
.
compile
(
child
)
for
child
in
nd
.
children
)
args
=
', '
.
join
(
self
.
compile
(
child
)
for
child
in
nd
.
children
)
if
nd
.
parser
.
name
==
"term"
:
if
nd
.
parser
.
name
==
"term"
:
...
@@ -863,7 +861,7 @@ class EBNFCompiler(Compiler):
...
@@ -863,7 +861,7 @@ class EBNFCompiler(Compiler):
return
""
return
""
def
on_symbol
(
self
,
node
:
Node
)
->
str
:
# called only for symbols on the right hand side!
def
on_symbol
(
self
,
node
:
Node
)
->
str
:
# called only for symbols on the right hand side!
symbol
=
str
(
node
)
# ; assert result == cast(str, node.result)
symbol
=
node
.
content
# ; assert result == cast(str, node.result)
if
symbol
in
self
.
directives
[
'tokens'
]:
if
symbol
in
self
.
directives
[
'tokens'
]:
return
'PreprocessorToken("'
+
symbol
+
'")'
return
'PreprocessorToken("'
+
symbol
+
'")'
else
:
else
:
...
@@ -878,11 +876,12 @@ class EBNFCompiler(Compiler):
...
@@ -878,11 +876,12 @@ class EBNFCompiler(Compiler):
def
on_literal
(
self
,
node
)
->
str
:
def
on_literal
(
self
,
node
)
->
str
:
return
'Token('
+
str
(
node
).
replace
(
'
\\
'
,
r
'\\'
)
+
')'
# return 'Token(' + ', '.merge_children([node.result]) + ')' ?
return
'Token('
+
node
.
content
.
replace
(
'
\\
'
,
r
'\\'
)
+
')'
# return 'Token(' + ',
# '.merge_children([node.result]) + ')' ?
def
on_regexp
(
self
,
node
:
Node
)
->
str
:
def
on_regexp
(
self
,
node
:
Node
)
->
str
:
rx
=
str
(
node
)
rx
=
node
.
content
name
=
[]
# type: List[str]
name
=
[]
# type: List[str]
if
rx
[
0
]
==
'/'
and
rx
[
-
1
]
==
'/'
:
if
rx
[
0
]
==
'/'
and
rx
[
-
1
]
==
'/'
:
parser
=
'RegExp('
parser
=
'RegExp('
...
...
DHParser/error.py
View file @
bfc79c2e
...
@@ -18,11 +18,9 @@ permissions and limitations under the License.
...
@@ -18,11 +18,9 @@ permissions and limitations under the License.
import
bisect
import
bisect
import
functools
import
functools
from
typing
import
Iterable
,
Iterator
,
Union
,
Tuple
,
List
from
DHParser.stringview
import
StringView
from
DHParser.stringview
import
StringView
from
DHParser.toolkit
import
typing
from
typing
import
Hashable
,
Iterable
,
Iterator
,
Union
,
Tuple
,
List
__all__
=
(
'Error'
,
__all__
=
(
'Error'
,
'is_error'
,
'is_error'
,
...
@@ -71,10 +69,16 @@ class Error:
...
@@ -71,10 +69,16 @@ class Error:
@
property
@
property
def
level_str
(
self
):
def
level_str
(
self
):
"""Returns a string representation of the error level, e.g. "warning".
"""Returns a string representation of the error level, e.g. "warning"."""
"""
return
"Warning"
if
is_warning
(
self
.
code
)
else
"Error"
return
"Warning"
if
is_warning
(
self
.
code
)
else
"Error"
def
visualize
(
self
,
document
:
str
)
->
str
:
"""Shows the line of the document and the position where the error
occurred."""
start
=
document
.
rfind
(
'
\n
'
,
0
,
self
.
pos
)
+
1
stop
=
document
.
find
(
'
\n
'
,
self
.
pos
)
return
document
[
start
:
stop
]
+
'
\n
'
+
' '
*
(
self
.
pos
-
start
)
+
'^
\n
'
def
is_warning
(
code
:
int
)
->
bool
:
def
is_warning
(
code
:
int
)
->
bool
:
"""Returns True, if error is merely a warning."""
"""Returns True, if error is merely a warning."""
...
...
DHParser/parse
rs
.py
→
DHParser/parse.py
View file @
bfc79c2e
...
@@ -59,26 +59,20 @@ import collections
...
@@ -59,26 +59,20 @@ import collections
import
copy
import
copy
import
html
import
html
import
os
import
os
from
functools
import
partial
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
linebreaks
,
line_col
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
linebreaks
,
line_col
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
ParserBase
,
WHITESPACE_PTYPE
,
\
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
ParserBase
,
WHITESPACE_PTYPE
,
\
TOKEN_PTYPE
,
ZOMBIE_PARSER
TOKEN_PTYPE
,
ZOMBIE_PARSER
from
DHParser.preprocess
import
BEGIN_TOKEN
,
END_TOKEN
,
RX_TOKEN_NAME
,
\
PreprocessorFunc
from
DHParser.toolkit
import
is_logging
,
log_dir
,
logfile_basename
,
escape_re
,
sane_parser_name
,
\
from
DHParser.toolkit
import
is_logging
,
log_dir
,
logfile_basename
,
escape_re
,
sane_parser_name
,
\
load_if_file
,
re
,
typing
escape_control_characters
,
load_if_file
,
re
,
typing
from
typing
import
Any
,
Callable
,
cast
,
Dict
,
List
,
Set
,
Tuple
,
Union
,
Optional
from
typing
import
Any
,
Callable
,
cast
,
Dict
,
List
,
Set
,
Tuple
,
Union
,
Optional
__all__
=
(
'HistoryRecord'
,
__all__
=
(
'PreprocessorFunc'
,
'HistoryRecord'
,
'Parser'
,
'Parser'
,
'Grammar'
,
'Grammar'
,
'RX_PREPROCESSOR_TOKEN'
,
'BEGIN_TOKEN'
,
'END_TOKEN'
,
'make_token'
,
'nil_preprocessor'
,
'PreprocessorToken'
,
'PreprocessorToken'
,
'RegExp'
,
'RegExp'
,
'RE'
,
'RE'
,
...
@@ -117,9 +111,6 @@ __all__ = ('PreprocessorFunc',
...
@@ -117,9 +111,6 @@ __all__ = ('PreprocessorFunc',
########################################################################
########################################################################
PreprocessorFunc
=
Union
[
Callable
[[
str
],
str
],
partial
]
LEFT_RECURSION_DEPTH
=
8
# type: int
LEFT_RECURSION_DEPTH
=
8
# type: int
# because of python's recursion depth limit, this value ought not to be
# because of python's recursion depth limit, this value ought not to be
# set too high. PyPy allows higher values than CPython
# set too high. PyPy allows higher values than CPython
...
@@ -242,7 +233,7 @@ class HistoryRecord:
...
@@ -242,7 +233,7 @@ class HistoryRecord:
def
excerpt
(
self
):
def
excerpt
(
self
):
length
=
len
(
self
.
node
)
if
self
.
node
else
len
(
self
.
text
)
length
=
len
(
self
.
node
)
if
self
.
node
else
len
(
self
.
text
)
excerpt
=
str
(
self
.
node
)[:
min
(
length
,
20
)]
if
self
.
node
else
self
.
text
[:
20
]
excerpt
=
str
(
self
.
node
)[:
min
(
length
,
20
)]
if
self
.
node
else
self
.
text
[:
20
]
excerpt
=
e
xcerpt
.
replace
(
'
\n
'
,
'
\\
n'
)
excerpt
=
e
scape_control_characters
(
excerpt
)
if
length
>
20
:
if
length
>
20
:
excerpt
+=
'...'
excerpt
+=
'...'
return
excerpt
return
excerpt
...
@@ -1007,7 +998,8 @@ class Grammar:
...
@@ -1007,7 +998,8 @@ class Grammar:
if
html
and
len
(
log
)
%
100
==
0
:
if
html
and
len
(
log
)
%
100
==
0
:
log
.
append
(
'
\n
</table>
\n
<table>
\n
'
+
HistoryRecord
.
COLGROUP
)
log
.
append
(
'
\n
</table>
\n
<table>
\n
'
+
HistoryRecord
.
COLGROUP
)
if
is_logging
():
if
not
is_logging
():
raise
AssertionError
(
"Cannot log history when logging is turned off!"
)
assert
self
.
history__
,
\
assert
self
.
history__
,
\
"Parser did not yet run or logging was turned off when running parser!"
"Parser did not yet run or logging was turned off when running parser!"
if
not
log_file_name
:
if
not
log_file_name
:
...
@@ -1059,31 +1051,6 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
...
@@ -1059,31 +1051,6 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
########################################################################
########################################################################
RX_PREPROCESSOR_TOKEN
=
re
.
compile
(
r
'\w+'
)
BEGIN_TOKEN
=
'
\x1b
'
END_TOKEN
=
'
\x1c
'
def
make_token
(
token
:
str
,
argument
:
str
=
''
)
->
str
:
"""
Turns the ``token`` and ``argument`` into a special token that
will be caught by the `PreprocessorToken`-parser.
This function is a support function that should be used by
preprocessors to inject preprocessor tokens into the source text.
"""
assert
RX_PREPROCESSOR_TOKEN
.
match
(
token
)
assert
argument
.
find
(
BEGIN_TOKEN
)
<
0
assert
argument
.
find
(
END_TOKEN
)
<
0
return
BEGIN_TOKEN
+
token
+
argument
+
END_TOKEN
def
nil_preprocessor
(
text
:
str
)
->
str
:
"""A preprocessor that does nothing, i.e. just returns the input."""
return
text
class
PreprocessorToken
(
Parser
):
class
PreprocessorToken
(
Parser
):
"""
"""
Parses tokens that have been inserted by a preprocessor.
Parses tokens that have been inserted by a preprocessor.
...
@@ -1097,7 +1064,7 @@ class PreprocessorToken(Parser):
...
@@ -1097,7 +1064,7 @@ class PreprocessorToken(Parser):
def
__init__
(
self
,
token
:
str
)
->
None
:
def
__init__
(
self
,
token
:
str
)
->
None
:
assert
token
and
token
.
isupper
()
assert
token
and
token
.
isupper
()
assert
RX_
PREPROCESSOR_
TOKEN
.
match
(
token
)
assert
RX_TOKEN
_NAME
.
match
(
token
)
super
(
PreprocessorToken
,
self
).
__init__
(
token
)
super
(
PreprocessorToken
,
self
).
__init__
(
token
)
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
...
@@ -1121,8 +1088,7 @@ class PreprocessorToken(Parser):
...
@@ -1121,8 +1088,7 @@ class PreprocessorToken(Parser):
'(Most likely due to a preprocessor bug!)'
)
'(Most likely due to a preprocessor bug!)'
)
return
node
,
text
[
end
:]
return
node
,
text
[
end
:]
if
text
[
1
:
len
(
self
.
name
)
+
1
]
==
self
.
name
:
if
text
[
1
:
len
(
self
.
name
)
+
1
]
==
self
.
name
:
return
Node
(
self
,
text
[
len
(
self
.
name
)
+
1
:
end
]),
\
return
Node
(
self
,
text
[
len
(
self
.
name
)
+
2
:
end
]),
text
[
end
+
1
:]
text
[
end
+
1
:]
return
None
,
text
return
None
,
text
...
@@ -1157,15 +1123,21 @@ class RegExp(Parser):
...
@@ -1157,15 +1123,21 @@ class RegExp(Parser):
return
RegExp
(
regexp
,
self
.
name
)
return
RegExp
(
regexp
,
self
.
name
)
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
def
__call__
(
self
,
text
:
StringView
)
->
Tuple
[
Optional
[
Node
],
StringView
]:
if
text
[
0
:
1
]
!=
BEGIN_TOKEN
:
# ESC starts a preprocessor token.
match
=
text
.
match
(
self
.
regexp
)
match
=
text
.
match
(
self
.
regexp
)
if
match
:
if
match
:
capture
=
match
.
group
(
0
)
end
=
text
.
index
(
match
.
end
())
end
=
text
.
index
(
match
.
end
())
return
Node
(
self
,
match
.
group
(
0
),
True
),
text
[
end
:]
# regular expression must never match preprocessor-tokens!
# TODO: Find a better solution here? e.g. static checking/re-mangling at compile time
i
=
capture
.
find
(
BEGIN_TOKEN
)
if
i
>=
0
:
capture
=
capture
[:
i
]
end
=
i
return
Node
(
self
,
capture
,
True
),
text
[
end
:]
return
None
,
text
return
None
,
text
def
__repr__
(
self
):
def
__repr__
(
self
):
return
'/%s/'
%
self
.
regexp
.
pattern
return
escape_control_characters
(
'/%s/'
%
self
.
regexp
.
pattern
)
class
Whitespace
(
RegExp
):
class
Whitespace
(
RegExp
):
...
...
DHParser/preprocess.py
0 → 100644
View file @
bfc79c2e
""" preprocess.py - preprocessing of source files for DHParser
Copyright 2016 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.
"""
import
bisect
import
collections
import
functools
from
typing
import
Union
,
Callable
from
DHParser.toolkit
import
re