Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
3004d9b7
Commit
3004d9b7
authored
Mar 10, 2018
by
eckhart
Browse files
- moved compilation support to a separate module "compile.py"
parent
3f663703
Changes
13
Hide whitespace changes
Inline
Side-by-side
DHParser/__init__.py
View file @
3004d9b7
...
...
@@ -18,6 +18,7 @@ implied. See the License for the specific language governing
permissions and limitations under the License.
"""
from
.compile
import
*
from
.dsl
import
*
from
.ebnf
import
*
# Flat namespace for the DHParser Package. Is this a good idea...?
...
...
DHParser/dsl.py
View file @
3004d9b7
...
...
@@ -30,9 +30,10 @@ from DHParser.ebnf import EBNFCompiler, grammar_changed, \
PreprocessorFactoryFunc
,
ParserFactoryFunc
,
TransformerFactoryFunc
,
CompilerFactoryFunc
from
DHParser.error
import
Error
,
is_error
,
has_errors
,
only_errors
from
DHParser.log
import
logging
from
DHParser.parse
import
Grammar
,
Compiler
,
compile_source
from
DHParser.parse
import
Grammar
from
DHParser
import
Compiler
,
compile_source
,
TransformationFunc
from
DHParser.preprocess
import
nil_preprocessor
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
from
DHParser.syntaxtree
import
Node
from
DHParser.toolkit
import
load_if_file
,
is_python_code
,
compile_python_object
,
\
re
...
...
DHParser/ebnf.py
View file @
3004d9b7
...
...
@@ -31,10 +31,10 @@ from typing import Callable, Dict, List, Set, Tuple
from
DHParser.error
import
Error
from
DHParser.parse
import
Grammar
,
mixin_comment
,
Forward
,
RegExp
,
RE
,
\
NegativeLookahead
,
Alternative
,
Series
,
Option
,
OneOrMore
,
ZeroOrMore
,
Token
,
\
Compiler
NegativeLookahead
,
Alternative
,
Series
,
Option
,
OneOrMore
,
ZeroOrMore
,
Token
from
DHParser
import
Compiler
,
TransformationFunc
from
DHParser.preprocess
import
nil_preprocessor
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.syntaxtree
import
Node
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
,
re
,
expand_table
from
DHParser.transform
import
traverse
,
remove_brackets
,
\
reduce_single_child
,
replace_by_single_child
,
remove_expendables
,
\
...
...
DHParser/parse.py
View file @
3004d9b7
# parse.py - parser combinators for
for
DHParser
# parse.py - parser combinators for DHParser
#
# Copyright 2016 by Eckhart Arnold (arnold@badw.de)
# Bavarian Academy of Sciences and Humanities (badw.de)
...
...
@@ -18,8 +18,8 @@
"""
Module ``parse`` contains the python classes and functions for
DHParser's packrat
parser. It's central class is the
``Grammar``-class, which is the base class for any con
t
rete
DHParser's packrat
-
parser. Its central class is the
``Grammar``-class, which is the base class for any con
c
rete
Grammar. Grammar-objects are callable and parsing is done by
calling a Grammar-object with a source text as argument.
...
...
@@ -27,35 +27,20 @@ The different parsing functions are callable descendants of class
``Parser``. Usually, they are organized in a tree and defined
within the namespace of a grammar-class. See ``ebnf.EBNFGrammar``
for an example.
Module ``parse`` furthermore contains the base class for a
compiler as well as a generic compiler function. Compiler
objects are also callable and receive the abstract syntax tree (AST)
as argument and yield whatever output the compiler produces. In
most Digital Humanities applications this will be
XML-code. However, it can also be anything else, like binary
code or, as in the case of DHParser's ebnf-compiler, Python
source code.
See module ``ebnf`` for a sample of the implementation of a
compiler object.
"""
import
copy
import
os
from
typing
import
Any
,
Callable
,
cast
,
Dict
,
List
,
Set
,
Tuple
,
Union
,
Optional
from
DHParser.error
import
Error
,
is_error
,
linebreaks
,
adjust_error_locations
from
DHParser.log
import
is_logging
,
logfile_basename
,
HistoryRecord
,
log_ST
,
\
log_parsing_history
from
DHParser.preprocess
import
BEGIN_TOKEN
,
END_TOKEN
,
RX_TOKEN_NAME
,
\
PreprocessorFunc
,
with_source_mapping
,
strip_tokens
from
DHParser.error
import
Error
,
linebreaks
from
DHParser.log
import
is_logging
,
HistoryRecord
from
DHParser.preprocess
import
BEGIN_TOKEN
,
END_TOKEN
,
RX_TOKEN_NAME
from
DHParser.stringview
import
StringView
,
EMPTY_STRING_VIEW
from
DHParser.syntaxtree
import
Node
,
TransformationFunc
,
ParserBase
,
WHITESPACE_PTYPE
,
\
from
DHParser.syntaxtree
import
Node
,
ParserBase
,
WHITESPACE_PTYPE
,
\
TOKEN_PTYPE
,
ZOMBIE_PARSER
from
DHParser.toolkit
import
sane_parser_name
,
\
escape_control_characters
,
load_if_file
,
re
from
DHParser.toolkit
import
sane_parser_name
,
escape_control_characters
,
re
,
typing
from
typing
import
Callable
,
cast
,
Dict
,
List
,
Set
,
Tuple
,
Union
,
Optional
__all__
=
(
'Parser'
,
'UnknownParserError'
,
...
...
@@ -86,9 +71,7 @@ __all__ = ('Parser',
'Capture'
,
'Retrieve'
,
'Pop'
,
'Forward'
,
'Compiler'
,
'compile_source'
)
'Forward'
)
########################################################################
...
...
@@ -1900,211 +1883,4 @@ class Forward(Parser):
return
False
#######################################################################
#
# Syntax driven compilation support
#
#######################################################################
class Compiler:
    """
    Class Compiler is the abstract base class for compilers. Compiler
    objects are callable and take the root node of the abstract
    syntax tree (AST) as argument and return the compiled code in a
    format chosen by the compiler itself.

    Subclasses implementing a compiler must define `on_XXX()`-methods
    for each node name that can occur in the AST where 'XXX' is the
    node's name (for unnamed nodes it is the node's ptype without the
    leading colon ':').

    These compiler methods take the node on which they are run as
    argument. Other than in the AST transformation, which runs depth-first,
    compiler methods are called forward moving starting with the root
    node, and they are responsible for compiling the child nodes
    themselves. This should be done by invoking the `compile(node)`-
    method which will pick the right `on_XXX`-method. It is not
    recommended to call the `on_XXX`-methods directly.

    Attributes:
        context:  A list of parent nodes that ends with the currently
                compiled node.
        grammar_name:  The name of the grammar this compiler is related to
        grammar_source:  The source code of the grammar this compiler is
                related to.
        _dirty_flag:  A flag indicating that the compiler has already been
                called at least once and that therefore all compilation
                variables must be reset when it is called again.
    """

    def __init__(self, grammar_name="", grammar_source=""):
        """Initializes the compilation state and stores the grammar
        metadata via `set_grammar_name()`."""
        self._reset()
        self.set_grammar_name(grammar_name, grammar_source)

    def _reset(self):
        """Resets all variables that hold state of a compilation run."""
        self.context = []  # type: List[Node]
        self._dirty_flag = False

    def __call__(self, node: Node) -> Any:
        """
        Compiles the abstract syntax tree with the root node `node` and
        returns the compiled code. It is up to subclasses implementing
        the compiler to determine the format of the returned data.
        (This very much depends on the kind and purpose of the
        implemented compiler.)

        After compilation the error flags of the tree are propagated
        lazily from the children to their parents.
        """
        if self._dirty_flag:
            # the compiler has been used before: start from a clean state
            self._reset()
        self._dirty_flag = True
        result = self.compile(node)
        self.propagate_error_flags(node, lazy=True)
        return result

    def set_grammar_name(self, grammar_name="", grammar_source=""):
        """
        Changes the grammar's name and the grammar's source.

        The grammar name and the source text of the grammar are
        metadata about the grammar that do not affect the compilation
        process. Classes inheriting from `Compiler` can use this
        information to name and annotate its output.

        If no name is given and `grammar_source` consists only of word
        characters, slashes, colons and backslashes, the name is derived
        from the base name of `grammar_source` without its extension.
        """
        assert grammar_name == "" or re.match(r'\w+\Z', grammar_name)
        if not grammar_name and re.fullmatch(r'[\w/:\\]+', grammar_source):
            grammar_name = os.path.splitext(os.path.basename(grammar_source))[0]
        self.grammar_name = grammar_name
        self.grammar_source = load_if_file(grammar_source)

    @staticmethod
    def propagate_error_flags(node: Node, lazy: bool = True) -> None:
        """
        Raises the error flag of `node` and of the nodes below it to at
        least the maximum of the error flags of their children.

        Args:
            node:  The root of the (sub-)tree to be processed.
            lazy:  If True, a node whose error flag has already reached
                `Error.HIGHEST` is not descended into and the scan of
                a node's children stops as soon as its flag reaches
                `Error.HIGHEST`. If False, the complete subtree is
                visited.
        """
        # See test_parser.TestCompilerClass.test_propagate_error()..
        if not lazy or node.error_flag < Error.HIGHEST:
            for child in node.children:
                # `lazy` must be handed down; otherwise a non-lazy call
                # would be exhaustive on the topmost level only
                Compiler.propagate_error_flags(child, lazy)
                node.error_flag = max(node.error_flag, child.error_flag)
                if lazy and node.error_flag >= Error.HIGHEST:
                    return

    @staticmethod
    def method_name(node_name: str) -> str:
        """Returns the method name for `node_name`, e.g.::

            >>> Compiler.method_name('expression')
            'on_expression'
        """
        return 'on_' + node_name

    def fallback_compiler(self, node: Node) -> Any:
        """This is a generic compiler function which will be called on
        all those node types for which no compiler method `on_XXX` has
        been defined.

        It compiles all children of the node, stores the tuple of their
        results as the node's result and returns the node itself."""
        if node.children:
            result = tuple(self.compile(nd) for nd in node.children)
            node.result = result
        return node

    def compile(self, node: Node) -> Any:
        """
        Calls the compilation method for the given node and returns the
        result of the compilation.

        The method's name is derived from either the node's parser
        name or, if the parser is anonymous, the node's parser's class
        name by adding the prefix ``on_``.

        Note that ``compile`` does not call any compilation functions
        for the parsers of the sub nodes by itself. Rather, this should
        be done within the compilation methods.

        Returns:
            The result of the compilation method, or None if the node's
            name is a reserved name (in which case an error is added to
            the node).

        Raises:
            ValueError:  if the compilation method returned None.
        """
        elem = node.parser.name or node.parser.ptype[1:]
        if not sane_parser_name(elem):
            node.add_error("Reserved name '%s' not allowed as parser "
                           "name! " % elem + "(Any name starting with "
                           "'_' or '__' or ending with '__' is reserved.)")
            return None
        else:
            try:
                compiler = self.__getattribute__(self.method_name(elem))
            except AttributeError:
                compiler = self.fallback_compiler
            self.context.append(node)
            try:
                result = compiler(node)
            finally:
                # keep the context stack balanced even if the
                # compilation method raises an exception
                self.context.pop()
            if result is None:
                raise ValueError('%s failed to return a valid compilation result!'
                                 % str(compiler))
            # # the following statement makes sure that the error_flag
            # # is propagated early on. Otherwise it is redundant, because
            # # the __call__ method globally propagates the node's error_flag
            # # later anyway. So, maybe it could be removed here.
            # for child in node.children:
            #     node.error_flag = node.error_flag or child.error_flag
            return result
def compile_source(source: str,
                   preprocessor: Optional[PreprocessorFunc],  # str -> str
                   parser: Grammar,  # str -> Node (concrete syntax tree (CST))
                   transformer: TransformationFunc,  # Node -> Node (abstract syntax tree (AST))
                   compiler: Compiler) -> Tuple[Any, List[Error], Node]:  # Node (AST) -> Any
    """
    Runs a source text through the four compilation stages:

    1. Preprocessing (if needed)
    2. Parsing
    3. AST-transformation
    4. Compiling.

    The transformation stage is skipped if parsing yielded an error,
    and the compilation stage is skipped if any of the previous stages
    yielded an error.

    Args:
        source (str): The input text for compilation or the name of a
            file containing the input text.
        preprocessor (function):  text -> text. A preprocessor function
            or None, if no preprocessor is needed.
        parser (function):  A parsing function or grammar class
        transformer (function):  A transformation function that takes
            the root-node of the concrete syntax tree as an argument and
            transforms it (in place) into an abstract syntax tree.
        compiler (function): A compiler function or compiler class
            instance

    Returns (tuple):
        A 3-tuple (result, errors, abstract syntax tree). In detail:

        1. The result as returned by the compiler or ``None`` in case of failure
        2. A list of error or warning messages
        3. The root-node of the abstract syntax tree
    """
    original_text = load_if_file(source)
    log_file_name = logfile_basename(source, compiler)

    if preprocessor is not None:
        source_text, source_mapping = with_source_mapping(preprocessor(original_text))
    else:
        # without a preprocessor, positions map onto themselves
        source_text = original_text
        source_mapping = lambda i: i

    syntax_tree = parser(source_text)
    if is_logging():
        log_ST(syntax_tree, log_file_name + '.cst')
        log_parsing_history(parser, log_file_name)

    assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text)

    # only compile if there were no syntax errors, for otherwise it is
    # likely that error list gets littered with compile error messages
    result = None
    worst_flag = syntax_tree.error_flag
    collected = syntax_tree.collect_errors(clear_errors=True)
    if not is_error(worst_flag):
        transformer(syntax_tree)
        worst_flag = max(worst_flag, syntax_tree.error_flag)
        collected.extend(syntax_tree.collect_errors(clear_errors=True))
        if is_logging():
            log_ST(syntax_tree, log_file_name + '.ast')
        if not is_error(syntax_tree.error_flag):
            result = compiler(syntax_tree)
        # print(syntax_tree.as_sxpr())
        collected.extend(syntax_tree.collect_errors())
        # re-apply the highest flag seen in the earlier stages to the tree
        syntax_tree.error_flag = max(syntax_tree.error_flag, worst_flag)

    adjust_error_locations(collected, original_text, source_mapping)
    return result, collected, syntax_tree
DHParser/syntaxtree.py
View file @
3004d9b7
...
...
@@ -26,12 +26,11 @@ parser classes are defined in the ``parse`` module.
import
collections.abc
import
copy
from
functools
import
partial
from
DHParser.error
import
Error
,
linebreaks
,
line_col
from
DHParser.stringview
import
StringView
from
DHParser.toolkit
import
re
,
typing
from
typing
import
Any
,
Callable
,
cast
,
Iterator
,
List
,
Union
,
Tuple
,
Hashable
,
Optional
from
DHParser.toolkit
import
re
from
typing
import
Callable
,
cast
,
Iterator
,
List
,
Union
,
Tuple
,
Optional
__all__
=
(
'ParserBase'
,
...
...
@@ -42,8 +41,7 @@ __all__ = ('ParserBase',
'ZOMBIE_PARSER'
,
'Node'
,
'mock_syntax_tree'
,
'flatten_sxpr'
,
'TransformationFunc'
)
'flatten_sxpr'
)
#######################################################################
...
...
@@ -745,10 +743,6 @@ def mock_syntax_tree(sxpr):
node
.
_pos
=
0
return
node
TransformationFunc
=
Union
[
Callable
[[
Node
],
Any
],
partial
]
# if __name__ == "__main__":
# st = mock_syntax_tree("(alpha (beta (gamma i\nj\nk) (delta y)) (epsilon z))")
# print(st.as_sxpr())
...
...
DHParser/transform.py
View file @
3004d9b7
...
...
@@ -38,6 +38,7 @@ from typing import AbstractSet, Any, ByteString, Callable, cast, Container, Dict
__all__
=
(
'TransformationDict'
,
'TransformationProc'
,
'TransformationFunc'
,
'ConditionFunc'
,
'KeyFunc'
,
'transformation_factory'
,
...
...
@@ -96,6 +97,7 @@ __all__ = ('TransformationDict',
TransformationProc
=
Callable
[[
List
[
Node
]],
None
]
TransformationDict
=
Dict
[
str
,
Sequence
[
Callable
]]
TransformationFunc
=
Union
[
Callable
[[
Node
],
Any
],
partial
]
ProcessingTableType
=
Dict
[
str
,
Union
[
Sequence
[
Callable
],
TransformationDict
]]
ConditionFunc
=
Callable
# Callable[[List[Node]], bool]
KeyFunc
=
Callable
[[
Node
],
str
]
...
...
@@ -846,3 +848,4 @@ def forbid(context: List[Node], child_tags: AbstractSet[str]):
if
child
.
tag_name
in
child_tags
:
node
.
add_error
(
'Element "%s" cannot be nested inside "%s".'
%
(
child
.
parser
.
name
,
node
.
parser
.
name
))
dhparser.py
View file @
3004d9b7
...
...
@@ -25,7 +25,7 @@ import sys
from
DHParser.dsl
import
compileDSL
,
compile_on_disk
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
from
DHParser
.parse
import
compile_source
from
DHParser
import
compile_source
from
DHParser.log
import
logging
EBNF_TEMPLATE
=
r
"""-grammar
...
...
documentation/ModuleReference.rst
View file @
3004d9b7
...
...
@@ -63,9 +63,9 @@ Main Modules Reference
The core of DHParser are the modules containing the functionality
for the parsing and compiling process. The modules ``preprocess``,
``parse``
and
``transform`` represent particular stages of the
``parse``
,
``transform``
and ``compile``
represent particular stages of the
parsing/compiling process, while ``syntaxtree`` and ``error`` define
clases for syntax trees and parser/compiler errors, respectively.
clas
s
es for syntax trees and parser/compiler errors, respectively.
Module ``preprocess``
---------------------
...
...
@@ -91,6 +91,12 @@ Module ``transform``
.. automodule:: transform
:members:
Module ``compile``
--------------------
.. automodule:: compile
:members:
Module ``error``
----------------
...
...
examples/BibTeX/BibTeXCompiler.py
View file @
3004d9b7
...
...
@@ -23,13 +23,14 @@ from DHParser import is_filename, load_if_file, \
Option
,
NegativeLookbehind
,
OneOrMore
,
RegExp
,
Retrieve
,
Series
,
RE
,
Capture
,
\
ZeroOrMore
,
Forward
,
NegativeLookahead
,
mixin_comment
,
compile_source
,
\
last_value
,
counterpart
,
accumulate
,
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
TransformationDict
,
TRUE_CONDITION
,
\
Node
,
TransformationDict
,
TRUE_CONDITION
,
\
traverse
,
remove_children_if
,
merge_children
,
is_anonymous
,
\
reduce_single_child
,
replace_by_single_child
,
replace_or_reduce
,
remove_whitespace
,
\
remove_expendables
,
remove_empty
,
remove_tokens
,
flatten
,
is_whitespace
,
\
is_empty
,
is_expendable
,
collapse
,
replace_content
,
remove_nodes
,
remove_content
,
remove_brackets
,
replace_parser
,
\
keep_children
,
is_one_of
,
has_content
,
apply_if
,
remove_first
,
remove_last
,
\
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.transform
import
TransformationFunc
from
DHParser.log
import
logging
...
...
test/test_dsl.py
View file @
3004d9b7
...
...
@@ -24,7 +24,8 @@ import os
import
sys
sys
.
path
.
extend
([
'../'
,
'./'
])
from
DHParser.parse
import
Grammar
,
Compiler
from
DHParser.parse
import
Grammar
from
DHParser
import
Compiler
from
DHParser.error
import
is_error
from
DHParser.dsl
import
compile_on_disk
,
run_compiler
,
compileEBNF
,
grammar_provider
,
\
load_compiler_suite
...
...
test/test_ebnf.py
View file @
3004d9b7
...
...
@@ -27,7 +27,7 @@ sys.path.extend(['../', './'])
from
DHParser.toolkit
import
compile_python_object
,
re
from
DHParser.preprocess
import
nil_preprocessor
from
DHParser
.parse
import
compile_source
from
DHParser
import
compile_source
from
DHParser.error
import
has_errors
from
DHParser.syntaxtree
import
WHITESPACE_PTYPE
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
EBNFTransform
,
get_ebnf_compiler
...
...
test/test_parse.py
View file @
3004d9b7
...
...
@@ -26,12 +26,10 @@ sys.path.extend(['../', './'])
from
DHParser.toolkit
import
compile_python_object
from
DHParser.log
import
logging
,
is_logging
,
log_ST
from
DHParser.stringview
import
StringView
from
DHParser.error
import
Error
from
DHParser.syntaxtree
import
mock_syntax_tree
from
DHParser.parse
import
compile_source
,
Retrieve
,
Grammar
,
Forward
,
Token
,
ZeroOrMore
,
RE
,
\
RegExp
,
Lookbehind
,
NegativeLookahead
,
OneOrMore
,
Series
,
Alternative
,
AllOf
,
SomeOf
,
Compiler
,
\
UnknownParserError
from
DHParser.parse
import
Retrieve
,
Grammar
,
Forward
,
Token
,
ZeroOrMore
,
RE
,
\
RegExp
,
Lookbehind
,
NegativeLookahead
,
OneOrMore
,
Series
,
Alternative
,
AllOf
,
SomeOf
,
UnknownParserError
from
DHParser
import
compile_source
from
DHParser.ebnf
import
get_ebnf_grammar
,
get_ebnf_transformer
,
get_ebnf_compiler
from
DHParser.dsl
import
grammar_provider
,
DHPARSER_IMPORTS
...
...
@@ -548,22 +546,6 @@ class TestBorderlineCases:
assert
not
cst
.
error_flag
class TestCompilerClass:
    """Tests for the static helper methods of class ``Compiler``."""

    def test_error_propagations(self):
        """A lazy propagation pass flags the root node but leaves node D
        unflagged; a subsequent non-lazy pass flags node D as well."""
        root = mock_syntax_tree('(A (B 1) (C (D (E 2) (F 3))))')

        def first_match(predicate):
            return next(root.find(predicate))

        node_b = first_match(lambda node: str(node) == "1")
        node_d = first_match(lambda node: node.parser.name == "D")
        node_f = first_match(lambda node: str(node) == "3")

        node_b.add_error("Error in child node")
        node_f.add_error("Error in child's child node")

        Compiler.propagate_error_flags(root, lazy=True)
        assert root.error_flag
        assert not node_d.error_flag

        Compiler.propagate_error_flags(root, lazy=False)
        assert node_d.error_flag
class
TestUnknownParserError
:
def
test_unknown_parser_error
(
self
):
gr
=
Grammar
()
...
...
test/test_preprocess.py
View file @
3004d9b7
...
...
@@ -25,7 +25,7 @@ limitations under the License.
from
functools
import
partial
from
DHParser.dsl
import
grammar_provider
from
DHParser
.parse
import
compile_source
from
DHParser
import
compile_source
from
DHParser.preprocess
import
make_token
,
tokenized_to_original_mapping
,
source_map
,
\
BEGIN_TOKEN
,
END_TOKEN
,
TOKEN_DELIMITER
,
SourceMapFunc
,
SourceMap
,
chain_preprocessors
,
\
strip_tokens
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment