Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
ae67d404
Commit
ae67d404
authored
Jul 02, 2021
by
Eckhart Arnold
Browse files
documentation extended
parent
5491f428
Changes
10
Hide whitespace changes
Inline
Side-by-side
DHParser/ebnf.py
View file @
ae67d404
...
...
@@ -2394,6 +2394,10 @@ def preprocessor_factory() -> PreprocessorFunc:
get_preprocessor = ThreadLocalSingletonFactory(preprocessor_factory, ident=1)
def preprocess_{NAME}(source):
return get_preprocessor()(source)
'''
...
...
@@ -2420,8 +2424,10 @@ def {NAME}Transformer() -> TransformationFunc:
threads or processes."""
return partial(traverse, processing_table={NAME}_AST_transformation_table.copy())
get_transformer = ThreadLocalSingletonFactory({NAME}Transformer, ident={ID})
def transform_{NAME}(cst):
get_transformer()(cst)
'''
...
...
@@ -2430,6 +2436,7 @@ def transform_{NAME}(cst):
COMPILER_FACTORY
=
'''
get_compiler = ThreadLocalSingletonFactory({NAME}Compiler, ident={ID})
def compile_{NAME}(ast):
return get_compiler()(ast)
'''
...
...
DHParser/preprocess.pxd
0 → 100644
View file @
ae67d404
#cython: infer_types=True
#cython: language_level=3
#cython: c_string_type=unicode
#cython: c_string_encoding=utf-8
import
cython
DHParser/scripts/dhparser_rename.py
View file @
ae67d404
...
...
@@ -2,6 +2,8 @@
"""dhparser_rename.py - rename a dhparser project properly
UNMAINTAINED!!!
Copyright 2019 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
...
...
DHParser/syntaxtree.py
View file @
ae67d404
...
...
@@ -191,12 +191,12 @@ Serializing and de-serializing syntax-trees
Syntax trees can be serialized as S-expressions, XML, JSON and indented
text. Module 'syntaxtree' also contains three simple parsers
(:py:func:`~syntaxtree.parse_sxpr()`, :py:func:`~syntaxtree.parse_xml()`
and :py:func:`~syntaxtree.parse_json()`) to convert S-expressions,
XML-snippets or JSON objects into trees composed of Node-objects.
Only :py:func:`~syntaxtree.parse_xml()` can deserialize any XML-file.
The other two functions can parse only the restricted subset of
S-expressions or JSON that is used when serializing Node-trees into
these formats. There is no function to deserialize indented text.
In order to make parameterizing serialization easier, the Node-class
also defines a generic :py:meth:`~syntaxtree.serialize()`-method next to
...
...
@@ -744,8 +744,8 @@ __all__ = ('WHITESPACE_PTYPE',
'DHParser_JSONEncoder'
,
'parse_sxpr'
,
'parse_xml'
,
'parse_json
_syntaxtree
'
,
'
parse_tre
e'
,
'parse_json'
,
'
deserializ
e'
,
'flatten_sxpr'
,
'flatten_xml'
)
...
...
@@ -2191,6 +2191,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def as_json(self, indent: Optional[int] = 2, ensure_ascii=False) -> str:
    """Return the tree originating in `self` as a JSON-string.

    :param indent: indentation passed on to ``json.dumps``. ``None``,
        zero or a negative number selects the compact form, which is
        written without indentation and without blanks after the
        separators.
    :param ensure_ascii: handed through unchanged to ``json.dumps``.
    :returns: the JSON-serialization of ``self.to_json_obj()``.
    """
    if not indent or indent <= 0:
        # compact form: no indentation, tightest possible separators
        indent, separators = None, (',', ':')
    else:
        separators = (', ', ': ')
    return json.dumps(self.to_json_obj(), indent=indent,
                      ensure_ascii=ensure_ascii, separators=separators)
...
...
@@ -3377,9 +3378,9 @@ class DHParser_JSONEncoder(json.JSONEncoder):
return
json
.
JSONEncoder
.
default
(
self
,
obj
)
def
parse_json
_syntaxtree
(
json_str
:
str
)
->
Node
:
def
parse_json
(
json_str
:
str
)
->
Node
:
"""
Parses a JSON-representation of a syntax tree. Unlike parse_sxpr
and parse_xml, this function does not convert arbitrary json-text into
a syntax tree, but only json-text that represents a syntax tree, e.g.
that has been produced by `Node.as_json()`!
...
...
@@ -3388,22 +3389,22 @@ def parse_json_syntaxtree(json_str: str) -> Node:
return
Node
.
from_json_obj
(
json_obj
)
def deserialize(xml_sxpr_or_json: str) -> Optional[Node]:
    """
    Parses either XML or S-expressions or a JSON representation of a
    syntax-tree. Which of these is detected automatically.

    :param xml_sxpr_or_json: the serialized tree as XML, S-expression
        or JSON text.
    :returns: the result of ``parse_xml``, ``parse_sxpr`` or
        ``parse_json``, depending on the detected format, or ``None``
        if the text is empty or consists of whitespace only.
    :raises ValueError: if the text matches neither ``RX_IS_XML`` nor
        ``RX_IS_SXPR`` and cannot be decoded as JSON either.
    """
    if RX_IS_XML.match(xml_sxpr_or_json):
        return parse_xml(xml_sxpr_or_json)
    elif RX_IS_SXPR.match(xml_sxpr_or_json):
        return parse_sxpr(xml_sxpr_or_json)
    elif re.fullmatch(r'\s*', xml_sxpr_or_json):
        # fullmatch is required here: re.match(r'\s*', ...) succeeds with
        # a zero-width match on ANY string, which would make the
        # JSON-branch below unreachable.
        return None
    else:
        try:
            return parse_json(xml_sxpr_or_json)
        except json.decoder.JSONDecodeError as error:
            # quote only the first line of the offending text in the message
            m = re.match(r'\s*(.*)\n?', xml_sxpr_or_json)
            snippet = m.group(1) if m else ''
            raise ValueError('Snippet is neither S-expression nor XML: '
                             + snippet + ' ...') from error
...
...
DHParser/templates/DSLParser.pyi
View file @
ae67d404
...
...
@@ -2,7 +2,7 @@ RESULT_FILE_EXTENSION = ".sxpr" # Change this according to your needs!
def compile_src(source: str) -> Tuple[Any, List[Error]]:
    """Compile ``source`` and return only the pair (result, errors).

    The source is run through ``compile_source`` with the preprocessor,
    grammar, transformer and compiler obtained from the respective
    ``get_...()``-functions.
    """
    full_result = compile_source(
        source, get_preprocessor(), get_grammar(), get_transformer(), get_compiler())
    # keep the first two items only; the AST at the end of the tuple is dropped
    return full_result[:2]
...
...
DHParser/testing.py
View file @
ae67d404
...
...
@@ -46,7 +46,7 @@ from DHParser.log import is_logging, clear_logs, local_log_dir, log_parsing_hist
from
DHParser.parse
import
Lookahead
from
DHParser.preprocess
import
gen_neutral_srcmap_func
from
DHParser.server
import
RX_CONTENT_LENGTH
,
RE_DATA_START
,
JSONRPC_HEADER_BYTES
from
DHParser.syntaxtree
import
Node
,
RootNode
,
parse_tre
e
,
flatten_sxpr
,
ZOMBIE_TAG
from
DHParser.syntaxtree
import
Node
,
RootNode
,
deserializ
e
,
flatten_sxpr
,
ZOMBIE_TAG
from
DHParser.trace
import
set_tracer
,
all_descendants
,
trace_history
from
DHParser.transform
import
traverse
,
remove_children
from
DHParser.toolkit
import
load_if_file
,
re
,
re_find
,
concurrent_ident
,
instantiate_executor
...
...
@@ -469,7 +469,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
if
"cst"
in
tests
and
len
(
errata
)
==
errflag
:
try
:
compare
=
parse_tre
e
(
get
(
tests
,
"cst"
,
test_name
))
compare
=
deserializ
e
(
get
(
tests
,
"cst"
,
test_name
))
except
ValueError
as
e
:
raise
SyntaxError
(
'CST-TEST "%s" of parser "%s" failed with:
\n
%s'
%
(
test_name
,
parser_name
,
str
(
e
)))
...
...
@@ -483,7 +483,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
if
"ast"
in
tests
and
len
(
errata
)
==
errflag
:
try
:
compare
=
parse_tre
e
(
get
(
tests
,
"ast"
,
test_name
))
compare
=
deserializ
e
(
get
(
tests
,
"ast"
,
test_name
))
except
ValueError
as
e
:
raise
SyntaxError
(
'AST-TEST "%s" of parser "%s" failed with:
\n
%s'
%
(
test_name
,
parser_name
,
str
(
e
)))
...
...
documentation_src/Overview.rst
View file @
ae67d404
...
...
@@ -355,6 +355,9 @@ DHParser does not hide any stages of the tree generation
process. Thus, you get full access to the (simplified) concrete
syntax tree (CST) as well as to the abstract syntax tree (AST).
An internal mini-DSL for AST-transformation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Abstract syntax tree generation is controlled in
declarative style by simple lists of transformations
applied to each node depending on its type. Remember
...
...
@@ -392,9 +395,10 @@ end as nodes containing the quotation mark-delimiters
of that string.
To give an impression of what AST-transformation-tables
may look like, here is an excerpt from (a former
version of) DHParser's own transformation table
to derive a lean AST from the concrete syntax-tree
of an EBNF grammar::
EBNF_AST_transformation_table = {
# AST Transformations for EBNF-grammar
...
...
@@ -427,16 +431,108 @@ are composed of a single :py:class:`~syntaxtree.Node`-type.
Nodes contain either text-data or have one or more other nodes
as children (but not both). The "kind" or "type"
of a node is indicated by its "tag-name". It should be
easy, though, to convert this tree of nodes into an
application-specific tree of objects of different classes.
Serialization as you like it: XML, JSON, S-expressions
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
DHParser makes it easy to visualize the various stages
of tree-transformation (CST, AST, ...) by offering
manifold serialization methods that output syntax-trees
in either a nicely formatted or compact form:
1. S-expressions::
>>> syntax_tree = JSONParser.parse_JSON('{ "one": 1, "two": 2 }')
>>> JSONParser.transform_JSON(syntax_tree)
>>> print(syntax_tree.as_sxpr())
(json
(object
(member
(string
(PLAIN "one"))
(number
(INT "1")))
(member
(string
(PLAIN "two"))
(number
(INT "2")))))
2. XML::
>>> print(syntax_tree.as_xml(indent=None))
<json>
<object>
<member>
<string>
<PLAIN>one</PLAIN>
</string>
<number>
<INT>1</INT>
</number>
</member>
<member>
<string>
<PLAIN>two</PLAIN>
</string>
<number>
<INT>2</INT>
</number>
</member>
</object>
</json>
3. JSON::
>>> print(syntax_tree.as_json(indent=None))
["json",[["object",[["member",[["string",[["PLAIN","one",3]],2],["number",[["INT","1",9]],9]],2],["member",[["string",[["PLAIN","two",13]],12],["number",[["INT","2",19]],19]],10]],0]],0]
4. Indented text-tree::
>>> print(syntax_tree.as_tree())
json
object
member
string
PLAIN "one"
number
INT "1"
member
string
PLAIN "two"
number
INT "2"
All but the last of these serialization-formats can be de-serialized into
a tree of nodes with the functions :py:func:`~syntaxtree.parse_sxpr`,
:py:func:`~syntaxtree.parse_xml` and :py:func:`~syntaxtree.parse_json`.
The function :py:func:`~syntaxtree.parse_xml` is not restricted to
de-serialization but can parse any XML into a tree of nodes.
XML-connection
^^^^^^^^^^^^^^
Since DHParser has been built with Digital-Humanities-applications in mind,
it offers two further methods to connect to X-technologies. The methods
:py:meth:`~syntaxtree.Node.as_etree` and :py:meth:`~syntaxtree.Node.from_etree`
allow direct conversion to the xml-ElementTrees of the Python standard-library
or of the lxml-package which offers full support for XPath, XQuery and XSLT.
Test-driven grammar development
-------------------------------
Just like regular expressions, it is quite difficult to get
EBNF-grammars right on the first try - especially, if you are
new to the technology. For regular expressions there exist
all kinds of "workbenches" to try and test regular expressions.
- Debugging parsers
Debugging parsers
-----------------
Fail-tolerant parsing
...
...
@@ -448,8 +544,8 @@ Compiling DSLs
Serialization
-------------
XML-Connection
--------------
-
XML-Connection
Language Servers
----------------
...
...
examples/Introduction/LyrikParser.py
View file @
ae67d404
...
...
@@ -90,7 +90,7 @@ get_preprocessor = ThreadLocalSingletonFactory(preprocessor_factory, ident=1)
class
LyrikGrammar
(
Grammar
):
r
"""Parser for a Lyrik source file.
"""
source_hash__
=
"
26385fa0fbbe6e28b8b15d563a5407c9
"
source_hash__
=
"
d4d0bbf5b09e354e4c6737bfaf757f57
"
disposable__
=
re
.
compile
(
'JAHRESZAHL$|ZEICHENFOLGE$|ENDE$|LEERRAUM$|ziel$|wortfolge$'
)
static_analysis_pending__
=
[]
# type: List[bool]
parser_initialization__
=
[
"upon instantiation"
]
...
...
experimental/JSON/serialization_demo.py
0 → 100644
View file @
ae67d404
import os
import sys

# Determine the directory of this script. If __file__ is not defined,
# the empty string is used instead.
try:
    scriptpath = os.path.dirname(__file__)
except NameError:
    scriptpath = ''
dhparser_parentdir = os.path.abspath(os.path.join(scriptpath, r'../..'))

# Append the script's own directory and the directory two levels above
# it to the module search path, unless they are already listed there.
for search_dir in (scriptpath, dhparser_parentdir):
    if search_dir not in sys.path:
        sys.path.append(search_dir)

import JSONParser


if __name__ == "__main__":
    # Parse a small JSON-document, transform the resulting tree in place
    # and print it in each of the four serialization formats.
    tree = JSONParser.parse_JSON('{ "one": 1, "two": 2 }')
    JSONParser.transform_JSON(tree)
    for render in (lambda: tree.as_sxpr(),
                   lambda: tree.as_json(indent=None),
                   lambda: tree.as_xml(),
                   lambda: tree.as_tree()):
        print(render())
tests/test_syntaxtree.py
View file @
ae67d404
...
...
@@ -29,7 +29,7 @@ sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from
DHParser.configuration
import
get_config_value
,
set_config_value
from
DHParser.syntaxtree
import
Node
,
RootNode
,
parse_sxpr
,
parse_xml
,
flatten_sxpr
,
\
flatten_xml
,
parse_json
_syntaxtree
,
ZOMBIE_TAG
,
EMPTY_NODE
,
ALL_NODES
,
next_context
,
\
flatten_xml
,
parse_json
,
ZOMBIE_TAG
,
EMPTY_NODE
,
ALL_NODES
,
next_context
,
\
prev_context
,
serialize_context
,
generate_context_mapping
,
map_pos_to_context
,
\
select_context_if
,
select_context
,
create_context_match_function
from
DHParser.transform
import
traverse
,
reduce_single_child
,
\
...
...
@@ -172,7 +172,7 @@ class TestParseJSON:
tree_copy
=
Node
.
from_json_obj
(
json
.
loads
(
s
))
assert
tree_copy
.
equals
(
self
.
tree
,
ignore_attr_order
=
sys
.
version_info
<
(
3
,
6
))
s
=
self
.
tree
.
as_json
(
indent
=
None
,
ensure_ascii
=
False
)
tree_copy
=
parse_json
_syntaxtree
(
s
)
tree_copy
=
parse_json
(
s
)
# print(s)
# print(self.tree.as_sxpr())
# print(tree_copy.as_sxpr())
...
...
@@ -184,7 +184,7 @@ class TestParseJSON:
n
.
attr
[
'id'
]
=
'007'
# json
json
=
n
.
as_json
()
tree
=
parse_json
_syntaxtree
(
json
)
tree
=
parse_json
(
json
)
# print()
# XML
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment