Commit ae67d404 authored by Eckhart Arnold's avatar Eckhart Arnold
Browse files

documentation extended

parent 5491f428
......@@ -2394,6 +2394,10 @@ def preprocessor_factory() -> PreprocessorFunc:
get_preprocessor = ThreadLocalSingletonFactory(preprocessor_factory, ident=1)
def preprocess_{NAME}(source):
return get_preprocessor()(source)
'''
......@@ -2420,8 +2424,10 @@ def {NAME}Transformer() -> TransformationFunc:
threads or processes."""
return partial(traverse, processing_table={NAME}_AST_transformation_table.copy())
get_transformer = ThreadLocalSingletonFactory({NAME}Transformer, ident={ID})
def transform_{NAME}(cst):
get_transformer()(cst)
'''
......@@ -2430,6 +2436,7 @@ def transform_{NAME}(cst):
# Source-code template (a plain string with {NAME} and {ID} placeholders) for
# the compiler-access section of generated code. The placeholders are filled
# in by whatever code uses this template (presumably via str.format -- confirm
# against the caller). The resulting code binds `get_compiler` to a
# ThreadLocalSingletonFactory for `{NAME}Compiler` and defines
# `compile_{NAME}(ast)`, which returns the result of calling the object
# obtained from `get_compiler()` on `ast`.
COMPILER_FACTORY = '''
get_compiler = ThreadLocalSingletonFactory({NAME}Compiler, ident={ID})
def compile_{NAME}(ast):
return get_compiler()(ast)
'''
......
#cython: infer_types=True
#cython: language_level=3
#cython: c_string_type=unicode
#cython: c_string_encoding=utf-8
import cython
......@@ -2,6 +2,8 @@
"""dhparser_rename.py - rename a dhparser project properly
UNMAINTAINED!!!
Copyright 2019 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
......
......@@ -191,12 +191,12 @@ Serializing and de-serializing syntax-trees
Syntax trees can be serialized as S-expressions, XML, JSON and indented
text. Module 'syntaxtree' also contains two simple parsers
(:py:func:`~syntaxtree.parse_sxpr()`, :py:func:`~syntaxtree.parse_xml()`)
to convert XML-snippets and S-expressions into trees composed of Node-objects.
In addition to that there is a function to parse JSON
(:py:func:`~syntaxtree.parse_json_syntaxtree()`), but in contrast
to the former two functions it can only deserialize previously
JSON-serialized trees and not any kind of JSON-file. There is no
function to deserialize indented text.
or :py:func:`~syntaxtree.parse_json()` to convert XML-snippets, S-expressions
or json objects into trees composed of Node-objects.
Only :py:func:`~syntaxtree.parse_xml()` can deserialize any XML-file.
The other two functions can parse only the restricted subset of S-expressions
or JSON into Node-trees that is used when serializing into these formats.
There is no function to deserialize indented text.
In order to make parameterizing serialization easier, the Node-class
also defines a generic :py:meth:`~syntaxtree.serialize()`-method next to
......@@ -744,8 +744,8 @@ __all__ = ('WHITESPACE_PTYPE',
'DHParser_JSONEncoder',
'parse_sxpr',
'parse_xml',
'parse_json_syntaxtree',
'parse_tree',
'parse_json',
'deserialize',
'flatten_sxpr',
'flatten_xml')
......@@ -2191,6 +2191,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
def as_json(self, indent: Optional[int] = 2, ensure_ascii=False) -> str:
    """Returns the tree originating in `self` serialized as a JSON-string.

    :param indent: number of spaces used for indentation. A falsy or
        non-positive value selects the compact form without any
        whitespace after the separators.
    :param ensure_ascii: passed through to `json.dumps()`.
    :returns: the JSON-text of the object yielded by `self.to_json_obj()`.
    """
    compact = not indent or indent <= 0
    if compact:
        # compact form: no indentation and the tightest possible separators
        return json.dumps(self.to_json_obj(), indent=None, ensure_ascii=ensure_ascii,
                          separators=(',', ':'))
    return json.dumps(self.to_json_obj(), indent=indent, ensure_ascii=ensure_ascii,
                      separators=(', ', ': '))
......@@ -3377,9 +3378,9 @@ class DHParser_JSONEncoder(json.JSONEncoder):
return json.JSONEncoder.default(self, obj)
def parse_json_syntaxtree(json_str: str) -> Node:
def parse_json(json_str: str) -> Node:
"""
Parses a JSON-representation of a syntax tree. Other than parse_sxpr
Parses a JSON-representation of a syntax tree. Unlike
parse_xml, this function does not convert any json-text into
a syntax tree, but only json-text that represents a syntax tree, e.g.
that has been produced by `Node.as_json()`!
......@@ -3388,22 +3389,22 @@ def parse_json_syntaxtree(json_str: str) -> Node:
return Node.from_json_obj(json_obj)
def deserialize(xml_sxpr_or_json: str) -> Optional[Node]:
    """
    Parses either XML or S-expressions or a JSON representation of a
    syntax-tree. Which of these is detected automatically.

    :param xml_sxpr_or_json: the serialized syntax-tree as XML, S-expression
        or JSON (the latter in the form accepted by `parse_json()`).
    :returns: the root-node of the de-serialized tree or `None`, if the
        string is empty or consists of whitespace only.
    :raises ValueError: if the string is neither recognized as XML or
        S-expression nor decodable as JSON.
    """
    if RX_IS_XML.match(xml_sxpr_or_json):
        return parse_xml(xml_sxpr_or_json)
    if RX_IS_SXPR.match(xml_sxpr_or_json):
        return parse_sxpr(xml_sxpr_or_json)
    # Only an empty or all-whitespace string means "no tree". The former test
    # `re.match(r'\s*', ...)` matched every string (the pattern also matches
    # the empty prefix), so the JSON-branch below could never be reached.
    if not xml_sxpr_or_json.strip():
        return None
    try:
        return parse_json(xml_sxpr_or_json)
    except json.decoder.JSONDecodeError as error:
        # quote the first non-blank line of the input in the error message
        m = re.match(r'\s*(.*)\n?', xml_sxpr_or_json)
        snippet = m.group(1) if m else ''
        raise ValueError('Snippet is neither S-expression nor XML nor JSON: '
                         + snippet + ' ...') from error
......
......@@ -2,7 +2,7 @@ RESULT_FILE_EXTENSION = ".sxpr" # Change this according to your needs!
def compile_src(source: str) -> Tuple[Any, List[Error]]:
    """Compiles ``source`` and returns (result, errors).

    The source is passed to `compile_source()` together with the objects
    returned by `get_preprocessor()`, `get_grammar()`, `get_transformer()`
    and `get_compiler()`. Only the first two elements of the tuple returned
    by `compile_source()` are handed back to the caller.
    """
    result_tuple = compile_source(source, get_preprocessor(), get_grammar(), get_transformer(),
                                  get_compiler())
    return result_tuple[:2]  # drop the AST at the end of the result tuple
......
......@@ -46,7 +46,7 @@ from DHParser.log import is_logging, clear_logs, local_log_dir, log_parsing_hist
from DHParser.parse import Lookahead
from DHParser.preprocess import gen_neutral_srcmap_func
from DHParser.server import RX_CONTENT_LENGTH, RE_DATA_START, JSONRPC_HEADER_BYTES
from DHParser.syntaxtree import Node, RootNode, parse_tree, flatten_sxpr, ZOMBIE_TAG
from DHParser.syntaxtree import Node, RootNode, deserialize, flatten_sxpr, ZOMBIE_TAG
from DHParser.trace import set_tracer, all_descendants, trace_history
from DHParser.transform import traverse, remove_children
from DHParser.toolkit import load_if_file, re, re_find, concurrent_ident, instantiate_executor
......@@ -469,7 +469,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
if "cst" in tests and len(errata) == errflag:
try:
compare = parse_tree(get(tests, "cst", test_name))
compare = deserialize(get(tests, "cst", test_name))
except ValueError as e:
raise SyntaxError('CST-TEST "%s" of parser "%s" failed with:\n%s'
% (test_name, parser_name, str(e)))
......@@ -483,7 +483,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
if "ast" in tests and len(errata) == errflag:
try:
compare = parse_tree(get(tests, "ast", test_name))
compare = deserialize(get(tests, "ast", test_name))
except ValueError as e:
raise SyntaxError('AST-TEST "%s" of parser "%s" failed with:\n%s'
% (test_name, parser_name, str(e)))
......
......@@ -355,6 +355,9 @@ DHParser does does not hide any stages of the tree generation
process. Thus, you get full access to the (simplified) concrete
syntax tree (CST) as well as to the abstract syntax tree (AST).
An internal mini-DSL for AST-transformation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Abstract syntax tree generation is controlled in
declarative style by simple lists of transformations
applied to each node depending on its type. Remember
......@@ -392,9 +395,10 @@ end as nodes containing the quotation mark-delimiters
of that string.
To give an impression of what AST-transformation-tables
may look like, here is an excerpt from DHParser's
own transformation table to derive a lean AST from
the concrete syntax-tree of an EBNF grammar::
may look like, here is an excerpt from (a former
version of) DHParser's own transformation table
to derive a lean AST from the concrete syntax-tree
of an EBNF grammar::
EBNF_AST_transformation_table = {
# AST Transformations for EBNF-grammar
......@@ -427,16 +431,108 @@ are composed of a single :py:class:`~syntaxtree.Node`-type.
Nodes contain either text-data or have one or more other nodes
as children (but not both). The "kind" or "type"
of a node is indicated by its "tag-name". It should be
easy, though, to this into an application-specific
tree of objects of different classes.
easy, though, to convert this tree of nodes into an
application-specific tree of objects of different classes.
Serialization as you like it: XML, JSON, S-expressions
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
DHParser makes it easy to visualize the various stages
of tree-transformation (CST, AST, ...) by offering
manifold serialization methods that output syntax-trees
in either a nicely formatted or compact form:
1. S-expressions::
>>> syntax_tree = JSONParser.parse_JSON('{ "one": 1, "two": 2 }')
>>> JSONParser.transform_JSON(syntax_tree)
>>> print(syntax_tree.as_sxpr())
(json
(object
(member
(string
(PLAIN "one"))
(number
(INT "1")))
(member
(string
(PLAIN "two"))
(number
(INT "2")))))
2. XML::
>>> print(syntax_tree.as_xml(indent=None))
<json>
<object>
<member>
<string>
<PLAIN>one</PLAIN>
</string>
<number>
<INT>1</INT>
</number>
</member>
<member>
<string>
<PLAIN>two</PLAIN>
</string>
<number>
<INT>2</INT>
</number>
</member>
</object>
</json>
3. JSON::
>>> print(syntax_tree.as_json(indent=None))
["json",[["object",[["member",[["string",[["PLAIN","one",3]],2],["number",[["INT","1",9]],9]],2],["member",[["string",[["PLAIN","two",13]],12],["number",[["INT","2",19]],19]],10]],0]],0]
4. Indented text-tree::
>>> print(syntax_tree.as_tree())
json
object
member
string
PLAIN "one"
number
INT "1"
member
string
PLAIN "two"
number
INT "2"
All but the last serialization-formats can be de-serialized into
a tree of nodes with the functions: :py:func:`~syntaxtree.parse_sxpr`,
:py:func:`~syntaxtree.parse_xml`, :py:func:`~syntaxtree.parse_json`.
The function :py:func:`~syntaxtree.parse_xml` is not restricted to de-serialization but
can parse any XML into a tree of nodes.
XML-connection
^^^^^^^^^^^^^^
Since DHParser has been built with Digital-Humanities-applications in mind,
it offers two further methods to connect to X-technologies. The methods
:py:meth:`~syntaxtree.Node.as_etree` and :py:meth:`~syntaxtree.Node.from_etree`
allow direct conversion to the xml-ElementTrees of the Python standard-library
or of the lxml-package which offers full support for XPath, XQuery and XSLT.
Test-driven grammar development
-------------------------------
Just like regular expressions, it is quite difficult to get
EBNF-grammars right on the first try - especially, if you are
new to the technology. For regular expressions there exist
all kinds of "workbenches" to try and test regular expressions.
- Debugging parsers
Debugging parsers
-----------------
Fail-tolerant parsing
......@@ -448,8 +544,8 @@ Compiling DSLs
Serialization
-------------
XML-Connection
--------------
- XML-Connection
Language Servers
----------------
......
......@@ -90,7 +90,7 @@ get_preprocessor = ThreadLocalSingletonFactory(preprocessor_factory, ident=1)
class LyrikGrammar(Grammar):
r"""Parser for a Lyrik source file.
"""
source_hash__ = "26385fa0fbbe6e28b8b15d563a5407c9"
source_hash__ = "d4d0bbf5b09e354e4c6737bfaf757f57"
disposable__ = re.compile('JAHRESZAHL$|ZEICHENFOLGE$|ENDE$|LEERRAUM$|ziel$|wortfolge$')
static_analysis_pending__ = [] # type: List[bool]
parser_initialization__ = ["upon instantiation"]
......
# Demo script: parses a small JSON document with the JSONParser module,
# transforms the resulting tree and prints it in every serialization format.
import os
import sys

try:
    scriptpath = os.path.dirname(__file__)
except NameError:
    # __file__ is not defined in this context; fall back to an empty path
    scriptpath = ''
dhparser_parentdir = os.path.abspath(os.path.join(scriptpath, r'../..'))

# Make the script's own directory and the directory two levels above it
# importable (without adding duplicates) before JSONParser is imported.
for _path in (scriptpath, dhparser_parentdir):
    if _path not in sys.path:
        sys.path.append(_path)

import JSONParser


if __name__ == "__main__":
    syntax_tree = JSONParser.parse_JSON('{ "one": 1, "two": 2 }')
    JSONParser.transform_JSON(syntax_tree)
    # one serialization after the other: S-expression, JSON, XML, indented tree
    print(syntax_tree.as_sxpr())
    print(syntax_tree.as_json(indent=None))
    print(syntax_tree.as_xml())
    print(syntax_tree.as_tree())
......@@ -29,7 +29,7 @@ sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from DHParser.configuration import get_config_value, set_config_value
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
flatten_xml, parse_json_syntaxtree, ZOMBIE_TAG, EMPTY_NODE, ALL_NODES, next_context, \
flatten_xml, parse_json, ZOMBIE_TAG, EMPTY_NODE, ALL_NODES, next_context, \
prev_context, serialize_context, generate_context_mapping, map_pos_to_context, \
select_context_if, select_context, create_context_match_function
from DHParser.transform import traverse, reduce_single_child, \
......@@ -172,7 +172,7 @@ class TestParseJSON:
tree_copy = Node.from_json_obj(json.loads(s))
assert tree_copy.equals(self.tree, ignore_attr_order = sys.version_info < (3, 6))
s = self.tree.as_json(indent=None, ensure_ascii=False)
tree_copy = parse_json_syntaxtree(s)
tree_copy = parse_json(s)
# print(s)
# print(self.tree.as_sxpr())
# print(tree_copy.as_sxpr())
......@@ -184,7 +184,7 @@ class TestParseJSON:
n.attr['id'] = '007'
# json
json = n.as_json()
tree = parse_json_syntaxtree(json)
tree = parse_json(json)
# print()
# XML
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment