Commit ae67d404 authored by Eckhart Arnold

documentation extended

parent 5491f428
@@ -2394,6 +2394,10 @@ def preprocessor_factory() -> PreprocessorFunc:
get_preprocessor = ThreadLocalSingletonFactory(preprocessor_factory, ident=1)


def preprocess_{NAME}(source):
    return get_preprocessor()(source)
'''
@@ -2420,8 +2424,10 @@ def {NAME}Transformer() -> TransformationFunc:
    threads or processes."""
    return partial(traverse, processing_table={NAME}_AST_transformation_table.copy())

get_transformer = ThreadLocalSingletonFactory({NAME}Transformer, ident={ID})


def transform_{NAME}(cst):
    get_transformer()(cst)
'''
@@ -2430,6 +2436,7 @@ def transform_{NAME}(cst):
COMPILER_FACTORY = '''
get_compiler = ThreadLocalSingletonFactory({NAME}Compiler, ident={ID})


def compile_{NAME}(ast):
    return get_compiler()(ast)
'''
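All three factory templates above rely on the same thread-local singleton pattern: each thread lazily creates and then reuses exactly one preprocessor, transformer or compiler instance, so the module-level convenience functions can safely be shared between threads. The following stand-alone sketch illustrates the idea with nothing but the standard-library threading module; it is not DHParser's actual ThreadLocalSingletonFactory, whose implementation may differ::

    import threading

    class ThreadLocalSingletonFactory:
        """Illustrative sketch: one lazily created factory()-instance per thread."""
        def __init__(self, factory, ident=None):
            self.factory = factory
            self.ident = ident            # only kept for bookkeeping in this sketch
            self._local = threading.local()

        def __call__(self):
            try:
                return self._local.instance
            except AttributeError:
                self._local.instance = self.factory()
                return self._local.instance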
...
#cython: infer_types=True
#cython: language_level=3
#cython: c_string_type=unicode
#cython: c_string_encoding=utf-8
import cython
@@ -2,6 +2,8 @@
"""dhparser_rename.py - rename a dhparser project properly

UNMAINTAINED!!!

Copyright 2019 by Eckhart Arnold (arnold@badw.de)
Bavarian Academy of Sciences and Humanities (badw.de)
...
@@ -191,12 +191,12 @@ Serializing and de-serializing syntax-trees
Syntax trees can be serialized as S-expressions, XML, JSON and indented
text. Module 'syntaxtree' also contains three simple parsers
(:py:func:`~syntaxtree.parse_sxpr()`, :py:func:`~syntaxtree.parse_xml()`
and :py:func:`~syntaxtree.parse_json()`) to convert S-expressions,
XML-snippets or JSON-objects into trees composed of Node-objects.
Only :py:func:`~syntaxtree.parse_xml()` can deserialize any kind of XML-file;
the other two functions parse only the restricted subset of S-expressions
or JSON that is used when serializing trees into these formats.
There is no function to deserialize indented text.
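A small usage sketch (the tag-names are made up for the example, no particular grammar is assumed)::

    from DHParser.syntaxtree import parse_sxpr, parse_xml

    # build a Node-tree from an S-expression snippet ...
    tree = parse_sxpr('(sentence (word "Hello") (word "World"))')
    print(tree.as_xml())      # ... and serialize it as XML

    # parse_xml() also accepts XML that was not produced by DHParser
    tree2 = parse_xml('<sentence><word>Hello</word><word>World</word></sentence>')
    print(tree2.as_sxpr())    # ... and serialize it back as an S-expression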
In order to make parameterizing serialization easier, the Node-class
also defines a generic :py:meth:`~syntaxtree.serialize()`-method next to
@@ -744,8 +744,8 @@ __all__ = ('WHITESPACE_PTYPE',
           'DHParser_JSONEncoder',
           'parse_sxpr',
           'parse_xml',
           'parse_json',
           'deserialize',
           'flatten_sxpr',
           'flatten_xml')
@@ -2191,6 +2191,7 @@ class Node:  # (collections.abc.Sized): Base class omitted for cython-compatibility
    def as_json(self, indent: Optional[int] = 2, ensure_ascii=False) -> str:
        """Serializes the tree originating in `self` as JSON-string."""
        if not indent or indent <= 0:  indent = None
        return json.dumps(self.to_json_obj(), indent=indent, ensure_ascii=ensure_ascii,
                          separators=(', ', ': ') if indent is not None else (',', ':'))
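For example, a compact and an indented serialization of the same tree can be produced and read back as follows (a sketch; parse_json is the deserializing counterpart defined further below)::

    from DHParser.syntaxtree import parse_sxpr, parse_json

    tree = parse_sxpr('(number (INT "1"))')
    compact = tree.as_json(indent=None)   # single line, no superfluous whitespace
    pretty = tree.as_json()               # indented by 2 spaces per level (default)
    assert parse_json(compact).equals(tree)   # the JSON round-trip restores the tree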
@@ -3377,9 +3378,9 @@ class DHParser_JSONEncoder(json.JSONEncoder):
        return json.JSONEncoder.default(self, obj)


def parse_json(json_str: str) -> Node:
    """
    Parses a JSON-representation of a syntax tree. Other than parse_sxpr
    and parse_xml, this function does not convert any json-text into
    a syntax tree, but only json-text that represents a syntax tree, e.g.
    one that has been produced by `Node.as_json()`!
@@ -3388,22 +3389,22 @@ def parse_json_syntaxtree(json_str: str) -> Node:
    return Node.from_json_obj(json_obj)


def deserialize(xml_sxpr_or_json: str) -> Optional[Node]:
    """
    Parses either an XML-, S-expression- or JSON-representation of a
    syntax-tree. The kind of representation is detected automatically.
    """
    if RX_IS_XML.match(xml_sxpr_or_json):
        return parse_xml(xml_sxpr_or_json)
    elif RX_IS_SXPR.match(xml_sxpr_or_json):
        return parse_sxpr(xml_sxpr_or_json)
    elif re.fullmatch(r'\s*', xml_sxpr_or_json):
        return None
    else:
        try:
            return parse_json(xml_sxpr_or_json)
        except json.decoder.JSONDecodeError:
            m = re.match(r'\s*(.*)\n?', xml_sxpr_or_json)
            snippet = m.group(1) if m else ''
            raise ValueError('Snippet is neither S-expression nor XML: ' + snippet + ' ...')
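A usage sketch of the automatic format-detection (the tag-name "greeting" is made up for the example)::

    from DHParser.syntaxtree import deserialize

    node = deserialize('(greeting "Hello")')        # detected as an S-expression
    same_from_xml = deserialize(node.as_xml())      # detected as XML
    same_from_json = deserialize(node.as_json())    # detected as JSON
    assert same_from_json.equals(node)

    # a string consisting only of whitespace yields None instead of a Node
    assert deserialize('   ') is None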
...
@@ -2,7 +2,7 @@ RESULT_FILE_EXTENSION = ".sxpr"  # Change this according to your needs!

def compile_src(source: str) -> Tuple[Any, List[Error]]:
    """Compiles ``source`` and returns (result, errors)."""
    result_tuple = compile_source(source, get_preprocessor(), get_grammar(), get_transformer(),
                                  get_compiler())
    return result_tuple[:2]  # drop the AST at the end of the result tuple
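In a generated xxxParser.py-script this function would typically be used along the following lines (a sketch; the file name and the error handling are placeholders, not part of the template)::

    with open('example.dsl', 'r', encoding='utf-8') as f:
        source = f.read()
    result, errors = compile_src(source)
    if errors:
        for error in errors:
            print(error)
    else:
        # the result of a fully compiled run is typically a tree of Nodes
        print(result.as_sxpr() if hasattr(result, 'as_sxpr') else result)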
...
@@ -46,7 +46,7 @@ from DHParser.log import is_logging, clear_logs, local_log_dir, log_parsing_history
from DHParser.parse import Lookahead
from DHParser.preprocess import gen_neutral_srcmap_func
from DHParser.server import RX_CONTENT_LENGTH, RE_DATA_START, JSONRPC_HEADER_BYTES
from DHParser.syntaxtree import Node, RootNode, deserialize, flatten_sxpr, ZOMBIE_TAG
from DHParser.trace import set_tracer, all_descendants, trace_history
from DHParser.transform import traverse, remove_children
from DHParser.toolkit import load_if_file, re, re_find, concurrent_ident, instantiate_executor
@@ -469,7 +469,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
        if "cst" in tests and len(errata) == errflag:
            try:
                compare = deserialize(get(tests, "cst", test_name))
            except ValueError as e:
                raise SyntaxError('CST-TEST "%s" of parser "%s" failed with:\n%s'
                                  % (test_name, parser_name, str(e)))
@@ -483,7 +483,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT'
        if "ast" in tests and len(errata) == errflag:
            try:
                compare = deserialize(get(tests, "ast", test_name))
            except ValueError as e:
                raise SyntaxError('AST-TEST "%s" of parser "%s" failed with:\n%s'
                                  % (test_name, parser_name, str(e)))
...
@@ -355,6 +355,9 @@ DHParser does not hide any stages of the tree generation
process. Thus, you get full access to the (simplified) concrete
syntax tree (CST) as well as to the abstract syntax tree (AST).
An internal mini-DSL for AST-transformation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Abstract syntax tree generation is controlled in
declarative style by simple lists of transformations
applied to each node depending on its type. Remember

@@ -392,9 +395,10 @@ end as nodes containing the quotation mark-delimiters
of that string.
To give an impression of what AST-transformation-tables
may look like, here is an excerpt from (a former
version of) DHParser's own transformation table
to derive a lean AST from the concrete syntax-tree
of an EBNF grammar::
    EBNF_AST_transformation_table = {
        # AST Transformations for EBNF-grammar
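The entries of such a table map tag-names to lists of transformation functions that are applied to every node of that type. A made-up miniature table for a toy grammar - an illustrative sketch only, not taken from DHParser's sources - could look like this::

    from DHParser.syntaxtree import parse_sxpr
    from DHParser.transform import traverse, reduce_single_child

    toy_AST_transformation_table = {
        # replace purely structural wrapper-nodes by their only child's content
        "expression": [reduce_single_child],
        "term": [reduce_single_child],
    }

    cst = parse_sxpr('(expression (term (number "42")))')
    traverse(cst, processing_table=toy_AST_transformation_table)   # transforms in place
    print(cst.as_sxpr())   # the wrapper-nodes have been collapsed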
@@ -427,16 +431,108 @@ are composed of a single :py:class:`~syntaxtree.Node`-type.
Nodes contain either text-data or have one or more other nodes
as children (but not both). The "kind" or "type"
of a node is indicated by its "tag-name". It should be
easy, though, to convert this tree of nodes into an
application-specific tree of objects of different classes.
Serialization as you like it: XML, JSON, S-expressions
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
DHParser makes it easy to visualize the various stages
of tree-transformation (CST, AST, ...) by offering
manifold serialization methods that output syntax-trees
in either a nicely formatted or a compact form:
1. S-expressions::
>>> syntax_tree = JSONParser.parse_JSON('{ "one": 1, "two": 2 }')
>>> JSONParser.transform_JSON(syntax_tree)
>>> print(syntax_tree.as_sxpr())
(json
  (object
    (member
      (string
        (PLAIN "one"))
      (number
        (INT "1")))
    (member
      (string
        (PLAIN "two"))
      (number
        (INT "2")))))
2. XML::
>>> print(syntax_tree.as_xml(indent=None))
<json>
  <object>
    <member>
      <string>
        <PLAIN>one</PLAIN>
      </string>
      <number>
        <INT>1</INT>
      </number>
    </member>
    <member>
      <string>
        <PLAIN>two</PLAIN>
      </string>
      <number>
        <INT>2</INT>
      </number>
    </member>
  </object>
</json>
3. JSON::
>>> print(syntax_tree.as_json(indent=None))
["json",[["object",[["member",[["string",[["PLAIN","one",3]],2],["number",[["INT","1",9]],9]],2],["member",[["string",[["PLAIN","two",13]],12],["number",[["INT","2",19]],19]],10]],0]],0]
4. Indented text-tree::
>>> print(syntax_tree.as_tree())
json
  object
    member
      string
        PLAIN "one"
      number
        INT "1"
    member
      string
        PLAIN "two"
      number
        INT "2"
All but the last of these serialization formats can be de-serialized into
a tree of nodes with the functions :py:func:`~syntaxtree.parse_sxpr`,
:py:func:`~syntaxtree.parse_xml` and :py:func:`~syntaxtree.parse_json`.
:py:func:`~syntaxtree.parse_xml` is not restricted to de-serialization but
can parse any XML into a tree of nodes.
XML-connection
^^^^^^^^^^^^^^
Since DHParser has been built with Digital-Humanities-applications in mind,
it offers two further methods to connect to X-technologies. The methods
:py:meth:`~syntaxtree.Node.as_etree` and :py:meth:`~syntaxtree.Node.from_etree`
allow direct conversion to and from the XML-ElementTrees of the Python
standard-library or of the lxml-package, which offers full support for
XPath, XQuery and XSLT.
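For example (a sketch that assumes the standard-library ElementTree as the default target of as_etree and continues the JSONParser-example from above)::

    import xml.etree.ElementTree as ET

    syntax_tree = JSONParser.parse_JSON('{ "one": 1, "two": 2 }')
    JSONParser.transform_JSON(syntax_tree)

    element = syntax_tree.as_etree()                 # Node-tree -> ElementTree-element
    print(ET.tostring(element, encoding='unicode'))  # XPath, XSLT etc. can take over from here
    # Node.from_etree allows the reverse conversion back into a tree of Nodes.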
Test-driven grammar development
-------------------------------
Just like regular expressions, EBNF-grammars are quite difficult to get
right on the first try - especially if you are new to the technology.
For regular expressions there exist all kinds of "workbenches" for
trying out and testing them.
Debugging parsers
-----------------
Fail-tolerant parsing

@@ -448,8 +544,8 @@ Compiling DSLs

Serialization
-------------
- XML-Connection
Language Servers
----------------
...
@@ -90,7 +90,7 @@ get_preprocessor = ThreadLocalSingletonFactory(preprocessor_factory, ident=1)

class LyrikGrammar(Grammar):
    r"""Parser for a Lyrik source file.
    """
    source_hash__ = "d4d0bbf5b09e354e4c6737bfaf757f57"
    disposable__ = re.compile('JAHRESZAHL$|ZEICHENFOLGE$|ENDE$|LEERRAUM$|ziel$|wortfolge$')
    static_analysis_pending__ = []  # type: List[bool]
    parser_initialization__ = ["upon instantiation"]
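A grammar-object like this is usually obtained through the generated get_grammar()-factory and then simply called on the source text (a sketch; "lyrik_source" is a placeholder string, and the error reporting shown here is only one possible way to inspect the result)::

    parser = get_grammar()               # thread-local LyrikGrammar-instance
    syntax_tree = parser(lyrik_source)   # parsing yields a tree of Nodes (a RootNode)
    if syntax_tree.errors:               # any parsing errors are collected on the root
        for error in syntax_tree.errors:
            print(error)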
...
import sys, os

# Make sure that the demo-project's directory and the DHParser-directory
# two levels up are both findable on the Python module search path.
try:
    scriptpath = os.path.dirname(__file__)
except NameError:
    scriptpath = ''
dhparser_parentdir = os.path.abspath(os.path.join(scriptpath, r'../..'))
if scriptpath not in sys.path:
    sys.path.append(scriptpath)
if dhparser_parentdir not in sys.path:
    sys.path.append(dhparser_parentdir)

import JSONParser

if __name__ == "__main__":
    # Parse a small JSON-snippet, transform the concrete syntax tree into an
    # abstract syntax tree and print it in all four serialization formats.
    syntax_tree = JSONParser.parse_JSON('{ "one": 1, "two": 2 }')
    JSONParser.transform_JSON(syntax_tree)
    print(syntax_tree.as_sxpr())
    print(syntax_tree.as_json(indent=None))
    print(syntax_tree.as_xml())
    print(syntax_tree.as_tree())
@@ -29,7 +29,7 @@ sys.path.append(os.path.abspath(os.path.join(scriptpath, '..')))
from DHParser.configuration import get_config_value, set_config_value
from DHParser.syntaxtree import Node, RootNode, parse_sxpr, parse_xml, flatten_sxpr, \
    flatten_xml, parse_json, ZOMBIE_TAG, EMPTY_NODE, ALL_NODES, next_context, \
    prev_context, serialize_context, generate_context_mapping, map_pos_to_context, \
    select_context_if, select_context, create_context_match_function
from DHParser.transform import traverse, reduce_single_child, \
@@ -172,7 +172,7 @@ class TestParseJSON:
        tree_copy = Node.from_json_obj(json.loads(s))
        assert tree_copy.equals(self.tree, ignore_attr_order=sys.version_info < (3, 6))
        s = self.tree.as_json(indent=None, ensure_ascii=False)
        tree_copy = parse_json(s)
        # print(s)
        # print(self.tree.as_sxpr())
        # print(tree_copy.as_sxpr())
@@ -184,7 +184,7 @@ class TestParseJSON:
        n.attr['id'] = '007'
        # json
        json = n.as_json()
        tree = parse_json(json)
        # print()
        # XML
...