Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
badw-it
DHParser
Commits
a4af9d28
Commit
a4af9d28
authored
May 12, 2019
by
eckhart
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- new subdirectory examples/demos
parent
c0877f53
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
115 additions
and
32 deletions
+115
-32
DHParser/configuration.py
DHParser/configuration.py
+8
-3
DHParser/ebnf.py
DHParser/ebnf.py
+2
-5
DHParser/syntaxtree.py
DHParser/syntaxtree.py
+13
-2
DHParser/templates/DSLServer.pyi
DHParser/templates/DSLServer.pyi
+11
-15
DHParser/testing.py
DHParser/testing.py
+3
-3
DHParser/toolkit.py
DHParser/toolkit.py
+2
-0
DHParser/versionnumber.py
DHParser/versionnumber.py
+1
-1
README.md
README.md
+51
-1
examples/demos/key_value_store.py
examples/demos/key_value_store.py
+22
-0
setup.py
setup.py
+2
-2
No files found.
DHParser/configuration.py
View file @
a4af9d28
...
...
@@ -36,6 +36,7 @@ __all__ = ('CONFIG_PRESET',
'XML_SERIALIZATION'
,
'SXPRESSION_SERIALIZATION'
,
'COMPACT_SERIALIZATION'
,
'SMART_SERIALIZATION'
,
'JSON_SERIALIZATION'
,
'SERIALIZATIONS'
)
...
...
@@ -85,6 +86,9 @@ CONFIG_PRESET['max_parser_dropouts'] = 3
# 'compact' - compact tree output, i.e. children are represented on
# indented lines with no opening or closing tags, brackets
# etc.
# 'smart' - serialize as S-expression if the S-expression fits on
# one line (see 'flatten_sxpr_threshold'), otherwise
# serialize as compact tree output
# 'json' - output in JSON-format. This is probably the least
# readable representation, but useful for serialization, for
# example, to return syntax trees from remote procedure calls.
...
...
@@ -93,6 +97,7 @@ CONFIG_PRESET['max_parser_dropouts'] = 3
XML_SERIALIZATION
=
"XML"
SXPRESSION_SERIALIZATION
=
"S-expression"
COMPACT_SERIALIZATION
=
"compact"
SMART_SERIALIZATION
=
"smart"
JSON_SERIALIZATION
=
"json"
SERIALIZATIONS
=
frozenset
({
XML_SERIALIZATION
,
...
...
@@ -100,9 +105,9 @@ SERIALIZATIONS = frozenset({XML_SERIALIZATION,
COMPACT_SERIALIZATION
,
JSON_SERIALIZATION
})
CONFIG_PRESET
[
'cst_serialization'
]
=
COMPAC
T_SERIALIZATION
CONFIG_PRESET
[
'ast_serialization'
]
=
XML
_SERIALIZATION
CONFIG_PRESET
[
'default_serialization'
]
=
S
XPRESSION
_SERIALIZATION
CONFIG_PRESET
[
'cst_serialization'
]
=
SMAR
T_SERIALIZATION
CONFIG_PRESET
[
'ast_serialization'
]
=
SMART
_SERIALIZATION
CONFIG_PRESET
[
'default_serialization'
]
=
S
MART
_SERIALIZATION
# Defines the maximum line length for flattened S-expressions.
# Below this threshold S-expressions will be returned in flattened
...
...
DHParser/ebnf.py
View file @
a4af9d28
...
...
@@ -38,7 +38,7 @@ from DHParser.parse import Grammar, mixin_comment, Forward, RegExp, DropWhitespa
from
DHParser.preprocess
import
nil_preprocessor
,
PreprocessorFunc
from
DHParser.syntaxtree
import
Node
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
from
DHParser.toolkit
import
load_if_file
,
escape_re
,
md5
,
sane_parser_name
,
re
,
expand_table
,
\
GLOBALS
,
get_config_value
,
unrepr
,
compile_python_object
,
DHPARSER_DIR
GLOBALS
,
get_config_value
,
unrepr
,
compile_python_object
,
DHPARSER_
PARENT
DIR
from
DHParser.transform
import
TransformationFunc
,
traverse
,
remove_brackets
,
\
reduce_single_child
,
replace_by_single_child
,
remove_whitespace
,
remove_empty
,
\
remove_tokens
,
flatten
,
forbid
,
assert_content
...
...
@@ -68,9 +68,6 @@ __all__ = ('get_ebnf_preprocessor',
########################################################################
dhparser_parentdir
=
os
.
path
.
dirname
(
DHPARSER_DIR
)
DHPARSER_IMPORTS
=
'''
import collections
from functools import partial
...
...
@@ -101,7 +98,7 @@ from DHParser import logging, is_filename, load_if_file, \\
error_on, recompile_grammar, left_associative, lean_left, set_config_value,
\\
get_config_value, XML_SERIALIZATION, SXPRESSION_SERIALIZATION, COMPACT_SERIALIZATION,
\\
JSON_SERIALIZATION, CONFIG_PRESET, GLOBALS
'''
.
format
(
dhparser_parentdir
=
dhparser_parentdir
)
'''
.
format
(
dhparser_parentdir
=
DHPARSER_PARENTDIR
)
########################################################################
...
...
DHParser/syntaxtree.py
View file @
a4af9d28
...
...
@@ -31,7 +31,7 @@ from typing import Callable, cast, Iterator, Sequence, List, AbstractSet, Set, U
Container
,
Optional
,
Dict
from
DHParser.configuration
import
SERIALIZATIONS
,
XML_SERIALIZATION
,
SXPRESSION_SERIALIZATION
,
\
COMPACT_SERIALIZATION
,
JSON_SERIALIZATION
COMPACT_SERIALIZATION
,
JSON_SERIALIZATION
,
SMART_SERIALIZATION
from
DHParser.error
import
Error
,
ErrorCode
,
linebreaks
,
line_col
from
DHParser.stringview
import
StringView
from
DHParser.toolkit
import
get_config_value
,
re
...
...
@@ -824,7 +824,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
# serialization meta-method ###
def
serialize
_as
(
self
:
'Node'
,
how
:
str
=
'default'
)
->
str
:
def
serialize
(
self
:
'Node'
,
how
:
str
=
'default'
)
->
str
:
"""
Serializes the tree starting with `node` either as S-expression, XML, JSON,
or in compact form. Possible values for `how` are 'S-expression',
...
...
@@ -849,6 +849,17 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return
self
.
as_json
()
elif
switch
==
COMPACT_SERIALIZATION
.
lower
():
return
self
.
as_sxpr
(
compact
=
True
)
elif
switch
==
SMART_SERIALIZATION
.
lower
():
threshold
=
get_config_value
(
'flatten_sxpr_threshold'
)
if
threshold
<=
0
:
return
self
.
as_sxpr
(
compact
=
True
)
sxpr
=
self
.
as_sxpr
(
flatten_threshold
=
threshold
)
if
sxpr
.
find
(
'
\n
'
)
>=
0
:
sxpr
=
re
.
sub
(
r
'\n(\s*)\('
,
r
'\n\1'
,
sxpr
)
sxpr
=
re
.
sub
(
r
'\n\s*\)'
,
r
''
,
sxpr
)
sxpr
=
re
.
sub
(
r
'\)'
,
r
''
,
sxpr
)
sxpr
=
re
.
sub
(
r
'^\('
,
r
''
,
sxpr
)
return
sxpr
else
:
raise
ValueError
(
'Unknown serialization %s. Allowed values are either: %s or : %s'
%
(
how
,
"'ast', 'cst', 'default'"
,
", "
.
join
(
list
(
SERIALIZATIONS
))))
...
...
DHParser/templates/DSLServer.pyi
View file @
a4af9d28
#!/usr/bin/python3
"""
MLW
Server.py - starts a server (if not already running) for the
compilation of
the MLW (medieval latin dictionary)
"""
DSL
Server.py - starts a server (if not already running) for the
compilation of
DSL
Author: Eckhart Arnold <arnold@badw.de>
...
...
@@ -24,8 +24,6 @@ import asyncio
import
os
import
sys
scriptdir
=
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
))
sys
.
path
.
extend
([
os
.
path
.
join
(
scriptdir
,
'DHParser-submodule'
)])
STOP_SERVER_REQUEST
=
b
"__STOP_SERVER__"
# hardcoded in order to avoid import from DHParser.server
IDENTIFY_REQUEST
=
"identify()"
...
...
@@ -94,11 +92,9 @@ def json_rpc(func, params=[], ID=None) -> str:
return
str
({
"jsonrpc"
:
"2.0"
,
"method"
:
func
.
__name__
,
"params"
:
params
,
"id"
:
ID
})
def
mlw_compiler
(
dateiname
):
from
MLWCompiler
import
verarbeite_mlw_artikel
print
(
"Generiere HTML "
+
dateiname
)
ergebnis
=
verarbeite_mlw_artikel
(
dateiname
,
''
,
{})
return
ergebnis
def
DSL_compiler
(
dateiname
):
from
DSLCompiler
import
compile_source
return
compile_source
(
dateiname
)
def
run_server
(
host
,
port
):
...
...
@@ -111,8 +107,8 @@ def run_server(host, port):
print
(
'PermissionError: Could not write temporary config file: '
+
config_filename
)
print
(
'Starting server on %s:%i'
%
(
host
,
port
))
mlw
_server
=
LanguageServer
({
'
mlw
_compiler'
:
mlw
_compiler
})
mlw
_server
.
run_server
(
host
,
port
)
DSL
_server
=
LanguageServer
({
'
DSL
_compiler'
:
DSL
_compiler
})
DSL
_server
.
run_server
(
host
,
port
)
async
def
send_request
(
request
,
host
,
port
):
...
...
@@ -151,10 +147,10 @@ def start_server_daemon(host, port):
def
print_usage_and_exit
():
print
(
'Usages:
\n
'
+
' python
MLW
Server.py --startserver [host] [port]
\n
'
+
' python
MLW
Server.py --stopserver
\n
'
+
' python
MLW
Server.py --status
\n
'
+
' python
MLW
Server.py FILENAME.
mlw
[--host host] [--port port]'
)
+
' python
DSL
Server.py --startserver [host] [port]
\n
'
+
' python
DSL
Server.py --stopserver
\n
'
+
' python
DSL
Server.py --status
\n
'
+
' python
DSL
Server.py FILENAME.
dsl
[--host host] [--port port]'
)
sys
.
exit
(
1
)
...
...
DHParser/testing.py
View file @
a4af9d28
...
...
@@ -269,10 +269,10 @@ def get_report(test_unit):
cst
=
tests
.
get
(
'__cst__'
,
{}).
get
(
test_name
,
None
)
if
cst
and
(
not
ast
or
str
(
test_name
).
endswith
(
'*'
)):
report
.
append
(
'
\n
### CST'
)
report
.
append
(
indent
(
cst
.
serialize
_as
(
'cst'
)))
report
.
append
(
indent
(
cst
.
serialize
(
'cst'
)))
if
ast
:
report
.
append
(
'
\n
### AST'
)
report
.
append
(
indent
(
ast
.
serialize
_as
(
'ast'
)))
report
.
append
(
indent
(
ast
.
serialize
(
'ast'
)))
for
test_name
,
test_code
in
tests
.
get
(
'fail'
,
dict
()).
items
():
heading
=
'Fail-test "%s"'
%
test_name
report
.
append
(
'
\n
%s
\n
%s
\n
'
%
(
heading
,
'-'
*
len
(
heading
)))
...
...
@@ -436,7 +436,7 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
if
compare
:
if
not
compare
.
equals
(
cst
):
errata
.
append
(
'Concrete syntax tree test "%s" for parser "%s" failed:
\n
%s'
%
(
test_name
,
parser_name
,
cst
.
serialize
_as
(
'cst'
)))
(
test_name
,
parser_name
,
cst
.
serialize
(
'cst'
)))
if
verbose
:
infostr
=
' cst-test "'
+
test_name
+
'" ... '
write
(
infostr
+
(
"OK"
if
len
(
errata
)
==
errflag
else
"FAIL"
))
...
...
DHParser/toolkit.py
View file @
a4af9d28
...
...
@@ -70,6 +70,7 @@ __all__ = ('typing',
'smart_list'
,
'sane_parser_name'
,
'DHPARSER_DIR'
,
'DHPARSER_PARENTDIR'
,
'GLOBALS'
,
'get_config_value'
,
'set_config_value'
)
...
...
@@ -83,6 +84,7 @@ __all__ = ('typing',
DHPARSER_DIR
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
DHPARSER_PARENTDIR
=
os
.
path
.
dirname
(
DHPARSER_DIR
.
rstrip
(
'/'
))
GLOBALS
=
threading
.
local
()
...
...
DHParser/versionnumber.py
View file @
a4af9d28
...
...
@@ -16,4 +16,4 @@
# permissions and limitations under the License.
__all__
=
(
'__version__'
,)
__version__
=
'0.
8.9
'
# + '_dev' + str(os.stat(__file__).st_mtime)
__version__
=
'0.
9.0
'
# + '_dev' + str(os.stat(__file__).st_mtime)
README.md
View file @
a4af9d28
DHParser
========
DHParser - The domian specific language (DSL) construction kit for the Digit Humanities
DHParser - A parser generator and domain specific language (DSL) construction
kit for the Digital Humanities
Features
...
...
@@ -22,6 +23,55 @@ Features
message for users that do not habitually deal with formal notations!
Ease of use
-----------
key_value_store.py:
# A mini-DSL for a key value store
from DHParser import *
# specify the grammar of your DSL in EBNF-notation
grammar = '''@ drop = whitespace, token
key_store = ~ { entry }
entry = key "=" value
key = /
\w
+/~ # Scannerless parsing, use regular
value = /
\"
[^"
\n
]
*
\"
/~ # expressions wherever you like'''
# generating a parser is almost as simple as compiling a regular expression
parser_factory = grammar_provider(grammar)
parser = parser_factory() # parser factory for thread-safety
Now, parse some text and extract the data from the Python-shell:
>>> from key_value_store import parser
>>> text = '''
title = "Odysee 2001"
director = "Stanley Kubrick"
'''
>>> data = parser(text)
>>> for entry in data.select('entry'):
print(entry['key'], entry['value'])
title "Odysee 2001"
director "Stanley Kubrick"
Or, serialize as XML:
>>> print(data.as_xml())
<key_store>
<entry>
<key>
title
</key>
<value>
"Odysee 2001"
</value>
</entry>
<entry>
<key>
director
</key>
<value>
"Stanley Kubrick"
</value>
</entry>
</key_store>
License
-------
...
...
examples/demos/key_value_store.py
0 → 100644
View file @
a4af9d28
# A mini-DSL for a key value store
from
DHParser
import
*
# specify the grammar of your DSL in EBNF-notation
grammar
=
'''
@ drop = whitespace, token
key_store = ~ { entry }
entry = key "=" value
key = /\w+/~ # Scannerless parsing, use regular
value = /
\"
[^"
\n
]*
\"
/~ # expressions wherever you like'''
parser
=
grammar_provider
(
grammar
)()
if
__name__
==
'__main__'
:
text
=
'''
title = "Odysee 2001"
director = "Stanley Kubrick"
'''
data
=
parser
(
text
)
for
entry
in
data
.
select
(
'entry'
):
print
(
entry
[
'key'
],
entry
[
'value'
])
setup.py
View file @
a4af9d28
...
...
@@ -31,9 +31,9 @@ setup(
license
=
'[Apache 2.0 License](https://www.apache.org/licenses/LICENSE-2.0)'
,
author
=
'Eckhart Arnold'
,
author_email
=
'arnold@badw.de'
,
description
=
'DHParser -
Domain specific languages for the Digital Humanities
'
,
description
=
'DHParser -
Parser generator and DSL-construction-kit
'
,
long_description
=
read_me
,
keywords
=
'
Digital Humanities
, domain specific languages, parser combinators, EBNF'
,
keywords
=
'
parser generator
, domain specific languages,
Digital Humanities,
parser combinators, EBNF'
,
classifiers
=
[
'Development Status :: 4 - Beta'
,
'Intended Audience :: Developers'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment