Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
5249535b
Commit
5249535b
authored
Jan 15, 2019
by
eckhart
Browse files
merge stringview.pxd
parents
64d393a4
dbe1a013
Changes
9
Hide whitespace changes
Inline
Side-by-side
DHParser/ebnf.py
View file @
5249535b
...
...
@@ -619,7 +619,7 @@ class EBNFCompiler(Compiler):
# prepare and add resume-rules
resume_rules
=
dict
()
# type: Dict[str, List[Union[str, unrpr]]]
resume_rules
=
dict
()
# type: Dict[str, List[Union[str, unr
e
pr]]]
for
symbol
,
raw_rules
in
self
.
directives
[
'resume'
].
items
():
refined_rules
=
[]
for
rule
in
raw_rules
:
...
...
DHParser/stringview.pxd
View file @
5249535b
...
...
@@ -16,4 +16,66 @@ cdef int last_char(text, int begin, int end)
cdef
int
pack_index
(
int
index
,
int
length
)
@
cython
.
locals
(
cbegin
=
cython
.
int
,
cend
=
cython
.
int
)
cpdef
real_indices
(
begin
,
end
,
int
length
)
cdef
real_indices
(
begin
,
end
,
int
length
)
# cpdef real_indices(begin, end, int length)
# cdefs for class StringView: https://cython.readthedocs.io/en/latest/src/tutorial/pure.html
cdef
class
StringView
:
cdef
str
text
cdef
int
begin
,
end
,
len
cdef
str
fullstring
cpdef
__init__
(
self
,
text
:
str
,
begin
:
Optional
[
int
]
=
0
,
end
:
Optional
[
int
]
=
None
)
->
None
cpdef
__bool__
(
self
)
->
bool
cpdef
__len__
(
self
)
->
int
cpdef
__str__
(
self
)
->
str
cpdef
__eq__
(
self
,
other
)
->
bool
cpdef
__hash__
(
self
)
->
int
cpdef
__add__
(
self
,
other
)
->
Union
[
str
,
'StringView'
]
cpdef
__radd__
(
self
,
other
)
->
Union
[
str
,
'StringView'
]
@
cython
.
locals
(
start
=
cython
.
int
,
stop
=
cython
.
int
)
cpdef
__getitem__
(
self
,
index
:
Optional
[
slice
,
int
])
->
StringView
cpdef
count
(
self
,
sub
:
str
,
start
:
Optional
[
int
]
=
None
,
end
:
Optional
[
int
]
=
None
)
->
int
cpdef
find
(
self
,
sub
:
str
,
start
:
Optional
[
int
]
=
None
,
end
:
Optional
[
int
]
=
None
)
->
int
cpdef
rfind
(
self
,
sub
:
str
,
start
:
Optional
[
int
]
=
None
,
end
:
Optional
[
int
]
=
None
)
->
int
cpdef
startswith
(
self
,
prefix
:
str
,
start
:
int
=
0
,
end
:
Optional
[
int
]
=
None
)
->
bool
cpdef
endswith
(
self
,
suffix
:
str
,
start
:
int
=
0
,
end
:
Optional
[
int
]
=
None
)
->
bool
cpdef
match
(
self
,
regex
,
flags
=
0
)
cpdef
index
(
self
,
absolute_index
:
int
)
->
int
cpdef
indices
(
self
,
absolute_indices
:
Iterable
[
int
])
->
Tuple
[
int
,
...]
cpdef
search
(
self
,
regex
)
cpdef
finditer
(
self
,
regex
)
@
cython
.
locals
(
begin
=
cython
.
int
,
end
=
cython
.
int
)
cpdef
strip
(
self
)
@
cython
.
locals
(
begin
=
cython
.
int
)
cpdef
lstrip
(
self
)
@
cython
.
locals
(
end
=
cython
.
int
)
cpdef
rstrip
(
self
)
@
cython
.
locals
(
length
=
cython
.
int
,
k
=
cython
.
int
,
i
=
cython
.
int
)
cpdef
split
(
self
,
sep
=
None
)
cpdef
replace
(
self
,
old
,
new
)
DHParser/stringview.py
View file @
5249535b
...
...
@@ -72,6 +72,9 @@ def pack_index(index: int, length: int) -> int:
"""
# assert length >= 0
index
=
index
if
index
>=
0
else
index
+
length
# TODO: Test the following code for speedup
# if index < 0:
# index += length
return
0
if
index
<
0
else
length
if
index
>
length
else
index
...
...
@@ -86,7 +89,7 @@ def real_indices(begin: Optional[int],
return
pack_index
(
cbegin
,
length
),
pack_index
(
cend
,
length
)
class
StringView
(
collections
.
abc
.
Sized
):
class
StringView
:
#
(collections.abc.Sized):
"""
A rudimentary StringView class, just enough for the use cases
in parse.py. The difference between a StringView and the python
...
...
@@ -106,13 +109,13 @@ class StringView(collections.abc.Sized):
else
:
self
.
fullstring
=
''
def
__bool__
(
self
):
def
__bool__
(
self
)
->
bool
:
return
self
.
end
>
self
.
begin
# and bool(self.text)
def
__len__
(
self
):
def
__len__
(
self
)
->
int
:
return
self
.
len
def
__str__
(
self
):
def
__str__
(
self
)
->
str
:
# PERFORMANCE WARNING: This creates a copy of the string-slice
if
self
.
fullstring
:
# optimization: avoid slicing/copying
return
self
.
fullstring
...
...
@@ -122,27 +125,27 @@ class StringView(collections.abc.Sized):
self
.
fullstring
=
self
.
text
[
self
.
begin
:
self
.
end
]
return
self
.
fullstring
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
)
->
bool
:
# PERFORMANCE WARNING: This creates copies of the strings
return
len
(
other
)
==
len
(
self
)
and
str
(
self
)
==
str
(
other
)
def
__hash__
(
self
):
def
__hash__
(
self
)
->
int
:
# PERFORMANCE WARNING: This creates a copy of the string-slice
return
hash
(
str
(
self
))
def
__add__
(
self
,
other
):
def
__add__
(
self
,
other
)
->
Union
[
str
,
'StringView'
]
:
if
isinstance
(
other
,
str
):
return
str
(
self
)
+
other
else
:
return
StringView
(
str
(
self
)
+
str
(
other
))
def
__radd__
(
self
,
other
):
def
__radd__
(
self
,
other
)
->
Union
[
str
,
'StringView'
]
:
if
isinstance
(
other
,
str
):
return
other
+
str
(
self
)
else
:
return
StringView
(
str
(
other
)
+
str
(
self
))
def
__getitem__
(
self
,
index
)
:
def
__getitem__
(
self
,
index
:
Union
[
slice
,
int
])
->
'StringView'
:
# assert isinstance(index, slice), "As of now, StringView only allows slicing."
# assert index.step is None or index.step == 1, \
# "Step sizes other than 1 are not yet supported by StringView"
...
...
@@ -150,9 +153,9 @@ class StringView(collections.abc.Sized):
start
,
stop
=
real_indices
(
index
.
start
,
index
.
stop
,
self
.
len
)
return
StringView
(
self
.
text
,
self
.
begin
+
start
,
self
.
begin
+
stop
)
except
AttributeError
:
return
self
.
text
[
self
.
begin
+
index
]
return
StringView
(
self
.
text
,
self
.
begin
+
index
,
self
.
begin
+
index
+
1
)
def
count
(
self
,
sub
:
str
,
start
=
None
,
end
=
None
)
->
int
:
def
count
(
self
,
sub
:
str
,
start
:
Optional
[
int
]
=
None
,
end
:
Optional
[
int
]
=
None
)
->
int
:
"""Returns the number of non-overlapping occurrences of substring
`sub` in StringView S[start:end]. Optional arguments start and end
are interpreted as in slice notation.
...
...
@@ -165,7 +168,7 @@ class StringView(collections.abc.Sized):
start
,
end
=
real_indices
(
start
,
end
,
self
.
len
)
return
self
.
text
.
count
(
sub
,
self
.
begin
+
start
,
self
.
begin
+
end
)
def
find
(
self
,
sub
:
str
,
start
=
None
,
end
=
None
)
->
int
:
def
find
(
self
,
sub
:
str
,
start
:
Optional
[
int
]
=
None
,
end
:
Optional
[
int
]
=
None
)
->
int
:
"""Returns the lowest index in S where substring `sub` is found,
such that `sub` is contained within S[start:end]. Optional
arguments `start` and `end` are interpreted as in slice notation.
...
...
@@ -179,7 +182,7 @@ class StringView(collections.abc.Sized):
start
,
end
=
real_indices
(
start
,
end
,
self
.
len
)
return
self
.
text
.
find
(
sub
,
self
.
begin
+
start
,
self
.
begin
+
end
)
-
self
.
begin
def
rfind
(
self
,
sub
:
str
,
start
=
None
,
end
=
None
)
->
int
:
def
rfind
(
self
,
sub
:
str
,
start
:
Optional
[
int
]
=
None
,
end
:
Optional
[
int
]
=
None
)
->
int
:
"""Returns the highest index in S where substring `sub` is found,
such that `sub` is contained within S[start:end]. Optional
arguments `start` and `end` are interpreted as in slice notation.
...
...
DHParser/syntaxtree.py
View file @
5249535b
...
...
@@ -915,7 +915,7 @@ def parse_sxpr(sxpr: Union[str, StringView]) -> Node:
"""
sxpr
=
StringView
(
sxpr
).
strip
()
if
isinstance
(
sxpr
,
str
)
else
sxpr
.
strip
()
mock_parsers
=
dict
()
# type: Dict[
s
tr, MockParser]
mock_parsers
=
dict
()
# type: Dict[
S
tr
ingView
, MockParser]
def
next_block
(
s
:
StringView
):
"""Generator that yields all characters until the next closing bracket
...
...
Introduction.md
View file @
5249535b
...
...
@@ -390,13 +390,13 @@ scroll down to the AST section, you'll see something like this:
"*": replace_by_single_child
}
As you can see, AST-transformations a specified declaratively (with the
As you can see, AST-transformations a
re
specified declaratively (with the
option to add your own Python-programmed transformation rules). This
keeps the specification of the AST-transformation simple and concise. At
the same time, we avoid adding hints for the AST-transformation in the
grammar specification, which would render the grammar less readable.
Now that you have seen how DHParser basically works, it is time to go
through the process of desining and testing a domain specific notation
through the process of desi
g
ning and testing a domain specific notation
step by step from the very start. Head over to the documentation in
subdirectory and read the step by step guide.
README.md
View file @
5249535b
...
...
@@ -18,7 +18,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
https://www.apache.org/licenses/LICENSE-2.0
e
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
...
...
dhparser.py
View file @
5249535b
...
...
@@ -54,8 +54,8 @@ EBNF_TEMPLATE = r"""-grammar
#
#######################################################################
document = ~ { WORD } §EOF # root parser: a sequence of words preceded
by whitespace
# until the end of file
document = ~ { WORD } §EOF
# root parser: a sequence of words preceded
#
by whitespace
until the end of file
#######################################################################
#
...
...
documentation/Presentations/deRSE2019/cuts.txt
0 → 100644
View file @
5249535b
documentation/Presentations/deRSE2019/deRSE2019_proposal_EA.md
0 → 100644
View file @
5249535b
Workshop: DHParser - Domain Specific Languages for the Digital Humanities
=========================================================================
Proposal for a workshop (180 min) for the
[
deRSE2019
Conference
](
https://derse19.uni-jena.de/
)
by Eckhart Arnold, Bavarian Academy of Sciences and Humanities, arnold@badw.de
Abstract
--------
Domain specific languages have become a ubiquitous tool in the
software-industry, in many cases replacing XML as configuration or
data description language. By now, there exist quite a few mature
DSL-construction toolkits and DSL-parser generators out there
([Xtext], [MPS], [ANTLR], [pyparsing]) that support the creation of
DSLs.
Nonetheless, DSLs are strangely underused in Digital Humanities
Projects, even though they can provide a great addition, if not in
some cases a viable alternative to the omnipresent XML-toolchains. One
possible reason why DSLs have not yet become popular in the Digital
Humanities is that the common DSL construction kits and parser
generators are geared towards different application domains, and do
not fulfill the specific demands of Digital Humanities contexts. In
the Digital Humanities DSLs, just like the XML-data-structures, say,
for a historical-critical edition, can become quite complex, evolve
over time, result from an iterative testing and discussion process in
which users interact with programmers and must be understandable and
usable with ease by researchers that are not necessarily accustomed to
computer technology.
[DHParser] is a parser generator for DSLs, developed at the Bavarian
Academy of Sciences and Humanities, that specifically addresses the
Digital Humanities. In particular, it offers support for:
-
unit testing of DSLs
-
specifying meaningful error messages for the user of the DSL and
locating errors correctly
-
debugging support for the DSL-specification and parsing process
-
support for abstract-syntax-tree-generation
-
a basic framework for compiler construction with XML-output as the
most common use case in mind
-
programming in Python, the most commonly known and used programming
language in the Digital Humanities
In the workshop, I am going to explain how to develop a Frontend-DSL
for the “[DTA-Basisformat]” (or, for the purpose of introduction, a
subset thereof). We will assume the “DTA-Basisformat” as a given
target-format and run through the whole development process from
designing the syntax of the DSL through examples, specifying it
formally with [EBNF], directing abstract-syntax-tree generation,
generating XML-output, writing test-cases and specifying error
messages. If time permits, we will also look into the process of
preparing an editor / development environment for our DTA-DSL with
[Visual Studio Code].
In the end, every participant will have learned:
-
what a DSL is and what the steps for creating one are
-
how the syntax of a DSL can be specified in an EBNF-like formalism
-
how a simple DSL-XML-compiler is programmed in Python with the
DHParser-framework
-
how important practical concerns like unit-testing of DSLs and
error-reporting can be addressed
-
How DSLs relate to XML: Basically, XML allows you to declare and encode
the domain specific semantics of any kind of data; DSLs also enable you
to specify a domain specific syntax for your data, rendering the encoded
data much more human-readable (and -writable) than XML.
-
how to use DHParser ;-)
We will close the workshop with a discussion about the benefits as
well as possible disadvantages of employing DSLs in DH-projects in
relation to the necessary effort in comparison to the
ordinary XML-workflows.
**Requirements for participating and benefiting from the workshop:**
-
good working knowledge of [Python] and [regular expressions]
-
a laptop with python installed
Suggested Reading:
-
[Introduction to DHParser]
-
or, more detailed, the [Step by Step Guide to DHParser]
-
or, for a real world example, though work in progress, the [DSL for
the medival latin dictionary]
[
Xtext
]:
https://www.eclipse.org/Xtext/
[
MPS
]:
https://www.jetbrains.com/mps/
[
ANTLR
]:
https://www.antlr.org/
[
pyparsing
]:
https://pypi.org/project/pyparsing/
[
DHParser
]:
https://gitlab.lrz.de/badw-it/DHParser
[
DTA-Basisformat
]:
http://www.deutschestextarchiv.de/doku/basisformat/
[
EBNF
]:
https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf
[
Visual Studio Code
]:
https://code.visualstudio.com/
[
Python
]:
https://www.python.org/
[
regular expressions
]:
https://docs.python.org/3/library/re.html
[
Introduction to DHParser
]:
https://gitlab.lrz.de/badw-it/DHParser/blob/development/Introduction.md
[
step by step guide to DHParser
]:
https://gitlab.lrz.de/badw-it/DHParser/blob/development/documentation/StepByStepGuide.rst
[
DSL for the medival latin dictionary
]:
https://gitlab.lrz.de/badw-it/MLW-DSL
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment