Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
a50ca9ad
Commit
a50ca9ad
authored
Jul 07, 2017
by
di68kap
Browse files
- experimental code for indirect left recursion
parent
219a01a8
Changes
26
Expand all
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
a50ca9ad
...
...
@@ -81,9 +81,9 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source,
\\
last_value, counterpart, accumulate, ScannerFunc
from DHParser.syntaxtree import Node, traverse, remove_children_if,
\\
reduce_single_child,
reduce_children,
replace_by_single_child, remove_whitespace,
\\
reduce_single_child, replace_by_single_child, remove_whitespace,
\\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace,
\\
is_empty, is_expendable, collapse,
map
_content, WHITESPACE_PTYPE, TOKEN_PTYPE,
\\
is_empty, is_expendable, collapse,
replace
_content, WHITESPACE_PTYPE, TOKEN_PTYPE,
\\
TransformationFunc, remove_children, remove_content, remove_first, remove_last,
\\
has_name, has_content
'''
...
...
@@ -458,7 +458,10 @@ def compile_on_disk(source_file: str, compiler_suite="", extension=".xml"):
try
:
f
=
open
(
rootname
+
extension
,
'w'
,
encoding
=
"utf-8"
)
if
isinstance
(
result
,
Node
):
f
.
write
(
result
.
as_xml
())
if
extension
.
lower
()
==
'.xml'
:
f
.
write
(
result
.
as_xml
())
else
:
f
.
write
(
result
.
as_sxpr
())
else
:
f
.
write
(
result
)
except
(
PermissionError
,
FileNotFoundError
,
IOError
)
as
error
:
...
...
DHParser/ebnf.py
View file @
a50ca9ad
...
...
@@ -101,7 +101,6 @@ class EBNFGrammar(Grammar):
retrieveop = "::" | ":" # '::' pop, ':' retrieve
group = "(" expression §")"
regexchain = ">" expression §"<" # compiles "expression" into a singular regular expression
oneormore = "{" expression "}+"
repetition = "{" expression §"}"
option = "[" expression §"]"
...
...
@@ -131,14 +130,13 @@ class EBNFGrammar(Grammar):
option
=
Series
(
Token
(
"["
),
expression
,
Required
(
Token
(
"]"
)))
repetition
=
Series
(
Token
(
"{"
),
expression
,
Required
(
Token
(
"}"
)))
oneormore
=
Series
(
Token
(
"{"
),
expression
,
Token
(
"}+"
))
regexchain
=
Series
(
Token
(
"<"
),
expression
,
Required
(
Token
(
">"
)))
group
=
Series
(
Token
(
"("
),
expression
,
Required
(
Token
(
")"
)))
retrieveop
=
Alternative
(
Token
(
"::"
),
Token
(
":"
))
flowmarker
=
Alternative
(
Token
(
"!"
),
Token
(
"&"
),
Token
(
"§"
),
Token
(
"-!"
),
Token
(
"-&"
))
factor
=
Alternative
(
Series
(
Optional
(
flowmarker
),
Optional
(
retrieveop
),
symbol
,
NegativeLookahead
(
Token
(
"="
))),
Series
(
Optional
(
flowmarker
),
literal
),
Series
(
Optional
(
flowmarker
),
regexp
),
Series
(
Optional
(
flowmarker
),
group
),
Series
(
Optional
(
flowmarker
),
regexchain
),
Series
(
Optional
(
flowmarker
),
oneormore
),
repetition
,
option
)
Series
(
Optional
(
flowmarker
),
group
),
Series
(
Optional
(
flowmarker
),
oneormore
),
repetition
,
option
)
term
=
OneOrMore
(
factor
)
expression
.
set
(
Series
(
term
,
ZeroOrMore
(
Series
(
Token
(
"|"
),
term
))))
directive
=
Series
(
Token
(
"@"
),
Required
(
symbol
),
Required
(
Token
(
"="
)),
Alternative
(
regexp
,
literal
,
list_
))
...
...
@@ -379,7 +377,7 @@ class EBNFCompiler(Compiler):
'Compiler, self).__init__(grammar_name, grammar_source)'
,
" assert re.match('\w+\Z', grammar_name)"
,
''
]
for
name
in
self
.
rules
:
method_name
=
Compiler
.
derive_
method_name
(
name
)
method_name
=
Compiler
.
method_name
(
name
)
if
name
==
self
.
root
:
compiler
+=
[
' def '
+
method_name
+
'(self, node):'
,
' return node'
,
''
]
...
...
@@ -485,7 +483,7 @@ class EBNFCompiler(Compiler):
if
nd
.
parser
.
name
==
"definition"
:
definitions
.
append
(
self
.
_compile
(
nd
))
else
:
assert
nd
.
parser
.
name
==
"directive"
,
nd
.
as_s
e
xpr
()
assert
nd
.
parser
.
name
==
"directive"
,
nd
.
as_sxpr
()
self
.
_compile
(
nd
)
node
.
error_flag
=
node
.
error_flag
or
nd
.
error_flag
...
...
@@ -517,7 +515,7 @@ class EBNFCompiler(Compiler):
# assume it's a synonym, like 'page = REGEX_PAGE_NR'
defn
=
'Synonym(%s)'
%
defn
except
TypeError
as
error
:
errmsg
=
EBNFCompiler
.
AST_ERROR
+
" ("
+
str
(
error
)
+
")
\n
"
+
node
.
as_s
e
xpr
()
errmsg
=
EBNFCompiler
.
AST_ERROR
+
" ("
+
str
(
error
)
+
")
\n
"
+
node
.
as_sxpr
()
node
.
add_error
(
errmsg
)
rule
,
defn
=
rule
+
':error'
,
'"'
+
errmsg
+
'"'
return
rule
,
defn
...
...
@@ -610,7 +608,7 @@ class EBNFCompiler(Compiler):
def
on_factor
(
self
,
node
:
Node
)
->
str
:
assert
node
.
children
assert
len
(
node
.
children
)
>=
2
,
node
.
as_s
e
xpr
()
assert
len
(
node
.
children
)
>=
2
,
node
.
as_sxpr
()
prefix
=
str
(
node
.
children
[
0
])
# cast(str, node.children[0].result)
custom_args
=
[]
# type: List[str]
...
...
@@ -691,7 +689,7 @@ class EBNFCompiler(Compiler):
arg
=
repr
(
self
.
_check_rx
(
node
,
rx
[
1
:
-
1
].
replace
(
r
'\/'
,
'/'
)))
except
AttributeError
as
error
:
errmsg
=
EBNFCompiler
.
AST_ERROR
+
" ("
+
str
(
error
)
+
")
\n
"
+
\
node
.
as_s
e
xpr
()
node
.
as_sxpr
()
node
.
add_error
(
errmsg
)
return
'"'
+
errmsg
+
'"'
return
'RE('
+
', '
.
join
([
arg
]
+
name
)
+
')'
...
...
DHParser/parsers.py
View file @
a50ca9ad
...
...
@@ -171,7 +171,7 @@ def add_parser_guard(parser_func):
grammar
=
parser
.
grammar
# grammar may be 'None' for unconnected parsers!
if
not
grammar
.
moving_forward__
:
# rollback variable changes f
o
r discarded
branch of parsing tree
# rollback variable changes fr
om
discarded
parser passes
if
grammar
.
last_rb__loc__
<=
location
:
grammar
.
rollback_to__
(
location
)
grammar
.
moving_forward__
=
True
...
...
@@ -182,7 +182,7 @@ def add_parser_guard(parser_func):
# if location has already been visited by the current parser,
# return saved result
if
location
in
parser
.
visited
:
return
parser
.
visited
[
location
]
return
parser
.
visited
[
location
]
# TODO: might not work with Capture-Retrieve-Pop-Parsers!!!
# break left recursion at the maximum allowed depth
if
parser
.
recursion_counter
.
setdefault
(
location
,
0
)
>
LEFT_RECURSION_DEPTH
:
return
None
,
text
...
...
@@ -289,10 +289,28 @@ class Parser(ParserBase, metaclass=ParserMetaClass):
return
True
def mixin_comment(whitespace: str, comment: str) -> str:
    """Merge a whitespace regex and a comment regex into a single regex.

    The resulting pattern matches ``whitespace`` followed by zero or more
    repetitions of ``comment`` plus ``whitespace``. Comments can therefore
    occur wherever whitespace is allowed and are skipped just like
    implicit whitespace.

    Because the merge happens on the level of regular expressions,
    nested comments cannot be expressed. It also makes it much harder to
    use directives inside comments (which isn't recommended, anyway).

    Args:
        whitespace: Regular expression source matching whitespace.
        comment: Regular expression source matching a single comment.

    Returns:
        The combined regular expression source, wrapped in a
        non-capturing group.
    """
    pieces = ['(?:', whitespace, '(?:', comment, whitespace, ')*)']
    return ''.join(pieces)
class
Grammar
:
root__
=
None
# type: Union[Parser, None]
# root__ must be overwritten with the root-parser by grammar subclass
parser_initialization__
=
"pending"
# type: str
# some default values
COMMENT__
=
r
''
# r'#.*(?:\n|$)'
WSP__
=
mixin_comment
(
whitespace
=
r
'[\t ]*'
,
comment
=
COMMENT__
)
wspL__
=
''
wspR__
=
WSP__
@
classmethod
def
_assign_parser_names
(
cls
):
...
...
@@ -471,10 +489,18 @@ class Grammar:
return
result
def push_rollback__(self, location, func):
    """Register a rollback function for the given location.

    The pair ``(location, func)`` is appended to ``self.rollback__`` and
    ``self.last_rb__loc__`` is set to ``location``. Rollback functions
    either remove or re-add values on the variable stack
    (`self.variables`) that were added (or removed) by Capture or Pop
    parsers whose results have been dismissed.
    """
    entry = (location, func)
    self.rollback__.append(entry)
    # Remember the location of the most recently registered rollback.
    self.last_rb__loc__ = location
def
rollback_to__
(
self
,
location
):
"""Rolls back the variable stacks (`self.variables`) to its
state at an earlier location in the parsed document.
"""
while
self
.
rollback__
and
self
.
rollback__
[
-
1
][
0
]
<=
location
:
loc
,
rollback_func
=
self
.
rollback__
.
pop
()
assert
not
loc
>
self
.
last_rb__loc__
...
...
@@ -522,7 +548,7 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
Args:
parser (Parser): The parser where the error was noticed. Note
that this is not necessarily the parser that caused the
error but only where the error became aparent.
error but only where the error became ap
p
arent.
error_str (str): A short string describing the error.
Returns:
str: An error message including the call stack if history
...
...
@@ -746,19 +772,6 @@ class Token(RE):
return
'"%s"'
%
self
.
token
if
self
.
token
.
find
(
'"'
)
<
0
else
"'%s'"
%
self
.
token
def
mixin_comment
(
whitespace
:
str
,
comment
:
str
)
->
str
:
"""Returns a regular expression that merges comment and whitespace
regexps. Thus comments cann occur whereever whitespace is allowed
and will be skipped just as implicit whitespace.
Note, that because this works on the level of regular expressions,
nesting comments is not possible. It also makes it much harder to
use directives inside comments (which isn't recommended, anyway).
"""
wspc
=
'(?:'
+
whitespace
+
'(?:'
+
comment
+
whitespace
+
')*)'
return
wspc
########################################################################
#
# Combinator parser classes (i.e. trunk classes of the parser tree)
...
...
@@ -944,12 +957,17 @@ class Alternative(NaryOperator):
super
(
Alternative
,
self
).
__init__
(
*
parsers
,
name
=
name
)
assert
len
(
self
.
parsers
)
>=
1
assert
all
(
not
isinstance
(
p
,
Optional
)
for
p
in
self
.
parsers
)
self
.
been_here
=
dict
()
# type: Dict[int, int]
def
__call__
(
self
,
text
:
str
)
->
Tuple
[
Node
,
str
]:
for
parser
in
self
.
parsers
:
location
=
len
(
text
)
pindex
=
self
.
been_here
.
get
(
location
,
0
)
for
parser
in
self
.
parsers
[
pindex
:]:
node
,
text_
=
parser
(
text
)
if
node
:
return
Node
(
self
,
node
),
text_
pindex
+=
1
# self.been_here[location] = pindex
return
None
,
text
def
__repr__
(
self
):
...
...
@@ -1249,7 +1267,7 @@ class Compiler:
self
.
grammar_source
=
load_if_file
(
grammar_source
)
@
staticmethod
def
derive_
method_name
(
node_name
:
str
)
->
str
:
def
method_name
(
node_name
:
str
)
->
str
:
"""Returns the method name for ``node_name``, e.g.
>>> Compiler.method_name('expression')
'on_expression'
...
...
@@ -1275,7 +1293,7 @@ class Compiler:
"'_' or '__' or ending with '__' is reserved.)"
)
return
None
else
:
compiler
=
self
.
__getattribute__
(
self
.
derive_
method_name
(
elem
))
compiler
=
self
.
__getattribute__
(
self
.
method_name
(
elem
))
result
=
compiler
(
node
)
node
.
propagate_error_flags
()
return
result
...
...
DHParser/syntaxtree.py
View file @
a50ca9ad
...
...
@@ -48,10 +48,9 @@ __all__ = ['WHITESPACE_PTYPE',
'traverse'
,
'replace_by_single_child'
,
'reduce_single_child'
,
'reduce_children'
,
'replace_parser'
,
'collapse'
,
'
map
_content'
,
'
replace
_content'
,
'is_whitespace'
,
'is_empty'
,
'is_expendable'
,
...
...
@@ -336,7 +335,7 @@ class Node:
else
:
return
head
+
'
\n
'
.
join
([
tab
+
dataF
(
s
)
for
s
in
res
.
split
(
'
\n
'
)])
+
tail
def
as_s
e
xpr
(
self
,
src
:
str
=
None
)
->
str
:
def
as_sxpr
(
self
,
src
:
str
=
None
)
->
str
:
"""
Returns content as S-expression, i.e. in lisp-like form.
...
...
@@ -421,7 +420,7 @@ class Node:
def
log
(
self
,
log_file_name
):
st_file_name
=
log_file_name
with
open
(
os
.
path
.
join
(
log_dir
(),
st_file_name
),
"w"
,
encoding
=
"utf-8"
)
as
f
:
f
.
write
(
self
.
as_s
e
xpr
())
f
.
write
(
self
.
as_sxpr
())
def
find
(
self
,
match_function
)
->
Iterator
[
'Node'
]:
"""Finds nodes in the tree that match a specific criterion.
...
...
@@ -649,7 +648,7 @@ def traverse(root_node, processing_table, key_func=key_tag_name) -> None:
# - tree may be rearranged (e.g.flattened)
# - nodes that are not leaves may be dropped
# - order is preserved
# -
all
leave
s are kept
# - leaf
content is preserved (though not necessarily the leaves themselves)
#
# ------------------------------------------------
...
...
@@ -690,40 +689,26 @@ def reduce_single_child(node):
@
transformation_factory
(
Callable
)
def
reduce_children
(
node
,
condition
=
lambda
node
:
not
node
.
name
):
"""Replaces those children of node that have children themselves
ans fulfil the given condition (default unnamed nodes).
In contrast to ``flatten`` (see below) this transformation does not
operate recursively.
"""
if
node
.
children
:
new_result
=
[]
for
child
in
node
.
children
:
if
child
.
children
and
condition
(
child
):
new_result
.
extend
(
child
.
children
)
else
:
new_result
.
append
(
child
)
node
.
result
=
tuple
(
new_result
)
def
flatten
(
node
):
"""Recursively flattens all unnamed sub-nodes, in case there is more
than one sub-node present. Flattening means that
wherever a node has child nodes, the child nodes are inserted in place
of the node. In other words, all leaves of this node and its child nodes
are collected in-order as direct children of this node.
This is meant to achieve these kinds of structural transformation:
def
flatten
(
node
,
condition
=
lambda
node
:
not
node
.
parser
.
name
,
recursive
=
True
):
"""Flattens all children, that fulfil the given `condition`
(default: all unnamed children). Flattening means that wherever a
node has child nodes, the child nodes are inserted in place of the
node.
If the parameter `recursive` is `True` the same will recursively be
done with the child-nodes, first. In other words, all leaves of
this node and its child nodes are collected in-order as direct
children of this node.
Applying flatten recursively will result in these kinds of
structural transformation:
(1 (+ 2) (+ 3)) -> (1 + 2 + 3)
(1 (+ (2 + (3)))) -> (1 + 2 + 3)
Warning: Use with care. Due to its recursive nature, flattening can
have unexpected side-effects.
"""
if
node
.
children
:
new_result
=
[]
for
child
in
node
.
children
:
if
not
child
.
parser
.
name
and
child
.
children
:
flatten
(
child
)
if
child
.
children
and
condition
(
child
):
if
recursive
:
flatten
(
child
,
condition
,
recursive
)
new_result
.
extend
(
child
.
children
)
else
:
new_result
.
append
(
child
)
...
...
@@ -829,7 +814,7 @@ def remove_content(node, contents: AbstractSet[str]):
@
transformation_factory
def
map
_content
(
node
,
func
:
Callable
):
# Callable[[Node], ResultType]
def
replace
_content
(
node
,
func
:
Callable
):
# Callable[[Node], ResultType]
"""Replaces the content of the node. ``func`` takes the node
as an argument and returns the mapped result.
"""
...
...
DHParser/testing.py
View file @
a50ca9ad
...
...
@@ -37,7 +37,7 @@ def mock_syntax_tree(sexpr):
"""Generates a tree of nodes from an S-expression.
Example:
>>> mock_syntax_tree("(a (b c))").as_s
e
xpr()
>>> mock_syntax_tree("(a (b c))").as_sxpr()
(a
(b
"c"
...
...
@@ -200,10 +200,10 @@ def get_report(test_unit):
cst
=
tests
.
get
(
'__cst__'
,
{}).
get
(
test_name
,
None
)
if
cst
and
(
not
ast
or
cst
==
ast
):
report
.
append
(
'
\n
### CST'
)
report
.
append
(
cst
.
as_s
e
xpr
())
report
.
append
(
cst
.
as_sxpr
())
elif
ast
:
report
.
append
(
'
\n
### AST'
)
report
.
append
(
ast
.
as_s
e
xpr
())
report
.
append
(
ast
.
as_sxpr
())
return
'
\n
'
.
join
(
report
)
...
...
@@ -248,15 +248,15 @@ def grammar_unit(test_unit, parser_factory, transformer_factory, report=True, ve
tests
.
setdefault
(
'__err__'
,
{})[
test_name
]
=
errata
[
-
1
]
elif
"cst"
in
tests
and
mock_syntax_tree
(
tests
[
"cst"
][
test_name
])
!=
cst
:
errata
.
append
(
'Concrete syntax tree test "%s" for parser "%s" failed:
\n
%s'
%
(
test_name
,
parser_name
,
cst
.
as_s
e
xpr
()))
(
test_name
,
parser_name
,
cst
.
as_sxpr
()))
elif
"ast"
in
tests
:
compare
=
mock_syntax_tree
(
tests
[
"ast"
][
test_name
])
if
compare
!=
ast
:
errata
.
append
(
'Abstract syntax tree test "%s" for parser "%s" failed:'
'
\n\t
Expr.: %s
\n\t
Expected: %s
\n\t
Received: %s'
%
(
test_name
,
parser_name
,
'
\n\t
'
.
join
(
test_code
.
split
(
'
\n
'
)),
compact_sexpr
(
compare
.
as_s
e
xpr
()),
compact_sexpr
(
ast
.
as_s
e
xpr
())))
compact_sexpr
(
compare
.
as_sxpr
()),
compact_sexpr
(
ast
.
as_sxpr
())))
tests
.
setdefault
(
'__err__'
,
{})[
test_name
]
=
errata
[
-
1
]
if
verbose
:
print
(
infostr
+
(
"OK"
if
len
(
errata
)
==
errflag
else
"FAIL"
))
...
...
DHParser/toolkit.py
View file @
a50ca9ad
...
...
@@ -220,7 +220,7 @@ def compact_sexpr(s) -> str:
# if isinstance(parsing_result, collections.Collection):
# result = parsing_result[0]
# err = ('\nUnmatched sequence: ' + parsing_result[1]) if parsing_result[1] else ''
# sexpr = compact_sexpr(result.as_s
e
xpr())
# sexpr = compact_sexpr(result.as_sxpr())
# return sexpr + err
...
...
DHParser/versionnumber.py
View file @
a50ca9ad
...
...
@@ -18,4 +18,4 @@ permissions and limitations under the License.
import
os
__version__
=
'0.7.5'
#
+ '_dev' + str(os.stat(__file__).st_mtime)
__version__
=
'0.7.5'
+
'_dev'
+
str
(
os
.
stat
(
__file__
).
st_mtime
)
OLDSTUFF/ParserCombinators_obsolete.py
View file @
a50ca9ad
...
...
@@ -564,7 +564,7 @@ def flatten(node):
new_result
=
[]
for
child
in
node
.
children
:
if
not
child
.
parser
.
name
and
child
.
children
:
assert
child
.
children
,
node
.
as_s
e
xpr
()
assert
child
.
children
,
node
.
as_sxpr
()
flatten
(
child
)
new_result
.
extend
(
child
.
result
)
else
:
...
...
@@ -577,7 +577,7 @@ def remove_brackets(node):
from a literal or braces from a group).
"""
if
len
(
node
.
children
)
>=
3
:
assert
not
node
.
children
[
0
].
children
and
not
node
.
children
[
-
1
].
children
,
node
.
as_s
e
xpr
()
assert
not
node
.
children
[
0
].
children
and
not
node
.
children
[
-
1
].
children
,
node
.
as_sxpr
()
node
.
result
=
node
.
result
[
1
:
-
1
]
...
...
@@ -1704,7 +1704,7 @@ class EBNFCompiler(CompilerBase):
if
nd
.
parser
.
name
==
"definition"
:
definitions
.
append
(
self
.
compile__
(
nd
))
else
:
assert
nd
.
parser
.
name
==
"directive"
,
nd
.
as_s
e
xpr
()
assert
nd
.
parser
.
name
==
"directive"
,
nd
.
as_sxpr
()
self
.
compile__
(
nd
)
return
self
.
gen_parser
(
definitions
)
...
...
@@ -1732,7 +1732,7 @@ class EBNFCompiler(CompilerBase):
defn
=
'Capture(%s, "%s")'
%
(
defn
,
rule
)
self
.
variables
.
remove
(
rule
)
except
TypeError
as
error
:
errmsg
=
EBNFCompiler
.
AST_ERROR
+
" ("
+
str
(
error
)
+
")
\n
"
+
node
.
as_s
e
xpr
()
errmsg
=
EBNFCompiler
.
AST_ERROR
+
" ("
+
str
(
error
)
+
")
\n
"
+
node
.
as_sxpr
()
node
.
add_error
(
errmsg
)
rule
,
defn
=
rule
+
':error'
,
'"'
+
errmsg
+
'"'
return
(
rule
,
defn
)
...
...
@@ -1797,9 +1797,9 @@ class EBNFCompiler(CompilerBase):
return
self
.
non_terminal
(
node
,
'Sequence'
)
def
factor
(
self
,
node
):
assert
isinstance
(
node
.
parser
,
Sequence
),
node
.
as_s
e
xpr
()
# these assert statements can be removed
assert
isinstance
(
node
.
parser
,
Sequence
),
node
.
as_sxpr
()
# these assert statements can be removed
assert
node
.
children
assert
len
(
node
.
result
)
>=
2
,
node
.
as_s
e
xpr
()
assert
len
(
node
.
result
)
>=
2
,
node
.
as_sxpr
()
prefix
=
node
.
result
[
0
].
result
arg
=
node
.
result
[
-
1
]
...
...
@@ -1825,7 +1825,7 @@ class EBNFCompiler(CompilerBase):
if
prefix
==
match
:
return
self
.
non_terminal
(
node
,
parser_class
)
assert
False
,
(
"Unknown prefix %s
\n
"
%
prefix
)
+
node
.
as_s
e
xpr
()
assert
False
,
(
"Unknown prefix %s
\n
"
%
prefix
)
+
node
.
as_sxpr
()
def
option
(
self
,
node
):
return
self
.
non_terminal
(
node
,
'Optional'
)
...
...
@@ -1871,7 +1871,7 @@ class EBNFCompiler(CompilerBase):
arg
=
repr
(
self
.
_check_rx
(
node
,
rx
[
1
:
-
1
].
replace
(
r
'\/'
,
'/'
)))
except
AttributeError
as
error
:
errmsg
=
EBNFCompiler
.
AST_ERROR
+
" ("
+
str
(
error
)
+
")
\n
"
+
\
node
.
as_s
e
xpr
()
node
.
as_sxpr
()
node
.
add_error
(
errmsg
)
return
'"'
+
errmsg
+
'"'
return
'RE('
+
', '
.
join
([
arg
]
+
name
)
+
')'
...
...
examples/CommonMark/markdown.py
View file @
a50ca9ad
...
...
@@ -206,5 +206,5 @@ print(markdown_text)
syntax_tree
=
parser
(
markdown_text
)
ASTTransform
(
syntax_tree
,
MDTransTable
)
print
(
syntax_tree
.
as_s
e
xpr
())
print
(
syntax_tree
.
as_sxpr
())
print
(
error_messages
(
markdown_text
,
syntax_tree
.
collect_errors
()))
examples/CommonMark/markdown_old.py
View file @
a50ca9ad
...
...
@@ -282,5 +282,5 @@ markdown_text = load_if_file('../testdata/test_md1.md')
syntax_tree
=
parse
(
markdown_text
,
parser
)
ASTTransform
(
syntax_tree
,
MDTransTable
)
print
(
syntax_tree
.
as_s
e
xpr
())
print
(
syntax_tree
.
as_sxpr
())
print
(
error_messages
(
markdown_text
,
syntax_tree
.
collect_errors
()))
examples/MLW/MLW.ebnf
View file @
a50ca9ad
# EBNF-Syntax für MLW-Artikel
@ testing = True
@ comment = /#.*(?:\n|$)/ # Kommentare beginnen mit '#' und reichen bis zum Zeilenende
@ whitespace = /[\t ]*/ # Zeilensprünge zählen nicht als Leerraum
@ literalws = right # Leerraum vor und nach Literalen wird automatisch entfernt
...
...
@@ -11,23 +13,22 @@ Artikel = [LZ]
§LemmaPosition
[ArtikelKopf]
§BedeutungsPosition
§A
utorinfo
§A
rtikelVerfasser
[LZ] DATEI_ENDE
#### LEMMA-POSITION ##########################################################
LemmaPosition = "LEMMA" [LZ] §
Haupt
Lemma §TR [LemmaVarianten] §GrammatikPosition
LemmaPosition = "LEMMA" [LZ] §Lemma §TR [LemmaVarianten] §GrammatikPosition
[EtymologiePosition]
Haupt
Lemma = [klassisch] [gesichert]
l
emma
Lemma
= [klassisch] [gesichert]
L
emma
Wort
klassisch = "*"
gesichert = "$"
LemmaVarianten = [LZ]
{
l
emma §TR }+
{
L
emma
Wort
§TR }+
[LemmaZusatz §ABS]
l
emma
= LAT_WORT_TEIL { ("|" | "-") LAT_WORT_TEIL }
L
emma
Wort
= LAT_WORT_TEIL { ("|" | "-") LAT_WORT_TEIL }
LemmaZusatz = "ZUSATZ" §lzs_typ
lzs_typ = /sim\./
...
...
@@ -53,6 +54,11 @@ genus = "maskulinum" | "m." |
"neutrum" | "n."
## ETYMOLOGIE-POSITION ##
EtymologiePosition
#### ARTIKEL-KOPF ############################################################
ArtikelKopf = SchreibweisenPosition
...
...
@@ -60,7 +66,7 @@ SchreibweisenPosition = "SCHREIBWEISE" [LZ] §SWTyp ":" [LZ]
§SWVariante { ABS SWVariante} [LZ]
SWTyp = "script." | "script. fat-"
SWVariante = Schreibweise ":" Beleg
Schreibweise =
"vizreg-" | "festregel(a)" | "fezdregl(a)" | "fat-"
Schreibweise =
ZEICHENFOLGE
#### BEDEUTUNGS-POSITION #####################################################
...
...
@@ -81,8 +87,8 @@ Zusatz = "ZUSATZ" /\s*.*/ ABS
#### AUTOR/AUTORIN ###########################################################
A
utorinfo
= ("AUTORIN" | "AUTOR") Name
Name = { NAME | NAMENS_ABKÜRZUNG }+
A
rtikelVerfasser
= ("AUTORIN" | "AUTOR") Name
Name
= { NAME | NAMENS_ABKÜRZUNG }+
#### GENERISCHE UND ATOMARE AUSDRÜCKE ########################################
...
...
@@ -91,7 +97,7 @@ Beleg = Verweis
Verweis = ZielName
VerweisZiel = "[" ZielName "]"
ZielName =
ZEICH
ENFOLGE
ZielName =
BUCHSTAB
ENFOLGE
NAMENS_ABKÜRZUNG = /[A-ZÄÖÜÁÀÂÓÒÔÚÙÛ]\./~
NAME = /[A-ZÄÖÜÁÀÓÒÚÙÂÔÛ][a-zäöüßáàâóòôúùû]+/~
...
...
@@ -103,7 +109,8 @@ LAT_WORT = /[a-z]+/~
LAT_WORT_TEIL = /[a-z]+/
GROSSSCHRIFT = /[A-ZÄÖÜ]+/~
ZEICHENFOLGE = /\w+/~
BUCHSTABENFOLGE = /\w+/~
ZEICHENFOLGE = /[\w()-]+/~
TR = ABS | LZ # (beliebiger) Trenner
ABS = /\s*;\s*/ | { ZW }+ # Abschluss (durch Semikolon oder Zeilenwechsel)
...
...
examples/MLW/
OLDSTUFF
/MLW_compiler.py
→
examples/MLW/
VERALTET
/MLW_compiler.py
View file @
a50ca9ad
...
...
@@ -211,7 +211,7 @@ def join_strings(node, delimiter='\n'):
n
+=
1
nd
.
result
=
delimiter
.
join
((
r
.
result
for
r
in
node
.
result
[
a
:
n
]))
elif
nd
.
parser
.
name
!=
"Zusatz"
:
raise
AssertionError
(
nd
.
as_s
e
xpr
())
raise
AssertionError
(
nd
.
as_sxpr
())
else
:
n
+=
1
new_result
.
append
(
nd
)
...
...
examples/MLW/
OLDSTUFF
/MLW_compiler_old.py
→
examples/MLW/
VERALTET
/MLW_compiler_old.py
View file @
a50ca9ad
...
...
@@ -153,7 +153,7 @@ class MLWGrammar(GrammarBase):
def
test
(
node
):
print
(
node
.
as_s
e
xpr
())
print
(
node
.
as_sxpr
())
def
join_strings
(
node
,
delimiter
=
'
\n
'
):
...
...
@@ -169,7 +169,7 @@ def join_strings(node, delimiter='\n'):
nd
.
result
=
delimiter
.
join
((
r
.
result
for
r
in
node
.
result
[
a
:
n
]))
new_result
.
append
(
nd
)
node
.
result
=
tuple
(
new_result
)
print
(
node
.
as_s
e
xpr
())
print
(
node
.
as_sxpr
())
MLWTransTable
=
{
...
...
examples/MLW/
OLDSTUFF
/MLW_kopie.ebnf
→
examples/MLW/
VERALTET
/MLW_kopie.ebnf
View file @
a50ca9ad
File moved
examples/MLW/
OLDSTUFF
/MLW_kopie2.ebnf
→
examples/MLW/
VERALTET
/MLW_kopie2.ebnf
View file @
a50ca9ad
File moved
examples/MLW/
OLDSTUFF
/compile_MLW-grammar.py
→
examples/MLW/
VERALTET
/compile_MLW-grammar.py
View file @
a50ca9ad
File moved
examples/MLW/
OLDSTUFF
/fascitergula.xml
→
examples/MLW/
VERALTET
/fascitergula.xml
View file @
a50ca9ad
File moved
examples/MLW/samples/compile_MLW-entry.py
View file @
a50ca9ad
...
...
@@ -41,7 +41,7 @@ if (not os.path.exists(MLW_compiler) or
sys
.
exit
(
1
)
with
toolkit
.
logging
():
errors
=
compile_on_disk
(
"fascitergula.mlw"
,
MLW_compiler
,
".
xml
"
)
errors
=
compile_on_disk
(
"fascitergula.mlw"
,
MLW_compiler
,
".
sxpr
"
)
if
errors
:
print
(
'
\n
'
.
join
(
errors
))
sys
.
exit
(
1
)
examples/MLW/samples/fascitergula.xml
View file @
a50ca9ad
This diff is collapsed.
Click to expand it.
examples/Tutorial/LyrikCompiler_example.py
View file @
a50ca9ad
...
...
@@ -26,8 +26,8 @@ from DHParser.parsers import Grammar, Compiler, nil_scanner, \
from
DHParser.syntaxtree
import
Node
,
traverse
,
remove_last
,
remove_first
,
\
remove_children_if
,
reduce_single_child
,
replace_by_single_child
,
remove_whitespace
,
\
remove_expendables
,
remove_tokens
,
flatten
,
is_whitespace
,
is_expendable
,
\
collapse
,
map
_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
TransformationFunc
,
\
remove_children
,
remove_empty
,
reduce_children
,
has_content
,
has_name
collapse
,
replace
_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
TransformationFunc
,
\
remove_children
,
remove_empty
,
has_content
,
has_name
#######################################################################
...
...
@@ -149,9 +149,9 @@ Lyrik_AST_transformation_table = {
"jahr"
:
[
reduce_single_child
],
"wortfolge"
:
[
reduce_childr
en
(
has_name
(
'WORT'
)),
remove_last
(
is_whitespace
),
collapse
],
[
flatt
en
(
has_name
(
'WORT'
)
,
recursive
=
False
),
remove_last
(
is_whitespace
),
collapse
],
"namenfolge"
:
[
reduce_childr
en
(
has_name
(
'NAME'
)),
remove_last
(
is_whitespace
),
collapse
],
[
flatt
en
(
has_name
(
'NAME'
)
,
recursive
=
False
),
remove_last
(
is_whitespace
),
collapse
],
"verknüpfung"
: