Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
a100f00d
Commit
a100f00d
authored
Aug 06, 2017
by
Eckhart Arnold
Browse files
- streamlining transform.py + more LaTeX tests
parent
4f6c3ae8
Changes
8
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
a100f00d
...
...
@@ -81,7 +81,7 @@ from DHParser import logging, is_filename, load_if_file, \\
ZeroOrMore, Forward, NegativeLookahead, mixin_comment, compile_source,
\\
last_value, counterpart, accumulate, PreprocessorFunc,
\\
Node, TransformationFunc,
\\
traverse, remove_children_if, join,
\\
traverse, remove_children_if, join
_children
,
\\
reduce_single_child, replace_by_single_child, remove_whitespace,
\\
remove_expendables, remove_empty, remove_tokens, flatten, is_whitespace,
\\
is_empty, is_expendable, collapse, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE,
\\
...
...
DHParser/ebnf.py
View file @
a100f00d
...
...
@@ -206,9 +206,10 @@ EBNF_transformation_table = {
"factor, flowmarker, retrieveop"
:
replace_by_single_child
,
"group"
:
[
remove_
tokens
(
'('
,
')'
)
,
replace_by_single_child
],
[
remove_
brackets
,
replace_by_single_child
],
"oneormore, repetition, option"
:
[
reduce_single_child
,
remove_brackets
],
[
reduce_single_child
,
remove_brackets
,
forbid
(
'repetition'
,
'option'
,
'oneormore'
),
assert_content
(
r
'(?!§)'
)],
"symbol, literal, regexp"
:
reduce_single_child
,
(
TOKEN_PTYPE
,
WHITESPACE_PTYPE
):
...
...
@@ -220,18 +221,8 @@ EBNF_transformation_table = {
}
EBNF_validation_table
=
{
# Semantic validation on the AST. EXPERIMENTAL!
"repetition, option, oneormore"
:
[
forbid
(
'repetition'
,
'option'
,
'oneormore'
),
assert_content
(
r
'(?!§)'
)]
}
def
EBNFTransformer
(
syntax_tree
:
Node
):
for
processing_table
,
key_func
in
[(
EBNF_transformation_table
,
key_tag_name
),
(
EBNF_validation_table
,
key_tag_name
)]:
traverse
(
syntax_tree
,
processing_table
,
key_func
)
traverse
(
syntax_tree
,
EBNF_transformation_table
,
key_tag_name
)
def
get_ebnf_transformer
()
->
TransformationFunc
:
...
...
@@ -728,7 +719,7 @@ class EBNFCompiler(Compiler):
def
on_literal
(
self
,
node
)
->
str
:
return
'Token('
+
str
(
node
).
replace
(
'
\\
'
,
r
'\\'
)
+
')'
# return 'Token(' + ', '.join([node.result]) + ')' ?
return
'Token('
+
str
(
node
).
replace
(
'
\\
'
,
r
'\\'
)
+
')'
# return 'Token(' + ', '.join
_children
([node.result]) + ')' ?
def
on_regexp
(
self
,
node
:
Node
)
->
str
:
...
...
DHParser/syntaxtree.py
View file @
a100f00d
...
...
@@ -270,7 +270,8 @@ class Node:
def
errors
(
self
)
->
List
[
Error
]:
return
[
Error
(
self
.
pos
,
err
)
for
err
in
self
.
_errors
]
def
add_error
(
self
,
error_str
)
->
'Node'
:
def
add_error
(
self
,
error_str
:
str
)
->
'Node'
:
assert
isinstance
(
error_str
,
str
)
self
.
_errors
.
append
(
error_str
)
self
.
error_flag
=
True
return
self
...
...
DHParser/toolkit.py
View file @
a100f00d
...
...
@@ -151,7 +151,7 @@ def is_logging() -> bool:
# if i < 0:
# parameter_list = parameter_list[:i]
# name = f.__self__.__class__.__name__ if f.__name__ == '__init__' else f.__name__
# return "%s(%s)" % (name, ", ".join(repr(item) for item in parameter_list))
# return "%s(%s)" % (name, ", ".join
_children
(repr(item) for item in parameter_list))
def
line_col
(
text
:
str
,
pos
:
int
)
->
Tuple
[
int
,
int
]:
...
...
DHParser/transform.py
View file @
a100f00d
...
...
@@ -43,7 +43,7 @@ __all__ = ('transformation_factory',
'reduce_single_child'
,
'replace_parser'
,
'collapse'
,
'join'
,
'join
_children
'
,
'replace_content'
,
'apply_if'
,
'is_whitespace'
,
...
...
@@ -133,7 +133,7 @@ def transformation_factory(t=None):
# Provide for the case that transformation_factory has been
# written as plain decorator and not as a function call that
# returns the decorator proper.
func
=
t
;
func
=
t
t
=
None
return
decorator
(
func
)
else
:
...
...
@@ -234,7 +234,7 @@ def replace_by_single_child(node):
if
not
node
.
result
[
0
].
parser
.
name
:
node
.
result
[
0
].
parser
.
name
=
node
.
parser
.
name
node
.
parser
=
node
.
result
[
0
].
parser
node
.
_errors
.
extend
(
node
.
result
[
0
].
errors
)
node
.
_errors
.
extend
(
node
.
result
[
0
].
_
errors
)
node
.
result
=
node
.
result
[
0
].
result
...
...
@@ -243,7 +243,7 @@ def reduce_single_child(node):
immediate descendant to this node, but keeping this node's parser entry.
"""
if
node
.
children
and
len
(
node
.
result
)
==
1
:
node
.
_errors
.
extend
(
node
.
result
[
0
].
errors
)
node
.
_errors
.
extend
(
node
.
result
[
0
].
_
errors
)
node
.
result
=
node
.
result
[
0
].
result
...
...
@@ -295,14 +295,14 @@ def collapse(node):
@
transformation_factory
def
join
(
node
,
tag_names
:
List
[
str
]):
def
join
_children
(
node
,
tag_names
:
List
[
str
]):
"""Joins all children next to each other and with particular tag-
names into a single child node with mock parser 'parser_name'.
"""
result
=
[]
name
,
ptype
=
(
tag_names
[
0
].
split
(
':'
)
+
[
''
])[:
2
]
if
node
.
children
:
i
=
0
;
i
=
0
L
=
len
(
node
.
children
)
while
i
<
L
:
while
i
<
L
and
not
node
.
children
[
i
].
tag_name
in
tag_names
:
...
...
@@ -356,21 +356,17 @@ def is_token(node, tokens: AbstractSet[str] = frozenset()) -> bool:
return
node
.
parser
.
ptype
==
TOKEN_PTYPE
and
(
not
tokens
or
node
.
result
in
tokens
)
@
transformation_factory
def
has_name
(
node
,
tag_names
:
AbstractSet
[
str
])
->
bool
:
"""Checks if node has any of a given set of `tag names`.
See property `Node.tagname`."""
return
node
.
tag_name
in
tag_names
def
has_name
(
node
,
regexp
:
str
)
->
bool
:
"""Checks a node's tag name against a regular expression."""
return
bool
(
re
.
match
(
regexp
,
node
.
tag_name
))
@
transformation_factory
def
has_content
(
node
,
contents
:
AbstractSet
[
str
])
->
bool
:
"""Checks if the node's content (i.e. `str(node)`) matches any of
a given set of strings."""
return
str
(
node
)
in
contents
def
has_content
(
node
,
regexp
:
str
)
->
bool
:
"""Checks a node's content against a regular expression."""
return
bool
(
re
.
match
(
regexp
,
str
(
node
)))
@
transformation_factory
@
transformation_factory
(
Callable
)
def
apply_if
(
node
,
transformation
:
Callable
,
condition
:
Callable
):
"""Applies a transformation only if a certain condition is met.
"""
...
...
@@ -378,47 +374,32 @@ def apply_if(node, transformation: Callable, condition: Callable):
transformation
(
node
)
@
transformation_factory
def
keep_children
(
node
,
section
:
slice
=
slice
(
None
,
None
,
None
),
condition
=
lambda
node
:
True
):
"""Keeps only the nodes which fall into a slice of the result field
and for which the function `condition(child_node)` evaluates to
`True`."""
@
transformation_factory
(
slice
)
def
keep_children
(
node
,
section
:
slice
=
slice
(
None
)):
"""Keeps only child-nodes which fall into a slice of the result field."""
if
node
.
children
:
node
.
result
=
tuple
(
c
for
c
in
node
.
children
[
section
]
if
condition
(
c
))
node
.
result
=
node
.
children
[
section
]
@
transformation_factory
(
Callable
)
def
remove_children_if
(
node
,
condition
):
def
remove_children_if
(
node
,
condition
:
Callable
,
section
:
slice
=
slice
(
None
)
):
"""Removes all nodes from a slice of the result field if the function
`
`condition(child_node)`
`
evaluates to
`
`True`
`
."""
`condition(child_node)` evaluates to `True`."""
if
node
.
children
:
node
.
result
=
tuple
(
c
for
c
in
node
.
children
if
not
condition
(
c
))
c
=
node
.
children
N
=
len
(
c
)
rng
=
range
(
*
section
.
indices
(
N
))
node
.
result
=
tuple
(
c
[
i
]
for
i
in
range
(
N
)
if
not
i
in
rng
or
not
condition
(
c
[
i
]))
remove_whitespace
=
remove_children_if
(
is_whitespace
)
# partial(remove_children_if, condition=is_whitespace)
remove_empty
=
remove_children_if
(
is_empty
)
remove_expendables
=
remove_children_if
(
is_expendable
)
# partial(remove_children_if, condition=is_expendable)
remove_first
=
keep_children
(
slice
(
1
,
None
))
remove_last
=
keep_children
(
slice
(
None
,
-
1
))
remove_brackets
=
keep_children
(
slice
(
1
,
-
1
))
@
transformation_factory
(
Callable
)
def
remove_first
(
node
,
condition
=
lambda
node
:
True
):
"""Removes the first child if the condition is met.
Otherwise does nothing."""
if
node
.
children
:
if
condition
(
node
.
children
[
0
]):
node
.
result
=
node
.
result
[
1
:]
@
transformation_factory
(
Callable
)
def
remove_last
(
node
,
condition
=
lambda
node
:
True
):
"""Removes the last child if the condition is met.
Otherwise does nothing."""
if
node
.
children
:
if
condition
(
node
.
children
[
-
1
]):
node
.
result
=
node
.
result
[:
-
1
]
@
transformation_factory
def
remove_tokens
(
node
,
tokens
:
AbstractSet
[
str
]
=
frozenset
()):
"""Reomoves any among a particular set of tokens from the immediate
...
...
@@ -428,24 +409,60 @@ def remove_tokens(node, tokens: AbstractSet[str] = frozenset()):
@
transformation_factory
def
remove_parser
(
node
,
tag_names
:
AbstractSet
[
str
]
):
def
remove_parser
(
node
,
regexp
:
str
):
"""Removes children by 'tag name'."""
remove_children_if
(
node
,
partial
(
has_name
,
tag_names
=
tag_names
))
remove_children_if
(
node
,
partial
(
has_name
,
regexp
=
regexp
))
@
transformation_factory
def
remove_content
(
node
,
contents
:
AbstractSet
[
str
]
):
def
remove_content
(
node
,
regexp
:
str
):
"""Removes children depending on their string value."""
remove_children_if
(
node
,
partial
(
has_content
,
contents
=
contents
))
remove_children_if
(
node
,
partial
(
has_content
,
regexp
=
regexp
))
########################################################################
#
# AST semantic validation functions
# EXPERIMENTAL!
# AST semantic validation functions (EXPERIMENTAL!!!)
#
########################################################################
@
transformation_factory
(
Callable
)
def
assert_condition
(
node
,
condition
:
Callable
,
error_msg
:
str
=
''
)
->
bool
:
"""Checks for `condition`; adds an error message if condition is not met."""
if
not
condition
(
node
):
if
error_msg
:
node
.
add_error
(
error_msg
%
node
.
tag_name
if
error_msg
.
find
(
"%s"
)
>
0
else
error_msg
)
else
:
cond_name
=
condition
.
__name__
if
hasattr
(
condition
,
'__name__'
)
\
else
condition
.
__class__
.
__name__
if
hasattr
(
condition
,
'__class__'
)
\
else
'<unknown>'
node
.
add_error
(
"transform.assert_condition: Failed to meet condition "
+
cond_name
)
assert_has_children
=
assert_condition
(
lambda
nd
:
nd
.
children
,
'Element "%s" has no children'
)
@
transformation_factory
def
assert_content
(
node
,
regexp
:
str
):
if
not
has_content
(
node
,
regexp
):
node
.
add_error
(
'Element "%s" violates %s on %s'
%
(
node
.
parser
.
name
,
str
(
regexp
),
str
(
node
)))
#
# @transformation_factory
# def assert_name(node, regexp: str):
# if not has_name(node, regexp):
# node.add_error('Element name "%s" does not match %s' % (node.tag_name), str(regexp))
#
#
# @transformation_factory(Callable)
# def assert_children(node, condition: Callable=lambda node: True,
# error_msg: str='', section: slice=slice(None)):
# if node.children:
# for child in node.children:
# assert_condition(child, condition, error_msg)
#
@
transformation_factory
def
require
(
node
,
child_tags
:
AbstractSet
[
str
]):
...
...
@@ -461,11 +478,3 @@ def forbid(node, child_tags: AbstractSet[str]):
if
child
.
tag_name
in
child_tags
:
node
.
add_error
(
'Element "%s" cannot be nested inside "%s".'
%
(
child
.
parser
.
name
,
node
.
parser
.
name
))
@
transformation_factory
def
assert_content
(
node
,
regex
:
str
):
content
=
str
(
node
)
if
not
re
.
match
(
regex
,
content
):
node
.
add_error
(
'Element "%s" violates %s on %s'
%
(
node
.
parser
.
name
,
str
(
regex
),
content
))
DevScripts/create_standalone.py
View file @
a100f00d
...
...
@@ -112,7 +112,7 @@ def selftest() -> bool:
if errors:
print("Selftest FAILED :-(")
print("\n\n".join(errors))
print("\n\n".join
_children
(errors))
return False
print(generated_ebnf_parser)
print("\n\nSTAGE 2: Selfhosting-test: Trying to compile EBNF-Grammar with generated parser...\n")
...
...
dhparser.py
View file @
a100f00d
...
...
@@ -47,7 +47,7 @@ from DHParser.toolkit import logging
# transformer, compiler)
# print(result)
# if errors:
# print('\n\n'.join(errors))
# print('\n\n'.join
_children
(errors))
# sys.exit(1)
# else:
# # compile the grammar again using the result of the previous
...
...
examples/LaTeX/LaTeXCompiler.py
View file @
a100f00d
...
...
@@ -21,9 +21,9 @@ from DHParser import logging, is_filename, Grammar, Compiler, Lookbehind, Altern
ZeroOrMore
,
Forward
,
NegativeLookahead
,
mixin_comment
,
compile_source
,
\
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
\
traverse
,
join
,
\
traverse
,
join
_children
,
remove_whitespace
,
remove_parser
,
\
reduce_single_child
,
replace_by_single_child
,
remove_expendables
,
remove_empty
,
flatten
,
\
collapse
,
replace_content
,
remove_brackets
collapse
,
replace_content
,
remove_brackets
,
remove_first
#######################################################################
...
...
@@ -324,35 +324,43 @@ LaTeX_AST_transformation_table = {
"blockenv"
:
[],
"parblock"
:
[],
"sequence"
:
flatten
,
[
flatten
,
remove_parser
(
'PARSEP'
),
replace_by_single_child
],
"enumerate, itemize"
:
[
remove_brackets
,
remove_parser
(
'PARSEP'
),
reduce_single_child
],
"item"
:
[
remove_first
,
remove_parser
(
'PARSEP'
)],
"paragraph"
:
[
flatten
(
lambda
node
:
not
node
.
parser
.
name
or
node
.
parser
.
name
==
"text"
),
join
(
'text'
,
':Whitespace'
)],
"inlineenv"
:
[],
"beginenv"
:
[],
"endenv"
:
[],
"command"
:
[],
"config"
:
[],
"block"
:
[
remove_brackets
,
reduce_single_child
],
join_children
(
'text'
,
':Whitespace'
)],
"quotation, generic_bloc, generic_inline_env, inline_math"
:
[
remove_brackets
],
"inline_environment"
:
[],
"begin_environment"
:
[],
"end_environment"
:
[],
# "command": [],
"generic_command"
:
[],
"config, block"
:
[
remove_brackets
,
reduce_single_child
],
"text"
:
[
reduce_single_child
,
join
(
'text'
,
'word_sequence'
,
':Whitespace'
)],
[
reduce_single_child
,
join
_children
(
'text'
,
'word_sequence'
,
':Whitespace'
)],
"cfgtext"
:
[
flatten
,
reduce_single_child
],
"word_sequence"
:
[
collapse
],
"blockcmd"
:
[],
"CMDNAME"
:
[
remove_expendables
,
reduce_single_child
],
"NAME"
:
[],
"NAME"
:
[
reduce_single_child
],
"ESCAPED"
:
[
reduce_single_child
],
"BRACKETS"
:
[],
"TEXTCHUNK"
:
[],
"WSPC, :Whitespace"
:
streamline_whitespace
,
[],
#
streamline_whitespace,
# whitespace will be removed anyway
"LF"
:
replace_content
(
lambda
node
:
'
\n
'
),
"PARSEP"
:
replace_content
(
lambda
node
:
'
\n\n
'
),
[],
#
replace_content(lambda node: '\n\n'),
"EOF"
:
[],
":Token"
:
[],
# [remove_whitespace, reduce_single_child], # Tokens will be removed anyway?
"*"
:
replace_by_single_child
,
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment