Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
e07e8b97
Commit
e07e8b97
authored
Feb 13, 2019
by
eckhart
Browse files
- DHParser/syntaxtree.py: more docstrings; adjusted examples to refactoring
parent
366020e0
Changes
10
Hide whitespace changes
Inline
Side-by-side
DHParser/dsl.py
View file @
e07e8b97
...
...
@@ -92,11 +92,11 @@ from DHParser import logging, is_filename, load_if_file, \\
Lookbehind, Lookahead, Alternative, Pop, Token, DropToken, Synonym, AllOf, SomeOf,
\\
Unordered, Option, NegativeLookbehind, OneOrMore, RegExp, Retrieve, Series, Capture,
\\
ZeroOrMore, Forward, NegativeLookahead, Required, mixin_comment, compile_source,
\\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc,
\\
grammar_changed, last_value, counterpart, accumulate, PreprocessorFunc,
is_empty,
\\
Node, TransformationFunc, TransformationDict, transformation_factory, traverse,
\\
remove_children_if, move_adjacent, normalize_whitespace, is_anonymous, matches_re,
\\
reduce_single_child, replace_by_single_child, replace_or_reduce, remove_whitespace,
\\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace,
is_empty,
\\
remove_expendables, remove_empty, remove_tokens, flatten, is_insignificant_whitespace,
\\
is_expendable, collapse, collapse_if, replace_content, WHITESPACE_PTYPE, TOKEN_PTYPE,
\\
remove_nodes, remove_content, remove_brackets, replace_parser, remove_anonymous_tokens,
\\
keep_children, is_one_of, not_one_of, has_content, apply_if, remove_first, remove_last,
\\
...
...
DHParser/syntaxtree.py
View file @
e07e8b97
...
...
@@ -285,11 +285,14 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
raise
ValueError
(
'Leave node cannot contain other nodes'
)
def
equals
(
self
,
other
)
:
def
equals
(
self
,
other
:
'Node'
)
->
bool
:
"""
Equality of nodes: Two nodes are considered as equal, if their tag
name is the same, if their results are equal and if their attributes
and attribute values are the same.
Equality of value: Two nodes are considered as having the same value,
if their tag name is the same, if their results are equal and
if their attributes and attribute values are the same.
Returns True, if the tree originating in node `self` is equal by
value to the tree originating in node `other`.
"""
if
self
.
tag_name
==
other
.
tag_name
and
self
.
compare_attr
(
other
):
if
self
.
children
:
...
...
@@ -314,7 +317,7 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
return
surrogate
def
is_anonymous
(
self
):
def
is_anonymous
(
self
)
->
bool
:
return
not
self
.
tag_name
or
self
.
tag_name
[
0
]
==
':'
...
...
@@ -729,6 +732,16 @@ class Node: # (collections.abc.Sized): Base class omitted for cython-compatibil
class
FrozenNode
(
Node
):
"""
FrozenNode is an immutable kind of Node, i.e. it must not be changed
after initialization. The purpose is mainly to allow certain kinds of
optimization, like not having to instantiate empty nodes (because they
are always the same and will be dropped while parsing, anyway).
Frozen nodes must be used only temporarily during parsing or
tree-transformation and should not occur in the product of the
transformation any more. This can be verified with `tree_sanity_check()`.
"""
def
__init__
(
self
,
tag_name
:
str
,
result
:
ResultType
)
->
None
:
if
isinstance
(
result
,
str
)
or
isinstance
(
result
,
StringView
):
...
...
@@ -750,41 +763,55 @@ class FrozenNode(Node):
def
attr
(
self
):
raise
AssertionError
(
"Attributes cannot be accessed on a frozen node"
)
# @property
# def errors(self) -> List[Error]:
# return ()
#
# @errors.setter
# def errors(self, errors: List[Error]):
# if errors:
# raise AssertionError('Cannot assign error list to frozen node')
def
with_pos
(
self
,
pos
:
int
)
->
'Node'
:
pass
PLACEHOLDER
=
Node
(
'__PLACEHOLDER__'
,
''
)
PLACEHOLDER
=
Frozen
Node
(
'__PLACEHOLDER__'
,
''
)
def
tree_sanity_check
(
tree
:
Node
)
->
bool
:
"""
Sanity check for syntax trees: One and the same node must never appear
twice in the syntax tree. Frozen Nodes (EMPTY_NODE, PLACEHOLDER)
should only exist temporarily and must have been dropped or eliminated
before any kind of tree generation (i.e. parsing) or transformation
is finished.
:param tree: the root of the tree to be checked
:return: True, if the tree is `sane`, False otherwise.
"""
node_set
=
set
()
for
node
in
tree
.
select
(
lambda
nd
:
True
,
include_root
=
True
):
if
node
in
node_set
and
not
(
isinstance
(
node
,
FrozenNode
)
or
node
.
tag_name
==
'__PLACEHOLDER__'
):
if
node
in
node_set
or
isinstance
(
Node
,
FrozenNode
):
return
False
node_set
.
add
(
node
)
return
True
class
RootNode
(
Node
):
"""TODO: Add Documentation!!!
errors (list): A list of all errors that have occured so far during
"""The root node for the syntax tree is a special kind of node that keeps
and manages global properties of the tree as a whole. These are first and
foremost the list of errors that occurred during tree generation
(i.e. parsing) or any transformation of the tree. Other properties concern
the customization of the XML-serialization.
The root node can be instantiated before the tree is fully parsed. This is
necessary, because the root node is needed for managing error messages
during the parsing process, already. In order to connect the root node to
the tree, when parsing is finished, the swallow()-method must be called.
errors (list): A list of all errors that have occurred so far during
processing (i.e. parsing, AST-transformation, compiling)
of this tree.
error_flag (int): the highest warning or error level of all errors
that occurred.
inline_tags (set of strings): see `Node.as_xml()` for an explanation.
omit_tags (set of strings): see `Node.as_xml()` for an explanation.
empty_tags (set of strings): see `Node.as_xml()` for an explanation.
"""
def
__init__
(
self
,
node
:
Optional
[
Node
]
=
None
):
...
...
@@ -1037,7 +1064,8 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
# mock_parsers = {TOKEN_PTYPE: PlainText}
def
parse_attributes
(
s
:
StringView
)
->
Tuple
[
StringView
,
OrderedDict
]:
"""Parses a sqeuence of XML-Attributes. Returns the string-slice
"""
Parses a sequence of XML-Attributes. Returns the string-slice
beginning after the end of the attr.
"""
attributes
=
OrderedDict
()
# type: OrderedDict[str, str]
...
...
@@ -1049,7 +1077,8 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
return
(
s
[
restart
:],
attributes
)
def
parse_opening_tag
(
s
:
StringView
)
->
Tuple
[
StringView
,
str
,
OrderedDict
,
bool
]:
"""Parses an opening tag. Returns the string segment following the
"""
Parses an opening tag. Returns the string segment following the
opening tag, the tag name, a dictionary of attr and
a flag indicating whether the tag is actually a solitary tag as
indicated by a slash at the end, i.e. <br/>.
...
...
@@ -1064,7 +1093,8 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
return
s
[
i
+
1
:],
tagname
,
attributes
,
s
[
i
-
1
]
==
"/"
def
parse_closing_tag
(
s
:
StringView
)
->
Tuple
[
StringView
,
str
]:
"""Parses a closing tag and returns the string segment, just after
"""
Parses a closing tag and returns the string segment, just after
the closing tag.
"""
match
=
s
.
match
(
re
.
compile
(
r
'</\s*(?P<tagname>[\w:]+)>'
))
...
...
@@ -1073,7 +1103,8 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
return
s
[
s
.
index
(
match
.
end
()):],
tagname
def
parse_leaf_content
(
s
:
StringView
)
->
Tuple
[
StringView
,
StringView
]:
"""Parses a piece of the content of a tag, just until the next opening,
"""
Parses a piece of the content of a tag, just until the next opening,
closing or solitary tag is reached.
"""
i
=
0
...
...
@@ -1082,7 +1113,8 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
return
s
[
i
:],
s
[:
i
]
def
parse_full_content
(
s
:
StringView
)
->
Tuple
[
StringView
,
Node
]:
"""Parses the full content of a tag, starting right at the beginning
"""
Parses the full content of a tag, starting right at the beginning
of the opening tag and ending right after the closing tag.
"""
res
=
[]
# type: List[Node]
...
...
@@ -1112,6 +1144,9 @@ def parse_xml(xml: Union[str, StringView]) -> Node:
def
parse_tree
(
xml_or_sxpr
:
str
)
->
Optional
[
Node
]:
"""
Parses either XML or S-expressions; which of the two it is, is detected automatically.
"""
if
re
.
match
(
'\s*<'
,
xml_or_sxpr
):
return
parse_xml
(
xml_or_sxpr
)
elif
re
.
match
(
'\s*\('
,
xml_or_sxpr
):
...
...
examples/Arithmetic/ArithmeticCompiler.py
View file @
e07e8b97
...
...
@@ -25,7 +25,7 @@ from DHParser import logging, is_filename, load_if_file, \
ZeroOrMore
,
Forward
,
NegativeLookahead
,
Required
,
mixin_comment
,
compile_source
,
\
grammar_changed
,
last_value
,
counterpart
,
accumulate
,
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
TransformationDict
,
transformation_factory
,
traverse
,
\
remove_children_if
,
move_
whitesp
ace
,
normalize_whitespace
,
is_anonymous
,
matches_re
,
\
remove_children_if
,
move_
adj
ace
nt
,
normalize_whitespace
,
is_anonymous
,
matches_re
,
\
reduce_single_child
,
replace_by_single_child
,
replace_or_reduce
,
remove_whitespace
,
\
remove_expendables
,
remove_empty
,
remove_tokens
,
flatten
,
is_insignificant_whitespace
,
is_empty
,
\
is_expendable
,
collapse
,
collapse_if
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
...
...
@@ -59,7 +59,7 @@ class ArithmeticGrammar(Grammar):
r
"""Parser for an Arithmetic source file.
"""
expression
=
Forward
()
source_hash__
=
"
50681341ebb2536b3eadd7eb5540ece0
"
source_hash__
=
"
d77842f8b59d2ec3736b21778c0c9c78
"
parser_initialization__
=
[
"upon instantiation"
]
resume_rules__
=
{}
COMMENT__
=
r
'#.*'
...
...
examples/Arithmetic_old/ArithmeticCompiler.py
View file @
e07e8b97
...
...
@@ -59,7 +59,7 @@ class ArithmeticGrammar(Grammar):
digit
=
Forward
()
expression
=
Forward
()
variable
=
Forward
()
source_hash__
=
"
120070baa84f5a2bd1bbb900627078fc
"
source_hash__
=
"
cf537b22b7a1a2a58c426f99f784285d
"
parser_initialization__
=
[
"upon instantiation"
]
resume_rules__
=
{}
COMMENT__
=
r
''
...
...
examples/BibTeX/BibTeXCompiler.py
View file @
e07e8b97
...
...
@@ -57,7 +57,7 @@ class BibTeXGrammar(Grammar):
r
"""Parser for a BibTeX source file.
"""
text
=
Forward
()
source_hash__
=
"
e402951b290cb0fce63ba0cbca3f23e9
"
source_hash__
=
"
f0e945d8b504317cdfb6e08fd2fcf596
"
parser_initialization__
=
[
"upon instantiation"
]
resume_rules__
=
{}
COMMENT__
=
r
'(?i)%.*(?:\n|$)'
...
...
@@ -85,12 +85,12 @@ class BibTeXGrammar(Grammar):
def
get_grammar
()
->
BibTeXGrammar
:
global
GLOBALS
try
:
grammar
=
GLOBALS
.
BibTeX_1_grammar_singleton
grammar
=
GLOBALS
.
BibTeX_
0000000
1_grammar_singleton
except
AttributeError
:
GLOBALS
.
BibTeX_1_grammar_singleton
=
BibTeXGrammar
()
GLOBALS
.
BibTeX_
0000000
1_grammar_singleton
=
BibTeXGrammar
()
if
hasattr
(
get_grammar
,
'python_src__'
):
GLOBALS
.
BibTeX_1_grammar_singleton
.
python_src__
=
get_grammar
.
python_src__
grammar
=
GLOBALS
.
BibTeX_1_grammar_singleton
GLOBALS
.
BibTeX_
0000000
1_grammar_singleton
.
python_src__
=
get_grammar
.
python_src__
grammar
=
GLOBALS
.
BibTeX_
0000000
1_grammar_singleton
return
grammar
...
...
examples/EBNF/EBNFCompiler.py
View file @
e07e8b97
...
...
@@ -25,7 +25,7 @@ from DHParser import logging, is_filename, load_if_file, \
ZeroOrMore
,
Forward
,
NegativeLookahead
,
Required
,
mixin_comment
,
compile_source
,
\
grammar_changed
,
last_value
,
counterpart
,
accumulate
,
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
TransformationDict
,
transformation_factory
,
traverse
,
\
remove_children_if
,
move_
whitesp
ace
,
normalize_whitespace
,
is_anonymous
,
matches_re
,
\
remove_children_if
,
move_
adj
ace
nt
,
normalize_whitespace
,
is_anonymous
,
matches_re
,
\
reduce_single_child
,
replace_by_single_child
,
replace_or_reduce
,
remove_whitespace
,
\
remove_expendables
,
remove_empty
,
remove_tokens
,
flatten
,
is_insignificant_whitespace
,
is_empty
,
\
is_expendable
,
collapse
,
collapse_if
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
...
...
@@ -59,7 +59,7 @@ class EBNFGrammar(Grammar):
r
"""Parser for an EBNF source file.
"""
expression
=
Forward
()
source_hash__
=
"
a7119a157d38270e4215972858d0b930
"
source_hash__
=
"
de6d0516ea104e7d8318b998e488b2d1
"
parser_initialization__
=
[
"upon instantiation"
]
resume_rules__
=
{}
COMMENT__
=
r
'#.*(?:\n|$)'
...
...
examples/EBNF_old/EBNFCompiler.py
View file @
e07e8b97
...
...
@@ -57,7 +57,7 @@ class EBNFGrammar(Grammar):
r
"""Parser for an EBNF source file.
"""
expression
=
Forward
()
source_hash__
=
"
5e9e65a057bec7da29989dba47f40394
"
source_hash__
=
"
7ca2bbabfc9bc19ec54e2318bbc4c9c2
"
parser_initialization__
=
[
"upon instantiation"
]
resume_rules__
=
{}
COMMENT__
=
r
'#.*(?:\n|$)'
...
...
examples/Tutorial/Lyrik_explicit_whitespaceCompiler.py
View file @
e07e8b97
...
...
@@ -25,7 +25,7 @@ from DHParser import logging, is_filename, load_if_file, \
ZeroOrMore
,
Forward
,
NegativeLookahead
,
Required
,
mixin_comment
,
compile_source
,
\
grammar_changed
,
last_value
,
counterpart
,
accumulate
,
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
TransformationDict
,
transformation_factory
,
traverse
,
\
remove_children_if
,
move_
whitesp
ace
,
normalize_whitespace
,
is_anonymous
,
matches_re
,
\
remove_children_if
,
move_
adj
ace
nt
,
normalize_whitespace
,
is_anonymous
,
matches_re
,
\
reduce_single_child
,
replace_by_single_child
,
replace_or_reduce
,
remove_whitespace
,
\
remove_expendables
,
remove_empty
,
remove_tokens
,
flatten
,
is_insignificant_whitespace
,
is_empty
,
\
is_expendable
,
collapse
,
collapse_if
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
...
...
@@ -58,7 +58,7 @@ def get_preprocessor() -> PreprocessorFunc:
class
Lyrik_explicit_whitespaceGrammar
(
Grammar
):
r
"""Parser for a Lyrik_explicit_whitespace source file.
"""
source_hash__
=
"
824c3970f8997489b9a0faa53f2dff51
"
source_hash__
=
"
bcb3cee425961a2148941b492e614bd2
"
parser_initialization__
=
[
"upon instantiation"
]
resume_rules__
=
{}
COMMENT__
=
r
''
...
...
examples/XML/XMLCompiler.py
View file @
e07e8b97
...
...
@@ -67,7 +67,7 @@ class XMLGrammar(Grammar):
extSubsetDecl
=
Forward
()
ignoreSectContents
=
Forward
()
markupdecl
=
Forward
()
source_hash__
=
"
afe79281456bb2625a0c90c58a699d32
"
source_hash__
=
"
3b6f8c0aafa133d9139684e42a30adfa
"
parser_initialization__
=
[
"upon instantiation"
]
resume_rules__
=
{}
COMMENT__
=
r
''
...
...
examples/XMLSnippet/XMLSnippetCompiler.py
View file @
e07e8b97
...
...
@@ -25,7 +25,7 @@ from DHParser import logging, is_filename, load_if_file, Grammar, Compiler, nil_
ZeroOrMore
,
Forward
,
NegativeLookahead
,
Required
,
mixin_comment
,
compile_source
,
\
grammar_changed
,
last_value
,
counterpart
,
accumulate
,
PreprocessorFunc
,
\
Node
,
TransformationFunc
,
TransformationDict
,
transformation_factory
,
traverse
,
\
remove_children_if
,
move_
whitesp
ace
,
normalize_whitespace
,
is_anonymous
,
matches_re
,
\
remove_children_if
,
move_
adj
ace
nt
,
normalize_whitespace
,
is_anonymous
,
matches_re
,
\
reduce_single_child
,
replace_by_single_child
,
replace_or_reduce
,
remove_whitespace
,
\
remove_expendables
,
remove_empty
,
remove_tokens
,
flatten
,
is_insignificant_whitespace
,
is_empty
,
\
is_expendable
,
collapse
,
collapse_if
,
replace_content
,
WHITESPACE_PTYPE
,
TOKEN_PTYPE
,
\
...
...
@@ -60,7 +60,7 @@ class XMLSnippetGrammar(Grammar):
"""
Name
=
Forward
()
element
=
Forward
()
source_hash__
=
"
49e51a7b2ad79e95ba239427830ba02f
"
source_hash__
=
"
ef0fa6d8c7a96ee0fe2a8e209c3f2ae9
"
parser_initialization__
=
[
"upon instantiation"
]
resume_rules__
=
{}
COMMENT__
=
r
''
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment