Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
badw-it
DHParser
Commits
1fc8788d
Commit
1fc8788d
authored
Nov 12, 2017
by
eckhart
Browse files
- further cleanups
parent
faa86362
Changes
2
Hide whitespace changes
Inline
Side-by-side
DHParser/parser.py
View file @
1fc8788d
...
...
@@ -661,10 +661,10 @@ class Grammar:
grammar class!
Attention: If there exists more than one reference to the same
parser, only the first one will be chosen for python versions
parser, only the first one will be chosen for python versions
greater or equal 3.6. For python version <= 3.5 an arbitrarily
selected reference will be chosen. See PEP 520
(www.python.org/dev/peps/pep-0520/) for an explanation of why.
(www.python.org/dev/peps/pep-0520/) for an explanation of why.
"""
if
cls
.
parser_initialization__
!=
"done"
:
cdict
=
cls
.
__dict__
...
...
@@ -677,7 +677,7 @@ class Grammar:
cls
.
parser_initialization__
=
"done"
def
__init__
(
self
,
root
:
Parser
=
None
)
->
None
:
def
__init__
(
self
,
root
:
Parser
=
None
)
->
None
:
# if not hasattr(self.__class__, 'parser_initialization__'):
# self.__class__.parser_initialization__ = "pending"
# if not hasattr(self.__class__, 'wspL__'):
...
...
@@ -891,7 +891,7 @@ class Grammar:
def
log_parsing_history__
(
self
,
log_file_name
:
str
=
''
)
->
None
:
"""
Writes a log of the parsing history of the most recently parsed
document.
document.
"""
def
prepare_line
(
record
):
excerpt
=
self
.
document__
.
text
.
__getitem__
(
record
.
extent
)[:
25
].
replace
(
'
\n
'
,
'
\\
n'
)
...
...
@@ -936,8 +936,8 @@ def dsl_error_msg(parser: Parser, error_str: str) -> str:
that this is not necessarily the parser that caused the
error but only where the error became apparent.
error_str (str): A short string describing the error.
Returns:
str: An error message including the call stack if history
Returns:
str: An error message including the call stack if history
tracking has been turned on in the grammar object.
"""
msg
=
[
"DSL parser specification error:"
,
error_str
,
'Caught by parser "%s".'
%
str
(
parser
)]
...
...
@@ -983,7 +983,7 @@ def nil_preprocessor(text: str) -> str:
class
PreprocessorToken
(
Parser
):
"""
Parses tokens that have been inserted by a preprocessor.
Preprocessors can generate Tokens with the ``make_token``-function.
These tokens start and end with magic characters that can only be
matched by the PreprocessorToken Parser. Such tokens can be used to
...
...
@@ -1023,8 +1023,9 @@ class PreprocessorToken(Parser):
class
RegExp
(
Parser
):
"""Regular expression parser.
r
"""
Regular expression parser.
The RegExp-parser parses text that matches a regular expression.
RegExp can also be considered as the "atomic parser", because all
other parsers delegate part of the parsing job to other parsers,
...
...
@@ -1068,13 +1069,13 @@ class Whitespace(RegExp):
class
RE
(
Parser
):
"""
r
"""
Regular Expressions with optional leading or trailing whitespace.
The RE-parser parses pieces of text that match a given regular
expression. Other than the ``RegExp``-Parser it can also skip
"implicit whitespace" before or after the matched text.
The whitespace is in turn defined by a regular expression. It should
be made sure that this expression also matches the empty string,
e.g. use r'\s*' or r'[\t ]*', but not r'\s+'. If the respective
...
...
@@ -1096,17 +1097,18 @@ class RE(Parser):
EBNF-Notation: `/ ... /~` or `~/ ... /` or `~/ ... /~`
EBNF-Example: `word = /\w+/~`
"""
def
__init__
(
self
,
regexp
,
wL
=
None
,
wR
=
None
,
name
=
''
):
"""Constructor for class RE.
Args:
regexp (str or regex object): The regular expression to be
used for parsing.
wL (str or regexp): Left whitespace regular expression,
used for parsing.
wL (str or regexp): Left whitespace regular expression,
i.e. either ``None``, the empty string or a regular
expression (e.g. "\s*") that defines whitespace. An
expression (e.g. "\s*") that defines whitespace. An
empty string means no whitespace will be skipped,
``None`` means that the default whitespace will be
``None`` means that the default whitespace will be
used.
wR (str or regexp): Right whitespace regular expression.
See above.
...
...
@@ -1248,7 +1250,7 @@ class NaryOperator(Parser):
class
Option
(
UnaryOperator
):
"""
r
"""
Parser `Option` always matches, even if its child-parser
did not match.
...
...
@@ -1328,7 +1330,7 @@ class ZeroOrMore(Option):
class
OneOrMore
(
UnaryOperator
):
"""
r
"""
`OneOrMore` applies a parser repeatedly as long as this parser
matches. Other than `ZeroOrMore` which always matches, at least
one match is required by `OneOrMore`.
...
...
@@ -1372,7 +1374,7 @@ class OneOrMore(UnaryOperator):
class
Series
(
NaryOperator
):
"""
r
"""
Matches if each of a series of parsers matches exactly in the order of
the series.
...
...
@@ -1394,7 +1396,8 @@ class Series(NaryOperator):
L
=
len
(
self
.
parsers
)
assert
1
<=
L
<
Series
.
NOPE
,
(
'Length %i of series exceeds maximum length of %i'
%
(
L
,
Series
.
NOPE
))
if
mandatory
<
0
:
mandatory
+=
L
if
mandatory
<
0
:
mandatory
+=
L
assert
0
<=
mandatory
<
L
or
mandatory
==
Series
.
NOPE
self
.
mandatory
=
mandatory
...
...
DHParser/syntaxtree.py
View file @
1fc8788d
...
...
@@ -80,7 +80,7 @@ class ParserBase:
def
repr
(
self
)
->
str
:
"""Returns the parser's name if it has a name and the parser's
`ptype` otherwise. Note that for named parsers this is not the
same as `repr(parsers)` which always returns the combined name
same as `repr(parsers)` which always returns the combined name
and ptype, e.g. 'term:OneOrMore'."""
return
self
.
name
if
self
.
name
else
repr
(
self
)
...
...
@@ -88,7 +88,7 @@ class ParserBase:
"""Resets any parser variables. (Should be overridden.)"""
pass
def
grammar
(
self
):
def
grammar
(
self
)
->
Optional
[
'Grammar'
]
:
"""Returns the Grammar object to which the parser belongs. If not
yet connected to any Grammar object, None is returned."""
return
None
...
...
@@ -341,7 +341,9 @@ class Node(collections.abc.Sized):
@
property
def
pos
(
self
)
->
int
:
assert
self
.
_pos
>=
0
,
"position value not initialized!"
"""Returns the position of the Node's content in the source text."""
if
self
.
_pos
<
0
:
raise
AssertionError
(
"position value not initialized!"
)
return
self
.
_pos
@
pos
.
setter
...
...
@@ -359,10 +361,24 @@ class Node(collections.abc.Sized):
@
property
def
errors
(
self
)
->
List
[
Error
]:
"""
Returns the errors that occurred at this Node,
not including any errors from child nodes.
"""
return
self
.
_errors
.
copy
()
def
add_error
(
self
,
message
:
str
,
level
:
int
=
Error
.
ERROR
,
code
:
Hashable
=
0
)
->
'Node'
:
def
add_error
(
self
,
message
:
str
,
level
:
int
=
Error
.
ERROR
,
code
:
Hashable
=
cast
(
Hashable
,
0
))
->
'Node'
:
"""
Adds an error to this Node.
Parameters:
message(str): A string with the error message.
level(int): The error level (error or warning)
code(Hashable): An error code to identify the kind of error
"""
self
.
_errors
.
append
(
Error
(
message
,
level
,
code
))
self
.
error_flag
=
max
(
self
.
error_flag
,
self
.
_errors
[
-
1
].
level
)
return
self
...
...
@@ -382,7 +398,7 @@ class Node(collections.abc.Sized):
else
:
return
[]
def
_collect_errors
(
self
,
lbreaks
:
List
[
int
]
=
[],
clear_errors
=
False
)
->
List
[
Error
]:
def
_collect_errors
(
self
,
lbreaks
:
List
[
int
]
=
[],
clear_errors
=
False
)
->
List
[
Error
]:
if
self
.
error_flag
:
errors
=
self
.
errors
if
lbreaks
:
...
...
@@ -400,7 +416,7 @@ class Node(collections.abc.Sized):
return
[]
def
_tree_repr
(
self
,
tab
,
open
F
,
close
F
,
data
F
=
identity
,
density
=
0
)
->
str
:
def
_tree_repr
(
self
,
tab
,
open
_fn
,
close
_fn
,
data
_fn
=
identity
,
density
=
0
)
->
str
:
"""
Generates a tree representation of this node and its children
in string form.
...
...
@@ -422,31 +438,31 @@ class Node(collections.abc.Sized):
A string that contains a (serialized) tree representation
of the node and its children.
"""
head
=
open
F
(
self
)
tail
=
close
F
(
self
)
head
=
open
_fn
(
self
)
tail
=
close
_fn
(
self
)
if
not
self
.
result
:
return
head
.
rstrip
()
+
tail
.
lstrip
()
D
=
None
if
density
&
2
else
''
tail
=
tail
.
lstrip
(
None
if
density
&
2
else
''
)
if
self
.
children
:
content
=
[]
for
child
in
self
.
children
:
subtree
=
child
.
_tree_repr
(
tab
,
open
F
,
close
F
,
data
F
,
density
).
split
(
'
\n
'
)
subtree
=
child
.
_tree_repr
(
tab
,
open
_fn
,
close
_fn
,
data
_fn
,
density
).
split
(
'
\n
'
)
content
.
append
(
'
\n
'
.
join
((
tab
+
s
)
for
s
in
subtree
))
return
head
+
'
\n
'
.
join
(
content
)
+
tail
.
lstrip
(
D
)
return
head
+
'
\n
'
.
join
(
content
)
+
tail
res
=
cast
(
str
,
self
.
result
)
# safe, because if there are no children, result is a string
if
density
&
1
and
res
.
find
(
'
\n
'
)
<
0
:
# and head[0] == "<":
# except for XML, add a gap between opening statement and content
gap
=
' '
if
head
.
rstrip
()[
-
1
]
!=
'>'
else
''
return
head
.
rstrip
()
+
gap
+
data
F
(
self
.
result
)
+
tail
.
lstrip
()
return
head
.
rstrip
()
+
gap
+
data
_fn
(
self
.
result
)
+
tail
.
lstrip
()
else
:
return
head
+
'
\n
'
.
join
([
tab
+
data
F
(
s
)
for
s
in
res
.
split
(
'
\n
'
)])
+
tail
.
lstrip
(
D
)
return
head
+
'
\n
'
.
join
([
tab
+
data
_fn
(
s
)
for
s
in
res
.
split
(
'
\n
'
)])
+
tail
def
as_sxpr
(
self
,
src
:
str
=
None
,
compact
:
bool
=
False
)
->
str
:
def
as_sxpr
(
self
,
src
:
str
=
None
,
compact
:
bool
=
False
)
->
str
:
"""
Returns content as S-expression, i.e. in lisp-like form.
...
...
@@ -459,27 +475,33 @@ class Node(collections.abc.Sized):
tree structure.
"""
l
B
,
rB
,
D
=
(
''
,
''
,
1
)
if
compact
else
(
'('
,
'
\n
)'
,
0
)
l
eft_bracket
,
right_bracket
,
density
=
(
''
,
''
,
1
)
if
compact
else
(
'('
,
'
\n
)'
,
0
)
def
opening
(
node
)
->
str
:
s
=
lB
+
node
.
tag_name
"""Returns the opening string for the representation of `node`."""
txt
=
left_bracket
+
node
.
tag_name
# s += " '(pos %i)" % node.pos
if
src
:
s
+=
" '(pos %i "
%
node
.
pos
# + " %i %i)" % line_col(src, node.pos)
txt
+=
" '(pos %i "
%
node
.
pos
# + " %i %i)" % line_col(src, node.pos)
if
node
.
errors
:
s
+=
" '(err '(%s))"
%
' '
.
join
(
str
(
err
).
replace
(
'"'
,
r
'\"'
)
for
err
in
node
.
errors
)
return
s
+
'
\n
'
txt
+=
" '(err '(%s))"
%
' '
.
join
(
str
(
err
).
replace
(
'"'
,
r
'\"'
)
for
err
in
node
.
errors
)
return
txt
+
'
\n
'
def
pretty
(
s
):
return
'"%s"'
%
s
if
s
.
find
(
'"'
)
<
0
\
else
"'%s'"
%
s
if
s
.
find
(
"'"
)
<
0
\
else
'"%s"'
%
s
.
replace
(
'"'
,
r
'\"'
)
def
closing
(
node
)
->
str
:
"""Returns the closing string for the representation of `node`."""
return
right_bracket
return
self
.
_tree_repr
(
' '
,
opening
,
lambda
node
:
rB
,
pretty
,
density
=
D
)
def
pretty
(
strg
):
"""Encloses `strg` with the right kind of quotation marks."""
return
'"%s"'
%
strg
if
strg
.
find
(
'"'
)
<
0
\
else
"'%s'"
%
strg
if
strg
.
find
(
"'"
)
<
0
\
else
'"%s"'
%
strg
.
replace
(
'"'
,
r
'\"'
)
return
self
.
_tree_repr
(
' '
,
opening
,
closing
,
pretty
,
density
=
density
)
def
as_xml
(
self
,
src
:
str
=
None
)
->
str
:
def
as_xml
(
self
,
src
:
str
=
None
)
->
str
:
"""
Returns content as XML-tree.
...
...
@@ -490,23 +512,27 @@ class Node(collections.abc.Sized):
"""
def
opening
(
node
)
->
str
:
s
=
'<'
+
node
.
tag_name
"""Returns the opening string for the representation of `node`."""
txt
=
'<'
+
node
.
tag_name
# s += ' pos="%i"' % node.pos
if
src
:
s
+=
' line="%i" col="%i"'
%
line_col
(
src
,
node
.
pos
)
txt
+=
' line="%i" col="%i"'
%
line_col
(
src
,
node
.
pos
)
if
node
.
errors
:
s
+=
' err="%s"'
%
''
.
join
(
str
(
err
).
replace
(
'"'
,
r
'\"'
)
for
err
in
node
.
errors
)
return
s
+
">
\n
"
txt
+=
' err="%s"'
%
''
.
join
(
str
(
err
).
replace
(
'"'
,
r
'\"'
)
for
err
in
node
.
errors
)
return
txt
+
">
\n
"
def
closing
(
node
):
"""Returns the closing string for the representation of `node`."""
return
'
\n
</'
+
node
.
tag_name
+
'>'
return
self
.
_tree_repr
(
' '
,
opening
,
closing
,
density
=
1
)
def
structure
(
self
)
->
str
:
"""Return structure (and content) as S-expression on a single line
without any line breaks."""
"""
Return structure (and content) as S-expression on a single line
without any line breaks.
"""
return
flatten_sxpr
(
self
.
as_sxpr
())
...
...
@@ -522,33 +548,39 @@ class Node(collections.abc.Sized):
def
find
(
self
,
match_function
:
Callable
)
->
Iterator
[
'Node'
]:
"""Finds nodes in the tree that match a specific criterion.
"""
Finds nodes in the tree that match a specific criterion.
``find`` is a generator that yields all nodes for which the
given ``match_function`` evaluates to True. The tree is
given ``match_function`` evaluates to True. The tree is
traversed pre-order.
Args:
match_function (function): A function that takes a Node
object as argument and returns True or False
Yields:
Node: all nodes of the tree for which
Node: all nodes of the tree for which
``match_function(node)`` returns True
"""
if
match_function
(
self
):
yield
self
else
:
for
child
in
self
.
children
:
for
n
d
in
child
.
find
(
match_function
):
yield
n
d
for
n
ode
in
child
.
find
(
match_function
):
yield
n
ode
def
tree_size
(
self
)
->
int
:
"""Recursively counts the number of nodes in the tree including the root node."""
"""
Recursively counts the number of nodes in the tree including the root node.
"""
return
sum
(
child
.
tree_size
()
for
child
in
self
.
children
)
+
1
def
log
(
self
,
log_file_name
):
"""
Writes an S-expression of the tree with root `self` to a file.
"""
if
is_logging
():
path
=
os
.
path
.
join
(
log_dir
(),
log_file_name
)
if
os
.
path
.
exists
(
path
):
...
...
@@ -567,9 +599,13 @@ def mock_syntax_tree(sxpr):
"""
def
next_block
(
s
):
"""Generator that yields all characters until the next closing bracket
that does not match an opening bracket matched earlier within the same
package."""
s
=
s
.
strip
()
while
s
[
0
]
!=
')'
:
if
s
[
0
]
!=
'('
:
raise
ValueError
(
'"(" expected, not '
+
s
[:
10
])
if
s
[
0
]
!=
'('
:
raise
ValueError
(
'"(" expected, not '
+
s
[:
10
])
# assert s[0] == '(', s
level
=
1
k
=
1
...
...
@@ -583,28 +619,29 @@ def mock_syntax_tree(sxpr):
s
=
s
[
k
:].
strip
()
sxpr
=
sxpr
.
strip
()
if
sxpr
[
0
]
!=
'('
:
raise
ValueError
(
'"(" expected, not '
+
sxpr
[:
10
])
if
sxpr
[
0
]
!=
'('
:
raise
ValueError
(
'"(" expected, not '
+
sxpr
[:
10
])
# assert sxpr[0] == '(', sxpr
sxpr
=
sxpr
[
1
:].
strip
()
m
=
re
.
match
(
'[\w:]+'
,
sxpr
)
name
,
class_name
=
(
sxpr
[:
m
.
end
()].
split
(
':'
)
+
[
''
])[:
2
]
sxpr
=
sxpr
[
m
.
end
():].
strip
()
m
atch
=
re
.
match
(
r
'[\w:]+'
,
sxpr
)
name
,
class_name
=
(
sxpr
[:
m
atch
.
end
()].
split
(
':'
)
+
[
''
])[:
2
]
sxpr
=
sxpr
[
m
atch
.
end
():].
strip
()
if
sxpr
[
0
]
==
'('
:
result
=
tuple
(
mock_syntax_tree
(
block
)
for
block
in
next_block
(
sxpr
))
else
:
lines
=
[]
while
sxpr
and
sxpr
[
0
]
!=
')'
:
for
q
m
in
[
'"""'
,
"'''"
,
'"'
,
"'"
]:
m
=
re
.
match
(
q
m
+
r
'.*?'
+
q
m
,
sxpr
,
re
.
DOTALL
)
if
m
:
i
=
len
(
q
m
)
lines
.
append
(
sxpr
[
i
:
m
.
end
()
-
i
])
sxpr
=
sxpr
[
m
.
end
():].
strip
()
for
q
tmark
in
[
'"""'
,
"'''"
,
'"'
,
"'"
]:
m
atch
=
re
.
match
(
q
tmark
+
r
'.*?'
+
q
tmark
,
sxpr
,
re
.
DOTALL
)
if
m
atch
:
i
=
len
(
q
tmark
)
lines
.
append
(
sxpr
[
i
:
m
atch
.
end
()
-
i
])
sxpr
=
sxpr
[
m
atch
.
end
():].
strip
()
break
else
:
m
=
re
.
match
(
r
'(?:(?!\)).)*'
,
sxpr
,
re
.
DOTALL
)
lines
.
append
(
sxpr
[:
m
.
end
()])
sxpr
=
sxpr
[
m
.
end
():]
m
atch
=
re
.
match
(
r
'(?:(?!\)).)*'
,
sxpr
,
re
.
DOTALL
)
lines
.
append
(
sxpr
[:
m
atch
.
end
()])
sxpr
=
sxpr
[
m
atch
.
end
():]
result
=
"
\n
"
.
join
(
lines
)
return
Node
(
MockParser
(
name
,
':'
+
class_name
),
result
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment