Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
9.2.2023: Due to updates GitLab will be unavailable for some minutes between 9:00 and 11:00.
Open sidebar
badw-it
DHParser
Commits
f2162cfb
Commit
f2162cfb
authored
Dec 22, 2017
by
eckhart
Browse files
- early position handling finished
parent
cc0f248d
Changes
5
Hide whitespace changes
Inline
Side-by-side
DHParser/parsers.py
View file @
f2162cfb
...
...
@@ -868,11 +868,10 @@ class Grammar:
Returns:
Node: The root node of the parse tree.
"""
def
add
_pos
(
node
:
Node
,
predecessors
:
List
[
Node
])
->
int
:
def
tail
_pos
(
predecessors
:
List
[
Node
])
->
int
:
"""Adds the position after the last node in the list of
predecessors to the node."""
node
.
_pos
=
predecessors
[
-
1
].
_pos
+
len
(
predecessors
[
-
1
])
if
predecessors
else
0
return
node
return
predecessors
[
-
1
].
pos
+
len
(
predecessors
[
-
1
])
if
predecessors
else
0
# assert isinstance(document, str), type(document)
if
self
.
root__
is
None
:
...
...
@@ -897,7 +896,7 @@ class Grammar:
if
not
rest
:
result
,
_
=
parser
(
rest
)
if
result
is
None
:
result
=
Node
(
None
,
''
)
result
=
Node
(
None
,
''
)
.
init_pos
(
0
)
result
.
add_error
(
'Parser "%s" did not match empty document.'
%
str
(
parser
))
while
rest
and
len
(
stitches
)
<
MAX_DROPOUTS
:
result
,
rest
=
parser
(
rest
)
...
...
@@ -917,24 +916,24 @@ class Grammar:
if
self
.
history_tracking__
else
"..."
))
if
len
(
stitches
)
<
MAX_DROPOUTS
else
" too often! Terminating parser."
)
stitches
.
append
(
add_pos
(
Node
(
None
,
skip
)
,
stitches
))
stitches
.
append
(
Node
(
None
,
skip
)
.
init_pos
(
tail_pos
(
stitches
))
)
stitches
[
-
1
].
add_error
(
error_msg
)
if
self
.
history_tracking__
:
# some parsers may have matched and left history records with nodes != None.
# Because these are not connected to the stitched root node, their pos-
# properties will not be initialized by setting the root node's pos property
# to zero. Therefore, their pos properties need to be initialized here
for
record
in
self
.
history__
:
if
record
.
node
and
record
.
node
.
_pos
<
0
:
record
.
node
.
pos
=
0
#
#
some parsers may have matched and left history records with nodes != None.
#
#
Because these are not connected to the stitched root node, their pos-
#
#
properties will not be initialized by setting the root node's pos property
#
#
to zero. Therefore, their pos properties need to be initialized here
#
for record in self.history__:
#
if record.node and record.node._pos < 0:
#
record.node.
init_pos(0)
record
=
HistoryRecord
(
self
.
call_stack__
.
copy
(),
stitches
[
-
1
],
rest
)
self
.
history__
.
append
(
record
)
# stop history tracking when parser returned too early
self
.
history_tracking__
=
False
if
stitches
:
if
rest
:
stitches
.
append
(
add_pos
(
Node
(
None
,
rest
)
,
stitches
))
result
=
add_pos
(
Node
(
None
,
tuple
(
stitches
))
,
[])
stitches
.
append
(
Node
(
None
,
rest
)
.
init_pos
(
tail_pos
(
stitches
))
)
result
=
Node
(
None
,
tuple
(
stitches
))
if
any
(
self
.
variables__
.
values
()):
error_str
=
"Capture-retrieve-stack not empty after end of parsing: "
+
\
str
(
self
.
variables__
)
...
...
@@ -943,12 +942,12 @@ class Grammar:
# add another child node at the end to ensure that the position
# of the error will be the end of the text. Otherwise, the error
# message above ("...after end of parsing") would appear illogical.
error_node
=
Node
(
ZOMBIE_PARSER
,
''
)
error_node
=
Node
(
ZOMBIE_PARSER
,
''
)
.
init_pos
(
tail_pos
(
result
.
children
))
error_node
.
add_error
(
error_str
)
result
.
result
=
result
.
children
+
(
add_pos
(
error_node
,
result
.
children
),
)
result
.
result
=
result
.
children
+
(
error_node
,)
else
:
result
.
add_error
(
error_str
)
result
.
pos
=
0
# calculate all positions
#
result.pos = 0 # calculate all positions
# result.collect_errors(self.document__)
return
result
...
...
@@ -1529,7 +1528,7 @@ class Series(NaryOperator):
# Provide useful error messages
match
=
text
.
search
(
Series
.
RX_ARGUMENT
)
i
=
max
(
1
,
text
.
index
(
match
.
regs
[
1
][
0
]))
if
match
else
1
node
=
Node
(
self
,
text_
[:
i
])
node
=
Node
(
self
,
text_
[:
i
])
.
init_pos
(
self
.
grammar
.
document_length__
-
len
(
text_
))
node
.
add_error
(
'%s expected; "%s"... found!'
%
(
str
(
parser
),
text_
[:
10
].
replace
(
'
\n
'
,
'
\\
n '
)),
code
=
Error
.
MANDATORY_CONTINUATION
)
...
...
DHParser/stringview.py
View file @
f2162cfb
...
...
@@ -89,6 +89,7 @@ class StringView(collections.abc.Sized):
__slots__
=
[
'text'
,
'begin'
,
'end'
,
'len'
,
'fullstring_flag'
]
def
__init__
(
self
,
text
:
str
,
begin
:
Optional
[
int
]
=
0
,
end
:
Optional
[
int
]
=
None
)
->
None
:
assert
isinstance
(
text
,
str
)
self
.
text
=
text
# type: str
self
.
begin
,
self
.
end
=
real_indices
(
begin
,
end
,
len
(
text
))
self
.
len
=
max
(
self
.
end
-
self
.
begin
,
0
)
# type: int
...
...
DHParser/syntaxtree.py
View file @
f2162cfb
...
...
@@ -229,6 +229,7 @@ class Node(collections.abc.Sized):
"""
self
.
error_flag
=
0
# type: int
self
.
_errors
=
[]
# type: List[Error]
self
.
_pos
=
-
1
# type: int
# Assignment to self.result initializes the attributes _result, children and _len
# The following if-clause is merely an optimization, i.e. a fast-path for leaf-Nodes
if
leafhint
:
...
...
@@ -237,8 +238,6 @@ class Node(collections.abc.Sized):
self
.
_len
=
-
1
# type: int # lazy evaluation
else
:
self
.
result
=
result
# self.pos: int = 0 # continuous updating of pos values wastes a lot of time
self
.
_pos
=
-
1
# type: int
self
.
parser
=
parser
or
ZOMBIE_PARSER
...
...
@@ -321,6 +320,8 @@ class Node(collections.abc.Sized):
self
.
children
=
(
result
,)
self
.
_result
=
self
.
children
self
.
error_flag
=
result
.
error_flag
if
self
.
_pos
<
0
:
self
.
_pos
=
result
.
_pos
else
:
if
isinstance
(
result
,
tuple
):
self
.
children
=
result
...
...
@@ -328,6 +329,8 @@ class Node(collections.abc.Sized):
if
result
:
if
self
.
error_flag
==
0
:
self
.
error_flag
=
max
(
child
.
error_flag
for
child
in
self
.
children
)
if
self
.
_pos
<
0
:
self
.
_pos
=
result
[
0
].
_pos
else
:
self
.
children
=
NoChildren
self
.
_result
=
str
(
result
)
...
...
@@ -368,17 +371,33 @@ class Node(collections.abc.Sized):
return
self
.
_pos
@
pos
.
setter
def
pos
(
self
,
pos
:
int
):
assert
self
.
_pos
==
pos
,
str
(
"%i != %i"
%
(
self
.
_pos
,
pos
))
offset
=
0
# @pos.setter
# def pos(self, pos: int):
# assert self._pos == pos, str("%i != %i" % (self._pos, pos))
# offset = 0
# # recursively adjust pos-values of all children
# for child in self.children:
# assert child.pos == pos + offset
# offset += len(child)
# # add pos-values to Error-objects
# for err in self._errors:
# err.pos = pos
def
init_pos
(
self
,
pos
:
int
,
overwrite
:
bool
=
False
)
->
'Node'
:
if
overwrite
or
self
.
_pos
<
0
:
self
.
_pos
=
pos
for
err
in
self
.
_errors
:
err
.
pos
=
pos
else
:
assert
self
.
_pos
==
pos
,
str
(
"%i != %i"
%
(
self
.
_pos
,
pos
))
# recursively adjust pos-values of all children
offset
=
self
.
pos
for
child
in
self
.
children
:
assert
child
.
pos
==
pos
+
offset
offset
+=
len
(
child
)
# add pos-values to Error-objects
for
err
in
self
.
_errors
:
err
.
pos
=
pos
child
.
init_pos
(
offset
)
offset
=
child
.
pos
+
len
(
child
)
return
self
@
property
...
...
test/test_parser.py
View file @
f2162cfb
...
...
@@ -147,8 +147,8 @@ class TestRegex:
assert
result
assert
not
messages
,
str
(
messages
)
parser
=
compile_python_object
(
DHPARSER_IMPORTS
+
result
,
'\w+Grammar$'
)()
node
,
rest
=
parser
.
regex
(
StringView
(
'abc+def'
)
)
assert
rest
==
''
node
=
parser
(
'abc+def'
,
parser
.
regex
)
assert
not
node
.
error_flag
assert
node
.
parser
.
name
==
"regex"
assert
str
(
node
)
==
'abc+def'
...
...
test/test_syntaxtree.py
View file @
f2162cfb
...
...
@@ -120,7 +120,7 @@ class TestNode:
assert
len
(
nd2
)
==
3
,
"Expected Node.len == 3, got %i"
%
len
(
nd2
)
nd
=
Node
(
None
,
(
nd1
,
nd2
))
assert
len
(
nd
)
==
6
,
"Expected Node.len == 6, got %i"
%
len
(
nd
)
nd
.
pos
=
0
nd
.
init_pos
(
0
)
assert
nd
.
pos
==
0
,
"Expected Node.pos == 0, got %i"
%
nd
.
pos
assert
nd1
.
pos
==
0
,
"Expected Node.pos == 0, got %i"
%
nd1
.
pos
assert
nd2
.
pos
==
3
,
"Expected Node.pos == 3, got %i"
%
nd2
.
pos
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment