24.09., 9:00 - 11:00: Due to updates GitLab will be unavailable for some minutes between 09:00 and 11:00.

Commit b25f5f57 authored by Phillip Samfass's avatar Phillip Samfass

some refactoring for progressing outstanding requests

parent b1d407ac
...@@ -67,7 +67,7 @@ void Timing::markTimeline(int tag) { ...@@ -67,7 +67,7 @@ void Timing::markTimeline(int tag) {
} else if (tag < 0) { } else if (tag < 0) {
if (timer.heartbeatTimes.at(getTeam()).size()) { if (timer.heartbeatTimes.at(getTeam()).size()) {
timer.heartbeatTimes.at(getTeam()).back() = PMPI_Wtime() - timer.heartbeatTimes.at(getTeam()).back(); timer.heartbeatTimes.at(getTeam()).back() = PMPI_Wtime() - timer.heartbeatTimes.at(getTeam()).back();
printf("World Rank: %d, team rank: %d, team: %d, submitted time %f\n", getWorldRank(), getTeamRank(), getTeam(),timer.heartbeatTimes.at(getTeam()).back()); //printf("World Rank: %d, team rank: %d, team: %d, submitted time %f\n", getWorldRank(), getTeamRank(), getTeam(),timer.heartbeatTimes.at(getTeam()).back());
compareProgressWithReplicas(); compareProgressWithReplicas();
} }
} else { } else {
...@@ -80,6 +80,24 @@ void Timing::markTimeline(int tag, const void *sendbuf, int sendcount, MPI_Datat ...@@ -80,6 +80,24 @@ void Timing::markTimeline(int tag, const void *sendbuf, int sendcount, MPI_Datat
//compareBufferWithReplicas(sendbuf, sendcount, sendtype); //compareBufferWithReplicas(sendbuf, sendcount, sendtype);
} }
void Timing::progressOutstandingRequests(int targetTeam) {
// Progress on outstanding receives and sends
auto it = timer.heartbeatTimeRequests.at(targetTeam).begin();
while (it != timer.heartbeatTimeRequests.at(targetTeam).end()) {
int flag;
PMPI_Test(&(*it), &flag, MPI_STATUS_IGNORE);
if (flag) {
if (!((*it) == MPI_REQUEST_NULL)){
MPI_Request_free(&(*it));
}
it = timer.heartbeatTimeRequests.at(targetTeam).erase(it);
}
++it;
}
}
void Timing::compareProgressWithReplicas() { void Timing::compareProgressWithReplicas() {
for (int r=0; r < getNumberOfTeams(); r++) { for (int r=0; r < getNumberOfTeams(); r++) {
if (r != getTeam()) { if (r != getTeam()) {
...@@ -99,20 +117,8 @@ void Timing::compareProgressWithReplicas() { ...@@ -99,20 +117,8 @@ void Timing::compareProgressWithReplicas() {
PMPI_Irecv(&timer.heartbeatTimes.at(r).back(), 1, MPI_DOUBLE, PMPI_Irecv(&timer.heartbeatTimes.at(r).back(), 1, MPI_DOUBLE,
mapTeamToWorldRank(getTeamRank(), r), r, getLibComm(), &timer.heartbeatTimeRequests.at(r).back()); mapTeamToWorldRank(getTeamRank(), r), r, getLibComm(), &timer.heartbeatTimeRequests.at(r).back());
} }
// Progress on outstanding receives and sends progressOutstandingRequests(r);
auto it = timer.heartbeatTimeRequests.at(r).begin();
while (it != timer.heartbeatTimeRequests.at(r).end()) {
int flag;
PMPI_Test(&(*it), &flag, MPI_STATUS_IGNORE);
if (flag) {
if (!((*it) == MPI_REQUEST_NULL)){
MPI_Request_free(&(*it));
}
it = timer.heartbeatTimeRequests.at(r).erase(it);
}
++it;
}
} }
} }
} }
...@@ -168,8 +174,8 @@ void Timing::outputTiming() { ...@@ -168,8 +174,8 @@ void Timing::outputTiming() {
std::cout.flush(); std::cout.flush();
PMPI_Barrier(MPI_COMM_WORLD); PMPI_Barrier(MPI_COMM_WORLD);
//TODO: finish outstanding communication requests //finish outstanding communication requests
/*bool finished_all = false; bool finished_all = false;
while(!finished_all) { while(!finished_all) {
for(int r=0; r<getNumberOfTeams(); r++) { for(int r=0; r<getNumberOfTeams(); r++) {
finished_all &= timer.heartbeatTimeRequests.at(r).empty(); finished_all &= timer.heartbeatTimeRequests.at(r).empty();
...@@ -177,7 +183,7 @@ void Timing::outputTiming() { ...@@ -177,7 +183,7 @@ void Timing::outputTiming() {
if(!finished_all) { if(!finished_all) {
} }
}*/ }
std::string filenamePrefix = getEnvString("TMPI_FILE"); std::string filenamePrefix = getEnvString("TMPI_FILE");
std::string outputPathPrefix = getEnvString("TMPI_OUTPUT_PATH"); std::string outputPathPrefix = getEnvString("TMPI_OUTPUT_PATH");
...@@ -229,9 +235,6 @@ void Timing::outputTiming() { ...@@ -229,9 +235,6 @@ void Timing::outputTiming() {
f << "heartbeatTimes"; f << "heartbeatTimes";
for (const double& t : timer.heartbeatTimes.at(getTeam())) { for (const double& t : timer.heartbeatTimes.at(getTeam())) {
if(t>1.5)
printf("World Rank: %d, team rank: %d, team: %d, time %f\n", getWorldRank(), getTeamRank(), getTeam(),t);
f << sep << t; f << sep << t;
} }
f << "\n"; f << "\n";
......
...@@ -26,6 +26,8 @@ void compareProgressWithReplicas(); ...@@ -26,6 +26,8 @@ void compareProgressWithReplicas();
// Also compare a hash of a heartbeat buffer // Also compare a hash of a heartbeat buffer
void compareBufferWithReplicas(const void *sendbuf, int sendcount, MPI_Datatype sendtype); void compareBufferWithReplicas(const void *sendbuf, int sendcount, MPI_Datatype sendtype);
void progressOutstandingRequests(int targetTeam);
void sleepRankRaised(); void sleepRankRaised();
void outputTiming(); void outputTiming();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment