Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit b25f5f57 authored by Phillip Samfass
Browse files

some refactoring for progressing outstanding requests

parent b1d407ac
......@@ -67,7 +67,7 @@ void Timing::markTimeline(int tag) {
} else if (tag < 0) {
if (timer.heartbeatTimes.at(getTeam()).size()) {
timer.heartbeatTimes.at(getTeam()).back() = PMPI_Wtime() - timer.heartbeatTimes.at(getTeam()).back();
printf("World Rank: %d, team rank: %d, team: %d, submitted time %f\n", getWorldRank(), getTeamRank(), getTeam(),timer.heartbeatTimes.at(getTeam()).back());
//printf("World Rank: %d, team rank: %d, team: %d, submitted time %f\n", getWorldRank(), getTeamRank(), getTeam(),timer.heartbeatTimes.at(getTeam()).back());
compareProgressWithReplicas();
}
} else {
......@@ -80,6 +80,24 @@ void Timing::markTimeline(int tag, const void *sendbuf, int sendcount, MPI_Datat
//compareBufferWithReplicas(sendbuf, sendcount, sendtype);
}
/**
 * Progress on outstanding receives and sends for one team.
 *
 * Tests every request stored in timer.heartbeatTimeRequests for
 * @p targetTeam and removes the ones that have completed, leaving only the
 * requests that are still pending in the container.
 *
 * @param targetTeam key of the team whose request container is processed;
 *                   the lookup uses .at(), so the entry must already exist.
 */
void Timing::progressOutstandingRequests(int targetTeam) {
  // Look the per-team container up once instead of on every iteration.
  auto &requests = timer.heartbeatTimeRequests.at(targetTeam);

  auto it = requests.begin();
  while (it != requests.end()) {
    int flag = 0;
    PMPI_Test(&(*it), &flag, MPI_STATUS_IGNORE);
    if (flag) {
      // NOTE(review): for non-persistent requests a successful test should
      // already have reset the handle to MPI_REQUEST_NULL, so this branch is
      // presumably only a safety net - confirm before removing it.
      if (!((*it) == MPI_REQUEST_NULL)) {
        MPI_Request_free(&(*it));
      }
      // erase() returns the iterator to the element after the removed one,
      // so it must NOT be incremented again: doing so skipped an element and
      // stepped past end() whenever the last request was the one erased.
      it = requests.erase(it);
    } else {
      ++it;
    }
  }
}
void Timing::compareProgressWithReplicas() {
for (int r=0; r < getNumberOfTeams(); r++) {
if (r != getTeam()) {
......@@ -99,20 +117,8 @@ void Timing::compareProgressWithReplicas() {
PMPI_Irecv(&timer.heartbeatTimes.at(r).back(), 1, MPI_DOUBLE,
mapTeamToWorldRank(getTeamRank(), r), r, getLibComm(), &timer.heartbeatTimeRequests.at(r).back());
}
// Progress on outstanding receives and sends
auto it = timer.heartbeatTimeRequests.at(r).begin();
while (it != timer.heartbeatTimeRequests.at(r).end()) {
int flag;
PMPI_Test(&(*it), &flag, MPI_STATUS_IGNORE);
if (flag) {
if (!((*it) == MPI_REQUEST_NULL)){
MPI_Request_free(&(*it));
}
it = timer.heartbeatTimeRequests.at(r).erase(it);
}
++it;
}
progressOutstandingRequests(r);
}
}
}
......@@ -168,8 +174,8 @@ void Timing::outputTiming() {
std::cout.flush();
PMPI_Barrier(MPI_COMM_WORLD);
//TODO: finish outstanding communication requests
/*bool finished_all = false;
//finish outstanding communication requests
bool finished_all = false;
while(!finished_all) {
for(int r=0; r<getNumberOfTeams(); r++) {
finished_all &= timer.heartbeatTimeRequests.at(r).empty();
......@@ -177,7 +183,7 @@ void Timing::outputTiming() {
if(!finished_all) {
}
}*/
}
std::string filenamePrefix = getEnvString("TMPI_FILE");
std::string outputPathPrefix = getEnvString("TMPI_OUTPUT_PATH");
......@@ -229,9 +235,6 @@ void Timing::outputTiming() {
f << "heartbeatTimes";
for (const double& t : timer.heartbeatTimes.at(getTeam())) {
if(t>1.5)
printf("World Rank: %d, team rank: %d, team: %d, time %f\n", getWorldRank(), getTeamRank(), getTeam(),t);
f << sep << t;
}
f << "\n";
......
......@@ -26,6 +26,8 @@ void compareProgressWithReplicas();
// Also compare a hash of a heartbeat buffer
void compareBufferWithReplicas(const void *sendbuf, int sendcount, MPI_Datatype sendtype);
void progressOutstandingRequests(int targetTeam);
void sleepRankRaised();
void outputTiming();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment