
Commit 9d1f65dd authored by Philipp Samfaß

married teaMPI with OffloadingMPI

parent 691ebd93
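In short: teaMPI's team setup no longer hard-codes MPI_COMM_WORLD but is initialised from a communicator handed over by the MPI offloading library, so that offloading server ranks can be kept out of the replicated teams. Below is a hedged sketch of what an application sees after this change; it is a hypothetical program, and it assumes (as the diff suggests but does not show) that MPI_Comm_size is wrapped analogously to MPI_Comm_rank.

// Hypothetical application, unchanged by this commit: it links against the
// teaMPI PMPI wrapper, so the MPI calls below are intercepted. With
// USE_MPI_OFFLOADING defined, MPI_Init additionally runs ompi_init() and
// initialises teaMPI with the offloading communicator _comm, and non-server
// ranks see their rank within their team rather than their world rank.
#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank = -1, size = -1;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);  // team rank (see the Wrapper.cpp hunk below)
  MPI_Comm_size(MPI_COMM_WORLD, &size);  // assumed to be wrapped analogously (not in this diff)
  std::printf("team rank %d of %d\n", rank, size);
  MPI_Finalize();  // finalises teaMPI for worker ranks, then calls ompi_finalize()
  return 0;
}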
 CC=mpiicpc
-CFLAGS += -fPIC -g -Wall -std=c++11
-LDFLAGS += -shared
+CFLAGS += -fPIC -g -Wall -std=c++11 -I${HOME}/mpi_offloading/offloading_lib
+LDFLAGS += -shared -L${HOME}/mpi_offloading/offloading_lib -lmpioffloading
 SRC = Rank.cpp RankControl.cpp Timing.cpp Wrapper.cpp teaMPI.cpp
 DEP = Rank.h RankControl.h Timing.h Wrapper.h Logging.h teaMPI.h
@@ -22,31 +22,31 @@ static int teamSize;
 static int numTeams;
 static MPI_Comm TMPI_COMM_TEAM;
-static MPI_Comm TMPI_COMM_DUP;
+MPI_Comm TMPI_COMM_DUP;
 static MPI_Comm TMPI_COMM_INTER_TEAM;
-int initialiseTMPI() {
+int initialiseTMPI( MPI_Comm comm) {
   /**
    * The application should have no knowledge of the world_size or world_rank
    */
   setEnvironment();
-  PMPI_Comm_size(MPI_COMM_WORLD, &worldSize);
-  PMPI_Comm_rank(MPI_COMM_WORLD, &worldRank);
+  PMPI_Comm_size(comm, &worldSize);
+  PMPI_Comm_rank(comm, &worldRank);
   teamSize = worldSize / numTeams;
   int color = worldRank / teamSize;
-  PMPI_Comm_dup(MPI_COMM_WORLD, &TMPI_COMM_DUP);
+  PMPI_Comm_dup(comm, &TMPI_COMM_DUP);
-  PMPI_Comm_split(MPI_COMM_WORLD, color, worldRank, &TMPI_COMM_TEAM);
+  PMPI_Comm_split(comm, color, worldRank, &TMPI_COMM_TEAM);
   PMPI_Comm_rank(TMPI_COMM_TEAM, &teamRank);
   PMPI_Comm_size(TMPI_COMM_TEAM, &teamSize);
   // Todo: free
-  PMPI_Comm_split(MPI_COMM_WORLD, teamRank, worldRank, &TMPI_COMM_INTER_TEAM);
+  PMPI_Comm_split(comm, teamRank, worldRank, &TMPI_COMM_INTER_TEAM);
   assert(teamSize == (worldSize / numTeams));
@@ -127,14 +127,14 @@ void outputEnvironment(){
   assert(worldSize % numTeams == 0);
-  PMPI_Barrier(MPI_COMM_WORLD);
+  PMPI_Barrier(TMPI_COMM_DUP);
   double my_time = MPI_Wtime();
-  PMPI_Barrier(MPI_COMM_WORLD);
+  PMPI_Barrier(TMPI_COMM_DUP);
   double times[worldSize];
-  PMPI_Gather(&my_time, 1, MPI_DOUBLE, times, 1, MPI_DOUBLE, MASTER, MPI_COMM_WORLD);
+  PMPI_Gather(&my_time, 1, MPI_DOUBLE, times, 1, MPI_DOUBLE, MASTER, TMPI_COMM_DUP);
-  PMPI_Barrier(MPI_COMM_WORLD);
+  PMPI_Barrier(TMPI_COMM_DUP);
   if (worldRank == MASTER) {
     std::cout << "------------TMPI SETTINGS------------\n";
@@ -152,7 +152,7 @@ void outputEnvironment(){
     std::cout << "---------------------------------------\n\n";
   }
-  PMPI_Barrier(MPI_COMM_WORLD);
+  PMPI_Barrier(TMPI_COMM_DUP);
 }
 void setEnvironment() {
@@ -30,8 +30,10 @@
 #error "Cannot decipher SIZE_MAX"
 #endif
+extern MPI_Comm TMPI_COMM_DUP;
 /* Split ranks into teams */
-int initialiseTMPI();
+int initialiseTMPI(MPI_Comm comm);
 int getWorldRank();
@@ -175,7 +175,7 @@ void Timing::sleepRankRaised() {
 void Timing::outputTiming() {
   std::cout.flush();
-  PMPI_Barrier(MPI_COMM_WORLD);
+  PMPI_Barrier(TMPI_COMM_DUP);
   //finish outstanding communication requests
   bool finished_all = false;
@@ -218,7 +218,7 @@ void Timing::outputTiming() {
     std::cout << "-------------------------------\n";
   }
   std::cout.flush();
-  PMPI_Barrier(MPI_COMM_WORLD);
+  PMPI_Barrier(TMPI_COMM_DUP);
   if (!filenamePrefix.empty()) {
     // Write Generic Sync points to files
@@ -259,6 +259,6 @@ void Timing::outputTiming() {
   }
   }*/
-  PMPI_Barrier(MPI_COMM_WORLD);
+  PMPI_Barrier(TMPI_COMM_DUP);
 }
@@ -7,15 +7,33 @@
 #include "Rank.h"
 #include "Timing.h"
+#ifdef USE_MPI_OFFLOADING
+#include "mpi_offloading.h"
+#include "mpi_offloading_common.h"
+#endif
 int MPI_Init(int *argc, char*** argv) {
   int err = PMPI_Init(argc, argv);
-  initialiseTMPI();
+#ifdef USE_MPI_OFFLOADING
+  ompi_init( (*argv)[0]);
+  if(!_is_server)
+    initialiseTMPI(_comm);
+#else
+  initialiseTMPI(MPI_COMM_WORLD);
+#endif
   return err;
 }
 int MPI_Init_thread( int *argc, char ***argv, int required, int *provided ) {
   int err = PMPI_Init_thread(argc, argv, required, provided);
-  initialiseTMPI();
+#ifdef USE_MPI_OFFLOADING
+  ompi_init( (*argv)[0]);
+  if(!_is_server)
+    initialiseTMPI(_comm);
+#else
+  initialiseTMPI(MPI_COMM_WORLD);
+#endif
   return err;
 }
@@ -27,9 +45,18 @@ int MPI_Is_thread_main(int* flag) {
 int MPI_Comm_rank(MPI_Comm comm, int *rank) {
   // todo: assert that a team comm is used
-  //assert(comm == MPI_COMM_WORLD);
+  //assert(comm == MPI_COMM_WORLD);
   if(comm==MPI_COMM_WORLD)
-    *rank = getTeamRank();
+#ifdef USE_MPI_OFFLOADING
+    if(!_is_server)
+      *rank = getTeamRank();
+    else {
+      PMPI_Comm_rank(_comm, rank);
+    }
+#else
+    *rank = getTeamRank();
+#endif
   else
     PMPI_Comm_rank(comm, rank);
   return MPI_SUCCESS;
@@ -142,6 +169,39 @@ int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag,
   return err;
 }
+#ifdef USE_MPI_OFFLOADING
+int MPI_Iprobe_offload(int source, int tag, MPI_Comm comm, int *flag, MPI_Status_Offload *status) {
+  int ierr;
+  assert(source==MPI_ANY_SOURCE);
+#if COMMUNICATION_MODE==0
+  ierr = ompi_iprobe_offload_p2p(source, tag, comm, flag, status);
+#elif COMMUNICATION_MODE==1
+  ierr = ompi_iprobe_offload_rma(source, tag, comm, flag, status);
+#endif
+  status->MPI_SOURCE = mapWorldToTeamRank(status->MPI_SOURCE);
+  return ierr;
+}
+int MPI_Send_offload(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) {
+#if COMMUNICATION_MODE==0
+  return ompi_send_offload_p2p(buf, count, datatype, mapTeamToWorldRank(dest, getTeam()), tag, comm);
+#elif COMMUNICATION_MODE==1
+  return ompi_send_offload_rma(buf, count, datatype, mapTeamToWorldRank(dest, getTeam()), tag, comm);
+#endif
+}
+int MPI_Recv_offload(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status_Offload *stat) {
+#if COMMUNICATION_MODE==0
+  return ompi_recv_offload_p2p(buf, count, datatype, mapTeamToWorldRank(source, getTeam()), tag, comm, stat);
+#elif COMMUNICATION_MODE==1
+  return ompi_recv_offload_rma(buf, count, datatype, mapTeamToWorldRank(source, getTeam()), tag, comm, stat);
+#endif
+}
+#endif
 int MPI_Barrier(MPI_Comm comm) {
   //assert(comm == MPI_COMM_WORLD);
   int err = synchroniseRanksInTeam();
@@ -203,12 +263,19 @@ int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
 }
 int MPI_Finalize() {
-  logInfo("Finalize");
-  Timing::finaliseTiming();
-  // Wait for all replicas before finalising
-  PMPI_Barrier(MPI_COMM_WORLD);
-  freeTeamComm();
-  Timing::outputTiming();
+#ifdef USE_MPI_OFFLOADING
+  if(!_is_server) {
+#endif
+    logInfo("Finalize");
+    Timing::finaliseTiming();
+    // Wait for all replicas before finalising
+    PMPI_Barrier(TMPI_COMM_DUP);
+    freeTeamComm();
+    Timing::outputTiming();
+#ifdef USE_MPI_OFFLOADING
+  }
+  ompi_finalize();
+#endif
   return PMPI_Finalize();
 }
@@ -3,6 +3,9 @@
 #include <mpi.h>
+#define USE_MPI_OFFLOADING
 /**
  * Sets up the MPI library and initialises the process
  * @param argc
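The commit also adds MPI_Iprobe_offload, MPI_Send_offload and MPI_Recv_offload wrappers that forward to the offloading library (point-to-point or RMA backend, selected by COMMUNICATION_MODE) and translate between team and world ranks. A rough usage sketch follows; it assumes that MPI_Status_Offload and the wrapper declarations are made available through the headers shown in the diff, and the tag, count and datatype are illustrative only.

#include <mpi.h>
#include "teaMPI.h"           // assumed to export the *_offload wrapper declarations
#include "mpi_offloading.h"   // assumed to declare MPI_Status_Offload (header name from the diff)

#ifdef USE_MPI_OFFLOADING
void pollOffloadedWork() {
  int flag = 0;
  MPI_Status_Offload status;
  // The wrapper asserts MPI_ANY_SOURCE and reports the sender as a team rank.
  MPI_Iprobe_offload(MPI_ANY_SOURCE, /*tag=*/0, MPI_COMM_WORLD, &flag, &status);
  if (flag) {
    double payload[128];  // illustrative size; a real caller would know the message length
    // The wrapper maps status.MPI_SOURCE (a team rank) back to a world rank
    // before handing the receive to the offloading library.
    MPI_Recv_offload(payload, 128, MPI_DOUBLE, status.MPI_SOURCE,
                     /*tag=*/0, MPI_COMM_WORLD, &status);
  }
}
#endif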