Commit 5e73318f authored by Philipp Samfaß's avatar Philipp Samfaß
Browse files

added more doxygen docu

parent cba6d1e9
...@@ -21,8 +21,14 @@ namespace CommunicationStatistics { ...@@ -21,8 +21,14 @@ namespace CommunicationStatistics {
* Struct for storing the communication statistics * Struct for storing the communication statistics
*/ */
struct CommunicationStats { struct CommunicationStats {
std::atomic<size_t> sentBytes; /// number of sent bytes /**
std::atomic<size_t> receivedBytes; // number of received bytes * number of sent bytes
*/
std::atomic<size_t> sentBytes;
/**
* number of received bytes
*/
std::atomic<size_t> receivedBytes;
}; };
/** /**
* Computes the communication volume in bytes for a message of given datatype and count. * Computes the communication volume in bytes for a message of given datatype and count.
......
/* /**
* Logging.h * @file Logging.h
* * @brief Contains some logging functionality for teaMPI.
* Created on: 2 Mar 2018 * @author Ben Hazelwood, Philipp Samfass
* Author: Ben Hazelwood
*/ */
#ifndef LOGGING_H_ #ifndef LOGGING_H_
......
...@@ -106,12 +106,16 @@ MPI_Comm getTeamComm(MPI_Comm comm) { ...@@ -106,12 +106,16 @@ MPI_Comm getTeamComm(MPI_Comm comm) {
return (comm==MPI_COMM_WORLD) ? TMPI_COMM_TEAM : comm; return (comm==MPI_COMM_WORLD) ? TMPI_COMM_TEAM : comm;
} }
/**
 * Releases the team communicator TMPI_COMM_TEAM.
 * @return The status code reported by MPI_Comm_free.
 */
int freeTeamComm() {
  const int status = MPI_Comm_free(&TMPI_COMM_TEAM);
  return status;
}
MPI_Comm getTeamInterComm() { MPI_Comm getTeamInterComm() {
return TMPI_COMM_INTER_TEAM; return TMPI_COMM_INTER_TEAM;
} }
int freeTeamComm() { int freeTeamInterComm() {
return MPI_Comm_free(&TMPI_COMM_TEAM); return MPI_Comm_free(&TMPI_COMM_INTER_TEAM);
} }
MPI_Comm getLibComm() { MPI_Comm getLibComm() {
...@@ -183,12 +187,12 @@ int mapWorldToTeamRank(int rank) { ...@@ -183,12 +187,12 @@ int mapWorldToTeamRank(int rank) {
} }
} }
int mapTeamToWorldRank(int rank, int r) { int mapTeamToWorldRank(int rank, int team) {
if (rank == MPI_ANY_SOURCE) { if (rank == MPI_ANY_SOURCE) {
return MPI_ANY_SOURCE; return MPI_ANY_SOURCE;
} }
return rank + r * getTeamSize(); return rank + team * getTeamSize();
} }
int translateRank(MPI_Comm srcComm, int srcRank, MPI_Comm destComm) { int translateRank(MPI_Comm srcComm, int srcRank, MPI_Comm destComm) {
......
/* /**
* RankOperations.h * @file Rank.h
* * @brief Several routines to manage ranks and communicators in teaMPI.
* Created on: 2 Mar 2018 * @author Ben Hazelwood, Philipp Samfass
* Author: Ben Hazelwood, Philipp Samfass
*/ */
#ifndef RANK_H_ #ifndef RANK_H_
...@@ -32,71 +31,137 @@ ...@@ -32,71 +31,137 @@
extern MPI_Comm TMPI_COMM_DUP; extern MPI_Comm TMPI_COMM_DUP;
/* Split ranks into teams */ /**
* Split ranks into teams.
*/
int initialiseTMPI(MPI_Comm comm); int initialiseTMPI(MPI_Comm comm);
/**
* Returns rank of calling process in MPI_COMM_WORLD.
*/
int getWorldRank(); int getWorldRank();
/**
* Returns the number of processes in MPI_COMM_WORLD.
*/
int getWorldSize(); int getWorldSize();
/* Get the rank as seen by the application */ /**
* Get the rank as seen by the application
*/
int getTeamRank(); int getTeamRank();
/* Get the number of ranks as seen by the application */ /**
* Get the number of ranks as seen by the application
*/
int getTeamSize(); int getTeamSize();
/* Also the number of replicas */ /** Get the number of replicas==number of teams
*
*/
int getNumberOfTeams(); int getNumberOfTeams();
/* Return which team this rank belongs to */ /**
* Return which team this rank belongs to.
*/
int getTeam(); int getTeam();
/* The communicator used by this team */ /**
* Return the communicator used by this team
*/
MPI_Comm getTeamComm(MPI_Comm comm); MPI_Comm getTeamComm(MPI_Comm comm);
/**
* Frees the communicator used by the calling team
*/
int freeTeamComm(); int freeTeamComm();
/**
* Returns communicator for horizontal communication between replica ranks.
*/
MPI_Comm getTeamInterComm(); MPI_Comm getTeamInterComm();
/* The duplicate MPI_COMM_WORLD used by the library*/ /**
* Frees inter-team communicator.
*/
int freeTeamInterComm();
/**
* The duplicate MPI_COMM_WORLD used by the library
*/
MPI_Comm getLibComm(); MPI_Comm getLibComm();
/**
* Frees duplicate MPI_COMM_WORLD used by the library
*/
int freeLibComm(); int freeLibComm();
/* Get the value of an environment variable (empty string if undefined) */ /**
* Get the value of an environment variable (empty string if undefined)
*/
std::string getEnvString(std::string const& key); std::string getEnvString(std::string const& key);
/* Get the number of teams from environment */ /**
* Get the number of teams from environment
*/
void setEnvironment(); void setEnvironment();
/* Output team sizes and any timing inaccuracies between ranks */ /**
* Output team sizes and any timing inaccuracies between ranks
*/
void outputEnvironment(); void outputEnvironment();
/* Output the timing differences between replicas */ /**
* Output the timing differences between replicas
*/
void outputTiming(); void outputTiming();
/* Decide whether data should be manually corrupted upon next heartbeat */ /**
bool getShouldCorruptData(); * Maps a world rank to its team.
void setShouldCorruptData(bool toggle); * @param rank The input rank.
*/
int mapRankToTeamNumber(int rank); int mapRankToTeamNumber(int rank);
/**
* Maps a world rank to its team rank.
* @param rank The input rank.
*/
int mapWorldToTeamRank(int rank); int mapWorldToTeamRank(int rank);
int mapTeamToWorldRank(int rank, int r); /**
* Maps a team rank in some given team to the world rank.
* @param rank The input rank within a team
* @param team The team the input rank belongs to
*/
int mapTeamToWorldRank(int rank, int team);
/**
* Translates a rank in a src communicator to the matching rank in a destination communicator.
* @param srcComm The source communicator
* @param srcRank The rank in the source communicator
* @param destComm The destination communicator
*/
int translateRank(MPI_Comm srcComm, int srcRank, MPI_Comm destComm); int translateRank(MPI_Comm srcComm, int srcRank, MPI_Comm destComm);
/**
* Map a rank in a communicator comm to the world rank in MPI_COMM_WORLD.
* @param rank The input rank
* @param comm The communicator this rank number belongs to
*/
int mapToWorldRank(int rank, MPI_Comm comm); int mapToWorldRank(int rank, MPI_Comm comm);
/* Alters the MPI_SOURCE member of MPI_Status to 0 <= r < team size */ /**
* Alters the MPI_SOURCE member of MPI_Status to 0 <= r < team size
*/
void remapStatus(MPI_Status *status); void remapStatus(MPI_Status *status);
/* Barrier on team communicator */ /**
* Barrier on team communicator
*/
int synchroniseRanksInTeam(); int synchroniseRanksInTeam();
/* Barrier on all ranks (not called by application) */ /**
* Barrier on all ranks (not called by application)
*/
int synchroniseRanksGlobally(); int synchroniseRanksGlobally();
#endif /* RANK_H_ */ #endif /* RANK_H_ */
/* /*
* RankOperations.cpp * RankControl.cpp
* *
* Created on: 2 Jul 2018 * Created on: 2 Jul 2018
* Author: Ben Hazelwood * Author: Ben Hazelwood
*/ */
#include "RankControl.h"
#include <csignal> #include <csignal>
#include <unistd.h> #include <unistd.h>
#include "RankControl.h"
#include "Logging.h" #include "Logging.h"
#include "Timing.h" #include "Timing.h"
......
/* /**
* RankControl.h * @file RankControl.h
* * @brief Ranks can be paused or some of their data may be corrupted which is implemented by the functions declared in this file.
* This works by sending signals to the application processes which are caught in respective handlers.
* SIGUSR1 is the signal used to pause a rank.
* SIGUSR2 is the signal used to corrupt data on the next heartbeat.
* Created on: 2 Jul 2018 * Created on: 2 Jul 2018
* Author: Ben Hazelwood * @author Ben Hazelwood, Philipp Samfass
*/ */
#ifndef RANKCONTROL_H_ #ifndef RANKCONTROL_H_
#define RANKCONTROL_H_ #define RANKCONTROL_H_
/* /**
USR1 is used to pause a rank for 1s * USR1 is used to pause a rank for 1s
USR2 is used to corrupt the data on next heartbeat * USR2 is used to corrupt the data on next heartbeat
*/ */
void registerSignalHandler(); void registerSignalHandler();
// USR1 /**
* Signal handler for USR1.
*/
void pauseThisRankSignalHandler(int signum); void pauseThisRankSignalHandler(int signum);
// USR2 /**
* Signal handler for USR2.
*/
void corruptThisRankSignalHandler(int signum); void corruptThisRankSignalHandler(int signum);
/**
* Returns true if data should be corrupted.
*/
bool getShouldCorruptData(); bool getShouldCorruptData();
/**
* Disables/enables corruption in next heartbeat.
* @param toggle If true, corruption is triggered.
*/
void setShouldCorruptData(bool toggle); void setShouldCorruptData(bool toggle);
#endif #endif
\ No newline at end of file
/* /**
* Timing.h * @file Timing.h
* * @brief Manages heartbeats (sending and receiving) and contains functionality for dumping heartbeats post-mortem to an output file.
* Created on: 2 Mar 2018 * @author Ben Hazelwood, Philipp Samfass
* Author: Ben Hazelwood
*/ */
#ifndef TIMING_H_ #ifndef TIMING_H_
...@@ -10,29 +9,66 @@ ...@@ -10,29 +9,66 @@
#include <mpi.h> #include <mpi.h>
/**
* Contains routines for managing heartbeats.
*/
namespace Timing { namespace Timing {
// Mark time only for this heartbeat /**
* Tracks start and end of a heartbeat and stores time between heartbeats.
* @param tag Tag for this heartbeat. A positive tag x starts the heartbeat, the corresponding negative tag -x ends the heartbeat.
*/
void markTimeline(int tag); void markTimeline(int tag);
// Also mark the hash for the heartbeat buffer /**
* Tracks start and end of a heartbeat, stores time between heartbeats and also keeps track of hashes over send buffers (for corruption detection).
* @see markTimeline
* @param tag Tag for this heartbeat.
* @param sendbuf Send buffer that is hashed.
* @param sendcount Number of elements of type sendtype in the send buffer
* @param sendtype MPI datatype contained in sendbuffer
*/
void markTimeline(int tag, const void *sendbuf, int sendcount, MPI_Datatype sendtype); void markTimeline(int tag, const void *sendbuf, int sendcount, MPI_Datatype sendtype);
/**
* Initialises data structures for heartbeats.
*/
void initialiseTiming(); void initialiseTiming();
/**
* Destroys data structures for heartbeats.
*/
void finaliseTiming(); void finaliseTiming();
// Compare the time of heartbeat(s) with other replica(s) /**
* Compare the time of heartbeat(s) with other replica(s)
*/
void compareProgressWithReplicas(); void compareProgressWithReplicas();
// Also compare a hash of a heartbeat buffer /**
* Also compare a hash of a heartbeat buffer.
*/
void compareBufferWithReplicas(const void *sendbuf, int sendcount, MPI_Datatype sendtype); void compareBufferWithReplicas(const void *sendbuf, int sendcount, MPI_Datatype sendtype);
/**
* Probes for a heartbeat from the replica of a given team.
* @param targetTeam Team of the replica from which the heartbeat should be received.
*/
void pollForAndReceiveHeartbeat(int targetTeam); void pollForAndReceiveHeartbeat(int targetTeam);
/**
* Makes progress on outstanding communication requests for the given team.
* @param targetTeam Team for which progress on outstanding requests should be made.
*/
void progressOutstandingRequests(int targetTeam); void progressOutstandingRequests(int targetTeam);
/**
* Tracks points in time when sleep is invoked.
*/
void sleepRankRaised(); void sleepRankRaised();
/**
* Dumps heartbeat statistics
*/
void outputTiming(); void outputTiming();
}
}
#endif /* TIMING_H_ */ #endif /* TIMING_H_ */
...@@ -362,6 +362,7 @@ int MPI_Finalize() { ...@@ -362,6 +362,7 @@ int MPI_Finalize() {
#ifdef DirtyCleanUp #ifdef DirtyCleanUp
return MPI_SUCCESS; return MPI_SUCCESS;
#endif #endif
freeTeamInterComm();
return PMPI_Finalize(); return PMPI_Finalize();
} }
......
/**
* @file Wrapper.h
* @author Benjamin Hazelwood, Philipp Samfass
* @brief This file contains the wrapped MPI routines using the PMPI interface.
*/
#ifndef WRAPPER_H #ifndef WRAPPER_H
#define WRAPPER_H #define WRAPPER_H
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment