Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Tobias Weinzierl
teaMPI
Commits
5e73318f
Commit
5e73318f
authored
May 06, 2021
by
Philipp Samfaß
Browse files
added more doxygen docu
parent
cba6d1e9
Changes
9
Show whitespace changes
Inline
Side-by-side
lib/CommStats.h
View file @
5e73318f
...
...
@@ -21,8 +21,14 @@ namespace CommunicationStatistics {
* Struct for storing the communication statistics
*/
struct
CommunicationStats
{
std
::
atomic
<
size_t
>
sentBytes
;
/// number of sent bytes
std
::
atomic
<
size_t
>
receivedBytes
;
// number of received bytes
/**
* number of sent bytes
*/
std
::
atomic
<
size_t
>
sentBytes
;
/**
* number of received bytes
*/
std
::
atomic
<
size_t
>
receivedBytes
;
};
/**
* Computes the communication volume in bytes for a message of given datatype and count.
...
...
lib/Logging.h
View file @
5e73318f
/*
* Logging.h
*
* Created on: 2 Mar 2018
* Author: Ben Hazelwood
/**
* @file Logging.h
* @brief Contains some logging functionality for teaMPI.
* @author Ben Hazelwood, Philipp Samfass
*/
#ifndef LOGGING_H_
...
...
lib/Rank.cpp
View file @
5e73318f
...
...
@@ -106,12 +106,16 @@ MPI_Comm getTeamComm(MPI_Comm comm) {
return
(
comm
==
MPI_COMM_WORLD
)
?
TMPI_COMM_TEAM
:
comm
;
}
int
freeTeamComm
()
{
return
MPI_Comm_free
(
&
TMPI_COMM_TEAM
);
}
MPI_Comm
getTeamInterComm
()
{
return
TMPI_COMM_INTER_TEAM
;
}
int
freeTeamComm
()
{
return
MPI_Comm_free
(
&
TMPI_COMM_TEAM
);
int
freeTeam
Inter
Comm
()
{
return
MPI_Comm_free
(
&
TMPI_COMM_
INTER_
TEAM
);
}
MPI_Comm
getLibComm
()
{
...
...
@@ -183,12 +187,12 @@ int mapWorldToTeamRank(int rank) {
}
}
int
mapTeamToWorldRank
(
int
rank
,
int
r
)
{
int
mapTeamToWorldRank
(
int
rank
,
int
team
)
{
if
(
rank
==
MPI_ANY_SOURCE
)
{
return
MPI_ANY_SOURCE
;
}
return
rank
+
r
*
getTeamSize
();
return
rank
+
team
*
getTeamSize
();
}
int
translateRank
(
MPI_Comm
srcComm
,
int
srcRank
,
MPI_Comm
destComm
)
{
...
...
lib/Rank.h
View file @
5e73318f
/*
* RankOperations.h
*
* Created on: 2 Mar 2018
* Author: Ben Hazelwood, Philipp Samfass
/**
* @file Rank.h
* @brief Several routines to manage ranks and communicators in teaMPI.
* @author Ben Hazelwood, Philipp Samfass
*/
#ifndef RANK_H_
...
...
@@ -32,71 +31,137 @@
extern
MPI_Comm
TMPI_COMM_DUP
;
/* Split ranks into teams */
/**
* Split ranks into teams.
*/
int
initialiseTMPI
(
MPI_Comm
comm
);
/**
* Returns rank of calling process in MPI_COMM_WORLD.
*/
int
getWorldRank
();
/**
* Returns the size of MPI_COMM_WORLD.
*/
int
getWorldSize
();
/* Get the rank as seen by the application */
/**
* Get the rank as seen by the application
*/
int
getTeamRank
();
/* Get the number of ranks as seen by the application */
/**
* Get the number of ranks as seen by the application
*/
int
getTeamSize
();
/* Also the number of replicas */
/** Get the number of replicas==number of teams
*
*/
int
getNumberOfTeams
();
/* Return which team this rank belongs to */
/**
* Return which team this rank belongs to.
*/
int
getTeam
();
/* The communicator used by this team */
/**
* Return the communicator used by this team
*/
MPI_Comm
getTeamComm
(
MPI_Comm
comm
);
/**
* Frees the communicator used by the calling team
*/
int
freeTeamComm
();
/**
* Returns communicator for horizontal communication between replica ranks.
*/
MPI_Comm
getTeamInterComm
();
/* The duplicate MPI_COMM_WORLD used by the library*/
/**
* Frees inter-team communicator.
*/
int
freeTeamInterComm
();
/**
* The duplicate MPI_COMM_WORLD used by the library
*/
MPI_Comm
getLibComm
();
/**
* Frees duplicate MPI_COMM_WORLD used by the library
*/
int
freeLibComm
();
/* Get the value of an environment variable (empty string if undefined) */
/**
* Get the value of an environment variable (empty string if undefined)
*/
std
::
string
getEnvString
(
std
::
string
const
&
key
);
/* Get the number of teams from environment */
/**
* Get the number of teams from environment
*/
void
setEnvironment
();
/* Output team sizes and any timing inaccuracies between ranks */
/**
* Output team sizes and any timing inaccuracies between ranks
*/
void
outputEnvironment
();
/* Output the timing differences between replicas */
/**
* Output the timing differences between replicas
*/
void
outputTiming
();
/* Decide whether data should be manually corrupted upon next heartbeat */
bool
getShouldCorruptData
();
void
setShouldCorruptData
(
bool
toggle
);
/**
* Maps a world rank to its team.
* @param rank The input rank.
*/
int
mapRankToTeamNumber
(
int
rank
);
/**
* Maps a world rank to its team rank.
* @param rank The input rank.
*/
int
mapWorldToTeamRank
(
int
rank
);
int
mapTeamToWorldRank
(
int
rank
,
int
r
);
/**
* Maps a team rank in some given team to the world rank.
* @param rank The input rank within a team
* @param team The team the input rank belongs to
*/
int
mapTeamToWorldRank
(
int
rank
,
int
team
);
/**
* Translates a rank in a src communicator to the matching rank in a destination communicator.
* @param srcComm The source communicator
* @param srcRank The rank in the source communicator
* @param destComm The destination communicator
*/
int
translateRank
(
MPI_Comm
srcComm
,
int
srcRank
,
MPI_Comm
destComm
);
/**
* Map a rank in a communicator comm to the world rank in MPI_COMM_WORLD.
* @param rank The input rank
* @param comm The communicator this rank number belongs to
*/
int
mapToWorldRank
(
int
rank
,
MPI_Comm
comm
);
/* Alters the MPI_SOURCE member of MPI_Status to 0 <= r < team size */
/**
* Alters the MPI_SOURCE member of MPI_Status to 0 <= r < team size
*/
void
remapStatus
(
MPI_Status
*
status
);
/* Barrier on team communicator */
/**
* Barrier on team communicator
*/
int
synchroniseRanksInTeam
();
/* Barrier on all ranks (not called by application) */
/**
* Barrier on all ranks (not called by application)
*/
int
synchroniseRanksGlobally
();
#endif
/* RANK_H_ */
lib/RankControl.cpp
View file @
5e73318f
/*
* Rank
Operations
.cpp
* Rank
Control
.cpp
*
* Created on: 2 Jul 2018
* Author: Ben Hazelwood
*/
#include
"RankControl.h"
#include
<csignal>
#include
<unistd.h>
#include
"RankControl.h"
#include
"Logging.h"
#include
"Timing.h"
...
...
lib/RankControl.h
View file @
5e73318f
/*
* RankControl.h
*
/**
* @file RankControl.h
* @brief Ranks can be paused or some of their data may be corrupted which is implemented by the functions declared in this file.
* This works by sending signals to the application processes which are caught in respective handlers.
* SIGUSR1 is the signal used to pause a rank.
* SIGUSR2 is the signal used to corrupt data on the next heartbeat.
* Created on: 2 Jul 2018
*
A
uthor: Ben Hazelwood
*
@a
uthor: Ben Hazelwood
, Philipp Samfass
*/
#ifndef RANKCONTROL_H_
#define RANKCONTROL_H_
/*
USR1 is used to pause a rank for 1s
USR2 is used to corrupt the data on next heartbeat
/*
*
*
USR1 is used to pause a rank for 1s
*
USR2 is used to corrupt the data on next heartbeat
*/
void
registerSignalHandler
();
// USR1
/**
* Signal handler for USR1.
*/
void
pauseThisRankSignalHandler
(
int
signum
);
// USR2
/**
* Signal handler for USR2.
*/
void
corruptThisRankSignalHandler
(
int
signum
);
/**
* Returns true if data should be corrupted.
*/
bool
getShouldCorruptData
();
/**
* Disables/enables corruption in next heartbeat.
* @param toggle If true, corruption is triggered.
*/
void
setShouldCorruptData
(
bool
toggle
);
#endif
lib/Timing.h
View file @
5e73318f
/*
* Timing.h
*
* Created on: 2 Mar 2018
* Author: Ben Hazelwood
/**
* @file Timing.h
* @brief Manages heartbeats (sending and receiving) and contains functionality for dumping heartbeats post-mortem to an output file.
* @author Ben Hazelwood, Philipp Samfass
*/
#ifndef TIMING_H_
...
...
@@ -10,29 +9,66 @@
#include
<mpi.h>
/**
* Contains routines for managing heartbeats.
*/
namespace
Timing
{
// Mark time only for this heartbeat
/**
* Tracks start and end of a heartbeat and stores time between heartbeats.
* @param tag Tag for this heartbeat. A positive tag x starts the heartbeat, the corresponding negative tag -x ends the heartbeat.
*/
void
markTimeline
(
int
tag
);
// Also mark the hash for the heartbeat buffer
/**
* Tracks start and end of a heartbeat, stores time between heartbeats and also keeps track of hashes over send buffers (for corruption detection).
* @see markTimeline
* @param tag Tag for this heartbeat.
* @param sendbuf Send buffer that is hashed.
* @param sendcount Number of elements of type sendtype in the send buffer
* @param sendtype MPI datatype contained in sendbuffer
*/
void
markTimeline
(
int
tag
,
const
void
*
sendbuf
,
int
sendcount
,
MPI_Datatype
sendtype
);
/**
* Initialises data structures for heartbeats.
*/
void
initialiseTiming
();
/**
* Destroys data structures for heartbeats.
*/
void
finaliseTiming
();
// Compare the time of heartbeat(s) with other replica(s)
/**
* Compare the time of heartbeat(s) with other replica(s)
*/
void
compareProgressWithReplicas
();
// Also compare a hash of a heartbeat buffer
/**
* Also compare a hash of a heartbeat buffer.
*/
void
compareBufferWithReplicas
(
const
void
*
sendbuf
,
int
sendcount
,
MPI_Datatype
sendtype
);
/**
* Probes for a heartbeat from the replica of a given team.
* @param targetTeam Team of the replica from which the heartbeat should be received.
*/
void
pollForAndReceiveHeartbeat
(
int
targetTeam
);
/**
* Makes progress on outstanding communication requests for the given team.
* @param targetTeam Team for which progress on outstanding requests should be made.
*/
void
progressOutstandingRequests
(
int
targetTeam
);
/**
* Tracks points in time when sleep is invoked.
*/
void
sleepRankRaised
();
/**
* Dumps heartbeat statistics
*/
void
outputTiming
();
}
}
#endif
/* TIMING_H_ */
lib/Wrapper.cpp
View file @
5e73318f
...
...
@@ -362,6 +362,7 @@ int MPI_Finalize() {
#ifdef DirtyCleanUp
return
MPI_SUCCESS
;
#endif
freeTeamInterComm
();
return
PMPI_Finalize
();
}
...
...
lib/Wrapper.h
View file @
5e73318f
/**
* @file Wrapper.h
* @author Benjamin Hazelwood, Philipp Samfass
* @brief This file contains the wrapped MPI routines using the PMPI interface.
*/
#ifndef WRAPPER_H
#define WRAPPER_H
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment