Timing.cpp 5.56 KB
Newer Older
1
2
3
4
/*
 * Timing.cpp
 *
 *  Created on: 2 Mar 2018
 *      Author: Ben Hazelwood
 */

#include "Timing.h"

Ben Hazelwood's avatar
Ben Hazelwood committed
10
11
#include <deque>
#include <fstream>
#include <map>
12
#include <set>
Ben Hazelwood's avatar
Ben Hazelwood committed
13
14
15
#include <sstream>
#include <string>
#include <utility>
16
#include <stddef.h>
17
#include <bitset>
Ben Hazelwood's avatar
Ben Hazelwood committed
18
19

#include "Logging.h"
Ben Hazelwood's avatar
Ben Hazelwood committed
20
#include "Rank.h"
21
22

struct Timer {
Ben Hazelwood's avatar
Ben Hazelwood committed
23
24
  double startTime;
  double endTime;
25
26

  std::map< int, std::vector<double> > syncPoints;
27
28
29
30
  std::map< int, std::vector<MPI_Request> > syncRequests;

  std::map<int, std::vector<std::size_t> > hashes;
  std::map<int, std::vector<MPI_Request> > hashRequests;
Ben Hazelwood's avatar
Ben Hazelwood committed
31
32
} timer;

33
void Timing::initialiseTiming() {
Ben Hazelwood's avatar
Ben Hazelwood committed
34
  synchroniseRanksInTeam();
35
  timer.startTime = PMPI_Wtime();
Ben Hazelwood's avatar
Ben Hazelwood committed
36
  for (int i=0; i < getNumberOfTeams(); i++) {
37
    timer.syncPoints.insert(std::make_pair(i,std::vector<double>()));
38
39
40
41
    timer.syncRequests.insert(std::make_pair(i,std::vector<MPI_Request>()));

    timer.hashes.insert(std::make_pair(i,std::vector<std::size_t>()));
    timer.hashRequests.insert(std::make_pair(i,std::vector<MPI_Request>()));
42
43
44
45
  }
}

void Timing::finaliseTiming() {
Ben Hazelwood's avatar
Ben Hazelwood committed
46
  synchroniseRanksInTeam();
47
48
49
50
  timer.endTime = PMPI_Wtime();
}

void Timing::markTimeline() {
Ben Hazelwood's avatar
Ben Hazelwood committed
51
    timer.syncPoints.at(getTeam()).push_back(PMPI_Wtime());
52
    compareProgressWithReplicas();
53
54
}

55
56
57
58
59
/**
 * Records a sync point and additionally cross-checks a message buffer.
 *
 * @param sendbuf   start of the buffer to be compared between replicas
 * @param sendcount number of elements in sendbuf
 * @param sendtype  MPI datatype of one element
 */
void Timing::markTimeline(const void *sendbuf, int sendcount, MPI_Datatype sendtype)
{
  // Plain heartbeat first: timestamp this call and exchange it.
  markTimeline();

  // Then hash the payload and exchange the hash with the other teams.
  compareBufferWithReplicas(sendbuf, sendcount, sendtype);
}

60
void Timing::compareProgressWithReplicas() {
Ben Hazelwood's avatar
Ben Hazelwood committed
61
62
  for (int r=0; r < getNumberOfTeams(); r++) {
    if (r != getTeam()) {
Ben Hazelwood's avatar
Ben Hazelwood committed
63
      // Send out this replica's times
64
      MPI_Request request;
Ben Hazelwood's avatar
Ben Hazelwood committed
65
66
67
      PMPI_Isend(&timer.syncPoints.at(getTeam()).back(), 1, MPI_DOUBLE,
                mapTeamToWorldRank(getTeamRank(), r), getTeam(),
                getLibComm(), &request);
68
69
70
71
      MPI_Request_free(&request);

      // Receive times from other replicas
      timer.syncPoints.at(r).push_back(0.0);
72
      timer.syncRequests.at(r).push_back(MPI_Request());
73
      PMPI_Irecv(&timer.syncPoints.at(r).back(), 1, MPI_DOUBLE,
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
                 mapTeamToWorldRank(getTeamRank(), r), r, getLibComm(), &timer.syncRequests.at(r).back());

      // Test for completion of Irecv's
      int numPending = 0;
      for (int i=0; i < timer.syncRequests.at(r).size(); i++) {
        int flag = 0;
        PMPI_Test(&timer.syncRequests.at(r).at(i), &flag, MPI_STATUS_IGNORE);
        numPending += 1 - flag;
      }
    }
  }
}

void Timing::compareBufferWithReplicas(const void *sendbuf, int sendcount, MPI_Datatype sendtype) {
  if (getShouldCorruptData()) {
89
90
    //TODO can remove const here (assuming data was originally non-const) and corrupt properly, no need for now
    sendcount++; // This isn't really that safe either...
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
    setShouldCorruptData(false);
  }

  int typeSize;
  MPI_Type_size(sendtype, &typeSize);

  std::string bits((const char*)sendbuf, sendcount*typeSize);
  std::hash<std::string> hash_fn;
  std::size_t hash = hash_fn(bits);
  timer.hashes.at(getTeam()).push_back((std::size_t)hash);

  for (int r=0; r < getNumberOfTeams(); r++) {
    if (r != getTeam()) {
      // Send out this replica's times
      MPI_Request request;
      PMPI_Isend(&timer.hashes.at(getTeam()).back(), 1, TMPI_SIZE_T,
                mapTeamToWorldRank(getTeamRank(), r), getTeam(),
                getLibComm(), &request);
      MPI_Request_free(&request);

      // Receive times from other replicas
      timer.hashes.at(r).push_back(0);
      timer.hashRequests.at(r).push_back(MPI_Request());
      PMPI_Irecv(&timer.hashes.at(r).back(), 1, TMPI_SIZE_T,
                 mapTeamToWorldRank(getTeamRank(), r), r, getLibComm(), &timer.hashRequests.at(r).back());
116
117
118

      // Test for completion of Irecv's
      int numPending = 0;
119
      for (int i=0; i < timer.hashRequests.at(r).size(); i++) {
120
        int flag = 0;
121
        PMPI_Test(&timer.hashRequests.at(r).at(i), &flag, MPI_STATUS_IGNORE);
122
123
        numPending += 1 - flag;
      }
124
      std::cout << "Num pending: " << numPending << "\n";
125
126
    }
  }
Ben Hazelwood's avatar
Ben Hazelwood committed
127
128
129
}

void Timing::outputTiming() {
130
131
  std::cout.flush();
  PMPI_Barrier(MPI_COMM_WORLD);
Ben Hazelwood's avatar
Ben Hazelwood committed
132

133
  // Output simple replica timings
134
  if ((getTeamRank() == MASTER) && (getWorldRank() != MASTER)) {
Ben Hazelwood's avatar
Ben Hazelwood committed
135
    PMPI_Send(&timer.endTime, 1, MPI_DOUBLE, MASTER, 0, getLibComm());
Ben Hazelwood's avatar
Ben Hazelwood committed
136
137
  }

138

139
140
141
142
143
144
145
146
  if (getWorldRank() == MASTER) {
    std::cout << std::endl;
    std::cout << "----------TMPI_TIMING----------\n";
    std::cout << "timing_file=";
#ifdef TMPI_TIMING
    std::cout << "tmpi_filename.csv";
#else
    std::cout << "timing_not_enabled";
Ben Hazelwood's avatar
Ben Hazelwood committed
147
#endif
148
    std::cout << "\n";
Ben Hazelwood's avatar
Ben Hazelwood committed
149
150
    std::cout << "num_replicas=" << getNumberOfTeams() << "\n";
    for (int i=0; i < getNumberOfTeams(); i++) {
151
152
153
154
      double rEndTime = 0.0;
      if (i == MASTER) {
        rEndTime = timer.endTime;
      } else {
Ben Hazelwood's avatar
Ben Hazelwood committed
155
        PMPI_Recv(&rEndTime, 1, MPI_DOUBLE, mapTeamToWorldRank(MASTER, i), 0, getLibComm(), MPI_STATUS_IGNORE);
156
157
158
159
160
161
162
163
      }

      std::cout << "replica_" << i << "=" << rEndTime << "\n";
    }
    std::cout << "-------------------------------\n";
  }
  std::cout.flush();
  PMPI_Barrier(MPI_COMM_WORLD);
164
165
166
167
168
169
170
171
172

  // Write Generic Sync points to files
  char sep = ',';
  std::ostringstream filename;
  std::string outputFolder("tmpi-timings");
  filename << outputFolder << "/"
      << "timings" << "-"
      << getWorldRank() << "-"
      << getTeamRank() << "-"
Ben Hazelwood's avatar
Ben Hazelwood committed
173
      << getTeam()
174
175
176
177
178
179
      << ".csv";
  std::ofstream f;
  f.open(filename.str().c_str());

  logInfo("Writing timings to " << filename);

180
  f << "endTime" << sep << timer.endTime - timer.startTime << "\n";
181
182

  f << "syncPoints";
Ben Hazelwood's avatar
Ben Hazelwood committed
183
  for (const double& t : timer.syncPoints.at(getTeam())) {
184
    f << sep << t - timer.startTime;
185
186
187
188
189
190
  }
  f << "\n";

  f.close();

  PMPI_Barrier(MPI_COMM_WORLD);
191
192
}