//================================================================================
// Name        : MSRSensorGroup.cpp
// Author      : Carla Guillen
// Contact     : info@dcdb.it
// Copyright   : Leibniz Supercomputing Centre
// Description : Source file for MSR sensor group class.
//================================================================================

//================================================================================
// This file is part of DCDB (DataCenter DataBase)
// Copyright (C) 2019-2019 Leibniz Supercomputing Centre
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
//================================================================================
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

#include "MSRSensorGroup.h"

#include <boost/log/core/record.hpp>
#include <boost/log/sources/record_ostream.hpp>
#include <boost/log/trivial.hpp>
#include <boost/log/utility/formatting_ostream.hpp>
#include <boost/parameter/keyword.hpp>
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <atomic>
#include <exception>
#include <utility>

43
#include "logging.h"
44
45
46
47
48
49
50
51
52
53
54
55
56
#include "timestamp.h"
#include "Types.h"
#include <sstream>
#include <iomanip>


/**
 * Construct an MSR sensor group.
 *
 * @param name  Group name; forwarded unchanged to the SensorGroupTemplate
 *              base class constructor.
 */
MSRSensorGroup::MSRSensorGroup(const std::string& name)
	: SensorGroupTemplate(name) {}

/**
 * Destructor. Performs no work of its own; in particular it does not close
 * the descriptors stored in cpuToFd (execOnStop() does that).
 */
MSRSensorGroup::~MSRSensorGroup() = default;

57
bool MSRSensorGroup::execOnStart() {
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
	for (auto &kv : cpuToFd) {
		int cpu = kv.first;
		char * path = new char[200];
		snprintf(path, 200, "/dev/cpu/%d/msr", cpu);
		int handle = open(path, O_RDWR);
		if (handle < 0) {   // try msr_safe
			snprintf(path, 200, "/dev/cpu/%d/msr_safe", cpu);
			handle = open(path, O_RDWR);
		}
		if (handle < 0){
			LOG(error) << "Can't open msr device " << path;
			delete [] path; // TODO do this with RAII
			continue;
		}
		delete [] path; //TODO do this with RAII
		cpuToFd[cpu] = handle;
	}

lu43jih's avatar
lu43jih committed
76
77
	program_fixed();

78
	return true;
79
80
}

81
void MSRSensorGroup::execOnStop() {
lu43jih's avatar
lu43jih committed
82
    //close file descriptors and leave counters running freely
83
84
85
    for (auto &kv: cpuToFd) {
    	close(kv.second);
        kv.second = -1;
Micha Mueller's avatar
Micha Mueller committed
86
    }
87
88
89
90
91
92
93
94
}

void MSRSensorGroup::read() {
	ureading_t reading;
	reading.timestamp = getTimestamp();

	try {
		for(auto s : _sensors) {
lu43jih's avatar
lu43jih committed
95
96
97
			auto ret_val = msr_read(s->getMetric(), &reading.value, s->getCpu());
			if(ret_val != -1){
				 s->storeReading(reading);
98
#ifdef DEBUG
lu43jih's avatar
lu43jih committed
99
				LOG(debug) << _groupName << "::" << s->getName() << " raw reading: \"" << reading.value << "\"";
100
#endif
lu43jih's avatar
lu43jih committed
101
			}
102
103
104
105
106
107
108
109
110
111
112
113
114
115
		}
	} catch (const std::exception& e) {
		LOG(error) << "Sensorgroup" << _groupName << " could not read value: " << e.what();
	}
}

/**
 * Read one 64-bit MSR of the given CPU.
 *
 * @param msr_number  Register address; used as the file offset for pread().
 * @param value       Destination for the register content.
 * @param cpu         CPU whose descriptor in cpuToFd is read from.
 * @return  Result of pread() (number of bytes read, -1 on error), or -1 if
 *          the CPU is not registered or its device is not open.
 */
int32_t MSRSensorGroup::msr_read(uint64_t msr_number, uint64_t * value, unsigned int cpu){
	// find() instead of operator[]: an unknown CPU must not default-insert
	// descriptor 0 into the map and then be read from.
	const auto it = cpuToFd.find(cpu);
	if (it == cpuToFd.end() || it->second < 0) {
		return -1;
	}
	return static_cast<int32_t>(pread(it->second, value, sizeof(uint64_t), msr_number));
}

/**
 * Write one 64-bit MSR of the given CPU.
 *
 * @param msr_number  Register address; used as the file offset for pwrite().
 * @param value       Value to write.
 * @param cpu         CPU whose descriptor in cpuToFd is written to.
 * @return  Result of pwrite() (number of bytes written, -1 on error), or -1
 *          if the CPU is not registered or its device is not open.
 */
int32_t MSRSensorGroup::msr_write(uint64_t msr_number, uint64_t value, unsigned int cpu){
	// find() instead of operator[]: an unknown CPU must not default-insert
	// descriptor 0 into the map and then be written to.
	const auto it = cpuToFd.find(cpu);
	if (it == cpuToFd.end() || it->second < 0) {
		return -1;
	}
	return static_cast<int32_t>(pwrite(it->second, &value, sizeof(uint64_t), msr_number));
}

Micha Mueller's avatar
Micha Mueller committed
116
117
118
119
120
121
/**
 * Program the fixed MSR as required for this plugin.
 *
 * @return  True if counters programmed successfully, false otherwise, e.g.
 *          because the counters are already in use.
 */
122
void MSRSensorGroup::program_fixed(){
Micha Mueller's avatar
Micha Mueller committed
123

124
125
126
	for (auto &kv : cpuToFd) {
		// program core counters

Micha Mueller's avatar
Micha Mueller committed
127
128
	    //we do not want to interrupt other services already doing measurements with MSRs
        //therefore check if any fixed counter is currently enabled
129
		struct FixedEventControlRegister ctrl_reg;
Micha Mueller's avatar
Micha Mueller committed
130

131
		msr_read(IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value, kv.first);
132
133
134
135
136
		//are they all enabled?
		if (ctrl_reg.fields.os0 && ctrl_reg.fields.usr0 && ctrl_reg.fields.os1
						&& ctrl_reg.fields.usr1 && ctrl_reg.fields.os2
						&& ctrl_reg.fields.usr2) {
			//yes! Free running counters were set by someone else => we don't need to program them, just read them.
lu43jih's avatar
lu43jih committed
137
			LOG(debug) << "CPU" << kv.first << " has free running counter, so there will be no fixed counter programming";
138
			continue;
Micha Mueller's avatar
Micha Mueller committed
139
		}
140
		//not all of them (or none) are enabled => we program them again
Micha Mueller's avatar
Micha Mueller committed
141
142
143
144

		// disable counters while programming
        msr_write(IA32_CR_PERF_GLOBAL_CTRL, 0, kv.first);

145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
		ctrl_reg.fields.os0 = 1;
		ctrl_reg.fields.usr0 = 1;
		ctrl_reg.fields.any_thread0 = 0;
		ctrl_reg.fields.enable_pmi0 = 0;

		ctrl_reg.fields.os1 = 1;
		ctrl_reg.fields.usr1 = 1;
		ctrl_reg.fields.any_thread1 = 0;
		ctrl_reg.fields.enable_pmi1 = 0;

		ctrl_reg.fields.os2 = 1;
		ctrl_reg.fields.usr2 = 1;
		ctrl_reg.fields.any_thread2 = 0;
		ctrl_reg.fields.enable_pmi2 = 0;

		ctrl_reg.fields.reserved1 = 0;
161

162
		// program them
163
164
		msr_write(IA32_CR_FIXED_CTR_CTRL, ctrl_reg.value, kv.first);

165
		// start counting, enable 3 fixed counters (enable also the programmables counters)
lu43jih's avatar
lu43jih committed
166
		uint64_t value = (1ULL << 0) + (1ULL << 1) + (1ULL << 2) + (1ULL << 3) + (1ULL << 32) + (1ULL << 33) + (1ULL << 34);
167
		//uint64_t value = (1ULL << 32) + (1ULL << 33) + (1ULL << 34);
168
169
170
171
172
173
174
		msr_write(IA32_CR_PERF_GLOBAL_CTRL, value, kv.first);
	}
}

/**
 * Register a CPU with this group.
 *
 * @param cpu  CPU number used as key in cpuToFd; its descriptor entry is set
 *             to -1 because no file descriptor has been assigned yet.
 */
void MSRSensorGroup::addCpu(unsigned int cpu){
	auto &descriptor = cpuToFd[cpu];
	descriptor = -1;
}
175

176
177
178
179
180
181
182
183
/**
 * Collect the numbers of all CPUs registered with this group.
 *
 * @return  The keys of cpuToFd, in the map's iteration order.
 */
std::vector<unsigned> MSRSensorGroup::getCpus() {
  std::vector<unsigned> cpus;
  cpus.reserve(cpuToFd.size());   // one allocation instead of repeated growth
  // const reference: do not copy every (cpu, fd) pair just to read its key
  for (const auto &kv : cpuToFd) {
    cpus.push_back(kv.first);
  }
  return cpus;
}

184
/**
 * Log the CPUs of this group as one comma-separated line.
 *
 * @param ll  Log level the line is emitted with.
 */
void MSRSensorGroup::printGroupConfig(LOG_LEVEL ll) {
    std::stringstream cpuList;
    bool isFirst = true;
    for (auto &entry : cpuToFd) {
        // every element but the first is preceded by the delimiter
        if (!isFirst) {
            cpuList << ", ";
        }
        cpuList << entry.first;
        isFirst = false;
    }

    LOG_VAR(ll) << "            CPUs:  " << cpuList.str();
}