Commit 899d4ceb authored by Weronika

Fixed a bug in the call to nvmlDeviceGetComputeRunningProcesses and cleaned up the code

parent c4a3b55e
...@@ -28,10 +28,6 @@ ...@@ -28,10 +28,6 @@
#include "nvmlConfigurator.h" #include "nvmlConfigurator.h"
nvmlConfigurator::nvmlConfigurator() { nvmlConfigurator::nvmlConfigurator() {
/*
* TODO
* If you want sensor or group to be named differently in the config file, you can change it here
*/
_gpuFeatureMAP["GPU_ENERGY"] = GPU_ENERGY; _gpuFeatureMAP["GPU_ENERGY"] = GPU_ENERGY;
_gpuFeatureMAP["GPU_POWER"] = GPU_POWER; _gpuFeatureMAP["GPU_POWER"] = GPU_POWER;
...@@ -56,7 +52,6 @@ nvmlConfigurator::nvmlConfigurator() { ...@@ -56,7 +52,6 @@ nvmlConfigurator::nvmlConfigurator() {
nvmlConfigurator::~nvmlConfigurator() {} nvmlConfigurator::~nvmlConfigurator() {}
void nvmlConfigurator::sensorBase(nvmlSensorBase& s, CFG_VAL config) { void nvmlConfigurator::sensorBase(nvmlSensorBase& s, CFG_VAL config) {
//ADD {
BOOST_FOREACH (boost::property_tree::iptree::value_type &val, config) { BOOST_FOREACH (boost::property_tree::iptree::value_type &val, config) {
if (boost::iequals(val.first, "feature")) { if (boost::iequals(val.first, "feature")) {
...@@ -67,25 +62,13 @@ void nvmlConfigurator::sensorBase(nvmlSensorBase& s, CFG_VAL config) { ...@@ -67,25 +62,13 @@ void nvmlConfigurator::sensorBase(nvmlSensorBase& s, CFG_VAL config) {
LOG(warning) << " feature \"" << val.second.data() << "\" not known."; LOG(warning) << " feature \"" << val.second.data() << "\" not known.";
} }
} }
} // }
//}
} }
void nvmlConfigurator::sensorGroup(nvmlSensorGroup& s, CFG_VAL config) { void nvmlConfigurator::sensorGroup(nvmlSensorGroup& s, CFG_VAL config) {}
ADD {
/*
* TODO
* Add ATTRIBUTE macros for sensorGroup attributes
*/
}
}
void nvmlConfigurator::printConfiguratorConfig(LOG_LEVEL ll) { void nvmlConfigurator::printConfiguratorConfig(LOG_LEVEL ll) {
/*
* TODO
* Log attributes here for debug reasons or delete this method if there are
* not attributes to log.
*/
LOG_VAR(ll) << " NumSpacesAsIndention: " << 2; LOG_VAR(ll) << " NumSpacesAsIndention: " << 2;
} }
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
* @defgroup nvml nvml plugin * @defgroup nvml nvml plugin
* @ingroup pusherplugins * @ingroup pusherplugins
* *
* Collect data from the nvml interface.e * Collect data from the nvml interface
*/ */
#ifndef NVML_NVMLSENSORBASE_H_ #ifndef NVML_NVMLSENSORBASE_H_
...@@ -91,19 +91,8 @@ class nvmlSensorBase : public SensorBase { ...@@ -91,19 +91,8 @@ class nvmlSensorBase : public SensorBase {
return *this; return *this;
} }
/*
* TODO
* Getters and Setters for plugin specific attributes
*/
void printConfig(LOG_LEVEL ll, LOGGER& lg, unsigned leadingSpaces=16) { void printConfig(LOG_LEVEL ll, LOGGER& lg, unsigned leadingSpaces=16) {
/*
* TODO
* Log attributes here for debug reasons
*/
std::string leading(leadingSpaces, ' '); std::string leading(leadingSpaces, ' ');
std::string feature("unknown"); std::string feature("unknown");
...@@ -159,10 +148,6 @@ class nvmlSensorBase : public SensorBase { ...@@ -159,10 +148,6 @@ class nvmlSensorBase : public SensorBase {
protected: protected:
GPU_FEATURE _featureType; GPU_FEATURE _featureType;
/*
* TODO
* Add plugin specific attributes here
*/
}; };
......
...@@ -31,8 +31,8 @@ ...@@ -31,8 +31,8 @@
// Used to ensure we get a sensible value of energy by computing the difference // Used to ensure we get a sensible value of energy by computing the difference
// between calls to the read function // between calls to the read function
static int isfirsttime=0; static int isfirsttime=0;
struct env_t { struct env_t {
nvmlDevice_t device; nvmlDevice_t device;
} env; } env;
...@@ -51,7 +51,8 @@ struct counters_t { ...@@ -51,7 +51,8 @@ struct counters_t {
unsigned long long ecc_counts; unsigned long long ecc_counts;
nvmlUtilization_t utilization; nvmlUtilization_t utilization;
unsigned int pcie_throughput; unsigned int pcie_throughput;
nvmlProcessInfo_t running_prcs; unsigned int procs_cnt;
//nvmlProcessInfo_t running_prcs;
} counters; } counters;
nvmlSensorGroup::nvmlSensorGroup(const std::string& name) : nvmlSensorGroup::nvmlSensorGroup(const std::string& name) :
...@@ -66,60 +67,40 @@ nvmlSensorGroup::~nvmlSensorGroup() {} ...@@ -66,60 +67,40 @@ nvmlSensorGroup::~nvmlSensorGroup() {}
nvmlSensorGroup& nvmlSensorGroup::operator=(const nvmlSensorGroup& other) { nvmlSensorGroup& nvmlSensorGroup::operator=(const nvmlSensorGroup& other) {
SensorGroupTemplate::operator=(other); SensorGroupTemplate::operator=(other);
/*
* TODO
* Implement assignment operator
*/
return *this; return *this;
} }
void nvmlSensorGroup::execOnInit() { void nvmlSensorGroup::execOnInit() {
/*
* TODO
* Implement one time initialization logic for this group here
* (e.g. allocate memory for buffer) or remove this method if not
* required.
*/
// FR Add the contents of init_environment in here
nvmlReturn_t err;
nvmlReturn_t err;
err = nvmlInit(); err = nvmlInit();
err = nvmlDeviceGetHandleByIndex(0,&(env.device)); err = nvmlDeviceGetHandleByIndex(0,&(env.device));
err = nvmlDeviceGetTotalEnergyConsumption(env.device,&(counters.energy_initial)); err = nvmlDeviceGetTotalEnergyConsumption(env.device,&(counters.energy_initial));
// FR
} }
bool nvmlSensorGroup::execOnStart() { bool nvmlSensorGroup::execOnStart() {
//FR
cudaError_t cerr; cudaError_t cerr;
cerr = cudaProfilerStart(); cerr = cudaProfilerStart();
// FR
return true; return true;
} }
void nvmlSensorGroup::execOnStop() { void nvmlSensorGroup::execOnStop() {
/* /*
* TODO
* Implement logic when the group stops polling here * Implement logic when the group stops polling here
* (e.g. close a file descriptor) or remove this method if not required. * (e.g. close a file descriptor) or remove this method if not required.
*/ */
// FR
cudaError_t cerr; cudaError_t cerr;
cerr = cudaProfilerStop(); cerr = cudaProfilerStop();
// FR
} }
void nvmlSensorGroup::read() { void nvmlSensorGroup::read() {
reading_t reading; reading_t reading;
reading.timestamp = getTimestamp(); reading.timestamp = getTimestamp();
// FR
nvmlReturn_t err; nvmlReturn_t err;
unsigned long long temp; unsigned long long temp;
// FR
try { try {
for(auto s : _sensors) { for(auto s : _sensors) {
switch(s->getFeatureType()){ switch(s->getFeatureType()){
...@@ -137,9 +118,7 @@ void nvmlSensorGroup::read() { ...@@ -137,9 +118,7 @@ void nvmlSensorGroup::read() {
err = nvmlDeviceGetTotalEnergyConsumption(env.device,&(counters.energy_current)); err = nvmlDeviceGetTotalEnergyConsumption(env.device,&(counters.energy_current));
} }
temp=counters.energy_current - counters.energy_previous; // Take difference and compute energy in millijoules temp=counters.energy_current - counters.energy_previous; // Take difference and compute energy in millijoules
// You might want to consider putting this in the else block so we always measure something?
reading.value = temp; reading.value = temp;
// FR
break; break;
case(GPU_POWER): case(GPU_POWER):
err = nvmlDeviceGetPowerUsage(env.device,&(counters.power)); err = nvmlDeviceGetPowerUsage(env.device,&(counters.power));
...@@ -189,14 +168,16 @@ void nvmlSensorGroup::read() { ...@@ -189,14 +168,16 @@ void nvmlSensorGroup::read() {
err = nvmlDeviceGetTotalEccErrors (env.device, NVML_MEMORY_ERROR_TYPE_CORRECTED,NVML_VOLATILE_ECC,&(counters.ecc_counts)); err = nvmlDeviceGetTotalEccErrors (env.device, NVML_MEMORY_ERROR_TYPE_CORRECTED,NVML_VOLATILE_ECC,&(counters.ecc_counts));
reading.value = counters.ecc_counts; reading.value = counters.ecc_counts;
break; break;
case(GPU_PCIE_THRU): case(GPU_PCIE_THRU):
err = nvmlDeviceGetPcieThroughput (env.device, NVML_PCIE_UTIL_COUNT,&(counters.pcie_throughput)); err = nvmlDeviceGetPcieThroughput (env.device, NVML_PCIE_UTIL_COUNT,&(counters.pcie_throughput));
reading.value = counters.pcie_throughput; reading.value = counters.pcie_throughput;
break; break;
case(GPU_RUN_PRCS): case(GPU_RUN_PRCS):
err = nvmlDeviceGetComputeRunningProcesses (env.device,0,&(counters.running_prcs)); //err = nvmlDeviceGetComputeRunningProcesses (env.device,0,&(counters.running_prcs));
reading.value = counters.running_prcs.pid; //reading.value = counters.running_prcs.pid;
break; err = nvmlDeviceGetComputeRunningProcesses (env.device,&(counters.procs_cnt),NULL);
reading.value = counters.procs_cnt;
break;
} }
s->storeReading(reading); s->storeReading(reading);
#ifdef DEBUG #ifdef DEBUG
...@@ -209,10 +190,6 @@ void nvmlSensorGroup::read() { ...@@ -209,10 +190,6 @@ void nvmlSensorGroup::read() {
} }
void nvmlSensorGroup::printGroupConfig(LOG_LEVEL ll, unsigned int leadingSpaces) { void nvmlSensorGroup::printGroupConfig(LOG_LEVEL ll, unsigned int leadingSpaces) {
/*
* TODO
* Log attributes here for debug reasons
*/
LOG_VAR(ll) << " NumSpacesAsIndention: " << 12; LOG_VAR(ll) << " NumSpacesAsIndention: " << 12;
} }
...@@ -29,13 +29,9 @@ ...@@ -29,13 +29,9 @@
#define NVML_NVMLSENSORGROUP_H_ #define NVML_NVMLSENSORGROUP_H_
#include "../../includes/SensorGroupTemplate.h" #include "../../includes/SensorGroupTemplate.h"
#include "nvmlSensorBase.h" #include "nvmlSensorBase.h"
// FR Include files needed for NVML etc
#include <nvml.h> #include <nvml.h>
#include <cuda_profiler_api.h> #include <cuda_profiler_api.h>
// FR
/** /**
* @brief SensorGroupTemplate specialization for this plugin. * @brief SensorGroupTemplate specialization for this plugin.
...@@ -52,21 +48,12 @@ public: ...@@ -52,21 +48,12 @@ public:
void execOnInit() final override; void execOnInit() final override;
bool execOnStart() final override; bool execOnStart() final override;
void execOnStop() final override; void execOnStop() final override;
/*
* TODO
* Add getter and setters for group attributes if required
*/
void printGroupConfig(LOG_LEVEL ll, unsigned int leadingSpaces) final override; void printGroupConfig(LOG_LEVEL ll, unsigned int leadingSpaces) final override;
private: private:
void read() final override; void read() final override;
/*
* TODO
* Add group internal attributes
*/
}; };
#endif /* NVML_NVMLSENSORGROUP_H_ */ #endif /* NVML_NVMLSENSORGROUP_H_ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment