24.09., 9:00 - 11:00: Due to updates GitLab will be unavailable for some minutes between 09:00 and 11:00.

Commit 899d4ceb authored by Weronika's avatar Weronika

Fixed a bug in the call to nvmlDeviceGetComputeRunningProcesses and cleaned up the code

parent c4a3b55e
......@@ -28,10 +28,6 @@
#include "nvmlConfigurator.h"
nvmlConfigurator::nvmlConfigurator() {
/*
* TODO
* If you want sensor or group to be named differently in the config file, you can change it here
*/
_gpuFeatureMAP["GPU_ENERGY"] = GPU_ENERGY;
_gpuFeatureMAP["GPU_POWER"] = GPU_POWER;
......@@ -56,7 +52,6 @@ nvmlConfigurator::nvmlConfigurator() {
// Nothing to release explicitly in the configurator's destructor.
nvmlConfigurator::~nvmlConfigurator() = default;
void nvmlConfigurator::sensorBase(nvmlSensorBase& s, CFG_VAL config) {
//ADD {
BOOST_FOREACH (boost::property_tree::iptree::value_type &val, config) {
if (boost::iequals(val.first, "feature")) {
......@@ -67,25 +62,13 @@ void nvmlConfigurator::sensorBase(nvmlSensorBase& s, CFG_VAL config) {
LOG(warning) << " feature \"" << val.second.data() << "\" not known.";
}
}
} //
//}
}
}
/**
 * @brief Read group-level attributes from the configuration.
 *
 * This function parses no group-specific attributes, so it is
 * intentionally a no-op.
 *
 * @param s       Sensor group to configure (unused).
 * @param config  Configuration subtree of the group (unused).
 */
void nvmlConfigurator::sensorGroup(nvmlSensorGroup& s, CFG_VAL config) {}
/**
 * @brief Log the configurator-level settings.
 *
 * Only a fixed indentation width is reported.
 *
 * @param ll  Log level at which the message is emitted.
 */
void nvmlConfigurator::printConfiguratorConfig(LOG_LEVEL ll) {
    // Indentation width reported for the configurator section.
    const int indentWidth = 2;
    LOG_VAR(ll) << " NumSpacesAsIndention: " << indentWidth;
}
......@@ -29,7 +29,7 @@
* @defgroup nvml nvml plugin
* @ingroup pusherplugins
*
* Collect data from the nvml interface.e
* Collect data from the nvml interface
*/
#ifndef NVML_NVMLSENSORBASE_H_
......@@ -91,19 +91,8 @@ class nvmlSensorBase : public SensorBase {
return *this;
}
/*
* TODO
* Getters and Setters for plugin specific attributes
*/
void printConfig(LOG_LEVEL ll, LOGGER& lg, unsigned leadingSpaces=16) {
/*
* TODO
* Log attributes here for debug reasons
*/
std::string leading(leadingSpaces, ' ');
std::string feature("unknown");
......@@ -159,10 +148,6 @@ class nvmlSensorBase : public SensorBase {
protected:
GPU_FEATURE _featureType;
/*
* TODO
* Add plugin specific attributes here
*/
};
......
......@@ -31,8 +31,8 @@
// Used to ensure we get a sensible value of energy by computing the difference
// between calls to the read function
static int isfirsttime=0;
// File-level NVML state shared by the functions of this sensor group.
// `device` is the NVML device handle; it is filled in by execOnInit()
// through nvmlDeviceGetHandleByIndex(0, ...) and used by the NVML queries.
struct env_t {
nvmlDevice_t device;
} env;
......@@ -51,7 +51,8 @@ struct counters_t {
unsigned long long ecc_counts;
nvmlUtilization_t utilization;
unsigned int pcie_throughput;
nvmlProcessInfo_t running_prcs;
unsigned int procs_cnt;
//nvmlProcessInfo_t running_prcs;
} counters;
nvmlSensorGroup::nvmlSensorGroup(const std::string& name) :
......@@ -66,60 +67,40 @@ nvmlSensorGroup::~nvmlSensorGroup() {}
/**
 * @brief Copy assignment.
 *
 * Delegates to SensorGroupTemplate::operator= and returns *this.
 * No further plugin-specific state is copied in this function.
 *
 * @param other  Group to copy from.
 * @return Reference to this group.
 */
nvmlSensorGroup& nvmlSensorGroup::operator=(const nvmlSensorGroup& other) {
SensorGroupTemplate::operator=(other);
// TODO: copy plugin-specific attributes here once the group has any.
return *this;
}
/**
 * @brief One-time initialisation of the NVML environment for this group.
 *
 * Initialises NVML, looks up the handle of the device with index 0 and
 * stores the result of nvmlDeviceGetTotalEnergyConsumption() in
 * counters.energy_initial.
 */
void nvmlSensorGroup::execOnInit() {
    // NOTE(review): the NVML status codes are stored but never inspected,
    // so a failing call goes unnoticed here.
    nvmlReturn_t err;
    err = nvmlInit();
    err = nvmlDeviceGetHandleByIndex(0, &(env.device));
    err = nvmlDeviceGetTotalEnergyConsumption(env.device, &(counters.energy_initial));
}
/**
 * @brief Called when the group starts polling; starts the CUDA profiler.
 *
 * @return Always true. The status returned by cudaProfilerStart() is not
 *         evaluated.
 */
bool nvmlSensorGroup::execOnStart() {
    // Status deliberately discarded: start-up is reported as successful
    // regardless of the profiler call's outcome.
    static_cast<void>(cudaProfilerStart());
    return true;
}
/**
 * @brief Called when the group stops polling; stops the CUDA profiler.
 *
 * The status returned by cudaProfilerStop() is not evaluated.
 */
void nvmlSensorGroup::execOnStop() {
    static_cast<void>(cudaProfilerStop());
}
void nvmlSensorGroup::read() {
reading_t reading;
reading.timestamp = getTimestamp();
// FR
nvmlReturn_t err;
unsigned long long temp;
// FR
try {
for(auto s : _sensors) {
switch(s->getFeatureType()){
......@@ -137,9 +118,7 @@ void nvmlSensorGroup::read() {
err = nvmlDeviceGetTotalEnergyConsumption(env.device,&(counters.energy_current));
}
temp=counters.energy_current - counters.energy_previous; // Take difference and compute energy in millijoules
// You might want to consider putting this in the else block so we always measure something?
reading.value = temp;
// FR
break;
case(GPU_POWER):
err = nvmlDeviceGetPowerUsage(env.device,&(counters.power));
......@@ -189,14 +168,16 @@ void nvmlSensorGroup::read() {
err = nvmlDeviceGetTotalEccErrors (env.device, NVML_MEMORY_ERROR_TYPE_CORRECTED,NVML_VOLATILE_ECC,&(counters.ecc_counts));
reading.value = counters.ecc_counts;
break;
case(GPU_PCIE_THRU):
err = nvmlDeviceGetPcieThroughput (env.device, NVML_PCIE_UTIL_COUNT,&(counters.pcie_throughput));
reading.value = counters.pcie_throughput;
break;
case(GPU_RUN_PRCS):
err = nvmlDeviceGetComputeRunningProcesses (env.device,0,&(counters.running_prcs));
reading.value = counters.running_prcs.pid;
break;
case(GPU_PCIE_THRU):
err = nvmlDeviceGetPcieThroughput (env.device, NVML_PCIE_UTIL_COUNT,&(counters.pcie_throughput));
reading.value = counters.pcie_throughput;
break;
case(GPU_RUN_PRCS):
//err = nvmlDeviceGetComputeRunningProcesses (env.device,0,&(counters.running_prcs));
//reading.value = counters.running_prcs.pid;
err = nvmlDeviceGetComputeRunningProcesses (env.device,&(counters.procs_cnt),NULL);
reading.value = counters.procs_cnt;
break;
}
s->storeReading(reading);
#ifdef DEBUG
......@@ -209,10 +190,6 @@ void nvmlSensorGroup::read() {
}
/**
 * @brief Log the group-level settings.
 *
 * Only a fixed indentation width is reported.
 *
 * @param ll             Log level at which the message is emitted.
 * @param leadingSpaces  Not used by this implementation.
 */
void nvmlSensorGroup::printGroupConfig(LOG_LEVEL ll, unsigned int leadingSpaces) {
    // Indentation width reported for the group section.
    const int indentWidth = 12;
    LOG_VAR(ll) << " NumSpacesAsIndention: " << indentWidth;
}
......@@ -29,13 +29,9 @@
#define NVML_NVMLSENSORGROUP_H_
#include "../../includes/SensorGroupTemplate.h"
#include "nvmlSensorBase.h"
// FR Include files needed for NVML etc
#include <nvml.h>
#include <cuda_profiler_api.h>
// FR
/**
* @brief SensorGroupTemplate specialization for this plugin.
......@@ -52,21 +48,12 @@ public:
void execOnInit() final override;
bool execOnStart() final override;
void execOnStop() final override;
/*
* TODO
* Add getter and setters for group attributes if required
*/
void printGroupConfig(LOG_LEVEL ll, unsigned int leadingSpaces) final override;
private:
void read() final override;
/*
* TODO
* Add group internal attributes
*/
};
#endif /* NVML_NVMLSENSORGROUP_H_ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment