From 1e026fa8f78200aa85e816899ab39258419134dd Mon Sep 17 00:00:00 2001 From: Weronika Filinger Date: Fri, 21 Feb 2020 12:25:27 +0000 Subject: [PATCH] adding the PCIE throughput sensor to the nvml plugin --- dcdbpusher/sensors/nvml/nvml.conf | 5 +++++ dcdbpusher/sensors/nvml/nvmlConfigurator.cpp | 1 + dcdbpusher/sensors/nvml/nvmlSensorBase.h | 11 ++++++++--- dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp | 8 +++++--- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/dcdbpusher/sensors/nvml/nvml.conf b/dcdbpusher/sensors/nvml/nvml.conf index 8b5b63c..a92cf27 100644 --- a/dcdbpusher/sensors/nvml/nvml.conf +++ b/dcdbpusher/sensors/nvml/nvml.conf @@ -101,6 +101,11 @@ group nvml_g2 { feature GPU_ECC_ERR } + sensor gpu_pcie_thru { + mqttsuffix /pcie_thru + feature GPU_PCIE_THRU + } + } diff --git a/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp b/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp index 034aa42..9b71512 100644 --- a/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp +++ b/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp @@ -46,6 +46,7 @@ nvmlConfigurator::nvmlConfigurator() { _gpuFeatureMAP["GPU_UTL_MEM"] = GPU_UTL_MEM; _gpuFeatureMAP["GPU_UTL_GPU"] = GPU_UTL_GPU; _gpuFeatureMAP["GPU_ECC_ERR"] = GPU_ECC_ERR; + _gpuFeatureMAP["GPU_PCIE_THRU"] = GPU_PCIE_THRU; _groupName = "group"; _baseName = "sensor"; diff --git a/dcdbpusher/sensors/nvml/nvmlSensorBase.h b/dcdbpusher/sensors/nvml/nvmlSensorBase.h index 4a82ccf..b23aebe 100644 --- a/dcdbpusher/sensors/nvml/nvmlSensorBase.h +++ b/dcdbpusher/sensors/nvml/nvmlSensorBase.h @@ -51,6 +51,8 @@ enum GPU_FEATURE { GPU_UTL_MEM = 10, GPU_UTL_GPU = 11, GPU_ECC_ERR = 13, + GPU_PCIE_THRU = 14, + }; /** @@ -142,9 +144,12 @@ class nvmlSensorBase : public SensorBase { case GPU_UTL_GPU: feature = "GPU_UTL_GPU"; break; - case GPU_ECC_ERR: - feature = "GPU_ECC_ERR"; - break; + case GPU_ECC_ERR: + feature = "GPU_ECC_ERR"; + break; + case GPU_PCIE_THRU: + feature = "GPU_PCIE_THRU"; + break; } LOG_VAR(ll) << leading << " Feature type: " 
<< feature; } diff --git a/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp b/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp index 102ee4d..f70c6fa 100644 --- a/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp +++ b/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp @@ -41,9 +41,6 @@ struct counters_t { unsigned long long energy_initial; unsigned long long energy_current; unsigned long long energy_previous; - // unsigned int freq1; - // unsigned int freq2; - // unsigned int freq3; unsigned int temperature; unsigned int fanspeed; unsigned int clockspeed_graphics; @@ -53,6 +50,7 @@ struct counters_t { unsigned int power; unsigned long long ecc_counts; nvmlUtilization_t utilization; + unsigned int pcie_throughput; } counters; nvmlSensorGroup::nvmlSensorGroup(const std::string& name) : @@ -190,6 +188,10 @@ void nvmlSensorGroup::read() { err = nvmlDeviceGetTotalEccErrors (env.device, NVML_MEMORY_ERROR_TYPE_CORRECTED,NVML_VOLATILE_ECC,&(counters.ecc_counts)); reading.value = counters.ecc_counts; break; + case(GPU_PCIE_THRU): + err = nvmlDeviceGetPcieThroughput (env.device, NVML_PCIE_UTIL_COUNT,&(counters.pcie_throughput)); + reading.value = counters.pcie_throughput; + break; } s->storeReading(reading); -- GitLab