From c4a3b55ec298a7347766dacc0fe2e2f196bd1750 Mon Sep 17 00:00:00 2001 From: Weronika Filinger Date: Fri, 21 Feb 2020 12:58:58 +0000 Subject: [PATCH] added the compute running processes sensor to the nvml plugin --- dcdbpusher/sensors/nvml/nvml.conf | 6 ++++++ dcdbpusher/sensors/nvml/nvmlConfigurator.cpp | 1 + dcdbpusher/sensors/nvml/nvmlSensorBase.h | 5 ++++- dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp | 6 +++++- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/dcdbpusher/sensors/nvml/nvml.conf b/dcdbpusher/sensors/nvml/nvml.conf index a92cf27..9ba4f78 100644 --- a/dcdbpusher/sensors/nvml/nvml.conf +++ b/dcdbpusher/sensors/nvml/nvml.conf @@ -106,6 +106,12 @@ group nvml_g2 { feature GPU_PCIE_THRU } + sensor gpu_run_prcs { + mqttsuffix /run_prcs + feature GPU_RUN_PRCS + } + + } diff --git a/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp b/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp index 9b71512..3cf9c4a 100644 --- a/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp +++ b/dcdbpusher/sensors/nvml/nvmlConfigurator.cpp @@ -47,6 +47,7 @@ nvmlConfigurator::nvmlConfigurator() { _gpuFeatureMAP["GPU_UTL_GPU"] = GPU_UTL_GPU; _gpuFeatureMAP["GPU_ECC_ERR"] = GPU_ECC_ERR; _gpuFeatureMAP["GPU_PCIE_THRU"] = GPU_PCIE_THRU; + _gpuFeatureMAP["GPU_RUN_PRCS"] = GPU_RUN_PRCS; _groupName = "group"; _baseName = "sensor"; diff --git a/dcdbpusher/sensors/nvml/nvmlSensorBase.h b/dcdbpusher/sensors/nvml/nvmlSensorBase.h index b23aebe..f4b61a1 100644 --- a/dcdbpusher/sensors/nvml/nvmlSensorBase.h +++ b/dcdbpusher/sensors/nvml/nvmlSensorBase.h @@ -52,7 +52,7 @@ enum GPU_FEATURE { GPU_UTL_GPU = 11, GPU_ECC_ERR = 13, GPU_PCIE_THRU = 14, - + GPU_RUN_PRCS = 15, }; /** @@ -150,6 +150,9 @@ class nvmlSensorBase : public SensorBase { case GPU_PCIE_THRU: feature = "GPU_PCIE_THRU"; break; + case GPU_RUN_PRCS: + feature = "GPU_RUN_PRCS"; + break; } LOG_VAR(ll) << leading << " Feature type: " << feature; } diff --git a/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp 
b/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp index f70c6fa..edd68e0 100644 --- a/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp +++ b/dcdbpusher/sensors/nvml/nvmlSensorGroup.cpp @@ -51,6 +51,7 @@ struct counters_t { unsigned long long ecc_counts; nvmlUtilization_t utilization; unsigned int pcie_throughput; + nvmlProcessInfo_t running_prcs; } counters; nvmlSensorGroup::nvmlSensorGroup(const std::string& name) : @@ -192,7 +193,10 @@ void nvmlSensorGroup::read() { err = nvmlDeviceGetPcieThroughput (env.device, NVML_PCIE_UTIL_COUNT,&(counters.pcie_throughput)); reading.value = counters.pcie_throughput; break; - + case(GPU_RUN_PRCS): + err = nvmlDeviceGetComputeRunningProcesses (env.device,0,&(counters.running_prcs)); + reading.value = counters.running_prcs.pid; + break; } s->storeReading(reading); #ifdef DEBUG -- GitLab