Commit 572c6921 authored by Micha Müller's avatar Micha Müller
Browse files

Pusher: Extend Caliper service:

- on sampling event record current object file + function name
- add numa-node and cpu info
parent 9c2f4406
......@@ -55,11 +55,16 @@
#include "caliper/common/Log.h"
#include "caliper/common/RuntimeConfig.h"
#include <atomic>
#include <cstdio>
#include <errno.h>
#include <features.h>
#include <sched.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <vector>
using namespace cali;
......@@ -71,21 +76,129 @@ private:
static const ConfigSet::Entry s_configdata[];
//TODO reduce snapshot frequency during runtime
unsigned snapshot_threshold = 10;
unsigned threshold_exceeded = 0;
unsigned snapshots_processed = 0;
unsigned snapshots_failed = 0;
bool initialized = false;
Attribute sampler { Attribute::invalid };
Attribute symbol_fun { Attribute::invalid };
Attribute timestamp { Attribute::invalid };
Attribute sampler_pc { Attribute::invalid };
Attribute sampler_fun { Attribute::invalid };
Attribute timestamp { Attribute::invalid };
Attribute thread_id { Attribute::invalid };
int sock;
typedef struct {
unsigned long long start_addr;
unsigned long long end_addr;
std::string pathname;
} addr_range;
std::vector<addr_range> proc_map; // buffer for memory regions from /proc/self/map for faster lookup
//for thread safety
std::atomic_flag writer_lock;
std::atomic_flag reader_lock;
std::atomic<unsigned> proc_map_readers;
/**
* Look up to which object file a given program counter points. This is done
* by checking the address ranges in /proc/self/maps.
*/
std::string lookup_pathname(unsigned long long pc, Channel* chn, bool retry=false) {
//we get blocked here if proc_map is currently rebuild
while(reader_lock.test_and_set(std::memory_order_acquire)) { }
proc_map_readers++;
reader_lock.clear(std::memory_order_release);
std::string pn = "";
for(const auto& entry : proc_map) {
if (pc >= entry.start_addr && pc <= entry.end_addr) {
pn = entry.pathname;
break;
}
}
proc_map_readers--;
if (pn == "" && !retry) {
Log(1).stream() << chn->name() << ": DcdbPusher: Rebuilding proc_map" << std::endl;
setup_proc_map(chn);
} else {
return pn;
}
return lookup_pathname(pc, chn, true);
}
/**
* Setup proc_map. Parse all address ranges and their pathnames which are
* marked as executable from /proc/self/maps.
*
* This method is called once on start-up and possibly if a new memory
* region with executable code is mapped (dynamic library load, self
* modifying code). However, the latter case is not expected to occur very
* often.
*/
bool setup_proc_map(Channel* chn) {
if (writer_lock.test_and_set(std::memory_order_acquire)) {
//another thread is already rebuilding the proc_map
return true;
}
//block readers from accessing proc_map
while(reader_lock.test_and_set(std::memory_order_acquire)) { /* idle */ }
//wait until current readers finished
while(proc_map_readers != 0) { /* idle */ }
FILE *file = NULL;
if (!(file = fopen("/proc/self/maps", "r"))) {
Log(1).stream() << chn->name() << ": DcdbPusher: Could not open memory map: "
<< strerror(errno) << std::endl;
reader_lock.clear(std::memory_order_release);
writer_lock.clear(std::memory_order_release);
return false;
}
addr_range range;
const size_t bufSize = 512;
char buf[bufSize];
char exec;
proc_map.clear();
while(fscanf(file, "%llx-%llx %*2c%1c%*s%*s%*s%*s%511[^\n]",
&(range.start_addr),
&(range.end_addr),
&exec,
buf) == 4) {
if (exec == 'x') {
range.pathname = std::string(buf);
//remove leading whitespaces
const std::string whitespace = " \t";
const auto strBegin = range.pathname.find_first_not_of(whitespace);
if (strBegin != std::string::npos) {
range.pathname = range.pathname.substr(strBegin);
}
if(range.pathname == "") {
range.pathname = "[Anonymous]";
}
proc_map.push_back(range);
}
}
reader_lock.clear(std::memory_order_release);
writer_lock.clear(std::memory_order_release);
fclose(file);
return true;
}
void post_init_cb(Caliper* c, Channel* chn) {
// manually issue a pre-flush event to set up the SymbolLookup service
chn->events().pre_flush_evt(c, chn, nullptr);
......@@ -94,19 +207,25 @@ private:
// active by searching for identifying attributes.
//TODO support data collection if event triggered snapshots are used
sampler = c->get_attribute("cali.sampler.pc");
symbol_fun = c->get_attribute("source.function#cali.sampler.pc");
timestamp = c->get_attribute("time.timestamp");
sampler_pc = c->get_attribute("cali.sampler.pc");
sampler_fun = c->get_attribute("source.function#cali.sampler.pc");
timestamp = c->get_attribute("time.timestamp");
if (sampler == Attribute::invalid ||
symbol_fun == Attribute::invalid ||
timestamp == Attribute::invalid) {
if (sampler_pc == Attribute::invalid ||
sampler_fun == Attribute::invalid ||
timestamp == Attribute::invalid) {
Log(1).stream() << chn->name() << ": DcdbPusher: not all required services "
"sampler, symbollookup and timestamp are running." << std::endl;
return;
}
if (!setup_proc_map(chn)) {
Log(1).stream() << chn->name() << ": DcdbPusher: Failed to init proc_map"
<< std::endl;
return;
}
sock = socket(AF_UNIX, SOCK_SEQPACKET, 0);
if(sock == -1) {
......@@ -128,21 +247,17 @@ private:
sock = -1;
return;
}
initialized = true;
}
void process_snapshot_cb(Caliper* c, Channel* chn, const SnapshotRecord*, const SnapshotRecord* sbuf) {
++snapshots_processed;
//TODO check if signal safety is required
//TODO sampler invoked snapshots are always signals, so better do
// nothing signal "un-safe"
// sampler invoked snapshots are always signals, so better do nothing signal "un-safe"
//if (c->is_signal()) {
// ++snapshots_failed;
// return;
//}
if (!initialized) {
if (sock == -1) {
++snapshots_failed;
return;
}
......@@ -164,18 +279,38 @@ private:
char buf[bufSize];
std::string time;
std::string value;
std::string func_name;
std::string file_name;
for (const Entry& e : rec) {
//print attribute values for timestamp and looked up function symbol name
//look up required attribute values in snapshot
cali_id_t entryId = c->get_attribute(e.attribute()).id();
if (entryId == timestamp.id()) {
time = e.value().to_string();
} else if (entryId == symbol_fun.id()) {
value = e.value().to_string();
} else if (entryId == sampler_fun.id()) {
func_name = e.value().to_string();
} else if (entryId == sampler_pc.id()) {
unsigned long long pc = e.value().to_uint();
file_name = lookup_pathname(pc, chn);
}
}
value = file_name + "::" + func_name;
#if __GLIBC_PREREQ(2, 29)
unsigned cpu, node;
if (!getcpu(&cpu, &node)) {
value += "/node" + std::to_string(node) + "/cpu" + std::to_string(cpu);
}
#else
int cpu = sched_getcpu();
if (cpu != -1) {
value += "/cpu" + std::to_string(cpu);
}
#endif
if ((value.length() + time.length() + 2) > 2048) {
Log(1).stream() << chn->name() << ": DcdbPusher: value exceeding buffer size" << std::endl;
++snapshots_failed;
......@@ -197,32 +332,23 @@ private:
}
void finish_cb(Caliper* c, Channel* chn) {
if(initialized) {
if(sock != -1) {
shutdown(sock, SHUT_WR);
close(sock);
}
if (threshold_exceeded > 0) {
Log(1).stream() << chn->name() << ": DcdbPusher: threshold was exceeded "
<< threshold_exceeded << " times." << std::endl;
}
Log(1).stream() << chn->name() << ": DcdbPusher: "
<< snapshots_processed << " snapshots processed of which "
<< snapshots_failed << " failed." << std::endl;
}
DcdbPusher(Caliper* c, Channel* chn) {
ConfigSet config = chn->config().init("dcdbpusher", s_configdata);
snapshot_threshold = config.get("snapshot_threshold").to_uint();
sock = -1;
}
public:
~DcdbPusher() {
}
~DcdbPusher() { }
static void dcdbpusher_register(Caliper* c, Channel* chn) {
DcdbPusher* instance = new DcdbPusher(c, chn);
......@@ -245,16 +371,6 @@ public:
}
}; // class DcdbPusher
const ConfigSet::Entry DcdbPusher::s_configdata[] = {
{ "snapshot_threshold", CALI_TYPE_UINT, "10",
"Maximum number of snapshots per second we are allowed to process",
"Maximum number of snapshots per second we are allowed to process.\n"
"If the threshold is exceeded snapshots get discarded until the\n"
"limit is met again." },
ConfigSet::Terminator
};
} // namespace
namespace cali {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment