Commit fb1037d1 authored by Micha Müller
Browse files

Major Caliper-service rework:

- Implement a custom symbol lookup instead of using the Caliper service
- Relocate the lookup logic to the pusher plugin
- Share symbol information via shared memory
parent de7a1044
......@@ -55,29 +55,24 @@
#include "caliper/common/Log.h"
#include "caliper/common/RuntimeConfig.h"
#include <Symtab.h>
#include <LineInformation.h>
#include <Function.h>
#include <AddrLookup.h>
#include <atomic>
#include <cstdio>
#include <cxxabi.h>
#include <dlfcn.h>
#include <errno.h>
#include <fcntl.h>
#include <features.h>
#include <mutex>
#include <sched.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/un.h>
#include <unistd.h>
#include <vector>
using namespace cali;
#include <libelf.h>
#include <gelf.h>
using namespace Dyninst;
using namespace SymtabAPI;
using namespace cali;
namespace {
......@@ -97,174 +92,138 @@ private:
int sock;
/* For binary name look-up */
// One cached memory region taken from the process memory map.
struct addr_range {
    unsigned long long start_addr; // lowest address belonging to the region
    unsigned long long end_addr;   // highest address; the lookup treats it as inclusive
    std::string pathname;          // file backing the region, or "[Anonymous]" if the map entry names none
};
std::vector<addr_range> proc_map; // cache of the memory regions read from /proc/self/maps, kept for faster lookup
#define max_symbol_size 512
#define max_path_size 4096
//for thread safe binary look-up
std::atomic_flag writer_lock;
std::atomic_flag reader_lock;
std::atomic<unsigned> proc_map_readers;
/* For function name look-up */
// The function name look-up code is stolen from the SymbolLookup (SL) service.
// SL service is intended to be triggered on snapshot flush and can cause problems
// if triggered on snapshot-processing, therefore we do our own look-up.
// As side-effect we can avoid overhead for all the additional SL service look-up
// stuff which is not required here.
AddressLookup* m_lookup;
std::mutex m_lookup_mutex;
/* A single executable symbol as stored in the symbol table. */
struct fun_symbol {
    void* start_addr;            // address where the symbol begins
    void* end_addr;              // address where the symbol ends
    char  name[max_symbol_size]; // symbol name, held inline in a fixed-size buffer
};
unsigned m_num_lookups;
unsigned m_num_failed;
/* Describes one contiguous block of executable memory. */
struct addr_range {
    void*  start_addr;
    void*  end_addr;
    size_t sym_offset;              // where this block's symbols start within the symbol table
    size_t sym_count;               // how many symbols fall into this address range
    char   pathname[max_path_size]; // path and name of the binary this range comes from, or "[Anonymous]" if unknown
};
#if 0
/**
* Look up to which function the program counter points.
* Logic borrowed from SymbolLookup service.
/*
* Retrieve function symbols from an ELF file (binary or shared library) and
* store them in a file at a given offset
*
* @return The number of symbol entries written.
*/
std::string lookup_function(unsigned long long pc) {
SymtabAPI::Function* function = 0;
std::string funcname = "UNKNOWN";
bool ret_func = false;
size_t get_function_symbols(const char* const filename, int out_fd, size_t out_offset, Channel* chn) {
Elf *elf;
Elf_Scn *scn = NULL;
GElf_Shdr shdr;
Elf_Data *data;
int fd, ii, count;
size_t entryCnt = 0;
{
std::lock_guard<std::mutex>
g(m_lookup_mutex);
elf_version(EV_CURRENT);
if (!m_lookup)
return "UNKNOWN";
fd = open(filename, O_RDONLY);
Symtab* symtab;
Offset offset;
bool ret = m_lookup->getOffset(pc, symtab, offset);
if (fd == -1) {
Log(1).stream() << chn->name() << ": DcdbPusher: Could not open ELF file: "
<< strerror(errno) << std::endl;
return 0;
}
if (ret)
ret_func = symtab->getContainingFunction(offset, function);
elf = elf_begin(fd, ELF_C_READ, NULL);
++m_num_lookups;
while ((scn = elf_nextscn(elf, scn)) != NULL) {
gelf_getshdr(scn, &shdr);
if (shdr.sh_type == SHT_SYMTAB) {
/* found a symbol table */
break;
}
}
if (ret_func && function) {
auto it = function->pretty_names_begin();
data = elf_getdata(scn, NULL);
if (it != function->pretty_names_end())
funcname = *it;
if (shdr.sh_entsize == 0) {
Log(1).stream() << chn->name() << ": DcdbPusher: Section size zero" << std::endl;
}
if (!ret_func)
++m_num_failed; // not locked, doesn't matter too much if it's slightly off
count = shdr.sh_size / (shdr.sh_entsize ?: 1);
return funcname;
}
#endif
/* print the symbol names */
for (ii = 0; ii < count; ++ii) {
GElf_Sym sym;
gelf_getsym(data, ii, &sym);
/**
* Look up to which object file a given program counter points. This is done
* by checking a local cache of address ranges in /proc/self/maps. If no
* matching address range is found we rebuild the internal cache and try
* once again.
*/
void lookup_pathname(unsigned long long pc, char* const buf, Channel* chn, bool retry=false) {
//we get blocked here while proc_map is being rebuilt
while(reader_lock.test_and_set(std::memory_order_acquire)) { }
proc_map_readers++;
reader_lock.clear(std::memory_order_release);
for(const auto& entry : proc_map) {
if (pc >= entry.start_addr && pc <= entry.end_addr) {
strncpy(buf, entry.pathname.c_str(), 2032);
proc_map_readers--;
return;
if (sym == NULL) {
Log(1).stream() << chn->name() << ": DcdbPusher: Got no symbol" << std::endl;
continue;
}
//TODO store to out_fd
//TODO update size entry in output file
if (sym.st_info == STT_FUNC) {
//printf("%s\n", elf_strptr(elf, shdr.sh_link, sym.st_name));
entryCnt++;
}
}
proc_map_readers--;
if (!retry) {
Log(1).stream() << chn->name() << ": DcdbPusher: Rebuilding proc_map" << std::endl;
setup_proc_map(chn);
} else {
buf[0] = '\0';
return;
}
lookup_pathname(pc, buf, chn, true);
return;
elf_end(elf);
close(fd);
return entryCnt;
}
/**
* Setup proc_map. Parse all address ranges and their pathnames which are
* marked as executable from /proc/self/maps.
*
* This method is called once on start-up and every time lookup_pathname
* fails. Lookup_pathname can fail if a new memory region with executable
* code is mapped (dynamic library load, self modifying code), which is not
* yet in the local cache.
* TODO extend to parse elf symbols
* TODO store information into shared memory file
* TODO demangle symbol names func_name = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &status);
* TODO demangle different languages? (C, C++, Fortran, other?)
*/
bool setup_proc_map(Channel* chn) {
if (writer_lock.test_and_set(std::memory_order_acquire)) {
//another thread is already rebuilding the proc_map
return true;
}
//block readers from accessing proc_map
while(reader_lock.test_and_set(std::memory_order_acquire)) { /* idle */ }
//wait until current readers finished
while(proc_map_readers != 0) { /* idle */ }
bool parse_proc_map(const char* const shm_name, Channel* chn) {
FILE* file;
addr_range range;
char exec;
char buf[max_path_size];
FILE *file = NULL;
if (!(file = fopen("/proc/self/maps", "r"))) {
Log(1).stream() << chn->name() << ": DcdbPusher: Could not open memory map: "
<< strerror(errno) << std::endl;
reader_lock.clear(std::memory_order_release);
writer_lock.clear(std::memory_order_release);
return false;
}
addr_range range;
const size_t bufSize = 512;
char buf[bufSize];
char exec;
proc_map.clear();
while(fscanf(file, "%llx-%llx %*2c%1c%*s%*s%*s%*s%511[^\n]",
while(fscanf(file, "%llx-%llx %*2c%1c%*s%*s%*s%*s%4096[^\n]",
&(range.start_addr),
&(range.end_addr),
&exec,
buf) == 4) {
if (exec == 'x') {
range.pathname = std::string(buf);
//remove leading whitespaces
const std::string whitespace = " \t";
const auto strBegin = range.pathname.find_first_not_of(whitespace);
if (strBegin != std::string::npos) {
range.pathname = range.pathname.substr(strBegin);
}
if(range.pathname == "") {
range.pathname = "[Anonymous]";
sscanf(buf, "%4096s", range.pathname);
if (range.pathname[0] == '\0') {
strncpy(range.pathname, "[Anonymous]", 12);
} else if (range.pathname[0] == '/') {
//TODO check and avoid parsing files twice
printf("Reading symbols from %s\n", range.pathname);
get_function_symbols(range.pathname, -1, 0, chn);
printf("\n");
}
proc_map.push_back(range);
// TODO store in shm file
}
}
reader_lock.clear(std::memory_order_release);
writer_lock.clear(std::memory_order_release);
Log(1).stream() << chn->name() << ": DcdbPusher: Scan error: "
<< strerror(errno) << std::endl;
fclose(file);
return true;
//return true;
return false;
}
void post_init_cb(Caliper* c, Channel* chn) {
......@@ -285,31 +244,12 @@ private:
return;
}
if (!setup_proc_map(chn)) {
if (!parse_proc_map("", chn)) {
Log(1).stream() << chn->name() << ": DcdbPusher: Failed to init proc_map"
<< std::endl;
return;
}
{
//TODO refresh may be necessary for the same reasons proc_map has to be rebuilt
//FIXME should probably go into own function.
std::lock_guard<std::mutex>
g(m_lookup_mutex);
if (!m_lookup) {
m_lookup = AddressLookup::createAddressLookup();
if (!m_lookup) {
Log(0).stream() << "DcdbPusher: Could not create address lookup object"
<< std::endl;
return;
}
m_lookup->refresh();
}
}
sock = socket(AF_UNIX, SOCK_SEQPACKET, 0);
if(sock == -1) {
......@@ -356,40 +296,14 @@ private:
Entry timestamp_entry = sbuf->get(timestamp);
Entry sampler_pc_entry = sbuf->get(sampler_pc);
//retrieve all information we need:
// *timestamp (from snapshot)
// *program counter (from snapshot)
// *cpu we are running on
unsigned long long time = timestamp_entry.value().to_uint();
unsigned long long pc = sampler_pc_entry.value().to_uint();
size_t bufCnt = 0;
size_t bufSize = 4096;
char buf[bufSize];
char file_name[2032];
char* func_name = NULL;
unsigned cpu = 0;
bufCnt += snprintf(buf, 21, "%llu", time);
bufCnt++; //count terminating NUL char
if (bufCnt > 21) {
Log(1).stream() << chn->name() << ": DcdbPusher: Timestamp has more digits than expected" << std::endl;
}
lookup_pathname(pc, file_name, chn);
//lookup_function(pc, func_name);
void* f = (void*) pc;
Dl_info dlinfo;
if (dladdr(f, &dlinfo) && dlinfo.dli_sname != NULL) {
#if 0
Log(1).stream() << chn->name() << ": DcdbPusher: dladdr() retrieved \""
<< dlinfo.dli_fname << "::" << dlinfo.dli_sname << "\"" << std::endl;
#endif
//TODO clean up, use pre-allocated buffer for demangling
int status;
func_name = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &status);
} else {
Log(1).stream() << chn->name() << ": DcdbPusher: dladdr() failed" << std::endl;
}
#if __GLIBC_PREREQ(2, 29)
if (getcpu(&cpu, NULL)) {
Log(1).stream() << chn->name() << ": DcdbPusher: getcpu() failed" << std::endl;
......@@ -403,20 +317,9 @@ private:
}
#endif
size_t tCnt = bufCnt;
bufCnt += snprintf(&buf[bufCnt], bufSize-bufCnt, "/cpu%u/%s::%s", cpu, file_name, func_name);
bufCnt++; //count terminating NUL char
if (bufCnt > bufSize) {
Log(1).stream() << chn->name() << ": DcdbPusher: value exceeding buffer size" << std::endl;
++snapshots_failed;
return;
}
#if 0
Log(1).stream() << chn->name() << ": DcdbPusher: Sending \""
<< &buf[tCnt] << "\" (" << buf << ")" << std::endl << std::endl;
#endif
//TODO use socket only for initial communication. All other communication via shared memory
size_t bufSize = 1024, bufCnt;
char buf[bufSize];
if (send(sock, buf, bufCnt, 0) == -1) {
Log(1).stream() << chn->name() << ": DcdbPusher: Failed to send message: "
......@@ -433,8 +336,6 @@ private:
}
Log(1).stream() << chn->name() << ": DcdbPusher: "
<< m_num_lookups << " address lookups, "
<< m_num_failed << " failed." << std::endl
<< snapshots_processed << " snapshots processed of which "
<< snapshots_failed << " failed." << std::endl;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment