Commit f17b72f5 authored by Micha Müller
Browse files

Caliper-service rework WIP2

parent f6c273f2
......@@ -57,6 +57,7 @@
#include "caliper/common/RuntimeConfig.h"
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <cxxabi.h>
#include <errno.h>
......@@ -88,28 +89,29 @@ class DcdbPusher {
#define MSGQ_SIZE 8192
/* One snapshot record. Records of this type make up the shm message queue
 * (snap_data[MSGQ_SIZE]) and the per-thread buffer shm_buf.
 * NOTE(review): this listing shows both the removed (void* pc) and the
 * added (uintptr_t pc) version of the fields of the commit. */
typedef struct {
void* pc;
uint64_t ts;
unsigned short cpu;
uintptr_t pc; // presumably the sampled program counter - TODO confirm
uint64_t ts; // presumably a timestamp; unit not visible here - TODO confirm
unsigned short cpu; // presumably the CPU the sample was taken on - TODO confirm
} snap_data;
/* Entry for an executable symbol in the symbol table */
/* NOTE(review): this listing shows both the removed (fun_symbol, void*
 * addresses) and the added (fsym_data, uintptr_t addresses) version. */
typedef struct {
void* start_addr;
void* end_addr;
char name[MAX_SYMBOL_SIZE];
} fun_symbol;
uintptr_t start_addr; // filled from the ELF symbol's st_value
uintptr_t end_addr; // filled with st_value + st_size of the ELF symbol
char name[MAX_SYMBOL_SIZE]; // symbol name (presumably demangled when possible);
// the writer truncates it and terminates it at index 511
} fsym_data;
/* Defines a contiguous executable memory block */
/* NOTE(review): this listing shows both the removed (addr_range) and the
 * added (addr_data) version of the struct. */
typedef struct {
void* start_addr;
void* end_addr;
size_t sym_offset; // Offset pointing to the symbols for this memory
// block in the symbol table
size_t sym_count; // Number of symbols in this address range
char pathname[MAX_PATH_SIZE]; // Filepath + name of the binary where
// this memory range comes from or "[Anonymous]" if unknown
} addr_range;
uintptr_t start_addr; // start of the range as parsed from /proc/self/maps
uintptr_t end_addr; // end of the range as parsed from /proc/self/maps
size_t fsym_offset; // Offset in bytes from the address of this struct
// to the beginning of the associated symbol section
size_t fsym_count; // Number of symbols in this address range
char pathname[MAX_PATH_SIZE]; // Filepath + name of the binary where
// this memory range comes from or
// "[Anonymous]" if unknown
} addr_data;
private:
......@@ -132,37 +134,45 @@ private:
* snap_data[MSGQ_SIZE]
*
* //symbol lookup data
* size_t ar_count
* addr_range[ar_count]
* ar_count * (fun_symbol[addr_range.sym_count])
* size_t addr_count
* addr_data[addr_count]
* addr_count * (fsym_data[addr_data.fsym_count])
*/
// NOTE(review): several declarations below appear twice because this listing
// shows both the removed and the added lines of the commit.
void* shm; // pointer to shared memory object
void* shm; // pointer to shared memory object
size_t shm_size; // size of shm in bytes
int shm_file; // fd of the underlying shared memory file
int sock; // unix socket fd for initial shm setup communication
int shm_file; // fd of the underlying shared memory file
int sock; // unix socket fd for initial shm setup communication
std::atomic_flag shm_wlock; // for thread safe writing to shm queue
// each thread has a local buffer for relevant snapshot data to reduce
// writes to shm queue as this requires locking
constexpr size_t shm_buf_size = 1024 / sizeof(snap_data);
thread_local size_t shm_buf_idx = 0;
thread_local snap_data shm_buf[shm_buf_size];
// number of snap_data entries that fit into 1024 bytes
static constexpr size_t shm_buf_size = 1024 / sizeof(snap_data);
// presumably the next free slot in shm_buf - TODO confirm against the writer
static thread_local size_t shm_buf_idx;
static thread_local snap_data shm_buf[shm_buf_size];
// NOTE(review): where this flag is set is not visible in this listing
bool initialized;
/*
* Retrieve function symbols from an ELF file (binary or shared library) and
* Retrieve function symbols from an ELF file and
* store them in a file at a given offset
*
* @param filename ELF file (binary or shared library) to retrieve symbol info from
* @param start_addr Only store symbols whose address is in between start_addr and end_addr
* @param end_addr
* @param dest_ptr Pointer to memory where to store fsym_data. Will be modified
* to point behind the last written element on return.
* @return The number of symbol entries written.
*/
size_t get_function_symbols(const char* const filename, int out_fd, size_t out_offset, Channel* chn) {
size_t write_function_symbols(const char* const filename,
const uintptr_t start_addr,
const uintptr_t end_addr,
fsym_data*& dest_ptr,
Channel* chn) {
Elf *elf;
Elf_Scn *scn = NULL;
GElf_Shdr shdr;
Elf_Data *data;
int fd, ii, count;
size_t entryCnt = 0;
int fd;
elf_version(EV_CURRENT);
......@@ -173,8 +183,8 @@ private:
return 0;
}
//search ELF header for symbol table
elf = elf_begin(fd, ELF_C_READ, NULL);
while ((scn = elf_nextscn(elf, scn)) != NULL) {
gelf_getshdr(scn, &shdr);
if (shdr.sh_type == SHT_SYMTAB) {
......@@ -188,12 +198,12 @@ private:
Log(1).stream() << chn->name() << ": DcdbPusher: Section size zero" << std::endl;
return 0;
}
count = shdr.sh_size / (shdr.sh_entsize ?: 1);
size_t entryCnt = 0;
int count = shdr.sh_size / (shdr.sh_entsize ?: 1);
/* print the symbol names */
for (ii = 0; ii < count; ++ii) {
for (int ii = 0; ii < count; ++ii) {
GElf_Sym sym;
char buf[MAX_SYMBOL_SIZE];
gelf_getsym(data, ii, &sym);
//if (gelf_getsym(data, ii, &sym) == NULL) {
......@@ -204,6 +214,7 @@ private:
char* symstr;
char* dsymstr;
int status = -1;
fsym_data symdat;
symstr = elf_strptr(elf, shdr.sh_link, sym.st_name);
......@@ -213,18 +224,31 @@ private:
}
if (status == 0) {
strncpy(buf, dsymstr, 512);
strncpy(symdat.name, dsymstr, 512);
free((void*) dsymstr);
} else {
strncpy(buf, symstr, 512);
strncpy(symdat.name, symstr, 512);
}
symdat.name[511] = '\0';
symdat.start_addr = sym.st_value;
symdat.end_addr = sym.st_value + sym.st_size;
//TODO determine if symbol address relative and if so add offset
if ((symdat.start_addr >= start_addr &&
symdat.start_addr <= end_addr) ||
(symdat.end_addr >= start_addr &&
symdat.end_addr <= end_addr)) {
memcpy(dest_ptr, &symdat, sizeof(addr_data));
++dest_ptr;
++entryCnt;
} else {
printf("Symbol %s out of mem range (%llx-%llx, size %llx)\n", symdat.name,
symdat.start_addr,
symdat.end_addr,
sym.st_size);
}
buf[511] = '\0';
//TODO store to out_fd
//TODO update size entry in output file
// TODO msync after write to shm file
printf("%s\n", buf);
entryCnt++;
}
}
......@@ -234,57 +258,113 @@ private:
}
/**
 * Set up address data in shm. Parse all address ranges and their pathnames
 * which are marked as executable from /proc/self/maps.
 * Address ranges associated to a binary ELF file will be enriched with
 * symbol data.
 *
 * The addr_data entries are written directly behind the address counter,
 * which itself sits behind two size_t fields and the snapshot queue of
 * MSGQ_SIZE entries. The fsym_data entries follow the last addr_data entry.
 *
 * NOTE(review): this listing interleaves the removed parse_proc_map() lines
 * (addr_range range, get_function_symbols(...), "Scan error" log,
 * return false) with the added setup_shm() lines of the commit.
 *
 * TODO demangle different languages? (C, C++, Fortran, other?)
 *
 * @param chn Channel whose name() is used as prefix for log messages and
 *            which is passed on to write_function_symbols().
 * @return false if /proc/self/maps could not be opened.
 */
bool parse_proc_map(const char* const shm_name, Channel* chn) {
bool setup_shm(Channel* chn) {
FILE* file;
addr_range range;
char exec;
char buf[MAX_PATH_SIZE];
addr_data addr;
char exec;
char buf[MAX_PATH_SIZE];
addr_data* addr_ptr;
//some pointer arithmetic for the beginning to get appropriate start pointers
//addr_cnt aliases the counter stored in shm, so ++addr_cnt below updates shm
size_t& addr_cnt = *(reinterpret_cast<size_t*>(static_cast<char*>(shm)
+ 2*sizeof(size_t) + MSGQ_SIZE*sizeof(snap_data)));
//the first addr_data entry starts right behind the counter
addr_data* const addr_start = reinterpret_cast<addr_data*>(&addr_cnt + 1);
addr_ptr = addr_start;
//read mapped address ranges from /proc/self/maps
if (!(file = fopen("/proc/self/maps", "r"))) {
Log(1).stream() << chn->name() << ": DcdbPusher: Could not open memory map: "
<< strerror(errno) << std::endl;
return false;
}
constexpr size_t addr_offset = 2*sizeof(size_t) + MSGQ_SIZE*sizeof(snap_data);
size_t addr_counter = 0;
//read one line = one address range
//format: start-end, skip two permission chars, read the third one (the
//execute flag), skip four whitespace separated tokens, take the rest of
//the line as pathname
//NOTE(review): %llx is paired with uintptr_t targets - confirm both have the
//same width/type on the target ABI
//NOTE(review): %4096[^\n] may store 4096 chars plus the terminator into
//buf[MAX_PATH_SIZE] - confirm MAX_PATH_SIZE is larger than 4096
//NOTE(review): if nothing but the newline follows the skipped tokens, the
//%[ conversion fails, fscanf returns 3 and the loop ends early - confirm
//against the maps format of the target kernel
while(fscanf(file, "%llx-%llx %*2c%1c%*s%*s%*s%*s%4096[^\n]",
&(range.start_addr),
&(range.end_addr),
&(addr.start_addr),
&(addr.end_addr),
&exec,
buf) == 4) {
//Only executable memory ranges are interesting. If the program counter
//ever points in a non-executable section --> HCF
if (exec == 'x') {
//get rid of leading whitespaces
sscanf(buf, "%4096s", range.pathname);
if (range.pathname[0] == '\0') {
strncpy(range.pathname, "[Anonymous]", 12);
} else if (range.pathname[0] == '/') {
//TODO check and avoid parsing files twice
printf("Reading symbols from %s\n", range.pathname);
get_function_symbols(range.pathname, -1, 0, chn);
printf("\n");
//NOTE(review): if buf holds only whitespace, sscanf stores nothing and
//addr.pathname keeps its previous content - confirm this is intended
sscanf(buf, "%4096s", addr.pathname);
//mem ranges are not required to be associated with a name
if (addr.pathname[0] == '\0') {
strncpy(addr.pathname, "[Anonymous]", 12);
}
// TODO store in shm file
// TODO msync after write to shm file
//symbol info gets filled in by the second pass below
addr.fsym_count = 0;
addr.fsym_offset = 0;
//save in shared memory
//NOTE(review): no check against shm_size is visible here, and addr_cnt is
//only incremented - presumably shm is large enough and the counter starts
//at zero; confirm
memcpy(addr_ptr, &addr, sizeof(addr_data));
++addr_ptr;
++addr_cnt;
}
}
fclose(file);
Log(1).stream() << chn->name() << ": DcdbPusher: Scan error: "
<< strerror(errno) << std::endl;
//We have all interesting memory ranges now. Those associated with a
//binary file get enriched with symbol data
//symbols are stored directly behind the last addr_data entry
fsym_data* fsym_ptr = reinterpret_cast<fsym_data*>(addr_ptr);
//distance in bytes from the first addr_data entry to the symbol area
size_t fsym_offset = addr_cnt * sizeof(addr_data);
addr_ptr = addr_start;
//TODO replace '/' with something unambiguous
for(size_t i = 0; i < addr_cnt; ++i) {
addr_ptr->fsym_offset = fsym_offset;
if (addr_ptr->pathname[0] == '/') {
printf("Parsing symbols for %s (%llx-%llx)\n", addr_ptr->pathname, addr_ptr->start_addr, addr_ptr->end_addr);
//fsym_ptr is passed by reference and advanced behind the written symbols
addr_ptr->fsym_count = write_function_symbols(addr_ptr->pathname,
addr_ptr->start_addr,
addr_ptr->end_addr,
fsym_ptr,
chn);
//the next range's symbols start behind the ones just written
fsym_offset += addr_ptr->fsym_count * sizeof(fsym_data);
}
//the offset is relative to the addr_data entry itself, so stepping to the
//next entry shortens the distance by one entry
fsym_offset -= sizeof(addr_data);
++addr_ptr;
}
//NOTE(review): fclose(file) and the final return both appear twice in this
//listing (removed and added line of the commit)
fclose(file);
// Log(1).stream() << chn->name() << ": DcdbPusher: Scan error: "
// << strerror(errno) << std::endl;
//return true;
return false;
return true;
}
/**
 * Debug helper: print the symbol lookup data stored in shm to stdout.
 *
 * The address counter is located behind two size_t fields and the snapshot
 * queue of MSGQ_SIZE entries; the addr_data entries follow it directly.
 * For every addr_data entry its pathname, address range and symbol count are
 * printed, followed by one line per associated fsym_data entry. The symbols
 * of an entry are found through addr_data::fsym_offset, a byte offset
 * relative to the address of that addr_data entry.
 */
void print_debug_shm() {
    const size_t& addr_cnt = *(reinterpret_cast<size_t*>(static_cast<char*>(shm)
                               + 2*sizeof(size_t) + MSGQ_SIZE*sizeof(snap_data)));
    const addr_data* addr_ptr = reinterpret_cast<const addr_data*>(&addr_cnt + 1);

    for (size_t i = 0; i < addr_cnt; ++i) {
        // uintptr_t and size_t are not guaranteed to match %llx / %d, so the
        // addresses are cast explicitly and the count is printed with %zu
        printf("Mem range %s: %llx-%llx contains %zu symbols:\n", addr_ptr->pathname,
               static_cast<unsigned long long>(addr_ptr->start_addr),
               static_cast<unsigned long long>(addr_ptr->end_addr),
               addr_ptr->fsym_count);

        // fsym_offset is relative to the address of this addr_data entry
        const fsym_data* fsym_ptr = reinterpret_cast<const fsym_data*>(
            reinterpret_cast<const char*>(addr_ptr) + addr_ptr->fsym_offset);

        for (size_t j = 0; j < addr_ptr->fsym_count; ++j) {
            printf("> %s (%llx-%llx)\n", fsym_ptr->name,
                   static_cast<unsigned long long>(fsym_ptr->start_addr),
                   static_cast<unsigned long long>(fsym_ptr->end_addr));
            ++fsym_ptr;
        }
        printf("\n");
        ++addr_ptr;
    }
}
void post_init_cb(Caliper* c, Channel* chn) {
......@@ -326,12 +406,15 @@ private:
return;
}
if (!parse_proc_map("", chn)) {
Log(1).stream() << chn->name() << ": DcdbPusher: Failed to init proc_map"
if (!setup_shm(chn)) {
Log(1).stream() << chn->name() << ": DcdbPusher: Failed to setup shm"
<< std::endl;
return;
}
print_debug_shm();
return;
sock = socket(AF_UNIX, SOCK_SEQPACKET, 0);
if (sock == -1) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment