Currently job artifacts in CI/CD pipelines on LRZ GitLab never expire. Starting from Wed 26.1.2022 the default expiration time will be 30 days (GitLab default). Currently existing artifacts in already completed jobs will not be affected by the change. The latest artifacts for all jobs in the latest successful pipelines will be kept. More information: https://gitlab.lrz.de/help/user/admin_area/settings/continuous_integration.html#default-artifacts-expiration

Commit de3a4abd authored by Micha Müller's avatar Micha Müller
Browse files

Caliper-service rework WIP8

-detect a subset of symbols in shared library even if stripped
-fix a critical bug which led to skipping of function symbols
-both changes increase the memory overhead drastically...
parent b50a8424
......@@ -153,7 +153,6 @@ private:
+ 2*sizeof(sem_t)
+ MSGQ_SIZE*sizeof(snap_data);
//TODO max one application per node? multiple threads per node OK, what about multiple (MPI-)processes
//TODO close shm file at end
//TODO dynamic rebuild at runtime
void* shm; // pointer to shared memory object
int shm_file; // fd of the underlying shared memory file
......@@ -208,20 +207,42 @@ private:
while ((scn = elf_nextscn(elf, scn)) != NULL) {
gelf_getshdr(scn, &shdr);
if (shdr.sh_type == SHT_SYMTAB) {
/* found a symbol table */
/* found symbol table */
data = elf_getdata(scn, NULL);
break;
}
}
data = elf_getdata(scn, NULL);
if (shdr.sh_entsize == 0) {
Log(1).stream() << chn->name() << ": DcdbPusher: Section size zero" << std::endl;
//debug
Log(1).stream() << chn->name() << ": DcdbPusher: No symbol table present" << std::endl;
if (ehdr.e_type == ET_DYN) {
Log(1).stream() << chn->name() << ": DcdbPusher: Falling back to dynamic symtab" << std::endl;
scn = NULL;
while ((scn = elf_nextscn(elf, scn)) != NULL) {
gelf_getshdr(scn, &shdr);
if (shdr.sh_type == SHT_DYNSYM) {
/* found dynamic symbol table */
data = elf_getdata(scn, NULL);
break;
}
}
if (scn == NULL || shdr.sh_entsize == 0) {
Log(1).stream() << chn->name() << ": DcdbPusher: Absolutely no symbols found" << std::endl;
return 0;
}
} else {
return 0;
}
}
//retrieve symbol data
size_t entryCnt = 0;
int count = shdr.sh_size / (shdr.sh_entsize ?: 1);
int count = shdr.sh_size / shdr.sh_entsize;
//debug
//printf("Section has %d symbols\n", count);
for (int ii = 0; ii < count; ++ii) {
GElf_Sym sym;
......@@ -231,8 +252,9 @@ private:
// continue;
//}
//only interested in symbols related to executable code
if (sym.st_info != STT_FUNC) {
if (GELF_ST_TYPE(sym.st_info) != STT_FUNC || //only interested in symbols related to executable code
sym.st_shndx == SHN_UNDEF || //external symbol
sym.st_shndx == SHN_ABS) { //absolute symbol, unlikely for STT_FUNC
continue;
}
......@@ -244,6 +266,7 @@ private:
symstr = elf_strptr(elf, shdr.sh_link, sym.st_name);
if (symstr != NULL) {
/* Demangle if necessary. Require GNU v3 ABI by the "_Z" prefix. */
if (symstr[0] == '_' && symstr[1] == 'Z') {
dsymstr = abi::__cxa_demangle(symstr, NULL, NULL, &status);
......@@ -252,22 +275,16 @@ private:
if (status == 0) {
strncpy(symdat.name, dsymstr, 512);
free((void*) dsymstr);
} else if (symstr != NULL) {
} else {
strncpy(symdat.name, symstr, 512);
}
symdat.name[MAX_SYMBOL_SIZE-1] = '\0';
} else {
symdat.name[0] = '\0';
}
symdat.name[511] = '\0';
//resolve symbol value aka its address in this' process virtual memory
if (sym.st_shndx == SHN_UNDEF || sym.st_shndx == SHN_ABS) {
//external or absolute symbol
//are usually not encountered for sym.st_info == STT_FUNC
continue;
} else {
//internally defined symbol
symdat.start_addr = sym_offset + sym.st_value;
}
symdat.end_addr = symdat.start_addr + sym.st_size - 1;
if (symdat.start_addr >= start_addr &&
......@@ -298,6 +315,13 @@ private:
}
}
//debug
// if (shdr.sh_type == SHT_DYNSYM) {
// printf("Retrieved %d symbols of dynsym\n", entryCnt);
// } else {
// printf("Retrieved %d symbols of symtab\n", entryCnt);
// }
elf_end(elf);
close(fd);
return entryCnt;
......@@ -308,7 +332,10 @@ private:
* which are marked as executable from /proc/<pid>/maps.
* Address ranges associated to a binary ELF file will be enriched with
* symbol data.
* TODO demangle different languages? (C, C++, Fortran, other?)
* TODO demangle different languages? (C -> no mangling,
* C++ -> demangling implemented,
* Fortran -> mangling compiler dependent,
* other languages?)
*/
bool setup_shm(Channel* chn) {
FILE* file;
......@@ -380,7 +407,7 @@ private:
addr_ptr->fsym_offset = fsym_offset;
if (addr_ptr->pathname[0] == '/') {
//debug
// printf("Parsing symbols for %s (%llx-%llx; %llx)\n", addr_ptr->pathname, addr_ptr->start_addr, addr_ptr->end_addr, addr_ptr->offset);
//printf("Parsing symbols for %s (%llx-%llx; %llx)\n", addr_ptr->pathname, addr_ptr->start_addr, addr_ptr->end_addr, addr_ptr->offset);
addr_ptr->fsym_count = write_function_symbols(addr_ptr->pathname,
addr_ptr->start_addr,
addr_ptr->end_addr,
......
......@@ -239,7 +239,6 @@ void CaliperSensorGroup::read() {
*(reinterpret_cast<size_t*>(static_cast<char*>(_shm))) = w_index;
sem_post(r_sem);
//TODO process snapshots
LOG(debug) << "Processing " << nelems << " snapshots";
for (size_t i = 0; i < nelems; ++i) {
......@@ -269,7 +268,7 @@ void CaliperSensorGroup::read() {
} //It's OK if we found no symbol. There are possibly none
//store in sensors
//TODO aggregate values
//TODO aggregate values (see header file)
S_Ptr s;
auto it = _sensorIndex.find(sName);
if(it != _sensorIndex.end()) {
......
......@@ -137,6 +137,7 @@ private:
std::atomic_flag _lock; ///< Lock to synchronize access to associated sensors
//TODO aggregate sensor values: store pair of S_Ptr and counter. Only push values at end of iteration
// Problem: each value has a different unique timestamp. We would lose information
std::unordered_map<std::string, S_Ptr> _sensorIndex; ///< Additional sensor storage for fast lookup
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment