root/dlsym_default.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. elf_hash
  2. elf_gnu_hash
  3. elf_gnu_hash
  4. elf_gnu_hash
  5. hash_first
  6. hash_next
  7. symbol_name
  8. version_name
  9. get_dt_tags
  10. dlsym_default_internal

   1 /****************************************************************************
   2  *   Copyright (C) 2014 by Gene Cooperman                                   *
   3  *   gene@ccs.neu.edu                                                       *
   4  *                                                                          *
   5  *  This file is part of DMTCP.                                             *
   6  *                                                                          *
   7  *  DMTCP is free software: you can redistribute it and/or                  *
   8  *  modify it under the terms of the GNU Lesser General Public License as   *
   9  *  published by the Free Software Foundation, either version 3 of the      *
  10  *  License, or (at your option) any later version.                         *
  11  *                                                                          *
  12  *  DMTCP is distributed in the hope that it will be useful,                *
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of          *
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
  15  *  GNU Lesser General Public License for more details.                     *
  16  *                                                                          *
  17  *  You should have received a copy of the GNU Lesser General Public        *
  18  *  License along with DMTCP:dmtcp/src.  If not, see                        *
  19  *  <http://www.gnu.org/licenses/>.                                         *
  20  ****************************************************************************/
  21 
  22 /* USAGE:
  23  * #include "dlsym_default.h"
  24  * ... DLSYM_DEFAULT(RTLD_NEXT, ...) ...
  25  * WARNING:  DLSYM_DEFAULT works within a library, but not in base executable
  26  * WARNING:  RTLD_DEFAULT will not work with DLSYM_DEFAULT()
  27  */
  28 
   29 /* THEORY:  A versioned symbol consists of multiple symbols, one for
   30  * each version.  Each symbol entry in the dynsym section (which becomes the
   31  * same as the symtab section when loaded in memory) should have a
   32  * corresponding entry in the versym section.  So, the versym array values
   33  * can be viewed as an extra field of the symtab array of structs.
   34  * The versym entry (value) is a version index to the version string
   35  * for that symbol.  The version string is indicated by an entry in the
   36  * verdef section with the same version index.  (verdef lists version names)
   37  *     The versym entry can also have the 'hidden' bit (bit 15) set.  For a
   38  * given symbol name, there should be exactly one symbol of that name for
  39  * which the hidden bit is not set.  This is the default version.  The normal
  40  * "static linker" (at load time) should only link to the base version (version
  41  * given by index 1 or 2 in the versym section).  The "dynamic linker"
  42  * (invoked by dlopen) tries first for a base version, and if not found,
  43  * then hopes for a unique versioned symbol.  (It seems that in all of
  44  * the above, the linker will always ignore a hidden symbol for these
  45  * purposes.  Unfortunately, dlsym doesn't follow the same policy as the
  46  * static or dynamic linker.  Hence, dlsym_default_internal tries to replicate
  47  * that policy of preferring non-hidden symbols always.)
  48  *     The symbol pthread_cond_broadcast is a good test case.  It seems to
  49  * have its base version referenced as a hidden symbol, and only a non-base
  50  * version exists as unhidden.  Unfortunately, dlsym still chooses the
  51  * hidden base definition.
  52  *     Is this a bug in dlsym?  Or maybe just a bug in the 'man dlsym'
  53  * description?  Since versioning is not POSIX, it's difficult to say.
  54  */
  55 
  56 // Uncomment this to see what symbols and versions are chosen.
  57 // #define VERBOSE
  58 
  59 #define _GNU_SOURCE
  60 #include <link.h>
  61 #include <stdio.h>
  62 #include <stdlib.h>
  63 #include <assert.h>
  64 #include <string.h>
  65 
  66 #define _GNU_SOURCE
  67 #include <dlfcn.h>
  68 
  69 #include "config.h"
  70 
  71 // ***** NOTE:  link.h invokes elf.h, which:
  72 // *****        expands ElfW(Word)  to  Elf64_Word; and then defines:
   73 // *****        typedef uint32_t Elf64_Word;
  74 
  75 // older sysv standard
  76 static unsigned long elf_hash(const char *name) {
  77   unsigned long h = 0, g;
  78   while (*name) {
  79     h = (h << 4) + *name++;
  80     if ((g = h & 0xf0000000))
  81       h ^= g >> 24;
  82       h &= ~g;
  83   }
  84   return h;
  85 }
  86 
  87 // For GNU standard, below, see:
  88 //   https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
  89 //   http://deroko.phearless.org/dt_gnu_hash.txt
  90 //   glibc:elf/dl-lookup.c:do_lookup_x()
  91 //     See:  dl_setup_hash()  and  Elf32_Word bucket = map->l_gnu_buckets  ...
  92 
  93 // GNU standard
  94 #if 0
  95 static uint32_t elf_gnu_hash(const char *s) {
  96   uint32_t h = 5381;
  97   unsigned char c;
  98   for (c = *s; c != '\0'; c = *++s)
  99     h = h * 33 + c;
 100   return h;
 101 }
 102 #elif 0
 103 // From binutils:bfd/elf.c:bfd_elf_gnu_hash()
 104 unsigned long elf_gnu_hash (const char *namearg)
 105 {
 106   const unsigned char *name = (const unsigned char *) namearg;
 107   unsigned long h = 5381;
 108   unsigned char ch;
 109 
 110   while ((ch = *name++) != '\0')
 111     h = (h << 5) + h + ch;
 112   return h & 0xffffffff;
 113 }
 114 #else
 115 // From glibc-2.19
 116 static uint_fast32_t elf_gnu_hash (const char *s)
 117 {
 118   uint_fast32_t h = 5381;
 119   unsigned char c;
 120   for (c = *s; c != '\0'; c = *++s)
 121     h = h * 33 + c;
 122   return h & 0xffffffff;
 123 }
 124 #endif
 125 
 126 static Elf32_Word hash_first(const char *name, Elf32_Word *hash_table,
 127                              int use_gnu_hash) {
 128   if (use_gnu_hash) {
 129     uint32_t nbuckets = ((uint32_t*)hash_table)[0];
 130     // uint32_t symndx = ((uint32_t*)hash_table)[1];
 131     uint32_t maskwords = ((uint32_t*)hash_table)[2];
 132     uint32_t *buckets = (uint32_t *)
 133       ((char *)hash_table + 4*sizeof(uint32_t) + maskwords*sizeof(long unsigned int));
 134     // uint32_t *hashval = & buckets[nbuckets];
 135     if (buckets[elf_gnu_hash(name) % nbuckets])
 136       return buckets[elf_gnu_hash(name) % nbuckets];
 137     else
 138       return STN_UNDEF;
 139   } else {
 140     // http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash
 141     Elf32_Word nbucket = *hash_table++;
 142     hash_table++; // Elf32_Word nchain = *hash_table++; // Note: nchain same as n_symtab
 143     Elf32_Word *bucket = hash_table;
 144     // Elf32_Word *chain = hash_table + nbucket;
 145     return bucket[elf_hash(name) % nbucket]; // return index into symbol table
 146   }
 147 }
 148 
 149 static Elf32_Word hash_next(Elf32_Word index, Elf32_Word *hash_table,
 150                             int use_gnu_hash) {
 151   if (use_gnu_hash) {
 152     assert( index > STN_UNDEF );
 153     uint32_t nbuckets = ((uint32_t*)hash_table)[0];
 154     uint32_t symndx = ((uint32_t*)hash_table)[1];
 155     uint32_t maskwords = ((uint32_t*)hash_table)[2];
 156     uint32_t *hashval = (uint32_t *)
 157       ((char *)hash_table + 4*sizeof(uint32_t) /* sizeof header */
 158        + maskwords*sizeof(long unsigned int) /* sizeof Bloom filter */
 159        + nbuckets*sizeof(Elf32_Word) /* sizeof hash buckets */
 160       );
 161     if (hashval[index - symndx] & 1)
 162       return STN_UNDEF;  // end-of-chain indicator
 163     else
 164       return index+1;
 165   } else {
 166     Elf32_Word nbucket = *hash_table++;
 167     hash_table++; // Elf32_Word nchain = *hash_table++;
 168     // Elf32_Word *bucket = hash_table;
 169     Elf32_Word *chain = hash_table + nbucket;
 170     return chain[index]; // If this returns STN_UNDEF, then it's end of chain
 171   }
 172 }
 173 
 174 typedef struct dt_tag{
 175     char *base_addr; /* Base address shared object is loaded at. */
 176     // ElfW(Sym) *dynsym; // On disk, dynsym would be dynamic symbols only
 177     ElfW(Sym) *symtab; // Same as dynsym, for in-memory symbol table.
 178     // ElfW(Word) n_symtab;
 179     ElfW(Half) *versym;
 180     /* elf.h lies.  DT_VERDEF is offset from base_addr, not addr. */
 181     ElfW(Verdef) *verdef;
 182     ElfW(Word) verdefnum;
 183     // ElfW(Word) first_ext_def;
 184     char *strtab;
 185     Elf32_Word *hash;
 186     Elf32_Word *gnu_hash;
 187 } dt_tag;
 188 
 189 static char *symbol_name(int i, dt_tag *tags) {
 190   return tags->strtab + tags->symtab[i].st_name;
 191 }
 192 
 193 static char *version_name(ElfW(Word) version_ndx, dt_tag *tags) {
 194     ElfW(Verdef) *cur, *prev;
 195 
 196     // Remove hidden bit, if it's set.
 197     if (version_ndx & (1<<15))
 198       version_ndx -= (1<<15);
 199     // Walk the list of all versions.
 200     for (prev = NULL, cur =
 201           (ElfW(Verdef)*)(tags->base_addr + (unsigned long int)(tags->verdef));
 202          // Could alternatively use verdefnum (DT_VERDEFNUM) here.
 203          cur != prev;
 204          prev = cur, cur = (ElfW(Verdef)*)(((char *)cur)+cur->vd_next))
 205     {
 206       assert (cur->vd_version == 1);
 207       if (cur->vd_ndx == version_ndx) {
 208         ElfW(Verdaux) *first = (ElfW(Verdaux) *)(((char *)cur)+cur->vd_aux);
 209         return tags->strtab + first->vda_name;
 210       }
 211     }
 212     return NULL;  // failed to find version name
 213 }
 214 
 215 // Note that the dynamic section is usually also a segment by itself.
 216 // [ 'readelf -l libXXX.so' to verify. ]
 217 // So, we don't need the object handle.  Its base address is enough,
 218 //   and we can then read the program header to get the right segment.
 219 // Also, the _DYNAMIC symbol in a section should also be a pointer to
 220 //   the address of the dynamic section.  (See comment in /usr/include/link.h)
 221 static void get_dt_tags(void *handle, dt_tag *tags) {
 222     struct link_map *link_map;  // from /usr/include/link.h
 223     if (dlinfo(handle, RTLD_DI_LINKMAP, &link_map) == -1)
 224       printf("ERROR: %s\n", dlerror());
 225     ElfW(Dyn) *dyn = link_map -> l_ld;  // from /usr/include/link.h
 226     // http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#dynamic_section
 227     /* Base address shared object is loaded at. (from /usr/include/lnik.h) */
 228     tags->base_addr = (char *)(link_map -> l_addr);
 229 
 230     tags->symtab = NULL;
 231     tags->versym = NULL;
 232     tags->verdef = NULL;
 233     tags->strtab = NULL;
 234     tags->hash = NULL;
 235     tags->gnu_hash = NULL;
 236     tags->verdefnum = 0;
 237 
 238     ElfW(Dyn) *cur_dyn;
 239     // The _DYNAMIC symbol should be pointer to address of the dynamic section.
 240     // printf("dyn: %p; _DYNAMIC: %p\n", dyn, _DYNAMIC);
 241     for (cur_dyn = dyn; cur_dyn->d_tag != DT_NULL;  cur_dyn++) {
 242       if (cur_dyn->d_tag == DT_VERSYM)
 243         tags->versym = (void *)cur_dyn->d_un.d_ptr;
 244       if (cur_dyn->d_tag == DT_VERDEF)
 245         tags->verdef = (void *)cur_dyn->d_un.d_ptr;
 246       if (cur_dyn->d_tag == DT_VERDEFNUM)
 247         tags->verdefnum = (ElfW(Word))cur_dyn->d_un.d_val;
 248       if (cur_dyn->d_tag == DT_STRTAB && tags->strtab == 0)
 249         tags->strtab = (void *)cur_dyn->d_un.d_ptr;
 250       // Not DT_DYNSYM, since only dynsym section loaded into RAM; not symtab.??
 251       //   So, DT_SYMTAB refers to dynsym section ??
 252       if (cur_dyn->d_tag == DT_SYMTAB)
 253         tags->symtab = (void *)cur_dyn->d_un.d_ptr;
 254       if (cur_dyn->d_tag == DT_HASH)
 255         tags->hash = (void *)cur_dyn->d_un.d_ptr;
 256 #ifdef HAS_GNU_HASH
 257       if (cur_dyn->d_tag == DT_GNU_HASH)
 258         tags->gnu_hash = (void *)cur_dyn->d_un.d_ptr;
 259 #endif
 260       //if (cur_dyn->d_tag == DT_MIPS_SYMTABNO) // Number of DYNSYM entries
 261       //  n_symtab = (ElfW(Word))cur_dyn->d_un.d_val;
 262       //if (cur_dyn->d_tag == DT_MIPS_UNREFEXTNO)  // First external DYNSYM
 263       //  first_ext_def = (ElfW(Word))cur_dyn->d_un.d_val;  // first dynsym entry??
 264     }
 265 }
 266 
 267 //  Don't use dlsym_default_internal(); use dlsym_default.h:DLSYM_DEFAULT()
 268 void *dlsym_default_internal(void *handle, const char*symbol) {
 269   dt_tag tags;
 270   Elf32_Word default_symbol_index = 0;
 271   Elf32_Word i;
 272 
 273 #ifdef __USE_GNU
 274   if (handle == RTLD_NEXT || handle == RTLD_DEFAULT) {
 275     Dl_info info;
 276     void *tmp_fnc = dlsym(handle, symbol);  // Hack: get symbol w/ any version
 277     // printf("tmp_fnc: %p\n", tmp_fnc);
 278     dladdr(tmp_fnc, &info);
 279     // ... and find what library the symbol is in
 280    printf("info.dli_fname: %s\n", info.dli_fname);
 281 #if 0
 282 char *tmp = info.dli_fname;
 283 char *basename = tmp;
 284 for ( ; *tmp != '\0'; tmp++ ) {
 285   if (*tmp == '/')
 286     basename = tmp+1;
 287 }
 288 #endif
 289     // Found handle of RTLD_NEXT or RTLD_DEFAULT
 290     handle = dlopen(info.dli_fname, RTLD_NOLOAD | RTLD_LAZY);
 291     // symbol name is:  info.dli_sname;  Could add assert as double-check.
 292     if (handle == NULL)
 293       printf("ERROR:  RTLD_DEFAULT or RTLD_NEXT called; no library found.\n");
 294     // Could try:  dlopen(info.dli_fname, RTLD_LOCAL|RTLD_LAZY); to get handle
 295     // But if library wasn't loaded before, we shouldn't load it now.
 296   }
 297   // An alternative to the above code is to use dl_iterate_phdr() to walk the
 298   //   list of loaded libraries, and for each one, hash on the symbol name
 299   //   to see if it's contained in that one.  But dl_iterate_phdr gives you
 300   //   the base address of the shared object.
 301   // dlopen(NULL); provides a handle for main program.  dlinfo can then get
 302   //   dynamic section (see get_dt_tags()), and also the link_map.
 303   //   When we find a shared object with our symbol in it, the link_map
 304   //   will give us the name, and dlopen (w/ NOLOAD?) on it gives us a handle.
 305   // A better way might be to start with any library handle at all: dlopen
 306   //   Then call dlinfo(handle, RTLD_DI_LINKMAP, &link_map);
 307   //   for: 'struct link_map &link_map;'  and follow get_dt_tags() for find
 308   //   info from dynamic section.
 309 #endif
 310 
 311   get_dt_tags(handle, &tags);
 312   assert(tags.hash != NULL || tags.gnu_hash != NULL);
 313   int use_gnu_hash = (tags.hash == NULL);
 314   Elf32_Word *hash = (use_gnu_hash ? tags.gnu_hash : tags.hash);
 315   for (i = hash_first(symbol, hash, use_gnu_hash); i != STN_UNDEF;
 316        i = hash_next(i, hash, use_gnu_hash)) {
 317     if (tags.symtab[i].st_name == 0 || tags.symtab[i].st_value == 0)
 318       continue;
 319     if (strcmp(symbol_name(i, &tags), symbol) != 0) // If different symbol name
 320       continue;
 321     // We have a symbol of the same name.  Let's look at the version number.
 322     if ( !(tags.versym[i] & (1<<15)) ) { // If hidden bit is not set.
 323       // If default symbol not set or if new version later than old one.
 324       // Notice that default_symbol_index will be set first to the
 325       //  base definition (1 for unversioned symbols; 2 for versioned symbols)
 326 if (default_symbol_index) {
 327   printf("WARNING:  More than one default symbol version.\n");
 328 }
 329       if (!default_symbol_index ||
 330           // Could look at version dependencies, but using strcmp instead.
 331           strcmp(version_name(tags.versym[i], &tags),
 332                  version_name(tags.versym[default_symbol_index], &tags)) > 0) {
 333         default_symbol_index = i;
 334       }
 335     }
 336   }
 337 #ifdef VERBOSE
 338   if (default_symbol_index) {
 339     printf("** st_value: %p\n",
 340            tags.base_addr + tags.symtab[default_symbol_index].st_value);
 341     printf("** symbol version: %s\n",
 342            version_name(tags.versym[default_symbol_index], &tags));
 343   }
 344 #endif
 345   if (!default_symbol_index) {
 346     printf("ERROR:  No default symbol version found for %s.\n"
 347            "        Extend code to look for hidden symbols?\n", symbol);
 348   }
 349   if (default_symbol_index)
 350     return tags.base_addr + tags.symtab[default_symbol_index].st_value;
 351   else
 352     assert(0);
 353     return NULL;
 354 }

/* [<][>][^][v][top][bottom][index][help] */