root/dlsym_default.c
/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes following definitions.
- elf_hash
- elf_gnu_hash
- elf_gnu_hash
- elf_gnu_hash
- hash_first
- hash_next
- symbol_name
- version_name
- get_dt_tags
- dlsym_default_internal
1 /****************************************************************************
2 * Copyright (C) 2014 by Gene Cooperman *
3 * gene@ccs.neu.edu *
4 * *
5 * This file is part of DMTCP. *
6 * *
7 * DMTCP is free software: you can redistribute it and/or *
8 * modify it under the terms of the GNU Lesser General Public License as *
9 * published by the Free Software Foundation, either version 3 of the *
10 * License, or (at your option) any later version. *
11 * *
12 * DMTCP is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU Lesser General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU Lesser General Public *
18 * License along with DMTCP:dmtcp/src. If not, see *
19 * <http://www.gnu.org/licenses/>. *
20 ****************************************************************************/
21
22 /* USAGE:
23 * #include "dlsym_default.h"
24 * ... DLSYM_DEFAULT(RTLD_NEXT, ...) ...
25 * WARNING: DLSYM_DEFAULT works within a library, but not in base executable
26 * WARNING: RTLD_DEFAULT will not work with DLSYM_DEFAULT()
27 */
28
29 /* THEORY: A versioned symbol consists of multiple symbols, one for
30 * each version. Each symbol entry in the dynsym section (which becomes the
31 * same as the symtab section when loaded in memory) should have a
32 * corresponding entry in the symtab section. So, the dynsym array values
33 * can be viewed as an extra field of the symtab array of structs.
34 * The dynsym entry (value) is a version index to the version string
35 * for that symbol. The version string is indicated by an entry in the
36 * versym section with the same version index. (versym is an array of strings)
37 * The dynsym entry can also have the 'hidden' bit (bit 15) set. For a
38 * given symbol name, there should be exactly one symbol of that name for
39 * which the hidden bit is not set. This is the default version. The normal
40 * "static linker" (at load time) should only link to the base version (version
41 * given by index 1 or 2 in the versym section). The "dynamic linker"
42 * (invoked by dlopen) tries first for a base version, and if not found,
43 * then hopes for a unique versioned symbol. (It seems that in all of
44 * the above, the linker will always ignore a hidden symbol for these
45 * purposes. Unfortunately, dlsym doesn't follow the same policy as the
46 * static or dynamic linker. Hence, dlsym_default_internal tries to replicate
47 * that policy of preferring non-hidden symbols always.)
48 * The symbol pthread_cond_broadcast is a good test case. It seems to
49 * have its base version referenced as a hidden symbol, and only a non-base
50 * version exists as unhidden. Unfortunately, dlsym still chooses the
51 * hidden base definition.
52 * Is this a bug in dlsym? Or maybe just a bug in the 'man dlsym'
53 * description? Since versioning is not POSIX, it's difficult to say.
54 */
55
56 // Uncomment this to see what symbols and versions are chosen.
57 // #define VERBOSE
58
59 #define _GNU_SOURCE
60 #include <link.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <assert.h>
64 #include <string.h>
65
66 #define _GNU_SOURCE
67 #include <dlfcn.h>
68
69 #include "config.h"
70
71 // ***** NOTE: link.h invokes elf.h, which:
72 // ***** expands ElfW(Word) to Elf64_Word; and then defines:
73 // ***** typedef uint32_t Elf64_Word;
74
75 // older sysv standard
/*
 * System V ELF symbol hash, i.e. the hash used by DT_HASH tables.
 *
 * name: NUL-terminated symbol name.
 * Returns the 32-bit hash value (widened to unsigned long).
 *
 * The accumulator is exactly 32 bits wide and each byte is read as
 * unsigned char.  With an 'unsigned long' accumulator on LP64 the
 * "(h << 4) + c" step can carry into bit 32 and never be cleared, and with a
 * signed plain 'char' a byte >= 0x80 would be sign-extended; either way the
 * value would no longer match the one the linker used to build the table.
 */
static unsigned long elf_hash(const char *name) {
  const unsigned char *p = (const unsigned char *)name;
  uint32_t h = 0;

  while (*p != '\0') {
    uint32_t g;

    h = (h << 4) + *p++;
    g = h & 0xf0000000u;      /* top nibble of the 32-bit value */
    if (g != 0) {
      h ^= g >> 24;           /* fold the top nibble back into bits 4..7 */
    }
    h &= ~g;                  /* and clear it, keeping h below 2^28 */
  }
  return h;
}
86
87 // For GNU standard, below, see:
88 // https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
89 // http://deroko.phearless.org/dt_gnu_hash.txt
90 // glibc:elf/dl-lookup.c:do_lookup_x()
91 // See: dl_setup_hash() and Elf32_Word bucket = map->l_gnu_buckets ...
92
93 // GNU standard
/*
 * GNU-style symbol hash used by DT_GNU_HASH tables: start at 5381 and, for
 * every byte of the name, multiply by 33 and add the byte (read as
 * unsigned char).  This is the glibc-2.19 formulation; the binutils
 * bfd_elf_gnu_hash() variant "(h << 5) + h + ch" computes the same value.
 *
 * s: NUL-terminated symbol name.
 * Returns the hash truncated to 32 bits; the mask matters because
 * uint_fast32_t may be wider than 32 bits.
 */
static uint_fast32_t elf_gnu_hash (const char *s)
{
  const unsigned char *p = (const unsigned char *) s;
  uint_fast32_t h = 5381;

  while (*p != '\0') {
    h = h * 33 + *p++;
  }
  return h & 0xffffffff;
}
125
/*
 * Return the symbol-table index that heads the hash chain in which 'name'
 * would be found, or STN_UNDEF if that bucket is empty.
 *
 * name:         NUL-terminated symbol name to hash.
 * hash_table:   start of the DT_GNU_HASH table when use_gnu_hash is non-zero,
 *               otherwise start of the DT_HASH (SysV) table.
 * use_gnu_hash: selects which table layout and hash function to use.
 *
 * The returned entry only shares a bucket with 'name'; the caller still has
 * to compare symbol names while following the chain with hash_next().
 * A table whose bucket count is 0 is treated as empty rather than dividing
 * by zero.
 */
static Elf32_Word hash_first(const char *name, Elf32_Word *hash_table,
                             int use_gnu_hash) {
  if (use_gnu_hash) {
    /* Header is four 32-bit words: nbuckets, symndx, maskwords, shift.
     * It is followed by 'maskwords' Bloom-filter words (sized here as
     * long unsigned int) and then by the bucket array. */
    const uint32_t *header = (const uint32_t *)hash_table;
    uint32_t nbuckets = header[0];
    uint32_t maskwords = header[2];
    const uint32_t *buckets = (const uint32_t *)
      ((const char *)hash_table + 4 * sizeof(uint32_t)
                                + maskwords * sizeof(long unsigned int));

    if (nbuckets == 0) {
      return STN_UNDEF;
    }
    /* An empty bucket holds 0, which is STN_UNDEF. */
    return buckets[elf_gnu_hash(name) % nbuckets];
  } else {
    // http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash
    /* Layout: nbucket, nchain, bucket[nbucket], chain[nchain]. */
    Elf32_Word nbucket = hash_table[0];
    const Elf32_Word *bucket = hash_table + 2;

    if (nbucket == 0) {
      return STN_UNDEF;
    }
    return bucket[elf_hash(name) % nbucket];  // index into symbol table
  }
}
148
/*
 * Step from symbol-table entry 'index' to the next entry of the same hash
 * chain; returns STN_UNDEF when 'index' was the last entry.
 *
 * index:        current symbol-table index (as returned by hash_first() or
 *               by a previous call to this function).
 * hash_table:   start of the DT_GNU_HASH or DT_HASH table.
 * use_gnu_hash: non-zero for the GNU layout, zero for the SysV layout.
 */
static Elf32_Word hash_next(Elf32_Word index, Elf32_Word *hash_table,
                            int use_gnu_hash) {
  if (!use_gnu_hash) {
    /* SysV layout: nbucket, nchain, bucket[nbucket], chain[nchain].
     * chain[index] is the next symbol index, or STN_UNDEF at the end. */
    Elf32_Word bucket_count = hash_table[0];
    const Elf32_Word *chains = hash_table + 2 + bucket_count;

    return chains[index];
  }

  assert( index > STN_UNDEF );

  /* GNU layout: 4-word header, Bloom filter, buckets, then one hash value
   * per symbol starting at symbol index 'first_hashed_sym'. */
  const uint32_t *header = (const uint32_t *)hash_table;
  uint32_t bucket_count = header[0];
  uint32_t first_hashed_sym = header[1];
  uint32_t bloom_words = header[2];
  size_t values_offset = 4 * sizeof(uint32_t)                    /* header */
                       + bloom_words * sizeof(long unsigned int) /* Bloom filter */
                       + bucket_count * sizeof(Elf32_Word);      /* buckets */
  const uint32_t *hash_values =
    (const uint32_t *)((const char *)hash_table + values_offset);

  /* Chains are runs of consecutive symbols; bit 0 of the stored hash value
   * marks the final symbol of a run. */
  int is_last = (hash_values[index - first_hashed_sym] & 1) != 0;
  return is_last ? STN_UNDEF : index + 1;
}
173
/*
 * Values collected from one loaded object's dynamic section (filled in by
 * get_dt_tags()), plus the object's load address.
 */
typedef struct dt_tag {
  /* Base address shared object is loaded at. */
  char *base_addr;

  /* In-memory symbol table (DT_SYMTAB).  On disk this would be the dynsym
   * section, i.e. dynamic symbols only.  Its entry count is not recorded. */
  ElfW(Sym) *symtab;

  /* Per-symbol version indices (DT_VERSYM). */
  ElfW(Half) *versym;

  /* elf.h lies.  DT_VERDEF is offset from base_addr, not addr. */
  ElfW(Verdef) *verdef;

  /* Number of version definitions (DT_VERDEFNUM). */
  ElfW(Word) verdefnum;

  /* String table (DT_STRTAB). */
  char *strtab;

  /* SysV hash table (DT_HASH) and GNU hash table (DT_GNU_HASH). */
  Elf32_Word *hash;
  Elf32_Word *gnu_hash;
} dt_tag;
188
189 static char *symbol_name(int i, dt_tag *tags) {
190 return tags->strtab + tags->symtab[i].st_name;
191 }
192
193 static char *version_name(ElfW(Word) version_ndx, dt_tag *tags) {
194 ElfW(Verdef) *cur, *prev;
195
196 // Remove hidden bit, if it's set.
197 if (version_ndx & (1<<15))
198 version_ndx -= (1<<15);
199 // Walk the list of all versions.
200 for (prev = NULL, cur =
201 (ElfW(Verdef)*)(tags->base_addr + (unsigned long int)(tags->verdef));
202 // Could alternatively use verdefnum (DT_VERDEFNUM) here.
203 cur != prev;
204 prev = cur, cur = (ElfW(Verdef)*)(((char *)cur)+cur->vd_next))
205 {
206 assert (cur->vd_version == 1);
207 if (cur->vd_ndx == version_ndx) {
208 ElfW(Verdaux) *first = (ElfW(Verdaux) *)(((char *)cur)+cur->vd_aux);
209 return tags->strtab + first->vda_name;
210 }
211 }
212 return NULL; // failed to find version name
213 }
214
215 // Note that the dynamic section is usually also a segment by itself.
216 // [ 'readelf -l libXXX.so' to verify. ]
217 // So, we don't need the object handle. Its base address is enough,
218 // and we can then read the program header to get the right segment.
219 // Also, the _DYNAMIC symbol in a section should also be a pointer to
220 // the address of the dynamic section. (See comment in /usr/include/link.h)
221 static void get_dt_tags(void *handle, dt_tag *tags) {
222 struct link_map *link_map; // from /usr/include/link.h
223 if (dlinfo(handle, RTLD_DI_LINKMAP, &link_map) == -1)
224 printf("ERROR: %s\n", dlerror());
225 ElfW(Dyn) *dyn = link_map -> l_ld; // from /usr/include/link.h
226 // http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#dynamic_section
227 /* Base address shared object is loaded at. (from /usr/include/lnik.h) */
228 tags->base_addr = (char *)(link_map -> l_addr);
229
230 tags->symtab = NULL;
231 tags->versym = NULL;
232 tags->verdef = NULL;
233 tags->strtab = NULL;
234 tags->hash = NULL;
235 tags->gnu_hash = NULL;
236 tags->verdefnum = 0;
237
238 ElfW(Dyn) *cur_dyn;
239 // The _DYNAMIC symbol should be pointer to address of the dynamic section.
240 // printf("dyn: %p; _DYNAMIC: %p\n", dyn, _DYNAMIC);
241 for (cur_dyn = dyn; cur_dyn->d_tag != DT_NULL; cur_dyn++) {
242 if (cur_dyn->d_tag == DT_VERSYM)
243 tags->versym = (void *)cur_dyn->d_un.d_ptr;
244 if (cur_dyn->d_tag == DT_VERDEF)
245 tags->verdef = (void *)cur_dyn->d_un.d_ptr;
246 if (cur_dyn->d_tag == DT_VERDEFNUM)
247 tags->verdefnum = (ElfW(Word))cur_dyn->d_un.d_val;
248 if (cur_dyn->d_tag == DT_STRTAB && tags->strtab == 0)
249 tags->strtab = (void *)cur_dyn->d_un.d_ptr;
250 // Not DT_DYNSYM, since only dynsym section loaded into RAM; not symtab.??
251 // So, DT_SYMTAB refers to dynsym section ??
252 if (cur_dyn->d_tag == DT_SYMTAB)
253 tags->symtab = (void *)cur_dyn->d_un.d_ptr;
254 if (cur_dyn->d_tag == DT_HASH)
255 tags->hash = (void *)cur_dyn->d_un.d_ptr;
256 #ifdef HAS_GNU_HASH
257 if (cur_dyn->d_tag == DT_GNU_HASH)
258 tags->gnu_hash = (void *)cur_dyn->d_un.d_ptr;
259 #endif
260 //if (cur_dyn->d_tag == DT_MIPS_SYMTABNO) // Number of DYNSYM entries
261 // n_symtab = (ElfW(Word))cur_dyn->d_un.d_val;
262 //if (cur_dyn->d_tag == DT_MIPS_UNREFEXTNO) // First external DYNSYM
263 // first_ext_def = (ElfW(Word))cur_dyn->d_un.d_val; // first dynsym entry??
264 }
265 }
266
267 // Don't use dlsym_default_internal(); use dlsym_default.h:DLSYM_DEFAULT()
268 void *dlsym_default_internal(void *handle, const char*symbol) {
269 dt_tag tags;
270 Elf32_Word default_symbol_index = 0;
271 Elf32_Word i;
272
273 #ifdef __USE_GNU
274 if (handle == RTLD_NEXT || handle == RTLD_DEFAULT) {
275 Dl_info info;
276 void *tmp_fnc = dlsym(handle, symbol); // Hack: get symbol w/ any version
277 // printf("tmp_fnc: %p\n", tmp_fnc);
278 dladdr(tmp_fnc, &info);
279 // ... and find what library the symbol is in
280 printf("info.dli_fname: %s\n", info.dli_fname);
281 #if 0
282 char *tmp = info.dli_fname;
283 char *basename = tmp;
284 for ( ; *tmp != '\0'; tmp++ ) {
285 if (*tmp == '/')
286 basename = tmp+1;
287 }
288 #endif
289 // Found handle of RTLD_NEXT or RTLD_DEFAULT
290 handle = dlopen(info.dli_fname, RTLD_NOLOAD | RTLD_LAZY);
291 // symbol name is: info.dli_sname; Could add assert as double-check.
292 if (handle == NULL)
293 printf("ERROR: RTLD_DEFAULT or RTLD_NEXT called; no library found.\n");
294 // Could try: dlopen(info.dli_fname, RTLD_LOCAL|RTLD_LAZY); to get handle
295 // But if library wasn't loaded before, we shouldn't load it now.
296 }
297 // An alternative to the above code is to use dl_iterate_phdr() to walk the
298 // list of loaded libraries, and for each one, hash on the symbol name
299 // to see if it's contained in that one. But dl_iterate_phdr gives you
300 // the base address of the shared object.
301 // dlopen(NULL); provides a handle for main program. dlinfo can then get
302 // dynamic section (see get_dt_tags()), and also the link_map.
303 // When we find a shared object with our symbol in it, the link_map
304 // will give us the name, and dlopen (w/ NOLOAD?) on it gives us a handle.
305 // A better way might be to start with any library handle at all: dlopen
306 // Then call dlinfo(handle, RTLD_DI_LINKMAP, &link_map);
307 // for: 'struct link_map &link_map;' and follow get_dt_tags() for find
308 // info from dynamic section.
309 #endif
310
311 get_dt_tags(handle, &tags);
312 assert(tags.hash != NULL || tags.gnu_hash != NULL);
313 int use_gnu_hash = (tags.hash == NULL);
314 Elf32_Word *hash = (use_gnu_hash ? tags.gnu_hash : tags.hash);
315 for (i = hash_first(symbol, hash, use_gnu_hash); i != STN_UNDEF;
316 i = hash_next(i, hash, use_gnu_hash)) {
317 if (tags.symtab[i].st_name == 0 || tags.symtab[i].st_value == 0)
318 continue;
319 if (strcmp(symbol_name(i, &tags), symbol) != 0) // If different symbol name
320 continue;
321 // We have a symbol of the same name. Let's look at the version number.
322 if ( !(tags.versym[i] & (1<<15)) ) { // If hidden bit is not set.
323 // If default symbol not set or if new version later than old one.
324 // Notice that default_symbol_index will be set first to the
325 // base definition (1 for unversioned symbols; 2 for versioned symbols)
326 if (default_symbol_index) {
327 printf("WARNING: More than one default symbol version.\n");
328 }
329 if (!default_symbol_index ||
330 // Could look at version dependencies, but using strcmp instead.
331 strcmp(version_name(tags.versym[i], &tags),
332 version_name(tags.versym[default_symbol_index], &tags)) > 0) {
333 default_symbol_index = i;
334 }
335 }
336 }
337 #ifdef VERBOSE
338 if (default_symbol_index) {
339 printf("** st_value: %p\n",
340 tags.base_addr + tags.symtab[default_symbol_index].st_value);
341 printf("** symbol version: %s\n",
342 version_name(tags.versym[default_symbol_index], &tags));
343 }
344 #endif
345 if (!default_symbol_index) {
346 printf("ERROR: No default symbol version found for %s.\n"
347 " Extend code to look for hidden symbols?\n", symbol);
348 }
349 if (default_symbol_index)
350 return tags.base_addr + tags.symtab[default_symbol_index].st_value;
351 else
352 assert(0);
353 return NULL;
354 }