dynamic-linked TLS support for everything but dlopen'd libs
[musl] / src / ldso / dynlink.c
index 960e089..e09f071 100644 (file)
@@ -17,6 +17,9 @@
 #include <pthread.h>
 #include <ctype.h>
 #include <dlfcn.h>
+#include "pthread_impl.h"
+#include "libc.h"
+#undef libc
 
 static int errflag;
 static char errbuf[128];
@@ -64,17 +67,24 @@ struct dso {
        char relocated;
        char constructed;
        struct dso **deps;
+       void *tls_image;
+       size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
        char *shortname;
        char buf[];
 };
 
+struct symdef { /* result of a symbol lookup: the symbol plus the DSO that defines it */
+       Sym *sym; /* matching symbol table entry; null when nothing was found */
+       struct dso *dso; /* DSO whose symbol table contains sym */
+};
+
 #include "reloc.h"
 
 void __init_ssp(size_t *);
+void *__install_initial_tls(void *);
 
 static struct dso *head, *tail, *libc;
 static char *env_path, *sys_path, *r_path;
-static int rtld_used;
 static int ssp_used;
 static int runtime;
 static int ldd_mode;
@@ -83,6 +93,7 @@ static jmp_buf rtld_fail;
 static pthread_rwlock_t lock;
 static struct debug debug;
 static size_t *auxv;
+static size_t tls_cnt, tls_size;
 
 struct debug *_dl_debug_addr = &debug;
 
@@ -166,22 +177,18 @@ static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
        return 0;
 }
 
-#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
+#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON | 1<<STT_TLS)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
 
-static void *find_sym(struct dso *dso, const char *s, int need_def)
+static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
 {
        uint32_t h = 0, gh = 0;
-       void *def = 0;
+       struct symdef def = {0};
        if (dso->ghashtab) {
                gh = gnu_hash(s);
-               if (gh == 0xf9040207 && !strcmp(s, "dlopen")) rtld_used = 1;
-               if (gh == 0xf4dc4ae && !strcmp(s, "dlsym")) rtld_used = 1;
                if (gh == 0x1f4039c9 && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
        } else {
                h = sysv_hash(s);
-               if (h == 0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-               if (h == 0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
                if (h == 0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
        }
        for (; dso; dso=dso->next) {
@@ -197,8 +204,9 @@ static void *find_sym(struct dso *dso, const char *s, int need_def)
                if (sym && (!need_def || sym->st_shndx) && sym->st_value
                 && (1<<(sym->st_info&0xf) & OK_TYPES)
                 && (1<<(sym->st_info>>4) & OK_BINDS)) {
-                       if (def && sym->st_info>>4 == STB_WEAK) continue;
-                       def = dso->base + sym->st_value;
+                       if (def.sym && sym->st_info>>4 == STB_WEAK) continue;
+                       def.sym = sym;
+                       def.dso = dso;
                        if (sym->st_info>>4 == STB_GLOBAL) break;
                }
        }
@@ -212,22 +220,20 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
        char *strings = dso->strings;
        Sym *sym;
        const char *name;
-       size_t sym_val, sym_size;
-       size_t *reloc_addr;
        void *ctx;
        int type;
        int sym_index;
+       struct symdef def;
 
        for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
-               reloc_addr = (void *)(base + rel[0]);
                type = R_TYPE(rel[1]);
                sym_index = R_SYM(rel[1]);
                if (sym_index) {
                        sym = syms + sym_index;
                        name = strings + sym->st_name;
                        ctx = IS_COPY(type) ? head->next : head;
-                       sym_val = (size_t)find_sym(ctx, name, IS_PLT(type));
-                       if (!sym_val && sym->st_info>>4 != STB_WEAK) {
+                       def = find_sym(ctx, name, IS_PLT(type));
+                       if (!def.sym && sym->st_info>>4 != STB_WEAK) {
                                snprintf(errbuf, sizeof errbuf,
                                        "Error relocating %s: %s: symbol not found",
                                        dso->name, name);
@@ -236,11 +242,14 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
                                ldso_fail = 1;
                                continue;
                        }
-                       sym_size = sym->st_size;
                } else {
-                       sym_val = sym_size = 0;
+                       sym = 0;
+                       def.sym = 0;
+                       def.dso = 0;
                }
-               do_single_reloc(reloc_addr, type, sym_val, sym_size, base, rel[2]);
+               do_single_reloc(dso, base, (void *)(base + rel[0]), type,
+                       stride>2 ? rel[2] : 0, sym, sym?sym->st_size:0, def,
+                       def.sym?(size_t)(def.dso->base+def.sym->st_value):0);
        }
 }
 
@@ -275,7 +284,7 @@ static void reclaim_gaps(unsigned char *base, Phdr *ph, size_t phent, size_t phc
        }
 }
 
-static void *map_library(int fd, size_t *lenp, unsigned char **basep, size_t *dynp)
+static void *map_library(int fd, struct dso *dso)
 {
        Ehdr buf[(896+sizeof(Ehdr))/sizeof(Ehdr)];
        size_t phsize;
@@ -287,6 +296,7 @@ static void *map_library(int fd, size_t *lenp, unsigned char **basep, size_t *dy
        unsigned prot;
        unsigned char *map, *base;
        size_t dyn;
+       size_t tls_image=0;
        size_t i;
 
        ssize_t l = read(fd, buf, sizeof buf);
@@ -303,6 +313,12 @@ static void *map_library(int fd, size_t *lenp, unsigned char **basep, size_t *dy
        for (i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
                if (ph->p_type == PT_DYNAMIC)
                        dyn = ph->p_vaddr;
+               if (ph->p_type == PT_TLS) {
+                       tls_image = ph->p_vaddr;
+                       dso->tls_align = ph->p_align;
+                       dso->tls_len = ph->p_filesz;
+                       dso->tls_size = ph->p_memsz;
+               }
                if (ph->p_type != PT_LOAD) continue;
                if (ph->p_vaddr < addr_min) {
                        addr_min = ph->p_vaddr;
@@ -357,9 +373,11 @@ static void *map_library(int fd, size_t *lenp, unsigned char **basep, size_t *dy
                }
        if (!runtime) reclaim_gaps(base, (void *)((char *)buf + eh->e_phoff),
                eh->e_phentsize, eh->e_phnum);
-       *lenp = map_len;
-       *basep = base;
-       *dynp = dyn;
+       dso->map = map;
+       dso->map_len = map_len;
+       dso->base = base;
+       dso->dynv = (void *)(base+dyn);
+       if (dso->tls_size) dso->tls_image = (void *)(base+tls_image);
        return map;
 error:
        munmap(map, map_len);
@@ -399,7 +417,7 @@ static struct dso *load_library(const char *name)
        const char *pathname;
        unsigned char *base, *map;
        size_t dyno, map_len;
-       struct dso *p;
+       struct dso *p, temp_dso = {0};
        int fd;
        struct stat st;
 
@@ -466,21 +484,21 @@ static struct dso *load_library(const char *name)
                        return p;
                }
        }
-       map = map_library(fd, &map_len, &base, &dyno);
+       map = map_library(fd, &temp_dso);
        close(fd);
        if (!map) return 0;
-       p = calloc(1, sizeof *p + strlen(pathname) + 1);
+       p = malloc(sizeof *p + strlen(pathname) + 1);
        if (!p) {
                munmap(map, map_len);
                return 0;
        }
-
-       p->map = map;
-       p->map_len = map_len;
-       p->base = base;
-       p->dynv = (void *)(base + dyno);
+       memcpy(p, &temp_dso, sizeof temp_dso);
        decode_dyn(p);
-
+       if (p->tls_image) {
+               p->tls_id = ++tls_cnt;
+               tls_size += p->tls_size + p->tls_align + 8*sizeof(size_t) - 1
+                       & -4*sizeof(size_t);
+       }
        p->dev = st.st_dev;
        p->ino = st.st_ino;
        p->refcnt = 1;
@@ -619,6 +637,35 @@ void _dl_debug_state(void)
 {
 }
 
+void *__copy_tls(unsigned char *mem, size_t cnt) /* lay out DTV + TLS blocks + thread struct in mem; returns the thread struct address */
+{
+       struct dso *p;
+       void **dtv = (void *)mem; /* the DTV array sits at the very start of mem */
+       dtv[0] = (void *)cnt; /* slot 0 records how many module slots follow */
+       mem = (void *)(dtv + cnt + 1); /* TLS data begins after the cnt+1 DTV slots */
+       for (p=tail; p; p=p->prev) { /* walk the DSO list from tail back to head */
+               if (p->tls_id-1 >= cnt) continue; /* unsigned wrap: skips tls_id==0 (no TLS) and ids above cnt */
+               mem += -p->tls_len & (4*sizeof(size_t)-1); /* advance by the padding that rounds tls_len up to a multiple of 4*sizeof(size_t) */
+               mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
+                       & (p->tls_align-1); /* make mem congruent to tls_image modulo tls_align (assumes tls_align is a power of two) */
+               dtv[p->tls_id] = mem; /* DTV slot for this module points at its block */
+               memcpy(mem, p->tls_image, p->tls_len); /* copy only the initialized part of the image */
+               mem += p->tls_size; /* step over the whole block (tls_size comes from p_memsz) */
+       }
+       ((pthread_t)mem)->dtv = dtv; /* the thread structure is placed right after the last TLS block */
+       return mem;
+}
+
+void *__tls_get_addr(size_t *p) /* resolve a TLS reference: p[0] indexes the calling thread's DTV, p[1] is a byte offset into that block */
+{
+       pthread_t self = __pthread_self();
+       if ((size_t)self->dtv[0] < p[0]) { /* dtv[0] holds the slot count; index is beyond this thread's DTV */
+               // FIXME: obtain new DTV and TLS from the DSO
+               a_crash();
+       }
+       return (char *)self->dtv[p[0]] + p[1]; /* block base from the DTV plus the requested offset */
+}
+
 void *__dynlink(int argc, char **argv)
 {
        size_t aux[AUX_CNT] = {0};
@@ -673,6 +720,7 @@ void *__dynlink(int argc, char **argv)
 
        if (aux[AT_PHDR]) {
                size_t interp_off = 0;
+               size_t tls_image = 0;
                /* Find load address of the main program, via AT_PHDR vs PT_PHDR. */
                phdr = (void *)aux[AT_PHDR];
                for (i=aux[AT_PHNUM]; i; i--, phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
@@ -680,7 +728,14 @@ void *__dynlink(int argc, char **argv)
                                app->base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
                        else if (phdr->p_type == PT_INTERP)
                                interp_off = (size_t)phdr->p_vaddr;
+                       else if (phdr->p_type == PT_TLS) {
+                               tls_image = phdr->p_vaddr;
+                               app->tls_len = phdr->p_filesz;
+                               app->tls_size = phdr->p_memsz;
+                               app->tls_align = phdr->p_align;
+                       }
                }
+               if (app->tls_size) app->tls_image = (char *)app->base + tls_image;
                if (interp_off) lib->name = (char *)app->base + interp_off;
                app->name = argv[0];
                app->dynv = (void *)(app->base + find_dyn(
@@ -706,7 +761,7 @@ void *__dynlink(int argc, char **argv)
                        _exit(1);
                }
                runtime = 1;
-               ehdr = (void *)map_library(fd, &app->map_len, &app->base, &dyno);
+               ehdr = (void *)map_library(fd, app);
                if (!ehdr) {
                        dprintf(2, "%s: %s: Not a valid dynamic program\n", ldname, argv[0]);
                        _exit(1);
@@ -715,9 +770,13 @@ void *__dynlink(int argc, char **argv)
                close(fd);
                lib->name = ldname;
                app->name = argv[0];
-               app->dynv = (void *)(app->base + dyno);
                aux[AT_ENTRY] = ehdr->e_entry;
        }
+       if (app->tls_size) {
+               app->tls_id = ++tls_cnt;
+               tls_size += app->tls_size+app->tls_align + 8*sizeof(size_t)-1
+                       & -4*sizeof(size_t);
+       }
        app->global = 1;
        app->constructed = 1;
        decode_dyn(app);
@@ -764,9 +823,38 @@ void *__dynlink(int argc, char **argv)
        if (env_preload) load_preload(env_preload);
        load_deps(app);
        make_global(app);
+
+       /* Make an initial pass setting up TLS before performing relocs.
+        * This provides the TP-based offset of each DSO's TLS for
+        * use in TP-relative relocations. After relocations, we need
+        * to copy the TLS images again in case they had relocs. */
+       tls_size += sizeof(struct pthread) + 4*sizeof(size_t);
+       __libc.tls_size = tls_size;
+       __libc.tls_cnt = tls_cnt;
+       if (tls_cnt) {
+               struct dso *p;
+               void *mem = mmap(0, __libc.tls_size, PROT_READ|PROT_WRITE,
+                       MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+               if (mem==MAP_FAILED ||
+                   !__install_initial_tls(__copy_tls(mem, tls_cnt))) {
+                       dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
+                               argv[0], tls_size);
+                       _exit(127);
+               }
+               for (p=head; p; p=p->next) {
+                       if (!p->tls_id) continue;
+                       p->tls_offset = (char *)__pthread_self()
+                               - (char *)__pthread_self()->dtv[p->tls_id];
+               }
+       }
+
        reloc_all(app->next);
        reloc_all(app);
 
+       /* The initial DTV is located at the base of the memory
+        * allocated for TLS. Repeat copying TLS to pick up relocs. */
+       if (tls_cnt) __copy_tls((void *)__pthread_self()->dtv, tls_cnt);
+
        if (ldso_fail) _exit(127);
        if (ldd_mode) _exit(0);
 
@@ -792,12 +880,6 @@ void *__dynlink(int argc, char **argv)
 
        do_init_fini(tail);
 
-       if (!rtld_used) {
-               free_all(head);
-               free(sys_path);
-               reclaim((void *)builtin_dsos, 0, sizeof builtin_dsos);
-       }
-
        errno = 0;
        return (void *)aux[AT_ENTRY];
 }
@@ -874,17 +956,14 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
        size_t i;
        uint32_t h = 0, gh = 0;
        Sym *sym;
-       if (p == RTLD_NEXT) {
-               for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
-               if (!p) p=head;
-               void *res = find_sym(p->next, s, 0);
-               if (!res) goto failed;
-               return res;
-       }
-       if (p == head || p == RTLD_DEFAULT) {
-               void *res = find_sym(head, s, 0);
-               if (!res) goto failed;
-               return res;
+       if (p == head || p == RTLD_DEFAULT || p == RTLD_NEXT) {
+               if (p == RTLD_NEXT) {
+                       for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
+                       if (!p) p=head;
+               }
+               struct symdef def = find_sym(p->next, s, 0);
+               if (!def.sym) goto failed;
+               return def.dso->base + def.sym->st_value;
        }
        if (p->ghashtab) {
                gh = gnu_hash(s);