fix regression in mips dynamic linker
[musl] / src / ldso / dynlink.c
index 6f23fa5..55124ff 100644 (file)
@@ -1,18 +1,17 @@
 #define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdarg.h>
+#include <stddef.h>
 #include <string.h>
 #include <unistd.h>
 #include <stdint.h>
 #include <elf.h>
 #include <sys/mman.h>
 #include <limits.h>
-#include <stdint.h>
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <errno.h>
-#include <limits.h>
-#include <elf.h>
 #include <link.h>
 #include <setjmp.h>
 #include <pthread.h>
@@ -51,6 +50,11 @@ struct debug {
        void *base;
 };
 
+struct td_index { /* argument record for a dynamically resolved TLSDESC relocation */
+       size_t args[2]; /* do_relocs stores: [0] defining dso's tls_id, [1] symbol TLS value + addend */
+       struct td_index *next; /* next record on the relocated dso's td_index list */
+};
+
 struct dso {
        unsigned char *base;
        char *name;
@@ -59,6 +63,7 @@ struct dso {
 
        Phdr *phdr;
        int phnum;
+       size_t phentsize;
        int refcnt;
        Sym *syms;
        uint32_t *hashtab;
@@ -77,9 +82,11 @@ struct dso {
        char *rpath_orig, *rpath;
        void *tls_image;
        size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
+       size_t relro_start, relro_end;
        void **new_dtv;
        unsigned char *new_tls;
        int new_dtv_idx, new_tls_idx;
+       struct td_index *td_index;
        struct dso *fini_next;
        char *shortname;
        char buf[];
@@ -90,16 +97,34 @@ struct symdef {
        struct dso *dso;
 };
 
+enum { /* generic relocation classes; do_relocs obtains these from remap_rel() */
+       REL_ERR, /* 0: do_relocs reports the arch type as unsupported */
+       REL_SYMBOLIC, /* symbol address + addend */
+       REL_GOT, /* symbol address + addend; addend never read from the target word */
+       REL_PLT, /* stored like REL_GOT; symbol lookup passes need_def=1 */
+       REL_RELATIVE, /* dso base + addend */
+       REL_OFFSET, /* symbol address + addend - address of the relocated word */
+       REL_OFFSET32, /* same value as REL_OFFSET, stored through a uint32_t pointer */
+       REL_COPY, /* memcpy of st_size bytes from the symbol's definition */
+       REL_SYM_OR_REL, /* symbolic when a symbol index is present, else relative */
+       REL_TLS, /* everything past here is TLS */
+       REL_DTPMOD, /* tls_id of the defining dso */
+       REL_DTPOFF, /* symbol TLS value + addend */
+       REL_TPOFF, /* offset relative to the thread pointer; formula depends on TLS_ABOVE_TP */
+       REL_TPOFF_NEG, /* tls_offset - TLS value + addend; only handled when TLS_ABOVE_TP is undefined */
+       REL_TLSDESC, /* two-word descriptor: resolver function, then its argument */
+};
+
 #include "reloc.h"
 
-void __init_ssp(size_t *);
-void *__install_initial_tls(void *);
+int __init_tp(void *);
 void __init_libc(char **, char *);
 
+const char *__libc_get_version(void);
+
 static struct dso *head, *tail, *ldso, *fini_head;
 static char *env_path, *sys_path;
 static unsigned long long gencnt;
-static int ssp_used;
 static int runtime;
 static int ldd_mode;
 static int ldso_fail;
@@ -108,7 +133,9 @@ static jmp_buf *rtld_fail;
 static pthread_rwlock_t lock;
 static struct debug debug;
 static size_t tls_cnt, tls_offset, tls_align = 4*sizeof(size_t);
+static size_t static_tls_cnt;
 static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
+static long long builtin_tls[(sizeof(struct pthread) + 64)/sizeof(long long)];
 
 struct debug *_dl_debug_addr = &debug;
 
@@ -132,6 +159,17 @@ static int search_vec(size_t *v, size_t *r, size_t key)
        return 1;
 }
 
+static void error(const char *fmt, ...) /* format a printf-style message into errbuf and report it */
+{
+       va_list ap;
+       va_start(ap, fmt);
+       vsnprintf(errbuf, sizeof errbuf, fmt, ap);
+       va_end(ap);
+       if (runtime) longjmp(*rtld_fail, 1); /* runtime nonzero: jump to rtld_fail; this call does not return */
+       dprintf(2, "%s\n", errbuf); /* otherwise print the message on fd 2 ... */
+       ldso_fail = 1; /* ... and record the failure; __dynlink calls _exit(127) when this is set */
+}
+
 static uint32_t sysv_hash(const char *s0)
 {
        const unsigned char *s = (void *)s0;
@@ -195,17 +233,14 @@ static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
 #define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON | 1<<STT_TLS)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK | 1<<STB_GNU_UNIQUE)
 
+#ifndef ARCH_SYM_REJECT_UND /* an earlier definition wins - presumably from reloc.h; TODO confirm */
+#define ARCH_SYM_REJECT_UND(s) 0 /* default: find_sym applies no extra rejection to undefined symbols */
+#endif
+
 static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
 {
        uint32_t h = 0, gh = 0;
        struct symdef def = {0};
-       if (dso->ghashtab) {
-               gh = gnu_hash(s);
-               if (gh == 0x1f4039c9 && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
-       } else {
-               h = sysv_hash(s);
-               if (h == 0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
-       }
        for (; dso; dso=dso->next) {
                Sym *sym;
                if (!dso->global) continue;
@@ -218,7 +253,8 @@ static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
                }
                if (!sym) continue;
                if (!sym->st_shndx)
-                       if (need_def || (sym->st_info&0xf) == STT_TLS)
+                       if (need_def || (sym->st_info&0xf) == STT_TLS
+                           || ARCH_SYM_REJECT_UND(sym))
                                continue;
                if (!sym->st_value)
                        if ((sym->st_info&0xf) != STT_TLS)
@@ -234,6 +270,10 @@ static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
        return def;
 }
 
+#define NO_INLINE_ADDEND (1<<REL_COPY | 1<<REL_GOT | 1<<REL_PLT) /* bitmask of classes whose addend is never read from the relocated word */
+
+ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic(); /* addresses stored into TLSDESC slots by do_relocs; defined outside this view */
+
 static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stride)
 {
        unsigned char *base = dso->base;
@@ -242,35 +282,114 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
        Sym *sym;
        const char *name;
        void *ctx;
-       int type;
+       int astype, type;
        int sym_index;
        struct symdef def;
+       size_t *reloc_addr;
+       size_t sym_val;
+       size_t tls_val;
+       size_t addend;
 
        for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
-               type = R_TYPE(rel[1]);
+               astype = R_TYPE(rel[1]);
+               if (!astype) continue;
+               type = remap_rel(astype);
+               if (!type) {
+                       error("Error relocating %s: unsupported relocation type %d",
+                               dso->name, astype);
+                       continue;
+               }
                sym_index = R_SYM(rel[1]);
+               reloc_addr = (void *)(base + rel[0]);
                if (sym_index) {
                        sym = syms + sym_index;
                        name = strings + sym->st_name;
-                       ctx = IS_COPY(type) ? head->next : head;
-                       def = find_sym(ctx, name, IS_PLT(type));
-                       if (!def.sym && sym->st_info>>4 != STB_WEAK) {
-                               snprintf(errbuf, sizeof errbuf,
-                                       "Error relocating %s: %s: symbol not found",
+                       ctx = type==REL_COPY ? head->next : head;
+                       def = find_sym(ctx, name, type==REL_PLT);
+                       if (!def.sym && (sym->st_shndx != SHN_UNDEF
+                           || sym->st_info>>4 != STB_WEAK)) {
+                               error("Error relocating %s: %s: symbol not found",
                                        dso->name, name);
-                               if (runtime) longjmp(*rtld_fail, 1);
-                               dprintf(2, "%s\n", errbuf);
-                               ldso_fail = 1;
                                continue;
                        }
                } else {
                        sym = 0;
                        def.sym = 0;
-                       def.dso = 0;
+                       def.dso = dso;
+               }
+
+               addend = stride>2 ? rel[2] /* entries with stride>2 carry the addend in rel[2] */
+                       : (1<<type & NO_INLINE_ADDEND) ? 0
+                       : *reloc_addr; /* otherwise the addend is the current content of the target word */
+
+               sym_val = def.sym ? (size_t)def.dso->base+def.sym->st_value : 0;
+               tls_val = def.sym ? def.sym->st_value : 0;
+
+               switch(type) {
+               case REL_OFFSET:
+                       addend -= (size_t)reloc_addr; /* no break: falls through to the store below */
+               case REL_SYMBOLIC:
+               case REL_GOT:
+               case REL_PLT:
+                       *reloc_addr = sym_val + addend;
+                       break;
+               case REL_RELATIVE:
+                       *reloc_addr = (size_t)base + addend;
+                       break;
+               case REL_SYM_OR_REL:
+                       if (sym) *reloc_addr = sym_val + addend;
+                       else *reloc_addr = (size_t)base + addend;
+                       break;
+               case REL_COPY:
+                       memcpy(reloc_addr, (void *)sym_val, sym->st_size); /* size comes from the referencing symbol, not def.sym */
+                       break;
+               case REL_OFFSET32:
+                       *(uint32_t *)reloc_addr = sym_val + addend
+                               - (size_t)reloc_addr;
+                       break;
+               case REL_DTPMOD:
+                       *reloc_addr = def.dso->tls_id;
+                       break;
+               case REL_DTPOFF:
+                       *reloc_addr = tls_val + addend;
+                       break;
+#ifdef TLS_ABOVE_TP
+               case REL_TPOFF:
+                       *reloc_addr = tls_val + def.dso->tls_offset + TPOFF_K + addend;
+                       break;
+#else
+               case REL_TPOFF:
+                       *reloc_addr = tls_val - def.dso->tls_offset + addend;
+                       break;
+               case REL_TPOFF_NEG:
+                       *reloc_addr = def.dso->tls_offset - tls_val + addend;
+                       break;
+#endif
+               case REL_TLSDESC:
+                       if (stride<3) addend = reloc_addr[1]; /* no rel[2]: addend is the descriptor's second word */
+                       if (runtime && def.dso->tls_id >= static_tls_cnt) { /* NOTE(review): load_library assigns ids via ++tls_cnt, so >= may also match the last startup module - confirm */
+                               struct td_index *new = malloc(sizeof *new);
+                               if (!new) error(
+                                       "Error relocating %s: cannot allocate TLSDESC for %s",
+                                       dso->name, sym ? name : "(local)" ); /* runtime is nonzero on this path, so error() longjmps and new is not dereferenced */
+                               new->next = dso->td_index; /* push the record onto this dso's list */
+                               dso->td_index = new;
+                               new->args[0] = def.dso->tls_id;
+                               new->args[1] = tls_val + addend;
+                               reloc_addr[0] = (size_t)__tlsdesc_dynamic;
+                               reloc_addr[1] = (size_t)new;
+                       } else {
+                               reloc_addr[0] = (size_t)__tlsdesc_static; /* second word gets the same value REL_TPOFF would store */
+#ifdef TLS_ABOVE_TP
+                               reloc_addr[1] = tls_val + def.dso->tls_offset
+                                       + TPOFF_K + addend;
+#else
+                               reloc_addr[1] = tls_val - def.dso->tls_offset
+                                       + addend;
+#endif
+                       }
+                       break;
                }
-               do_single_reloc(dso, base, (void *)(base + rel[0]), type,
-                       stride>2 ? rel[2] : 0, sym, sym?sym->st_size:0, def,
-                       def.sym?(size_t)(def.dso->base+def.sym->st_value):0);
        }
 }
 
@@ -280,27 +399,32 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
  * and "donate" them to the heap by setting up minimal malloc
  * structures and then freeing them. */
 
-static void reclaim(unsigned char *base, size_t start, size_t end)
+static void reclaim(struct dso *dso, size_t start, size_t end) /* hand the range [start,end) of dso's mapping (offsets from dso->base) to free() */
 {
        size_t *a, *z;
+       if (start >= dso->relro_start && start < dso->relro_end) start = dso->relro_end; /* start lies in the RELRO range (mprotected read-only by reloc_all): move it past the range */
+       if (end   >= dso->relro_start && end   < dso->relro_end) end = dso->relro_start; /* end lies in the RELRO range: pull it back to where the range begins */
        start = start + 6*sizeof(size_t)-1 & -4*sizeof(size_t);
        end = (end & -4*sizeof(size_t)) - 2*sizeof(size_t);
        if (start>end || end-start < 4*sizeof(size_t)) return;
-       a = (size_t *)(base + start);
-       z = (size_t *)(base + end);
+       a = (size_t *)(dso->base + start); /* first word of the region passed to free() */
+       z = (size_t *)(dso->base + end); /* trailer words written just past the region */
        a[-2] = 1;
        a[-1] = z[0] = end-start + 2*sizeof(size_t) | 1;
        z[1] = 1;
        free(a);
 }
 
-static void reclaim_gaps(unsigned char *base, Phdr *ph, size_t phent, size_t phcnt)
+static void reclaim_gaps(struct dso *dso)
 {
-       for (; phcnt--; ph=(void *)((char *)ph+phent)) {
+       Phdr *ph = dso->phdr;
+       size_t phcnt = dso->phnum;
+
+       for (; phcnt--; ph=(void *)((char *)ph+dso->phentsize)) {
                if (ph->p_type!=PT_LOAD) continue;
                if ((ph->p_flags&(PF_R|PF_W))!=(PF_R|PF_W)) continue;
-               reclaim(base, ph->p_vaddr & -PAGE_SIZE, ph->p_vaddr);
-               reclaim(base, ph->p_vaddr+ph->p_memsz,
+               reclaim(dso, ph->p_vaddr & -PAGE_SIZE, ph->p_vaddr);
+               reclaim(dso, ph->p_vaddr+ph->p_memsz,
                        ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE);
        }
 }
@@ -343,13 +467,16 @@ static void *map_library(int fd, struct dso *dso)
                ph = ph0 = (void *)((char *)buf + eh->e_phoff);
        }
        for (i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
-               if (ph->p_type == PT_DYNAMIC)
+               if (ph->p_type == PT_DYNAMIC) {
                        dyn = ph->p_vaddr;
-               if (ph->p_type == PT_TLS) {
+               } else if (ph->p_type == PT_TLS) {
                        tls_image = ph->p_vaddr;
                        dso->tls_align = ph->p_align;
                        dso->tls_len = ph->p_filesz;
                        dso->tls_size = ph->p_memsz;
+               } else if (ph->p_type == PT_GNU_RELRO) {
+                       dso->relro_start = ph->p_vaddr & -PAGE_SIZE;
+                       dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
                }
                if (ph->p_type != PT_LOAD) continue;
                if (ph->p_vaddr < addr_min) {
@@ -393,6 +520,7 @@ static void *map_library(int fd, struct dso *dso)
                        dso->phdr = (void *)(base + ph->p_vaddr
                                + (eh->e_phoff-ph->p_offset));
                        dso->phnum = eh->e_phnum;
+                       dso->phentsize = eh->e_phentsize;
                }
                /* Reuse the existing mapping for the lowest-address LOAD */
                if ((ph->p_vaddr & -PAGE_SIZE) == addr_min) continue;
@@ -418,12 +546,12 @@ static void *map_library(int fd, struct dso *dso)
                                goto error;
                        break;
                }
-       if (!runtime) reclaim_gaps(base, ph0, eh->e_phentsize, eh->e_phnum);
        dso->map = map;
        dso->map_len = map_len;
        dso->base = base;
        dso->dynv = (void *)(base+dyn);
        if (dso->tls_size) dso->tls_image = (void *)(base+tls_image);
+       if (!runtime) reclaim_gaps(dso);
        free(allocated_buf);
        return map;
 noexec:
@@ -462,7 +590,9 @@ static int fixup_rpath(struct dso *p, char *buf, size_t buf_size)
        }
        n = 0;
        s = p->rpath_orig;
-       while ((t=strstr(s, "$ORIGIN")) || (t=strstr(s, "${ORIGIN}"))) {
+       while ((t=strchr(s, '$'))) {
+               if (strncmp(t, "$ORIGIN", 7) && strncmp(t, "${ORIGIN}", 9))
+                       return -1;
                s = t+1;
                n++;
        }
@@ -477,8 +607,10 @@ static int fixup_rpath(struct dso *p, char *buf, size_t buf_size)
                 * (either system paths or a call to dlopen). */
                if (libc.secure)
                        return -1;
-               if (readlink("/proc/self/exe", buf, buf_size) >= buf_size)
+               l = readlink("/proc/self/exe", buf, buf_size);
+               if (l >= buf_size)
                        return -1;
+               buf[l] = 0;
                origin = buf;
        } else {
                origin = p->name;
@@ -490,11 +622,13 @@ static int fixup_rpath(struct dso *p, char *buf, size_t buf_size)
 
        d = p->rpath;
        s = p->rpath_orig;
-       while ((t=strstr(s, "$ORIGIN")) || (t=strstr(s, "${ORIGIN}"))) {
+       while ((t=strchr(s, '$'))) {
                memcpy(d, s, t-s);
                d += t-s;
                memcpy(d, origin, l);
                d += l;
+               /* It was determined previously that the '$' is followed
+                * either by "ORIGIN" or "{ORIGIN}". */
                s = t + 7 + 2*(t[1]=='{');
        }
        strcpy(d, s);
@@ -608,6 +742,8 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
                                                sys_path = "";
                                        }
                                        fclose(f);
+                               } else if (errno != ENOENT) {
+                                       sys_path = "";
                                }
                        }
                        if (!sys_path) sys_path = "/lib:/usr/local/lib:/usr/lib";
@@ -665,9 +801,10 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
        /* Add a shortname only if name arg was not an explicit pathname. */
        if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
        if (p->tls_image) {
-               if (runtime && !__pthread_self_init()) {
+               if (runtime && !libc.has_thread_pointer) {
                        munmap(map, p->map_len);
                        free(p);
+                       errno = ENOSYS;
                        return 0;
                }
                p->tls_id = ++tls_cnt;
@@ -705,12 +842,8 @@ static void load_deps(struct dso *p)
                        if (p->dynv[i] != DT_NEEDED) continue;
                        dep = load_library(p->strings + p->dynv[i+1], p);
                        if (!dep) {
-                               snprintf(errbuf, sizeof errbuf,
-                                       "Error loading shared library %s: %m (needed by %s)",
+                               error("Error loading shared library %s: %m (needed by %s)",
                                        p->strings + p->dynv[i+1], p->name);
-                               if (runtime) longjmp(*rtld_fail, 1);
-                               dprintf(2, "%s\n", errbuf);
-                               ldso_fail = 1;
                                continue;
                        }
                        if (runtime) {
@@ -756,22 +889,28 @@ static void reloc_all(struct dso *p)
                        2+(dyn[DT_PLTREL]==DT_RELA));
                do_relocs(p, (void *)(p->base+dyn[DT_REL]), dyn[DT_RELSZ], 2);
                do_relocs(p, (void *)(p->base+dyn[DT_RELA]), dyn[DT_RELASZ], 3);
+
+               if (p->relro_start != p->relro_end && /* skip when the RELRO range is empty */
+                   mprotect(p->base+p->relro_start, p->relro_end-p->relro_start, PROT_READ) < 0) { /* runs after this dso's do_relocs calls above */
+                       error("Error relocating %s: RELRO protection failed: %m",
+                               p->name);
+               }
+
                p->relocated = 1;
        }
 }
 
-static size_t find_dyn(Phdr *ph, size_t cnt, size_t stride)
-{
-       for (; cnt--; ph = (void *)((char *)ph + stride))
-               if (ph->p_type == PT_DYNAMIC)
-                       return ph->p_vaddr;
-       return 0;
-}
-
-static void find_map_range(Phdr *ph, size_t cnt, size_t stride, struct dso *p)
+static void kernel_mapped_dso(struct dso *p)
 {
-       size_t min_addr = -1, max_addr = 0;
-       for (; cnt--; ph = (void *)((char *)ph + stride)) {
+       size_t min_addr = -1, max_addr = 0, cnt;
+       Phdr *ph = p->phdr;
+       for (cnt = p->phnum; cnt--; ph = (void *)((char *)ph + p->phentsize)) {
+               if (ph->p_type == PT_DYNAMIC) {
+                       p->dynv = (void *)(p->base + ph->p_vaddr);
+               } else if (ph->p_type == PT_GNU_RELRO) {
+                       p->relro_start = ph->p_vaddr & -PAGE_SIZE;
+                       p->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
+               }
                if (ph->p_type != PT_LOAD) continue;
                if (ph->p_vaddr < min_addr)
                        min_addr = ph->p_vaddr;
@@ -782,6 +921,7 @@ static void find_map_range(Phdr *ph, size_t cnt, size_t stride, struct dso *p)
        max_addr = (max_addr + PAGE_SIZE-1) & -PAGE_SIZE;
        p->map = p->base + min_addr;
        p->map_len = max_addr - min_addr;
+       p->kernel_mapped = 1;
 }
 
 static void do_fini()
@@ -858,10 +998,13 @@ void *__copy_tls(unsigned char *mem)
        pthread_t td;
        struct dso *p;
 
-       if (!tls_cnt) return mem;
-
        void **dtv = (void *)mem;
        dtv[0] = (void *)tls_cnt;
+       if (!tls_cnt) {
+               td = (void *)(dtv+1);
+               td->dtv = dtv;
+               return td;
+       }
 
 #ifdef TLS_ABOVE_TP
        mem += sizeof(void *) * (tls_cnt+1);
@@ -889,17 +1032,15 @@ void *__copy_tls(unsigned char *mem)
        return td;
 }
 
-void *__tls_get_addr(size_t *v)
+void *__tls_get_new(size_t *v)
 {
        pthread_t self = __pthread_self();
-       if (v[0]<=(size_t)self->dtv[0] && self->dtv[v[0]])
-               return (char *)self->dtv[v[0]]+v[1];
 
        /* Block signals to make accessing new TLS async-signal-safe */
        sigset_t set;
-       pthread_sigmask(SIG_BLOCK, SIGALL_SET, &set);
-       if (v[0]<=(size_t)self->dtv[0] && self->dtv[v[0]]) {
-               pthread_sigmask(SIG_SETMASK, &set, 0);
+       __block_all_sigs(&set);
+       if (v[0]<=(size_t)self->dtv[0]) {
+               __restore_sigs(&set);
                return (char *)self->dtv[v[0]]+v[1];
        }
 
@@ -920,13 +1061,19 @@ void *__tls_get_addr(size_t *v)
                self->dtv = newdtv;
        }
 
-       /* Get new TLS memory from new DSO */
-       unsigned char *mem = p->new_tls +
-               (p->tls_size + p->tls_align) * a_fetch_add(&p->new_tls_idx,1);
-       mem += ((uintptr_t)p->tls_image - (uintptr_t)mem) & (p->tls_align-1);
-       self->dtv[v[0]] = mem;
-       memcpy(mem, p->tls_image, p->tls_len);
-       pthread_sigmask(SIG_SETMASK, &set, 0);
+       /* Get new TLS memory from all new DSOs up to the requested one */
+       unsigned char *mem;
+       for (p=head; ; p=p->next) {
+               if (!p->tls_id || self->dtv[p->tls_id]) continue;
+               mem = p->new_tls + (p->tls_size + p->tls_align)
+                       * a_fetch_add(&p->new_tls_idx,1);
+               mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
+                       & (p->tls_align-1);
+               self->dtv[p->tls_id] = mem;
+               memcpy(mem, p->tls_image, p->tls_len);
+               if (p->tls_id == v[0]) break;
+       }
+       __restore_sigs(&set);
        return mem + v[1];
 }
 
@@ -954,6 +1101,7 @@ void *__dynlink(int argc, char **argv)
        size_t vdso_base;
        size_t *auxv;
        char **envp = argv+argc+1;
+       void *initial_tls;
 
        /* Find aux vector just past environ[] */
        for (i=argc+1; argv[i]; i++)
@@ -972,6 +1120,7 @@ void *__dynlink(int argc, char **argv)
                env_preload = 0;
                libc.secure = 1;
        }
+       libc.page_size = aux[AT_PAGESZ];
 
        /* If the dynamic linker was invoked as a program itself, AT_BASE
         * will not be set. In that case, we assume the base address is
@@ -987,13 +1136,11 @@ void *__dynlink(int argc, char **argv)
        lib->base = (void *)aux[AT_BASE];
        lib->name = lib->shortname = "libc.so";
        lib->global = 1;
-       lib->kernel_mapped = 1;
        ehdr = (void *)lib->base;
        lib->phnum = ehdr->e_phnum;
        lib->phdr = (void *)(aux[AT_BASE]+ehdr->e_phoff);
-       find_map_range(lib->phdr, ehdr->e_phnum, ehdr->e_phentsize, lib);
-       lib->dynv = (void *)(lib->base + find_dyn(lib->phdr,
-               ehdr->e_phnum, ehdr->e_phentsize));
+       lib->phentsize = ehdr->e_phentsize;
+       kernel_mapped_dso(lib);
        decode_dyn(lib);
 
        if (aux[AT_PHDR]) {
@@ -1002,6 +1149,7 @@ void *__dynlink(int argc, char **argv)
                /* Find load address of the main program, via AT_PHDR vs PT_PHDR. */
                app->phdr = phdr = (void *)aux[AT_PHDR];
                app->phnum = aux[AT_PHNUM];
+               app->phentsize = aux[AT_PHENT];
                for (i=aux[AT_PHNUM]; i; i--, phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
                        if (phdr->p_type == PT_PHDR)
                                app->base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
@@ -1016,22 +1164,44 @@ void *__dynlink(int argc, char **argv)
                }
                if (app->tls_size) app->tls_image = (char *)app->base + tls_image;
                if (interp_off) lib->name = (char *)app->base + interp_off;
-               app->name = argv[0];
-               app->kernel_mapped = 1;
-               app->dynv = (void *)(app->base + find_dyn(
-                       (void *)aux[AT_PHDR], aux[AT_PHNUM], aux[AT_PHENT]));
-               find_map_range((void *)aux[AT_PHDR],
-                       aux[AT_PHNUM], aux[AT_PHENT], app);
+               if ((aux[0] & (1UL<<AT_EXECFN))
+                   && strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
+                       app->name = (char *)aux[AT_EXECFN];
+               else
+                       app->name = argv[0];
+               kernel_mapped_dso(app);
        } else {
                int fd;
                char *ldname = argv[0];
                size_t l = strlen(ldname);
                if (l >= 3 && !strcmp(ldname+l-3, "ldd")) ldd_mode = 1;
                *argv++ = (void *)-1;
-               if (argv[0] && !strcmp(argv[0], "--")) *argv++ = (void *)-1;
+               while (argv[0] && argv[0][0]=='-' && argv[0][1]=='-') { /* consume leading arguments that begin with "--" */
+                       char *opt = argv[0]+2; /* option text after the "--" */
+                       *argv++ = (void *)-1; /* overwrite the consumed slot with -1 and advance */
+                       if (!*opt) { /* bare "--": stop option parsing */
+                               break;
+                       } else if (!memcmp(opt, "list", 5)) { /* 5 bytes include the NUL, so this is an exact match */
+                               ldd_mode = 1;
+                       } else if (!memcmp(opt, "library-path", 12)) { /* prefix match; opt[12] selects the form */
+                               if (opt[12]=='=') env_path = opt+13; /* --library-path=VALUE */
+                               else if (opt[12]) *argv = 0; /* any other suffix: null the next slot so the !argv[0] usage check after the loop fires */
+                               else if (*argv) env_path = *argv++; /* --library-path VALUE */
+                       } else if (!memcmp(opt, "preload", 7)) { /* same three forms as library-path */
+                               if (opt[7]=='=') env_preload = opt+8;
+                               else if (opt[7]) *argv = 0;
+                               else if (*argv) env_preload = *argv++;
+                       } else {
+                               argv[0] = 0; /* unrecognized option: ends the loop and triggers the usage message */
+                       }
+                       argv[-1] = (void *)-1; /* also overwrites a separate VALUE slot when one was consumed */
+               }
                if (!argv[0]) {
-                       dprintf(2, "musl libc/dynamic program loader\n");
-                       dprintf(2, "usage: %s pathname%s\n", ldname,
+                       dprintf(2, "musl libc\n"
+                               "Version %s\n"
+                               "Dynamic Program Loader\n"
+                               "Usage: %s [options] [--] pathname%s\n",
+                               __libc_get_version(), ldname,
                                ldd_mode ? "" : " [args]");
                        _exit(1);
                }
@@ -1084,6 +1254,7 @@ void *__dynlink(int argc, char **argv)
                ehdr = (void *)vdso_base;
                vdso->phdr = phdr = (void *)(vdso_base + ehdr->e_phoff);
                vdso->phnum = ehdr->e_phnum;
+               vdso->phentsize = ehdr->e_phentsize;
                for (i=ehdr->e_phnum; i; i--, phdr=(void *)((char *)phdr + ehdr->e_phentsize)) {
                        if (phdr->p_type == PT_DYNAMIC)
                                vdso->dynv = (void *)(vdso_base + phdr->p_offset);
@@ -1111,10 +1282,8 @@ void *__dynlink(int argc, char **argv)
        /* PAST THIS POINT, ALL LIBC INTERFACES ARE FULLY USABLE. */
 
        /* Donate unused parts of app and library mapping to malloc */
-       reclaim_gaps(app->base, (void *)aux[AT_PHDR], aux[AT_PHENT], aux[AT_PHNUM]);
-       ehdr = (void *)lib->base;
-       reclaim_gaps(lib->base, (void *)(lib->base+ehdr->e_phoff),
-               ehdr->e_phentsize, ehdr->e_phnum);
+       reclaim_gaps(app);
+       reclaim_gaps(lib);
 
        /* Load preload/needed libraries, add their symbols to the global
         * namespace, and perform all remaining relocations. The main
@@ -1124,20 +1293,31 @@ void *__dynlink(int argc, char **argv)
        load_deps(app);
        make_global(app);
 
+#ifndef DYNAMIC_IS_RO
+       for (i=0; app->dynv[i]; i+=2)
+               if (app->dynv[i]==DT_DEBUG)
+                       app->dynv[i+1] = (size_t)&debug;
+#endif
+
        reloc_all(app->next);
        reloc_all(app);
 
        update_tls_size();
-       if (tls_cnt) {
-               void *mem = mmap(0, libc.tls_size, PROT_READ|PROT_WRITE,
-                       MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-               if (mem==MAP_FAILED ||
-                   !__install_initial_tls(__copy_tls(mem))) {
+       if (libc.tls_size > sizeof builtin_tls) {
+               initial_tls = calloc(libc.tls_size, 1);
+               if (!initial_tls) {
                        dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
                                argv[0], libc.tls_size);
                        _exit(127);
                }
+       } else {
+               initial_tls = builtin_tls;
        }
+       if (__init_tp(__copy_tls(initial_tls)) < 0 && tls_cnt) {
+               dprintf(2, "%s: Thread-local storage not supported by kernel.\n", argv[0]);
+               _exit(127);
+       }
+       static_tls_cnt = tls_cnt;
 
        if (ldso_fail) _exit(127);
        if (ldd_mode) _exit(0);
@@ -1148,11 +1328,6 @@ void *__dynlink(int argc, char **argv)
         * all memory used by the dynamic linker. */
        runtime = 1;
 
-#ifndef DYNAMIC_IS_RO
-       for (i=0; app->dynv[i]; i+=2)
-               if (app->dynv[i]==DT_DEBUG)
-                       app->dynv[i+1] = (size_t)&debug;
-#endif
        debug.ver = 1;
        debug.bp = _dl_debug_state;
        debug.head = head;
@@ -1160,7 +1335,6 @@ void *__dynlink(int argc, char **argv)
        debug.state = 0;
        _dl_debug_state();
 
-       if (ssp_used) __init_ssp((void *)aux[AT_RANDOM]);
        __init_libc(envp, argv[0]);
        atexit(do_fini);
        errno = 0;
@@ -1199,6 +1373,11 @@ void *dlopen(const char *file, int mode)
                for (p=orig_tail->next; p; p=next) {
                        next = p->next;
                        munmap(p->map, p->map_len);
+                       while (p->td_index) {
+                               void *tmp = p->td_index->next;
+                               free(p->td_index);
+                               p->td_index = tmp;
+                       }
                        free(p->deps);
                        free(p);
                }
@@ -1210,7 +1389,7 @@ void *dlopen(const char *file, int mode)
                p = 0;
                errflag = 1;
                goto end;
-       } else p = load_library(file, 0);
+       } else p = load_library(file, head);
 
        if (!p) {
                snprintf(errbuf, sizeof errbuf, noload ?
@@ -1242,9 +1421,6 @@ void *dlopen(const char *file, int mode)
        }
 
        update_tls_size();
-
-       if (ssp_used) __init_ssp(libc.auxv);
-
        _dl_debug_state();
        orig_tail = tail;
 end:
@@ -1265,6 +1441,8 @@ static int invalid_dso_handle(void *h)
        return 1;
 }
 
+void *__tls_get_addr(size_t *);
+
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
        size_t i;
@@ -1316,7 +1494,7 @@ failed:
        return 0;
 }
 
-int __dladdr(void *addr, Dl_info *info)
+int __dladdr(const void *addr, Dl_info *info)
 {
        struct dso *p;
        Sym *sym;
@@ -1341,7 +1519,7 @@ int __dladdr(void *addr, Dl_info *info)
                uint32_t *hashval;
                buckets = p->ghashtab + 4 + (p->ghashtab[2]*sizeof(size_t)/4);
                sym += p->ghashtab[1];
-               for (i = 0; i < p->ghashtab[0]; i++) {
+               for (i = nsym = 0; i < p->ghashtab[0]; i++) {
                        if (buckets[i] > nsym)
                                nsym = buckets[i];
                }
@@ -1426,7 +1604,7 @@ void *__dlsym(void *restrict p, const char *restrict s, void *restrict ra)
 {
        return 0;
 }
-int __dladdr (void *addr, Dl_info *info)
+int __dladdr (const void *addr, Dl_info *info)
 {
        return 0;
 }