implement PT_GNU_RELRO support
[musl] / src / ldso / dynlink.c
index 774ab84..de0f2ac 100644 (file)
@@ -7,19 +7,16 @@
 #include <elf.h>
 #include <sys/mman.h>
 #include <limits.h>
-#include <stdint.h>
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <errno.h>
-#include <limits.h>
-#include <elf.h>
+#include <link.h>
 #include <setjmp.h>
 #include <pthread.h>
 #include <ctype.h>
 #include <dlfcn.h>
 #include "pthread_impl.h"
 #include "libc.h"
-#undef libc
 
 static int errflag;
 static char errbuf[128];
@@ -40,6 +37,9 @@ typedef Elf64_Sym Sym;
 #define R_SYM(x) ((x)>>32)
 #endif
 
+#define MAXP2(a,b) (-(-(a)&-(b)))
+#define ALIGN(x,y) ((x)+(y)-1 & -(y))
+
 struct debug {
        int ver;
        void *head;
@@ -54,10 +54,13 @@ struct dso {
        size_t *dynv;
        struct dso *next, *prev;
 
+       Phdr *phdr;
+       int phnum;
        int refcnt;
        Sym *syms;
        uint32_t *hashtab;
        uint32_t *ghashtab;
+       int16_t *versym;
        char *strings;
        unsigned char *map;
        size_t map_len;
@@ -66,34 +69,51 @@ struct dso {
        signed char global;
        char relocated;
        char constructed;
-       struct dso **deps;
+       char kernel_mapped;
+       struct dso **deps, *needed_by;
+       char *rpath_orig, *rpath;
        void *tls_image;
-       size_t tls_len, tls_size, tls_align, tls_id;
+       size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
+       size_t relro_start, relro_end;
+       void **new_dtv;
+       unsigned char *new_tls;
+       int new_dtv_idx, new_tls_idx;
+       struct dso *fini_next;
        char *shortname;
        char buf[];
 };
 
+struct symdef {
+       Sym *sym;
+       struct dso *dso;
+};
+
 #include "reloc.h"
 
 void __init_ssp(size_t *);
-void *__install_initial_tls(void *);
+int __init_tp(void *);
+void __init_libc(char **, char *);
+
+const char *__libc_get_version(void);
 
-static struct dso *head, *tail, *libc;
-static char *env_path, *sys_path, *r_path;
-static int rtld_used;
+static struct dso *head, *tail, *ldso, *fini_head;
+static char *env_path, *sys_path;
+static unsigned long long gencnt;
 static int ssp_used;
 static int runtime;
 static int ldd_mode;
 static int ldso_fail;
-static jmp_buf rtld_fail;
+static int noload;
+static jmp_buf *rtld_fail;
 static pthread_rwlock_t lock;
 static struct debug debug;
-static size_t *auxv;
-static size_t tls_cnt, tls_size;
+static size_t tls_cnt, tls_offset, tls_align = 4*sizeof(size_t);
+static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
+static long long builtin_tls[(sizeof(struct pthread) + 64)/sizeof(long long)];
 
 struct debug *_dl_debug_addr = &debug;
 
-#define AUX_CNT 24
+#define AUX_CNT 38
 #define DYN_CNT 34
 
 static void decode_vec(size_t *v, size_t *a, size_t cnt)
@@ -140,7 +160,8 @@ static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
        uint32_t *hashtab = dso->hashtab;
        char *strings = dso->strings;
        for (i=hashtab[2+h%hashtab[0]]; i; i=hashtab[2+hashtab[0]+i]) {
-               if (!strcmp(s, strings+syms[i].st_name))
+               if ((!dso->versym || dso->versym[i] >= 0)
+                   && (!strcmp(s, strings+syms[i].st_name)))
                        return syms+i;
        }
        return 0;
@@ -148,47 +169,42 @@ static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 
 static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso)
 {
-       Sym *sym;
-       char *strings;
+       Sym *syms = dso->syms;
+       char *strings = dso->strings;
        uint32_t *hashtab = dso->ghashtab;
        uint32_t nbuckets = hashtab[0];
        uint32_t *buckets = hashtab + 4 + hashtab[2]*(sizeof(size_t)/4);
        uint32_t h2;
        uint32_t *hashval;
-       uint32_t n = buckets[h1 % nbuckets];
+       uint32_t i = buckets[h1 % nbuckets];
 
-       if (!n) return 0;
+       if (!i) return 0;
 
-       strings = dso->strings;
-       sym = dso->syms + n;
-       hashval = buckets + nbuckets + (n - hashtab[1]);
+       hashval = buckets + nbuckets + (i - hashtab[1]);
 
-       for (h1 |= 1; ; sym++) {
+       for (h1 |= 1; ; i++) {
                h2 = *hashval++;
-               if ((h1 == (h2|1)) && !strcmp(s, strings + sym->st_name))
-                       return sym;
+               if ((!dso->versym || dso->versym[i] >= 0)
+                   && (h1 == (h2|1)) && !strcmp(s, strings + syms[i].st_name))
+                       return syms+i;
                if (h2 & 1) break;
        }
 
        return 0;
 }
 
-#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
-#define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
+#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON | 1<<STT_TLS)
+#define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK | 1<<STB_GNU_UNIQUE)
 
-static void *find_sym(struct dso *dso, const char *s, int need_def)
+static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
 {
        uint32_t h = 0, gh = 0;
-       void *def = 0;
+       struct symdef def = {0};
        if (dso->ghashtab) {
                gh = gnu_hash(s);
-               if (gh == 0xf9040207 && !strcmp(s, "dlopen")) rtld_used = 1;
-               if (gh == 0xf4dc4ae && !strcmp(s, "dlsym")) rtld_used = 1;
                if (gh == 0x1f4039c9 && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
        } else {
                h = sysv_hash(s);
-               if (h == 0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-               if (h == 0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
                if (h == 0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1;
        }
        for (; dso; dso=dso->next) {
@@ -201,13 +217,20 @@ static void *find_sym(struct dso *dso, const char *s, int need_def)
                        if (!h) h = sysv_hash(s);
                        sym = sysv_lookup(s, h, dso);
                }
-               if (sym && (!need_def || sym->st_shndx) && sym->st_value
-                && (1<<(sym->st_info&0xf) & OK_TYPES)
-                && (1<<(sym->st_info>>4) & OK_BINDS)) {
-                       if (def && sym->st_info>>4 == STB_WEAK) continue;
-                       def = dso->base + sym->st_value;
-                       if (sym->st_info>>4 == STB_GLOBAL) break;
-               }
+               if (!sym) continue;
+               if (!sym->st_shndx)
+                       if (need_def || (sym->st_info&0xf) == STT_TLS)
+                               continue;
+               if (!sym->st_value)
+                       if ((sym->st_info&0xf) != STT_TLS)
+                               continue;
+               if (!(1<<(sym->st_info&0xf) & OK_TYPES)) continue;
+               if (!(1<<(sym->st_info>>4) & OK_BINDS)) continue;
+
+               if (def.sym && sym->st_info>>4 == STB_WEAK) continue;
+               def.sym = sym;
+               def.dso = dso;
+               if (sym->st_info>>4 == STB_GLOBAL) break;
        }
        return def;
 }
@@ -219,35 +242,37 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
        char *strings = dso->strings;
        Sym *sym;
        const char *name;
-       size_t sym_val, sym_size;
-       size_t *reloc_addr;
        void *ctx;
        int type;
        int sym_index;
+       struct symdef def;
 
        for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
-               reloc_addr = (void *)(base + rel[0]);
                type = R_TYPE(rel[1]);
                sym_index = R_SYM(rel[1]);
                if (sym_index) {
                        sym = syms + sym_index;
                        name = strings + sym->st_name;
                        ctx = IS_COPY(type) ? head->next : head;
-                       sym_val = (size_t)find_sym(ctx, name, IS_PLT(type));
-                       if (!sym_val && sym->st_info>>4 != STB_WEAK) {
+                       def = find_sym(ctx, name, IS_PLT(type));
+                       if (!def.sym && (sym->st_shndx != SHN_UNDEF
+                           || sym->st_info>>4 != STB_WEAK)) {
                                snprintf(errbuf, sizeof errbuf,
                                        "Error relocating %s: %s: symbol not found",
                                        dso->name, name);
-                               if (runtime) longjmp(rtld_fail, 1);
+                               if (runtime) longjmp(*rtld_fail, 1);
                                dprintf(2, "%s\n", errbuf);
                                ldso_fail = 1;
                                continue;
                        }
-                       sym_size = sym->st_size;
                } else {
-                       sym_val = sym_size = 0;
+                       sym = 0;
+                       def.sym = 0;
+                       def.dso = 0;
                }
-               do_single_reloc(reloc_addr, type, sym_val, sym_size, base, rel[2]);
+               do_single_reloc(dso, base, (void *)(base + rel[0]), type,
+                       stride>2 ? rel[2] : 0, sym, sym?sym->st_size:0, def,
+                       def.sym?(size_t)(def.dso->base+def.sym->st_value):0);
        }
 }
 
@@ -257,27 +282,29 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
  * and "donate" them to the heap by setting up minimal malloc
  * structures and then freeing them. */
 
-static void reclaim(unsigned char *base, size_t start, size_t end)
+static void reclaim(struct dso *dso, size_t start, size_t end)
 {
        size_t *a, *z;
+       if (start >= dso->relro_start && start < dso->relro_end) start = dso->relro_end;
+       if (end   >= dso->relro_start && end   < dso->relro_end) end = dso->relro_start;
        start = start + 6*sizeof(size_t)-1 & -4*sizeof(size_t);
        end = (end & -4*sizeof(size_t)) - 2*sizeof(size_t);
        if (start>end || end-start < 4*sizeof(size_t)) return;
-       a = (size_t *)(base + start);
-       z = (size_t *)(base + end);
+       a = (size_t *)(dso->base + start);
+       z = (size_t *)(dso->base + end);
        a[-2] = 1;
        a[-1] = z[0] = end-start + 2*sizeof(size_t) | 1;
        z[1] = 1;
        free(a);
 }
 
-static void reclaim_gaps(unsigned char *base, Phdr *ph, size_t phent, size_t phcnt)
+static void reclaim_gaps(struct dso *dso, Phdr *ph, size_t phent, size_t phcnt)
 {
        for (; phcnt--; ph=(void *)((char *)ph+phent)) {
                if (ph->p_type!=PT_LOAD) continue;
                if ((ph->p_flags&(PF_R|PF_W))!=(PF_R|PF_W)) continue;
-               reclaim(base, ph->p_vaddr & -PAGE_SIZE, ph->p_vaddr);
-               reclaim(base, ph->p_vaddr+ph->p_memsz,
+               reclaim(dso, ph->p_vaddr & -PAGE_SIZE, ph->p_vaddr);
+               reclaim(dso, ph->p_vaddr+ph->p_memsz,
                        ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE);
        }
 }
@@ -285,37 +312,51 @@ static void reclaim_gaps(unsigned char *base, Phdr *ph, size_t phent, size_t phc
 static void *map_library(int fd, struct dso *dso)
 {
        Ehdr buf[(896+sizeof(Ehdr))/sizeof(Ehdr)];
+       void *allocated_buf=0;
        size_t phsize;
        size_t addr_min=SIZE_MAX, addr_max=0, map_len;
        size_t this_min, this_max;
        off_t off_start;
        Ehdr *eh;
-       Phdr *ph;
+       Phdr *ph, *ph0;
        unsigned prot;
-       unsigned char *map, *base;
-       size_t dyn;
+       unsigned char *map=MAP_FAILED, *base;
+       size_t dyn=0;
        size_t tls_image=0;
        size_t i;
 
        ssize_t l = read(fd, buf, sizeof buf);
-       if (l<sizeof *eh) return 0;
        eh = buf;
+       if (l<0) return 0;
+       if (l<sizeof *eh || (eh->e_type != ET_DYN && eh->e_type != ET_EXEC))
+               goto noexec;
        phsize = eh->e_phentsize * eh->e_phnum;
-       if (phsize + sizeof *eh > l) return 0;
-       if (eh->e_phoff + phsize > l) {
+       if (phsize > sizeof buf - sizeof *eh) {
+               allocated_buf = malloc(phsize);
+               if (!allocated_buf) return 0;
+               l = pread(fd, allocated_buf, phsize, eh->e_phoff);
+               if (l < 0) goto error;
+               if (l != phsize) goto noexec;
+               ph = ph0 = allocated_buf;
+       } else if (eh->e_phoff + phsize > l) {
                l = pread(fd, buf+1, phsize, eh->e_phoff);
-               if (l != phsize) return 0;
-               eh->e_phoff = sizeof *eh;
+               if (l < 0) goto error;
+               if (l != phsize) goto noexec;
+               ph = ph0 = (void *)(buf + 1);
+       } else {
+               ph = ph0 = (void *)((char *)buf + eh->e_phoff);
        }
-       ph = (void *)((char *)buf + eh->e_phoff);
        for (i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
                if (ph->p_type == PT_DYNAMIC)
                        dyn = ph->p_vaddr;
-               if (ph->p_type == PT_TLS) {
+               else if (ph->p_type == PT_TLS) {
                        tls_image = ph->p_vaddr;
                        dso->tls_align = ph->p_align;
                        dso->tls_len = ph->p_filesz;
                        dso->tls_size = ph->p_memsz;
+               } else if (ph->p_type == PT_GNU_RELRO) {
+                       dso->relro_start = ph->p_vaddr & -PAGE_SIZE;
+                       dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
                }
                if (ph->p_type != PT_LOAD) continue;
                if (ph->p_vaddr < addr_min) {
@@ -329,7 +370,7 @@ static void *map_library(int fd, struct dso *dso)
                        addr_max = ph->p_vaddr+ph->p_memsz;
                }
        }
-       if (!dyn) return 0;
+       if (!dyn) goto noexec;
        addr_max += PAGE_SIZE-1;
        addr_max &= -PAGE_SIZE;
        addr_min &= -PAGE_SIZE;
@@ -340,11 +381,26 @@ static void *map_library(int fd, struct dso *dso)
         * use the invalid part; we just need to reserve the right
         * amount of virtual address space to map over later. */
        map = mmap((void *)addr_min, map_len, prot, MAP_PRIVATE, fd, off_start);
-       if (map==MAP_FAILED) return 0;
+       if (map==MAP_FAILED) goto error;
+       /* If the loaded file is not relocatable and the requested address is
+        * not available, then the load operation must fail. */
+       if (eh->e_type != ET_DYN && addr_min && map!=(void *)addr_min) {
+               errno = EBUSY;
+               goto error;
+       }
        base = map - addr_min;
-       ph = (void *)((char *)buf + eh->e_phoff);
-       for (i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
+       dso->phdr = 0;
+       dso->phnum = 0;
+       for (ph=ph0, i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
                if (ph->p_type != PT_LOAD) continue;
+               /* Check if the programs headers are in this load segment, and
+                * if so, record the address for use by dl_iterate_phdr. */
+               if (!dso->phdr && eh->e_phoff >= ph->p_offset
+                   && eh->e_phoff+phsize <= ph->p_offset+ph->p_filesz) {
+                       dso->phdr = (void *)(base + ph->p_vaddr
+                               + (eh->e_phoff-ph->p_offset));
+                       dso->phnum = eh->e_phnum;
+               }
                /* Reuse the existing mapping for the lowest-address LOAD */
                if ((ph->p_vaddr & -PAGE_SIZE) == addr_min) continue;
                this_min = ph->p_vaddr & -PAGE_SIZE;
@@ -369,34 +425,95 @@ static void *map_library(int fd, struct dso *dso)
                                goto error;
                        break;
                }
-       if (!runtime) reclaim_gaps(base, (void *)((char *)buf + eh->e_phoff),
-               eh->e_phentsize, eh->e_phnum);
        dso->map = map;
        dso->map_len = map_len;
        dso->base = base;
        dso->dynv = (void *)(base+dyn);
        if (dso->tls_size) dso->tls_image = (void *)(base+tls_image);
+       if (!runtime) reclaim_gaps(dso, ph0, eh->e_phentsize, eh->e_phnum);
+       free(allocated_buf);
        return map;
+noexec:
+       errno = ENOEXEC;
 error:
-       munmap(map, map_len);
+       if (map!=MAP_FAILED) munmap(map, map_len);
+       free(allocated_buf);
        return 0;
 }
 
-static int path_open(const char *name, const char *search, char *buf, size_t buf_size)
+static int path_open(const char *name, const char *s, char *buf, size_t buf_size)
 {
-       const char *s=search, *z;
-       int l, fd;
+       size_t l;
+       int fd;
        for (;;) {
-               while (*s==':') s++;
-               if (!*s) return -1;
-               z = strchr(s, ':');
-               l = z ? z-s : strlen(s);
-               snprintf(buf, buf_size, "%.*s/%s", l, s, name);
+               s += strspn(s, ":\n");
+               l = strcspn(s, ":\n");
+               if (l-1 >= INT_MAX) return -1;
+               if (snprintf(buf, buf_size, "%.*s/%s", (int)l, s, name) >= buf_size)
+                       continue;
                if ((fd = open(buf, O_RDONLY|O_CLOEXEC))>=0) return fd;
                s += l;
        }
 }
 
+static int fixup_rpath(struct dso *p, char *buf, size_t buf_size)
+{
+       size_t n, l;
+       const char *s, *t, *origin;
+       char *d;
+       if (p->rpath) return 0;
+       if (!p->rpath_orig) return -1;
+       if (!strchr(p->rpath_orig, '$')) {
+               p->rpath = p->rpath_orig;
+               return 0;
+       }
+       n = 0;
+       s = p->rpath_orig;
+       while ((t=strchr(s, '$'))) {
+               if (strncmp(t, "$ORIGIN", 7) && strncmp(t, "${ORIGIN}", 9))
+                       return -1;
+               s = t+1;
+               n++;
+       }
+       if (n > SSIZE_MAX/PATH_MAX) return -1;
+
+       if (p->kernel_mapped) {
+               /* $ORIGIN searches cannot be performed for the main program
+                * when it is suid/sgid/AT_SECURE. This is because the
+                * pathname is under the control of the caller of execve.
+                * For libraries, however, $ORIGIN can be processed safely
+                * since the library's pathname came from a trusted source
+                * (either system paths or a call to dlopen). */
+               if (libc.secure)
+                       return -1;
+               l = readlink("/proc/self/exe", buf, buf_size);
+               if (l >= buf_size)
+                       return -1;
+               buf[l] = 0;
+               origin = buf;
+       } else {
+               origin = p->name;
+       }
+       t = strrchr(origin, '/');
+       l = t ? t-origin : 0;
+       p->rpath = malloc(strlen(p->rpath_orig) + n*l + 1);
+       if (!p->rpath) return -1;
+
+       d = p->rpath;
+       s = p->rpath_orig;
+       while ((t=strchr(s, '$'))) {
+               memcpy(d, s, t-s);
+               d += t-s;
+               memcpy(d, origin, l);
+               d += l;
+               /* It was determined previously that the '$' is followed
+                * either by "ORIGIN" or "{ORIGIN}". */
+               s = t + 7 + 2*(t[1]=='{');
+       }
+       strcpy(d, s);
+       return 0;
+}
+
 static void decode_dyn(struct dso *p)
 {
        size_t dyn[DYN_CNT] = {0};
@@ -405,19 +522,25 @@ static void decode_dyn(struct dso *p)
        p->strings = (void *)(p->base + dyn[DT_STRTAB]);
        if (dyn[0]&(1<<DT_HASH))
                p->hashtab = (void *)(p->base + dyn[DT_HASH]);
+       if (dyn[0]&(1<<DT_RPATH))
+               p->rpath_orig = (void *)(p->strings + dyn[DT_RPATH]);
        if (search_vec(p->dynv, dyn, DT_GNU_HASH))
                p->ghashtab = (void *)(p->base + *dyn);
+       if (search_vec(p->dynv, dyn, DT_VERSYM))
+               p->versym = (void *)(p->base + *dyn);
 }
 
-static struct dso *load_library(const char *name)
+static struct dso *load_library(const char *name, struct dso *needed_by)
 {
        char buf[2*NAME_MAX+2];
        const char *pathname;
-       unsigned char *base, *map;
-       size_t dyno, map_len;
+       unsigned char *map;
        struct dso *p, temp_dso = {0};
        int fd;
        struct stat st;
+       size_t alloc_size;
+       int n_th = 0;
+       int is_self = 0;
 
        /* Catch and block attempts to reload the implementation itself */
        if (name[0]=='l' && name[1]=='i' && name[2]=='b') {
@@ -426,17 +549,33 @@ static struct dso *load_library(const char *name)
                char *z = strchr(name, '.');
                if (z) {
                        size_t l = z-name;
-                       for (rp=reserved; *rp && memcmp(name+3, rp, l-3); rp+=strlen(rp)+1);
+                       for (rp=reserved; *rp && strncmp(name+3, rp, l-3); rp+=strlen(rp)+1);
                        if (*rp) {
-                               if (!libc->prev) {
-                                       tail->next = libc;
-                                       libc->prev = tail;
-                                       tail = libc->next ? libc->next : libc;
+                               if (ldd_mode) {
+                                       /* Track which names have been resolved
+                                        * and only report each one once. */
+                                       static unsigned reported;
+                                       unsigned mask = 1U<<(rp-reserved);
+                                       if (!(reported & mask)) {
+                                               reported |= mask;
+                                               dprintf(1, "\t%s => %s (%p)\n",
+                                                       name, ldso->name,
+                                                       ldso->base);
+                                       }
                                }
-                               return libc;
+                               is_self = 1;
                        }
                }
        }
+       if (!strcmp(name, ldso->name)) is_self = 1;
+       if (is_self) {
+               if (!ldso->prev) {
+                       tail->next = ldso;
+                       ldso->prev = tail;
+                       tail = ldso->next ? ldso->next : ldso;
+               }
+               return ldso;
+       }
        if (strchr(name, '/')) {
                pathname = name;
                fd = open(name, O_RDONLY|O_CLOEXEC);
@@ -450,19 +589,44 @@ static struct dso *load_library(const char *name)
                }
                if (strlen(name) > NAME_MAX) return 0;
                fd = -1;
-               if (r_path) fd = path_open(name, r_path, buf, sizeof buf);
-               if (fd < 0 && env_path) fd = path_open(name, env_path, buf, sizeof buf);
+               if (env_path) fd = path_open(name, env_path, buf, sizeof buf);
+               for (p=needed_by; fd < 0 && p; p=p->needed_by)
+                       if (!fixup_rpath(p, buf, sizeof buf))
+                               fd = path_open(name, p->rpath, buf, sizeof buf);
                if (fd < 0) {
                        if (!sys_path) {
-                               FILE *f = fopen(ETC_LDSO_PATH, "rbe");
+                               char *prefix = 0;
+                               size_t prefix_len;
+                               if (ldso->name[0]=='/') {
+                                       char *s, *t, *z;
+                                       for (s=t=z=ldso->name; *s; s++)
+                                               if (*s=='/') z=t, t=s;
+                                       prefix_len = z-ldso->name;
+                                       if (prefix_len < PATH_MAX)
+                                               prefix = ldso->name;
+                               }
+                               if (!prefix) {
+                                       prefix = "";
+                                       prefix_len = 0;
+                               }
+                               char etc_ldso_path[prefix_len + 1
+                                       + sizeof "/etc/ld-musl-" LDSO_ARCH ".path"];
+                               snprintf(etc_ldso_path, sizeof etc_ldso_path,
+                                       "%.*s/etc/ld-musl-" LDSO_ARCH ".path",
+                                       (int)prefix_len, prefix);
+                               FILE *f = fopen(etc_ldso_path, "rbe");
                                if (f) {
-                                       if (getline(&sys_path, (size_t[1]){0}, f) > 0)
-                                               sys_path[strlen(sys_path)-1]=0;
+                                       if (getdelim(&sys_path, (size_t[1]){0}, 0, f) <= 0) {
+                                               free(sys_path);
+                                               sys_path = "";
+                                       }
                                        fclose(f);
+                               } else if (errno != ENOENT) {
+                                       sys_path = "";
                                }
                        }
-                       if (sys_path) fd = path_open(name, sys_path, buf, sizeof buf);
-                       else fd = path_open(name, "/lib:/usr/local/lib:/usr/lib", buf, sizeof buf);
+                       if (!sys_path) sys_path = "/lib:/usr/local/lib:/usr/lib";
+                       fd = path_open(name, sys_path, buf, sizeof buf);
                }
                pathname = buf;
        }
@@ -476,40 +640,74 @@ static struct dso *load_library(const char *name)
                        /* If this library was previously loaded with a
                         * pathname but a search found the same inode,
                         * setup its shortname so it can be found by name. */
-                       if (!p->shortname) p->shortname = strrchr(p->name, '/')+1;
+                       if (!p->shortname && pathname != name)
+                               p->shortname = strrchr(p->name, '/')+1;
                        close(fd);
                        p->refcnt++;
                        return p;
                }
        }
-       map = map_library(fd, &temp_dso);
+       map = noload ? 0 : map_library(fd, &temp_dso);
        close(fd);
        if (!map) return 0;
-       p = malloc(sizeof *p + strlen(pathname) + 1);
+
+       /* Allocate storage for the new DSO. When there is TLS, this
+        * storage must include a reservation for all pre-existing
+        * threads to obtain copies of both the new TLS, and an
+        * extended DTV capable of storing an additional slot for
+        * the newly-loaded DSO. */
+       alloc_size = sizeof *p + strlen(pathname) + 1;
+       if (runtime && temp_dso.tls_image) {
+               size_t per_th = temp_dso.tls_size + temp_dso.tls_align
+                       + sizeof(void *) * (tls_cnt+3);
+               n_th = libc.threads_minus_1 + 1;
+               if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX;
+               else alloc_size += n_th * per_th;
+       }
+       p = calloc(1, alloc_size);
        if (!p) {
-               munmap(map, map_len);
+               munmap(map, temp_dso.map_len);
                return 0;
        }
        memcpy(p, &temp_dso, sizeof temp_dso);
        decode_dyn(p);
-       if (p->tls_image) {
-               p->tls_id = ++tls_cnt;
-               tls_size += p->tls_size + p->tls_align + 8*sizeof(size_t) - 1
-                       & -4*sizeof(size_t);
-       }
        p->dev = st.st_dev;
        p->ino = st.st_ino;
        p->refcnt = 1;
+       p->needed_by = needed_by;
        p->name = p->buf;
        strcpy(p->name, pathname);
        /* Add a shortname only if name arg was not an explicit pathname. */
        if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
+       if (p->tls_image) {
+               if (runtime && !libc.has_thread_pointer) {
+                       munmap(map, p->map_len);
+                       free(p);
+                       errno = ENOSYS;
+                       return 0;
+               }
+               p->tls_id = ++tls_cnt;
+               tls_align = MAXP2(tls_align, p->tls_align);
+#ifdef TLS_ABOVE_TP
+               p->tls_offset = tls_offset + ( (tls_align-1) &
+                       -(tls_offset + (uintptr_t)p->tls_image) );
+               tls_offset += p->tls_size;
+#else
+               tls_offset += p->tls_size + p->tls_align - 1;
+               tls_offset -= (tls_offset + (uintptr_t)p->tls_image)
+                       & (p->tls_align-1);
+               p->tls_offset = tls_offset;
+#endif
+               p->new_dtv = (void *)(-sizeof(size_t) &
+                       (uintptr_t)(p->name+strlen(p->name)+sizeof(size_t)));
+               p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1));
+       }
 
        tail->next = p;
        p->prev = tail;
        tail = p;
 
-       if (ldd_mode) dprintf(1, "\t%s => %s (%p)\n", name, pathname, base);
+       if (ldd_mode) dprintf(1, "\t%s => %s (%p)\n", name, pathname, p->base);
 
        return p;
 }
@@ -519,31 +717,26 @@ static void load_deps(struct dso *p)
        size_t i, ndeps=0;
        struct dso ***deps = &p->deps, **tmp, *dep;
        for (; p; p=p->next) {
-               for (i=0; p->dynv[i]; i+=2) {
-                       if (p->dynv[i] != DT_RPATH) continue;
-                       r_path = (void *)(p->strings + p->dynv[i+1]);
-               }
                for (i=0; p->dynv[i]; i+=2) {
                        if (p->dynv[i] != DT_NEEDED) continue;
-                       dep = load_library(p->strings + p->dynv[i+1]);
+                       dep = load_library(p->strings + p->dynv[i+1], p);
                        if (!dep) {
                                snprintf(errbuf, sizeof errbuf,
                                        "Error loading shared library %s: %m (needed by %s)",
                                        p->strings + p->dynv[i+1], p->name);
-                               if (runtime) longjmp(rtld_fail, 1);
+                               if (runtime) longjmp(*rtld_fail, 1);
                                dprintf(2, "%s\n", errbuf);
                                ldso_fail = 1;
                                continue;
                        }
                        if (runtime) {
                                tmp = realloc(*deps, sizeof(*tmp)*(ndeps+2));
-                               if (!tmp) longjmp(rtld_fail, 1);
+                               if (!tmp) longjmp(*rtld_fail, 1);
                                tmp[ndeps++] = dep;
                                tmp[ndeps] = 0;
                                *deps = tmp;
                        }
                }
-               r_path = 0;
        }
 }
 
@@ -556,7 +749,7 @@ static void load_preload(char *s)
                for (z=s; *z && !isspace(*z); z++);
                tmp = *z;
                *z = 0;
-               load_library(s);
+               load_library(s, 0);
                *z = tmp;
        }
 }
@@ -579,17 +772,18 @@ static void reloc_all(struct dso *p)
                        2+(dyn[DT_PLTREL]==DT_RELA));
                do_relocs(p, (void *)(p->base+dyn[DT_REL]), dyn[DT_RELSZ], 2);
                do_relocs(p, (void *)(p->base+dyn[DT_RELA]), dyn[DT_RELASZ], 3);
-               p->relocated = 1;
-       }
-}
 
-static void free_all(struct dso *p)
-{
-       struct dso *n;
-       while (p) {
-               n = p->next;
-               if (p->map && p!=libc && p!=head) free(p);
-               p = n;
+               if (p->relro_start != p->relro_end &&
+                   mprotect(p->base+p->relro_start, p->relro_end-p->relro_start, PROT_READ) < 0) {
+                       snprintf(errbuf, sizeof errbuf,
+                               "Error relocating %s: RELRO protection failed",
+                               p->name);
+                       if (runtime) longjmp(*rtld_fail, 1);
+                       dprintf(2, "%s\n", errbuf);
+                       ldso_fail = 1;
+               }
+
+               p->relocated = 1;
        }
 }
 
@@ -605,6 +799,10 @@ static void find_map_range(Phdr *ph, size_t cnt, size_t stride, struct dso *p)
 {
        size_t min_addr = -1, max_addr = 0;
        for (; cnt--; ph = (void *)((char *)ph + stride)) {
+               if (ph->p_type == PT_GNU_RELRO) {
+                       p->relro_start = ph->p_vaddr & -PAGE_SIZE;
+                       p->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
+               }
                if (ph->p_type != PT_LOAD) continue;
                if (ph->p_vaddr < min_addr)
                        min_addr = ph->p_vaddr;
@@ -617,51 +815,163 @@ static void find_map_range(Phdr *ph, size_t cnt, size_t stride, struct dso *p)
        p->map_len = max_addr - min_addr;
 }
 
+static void do_fini()
+{
+       struct dso *p;
+       size_t dyn[DYN_CNT] = {0};
+       for (p=fini_head; p; p=p->fini_next) {
+               if (!p->constructed) continue;
+               decode_vec(p->dynv, dyn, DYN_CNT);
+               if (dyn[0] & (1<<DT_FINI_ARRAY)) {
+                       size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
+                       size_t *fn = (size_t *)(p->base + dyn[DT_FINI_ARRAY])+n;
+                       while (n--) ((void (*)(void))*--fn)();
+               }
+#ifndef NO_LEGACY_INITFINI
+               if ((dyn[0] & (1<<DT_FINI)) && dyn[DT_FINI])
+                       ((void (*)(void))(p->base + dyn[DT_FINI]))();
+#endif
+       }
+}
+
 static void do_init_fini(struct dso *p)
 {
        size_t dyn[DYN_CNT] = {0};
+       int need_locking = libc.threads_minus_1;
+       /* Allow recursive calls that arise when a library calls
+        * dlopen from one of its constructors, but block any
+        * other threads until all ctors have finished. */
+       if (need_locking) pthread_mutex_lock(&init_fini_lock);
        for (; p; p=p->prev) {
-               if (p->constructed) return;
+               if (p->constructed) continue;
+               p->constructed = 1;
                decode_vec(p->dynv, dyn, DYN_CNT);
-               if (dyn[0] & (1<<DT_FINI))
-                       atexit((void (*)(void))(p->base + dyn[DT_FINI]));
-               if (dyn[0] & (1<<DT_INIT))
+               if (dyn[0] & ((1<<DT_FINI) | (1<<DT_FINI_ARRAY))) {
+                       p->fini_next = fini_head;
+                       fini_head = p;
+               }
+#ifndef NO_LEGACY_INITFINI
+               if ((dyn[0] & (1<<DT_INIT)) && dyn[DT_INIT])
                        ((void (*)(void))(p->base + dyn[DT_INIT]))();
-               p->constructed = 1;
+#endif
+               if (dyn[0] & (1<<DT_INIT_ARRAY)) {
+                       size_t n = dyn[DT_INIT_ARRAYSZ]/sizeof(size_t);
+                       size_t *fn = (void *)(p->base + dyn[DT_INIT_ARRAY]);
+                       while (n--) ((void (*)(void))*fn++)();
+               }
+               if (!need_locking && libc.threads_minus_1) {
+                       need_locking = 1;
+                       pthread_mutex_lock(&init_fini_lock);
+               }
        }
+       if (need_locking) pthread_mutex_unlock(&init_fini_lock);
 }
 
 void _dl_debug_state(void)
 {
 }
 
-void *__copy_tls(unsigned char *mem, size_t cnt)
+void __reset_tls()
 {
+       pthread_t self = __pthread_self();
        struct dso *p;
+       for (p=head; p; p=p->next) {
+               if (!p->tls_id || !self->dtv[p->tls_id]) continue;
+               memcpy(self->dtv[p->tls_id], p->tls_image, p->tls_len);
+               memset((char *)self->dtv[p->tls_id]+p->tls_len, 0,
+                       p->tls_size - p->tls_len);
+               if (p->tls_id == (size_t)self->dtv[0]) break;
+       }
+}
+
+void *__copy_tls(unsigned char *mem)
+{
+       pthread_t td;
+       struct dso *p;
+
        void **dtv = (void *)mem;
-       dtv[0] = (void *)cnt;
-       mem = (void *)(dtv + cnt + 1);
-       for (p=tail; p; p=p->prev) {
-               if (p->tls_id-1 >= cnt) continue;
-               mem += -p->tls_len & (4*sizeof(size_t)-1);
-               mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
-                       & (p->tls_align-1);
-               dtv[p->tls_id] = mem;
-               memcpy(mem, p->tls_image, p->tls_len);
-               mem += p->tls_size;
+       dtv[0] = (void *)tls_cnt;
+       if (!tls_cnt) {
+               td = (void *)(dtv+1);
+               td->dtv = dtv;
+               return td;
+       }
+
+#ifdef TLS_ABOVE_TP
+       mem += sizeof(void *) * (tls_cnt+1);
+       mem += -((uintptr_t)mem + sizeof(struct pthread)) & (tls_align-1);
+       td = (pthread_t)mem;
+       mem += sizeof(struct pthread);
+
+       for (p=head; p; p=p->next) {
+               if (!p->tls_id) continue;
+               dtv[p->tls_id] = mem + p->tls_offset;
+               memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
+       }
+#else
+       mem += libc.tls_size - sizeof(struct pthread);
+       mem -= (uintptr_t)mem & (tls_align-1);
+       td = (pthread_t)mem;
+
+       for (p=head; p; p=p->next) {
+               if (!p->tls_id) continue;
+               dtv[p->tls_id] = mem - p->tls_offset;
+               memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
        }
-       ((pthread_t)mem)->dtv = dtv;
-       return mem;
+#endif
+       td->dtv = dtv;
+       return td;
 }
 
-void *__tls_get_addr(size_t *p)
+void *__tls_get_addr(size_t *v)
 {
        pthread_t self = __pthread_self();
-       if ((size_t)self->dtv[0] < p[0]) {
-               // FIXME: obtain new DTV and TLS from the DSO
-               a_crash();
+       if (v[0]<=(size_t)self->dtv[0] && self->dtv[v[0]])
+               return (char *)self->dtv[v[0]]+v[1];
+
+       /* Block signals to make accessing new TLS async-signal-safe */
+       sigset_t set;
+       pthread_sigmask(SIG_BLOCK, SIGALL_SET, &set);
+       if (v[0]<=(size_t)self->dtv[0] && self->dtv[v[0]]) {
+               pthread_sigmask(SIG_SETMASK, &set, 0);
+               return (char *)self->dtv[v[0]]+v[1];
        }
-       return (char *)self->dtv[p[0]] + p[1];
+
+       /* This is safe without any locks held because, if the caller
+        * is able to request the Nth entry of the DTV, the DSO list
+        * must be valid at least that far out and it was synchronized
+        * at program startup or by an already-completed call to dlopen. */
+       struct dso *p;
+       for (p=head; p->tls_id != v[0]; p=p->next);
+
+       /* Get new DTV space from new DSO if needed */
+       if (v[0] > (size_t)self->dtv[0]) {
+               void **newdtv = p->new_dtv +
+                       (v[0]+1)*sizeof(void *)*a_fetch_add(&p->new_dtv_idx,1);
+               memcpy(newdtv, self->dtv,
+                       ((size_t)self->dtv[0]+1) * sizeof(void *));
+               newdtv[0] = (void *)v[0];
+               self->dtv = newdtv;
+       }
+
+       /* Get new TLS memory from new DSO */
+       unsigned char *mem = p->new_tls +
+               (p->tls_size + p->tls_align) * a_fetch_add(&p->new_tls_idx,1);
+       mem += ((uintptr_t)p->tls_image - (uintptr_t)mem) & (p->tls_align-1);
+       self->dtv[v[0]] = mem;
+       memcpy(mem, p->tls_image, p->tls_len);
+       pthread_sigmask(SIG_SETMASK, &set, 0);
+       return mem + v[1];
+}
+
+static void update_tls_size()
+{
+       libc.tls_size = ALIGN(
+               (1+tls_cnt) * sizeof(void *) +
+               tls_offset +
+               sizeof(struct pthread) +
+               tls_align * 2,
+       tls_align);
 }
 
 void *__dynlink(int argc, char **argv)
@@ -676,6 +986,9 @@ void *__dynlink(int argc, char **argv)
        struct dso *const vdso = builtin_dsos+2;
        char *env_preload=0;
        size_t vdso_base;
+       size_t *auxv;
+       char **envp = argv+argc+1;
+       void *initial_tls;
 
        /* Find aux vector just past environ[] */
        for (i=argc+1; argv[i]; i++)
@@ -692,7 +1005,9 @@ void *__dynlink(int argc, char **argv)
          || aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]) {
                env_path = 0;
                env_preload = 0;
+               libc.secure = 1;
        }
+       libc.page_size = aux[AT_PAGESZ];
 
        /* If the dynamic linker was invoked as a program itself, AT_BASE
         * will not be set. In that case, we assume the base address is
@@ -708,11 +1023,12 @@ void *__dynlink(int argc, char **argv)
        lib->base = (void *)aux[AT_BASE];
        lib->name = lib->shortname = "libc.so";
        lib->global = 1;
+       lib->kernel_mapped = 1;
        ehdr = (void *)lib->base;
-       find_map_range((void *)(aux[AT_BASE]+ehdr->e_phoff),
-               ehdr->e_phnum, ehdr->e_phentsize, lib);
-       lib->dynv = (void *)(lib->base + find_dyn(
-               (void *)(aux[AT_BASE]+ehdr->e_phoff),
+       lib->phnum = ehdr->e_phnum;
+       lib->phdr = (void *)(aux[AT_BASE]+ehdr->e_phoff);
+       find_map_range(lib->phdr, ehdr->e_phnum, ehdr->e_phentsize, lib);
+       lib->dynv = (void *)(lib->base + find_dyn(lib->phdr,
                ehdr->e_phnum, ehdr->e_phentsize));
        decode_dyn(lib);
 
@@ -720,7 +1036,8 @@ void *__dynlink(int argc, char **argv)
                size_t interp_off = 0;
                size_t tls_image = 0;
                /* Find load address of the main program, via AT_PHDR vs PT_PHDR. */
-               phdr = (void *)aux[AT_PHDR];
+               app->phdr = phdr = (void *)aux[AT_PHDR];
+               app->phnum = aux[AT_PHNUM];
                for (i=aux[AT_PHNUM]; i; i--, phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
                        if (phdr->p_type == PT_PHDR)
                                app->base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
@@ -735,7 +1052,12 @@ void *__dynlink(int argc, char **argv)
                }
                if (app->tls_size) app->tls_image = (char *)app->base + tls_image;
                if (interp_off) lib->name = (char *)app->base + interp_off;
-               app->name = argv[0];
+               if ((aux[0] & (1UL<<AT_EXECFN))
+                   && strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
+                       app->name = (char *)aux[AT_EXECFN];
+               else
+                       app->name = argv[0];
+               app->kernel_mapped = 1;
                app->dynv = (void *)(app->base + find_dyn(
                        (void *)aux[AT_PHDR], aux[AT_PHNUM], aux[AT_PHENT]));
                find_map_range((void *)aux[AT_PHDR],
@@ -743,13 +1065,16 @@ void *__dynlink(int argc, char **argv)
        } else {
                int fd;
                char *ldname = argv[0];
-               size_t dyno, l = strlen(ldname);
+               size_t l = strlen(ldname);
                if (l >= 3 && !strcmp(ldname+l-3, "ldd")) ldd_mode = 1;
                *argv++ = (void *)-1;
                if (argv[0] && !strcmp(argv[0], "--")) *argv++ = (void *)-1;
                if (!argv[0]) {
-                       dprintf(2, "musl libc/dynamic program loader\n");
-                       dprintf(2, "usage: %s pathname%s\n", ldname,
+                       dprintf(2, "musl libc\n"
+                               "Version %s\n"
+                               "Dynamic Program Loader\n"
+                               "Usage: %s [--] pathname%s\n",
+                               __libc_get_version(), ldname,
                                ldd_mode ? "" : " [args]");
                        _exit(1);
                }
@@ -768,28 +1093,48 @@ void *__dynlink(int argc, char **argv)
                close(fd);
                lib->name = ldname;
                app->name = argv[0];
-               aux[AT_ENTRY] = ehdr->e_entry;
+               aux[AT_ENTRY] = (size_t)app->base + ehdr->e_entry;
+               /* Find the name that would have been used for the dynamic
+                * linker had ldd not taken its place. */
+               if (ldd_mode) {
+                       for (i=0; i<app->phnum; i++) {
+                               if (app->phdr[i].p_type == PT_INTERP)
+                                       lib->name = (void *)(app->base
+                                               + app->phdr[i].p_vaddr);
+                       }
+                       dprintf(1, "\t%s (%p)\n", lib->name, lib->base);
+               }
        }
        if (app->tls_size) {
-               app->tls_id = ++tls_cnt;
-               tls_size += app->tls_size+app->tls_align + 8*sizeof(size_t)-1
-                       & -4*sizeof(size_t);
+               app->tls_id = tls_cnt = 1;
+#ifdef TLS_ABOVE_TP
+               app->tls_offset = 0;
+               tls_offset = app->tls_size
+                       + ( -((uintptr_t)app->tls_image + app->tls_size)
+                       & (app->tls_align-1) );
+#else
+               tls_offset = app->tls_offset = app->tls_size
+                       + ( -((uintptr_t)app->tls_image + app->tls_size)
+                       & (app->tls_align-1) );
+#endif
+               tls_align = MAXP2(tls_align, app->tls_align);
        }
        app->global = 1;
-       app->constructed = 1;
        decode_dyn(app);
 
        /* Attach to vdso, if provided by the kernel */
        if (search_vec(auxv, &vdso_base, AT_SYSINFO_EHDR)) {
                ehdr = (void *)vdso_base;
-               phdr = (void *)(vdso_base + ehdr->e_phoff);
+               vdso->phdr = phdr = (void *)(vdso_base + ehdr->e_phoff);
+               vdso->phnum = ehdr->e_phnum;
                for (i=ehdr->e_phnum; i; i--, phdr=(void *)((char *)phdr + ehdr->e_phentsize)) {
                        if (phdr->p_type == PT_DYNAMIC)
                                vdso->dynv = (void *)(vdso_base + phdr->p_offset);
                        if (phdr->p_type == PT_LOAD)
                                vdso->base = (void *)(vdso_base - phdr->p_vaddr + phdr->p_offset);
                }
-               vdso->name = vdso->shortname = "linux-gate.so.1";
+               vdso->name = "";
+               vdso->shortname = "linux-gate.so.1";
                vdso->global = 1;
                decode_dyn(vdso);
                vdso->prev = lib;
@@ -801,7 +1146,7 @@ void *__dynlink(int argc, char **argv)
         * restore the initial chain in preparation for loading third
         * party libraries (preload/needed). */
        head = tail = app;
-       libc = lib;
+       ldso = lib;
        app->next = lib;
        reloc_all(lib);
        app->next = 0;
@@ -809,9 +1154,9 @@ void *__dynlink(int argc, char **argv)
        /* PAST THIS POINT, ALL LIBC INTERFACES ARE FULLY USABLE. */
 
        /* Donate unused parts of app and library mapping to malloc */
-       reclaim_gaps(app->base, (void *)aux[AT_PHDR], aux[AT_PHENT], aux[AT_PHNUM]);
+       reclaim_gaps(app, (void *)aux[AT_PHDR], aux[AT_PHENT], aux[AT_PHNUM]);
        ehdr = (void *)lib->base;
-       reclaim_gaps(lib->base, (void *)(lib->base+ehdr->e_phoff),
+       reclaim_gaps(lib, (void *)(lib->base+ehdr->e_phoff),
                ehdr->e_phentsize, ehdr->e_phnum);
 
        /* Load preload/needed libraries, add their symbols to the global
@@ -821,9 +1166,32 @@ void *__dynlink(int argc, char **argv)
        if (env_preload) load_preload(env_preload);
        load_deps(app);
        make_global(app);
+
+#ifndef DYNAMIC_IS_RO
+       for (i=0; app->dynv[i]; i+=2)
+               if (app->dynv[i]==DT_DEBUG)
+                       app->dynv[i+1] = (size_t)&debug;
+#endif
+
        reloc_all(app->next);
        reloc_all(app);
 
+       update_tls_size();
+       if (libc.tls_size > sizeof builtin_tls) {
+               initial_tls = calloc(libc.tls_size, 1);
+               if (!initial_tls) {
+                       dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
+                               argv[0], libc.tls_size);
+                       _exit(127);
+               }
+       } else {
+               initial_tls = builtin_tls;
+       }
+       if (__init_tp(__copy_tls(initial_tls)) < 0 && tls_cnt) {
+               dprintf(2, "%s: Thread-local storage not supported by kernel.\n", argv[0]);
+               _exit(127);
+       }
+
        if (ldso_fail) _exit(127);
        if (ldd_mode) _exit(0);
 
@@ -833,11 +1201,6 @@ void *__dynlink(int argc, char **argv)
         * all memory used by the dynamic linker. */
        runtime = 1;
 
-#ifndef DYNAMIC_IS_RO
-       for (i=0; app->dynv[i]; i+=2)
-               if (app->dynv[i]==DT_DEBUG)
-                       app->dynv[i+1] = (size_t)&debug;
-#endif
        debug.ver = 1;
        debug.bp = _dl_debug_state;
        debug.head = head;
@@ -845,47 +1208,40 @@ void *__dynlink(int argc, char **argv)
        debug.state = 0;
        _dl_debug_state();
 
-       tls_size += sizeof(struct pthread) + 4*sizeof(size_t);
-       __libc.tls_size = tls_size;
-       __libc.tls_cnt = tls_cnt;
-       if (tls_cnt) {
-               void *mem = mmap(0, __libc.tls_size, PROT_READ|PROT_WRITE,
-                       MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-               if (mem==MAP_FAILED ||
-                   !__install_initial_tls(__copy_tls(mem, tls_cnt))) {
-                       dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
-                               argv[0], tls_size);
-                       _exit(127);
-               }
-       }
-       if (ssp_used) __init_ssp(auxv);
-
+       if (ssp_used) __init_ssp((void *)aux[AT_RANDOM]);
+       __init_libc(envp, argv[0]);
+       atexit(do_fini);
+       errno = 0;
        do_init_fini(tail);
 
-       if (!rtld_used) {
-               free_all(head);
-               free(sys_path);
-               reclaim((void *)builtin_dsos, 0, sizeof builtin_dsos);
-       }
-
-       errno = 0;
        return (void *)aux[AT_ENTRY];
 }
 
 void *dlopen(const char *file, int mode)
 {
-       struct dso *volatile p, *orig_tail = tail, *next;
+       struct dso *volatile p, *orig_tail, *next;
+       size_t orig_tls_cnt, orig_tls_offset, orig_tls_align;
        size_t i;
        int cs;
+       jmp_buf jb;
 
        if (!file) return head;
 
        pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
        pthread_rwlock_wrlock(&lock);
+       __inhibit_ptc();
+
+       p = 0;
+       orig_tls_cnt = tls_cnt;
+       orig_tls_offset = tls_offset;
+       orig_tls_align = tls_align;
+       orig_tail = tail;
+       noload = mode & RTLD_NOLOAD;
 
-       if (setjmp(rtld_fail)) {
+       rtld_fail = &jb;
+       if (setjmp(*rtld_fail)) {
                /* Clean up anything new that was (partially) loaded */
-               if (p->deps) for (i=0; p->deps[i]; i++)
+               if (p && p->deps) for (i=0; p->deps[i]; i++)
                        if (p->deps[i]->global < 0)
                                p->deps[i]->global = 0;
                for (p=orig_tail->next; p; p=next) {
@@ -894,16 +1250,21 @@ void *dlopen(const char *file, int mode)
                        free(p->deps);
                        free(p);
                }
+               tls_cnt = orig_tls_cnt;
+               tls_offset = orig_tls_offset;
+               tls_align = orig_tls_align;
                tail = orig_tail;
                tail->next = 0;
                p = 0;
                errflag = 1;
                goto end;
-       } else p = load_library(file);
+       } else p = load_library(file, head);
 
        if (!p) {
-               snprintf(errbuf, sizeof errbuf,
-                       "Error loading shared library %s: %m", file);
+               snprintf(errbuf, sizeof errbuf, noload ?
+                       "Library %s is not already loaded" :
+                       "Error loading shared library %s: %m",
+                       file);
                errflag = 1;
                goto end;
        }
@@ -928,34 +1289,51 @@ void *dlopen(const char *file, int mode)
                p->global = 1;
        }
 
-       if (ssp_used) __init_ssp(auxv);
+       update_tls_size();
 
-       _dl_debug_state();
+       if (ssp_used) __init_ssp(libc.auxv);
 
-       do_init_fini(tail);
+       _dl_debug_state();
+       orig_tail = tail;
 end:
+       __release_ptc();
+       if (p) gencnt++;
        pthread_rwlock_unlock(&lock);
+       if (p) do_init_fini(orig_tail);
        pthread_setcancelstate(cs, 0);
        return p;
 }
 
+static int invalid_dso_handle(void *h)
+{
+       struct dso *p;
+       for (p=head; p; p=p->next) if (h==p) return 0;
+       snprintf(errbuf, sizeof errbuf, "Invalid library handle %p", (void *)h);
+       errflag = 1;
+       return 1;
+}
+
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
        size_t i;
        uint32_t h = 0, gh = 0;
        Sym *sym;
-       if (p == RTLD_NEXT) {
-               for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
-               if (!p) p=head;
-               void *res = find_sym(p->next, s, 0);
-               if (!res) goto failed;
-               return res;
-       }
-       if (p == head || p == RTLD_DEFAULT) {
-               void *res = find_sym(head, s, 0);
-               if (!res) goto failed;
-               return res;
+       if (p == head || p == RTLD_DEFAULT || p == RTLD_NEXT) {
+               if (p == RTLD_DEFAULT) {
+                       p = head;
+               } else if (p == RTLD_NEXT) {
+                       for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
+                       if (!p) p=head;
+                       p = p->next;
+               }
+               struct symdef def = find_sym(p, s, 0);
+               if (!def.sym) goto failed;
+               if ((def.sym->st_info&0xf) == STT_TLS)
+                       return __tls_get_addr((size_t []){def.dso->tls_id, def.sym->st_value});
+               return def.dso->base + def.sym->st_value;
        }
+       if (p != RTLD_DEFAULT && p != RTLD_NEXT && invalid_dso_handle(p))
+               return 0;
        if (p->ghashtab) {
                gh = gnu_hash(s);
                sym = gnu_lookup(s, gh, p);
@@ -963,6 +1341,8 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
                h = sysv_hash(s);
                sym = sysv_lookup(s, h, p);
        }
+       if (sym && (sym->st_info&0xf) == STT_TLS)
+               return __tls_get_addr((size_t []){p->tls_id, sym->st_value});
        if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
                return p->base + sym->st_value;
        if (p->deps) for (i=0; p->deps[i]; i++) {
@@ -973,6 +1353,8 @@ static void *do_dlsym(struct dso *p, const char *s, void *ra)
                        if (!h) h = sysv_hash(s);
                        sym = sysv_lookup(s, h, p->deps[i]);
                }
+               if (sym && (sym->st_info&0xf) == STT_TLS)
+                       return __tls_get_addr((size_t []){p->deps[i]->tls_id, sym->st_value});
                if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
                        return p->deps[i]->base + sym->st_value;
        }
@@ -982,7 +1364,7 @@ failed:
        return 0;
 }
 
-int __dladdr(void *addr, Dl_info *info)
+int __dladdr(const void *addr, Dl_info *info)
 {
        struct dso *p;
        Sym *sym;
@@ -1007,7 +1389,7 @@ int __dladdr(void *addr, Dl_info *info)
                uint32_t *hashval;
                buckets = p->ghashtab + 4 + (p->ghashtab[2]*sizeof(size_t)/4);
                sym += p->ghashtab[1];
-               for (i = 0; i < p->ghashtab[0]; i++) {
+               for (i = nsym = 0; i < p->ghashtab[0]; i++) {
                        if (buckets[i] > nsym)
                                nsym = buckets[i];
                }
@@ -1020,7 +1402,7 @@ int __dladdr(void *addr, Dl_info *info)
        }
 
        for (; nsym; nsym--, sym++) {
-               if (sym->st_shndx && sym->st_value
+               if (sym->st_value
                 && (1<<(sym->st_info&0xf) & OK_TYPES)
                 && (1<<(sym->st_info>>4) & OK_BINDS)) {
                        void *symaddr = p->base + sym->st_value;
@@ -1051,7 +1433,39 @@ void *__dlsym(void *restrict p, const char *restrict s, void *restrict ra)
        pthread_rwlock_unlock(&lock);
        return res;
 }
+
+int dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void *data), void *data)
+{
+       struct dso *current;
+       struct dl_phdr_info info;
+       int ret = 0;
+       for(current = head; current;) {
+               info.dlpi_addr      = (uintptr_t)current->base;
+               info.dlpi_name      = current->name;
+               info.dlpi_phdr      = current->phdr;
+               info.dlpi_phnum     = current->phnum;
+               info.dlpi_adds      = gencnt;
+               info.dlpi_subs      = 0;
+               info.dlpi_tls_modid = current->tls_id;
+               info.dlpi_tls_data  = current->tls_image;
+
+               ret = (callback)(&info, sizeof (info), data);
+
+               if (ret != 0) break;
+
+               pthread_rwlock_rdlock(&lock);
+               current = current->next;
+               pthread_rwlock_unlock(&lock);
+       }
+       return ret;
+}
 #else
+static int invalid_dso_handle(void *h)
+{
+       snprintf(errbuf, sizeof errbuf, "Invalid library handle %p", (void *)h);
+       errflag = 1;
+       return 1;
+}
 void *dlopen(const char *file, int mode)
 {
        return 0;
@@ -1060,12 +1474,24 @@ void *__dlsym(void *restrict p, const char *restrict s, void *restrict ra)
 {
        return 0;
 }
-int __dladdr (void *addr, Dl_info *info)
+int __dladdr (const void *addr, Dl_info *info)
 {
        return 0;
 }
 #endif
 
+int __dlinfo(void *dso, int req, void *res)
+{
+       if (invalid_dso_handle(dso)) return -1;
+       if (req != RTLD_DI_LINKMAP) {
+               snprintf(errbuf, sizeof errbuf, "Unsupported request %d", req);
+               errflag = 1;
+               return -1;
+       }
+       *(struct link_map **)res = dso;
+       return 0;
+}
+
 char *dlerror()
 {
        if (!errflag) return 0;
@@ -1075,5 +1501,5 @@ char *dlerror()
 
 int dlclose(void *p)
 {
-       return 0;
+       return invalid_dso_handle(p);
 }