dynamic-linked TLS support for everything but dlopen'd libs
[musl] / src / ldso / dynlink.c
index 1b55e07..e09f071 100644 (file)
@@ -1,4 +1,4 @@
-#ifdef __PIC__
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <pthread.h>
 #include <ctype.h>
 #include <dlfcn.h>
+#include "pthread_impl.h"
+#include "libc.h"
+#undef libc
 
-#include "reloc.h"
+static int errflag;
+static char errbuf[128];
+
+#ifdef SHARED
 
 #if ULONG_MAX == 0xffffffff
 typedef Elf32_Ehdr Ehdr;
@@ -34,32 +40,62 @@ typedef Elf64_Sym Sym;
 #define R_SYM(x) ((x)>>32)
 #endif
 
-struct dso
-{
+struct debug { /* Record handed to debuggers: exported via _dl_debug_addr and stored into the app's DT_DEBUG entry by __dynlink. NOTE(review): field order presumably mirrors the conventional r_debug layout - confirm. */
+       int ver; /* __dynlink sets this to 1 */
+       void *head; /* __dynlink stores the head of the dso list here */
+       void (*bp)(void); /* __dynlink stores the address of _dl_debug_state here */
+       int state; /* __dynlink sets this to 0 */
+       void *base; /* __dynlink stores lib->base (dynamic linker/libc base) here */
+};
+
+struct dso { /* One loaded object (app, libc/ldso, vdso, or library). NOTE(review): base, name, dynv, next, prev now lead the struct, presumably to line up with the link_map layout a debugger walks from debug.head - confirm. */
+       unsigned char *base; /* value added to ELF virtual addresses (p_vaddr, st_value, dynamic-table offsets) to form pointers */
+       char *name; /* pathname the object was opened with (argv[0] for the app) */
+       size_t *dynv; /* the object's dynamic vector of (tag, value) pairs */
        struct dso *next, *prev;
+
        int refcnt;
-       size_t *dynv;
        Sym *syms;
        uint32_t *hashtab;
+       uint32_t *ghashtab; /* DT_GNU_HASH table if present (set by decode_dyn); lookups prefer it over hashtab */
        char *strings;
-       unsigned char *base;
        unsigned char *map;
        size_t map_len;
        dev_t dev;
        ino_t ino;
-       int global;
-       int relocated;
+       signed char global; /* find_sym skips objects where this is 0 */
+       char relocated; /* set to 1 by reloc_all once relocations were applied */
+       char constructed; /* set to 1 by do_init_fini once DT_INIT/DT_FINI were handled */
        struct dso **deps;
-       char *name;
+       void *tls_image; /* base + PT_TLS p_vaddr: initialization image copied by __copy_tls */
+       size_t tls_len, tls_size, tls_align, tls_id, tls_offset; /* PT_TLS p_filesz, p_memsz, p_align; 1-based module id taken from ++tls_cnt (0 = no TLS); thread pointer minus dtv[tls_id], computed in __dynlink */
+       char *shortname; /* part of name after the last '/', used by load_library to match a bare library name; null when loaded by explicit pathname */
        char buf[];
 };
 
+struct symdef { /* Result of find_sym: the chosen symbol together with the object that defines it. */
+       Sym *sym; /* null when no definition was found */
+       struct dso *dso; /* object whose base must be added to sym->st_value */
+};
+
+#include "reloc.h"
+
+void __init_ssp(size_t *);
+void *__install_initial_tls(void *);
+
 static struct dso *head, *tail, *libc;
 static char *env_path, *sys_path, *r_path;
-static int rtld_used;
+static int ssp_used;
 static int runtime;
+static int ldd_mode;
+static int ldso_fail;
 static jmp_buf rtld_fail;
 static pthread_rwlock_t lock;
+static struct debug debug;
+static size_t *auxv;
+static size_t tls_cnt, tls_size;
+
+struct debug *_dl_debug_addr = &debug;
 
 #define AUX_CNT 24
 #define DYN_CNT 34
@@ -73,8 +109,17 @@ static void decode_vec(size_t *v, size_t *a, size_t cnt)
        }
 }
 
-static uint32_t hash(const char *s)
+static int search_vec(size_t *v, size_t *r, size_t key) /* Find key in v, a sequence of (tag, value) size_t pairs ended by a pair whose tag is 0. */
+{ /* Returns 1 and stores the matching value in *r; returns 0 and leaves *r untouched if the terminator is reached first. */
+       for (; v[0]!=key; v+=2) /* advance one pair at a time */
+               if (!v[0]) return 0; /* zero tag: end of vector, key absent */
+       *r = v[1];
+       return 1;
+}
+
+static uint32_t sysv_hash(const char *s0)
 {
+       const unsigned char *s = (void *)s0;
        uint_fast32_t h = 0;
        while (*s) {
                h = 16*h + *s++;
@@ -83,9 +128,21 @@ static uint32_t hash(const char *s)
        return h & 0xfffffff;
 }
 
-static Sym *lookup(const char *s, uint32_t h, Sym *syms, uint32_t *hashtab, char *strings)
+static uint32_t gnu_hash(const char *s0) /* Hash the NUL-terminated name s0 for use with gnu_lookup. */
+{
+       const unsigned char *s = (void *)s0; /* bytes are consumed as unsigned values */
+       uint_fast32_t h = 5381; /* initial value */
+       for (; *s; s++)
+               h = h*33 + *s; /* multiply-by-33-and-add step per byte */
+       return h; /* converted to uint32_t on return */
+}
+
+static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
 {
        size_t i;
+       Sym *syms = dso->syms;
+       uint32_t *hashtab = dso->hashtab;
+       char *strings = dso->strings;
        for (i=hashtab[2+h%hashtab[0]]; i; i=hashtab[2+hashtab[0]+i]) {
                if (!strcmp(s, strings+syms[i].st_name))
                        return syms+i;
@@ -93,57 +150,106 @@ static Sym *lookup(const char *s, uint32_t h, Sym *syms, uint32_t *hashtab, char
        return 0;
 }
 
-#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
+static Sym *gnu_lookup(const char *s, uint32_t h1, struct dso *dso) /* Look up name s (with gnu_hash value h1) in dso's GNU hash table; returns the symbol or 0. */
+{
+       Sym *sym;
+       char *strings;
+       uint32_t *hashtab = dso->ghashtab;
+       uint32_t nbuckets = hashtab[0]; /* word 0: number of buckets */
+       uint32_t *buckets = hashtab + 4 + hashtab[2]*(sizeof(size_t)/4); /* skip the 4-word header plus hashtab[2] size_t-sized words (the bloom filter in the GNU hash layout; it is not consulted here) */
+       uint32_t h2;
+       uint32_t *hashval;
+       uint32_t n = buckets[h1 % nbuckets]; /* index of the first symbol in this bucket's chain, 0 if empty */
+
+       if (!n) return 0;
+
+       strings = dso->strings;
+       sym = dso->syms + n;
+       hashval = buckets + nbuckets + (n - hashtab[1]); /* chain values follow the buckets; hashtab[1] is the symbol index of chain entry 0 */
+
+       for (h1 |= 1; ; sym++) { /* bit 0 is ignored when comparing hashes */
+               h2 = *hashval++;
+               if ((h1 == (h2|1)) && !strcmp(s, strings + sym->st_name)) /* compare names only when the hashes agree */
+                       return sym;
+               if (h2 & 1) break; /* bit 0 set marks the last entry of the chain */
+       }
+
+       return 0;
+}
+
+#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON | 1<<STT_TLS)
 #define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK)
 
-static void *find_sym(struct dso *dso, const char *s, int need_def)
+static struct symdef find_sym(struct dso *dso, const char *s, int need_def) /* Search the list starting at dso for a usable definition of s; returns {sym, dso}, both null if none. With need_def set, symbols whose st_shndx is 0 are rejected. */
 {
-       uint32_t h = hash(s);
-       void *def = 0;
-       if (h==0x6b366be && !strcmp(s, "dlopen")) rtld_used = 1;
-       if (h==0x6b3afd && !strcmp(s, "dlsym")) rtld_used = 1;
+       uint32_t h = 0, gh = 0; /* 0 doubles as "not computed yet" for both hashes */
+       struct symdef def = {0};
+       if (dso->ghashtab) { /* NOTE(review): dso is dereferenced here before the loop's null test; confirm no caller passes a null list (do_relocs passes head->next for copy relocations) */
+               gh = gnu_hash(s);
+               if (gh == 0x1f4039c9 && !strcmp(s, "__stack_chk_fail")) ssp_used = 1; /* any lookup of __stack_chk_fail sets ssp_used; hash test first to avoid strcmp on most names */
+       } else {
+               h = sysv_hash(s);
+               if (h == 0x595a4cc && !strcmp(s, "__stack_chk_fail")) ssp_used = 1; /* same check using the SysV hash value */
+       }
        for (; dso; dso=dso->next) {
                Sym *sym;
                if (!dso->global) continue;
-               sym = lookup(s, h, dso->syms, dso->hashtab, dso->strings);
+               if (dso->ghashtab) { /* prefer the GNU hash table when the object has one */
+                       if (!gh) gh = gnu_hash(s); /* computed on first need; a hash that really is 0 is just recomputed */
+                       sym = gnu_lookup(s, gh, dso);
+               } else {
+                       if (!h) h = sysv_hash(s); /* computed on first need */
+                       sym = sysv_lookup(s, h, dso);
+               }
                if (sym && (!need_def || sym->st_shndx) && sym->st_value
                 && (1<<(sym->st_info&0xf) & OK_TYPES)
                 && (1<<(sym->st_info>>4) & OK_BINDS)) {
-                       if (def && sym->st_info>>4 == STB_WEAK) continue;
-                       def = dso->base + sym->st_value;
+                       if (def.sym && sym->st_info>>4 == STB_WEAK) continue; /* keep the first weak definition found */
+                       def.sym = sym;
+                       def.dso = dso; /* caller forms the address as def.dso->base + def.sym->st_value */
                        if (sym->st_info>>4 == STB_GLOBAL) break;
                }
        }
        return def;
 }
 
-static void do_relocs(unsigned char *base, size_t *rel, size_t rel_size, size_t stride, Sym *syms, char *strings, struct dso *dso)
+static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stride) /* Process dso's relocation table rel: rel_size bytes of entries, each stride size_t words long, handing each entry to do_single_reloc. */
 {
+       unsigned char *base = dso->base;
+       Sym *syms = dso->syms;
+       char *strings = dso->strings;
        Sym *sym;
        const char *name;
-       size_t sym_val, sym_size;
-       size_t *reloc_addr;
        void *ctx;
        int type;
        int sym_index;
+       struct symdef def;
 
        for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
-               reloc_addr = (void *)(base + rel[0]);
                type = R_TYPE(rel[1]);
                sym_index = R_SYM(rel[1]);
                if (sym_index) {
                        sym = syms + sym_index;
                        name = strings + sym->st_name;
-                       ctx = IS_COPY(type) ? dso->next : dso;
-                       sym_val = (size_t)find_sym(ctx, name, IS_PLT(type));
-                       if (!sym_val && sym->st_info>>4 != STB_WEAK) {
+                       ctx = IS_COPY(type) ? head->next : head; /* search from the global list head; copy relocations start one object later */
+                       def = find_sym(ctx, name, IS_PLT(type));
+                       if (!def.sym && sym->st_info>>4 != STB_WEAK) { /* an unresolved weak reference is not an error; it falls through with value 0 */
+                               snprintf(errbuf, sizeof errbuf, /* message is kept in errbuf in both modes */
+                                       "Error relocating %s: %s: symbol not found",
+                                       dso->name, name);
                                if (runtime) longjmp(rtld_fail, 1);
-                               dprintf(2, "%s: symbol not found\n", name);
-                               _exit(127);
+                               dprintf(2, "%s\n", errbuf);
+                               ldso_fail = 1; /* startup: remember the failure (__dynlink exits with 127 later) */
+                               continue; /* skip this entry but keep going so every missing symbol is reported */
                        }
-                       sym_size = sym->st_size;
+               } else { /* relocation without a symbol */
+                       sym = 0;
+                       def.sym = 0;
+                       def.dso = 0;
                }
-               do_single_reloc(reloc_addr, type, sym_val, sym_size, base, rel[2]);
+               do_single_reloc(dso, base, (void *)(base + rel[0]), type, /* rel[0] is the target offset relative to base */
+                       stride>2 ? rel[2] : 0, sym, sym?sym->st_size:0, def, /* rel[2] (addend) is read only for 3-word entries */
+                       def.sym?(size_t)(def.dso->base+def.sym->st_value):0); /* resolved address, or 0 when there is no definition */
        }
 }
 
@@ -178,7 +284,7 @@ static void reclaim_gaps(unsigned char *base, Phdr *ph, size_t phent, size_t phc
        }
 }
 
-static void *map_library(int fd, size_t *lenp, unsigned char **basep, size_t *dynp)
+static void *map_library(int fd, struct dso *dso)
 {
        Ehdr buf[(896+sizeof(Ehdr))/sizeof(Ehdr)];
        size_t phsize;
@@ -190,6 +296,7 @@ static void *map_library(int fd, size_t *lenp, unsigned char **basep, size_t *dy
        unsigned prot;
        unsigned char *map, *base;
        size_t dyn;
+       size_t tls_image=0;
        size_t i;
 
        ssize_t l = read(fd, buf, sizeof buf);
@@ -206,6 +313,12 @@ static void *map_library(int fd, size_t *lenp, unsigned char **basep, size_t *dy
        for (i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
                if (ph->p_type == PT_DYNAMIC)
                        dyn = ph->p_vaddr;
+               if (ph->p_type == PT_TLS) {
+                       tls_image = ph->p_vaddr;
+                       dso->tls_align = ph->p_align;
+                       dso->tls_len = ph->p_filesz;
+                       dso->tls_size = ph->p_memsz;
+               }
                if (ph->p_type != PT_LOAD) continue;
                if (ph->p_vaddr < addr_min) {
                        addr_min = ph->p_vaddr;
@@ -242,36 +355,37 @@ static void *map_library(int fd, size_t *lenp, unsigned char **basep, size_t *dy
                prot = (((ph->p_flags&PF_R) ? PROT_READ : 0) |
                        ((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
                        ((ph->p_flags&PF_X) ? PROT_EXEC : 0));
-               if (mmap(base+this_min, this_max-this_min, prot, MAP_PRIVATE|MAP_FIXED, fd, off_start) == MAP_FAILED) {
-                       munmap(map, map_len);
-                       return 0;
-               }
+               if (mmap(base+this_min, this_max-this_min, prot, MAP_PRIVATE|MAP_FIXED, fd, off_start) == MAP_FAILED)
+                       goto error;
                if (ph->p_memsz > ph->p_filesz) {
                        size_t brk = (size_t)base+ph->p_vaddr+ph->p_filesz;
                        size_t pgbrk = brk+PAGE_SIZE-1 & -PAGE_SIZE;
                        memset((void *)brk, 0, pgbrk-brk & PAGE_SIZE-1);
-                       if (pgbrk-(size_t)base < this_max && mmap((void *)pgbrk, (size_t)base+this_max-pgbrk, prot, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) == MAP_FAILED) {
-                               munmap(map, map_len);
-                               return 0;
-                       }
+                       if (pgbrk-(size_t)base < this_max && mmap((void *)pgbrk, (size_t)base+this_max-pgbrk, prot, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
+                               goto error;
                }
        }
        for (i=0; ((size_t *)(base+dyn))[i]; i+=2)
                if (((size_t *)(base+dyn))[i]==DT_TEXTREL) {
-                       mprotect(map, map_len, PROT_READ|PROT_WRITE|PROT_EXEC);
+                       if (mprotect(map, map_len, PROT_READ|PROT_WRITE|PROT_EXEC) < 0)
+                               goto error;
                        break;
                }
        if (!runtime) reclaim_gaps(base, (void *)((char *)buf + eh->e_phoff),
                eh->e_phentsize, eh->e_phnum);
-       *lenp = map_len;
-       *basep = base;
-       *dynp = dyn;
+       dso->map = map;
+       dso->map_len = map_len;
+       dso->base = base;
+       dso->dynv = (void *)(base+dyn);
+       if (dso->tls_size) dso->tls_image = (void *)(base+tls_image);
        return map;
+error:
+       munmap(map, map_len);
+       return 0;
 }
 
-static int path_open(const char *name, const char *search)
+static int path_open(const char *name, const char *search, char *buf, size_t buf_size)
 {
-       char buf[2*NAME_MAX+2];
        const char *s=search, *z;
        int l, fd;
        for (;;) {
@@ -279,18 +393,31 @@ static int path_open(const char *name, const char *search)
                if (!*s) return -1;
                z = strchr(s, ':');
                l = z ? z-s : strlen(s);
-               snprintf(buf, sizeof buf, "%.*s/%s", l, s, name);
-               if ((fd = open(buf, O_RDONLY))>=0) return fd;
+               snprintf(buf, buf_size, "%.*s/%s", l, s, name);
+               if ((fd = open(buf, O_RDONLY|O_CLOEXEC))>=0) return fd;
                s += l;
        }
 }
 
+static void decode_dyn(struct dso *p) /* Fill p->syms, p->strings, p->hashtab and p->ghashtab from p->dynv; p->base and p->dynv must already be set. */
+{
+       size_t dyn[DYN_CNT] = {0};
+       decode_vec(p->dynv, dyn, DYN_CNT);
+       p->syms = (void *)(p->base + dyn[DT_SYMTAB]);
+       p->strings = (void *)(p->base + dyn[DT_STRTAB]);
+       if (dyn[0]&(1<<DT_HASH)) /* dyn[0] is tested as a bitmask of tags that were present (presumably filled in by decode_vec - confirm) */
+               p->hashtab = (void *)(p->base + dyn[DT_HASH]);
+       if (search_vec(p->dynv, dyn, DT_GNU_HASH)) /* looked up directly in dynv, presumably because the tag lies outside the DYN_CNT range - confirm; the value lands in dyn[0] */
+               p->ghashtab = (void *)(p->base + *dyn);
+}
+
 static struct dso *load_library(const char *name)
 {
+       char buf[2*NAME_MAX+2];
+       const char *pathname;
        unsigned char *base, *map;
        size_t dyno, map_len;
-       struct dso *p;
-       size_t dyn[DYN_CNT] = {0};
+       struct dso *p, temp_dso = {0};
        int fd;
        struct stat st;
 
@@ -312,32 +439,34 @@ static struct dso *load_library(const char *name)
                        }
                }
        }
-       /* Search for the name to see if it's already loaded */
-       for (p=head->next; p; p=p->next) {
-               if (!strcmp(p->name, name)) {
-                       p->refcnt++;
-                       return p;
-               }
-       }
        if (strchr(name, '/')) {
-               fd = open(name, O_RDONLY);
+               pathname = name;
+               fd = open(name, O_RDONLY|O_CLOEXEC);
        } else {
+               /* Search for the name to see if it's already loaded */
+               for (p=head->next; p; p=p->next) {
+                       if (p->shortname && !strcmp(p->shortname, name)) {
+                               p->refcnt++;
+                               return p;
+                       }
+               }
                if (strlen(name) > NAME_MAX) return 0;
                fd = -1;
-               if (r_path) fd = path_open(name, r_path);
-               if (fd < 0 && env_path) fd = path_open(name, env_path);
+               if (r_path) fd = path_open(name, r_path, buf, sizeof buf);
+               if (fd < 0 && env_path) fd = path_open(name, env_path, buf, sizeof buf);
                if (fd < 0) {
                        if (!sys_path) {
-                               FILE *f = fopen(ETC_LDSO_PATH, "r");
+                               FILE *f = fopen(ETC_LDSO_PATH, "rbe");
                                if (f) {
                                        if (getline(&sys_path, (size_t[1]){0}, f) > 0)
                                                sys_path[strlen(sys_path)-1]=0;
                                        fclose(f);
                                }
                        }
-                       if (sys_path) fd = path_open(name, sys_path);
-                       else fd = path_open(name, "/lib:/usr/local/lib:/usr/lib");
+                       if (sys_path) fd = path_open(name, sys_path, buf, sizeof buf);
+                       else fd = path_open(name, "/lib:/usr/local/lib:/usr/lib", buf, sizeof buf);
                }
+               pathname = buf;
        }
        if (fd < 0) return 0;
        if (fstat(fd, &st) < 0) {
@@ -346,39 +475,44 @@ static struct dso *load_library(const char *name)
        }
        for (p=head->next; p; p=p->next) {
                if (p->dev == st.st_dev && p->ino == st.st_ino) {
+                       /* If this library was previously loaded with a
+                        * pathname but a search found the same inode,
+                        * set up its shortname so it can be found by name. */
+                       if (!p->shortname) p->shortname = strrchr(p->name, '/')+1;
                        close(fd);
                        p->refcnt++;
                        return p;
                }
        }
-       map = map_library(fd, &map_len, &base, &dyno);
+       map = map_library(fd, &temp_dso);
        close(fd);
        if (!map) return 0;
-       p = calloc(1, sizeof *p + strlen(name) + 1);
+       p = malloc(sizeof *p + strlen(pathname) + 1);
        if (!p) {
                munmap(map, map_len);
                return 0;
        }
-
-       p->map = map;
-       p->map_len = map_len;
-       p->base = base;
-       p->dynv = (void *)(base + dyno);
-       decode_vec(p->dynv, dyn, DYN_CNT);
-
-       p->syms = (void *)(base + dyn[DT_SYMTAB]);
-       p->hashtab = (void *)(base + dyn[DT_HASH]);
-       p->strings = (void *)(base + dyn[DT_STRTAB]);
+       memcpy(p, &temp_dso, sizeof temp_dso);
+       decode_dyn(p);
+       if (p->tls_image) {
+               p->tls_id = ++tls_cnt;
+               tls_size += p->tls_size + p->tls_align + 8*sizeof(size_t) - 1
+                       & -4*sizeof(size_t);
+       }
        p->dev = st.st_dev;
        p->ino = st.st_ino;
        p->refcnt = 1;
        p->name = p->buf;
-       strcpy(p->name, name);
+       strcpy(p->name, pathname);
+       /* Add a shortname only if name arg was not an explicit pathname. */
+       if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
 
        tail->next = p;
        p->prev = tail;
        tail = p;
 
+       if (ldd_mode) dprintf(1, "\t%s => %s (%p)\n", name, pathname, base);
+
        return p;
 }
 
@@ -395,10 +529,13 @@ static void load_deps(struct dso *p)
                        if (p->dynv[i] != DT_NEEDED) continue;
                        dep = load_library(p->strings + p->dynv[i+1]);
                        if (!dep) {
-                               if (runtime) longjmp(rtld_fail, 1);
-                               dprintf(2, "%s: %m (needed by %s)\n",
+                               snprintf(errbuf, sizeof errbuf,
+                                       "Error loading shared library %s: %m (needed by %s)",
                                        p->strings + p->dynv[i+1], p->name);
-                               _exit(127);
+                               if (runtime) longjmp(rtld_fail, 1);
+                               dprintf(2, "%s\n", errbuf);
+                               ldso_fail = 1;
+                               continue;
                        }
                        if (runtime) {
                                tmp = realloc(*deps, sizeof(*tmp)*(ndeps+2));
@@ -437,12 +574,13 @@ static void reloc_all(struct dso *p)
        for (; p; p=p->next) {
                if (p->relocated) continue;
                decode_vec(p->dynv, dyn, DYN_CNT);
-               do_relocs(p->base, (void *)(p->base+dyn[DT_JMPREL]), dyn[DT_PLTRELSZ],
-                       2+(dyn[DT_PLTREL]==DT_RELA), p->syms, p->strings, head);
-               do_relocs(p->base, (void *)(p->base+dyn[DT_REL]), dyn[DT_RELSZ],
-                       2, p->syms, p->strings, head);
-               do_relocs(p->base, (void *)(p->base+dyn[DT_RELA]), dyn[DT_RELASZ],
-                       3, p->syms, p->strings, head);
+#ifdef NEED_ARCH_RELOCS
+               do_arch_relocs(p, head);
+#endif
+               do_relocs(p, (void *)(p->base+dyn[DT_JMPREL]), dyn[DT_PLTRELSZ],
+                       2+(dyn[DT_PLTREL]==DT_RELA));
+               do_relocs(p, (void *)(p->base+dyn[DT_REL]), dyn[DT_RELSZ], 2);
+               do_relocs(p, (void *)(p->base+dyn[DT_RELA]), dyn[DT_RELASZ], 3);
                p->relocated = 1;
        }
 }
@@ -452,17 +590,85 @@ static void free_all(struct dso *p)
        struct dso *n;
        while (p) {
                n = p->next;
-               if (p->map) free(p);
+               if (p->map && p!=libc && p!=head) free(p);
                p = n;
        }
 }
 
-void *__dynlink(int argc, char **argv, size_t *got)
+static size_t find_dyn(Phdr *ph, size_t cnt, size_t stride) /* Scan cnt program headers, stride bytes apart, starting at ph. */
+{
+       for (; cnt--; ph = (void *)((char *)ph + stride)) /* step by the given entry size rather than sizeof(Phdr) */
+               if (ph->p_type == PT_DYNAMIC)
+                       return ph->p_vaddr; /* unrelocated p_vaddr of the first PT_DYNAMIC header; callers add the object's base */
+       return 0; /* no PT_DYNAMIC header found */
+}
+
+static void find_map_range(Phdr *ph, size_t cnt, size_t stride, struct dso *p) /* Set p->map and p->map_len from the PT_LOAD headers among the cnt headers at ph; p->base must already be set. */
+{
+       size_t min_addr = -1, max_addr = 0; /* -1 converts to the largest size_t, so any p_vaddr compares lower */
+       for (; cnt--; ph = (void *)((char *)ph + stride)) {
+               if (ph->p_type != PT_LOAD) continue;
+               if (ph->p_vaddr < min_addr)
+                       min_addr = ph->p_vaddr; /* lowest segment start */
+               if (ph->p_vaddr+ph->p_memsz > max_addr)
+                       max_addr = ph->p_vaddr+ph->p_memsz; /* highest segment end, by in-memory size */
+       }
+       min_addr &= -PAGE_SIZE; /* round down to a page boundary */
+       max_addr = (max_addr + PAGE_SIZE-1) & -PAGE_SIZE; /* round up to a page boundary */
+       p->map = p->base + min_addr;
+       p->map_len = max_addr - min_addr;
+}
+
+static void do_init_fini(struct dso *p) /* Walk the dso list backwards from p via prev (callers pass tail), handling DT_INIT/DT_FINI for each object not yet constructed. */
 {
-       size_t *auxv, aux[AUX_CNT] = {0};
-       size_t app_dyn[DYN_CNT] = {0};
-       size_t lib_dyn[DYN_CNT] = {0};
-       size_t vdso_dyn[DYN_CNT] = {0};
+       size_t dyn[DYN_CNT] = {0}; /* NOTE(review): zeroed once, outside the loop; whether decode_vec clears entries left by the previous object is not visible here - confirm */
+       for (; p; p=p->prev) {
+               if (p->constructed) return; /* stop at the first object already handled; earlier ones are not visited */
+               decode_vec(p->dynv, dyn, DYN_CNT);
+               if (dyn[0] & (1<<DT_FINI)) /* dyn[0] tested as a bitmask of present tags */
+                       atexit((void (*)(void))(p->base + dyn[DT_FINI])); /* the finalizer is registered before the initializer is called */
+               if (dyn[0] & (1<<DT_INIT))
+                       ((void (*)(void))(p->base + dyn[DT_INIT]))(); /* call the object's initializer */
+               p->constructed = 1;
+       }
+}
+
+void _dl_debug_state(void) /* Deliberately empty: its address is stored in debug.bp and it is called by __dynlink and dlopen (presumably as a breakpoint target for debuggers - confirm). */
+{
+}
+
+void *__copy_tls(unsigned char *mem, size_t cnt) /* Lay out a DTV and the TLS blocks of modules 1..cnt inside mem, and return a pointer just past them, which is treated as the thread structure. */
+{
+       struct dso *p;
+       void **dtv = (void *)mem; /* the DTV occupies the start of mem */
+       dtv[0] = (void *)cnt; /* slot 0 holds the module count; slots 1..cnt hold block pointers */
+       mem = (void *)(dtv + cnt + 1); /* TLS blocks begin right after the cnt+1 DTV slots */
+       for (p=tail; p; p=p->prev) { /* the list is walked from tail to head */
+               if (p->tls_id-1 >= cnt) continue; /* unsigned wrap makes tls_id 0 (no TLS) fail this test, as do ids above cnt */
+               mem += -p->tls_len & (4*sizeof(size_t)-1); /* pad by (-tls_len) mod 4*sizeof(size_t) */
+               mem += ((uintptr_t)p->tls_image - (uintptr_t)mem) /* advance so that mem is congruent to tls_image modulo tls_align */
+                       & (p->tls_align-1);
+               dtv[p->tls_id] = mem;
+               memcpy(mem, p->tls_image, p->tls_len); /* only the tls_len initialized bytes are copied; the rest of the tls_size block is not written here */
+               mem += p->tls_size;
+       }
+       ((pthread_t)mem)->dtv = dtv; /* the thread structure directly follows the last TLS block */
+       return mem;
+}
+
+void *__tls_get_addr(size_t *p) /* Return the address of a TLS object for the calling thread: p[0] is the module id (DTV index), p[1] the offset within that module's block. */
+{
+       pthread_t self = __pthread_self();
+       if ((size_t)self->dtv[0] < p[0]) { /* dtv[0] holds the number of DTV slots; a larger module id has no slot in this thread */
+               // FIXME: obtain new DTV and TLS from the DSO
+               a_crash(); /* unsupported for now, per the FIXME above */
+       }
+       return (char *)self->dtv[p[0]] + p[1];
+}
+
+void *__dynlink(int argc, char **argv)
+{
+       size_t aux[AUX_CNT] = {0};
        size_t i;
        Phdr *phdr;
        Ehdr *ehdr;
@@ -470,8 +676,8 @@ void *__dynlink(int argc, char **argv, size_t *got)
        struct dso *const app = builtin_dsos+0;
        struct dso *const lib = builtin_dsos+1;
        struct dso *const vdso = builtin_dsos+2;
-       size_t vdso_base=0;
        char *env_preload=0;
+       size_t vdso_base;
 
        /* Find aux vector just past environ[] */
        for (i=argc+1; argv[i]; i++)
@@ -490,48 +696,93 @@ void *__dynlink(int argc, char **argv, size_t *got)
                env_preload = 0;
        }
 
-       for (i=0; auxv[i]; i+=2) {
-               if (auxv[i]==AT_SYSINFO_EHDR) {
-                       vdso_base = auxv[i+1];
-                       break;
-               }
+       /* If the dynamic linker was invoked as a program itself, AT_BASE
+        * will not be set. In that case, we assume the base address is
+        * the start of the page containing the PHDRs; I don't know any
+        * better approach... */
+       if (!aux[AT_BASE]) {
+               aux[AT_BASE] = aux[AT_PHDR] & -PAGE_SIZE;
+               aux[AT_PHDR] = aux[AT_PHENT] = aux[AT_PHNUM] = 0;
        }
 
-       /* Relocate ldso's DYNAMIC pointer and load vector */
-       decode_vec((void *)(got[0] += aux[AT_BASE]), lib_dyn, DYN_CNT);
+       /* The dynamic linker load address is passed by the kernel
+        * in the AUX vector, so this is easy. */
+       lib->base = (void *)aux[AT_BASE];
+       lib->name = lib->shortname = "libc.so";
+       lib->global = 1;
+       ehdr = (void *)lib->base;
+       find_map_range((void *)(aux[AT_BASE]+ehdr->e_phoff),
+               ehdr->e_phnum, ehdr->e_phentsize, lib);
+       lib->dynv = (void *)(lib->base + find_dyn(
+               (void *)(aux[AT_BASE]+ehdr->e_phoff),
+               ehdr->e_phnum, ehdr->e_phentsize));
+       decode_dyn(lib);
 
-       /* Find the program image's DYNAMIC section and decode it */
-       phdr = (void *)aux[AT_PHDR];
-       for (i=aux[AT_PHNUM]; i--; phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
-               if (phdr->p_type == PT_DYNAMIC) {
-                       decode_vec((void *)phdr->p_vaddr, app_dyn, DYN_CNT);
-                       break;
+       if (aux[AT_PHDR]) {
+               size_t interp_off = 0;
+               size_t tls_image = 0;
+               /* Find load address of the main program, via AT_PHDR vs PT_PHDR. */
+               phdr = (void *)aux[AT_PHDR];
+               for (i=aux[AT_PHNUM]; i; i--, phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
+                       if (phdr->p_type == PT_PHDR)
+                               app->base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
+                       else if (phdr->p_type == PT_INTERP)
+                               interp_off = (size_t)phdr->p_vaddr;
+                       else if (phdr->p_type == PT_TLS) {
+                               tls_image = phdr->p_vaddr;
+                               app->tls_len = phdr->p_filesz;
+                               app->tls_size = phdr->p_memsz;
+                               app->tls_align = phdr->p_align;
+                       }
+               }
+               if (app->tls_size) app->tls_image = (char *)app->base + tls_image;
+               if (interp_off) lib->name = (char *)app->base + interp_off;
+               app->name = argv[0];
+               app->dynv = (void *)(app->base + find_dyn(
+                       (void *)aux[AT_PHDR], aux[AT_PHNUM], aux[AT_PHENT]));
+               find_map_range((void *)aux[AT_PHDR],
+                       aux[AT_PHNUM], aux[AT_PHENT], app);
+       } else {
+               int fd;
+               char *ldname = argv[0];
+               size_t dyno, l = strlen(ldname);
+               if (l >= 3 && !strcmp(ldname+l-3, "ldd")) ldd_mode = 1;
+               *argv++ = (void *)-1;
+               if (argv[0] && !strcmp(argv[0], "--")) *argv++ = (void *)-1;
+               if (!argv[0]) {
+                       dprintf(2, "musl libc/dynamic program loader\n");
+                       dprintf(2, "usage: %s pathname%s\n", ldname,
+                               ldd_mode ? "" : " [args]");
+                       _exit(1);
+               }
+               fd = open(argv[0], O_RDONLY);
+               if (fd < 0) {
+                       dprintf(2, "%s: cannot load %s: %s\n", ldname, argv[0], strerror(errno));
+                       _exit(1);
                }
+               runtime = 1;
+               ehdr = (void *)map_library(fd, app);
+               if (!ehdr) {
+                       dprintf(2, "%s: %s: Not a valid dynamic program\n", ldname, argv[0]);
+                       _exit(1);
+               }
+               runtime = 0;
+               close(fd);
+               lib->name = ldname;
+               app->name = argv[0];
+               aux[AT_ENTRY] = ehdr->e_entry;
+       }
+       if (app->tls_size) {
+               app->tls_id = ++tls_cnt;
+               tls_size += app->tls_size+app->tls_align + 8*sizeof(size_t)-1
+                       & -4*sizeof(size_t);
        }
+       app->global = 1;
+       app->constructed = 1;
+       decode_dyn(app);
 
-       *app = (struct dso){
-               .base = 0,
-               .strings = (void *)(app_dyn[DT_STRTAB]),
-               .hashtab = (void *)(app_dyn[DT_HASH]),
-               .syms = (void *)(app_dyn[DT_SYMTAB]),
-               .dynv = (void *)(phdr->p_vaddr),
-               .name = argv[0],
-               .global = 1,
-               .next = lib
-       };
-
-       *lib = (struct dso){
-               .base = (void *)aux[AT_BASE],
-               .strings = (void *)(aux[AT_BASE]+lib_dyn[DT_STRTAB]),
-               .hashtab = (void *)(aux[AT_BASE]+lib_dyn[DT_HASH]),
-               .syms = (void *)(aux[AT_BASE]+lib_dyn[DT_SYMTAB]),
-               .dynv = (void *)(got[0]),
-               .name = "libc.so",
-               .global = 1,
-               .relocated = 1
-       };
-
-       if (vdso_base) {
+       /* Attach to vdso, if provided by the kernel */
+       if (search_vec(auxv, &vdso_base, AT_SYSINFO_EHDR)) {
                ehdr = (void *)vdso_base;
                phdr = (void *)(vdso_base + ehdr->e_phoff);
                for (i=ehdr->e_phnum; i; i--, phdr=(void *)((char *)phdr + ehdr->e_phentsize)) {
@@ -540,46 +791,94 @@ void *__dynlink(int argc, char **argv, size_t *got)
                        if (phdr->p_type == PT_LOAD)
                                vdso->base = (void *)(vdso_base - phdr->p_vaddr + phdr->p_offset);
                }
-               decode_vec(vdso->dynv, vdso_dyn, DYN_CNT);
-               vdso->syms = (void *)(vdso->base + vdso_dyn[DT_SYMTAB]);
-               vdso->hashtab = (void *)(vdso->base + vdso_dyn[DT_HASH]);
-               vdso->strings = (void *)(vdso->base + vdso_dyn[DT_STRTAB]);
-               vdso->name = "linux-gate.so.1";
+               vdso->name = vdso->shortname = "linux-gate.so.1";
                vdso->global = 1;
-
+               decode_dyn(vdso);
                vdso->prev = lib;
                lib->next = vdso;
        }
 
-       /* Relocate the dynamic linker/libc */
-       do_relocs((void *)aux[AT_BASE], (void *)(aux[AT_BASE]+lib_dyn[DT_REL]),
-               lib_dyn[DT_RELSZ], 2, lib->syms, lib->strings, app);
-       do_relocs((void *)aux[AT_BASE], (void *)(aux[AT_BASE]+lib_dyn[DT_RELA]),
-               lib_dyn[DT_RELASZ], 3, lib->syms, lib->strings, app);
+       /* Initial dso chain consists only of the app. We temporarily
+        * append the dynamic linker/libc so we can relocate it, then
+        * restore the initial chain in preparation for loading third
+        * party libraries (preload/needed). */
+       head = tail = app;
+       libc = lib;
+       app->next = lib;
+       reloc_all(lib);
+       app->next = 0;
 
-       /* At this point the standard library is fully functional */
+       /* PAST THIS POINT, ALL LIBC INTERFACES ARE FULLY USABLE. */
 
+       /* Donate unused parts of app and library mapping to malloc */
        reclaim_gaps(app->base, (void *)aux[AT_PHDR], aux[AT_PHENT], aux[AT_PHNUM]);
        ehdr = (void *)lib->base;
        reclaim_gaps(lib->base, (void *)(lib->base+ehdr->e_phoff),
                ehdr->e_phentsize, ehdr->e_phnum);
 
-       head = tail = app;
-       libc = lib;
-       app->next = 0;
+       /* Load preload/needed libraries, add their symbols to the global
+        * namespace, and perform all remaining relocations. The main
+        * program must be relocated LAST since it may contain copy
+        * relocations which depend on libraries' relocations. */
        if (env_preload) load_preload(env_preload);
-       load_deps(head);
+       load_deps(app);
+       make_global(app);
+
+       /* Make an initial pass setting up TLS before performing relocs.
+        * This provides the TP-based offset of each DSO's TLS for
+        * use in TP-relative relocations. After relocations, we need
+        * to copy the TLS images again in case they had relocs. */
+       tls_size += sizeof(struct pthread) + 4*sizeof(size_t);
+       __libc.tls_size = tls_size;
+       __libc.tls_cnt = tls_cnt;
+       if (tls_cnt) {
+               struct dso *p;
+               void *mem = mmap(0, __libc.tls_size, PROT_READ|PROT_WRITE,
+                       MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+               if (mem==MAP_FAILED ||
+                   !__install_initial_tls(__copy_tls(mem, tls_cnt))) {
+                       dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
+                               argv[0], tls_size);
+                       _exit(127);
+               }
+               for (p=head; p; p=p->next) {
+                       if (!p->tls_id) continue;
+                       p->tls_offset = (char *)__pthread_self()
+                               - (char *)__pthread_self()->dtv[p->tls_id];
+               }
+       }
+
+       reloc_all(app->next);
+       reloc_all(app);
+
+       /* The initial DTV is located at the base of the memory
+        * allocated for TLS. Repeat copying TLS to pick up relocs. */
+       if (tls_cnt) __copy_tls((void *)__pthread_self()->dtv, tls_cnt);
 
-       make_global(head);
-       reloc_all(head->next);
-       reloc_all(head);
+       if (ldso_fail) _exit(127);
+       if (ldd_mode) _exit(0);
 
+       /* Switch to runtime mode: any further failures in the dynamic
+        * linker are a reportable failure rather than a fatal startup
+        * error. If the dynamic loader (dlopen) will not be used, free
+        * all memory used by the dynamic linker. */
        runtime = 1;
-       if (!rtld_used) {
-               free_all(head);
-               free(sys_path);
-               reclaim((void *)builtin_dsos, 0, sizeof builtin_dsos);
-       }
+
+#ifndef DYNAMIC_IS_RO
+       for (i=0; app->dynv[i]; i+=2)
+               if (app->dynv[i]==DT_DEBUG)
+                       app->dynv[i+1] = (size_t)&debug;
+#endif
+       debug.ver = 1;
+       debug.bp = _dl_debug_state;
+       debug.head = head;
+       debug.base = lib->base;
+       debug.state = 0;
+       _dl_debug_state();
+
+       if (ssp_used) __init_ssp(auxv);
+
+       do_init_fini(tail);
 
        errno = 0;
        return (void *)aux[AT_ENTRY];
@@ -589,9 +888,11 @@ void *dlopen(const char *file, int mode)
 {
        struct dso *volatile p, *orig_tail = tail, *next; /* NOTE(review): tail is sampled here, before the write lock below is taken */
        size_t i;
+       int cs; /* caller's cancel state, restored just before returning */
 
        if (!file) return head; /* a null file yields the head of the DSO list as the handle */
 
+       pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs); /* no cancellation while the lock is held */
        pthread_rwlock_wrlock(&lock);
 
        if (setjmp(rtld_fail)) { /* nonzero return: control came back through a jump to rtld_fail */
@@ -607,12 +908,17 @@ void *dlopen(const char *file, int mode)
                }
                tail = orig_tail; /* truncate the DSO list back to the tail recorded at entry */
                tail->next = 0;
-               pthread_rwlock_unlock(&lock);
-               return 0;
-       }
+               p = 0; /* failure result */
+               errflag = 1; /* arm dlerror() */
+               goto end;
+       } else p = load_library(file);
 
-       p = load_library(file);
-       if (!p) goto end;
+       if (!p) {
+               snprintf(errbuf, sizeof errbuf,
+                       "Error loading shared library %s: %m", file);
+               errflag = 1; /* arm dlerror() with the message just formatted */
+               goto end;
+       }
 
        /* First load handling */
        if (!p->deps) {
@@ -634,37 +940,119 @@ void *dlopen(const char *file, int mode)
                p->global = 1;
        }
 
+       if (ssp_used) __init_ssp(auxv);
+
+       _dl_debug_state(); /* presumably the debugger notification hook (the startup code stores it in debug.bp) */
+
+       do_init_fini(tail);
 end:
        pthread_rwlock_unlock(&lock); /* common exit: success and both failure paths arrive here */
+       pthread_setcancelstate(cs, 0);
        return p;
 }
 
 static void *do_dlsym(struct dso *p, const char *s, void *ra)
 {
        size_t i;
-       uint32_t h;
+       uint32_t h = 0, gh = 0;
        Sym *sym;
-       if (p == RTLD_NEXT) {
-               for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
-               if (!p) p=head;
-               p=p->next;
-       }
-       if (p == head || p == RTLD_DEFAULT)
-               return find_sym(head, s, 0);
-       h = hash(s);
-       sym = lookup(s, h, p->syms, p->hashtab, p->strings);
+       if (p == head || p == RTLD_DEFAULT || p == RTLD_NEXT) {
+               if (p == RTLD_NEXT) {
+                       for (p=head; p && (unsigned char *)ra-p->map>p->map_len; p=p->next);
+                       if (!p) p=head;
+               }
+               struct symdef def = find_sym(p->next, s, 0);
+               if (!def.sym) goto failed;
+               return def.dso->base + def.sym->st_value;
+       }
+       if (p->ghashtab) {
+               gh = gnu_hash(s);
+               sym = gnu_lookup(s, gh, p);
+       } else {
+               h = sysv_hash(s);
+               sym = sysv_lookup(s, h, p);
+       }
        if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
                return p->base + sym->st_value;
        if (p->deps) for (i=0; p->deps[i]; i++) {
-               sym = lookup(s, h, p->deps[i]->syms,
-                       p->deps[i]->hashtab, p->deps[i]->strings);
+               if (p->deps[i]->ghashtab) {
+                       if (!gh) gh = gnu_hash(s);
+                       sym = gnu_lookup(s, gh, p->deps[i]);
+               } else {
+                       if (!h) h = sysv_hash(s);
+                       sym = sysv_lookup(s, h, p->deps[i]);
+               }
                if (sym && sym->st_value && (1<<(sym->st_info&0xf) & OK_TYPES))
                        return p->deps[i]->base + sym->st_value;
        }
+failed:
+       errflag = 1;
+       snprintf(errbuf, sizeof errbuf, "Symbol not found: %s", s);
        return 0;
 }
 
-void *__dlsym(void *p, const char *s, void *ra)
+/* Map addr back to the object and symbol that contain it.
+ *
+ * Finds the DSO whose mapping covers addr, then scans its symbol
+ * table for the defined symbol (of an accepted type and binding) with
+ * the highest address not above addr. On success fills *info and
+ * returns 1; returns 0 if no DSO covers addr or no symbol qualifies. */
+int __dladdr(void *addr, Dl_info *info)
+{
+       struct dso *p;
+       Sym *sym;
+       /* Must start at 0: the GNU-hash branch below compares against
+        * nsym before it ever assigns to it. */
+       uint32_t nsym = 0;
+       char *strings;
+       size_t i;
+       void *best = 0;
+       char *bestname = 0;
+
+       /* The lock covers only the walk over the DSO list. */
+       pthread_rwlock_rdlock(&lock);
+       for (p=head; p && (unsigned char *)addr-p->map>p->map_len; p=p->next);
+       pthread_rwlock_unlock(&lock);
+
+       if (!p) return 0;
+
+       sym = p->syms;
+       strings = p->strings;
+       if (p->hashtab) {
+               /* The second word of the table gives the symbol count. */
+               nsym = p->hashtab[1];
+       } else {
+               /* No count is stored with the GNU-style table, so derive
+                * one: take the largest bucket entry (a symbol index),
+                * then follow that chain until the entry whose low bit
+                * marks its end. */
+               uint32_t *buckets;
+               uint32_t *hashval;
+               buckets = p->ghashtab + 4 + (p->ghashtab[2]*sizeof(size_t)/4);
+               /* Scan from the first symbol the table indexes. */
+               sym += p->ghashtab[1];
+               for (i = 0; i < p->ghashtab[0]; i++) {
+                       if (buckets[i] > nsym)
+                               nsym = buckets[i];
+               }
+               if (nsym) {
+                       /* Make the index relative to the first indexed
+                        * symbol, matching the adjusted sym pointer. */
+                       nsym -= p->ghashtab[1];
+                       hashval = buckets + p->ghashtab[0] + nsym;
+                       do nsym++;
+                       while (!(*hashval++ & 1));
+               }
+       }
+
+       for (; nsym; nsym--, sym++) {
+               if (sym->st_shndx && sym->st_value
+                && (1<<(sym->st_info&0xf) & OK_TYPES)
+                && (1<<(sym->st_info>>4) & OK_BINDS)) {
+                       void *symaddr = p->base + sym->st_value;
+                       /* Keep the closest symbol at or below addr. */
+                       if (symaddr > addr || symaddr < best)
+                               continue;
+                       best = symaddr;
+                       bestname = strings + sym->st_name;
+                       if (addr == symaddr)
+                               break;
+               }
+       }
+
+       if (!best) return 0;
+
+       info->dli_fname = p->name;
+       info->dli_fbase = p->base;
+       info->dli_sname = bestname;
+       info->dli_saddr = best;
+
+       return 1;
+}
+
+void *__dlsym(void *restrict p, const char *restrict s, void *restrict ra)
 {
        void *res; /* assigned in a line elided at the hunk boundary below - presumably from do_dlsym(p, s, ra) */
        pthread_rwlock_rdlock(&lock); /* read lock; dlopen takes the same lock for writing */
@@ -672,14 +1060,29 @@ void *__dlsym(void *p, const char *s, void *ra)
        pthread_rwlock_unlock(&lock);
        return res;
 }
+#else
+/* Fallback dlopen for the build without the dynamic linker code above:
+ * nothing can be loaded, so every call fails. The failure is recorded
+ * so that a following dlerror() returns a message instead of 0. */
+void *dlopen(const char *file, int mode)
+{
+       snprintf(errbuf, sizeof errbuf, "Dynamic loading not supported");
+       errflag = 1;
+       return 0;
+}
+/* Fallback symbol lookup for the build without the dynamic linker
+ * code above: no symbol is ever found. The failure is recorded with
+ * the same message the dynamic-linked lookup uses, so dlerror()
+ * reports it. */
+void *__dlsym(void *restrict p, const char *restrict s, void *restrict ra)
+{
+       errflag = 1;
+       snprintf(errbuf, sizeof errbuf, "Symbol not found: %s", s);
+       return 0;
+}
+int __dladdr (void *addr, Dl_info *info) /* fallback variant; presumably the non-SHARED build */
+{
+       return 0; /* never resolves an address; *info is left untouched */
+}
+#endif
 
 char *dlerror() /* report the pending dl* failure once, then forget it */
 {
-       return "unknown error";
+       if (!errflag) return 0; /* no failure flagged since the last call */
+       errflag = 0; /* consume it: the next call returns 0 unless a new failure is flagged */
+       return errbuf; /* shared static buffer; later failures overwrite its contents */
 }
 
 int dlclose(void *p) /* accepts any handle and does nothing with it */
 {
        return 0; /* always reports success; p is never examined */
 }
-#endif