#define _GNU_SOURCE
+#define SYSCALL_NO_TLS 1
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#define ALIGN(x,y) ((x)+(y)-1 & -(y))
#define container_of(p,t,m) ((t*)((char *)(p)-offsetof(t,m)))
+#define countof(a) ((sizeof (a))/(sizeof (a)[0]))
struct debug {
int ver;
char runtime_loaded;
struct dso **deps, *needed_by;
size_t ndeps_direct;
+ size_t next_dep;
+ int ctor_visitor;
char *rpath_orig, *rpath;
struct tls_module tls;
size_t tls_id;
static int ldd_mode;
static int ldso_fail;
static int noload;
+static int shutting_down;
static jmp_buf *rtld_fail;
static pthread_rwlock_t lock;
static struct debug debug;
static struct tls_module *tls_tail;
static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
static size_t static_tls_cnt;
-static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
+static pthread_mutex_t init_fini_lock;
+static pthread_cond_t ctor_cond;
+static struct dso *builtin_deps[2];
+static struct dso *const no_deps[1];
+static struct dso *builtin_ctor_queue[4];
+static struct dso **main_ctor_queue;
static struct fdpic_loadmap *app_loadmap;
static struct fdpic_dummy_loadmap app_dummy_loadmap;
#else
p = (void *)__syscall(SYS_mmap, 0, n, prot, flags, -1, 0);
#endif
- return p == MAP_FAILED ? 0 : p;
+ return (unsigned long)p > -4096UL ? 0 : p;
}
static void makefuncdescs(struct dso *p)
static void load_direct_deps(struct dso *p)
{
size_t i, cnt=0;
+
if (p->deps) return;
/* For head, all preloads are direct pseudo-dependencies.
* Count and include them now to avoid realloc later. */
cnt++;
for (i=0; p->dynv[i]; i+=2)
if (p->dynv[i] == DT_NEEDED) cnt++;
- p->deps = calloc(cnt+1, sizeof *p->deps);
+ /* Use builtin buffer for apps with no external deps, to
+ * preserve property of no runtime failure paths. */
+ p->deps = (p==head && cnt<2) ? builtin_deps :
+ calloc(cnt+1, sizeof *p->deps);
if (!p->deps) {
error("Error loading dependencies for %s", p->name);
if (runtime) longjmp(*rtld_fail, 1);
struct dso **tmp;
/* Can't use realloc if the original p->deps was allocated at
- * program entry and malloc has been replaced. */
- int no_realloc = __malloc_replaced && !p->runtime_loaded;
+ * program entry and malloc has been replaced, or if it's
+ * the builtin non-allocated trivial main program deps array. */
+ int no_realloc = (__malloc_replaced && !p->runtime_loaded)
+ || p->deps == builtin_deps;
if (p->bfs_built) return;
ndeps_all = p->ndeps_direct;
{
struct dso *p;
size_t dyn[DYN_CNT];
+ int self = __pthread_self()->tid;
+
+ /* Take both locks before setting shutting_down, so that
+ * either lock is sufficient to read its value. The lock
+ * order matches that in dlopen to avoid deadlock. */
+ pthread_rwlock_wrlock(&lock);
+ pthread_mutex_lock(&init_fini_lock);
+ shutting_down = 1;
+ pthread_rwlock_unlock(&lock);
for (p=fini_head; p; p=p->fini_next) {
+ while (p->ctor_visitor && p->ctor_visitor!=self)
+ pthread_cond_wait(&ctor_cond, &init_fini_lock);
if (!p->constructed) continue;
decode_vec(p->dynv, dyn, DYN_CNT);
if (dyn[0] & (1<<DT_FINI_ARRAY)) {
}
}
-static void do_init_fini(struct dso *p)
+static struct dso **queue_ctors(struct dso *dso)
{
- size_t dyn[DYN_CNT];
- int need_locking = libc.threads_minus_1;
- /* Allow recursive calls that arise when a library calls
- * dlopen from one of its constructors, but block any
- * other threads until all ctors have finished. */
- if (need_locking) pthread_mutex_lock(&init_fini_lock);
- for (; p; p=p->prev) {
- if (p->constructed) continue;
- p->constructed = 1;
+ size_t cnt, qpos, spos, i;
+ struct dso *p, **queue, **stack;
+
+ if (ldd_mode) return 0;
+
+ /* Bound on queue size is the total number of indirect deps.
+ * If a bfs deps list was built, we can use it. Otherwise,
+ * bound by the total number of DSOs, which is always safe and
+ * is reasonable we use it (for main app at startup). */
+ if (dso->bfs_built) {
+ for (cnt=0; dso->deps[cnt]; cnt++)
+ dso->deps[cnt]->mark = 0;
+ cnt++; /* self, not included in deps */
+ } else {
+ for (cnt=0, p=head; p; cnt++, p=p->next)
+ p->mark = 0;
+ }
+ cnt++; /* termination slot */
+ if (dso==head && cnt <= countof(builtin_ctor_queue))
+ queue = builtin_ctor_queue;
+ else
+ queue = calloc(cnt, sizeof *queue);
+
+ if (!queue) {
+ error("Error allocating constructor queue: %m\n");
+ if (runtime) longjmp(*rtld_fail, 1);
+ return 0;
+ }
+
+ /* Opposite ends of the allocated buffer serve as an output queue
+ * and a working stack. Setup initial stack with just the argument
+ * dso and initial queue empty... */
+ stack = queue;
+ qpos = 0;
+ spos = cnt;
+ stack[--spos] = dso;
+ dso->next_dep = 0;
+ dso->mark = 1;
+
+ /* Then perform pseudo-DFS sort, but ignoring circular deps. */
+ while (spos<cnt) {
+ p = stack[spos++];
+ while (p->next_dep < p->ndeps_direct) {
+ if (p->deps[p->next_dep]->mark) {
+ p->next_dep++;
+ } else {
+ stack[--spos] = p;
+ p = p->deps[p->next_dep];
+ p->next_dep = 0;
+ p->mark = 1;
+ }
+ }
+ queue[qpos++] = p;
+ }
+ queue[qpos] = 0;
+ for (i=0; i<qpos; i++) queue[i]->mark = 0;
+
+ return queue;
+}
+
+static void do_init_fini(struct dso **queue)
+{
+ struct dso *p;
+ size_t dyn[DYN_CNT], i;
+ int self = __pthread_self()->tid;
+
+ pthread_mutex_lock(&init_fini_lock);
+ for (i=0; (p=queue[i]); i++) {
+ while ((p->ctor_visitor && p->ctor_visitor!=self) || shutting_down)
+ pthread_cond_wait(&ctor_cond, &init_fini_lock);
+ if (p->ctor_visitor || p->constructed)
+ continue;
+ p->ctor_visitor = self;
+
decode_vec(p->dynv, dyn, DYN_CNT);
if (dyn[0] & ((1<<DT_FINI) | (1<<DT_FINI_ARRAY))) {
p->fini_next = fini_head;
fini_head = p;
}
+
+ pthread_mutex_unlock(&init_fini_lock);
+
#ifndef NO_LEGACY_INITFINI
if ((dyn[0] & (1<<DT_INIT)) && dyn[DT_INIT])
fpaddr(p, dyn[DT_INIT])();
size_t *fn = laddr(p, dyn[DT_INIT_ARRAY]);
while (n--) ((void (*)(void))*fn++)();
}
- if (!need_locking && libc.threads_minus_1) {
- need_locking = 1;
- pthread_mutex_lock(&init_fini_lock);
- }
+
+ pthread_mutex_lock(&init_fini_lock);
+ p->ctor_visitor = 0;
+ p->constructed = 1;
+ pthread_cond_broadcast(&ctor_cond);
}
- if (need_locking) pthread_mutex_unlock(&init_fini_lock);
+ pthread_mutex_unlock(&init_fini_lock);
}
void __libc_start_init(void)
{
- do_init_fini(tail);
+ do_init_fini(main_ctor_queue);
+ if (!__malloc_replaced && main_ctor_queue != builtin_ctor_queue)
+ free(main_ctor_queue);
+ main_ctor_queue = 0;
}
static void dl_debug_state(void)
libc.auxv = auxv = (void *)(argv+i+1);
decode_vec(auxv, aux, AUX_CNT);
__hwcap = aux[AT_HWCAP];
+ search_vec(auxv, &__sysinfo, AT_SYSINFO);
+ __pthread_self()->sysinfo = __sysinfo;
libc.page_size = aux[AT_PAGESZ];
libc.secure = ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
|| aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]);
reclaim_gaps(&ldso);
/* Load preload/needed libraries, add symbols to global namespace. */
+ ldso.deps = (struct dso **)no_deps;
if (env_preload) load_preload(env_preload);
load_deps(&app);
for (struct dso *p=head; p; p=p->next)
vdso.name = "";
vdso.shortname = "linux-gate.so.1";
vdso.relocated = 1;
+ vdso.deps = (struct dso **)no_deps;
decode_dyn(&vdso);
vdso.prev = tail;
tail->next = &vdso;
}
}
+ /* This must be done before final relocations, since it calls
+ * malloc, which may be provided by the application. Calling any
+ * application code prior to the jump to its entry point is not
+ * valid in our model and does not work with FDPIC, where there
+ * are additional relocation-like fixups that only the entry point
+ * code can see to perform. */
+ main_ctor_queue = queue_ctors(&app);
+
/* The main program must be relocated LAST since it may contin
* copy relocations which depend on libraries' relocations. */
reloc_all(app.next);
size_t i;
int cs;
jmp_buf jb;
+ struct dso **volatile ctor_queue = 0;
if (!file) return head;
__inhibit_ptc();
p = 0;
+ if (shutting_down) {
+ error("Cannot dlopen while program is exiting.");
+ goto end;
+ }
orig_tls_tail = tls_tail;
orig_tls_cnt = tls_cnt;
orig_tls_offset = tls_offset;
unmap_library(p);
free(p);
}
+ free(ctor_queue);
+ ctor_queue = 0;
if (!orig_tls_tail) libc.tls_head = 0;
tls_tail = orig_tls_tail;
if (tls_tail) tls_tail->next = 0;
/* First load handling */
load_deps(p);
extend_bfs_deps(p);
+ pthread_mutex_lock(&init_fini_lock);
+ if (!p->constructed) ctor_queue = queue_ctors(p);
+ pthread_mutex_unlock(&init_fini_lock);
if (!p->relocated && (mode & RTLD_LAZY)) {
prepare_lazy(p);
for (i=0; p->deps[i]; i++)
__release_ptc();
if (p) gencnt++;
pthread_rwlock_unlock(&lock);
- if (p) do_init_fini(orig_tail);
+ if (ctor_queue) {
+ do_init_fini(ctor_queue);
+ free(ctor_queue);
+ }
pthread_setcancelstate(cs, 0);
return p;
}