#include "atomic.h"
#include "pthread_impl.h"
-uintptr_t __brk(uintptr_t);
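+/* With GCC in PIC builds, promote "inline" to always_inline so the small
+ * locking helpers below are reliably inlined rather than called. */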
+#if defined(__GNUC__) && defined(__PIC__)
+#define inline inline __attribute__((always_inline))
+#endif
+
void *__mmap(void *, size_t, int, int, int, off_t);
int __munmap(void *, size_t);
void *__mremap(void *, size_t, size_t, int, ...);
int __madvise(void *, size_t, int);
struct chunk {
- size_t data[1];
- struct chunk *next;
- struct chunk *prev;
+ size_t psize, csize;
+ struct chunk *next, *prev;
};
struct bin {
- int lock[2];
+ volatile int lock[2];
struct chunk *head;
struct chunk *tail;
};
static struct {
- uintptr_t brk;
- size_t *heap;
- uint64_t binmap;
+ volatile uint64_t binmap;
struct bin bins[64];
- int brk_lock[2];
- int free_lock[2];
+ volatile int free_lock[2];
} mal;
#define DONTCARE 16
#define RECLAIM 163840
-#define CHUNK_SIZE(c) ((c)->data[0] & SIZE_MASK)
-#define CHUNK_PSIZE(c) ((c)->data[-1] & SIZE_MASK)
+#define CHUNK_SIZE(c) ((c)->csize & -2)
+#define CHUNK_PSIZE(c) ((c)->psize & -2)
#define PREV_CHUNK(c) ((struct chunk *)((char *)(c) - CHUNK_PSIZE(c)))
#define NEXT_CHUNK(c) ((struct chunk *)((char *)(c) + CHUNK_SIZE(c)))
-#define MEM_TO_CHUNK(p) (struct chunk *)((size_t *)p - 1)
-#define CHUNK_TO_MEM(c) (void *)((c)->data+1)
+#define MEM_TO_CHUNK(p) (struct chunk *)((char *)(p) - OVERHEAD)
+#define CHUNK_TO_MEM(c) (void *)((char *)(c) + OVERHEAD)
#define BIN_TO_CHUNK(i) (MEM_TO_CHUNK(&mal.bins[i].head))
#define C_INUSE ((size_t)1)
-#define C_FLAGS ((size_t)3)
-#define C_SIZE SIZE_MASK
-#define IS_MMAPPED(c) !((c)->data[0] & (C_INUSE))
+#define IS_MMAPPED(c) !((c)->csize & (C_INUSE))
/* Synchronization tools */
-static void lock(volatile int *lk)
+static inline void lock(volatile int *lk)
{
- if (!libc.threads_minus_1) return;
- while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1);
+ if (libc.threads_minus_1)
+ while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1);
}
-static void unlock(volatile int *lk)
+static inline void unlock(volatile int *lk)
{
- if (!libc.threads_minus_1) return;
- a_store(lk, 0);
- if (lk[1]) __wake(lk, 1, 1);
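+ /* Skip the store and futex wake when lock() never actually took the
+ * lock (it is a no-op while the process is single-threaded). */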
+ if (lk[0]) {
+ a_store(lk, 0);
+ if (lk[1]) __wake(lk, 1, 1);
+ }
}
-static void lock_bin(int i)
+static inline void lock_bin(int i)
{
- if (libc.threads_minus_1)
- lock(mal.bins[i].lock);
+ lock(mal.bins[i].lock);
if (!mal.bins[i].head)
mal.bins[i].head = mal.bins[i].tail = BIN_TO_CHUNK(i);
}
-static void unlock_bin(int i)
+static inline void unlock_bin(int i)
{
- if (!libc.threads_minus_1) return;
unlock(mal.bins[i].lock);
}
#endif
}
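+/* Lookup table for the logarithmic size classes used below: with
+ * x = size/SIZE_ALIGN - 1, values 33..511 map through bin_tab[x/8-4]
+ * (bins 32..47) and larger values through bin_tab[x/128-4]+16
+ * (bins 48..63). */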
+static const unsigned char bin_tab[60] = {
+ 32,33,34,35,36,36,37,37,38,38,39,39,
+ 40,40,40,40,41,41,41,41,42,42,42,42,43,43,43,43,
+ 44,44,44,44,44,44,44,44,45,45,45,45,45,45,45,45,
+ 46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47,
+};
+
static int bin_index(size_t x)
{
x = x / SIZE_ALIGN - 1;
if (x <= 32) return x;
+ if (x < 512) return bin_tab[x/8-4];
if (x > 0x1c00) return 63;
- return ((union { float v; uint32_t r; }){ x }.r>>21) - 496;
+ return bin_tab[x/128-4] + 16;
}
static int bin_index_up(size_t x)
{
x = x / SIZE_ALIGN - 1;
if (x <= 32) return x;
- return ((union { float v; uint32_t r; }){ x }.r+0x1fffff>>21) - 496;
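+ /* Same table as bin_index(), biased so the result rounds up and a
+ * request of size x never lands in a bin of smaller chunks. */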
+ x--;
+ if (x < 512) return bin_tab[x/8-4] + 1;
+ return bin_tab[x/128-4] + 17;
}
#if 0
for (c = (void *)mal.heap; CHUNK_SIZE(c); c = NEXT_CHUNK(c))
fprintf(stderr, "base %p size %zu (%d) flags %d/%d\n",
c, CHUNK_SIZE(c), bin_index(CHUNK_SIZE(c)),
- c->data[0] & 15,
- NEXT_CHUNK(c)->data[-1] & 15);
+ c->csize & 15,
+ NEXT_CHUNK(c)->psize & 15);
for (i=0; i<64; i++) {
if (mal.bins[i].head != BIN_TO_CHUNK(i) && mal.bins[i].head) {
fprintf(stderr, "bin %d: %p\n", i, mal.bins[i].head);
}
#endif
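+/* Shared helper defined elsewhere in the malloc sources: expands the heap
+ * (via brk, falling back to mmap) by at least *n bytes and updates *n to
+ * the amount actually obtained. */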
+void *__expand_heap(size_t *);
+
static struct chunk *expand_heap(size_t n)
{
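+ /* heap_lock serializes heap growth; end remembers the previous top of
+ * the heap so we can tell whether newly obtained space is contiguous. */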
+ static int heap_lock[2];
+ static void *end;
+ void *p;
struct chunk *w;
- uintptr_t new;
-
- lock(mal.brk_lock);
- if (n > SIZE_MAX - mal.brk - 2*PAGE_SIZE) goto fail;
- new = mal.brk + n + SIZE_ALIGN + PAGE_SIZE - 1 & -PAGE_SIZE;
- n = new - mal.brk;
+ /* The argument n already accounts for the caller's chunk
+ * overhead needs, but if the heap can't be extended in-place,
+ * we need room for an extra zero-sized sentinel chunk. */
+ n += SIZE_ALIGN;
- if (__brk(new) != new) goto fail;
+ lock(heap_lock);
- w = MEM_TO_CHUNK(new);
- w->data[-1] = n | C_INUSE;
- w->data[0] = 0 | C_INUSE;
-
- w = MEM_TO_CHUNK(mal.brk);
- w->data[0] = n | C_INUSE;
- mal.brk = new;
-
- unlock(mal.brk_lock);
-
- return w;
-fail:
- unlock(mal.brk_lock);
- return 0;
-}
-
-static int init_malloc(size_t n)
-{
- static int init, waiters;
- int state;
- struct chunk *c;
-
- if (init == 2) return 0;
-
- while ((state=a_swap(&init, 1)) == 1)
- __wait(&init, &waiters, 1, 1);
- if (state) {
- a_store(&init, 2);
+ p = __expand_heap(&n);
+ if (!p) {
+ unlock(heap_lock);
return 0;
}
- mal.brk = __brk(0) + 2*SIZE_ALIGN-1 & -SIZE_ALIGN;
+ /* If not just expanding existing space, we need to make a
+ * new sentinel chunk below the allocated space. */
+ if (p != end) {
+ /* Valid/safe because of the prologue increment. */
+ n -= SIZE_ALIGN;
+ p = (char *)p + SIZE_ALIGN;
+ w = MEM_TO_CHUNK(p);
+ w->psize = 0 | C_INUSE;
+ }
- c = expand_heap(n);
+ /* Record new heap end and fill in footer. */
+ end = (char *)p + n;
+ w = MEM_TO_CHUNK(end);
+ w->psize = n | C_INUSE;
+ w->csize = 0 | C_INUSE;
- if (!c) {
- a_store(&init, 0);
- if (waiters) __wake(&init, 1, 1);
- return -1;
- }
+ /* Fill in header, which may be new or may be replacing a
+ * zero-size sentinel header at the old end-of-heap. */
+ w = MEM_TO_CHUNK(p);
+ w->csize = n | C_INUSE;
- mal.heap = (void *)c;
- c->data[-1] = 0 | C_INUSE;
- free(CHUNK_TO_MEM(c));
+ unlock(heap_lock);
- a_store(&init, 2);
- if (waiters) __wake(&init, -1, 1);
- return 1;
+ return w;
}
static int adjust_size(size_t *n)
a_and_64(&mal.binmap, ~(1ULL<<i));
c->prev->next = c->next;
c->next->prev = c->prev;
- c->data[0] |= C_INUSE;
- NEXT_CHUNK(c)->data[-1] |= C_INUSE;
+ c->csize |= C_INUSE;
+ NEXT_CHUNK(c)->psize |= C_INUSE;
}
static int alloc_fwd(struct chunk *c)
{
int i;
size_t k;
- while (!((k=c->data[0]) & C_INUSE)) {
+ while (!((k=c->csize) & C_INUSE)) {
i = bin_index(k);
lock_bin(i);
- if (c->data[0] == k) {
+ if (c->csize == k) {
unbin(c, i);
unlock_bin(i);
return 1;
{
int i;
size_t k;
- while (!((k=c->data[-1]) & C_INUSE)) {
+ while (!((k=c->psize) & C_INUSE)) {
i = bin_index(k);
lock_bin(i);
- if (c->data[-1] == k) {
+ if (c->psize == k) {
unbin(PREV_CHUNK(c), i);
unlock_bin(i);
return 1;
split->next = self->next;
split->prev->next = split;
split->next->prev = split;
- split->data[-1] = n | C_INUSE;
- split->data[0] = n1-n;
- next->data[-1] = n1-n;
- self->data[0] = n | C_INUSE;
+ split->psize = n | C_INUSE;
+ split->csize = n1-n;
+ next->psize = n1-n;
+ self->csize = n | C_INUSE;
return 1;
}
next = NEXT_CHUNK(self);
split = (void *)((char *)self + n);
- split->data[-1] = n | C_INUSE;
- split->data[0] = n1-n | C_INUSE;
- next->data[-1] = n1-n | C_INUSE;
- self->data[0] = n | C_INUSE;
+ split->psize = n | C_INUSE;
+ split->csize = n1-n | C_INUSE;
+ next->psize = n1-n | C_INUSE;
+ self->csize = n | C_INUSE;
free(CHUNK_TO_MEM(split));
}
char *base = __mmap(0, len, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (base == (void *)-1) return 0;
- c = (void *)(base + SIZE_ALIGN - sizeof(size_t));
- c->data[0] = len - (SIZE_ALIGN - sizeof(size_t));
- c->data[-1] = SIZE_ALIGN - sizeof(size_t);
+ c = (void *)(base + SIZE_ALIGN - OVERHEAD);
+ c->csize = len - (SIZE_ALIGN - OVERHEAD);
+ c->psize = SIZE_ALIGN - OVERHEAD;
return CHUNK_TO_MEM(c);
}
for (;;) {
uint64_t mask = mal.binmap & -(1ULL<<i);
if (!mask) {
- if (init_malloc(n) > 0) continue;
c = expand_heap(n);
if (!c) return 0;
if (alloc_rev(c)) {
struct chunk *x = c;
c = PREV_CHUNK(c);
- NEXT_CHUNK(x)->data[-1] = c->data[0] =
- x->data[0] + CHUNK_SIZE(c);
+ NEXT_CHUNK(x)->psize = c->csize =
+ x->csize + CHUNK_SIZE(c);
}
break;
}
j = first_set(mask);
lock_bin(j);
c = mal.bins[j].head;
- if (c != BIN_TO_CHUNK(j) && j == bin_index(c->data[0])) {
+ if (c != BIN_TO_CHUNK(j)) {
if (!pretrim(c, n, i, j)) unbin(c, j);
unlock_bin(j);
break;
return CHUNK_TO_MEM(c);
}
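+/* Zero-filling variant of malloc (presumably the back end for calloc):
+ * mmapped chunks come from fresh anonymous pages and are already zero;
+ * for heap chunks only nonzero words are written, so pages that are
+ * already clean are not dirtied. */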
+void *__malloc0(size_t n)
+{
+ void *p = malloc(n);
+ if (p && !IS_MMAPPED(MEM_TO_CHUNK(p))) {
+ size_t *z;
+ n = (n + sizeof *z - 1)/sizeof *z;
+ for (z=p; n; n--, z++) if (*z) *z=0;
+ }
+ return p;
+}
+
void *realloc(void *p, size_t n)
{
struct chunk *self, *next;
n1 = n0 = CHUNK_SIZE(self);
if (IS_MMAPPED(self)) {
- size_t extra = self->data[-1];
+ size_t extra = self->psize;
char *base = (char *)self - extra;
size_t oldlen = n0 + extra;
size_t newlen = n + extra;
/* Crash on realloc of freed chunk */
- if ((uintptr_t)base < mal.brk) *(char *)0=0;
+ if (extra & 1) a_crash();
if (newlen < PAGE_SIZE && (new = malloc(n))) {
memcpy(new, p, n-OVERHEAD);
free(p);
if (oldlen == newlen) return p;
base = __mremap(base, oldlen, newlen, MREMAP_MAYMOVE);
if (base == (void *)-1)
- return newlen < oldlen ? p : 0;
+ goto copy_realloc;
self = (void *)(base + extra);
- self->data[0] = newlen - extra;
+ self->csize = newlen - extra;
return CHUNK_TO_MEM(self);
}
next = NEXT_CHUNK(self);
+ /* Crash on corrupted footer (likely from buffer overflow) */
+ if (next->psize != self->csize) a_crash();
+
/* Merge adjacent chunks if we need more space. This is not
* a waste of time even if we fail to get enough space, because our
* subsequent call to free would otherwise have to do the merge. */
self = PREV_CHUNK(self);
n1 += CHUNK_SIZE(self);
}
- self->data[0] = n1 | C_INUSE;
- next->data[-1] = n1 | C_INUSE;
+ self->csize = n1 | C_INUSE;
+ next->psize = n1 | C_INUSE;
/* If we got enough space, split off the excess and return */
if (n <= n1) {
return CHUNK_TO_MEM(self);
}
+copy_realloc:
/* As a last resort, allocate a new chunk and copy to it. */
new = malloc(n-OVERHEAD);
if (!new) return 0;
void free(void *p)
{
- struct chunk *self = MEM_TO_CHUNK(p);
- struct chunk *next;
+ struct chunk *self, *next;
size_t final_size, new_size, size;
int reclaim=0;
int i;
if (!p) return;
+ self = MEM_TO_CHUNK(p);
+
if (IS_MMAPPED(self)) {
- size_t extra = self->data[-1];
+ size_t extra = self->psize;
char *base = (char *)self - extra;
size_t len = CHUNK_SIZE(self) + extra;
/* Crash on double free */
- if ((uintptr_t)base < mal.brk) *(char *)0=0;
+ if (extra & 1) a_crash();
__munmap(base, len);
return;
}
final_size = new_size = CHUNK_SIZE(self);
next = NEXT_CHUNK(self);
- for (;;) {
- /* Replace middle of large chunks with fresh zero pages */
- if (reclaim && (self->data[-1] & next->data[0] & C_INUSE)) {
- uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE;
- uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE;
-#if 1
- __madvise((void *)a, b-a, MADV_DONTNEED);
-#else
- __mmap((void *)a, b-a, PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
-#endif
- }
+ /* Crash on corrupted footer (likely from buffer overflow) */
+ if (next->psize != self->csize) a_crash();
- if (self->data[-1] & next->data[0] & C_INUSE) {
- self->data[0] = final_size | C_INUSE;
- next->data[-1] = final_size | C_INUSE;
+ for (;;) {
+ if (self->psize & next->csize & C_INUSE) {
+ self->csize = final_size | C_INUSE;
+ next->psize = final_size | C_INUSE;
i = bin_index(final_size);
lock_bin(i);
lock(mal.free_lock);
- if (self->data[-1] & next->data[0] & C_INUSE)
+ if (self->psize & next->csize & C_INUSE)
break;
unlock(mal.free_lock);
unlock_bin(i);
}
}
- self->data[0] = final_size;
- next->data[-1] = final_size;
+ if (!(mal.binmap & 1ULL<<i))
+ a_or_64(&mal.binmap, 1ULL<<i);
+
+ self->csize = final_size;
+ next->psize = final_size;
unlock(mal.free_lock);
self->next = BIN_TO_CHUNK(i);
self->prev = mal.bins[i].tail;
self->next->prev = self;
self->prev->next = self;
- if (!(mal.binmap & 1ULL<<i))
- a_or_64(&mal.binmap, 1ULL<<i);
+ /* Replace middle of large chunks with fresh zero pages */
+ if (reclaim) {
+ uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE;
+ uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE;
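+ /* a and b are rounded inward to page boundaries, so the chunk header,
+ * footer and list links stay intact while the interior pages are freed. */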
+#if 1
+ __madvise((void *)a, b-a, MADV_DONTNEED);
+#else
+ __mmap((void *)a, b-a, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
+#endif
+ }
unlock_bin(i);
}