#include "atomic.h"
#include "pthread_impl.h"
-uintptr_t __brk(uintptr_t);
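+/* With GCC in PIC builds, promote "inline" to always_inline so the small
+ * locking helpers below are reliably inlined rather than called. */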
+#if defined(__GNUC__) && defined(__PIC__)
+#define inline inline __attribute__((always_inline))
+#endif
+
void *__mmap(void *, size_t, int, int, int, off_t);
int __munmap(void *, size_t);
void *__mremap(void *, size_t, size_t, int, ...);
int __madvise(void *, size_t, int);
struct chunk {
- size_t data[1];
- struct chunk *next;
- struct chunk *prev;
+ size_t psize, csize;
+ struct chunk *next, *prev;
};
struct bin {
- int lock[2];
+ volatile int lock[2];
struct chunk *head;
struct chunk *tail;
};
static struct {
- uintptr_t brk;
- size_t *heap;
- uint64_t binmap;
+ volatile uint64_t binmap;
struct bin bins[64];
- int brk_lock[2];
- int free_lock[2];
+ volatile int free_lock[2];
} mal;
#define DONTCARE 16
#define RECLAIM 163840
-#define CHUNK_SIZE(c) ((c)->data[0] & SIZE_MASK)
-#define CHUNK_PSIZE(c) ((c)->data[-1] & SIZE_MASK)
+#define CHUNK_SIZE(c) ((c)->csize & -2)
+#define CHUNK_PSIZE(c) ((c)->psize & -2)
#define PREV_CHUNK(c) ((struct chunk *)((char *)(c) - CHUNK_PSIZE(c)))
#define NEXT_CHUNK(c) ((struct chunk *)((char *)(c) + CHUNK_SIZE(c)))
-#define MEM_TO_CHUNK(p) (struct chunk *)((size_t *)p - 1)
-#define CHUNK_TO_MEM(c) (void *)((c)->data+1)
+#define MEM_TO_CHUNK(p) (struct chunk *)((char *)(p) - OVERHEAD)
+#define CHUNK_TO_MEM(c) (void *)((char *)(c) + OVERHEAD)
#define BIN_TO_CHUNK(i) (MEM_TO_CHUNK(&mal.bins[i].head))
#define C_INUSE ((size_t)1)
-#define C_FLAGS ((size_t)3)
-#define C_SIZE SIZE_MASK
-#define IS_MMAPPED(c) !((c)->data[0] & (C_INUSE))
+#define IS_MMAPPED(c) !((c)->csize & (C_INUSE))
/* Synchronization tools */
-static void lock(volatile int *lk)
+static inline void lock(volatile int *lk)
{
- if (!libc.threads_minus_1) return;
- while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1);
+ if (libc.threads_minus_1)
+ while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1);
}
-static void unlock(volatile int *lk)
+static inline void unlock(volatile int *lk)
{
- if (!libc.threads_minus_1) return;
- a_store(lk, 0);
- if (lk[1]) __wake(lk, 1, 1);
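+ /* Skip the store and futex wake when lock() never actually took the
+ * lock (it is a no-op while the process is single-threaded). */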
+ if (lk[0]) {
+ a_store(lk, 0);
+ if (lk[1]) __wake(lk, 1, 1);
+ }
}
-static void lock_bin(int i)
+static inline void lock_bin(int i)
{
- if (libc.threads_minus_1)
- lock(mal.bins[i].lock);
+ lock(mal.bins[i].lock);
if (!mal.bins[i].head)
mal.bins[i].head = mal.bins[i].tail = BIN_TO_CHUNK(i);
}
-static void unlock_bin(int i)
+static inline void unlock_bin(int i)
{
- if (!libc.threads_minus_1) return;
unlock(mal.bins[i].lock);
}
#endif
}
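+/* Lookup table for the logarithmic size classes used below: with
+ * x = size/SIZE_ALIGN - 1, values 33..511 map through bin_tab[x/8-4]
+ * (bins 32..47) and larger values through bin_tab[x/128-4]+16
+ * (bins 48..63). */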
+static const unsigned char bin_tab[60] = {
+ 32,33,34,35,36,36,37,37,38,38,39,39,
+ 40,40,40,40,41,41,41,41,42,42,42,42,43,43,43,43,
+ 44,44,44,44,44,44,44,44,45,45,45,45,45,45,45,45,
+ 46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47,
+};
+
static int bin_index(size_t x)
{
x = x / SIZE_ALIGN - 1;
if (x <= 32) return x;
+ if (x < 512) return bin_tab[x/8-4];
if (x > 0x1c00) return 63;
- return ((union { float v; uint32_t r; }){ x }.r>>21) - 496;
+ return bin_tab[x/128-4] + 16;
}
static int bin_index_up(size_t x)
{
x = x / SIZE_ALIGN - 1;
if (x <= 32) return x;
- return ((union { float v; uint32_t r; }){ x }.r+0x1fffff>>21) - 496;
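+ /* Same table as bin_index(), biased so the result rounds up and a
+ * request of size x never lands in a bin of smaller chunks. */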
+ x--;
+ if (x < 512) return bin_tab[x/8-4] + 1;
+ return bin_tab[x/128-4] + 17;
}
#if 0
for (c = (void *)mal.heap; CHUNK_SIZE(c); c = NEXT_CHUNK(c))
fprintf(stderr, "base %p size %zu (%d) flags %d/%d\n",
c, CHUNK_SIZE(c), bin_index(CHUNK_SIZE(c)),
- c->data[0] & 15,
- NEXT_CHUNK(c)->data[-1] & 15);
+ c->csize & 15,
+ NEXT_CHUNK(c)->psize & 15);
for (i=0; i<64; i++) {
if (mal.bins[i].head != BIN_TO_CHUNK(i) && mal.bins[i].head) {
fprintf(stderr, "bin %d: %p\n", i, mal.bins[i].head);
}
#endif
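+/* Shared helper defined elsewhere in the malloc sources: expands the heap
+ * (via brk, falling back to mmap) by at least *n bytes and updates *n to
+ * the amount actually obtained. */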
+void *__expand_heap(size_t *);
+
static struct chunk *expand_heap(size_t n)
{
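+ /* heap_lock serializes heap growth; end remembers the previous top of
+ * the heap so we can tell whether newly obtained space is contiguous. */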
+ static int heap_lock[2];
+ static void *end;
+ void *p;
struct chunk *w;
- uintptr_t new;
-
- lock(mal.brk_lock);
- if (n > SIZE_MAX - mal.brk - 2*PAGE_SIZE) goto fail;
- new = mal.brk + n + SIZE_ALIGN + PAGE_SIZE - 1 & -PAGE_SIZE;
- n = new - mal.brk;
+ /* The argument n already accounts for the caller's chunk
+ * overhead needs, but if the heap can't be extended in-place,
+ * we need room for an extra zero-sized sentinel chunk. */
+ n += SIZE_ALIGN;
- if (__brk(new) != new) goto fail;
+ lock(heap_lock);
- w = MEM_TO_CHUNK(new);
- w->data[-1] = n | C_INUSE;
- w->data[0] = 0 | C_INUSE;
-
- w = MEM_TO_CHUNK(mal.brk);
- w->data[0] = n | C_INUSE;
- mal.brk = new;
-
- unlock(mal.brk_lock);
-
- return w;
-fail:
- unlock(mal.brk_lock);
- return 0;
-}
-
-static int init_malloc(size_t n)
-{
- static int init, waiters;
- int state;
- struct chunk *c;
-
- if (init == 2) return 0;
-
- while ((state=a_swap(&init, 1)) == 1)
- __wait(&init, &waiters, 1, 1);
- if (state) {
- a_store(&init, 2);
+ p = __expand_heap(&n);
+ if (!p) {
+ unlock(heap_lock);
return 0;
}
- mal.brk = __brk(0) + 2*SIZE_ALIGN-1 & -SIZE_ALIGN;
+ /* If not just expanding existing space, we need to make a
+ * new sentinel chunk below the allocated space. */
+ if (p != end) {
+ /* Valid/safe because of the prologue increment. */
+ n -= SIZE_ALIGN;
+ p = (char *)p + SIZE_ALIGN;
+ w = MEM_TO_CHUNK(p);
+ w->psize = 0 | C_INUSE;
+ }
- c = expand_heap(n);
+ /* Record new heap end and fill in footer. */
+ end = (char *)p + n;
+ w = MEM_TO_CHUNK(end);
+ w->psize = n | C_INUSE;
+ w->csize = 0 | C_INUSE;
- if (!c) {
- a_store(&init, 0);
- if (waiters) __wake(&init, 1, 1);
- return -1;
- }
+ /* Fill in header, which may be new or may be replacing a
+ * zero-size sentinel header at the old end-of-heap. */
+ w = MEM_TO_CHUNK(p);
+ w->csize = n | C_INUSE;
- mal.heap = (void *)c;
- c->data[-1] = 0 | C_INUSE;
- free(CHUNK_TO_MEM(c));
+ unlock(heap_lock);
- a_store(&init, 2);
- if (waiters) __wake(&init, -1, 1);
- return 1;
+ return w;
}
static int adjust_size(size_t *n)
a_and_64(&mal.binmap, ~(1ULL<<i));
c->prev->next = c->next;
c->next->prev = c->prev;
- c->data[0] |= C_INUSE;
- NEXT_CHUNK(c)->data[-1] |= C_INUSE;
+ c->csize |= C_INUSE;
+ NEXT_CHUNK(c)->psize |= C_INUSE;
}
static int alloc_fwd(struct chunk *c)
{
int i;
size_t k;
- while (!((k=c->data[0]) & C_INUSE)) {
+ while (!((k=c->csize) & C_INUSE)) {
i = bin_index(k);
lock_bin(i);
- if (c->data[0] == k) {
+ if (c->csize == k) {
unbin(c, i);
unlock_bin(i);
return 1;
{
int i;
size_t k;
- while (!((k=c->data[-1]) & C_INUSE)) {
+ while (!((k=c->psize) & C_INUSE)) {
i = bin_index(k);
lock_bin(i);
- if (c->data[-1] == k) {
+ if (c->psize == k) {
unbin(PREV_CHUNK(c), i);
unlock_bin(i);
return 1;
split->next = self->next;
split->prev->next = split;
split->next->prev = split;
- split->data[-1] = n | C_INUSE;
- split->data[0] = n1-n;
- next->data[-1] = n1-n;
- self->data[0] = n | C_INUSE;
+ split->psize = n | C_INUSE;
+ split->csize = n1-n;
+ next->psize = n1-n;
+ self->csize = n | C_INUSE;
return 1;
}
next = NEXT_CHUNK(self);
split = (void *)((char *)self + n);
- split->data[-1] = n | C_INUSE;
- split->data[0] = n1-n | C_INUSE;
- next->data[-1] = n1-n | C_INUSE;
- self->data[0] = n | C_INUSE;
+ split->psize = n | C_INUSE;
+ split->csize = n1-n | C_INUSE;
+ next->psize = n1-n | C_INUSE;
+ self->csize = n | C_INUSE;
free(CHUNK_TO_MEM(split));
}
char *base = __mmap(0, len, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (base == (void *)-1) return 0;
- c = (void *)(base + SIZE_ALIGN - sizeof(size_t));
- c->data[0] = len - (SIZE_ALIGN - sizeof(size_t));
- c->data[-1] = SIZE_ALIGN - sizeof(size_t);
+ c = (void *)(base + SIZE_ALIGN - OVERHEAD);
+ c->csize = len - (SIZE_ALIGN - OVERHEAD);
+ c->psize = SIZE_ALIGN - OVERHEAD;
return CHUNK_TO_MEM(c);
}
for (;;) {
uint64_t mask = mal.binmap & -(1ULL<<i);
if (!mask) {
- if (init_malloc(n) > 0) continue;
c = expand_heap(n);
if (!c) return 0;
if (alloc_rev(c)) {
struct chunk *x = c;
c = PREV_CHUNK(c);
- NEXT_CHUNK(x)->data[-1] = c->data[0] =
- x->data[0] + CHUNK_SIZE(c);
+ NEXT_CHUNK(x)->psize = c->csize =
+ x->csize + CHUNK_SIZE(c);
}
break;
}
j = first_set(mask);
lock_bin(j);
c = mal.bins[j].head;
- if (c != BIN_TO_CHUNK(j) && j == bin_index(c->data[0])) {
+ if (c != BIN_TO_CHUNK(j)) {
if (!pretrim(c, n, i, j)) unbin(c, j);
unlock_bin(j);
break;
return CHUNK_TO_MEM(c);
}
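+/* Zero-filling variant of malloc (presumably the back end for calloc):
+ * mmapped chunks come from fresh anonymous pages and are already zero;
+ * for heap chunks only nonzero words are written, so pages that are
+ * already clean are not dirtied. */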
+void *__malloc0(size_t n)
+{
+ void *p = malloc(n);
+ if (p && !IS_MMAPPED(MEM_TO_CHUNK(p))) {
+ size_t *z;
+ n = (n + sizeof *z - 1)/sizeof *z;
+ for (z=p; n; n--, z++) if (*z) *z=0;
+ }
+ return p;
+}
+
void *realloc(void *p, size_t n)
{
struct chunk *self, *next;
n1 = n0 = CHUNK_SIZE(self);
if (IS_MMAPPED(self)) {
- size_t extra = self->data[-1];
+ size_t extra = self->psize;
char *base = (char *)self - extra;
size_t oldlen = n0 + extra;
size_t newlen = n + extra;
/* Crash on realloc of freed chunk */
- if ((uintptr_t)base < mal.brk) *(char *)0=0;
+ if (extra & 1) a_crash();
if (newlen < PAGE_SIZE && (new = malloc(n))) {
memcpy(new, p, n-OVERHEAD);
free(p);
if (oldlen == newlen) return p;
base = __mremap(base, oldlen, newlen, MREMAP_MAYMOVE);
if (base == (void *)-1)
- return newlen < oldlen ? p : 0;
+ goto copy_realloc;
self = (void *)(base + extra);
- self->data[0] = newlen - extra;
+ self->csize = newlen - extra;
return CHUNK_TO_MEM(self);
}
next = NEXT_CHUNK(self);
+ /* Crash on corrupted footer (likely from buffer overflow) */
+ if (next->psize != self->csize) a_crash();
+
/* Merge adjacent chunks if we need more space. This is not
* a waste of time even if we fail to get enough space, because our
* subsequent call to free would otherwise have to do the merge. */
self = PREV_CHUNK(self);
n1 += CHUNK_SIZE(self);
}
- self->data[0] = n1 | C_INUSE;
- next->data[-1] = n1 | C_INUSE;
+ self->csize = n1 | C_INUSE;
+ next->psize = n1 | C_INUSE;
/* If we got enough space, split off the excess and return */
if (n <= n1) {
return CHUNK_TO_MEM(self);
}
+copy_realloc:
/* As a last resort, allocate a new chunk and copy to it. */
new = malloc(n-OVERHEAD);
if (!new) return 0;
void free(void *p)
{
- struct chunk *self = MEM_TO_CHUNK(p);
- struct chunk *next;
+ struct chunk *self, *next;
size_t final_size, new_size, size;
int reclaim=0;
int i;
if (!p) return;
+ self = MEM_TO_CHUNK(p);
+
if (IS_MMAPPED(self)) {
- size_t extra = self->data[-1];
+ size_t extra = self->psize;
char *base = (char *)self - extra;
size_t len = CHUNK_SIZE(self) + extra;
/* Crash on double free */
- if ((uintptr_t)base < mal.brk) *(char *)0=0;
+ if (extra & 1) a_crash();
__munmap(base, len);
return;
}
final_size = new_size = CHUNK_SIZE(self);
next = NEXT_CHUNK(self);
- for (;;) {
- /* Replace middle of large chunks with fresh zero pages */
- if (reclaim && (self->data[-1] & next->data[0] & C_INUSE)) {
- uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE;
- uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE;
-#if 1
- __madvise((void *)a, b-a, MADV_DONTNEED);
-#else
- __mmap((void *)a, b-a, PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
-#endif
- }
+ /* Crash on corrupted footer (likely from buffer overflow) */
+ if (next->psize != self->csize) a_crash();
- if (self->data[-1] & next->data[0] & C_INUSE) {
- self->data[0] = final_size | C_INUSE;
- next->data[-1] = final_size | C_INUSE;
+ for (;;) {
+ if (self->psize & next->csize & C_INUSE) {
+ self->csize = final_size | C_INUSE;
+ next->psize = final_size | C_INUSE;
i = bin_index(final_size);
lock_bin(i);
lock(mal.free_lock);
- if (self->data[-1] & next->data[0] & C_INUSE)
+ if (self->psize & next->csize & C_INUSE)
break;
unlock(mal.free_lock);
unlock_bin(i);
}
}
- self->data[0] = final_size;
- next->data[-1] = final_size;
+ if (!(mal.binmap & 1ULL<<i))
+ a_or_64(&mal.binmap, 1ULL<<i);
+
+ self->csize = final_size;
+ next->psize = final_size;
unlock(mal.free_lock);
self->next = BIN_TO_CHUNK(i);
self->prev = mal.bins[i].tail;
self->next->prev = self;
self->prev->next = self;
- if (!(mal.binmap & 1ULL<<i))
- a_or_64(&mal.binmap, 1ULL<<i);
+ /* Replace middle of large chunks with fresh zero pages */
+ if (reclaim) {
+ uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE;
+ uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE;
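+ /* a and b are rounded inward to page boundaries, so the chunk header,
+ * footer and list links stay intact while the interior pages are freed. */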
+#if 1
+ __madvise((void *)a, b-a, MADV_DONTNEED);
+#else
+ __mmap((void *)a, b-a, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
+#endif
+ }
unlock_bin(i);
}