before restrict was added, memmove called memcpy for forward copies and
used a byte-at-a-time loop for reverse copies. that was changed to
avoid invoking UB now that memcpy has an undefined copying order,
which made memmove considerably slower.
performance is still rather bad, so I'll be adding asm soon.
+#include <stdint.h>
+
+#define WT size_t
+#define WS (sizeof(WT))
void *memmove(void *dest, const void *src, size_t n)
{
char *d = dest;
const char *s = src;
void *memmove(void *dest, const void *src, size_t n)
{
char *d = dest;
const char *s = src;
- if ((size_t)(d-s) < n)
- while (n--) d[n] = s[n];
- else
- while (n--) *d++ = *s++;
+ if (s+n <= d || d+n <= s) return memcpy(d, s, n);
+
+ if (d<s) {
+ if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
+ while ((uintptr_t)d % WS) {
+ if (!n--) return dest;
+ *d++ = *s++;
+ }
+ for (; n>=WS; n-=WS, d+=WS, s+=WS) *(WT *)d = *(WT *)s;
+ }
+ for (; n; n--) *d++ = *s++;
+ } else {
+ if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
+ while ((uintptr_t)(d+n) % WS) {
+ if (!n--) return dest;
+ d[n] = s[n];
+ }
+ while (n>=WS) n-=WS, *(WT *)(d+n) = *(WT *)(s+n);
+ }
+ while (n) n--, d[n] = s[n];
+ }
+