- char *base = _base;
- size_t i;
-
- if (!nel) return;
- for (i=(nel+1)/2; i; i--)
- sift(base, i-1, nel-1, width, cmp);
- for (i=nel-1; i; i--) {
- swap(base, base+i*width, width);
- sift(base, 0, i-1, width, cmp);
+ size_t lp[12*sizeof(size_t)];
+ size_t i, size = width * nel;
+ unsigned char *head, *high;
+ size_t p[2] = {1, 0};
+ int pshift = 1;
+ int trail;
+
+ if (!size) return;
+
+ head = base;
+ high = head + size - width;
+
+ /* Precompute Leonardo numbers, scaled by element width */
+ for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++);
+
+ while(head < high) {
+ if((p[0] & 3) == 3) {
+ sift(head, width, cmp, pshift, lp);
+ shr(p, 2);
+ pshift += 2;
+ } else {
+ if(lp[pshift - 1] >= high - head) {
+ trinkle(head, width, cmp, p, pshift, 0, lp);
+ } else {
+ sift(head, width, cmp, pshift, lp);
+ }
+
+ if(pshift == 1) {
+ shl(p, 1);
+ pshift = 0;
+ } else {
+ shl(p, pshift - 1);
+ pshift = 1;
+ }
+ }
+
+ p[0] |= 1;
+ head += width;
+ }
+
+ trinkle(head, width, cmp, p, pshift, 0, lp);
+
+ while(pshift != 1 || p[0] != 1 || p[1] != 0) {
+ if(pshift <= 1) {
+ trail = pntz(p);
+ shr(p, trail);
+ pshift += trail;
+ } else {
+ shl(p, 2);
+ pshift -= 2;
+ p[0] ^= 7;
+ shr(p, 1);
+ trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp);
+ shl(p, 1);
+ p[0] |= 1;
+ trinkle(head - width, width, cmp, p, pshift, 1, lp);
+ }
+ head -= width;