2 ** The Computer Language Shootout
3 ** http://shootout.alioth.debian.org/
4 ** contributed by Mike Pall
5 ** de-optimized by Isaac Gouy
8 ** gcc -O3 -fomit-frame-pointer -ffast-math -o partialsums partialsums.c -lm
9 ** Adding -march=<yourcpu> may help, too.
10 ** On a P4/K8 or later try adding: -march=<yourcpu> -mfpmath=sse -msse2
17 int main(int argc, char **argv)
27 ** Yes, I (Mike Pall) tried using a double as a primary or secondary loop variable.
28 ** But the x86 ABI requires a cleared x87 FPU stack before every call
29 ** (e.g. to sin()) which nullifies any performance gains.
31 ** Combining all loops does not pay off because the x87 FPU has to shuffle
32 ** stack slots and/or runs out of registers. This may not be entirely true
33 ** for SSE2 with fully inlined FPU code (-ffast-math required). Ditto for
34 ** other CPUs with a register-based FPU and a sane FP ABI.
36 ** Auto vectorization may be a bit easier with separate loops, too.
39 #define kd ((double)k)
42 for (k = 0; k <= n; k++) sum += pow(2.0/3.0, kd);
43 printf("%.9f\t(2/3)^k\n", sum);
46 for (k = 1 ; k <= n; k++) sum += 1/sqrt(kd); /* aka pow(kd, -0.5) */
47 printf("%.9f\tk^-0.5\n", sum);
50 for (k = 1; k <= n; k++) sum += 1.0/(kd*(kd+1.0));
51 printf("%.9f\t1/k(k+1)\n", sum);
54 for (k = 1; k <= n; k++) {
56 sum += 1.0/(kd*kd*kd*sk*sk);
58 printf("%.9f\tFlint Hills\n", sum);
61 for (k = 1; k <= n; k++) {
63 sum += 1.0/(kd*kd*kd*ck*ck);
65 printf("%.9f\tCookson Hills\n", sum);
68 for (k = 1; k <= n; k++) sum += 1.0/kd;
69 printf("%.9f\tHarmonic\n", sum);
72 for (k = 1; k <= n; k++) sum += 1.0/(kd*kd);
73 printf("%.9f\tRiemann Zeta\n", sum);
76 for (k = 1; k <= n; k++) sum += (a = -a)/kd;
77 printf("%.9f\tAlternating Harmonic\n", sum);
80 for (k = 1; k <= n; k++) sum += (a = -a)/(2.0*kd - 1.0);
81 printf("%.9f\tGregory\n", sum);