From 67dd6c698b990c3a6ff8dab875b841a9c05afdda Mon Sep 17 00:00:00 2001
From: =?utf8?q?Andreas=20Sch=C3=B6sser?=
Date: Mon, 13 Aug 2007 12:34:35 +0000
Subject: [PATCH] Added 2 testcases for the IA32 backend

[r15523]
---
 ir/be/test/fehler67.c | 134 ++++++++++++++++++++++++++++++++++++++++++
 ir/be/test/fehler68.c |  88 ++++++++++++++++++++++++++++
 2 files changed, 222 insertions(+)
 create mode 100644 ir/be/test/fehler67.c
 create mode 100644 ir/be/test/fehler68.c

diff --git a/ir/be/test/fehler67.c b/ir/be/test/fehler67.c
new file mode 100644
index 000000000..34f8bd91c
--- /dev/null
+++ b/ir/be/test/fehler67.c
@@ -0,0 +1,134 @@
+/*****************************************************************************
+ * Program:  sad.c
+ * Function: New implementation of the intel application note
+ *           AP-940: "Block matching in Motion estimation Algorithms
+ *                    using Streaming SIMD Extensions 3"
+ *           We changed:
+ *            - We used local arrays instead of pointer arithmetic
+ *              because of the limited capability of the memory disambiguator
+ *            - Used if/else instead of abs function since we can't
+ *              use function calls in specification
+ *            - Unrolled the inner loop manually since our loop
+ *              unroller does not work so well.
+ *           Used as a test for the simd optimization.
+ * TODO:     - Maybe use the "restrict" keyword to implement pointer
+ *             arithmetic
+ * Author:   Andreas Schoesser
+ * Date:     2007-08-06
+ *****************************************************************************/
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+unsigned int sad(int test_blockx, int test_blocky, int *best_block_x, int *best_block_y, int iterations);
+
+int main(void)
+{
+	/* Initialised so the report below is well defined even if sad() examines no block. */
+	int best_block_x = 0, best_block_y = 0;
+	unsigned int min_diff;
+	int iterations = 100;
+
+	printf("PSADBW Example\n--------------\n\n");
+
+	printf("Executing 'motion estimation' %d times...\n\n", iterations);
+	min_diff = sad(0, 0, &best_block_x, &best_block_y, iterations);
+
+	printf("MinDiff: %u\nBest X: %d\nBest Y: %d\n", min_diff, best_block_x, best_block_y);
+
+	return 0;
+}
+
+/*
+ * Searches a 256x256 frame of pseudo-random bytes for the 16x16 block that
+ * differs least (sum of absolute differences) from the block at (0, 0).
+ *
+ * test_blockx, test_blocky:   currently unused.
+ * best_block_x, best_block_y: receive the coordinates of the best match;
+ *                             left untouched when iterations <= 0.
+ * iterations:                 how often the complete search is repeated.
+ *
+ * Returns the smallest sum of absolute differences found, or UINT_MAX
+ * if no block was examined. Prints the elapsed time to stdout.
+ */
+unsigned int sad(int test_blockx, int test_blocky, int *best_block_x, int *best_block_y, int iterations)
+{
+	unsigned char b[256][256];
+
+	unsigned char a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15;
+	unsigned char b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15;
+
+	int i, x, y, blocky;
+	unsigned tmp_diff, min_diff = UINT_MAX;
+
+	clock_t t_time_bev, t_time_after;
+	double d_zeitdauer;
+
+	(void) test_blockx;
+	(void) test_blocky;
+
+	// Fill in some random values to compare (note: the cast binds tighter than %)
+	for(x = 0; x < 256; x++)
+		for(y = 0; y < 256; y++)
+			b[y][x] = (unsigned char) rand() % 255;
+
+	// Start time measurement
+	t_time_bev = clock();
+
+	// Execute block matching 'iterations' times
+	for(i = 0; i < iterations; i++)
+	{
+		// Iterate over whole frame, x,y=coords of current block
+		for(x = 1; x < 256 - 16; x++)
+			for(y = 0; y < 256 - 16; y++)
+			{
+				tmp_diff = 0;
+
+				// Compare current block with reference block
+				for(blocky = 0; blocky < 16; blocky++)
+				{
+					// Vector loads
+					a0 = b[blocky][0]; a1 = b[blocky][1]; a2 = b[blocky][2]; a3 = b[blocky][3]; a4 = b[blocky][4]; a5 = b[blocky][5]; a6 = b[blocky][6]; a7 = b[blocky][7]; a8 = b[blocky][8]; a9 = b[blocky][9]; a10 = b[blocky][10]; a11 = b[blocky][11]; a12 = b[blocky][12]; a13 = b[blocky][13]; a14 = b[blocky][14]; a15 = b[blocky][15];
+					b0 = b[blocky + y][x + 0]; b1 = b[blocky + y][x + 1]; b2 = b[blocky + y][x + 2]; b3 = b[blocky + y][x + 3]; b4 = b[blocky + y][x + 4]; b5 = b[blocky + y][x + 5]; b6 = b[blocky + y][x + 6]; b7 = b[blocky + y][x + 7]; b8 = b[blocky + y][x + 8]; b9 = b[blocky + y][x + 9]; b10 = b[blocky + y][x + 10]; b11 = b[blocky + y][x + 11]; b12 = b[blocky + y][x + 12]; b13 = b[blocky + y][x + 13]; b14 = b[blocky + y][x + 14]; b15 = b[blocky + y][x + 15];
+
+					// psadbw, would be nice if this could be done by loop unrolling
+					tmp_diff += ((a0 > b0) ? (a0 - b0) : (b0 - a0)) +
+						((a1 > b1) ? (a1 - b1) : (b1 - a1)) +
+						((a2 > b2) ? (a2 - b2) : (b2 - a2)) +
+						((a3 > b3) ? (a3 - b3) : (b3 - a3)) +
+						((a4 > b4) ? (a4 - b4) : (b4 - a4)) +
+						((a5 > b5) ? (a5 - b5) : (b5 - a5)) +
+						((a6 > b6) ? (a6 - b6) : (b6 - a6)) +
+						((a7 > b7) ? (a7 - b7) : (b7 - a7)) +
+						((a8 > b8) ? (a8 - b8) : (b8 - a8)) +
+						((a9 > b9) ? (a9 - b9) : (b9 - a9)) +
+						((a10 > b10) ? (a10 - b10) : (b10 - a10)) +
+						((a11 > b11) ? (a11 - b11) : (b11 - a11)) +
+						((a12 > b12) ? (a12 - b12) : (b12 - a12)) +
+						((a13 > b13) ? (a13 - b13) : (b13 - a13)) +
+						((a14 > b14) ? (a14 - b14) : (b14 - a14)) +
+						((a15 > b15) ? (a15 - b15) : (b15 - a15));
+				}
+
+				// Check if the current block is least different
+				if(min_diff > tmp_diff)
+				{
+					min_diff = tmp_diff;
+					*best_block_x = x;
+					*best_block_y = y;
+				}
+			}
+	}
+
+	// End time measurement
+	t_time_after = clock();
+
+	d_zeitdauer = (double) (t_time_after - t_time_bev) / CLOCKS_PER_SEC;
+
+	// clock() ticks divided by CLOCKS_PER_SEC are seconds on every compiler
+	printf("Zeitdauer %g s\n", d_zeitdauer);
+
+	return(min_diff);
+}
diff --git a/ir/be/test/fehler68.c b/ir/be/test/fehler68.c
new file mode 100644
index 000000000..0b5cc990f
--- /dev/null
+++ b/ir/be/test/fehler68.c
@@ -0,0 +1,88 @@
+/************************************************************************
+ * Program:  scalar_product.c
+ * Function: Calculates the scalar product of two vectors lying in memory.
+ *           Used as a test for the simd optimization.
+ * Author:   Andreas Schoesser
+ * Date:     2007-06-13
+ ************************************************************************/
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+float scalar_product(float *a, float *b, unsigned int max_elements);
+
+int main(void)
+{
+	float res;
+	int i, max_elements = 100000000;
+	clock_t t_time_bev, t_time_after;
+	double d_zeitdauer;
+
+	// Allocate memory; the 16 spare bytes leave room to align the arrays below
+	char *a = malloc(16 + max_elements * sizeof(float));
+	char *b = malloc(16 + max_elements * sizeof(float));
+	float *aa, *ab;
+
+	if(a == NULL || b == NULL)
+	{
+		fprintf(stderr, "Out of memory\n");
+		free(a);
+		free(b);
+		return 1;
+	}
+
+	// Advance both pointers to the next 16 byte boundary
+	aa = (float *) (a + 16 - (uintptr_t) a % 16);
+	ab = (float *) (b + 16 - (uintptr_t) b % 16);
+
+	printf("Scalar product\n==============\n\n");
+
+	// Fill both arrays with random values
+	for(i = 0; i < max_elements; i++)
+	{
+		aa[i] = (float) (rand() % 10);
+		ab[i] = (float) (rand() % 10);
+	}
+
+	// Start time measurement
+	t_time_bev = clock();
+
+	res = scalar_product(aa, ab, max_elements);
+
+	// Stop time measurement
+	t_time_after = clock();
+	d_zeitdauer = (double) (t_time_after - t_time_bev) / CLOCKS_PER_SEC;
+
+	// clock() ticks divided by CLOCKS_PER_SEC are seconds on every compiler
+	printf("Zeitdauer %g s\n", d_zeitdauer);
+
+	printf("\nResult: %g\n", res);
+
+	free(a);
+	free(b);
+
+	return 0;
+}
+
+
+/*
+ * Returns the scalar product of the first max_elements entries of a and b.
+ * The main loop handles four elements per iteration; leftover elements
+ * (when max_elements is not a multiple of 4) are added one by one.
+ */
+float scalar_product(float * a, float * b, unsigned int max_elements)
+{
+	float res = 0.0f;
+	unsigned int i;
+
+	for(i = 0; max_elements - i >= 4; i += 4)
+		res += a[i] * b[i] + a[i + 1] * b[i + 1] + a[i + 2] * b[i + 2] + a[i + 3] * b[i + 3];
+
+	// Remaining 0..3 elements; i never exceeds max_elements here
+	for(; i < max_elements; i++)
+		res += a[i] * b[i];
+
+	return(res);
+}
-- 
2.20.1