arch_register_class_put() is only used locally
[libfirm] / ir / be / test / fehler067.c
1 /*****************************************************************************
2  * Program:  sad.c
3  * Function: New implementation of the Intel application note
4  *                       AP-940: "Block matching in Motion estimation Algorithms
5  *                       using Streaming SIMD Extensions 3"
6  *           We changed:
7  *                       -      We used local arrays instead of pointer arithmetic
8  *                              because of the limited capability of the memory disambiguator
9  *                       -      Used if/else instead of abs function since we can't
10  *                              use function calls in specification
11  *                       -      Unrolled the inner loop manually since our loop
12  *                              unroller does not work so well.
13  *           Used as a test for the simd optimization.
14  * TODO:         -      Maybe use the "restrict" keyword to implement pointer
15  *                              arithmetic
16  * Author:   Andreas Schoesser
17  * Date:     2007-08-06
18  *****************************************************************************/
19
20 #include <stdio.h>
21 #include <malloc.h>
22 #include <stdlib.h>
23 #include <time.h>
24
25 unsigned int sad(int test_blockx, int test_blocky, int *best_block_x, int *best_block_y, int iterations);
26
/**
 * Test driver: runs the block-matching search and prints its result.
 *
 * Calls sad() with reference block coordinates (0, 0) and a fixed
 * iteration count of 2, then prints the minimum difference together with
 * the coordinates of the best matching block.
 *
 * @return 0 on completion.
 */
int main(void)
{
	/* Initialized so the final printf never reads indeterminate values. */
	int          best_block_x = 0;
	int          best_block_y = 0;
	unsigned int min_diff;
	int          iterations   = 2;

	printf("PSADBW Example\n--------------\n\n");

	printf("Executing 'motion estimation' %d times...\n\n", iterations);
	min_diff = sad(0, 0, &best_block_x, &best_block_y, iterations);

	printf("MinDiff: %u\nBest X: %d\nBest Y: %d\n", min_diff, best_block_x, best_block_y);

	return 0;
}
40
/**
 * Exhaustive block-matching search based on the sum of absolute
 * differences (SAD).
 *
 * Fills a local 256x256 frame with pseudo-random bytes obtained from
 * rand(), then compares the 16x16 reference block at the top-left corner of
 * the frame against every 16x16 candidate block whose top-left corner
 * (x, y) satisfies 1 <= x < 240 and 0 <= y < 240. The complete search is
 * repeated `iterations` times over the same frame.
 *
 * The comparison of one block row is unrolled by hand into 16 scalar
 * loads per block and 16 conditional differences; this shape is kept on
 * purpose (see the file header).
 *
 * @param test_blockx   Currently unused; the reference block always starts
 *                      at column 0.
 * @param test_blocky   Currently unused; the reference block always starts
 *                      at row 0.
 * @param best_block_x  Out: x coordinate of the first candidate (x is the
 *                      outer loop, y the inner loop) with the smallest SAD,
 *                      or -1 if no search was performed.
 * @param best_block_y  Out: y coordinate of that candidate, or -1 if no
 *                      search was performed.
 * @param iterations    Number of times the whole search is repeated.
 * @return The smallest SAD found, or 0xFFFFFFFF if iterations <= 0.
 */
unsigned int sad(int test_blockx, int test_blocky, int *best_block_x, int *best_block_y, int iterations)
{
	enum { FRAME_SIZE = 256, BLOCK_SIZE = 16 };

	unsigned char b[FRAME_SIZE][FRAME_SIZE];

	unsigned char a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15;
	unsigned char b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15;

	int i, x, y, blocky;
	unsigned tmp_diff, min_diff = 0xFFFFFFFF; // MAX_UINT

	/* The reference block position is fixed; these parameters are ignored. */
	(void) test_blockx;
	(void) test_blocky;

	/* Give the outputs a defined value even when the search loop below
	 * never executes (iterations <= 0). */
	*best_block_x = -1;
	*best_block_y = -1;

	// Fill in some random values to compare.
	// The cast binds tighter than '%': rand() is truncated to a byte first,
	// then reduced modulo 255, so every value lies in 0..254.
	for(x = 0; x < FRAME_SIZE; x++)
		for(y = 0; y < FRAME_SIZE; y++)
			b[y][x] = ((unsigned char) rand()) % 255;

	// Execute block matching `iterations` times
	for(i = 0; i < iterations; i++)
	{
		// Iterate over whole frame, x,y=coords of current block
		for(x = 1; x < FRAME_SIZE - BLOCK_SIZE; x++)
			for(y = 0; y < FRAME_SIZE - BLOCK_SIZE; y++)
			{
				tmp_diff = 0;

				// Compare current block with reference block
				for(blocky = 0; blocky < BLOCK_SIZE; blocky++)
				{
					// Vector loads: one row of the reference block ...
					a0  = b[blocky][0];  a1  = b[blocky][1];  a2  = b[blocky][2];  a3  = b[blocky][3];
					a4  = b[blocky][4];  a5  = b[blocky][5];  a6  = b[blocky][6];  a7  = b[blocky][7];
					a8  = b[blocky][8];  a9  = b[blocky][9];  a10 = b[blocky][10]; a11 = b[blocky][11];
					a12 = b[blocky][12]; a13 = b[blocky][13]; a14 = b[blocky][14]; a15 = b[blocky][15];

					// ... and the matching row of the candidate block
					b0  = b[blocky + y][x + 0];  b1  = b[blocky + y][x + 1];
					b2  = b[blocky + y][x + 2];  b3  = b[blocky + y][x + 3];
					b4  = b[blocky + y][x + 4];  b5  = b[blocky + y][x + 5];
					b6  = b[blocky + y][x + 6];  b7  = b[blocky + y][x + 7];
					b8  = b[blocky + y][x + 8];  b9  = b[blocky + y][x + 9];
					b10 = b[blocky + y][x + 10]; b11 = b[blocky + y][x + 11];
					b12 = b[blocky + y][x + 12]; b13 = b[blocky + y][x + 13];
					b14 = b[blocky + y][x + 14]; b15 = b[blocky + y][x + 15];

					// psadbw, would be nice if this could be done by loop unrolling
					tmp_diff += ((a0 > b0) ? (a0 - b0) : (b0 - a0)) +
						((a1 > b1) ? (a1 - b1) : (b1 - a1)) +
						((a2 > b2) ? (a2 - b2) : (b2 - a2)) +
						((a3 > b3) ? (a3 - b3) : (b3 - a3)) +
						((a4 > b4) ? (a4 - b4) : (b4 - a4)) +
						((a5 > b5) ? (a5 - b5) : (b5 - a5)) +
						((a6 > b6) ? (a6 - b6) : (b6 - a6)) +
						((a7 > b7) ? (a7 - b7) : (b7 - a7)) +
						((a8 > b8) ? (a8 - b8) : (b8 - a8)) +
						((a9 > b9) ? (a9 - b9) : (b9 - a9)) +
						((a10 > b10) ? (a10 - b10) : (b10 - a10)) +
						((a11 > b11) ? (a11 - b11) : (b11 - a11)) +
						((a12 > b12) ? (a12 - b12) : (b12 - a12)) +
						((a13 > b13) ? (a13 - b13) : (b13 - a13)) +
						((a14 > b14) ? (a14 - b14) : (b14 - a14)) +
						((a15 > b15) ? (a15 - b15) : (b15 - a15));
				}

				// Check if the current block is least different.
				// Strict comparison keeps the first block on ties.
				if(min_diff > tmp_diff)
				{
					min_diff = tmp_diff;
					*best_block_x = x;
					*best_block_y = y;
				}
			}
	}

	return(min_diff);
}