/**
 * Storage word of a bitset in the ia32 backend. The bit instructions used
 * in this file carry the "l" suffix, i.e. they work on 32-bit operands.
 */
typedef unsigned bitset_unit_t;

/** printf() conversion that prints one bitset_unit_t in hexadecimal. */
#define BITSET_UNIT_FMT "%0x"
/*
 * Drop any earlier definitions of the per-unit primitives so the ia32
 * versions defined below do not clash with them.
 */
#undef _bitset_inside_set
#undef _bitset_inside_clear
#undef _bitset_inside_flip
#undef _bitset_inside_is_set

#undef _bitset_inside_ntz
#undef _bitset_inside_ntz_value
#undef _bitset_inside_nlz
/**
 * Set one bit of a bitset unit.
 *
 * @param unit  pointer to the unit to modify; it is dereferenced here, in
 *              line with the sibling primitives that forward a unit pointer
 * @param bit   index of the bit inside the unit; a constant in 0..31 is
 *              encoded as an immediate, anything else is passed in a register
 *
 * BTS reads and writes its destination, so the operand is declared "+m",
 * and it writes CF, hence the "cc" clobber.
 */
#define _bitset_inside_set(unit,bit) \
	__asm__("btsl %1,%0" : "+m" (*(unit)) : "Ir" (bit) : "cc")
/**
 * Clear one bit of a bitset unit.
 *
 * @param unit  pointer to the unit to modify; it is dereferenced here, in
 *              line with the sibling primitives that forward a unit pointer
 * @param bit   index of the bit inside the unit; a constant in 0..31 is
 *              encoded as an immediate, anything else is passed in a register
 *
 * BTR reads and writes its destination, so the operand is declared "+m",
 * and it writes CF, hence the "cc" clobber.
 */
#define _bitset_inside_clear(unit,bit) \
	__asm__("btrl %1,%0" : "+m" (*(unit)) : "Ir" (bit) : "cc")
/**
 * Invert one bit of a bitset unit.
 *
 * @param unit  pointer to the unit to modify; it is dereferenced here, in
 *              line with the sibling primitives that forward a unit pointer
 * @param bit   index of the bit inside the unit; a constant in 0..31 is
 *              encoded as an immediate, anything else is passed in a register
 *
 * BTC reads and writes its destination, so the operand is declared "+m",
 * and it writes CF, hence the "cc" clobber.
 */
#define _bitset_inside_flip(unit,bit) \
	__asm__("btcl %1,%0" : "+m" (*(unit)) : "Ir" (bit) : "cc")
/* Route the query primitives to the inline ia32 functions of the same name. */
#define _bitset_inside_is_set(unit, bit)  _bitset_ia32_inside_is_set((unit), (bit))
#define _bitset_inside_nlz(unit)          _bitset_ia32_inside_nlz((unit))
#define _bitset_inside_ntz(unit)          _bitset_ia32_inside_ntz((unit))
#define _bitset_inside_ntz_value(unit)    _bitset_ia32_inside_ntz_value((unit))
31 static INLINE int _bitset_ia32_inside_is_set(bitset_unit_t *unit, unsigned bit)
34 __asm__("mov $0,%0\n\tbtl %1,%2\n\tadc $0,%0"
36 : "Ir" (bit), "m" (unit)
41 static INLINE unsigned _bitset_ia32_inside_nlz(bitset_unit_t *unit)
44 __asm__("bsrl %1,%0" :"=r" (res) :"m" (unit));
45 return *unit == 0 ? 32 : res;
48 static INLINE unsigned _bitset_ia32_inside_ntz(bitset_unit_t *unit) {
50 __asm__("bsfl %1,%0" :"=r" (res) :"m" (unit));
51 return *unit == 0 ? 32 : res;
54 static INLINE unsigned _bitset_ia32_inside_ntz_value(bitset_unit_t unit) {
56 __asm__("bsfl %1,%0" :"=r" (res) :"rm" (unit));
57 return unit == 0 ? 32 : res;
60 #if defined(__GNUC__) && defined(__SSE2__)
#include <stdint.h>
#include <xmmintrin.h>
/*
 * Drop any earlier definitions of the layout and binop macros before the
 * SSE2 variants are defined. _bitset_units is included because it is
 * redefined below as well.
 */
#undef _bitset_units
#undef _bitset_overall_size
#undef _bitset_data_ptr

#undef _BITSET_BINOP_UNITS_INC

#undef _bitset_inside_binop_and
#undef _bitset_inside_binop_andnot
#undef _bitset_inside_binop_or
#undef _bitset_inside_binop_xor

#undef _bitset_inside_binop_with_zero_and
#undef _bitset_inside_binop_with_zero_andnot
#undef _bitset_inside_binop_with_zero_or
#undef _bitset_inside_binop_with_zero_xor
/**
 * Number of units for a bitset holding highest_bit bits. The bit count is
 * first passed through round_up2(..., 128) so the unit array covers whole
 * 128-bit blocks (presumably rounding up; see round_up2).
 */
#define _bitset_units(highest_bit) \
	(round_up2((highest_bit), 128) / BS_UNIT_SIZE_BITS)
/**
 * Bytes to allocate for a bitset: bitset_base_size, plus the unit array,
 * plus 16 spare bytes so the unit array can be moved up to a 16-byte
 * boundary.
 */
#define _bitset_overall_size(bitset_base_size, highest_bit) \
	((bitset_base_size) \
	 + 16 \
	 + _bitset_units(highest_bit) * BS_UNIT_SIZE)
/* Locate the unit array through the SSE-aware helper, which aligns it to 16 bytes. */
#define _bitset_data_ptr(data, bitset_base_size, highest_bit) \
	_bitset_sse_data_ptr((data), (bitset_base_size), (highest_bit))
89 static INLINE bitset_unit_t *_bitset_sse_data_ptr(void *data, size_t bitset_base_size,
90 bitset_pos_t highest_bit)
95 diff = (units - (char *) 0) + bitset_base_size;
96 diff = round_up2(diff, 16);
97 units = (char *) 0 + diff;
98 return (bitset_unit_t *) units;
/* One SSE binop step covers four units (one 128-bit block). */
#define _BITSET_BINOP_UNITS_INC 4

/* Route the two-operand binops to their SSE implementations. */
#define _bitset_inside_binop_and(tgt, src)     _bitset_sse_inside_binop_and((tgt), (src))
#define _bitset_inside_binop_andnot(tgt, src)  _bitset_sse_inside_binop_andnot((tgt), (src))
#define _bitset_inside_binop_or(tgt, src)      _bitset_sse_inside_binop_or((tgt), (src))
#define _bitset_inside_binop_xor(tgt, src)     _bitset_sse_inside_binop_xor((tgt), (src))
/**
 * Generate _bitset_sse_inside_binop_<name>(tgt, src), which loads one
 * 128-bit block from each pointer with aligned loads, combines them with
 * _mm_<name>_si128(tgt block, src block) and stores the result back to tgt.
 */
#define _BITSET_SSE_BINOP(name) \
static INLINE void _bitset_sse_inside_binop_ ## name(bitset_unit_t *tgt, bitset_unit_t *src) \
{ \
	__m128i *dst = (__m128i *) tgt; \
	__m128i rhs = _mm_load_si128((__m128i *) src); \
	__m128i lhs = _mm_load_si128(dst); \
	_mm_store_si128(dst, _mm_ ## name ## _si128(lhs, rhs)); \
}
/**
 * One-operand companion of the SSE "and" binop: only the target unit
 * pointer is taken.
 * NOTE(review): judging by the name, this handles a source that is all
 * zero and so presumably clears the units at tgt -- confirm against the body.
 */
117 static INLINE void _bitset_sse_inside_binop_with_zero_and(bitset_unit_t *tgt)
125 static INLINE void _bitset_sse_inside_binop_andnot(bitset_unit_t *tgt, bitset_unit_t *src)
127 __m128i src_op = _mm_load_si128((void *) src);
128 __m128i tgt_op = _mm_load_si128((void *) tgt);
129 __m128i res = _mm_andnot_si128(src_op, tgt_op);
130 _mm_store_si128((__m128i *) tgt, res);
133 _BITSET_SSE_BINOP(and)
134 _BITSET_SSE_BINOP(or)
135 _BITSET_SSE_BINOP(xor)