1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
|
#include "sse4_2-check.h"
#include <nmmintrin.h>
#include <string.h>
/* Generator polynomial used by the reference CRC computation in calc_rem,
   written with its leading x^32 term (33 significant bits).
   NOTE(review): 0x11EDC6F41 looks like the CRC-32C (Castagnoli) polynomial
   used by the SSE4.2 crc32 instruction -- confirm against the ISA manual.  */
#define POLYNOMIAL 0x11EDC6F41LL
/* Size in bytes of the fixed scratch buffers used by the reference code.  */
#define MAX_BUF 16
/* Shift the LEN-byte buffer BUF left by one bit, treating it as a single
   integer whose least significant byte is BUF[0].  The top bit of each
   byte is carried into the bottom bit of the next higher byte; the top
   bit of BUF[LEN - 1] is discarded.  Does nothing when LEN <= 0.  */
static void
shift_mem_by1 (unsigned char* buf, int len)
{
  int pos = len;

  /* Walk from the highest byte down so that the lower neighbour still
     holds its unshifted value when its top bit is inspected.  */
  while (pos-- > 0)
    {
      unsigned char carry_in = 0;

      if (pos != 0 && (buf[pos - 1] & 0x80) != 0)
        carry_in = 1;

      buf[pos] = (unsigned char) (buf[pos] << 1) | carry_in;
    }
}
/* XOR the first five bytes of DIV into the first five bytes of BUF.
   calc_rem uses this as the "subtract the divisor" step of its bitwise
   long division.  Bytes of BUF beyond the fifth are left untouched.  */
static void
do_div (unsigned char* buf, unsigned char* div)
{
  unsigned char *out = buf;
  unsigned char *in = div;
  int left = 5;

  while (left-- > 0)
    *out++ ^= *in++;
}
/* Compute the remainder of the LEN-bit value stored in BUF when divided
   by POLYNOMIAL, using bit-at-a-time long division with XOR in place of
   subtraction.

   BUF holds LEN / 8 bytes with the least significant byte first; it is
   copied into a local scratch buffer and is not modified.  The code
   needs LEN / 8 to be at least 5 (the division window is five bytes
   wide) and at most MAX_BUF.

   The divisor and the result are moved between integers and byte arrays
   through their object representation, so the byte layout follows the
   host's.  NOTE(review): this presumes a least-significant-byte-first
   host, as on x86 -- confirm if reused elsewhere.

   Returns the four bytes above the lowest byte of the window after
   LEN - 32 division steps, reinterpreted as an unsigned int.  */
static unsigned int
calc_rem (unsigned char* buf, int len)
{
  unsigned char work[MAX_BUF];
  unsigned char poly[sizeof (unsigned long long)];
  /* Shifting the 33-bit polynomial left by 7 places its leading bit in
     bit 7 of the fifth byte, lining it up with the bit tested below.  */
  unsigned long long shifted_poly = POLYNOMIAL << 7LL;
  unsigned int rem;
  int nbytes = len / 8;
  /* The division window is the five most significant bytes of WORK.  */
  unsigned char *window = work + nbytes - 5;
  int steps;

  memcpy (poly, &shifted_poly, sizeof poly);
  memcpy (work, buf, nbytes);

  for (steps = len - 32; steps > 0; steps--)
    {
      /* Cancel the leading bit when it is set, then bring the next
	 lower bit of the dividend into the window.  */
      if ((window[4] & 0x80) != 0)
	do_div (window, poly);
      shift_mem_by1 (work, nbytes);
    }

  memcpy (&rem, window + 1, sizeof rem);
  return rem;
}
/* Return B with the order of its eight bits reversed (bit 0 <-> bit 7,
   bit 1 <-> bit 6, and so on).  */
static unsigned char
reverse_bits_in_byte (unsigned char b)
{
  unsigned char r = 0;
  int bit;

  for (bit = 0; bit < 8; bit++)
    if ((b & (1 << bit)) != 0)
      r |= (unsigned char) (1 << (7 - bit));

  return r;
}

/* Reverse the order of all LEN * 8 bits stored in SRC, in place: the
   bytes swap ends and each byte has its own bits mirrored.

   The reversal is done by swapping from both ends towards the middle,
   so no scratch buffer is needed and LEN is not limited by any fixed
   buffer size.  LEN <= 0 leaves SRC untouched.  */
static void
reverse_bits (unsigned char *src, int len)
{
  int lo = 0;
  int hi = len - 1;

  while (lo < hi)
    {
      unsigned char low_rev = reverse_bits_in_byte (src[lo]);

      src[lo] = reverse_bits_in_byte (src[hi]);
      src[hi] = low_rev;
      lo++;
      hi--;
    }

  /* An odd LEN leaves one middle byte that only needs its bits mirrored.  */
  if (lo == hi)
    src[lo] = reverse_bits_in_byte (src[lo]);
}
/* Store the LEN bytes of SRC into DST starting at byte offset SHFT / 8,
   after clearing the first LEN + SHFT / 8 bytes of DST.  With the least
   significant byte stored first this amounts to a left shift by SHFT
   bits; only the whole-byte part of SHFT is used.  DST must have room
   for LEN + SHFT / 8 bytes.  */
static void
shift_mem (unsigned char *src, unsigned char *dst, int len, int shft)
{
  int byte_off = shft / 8;
  unsigned char *out = dst + byte_off;
  unsigned char *in = src;
  int left = len;

  memset (dst, 0, len + byte_off);

  while (left-- > 0)
    *out++ = *in++;
}
/* XOR the first LEN / 8 bytes of SRC into the corresponding bytes of
   DST.  LEN is a length in bits; any remainder below a whole byte is
   ignored.  SRC is only read.  */
static void
xor_mem (unsigned char *src, unsigned char *dst, int len)
{
  int remaining;

  for (remaining = len / 8; remaining > 0; remaining--)
    *dst++ ^= *src++;
}
/* Reference (software) CRC step: fold the input INP into the running
   value CRC and return the new value converted to DST_T.

   Only the first four bytes of CRC's object representation take part.
   The steps are:
     1. bit-reverse the 4-byte CRC and the whole of INP;
     2. place INP 32 bits up and the CRC sizeof (SRC_T) * 8 bits up in
	two scratch buffers, and XOR them together;
     3. take the remainder of that value modulo POLYNOMIAL (calc_rem);
     4. bit-reverse the 4-byte remainder.

   The scratch buffers are MAX_BUF bytes, so 4 + sizeof (SRC_T) must not
   exceed MAX_BUF.  */
static DST_T
compute_crc32 (DST_T crc, SRC_T inp)
{
  const int in_bits = sizeof (SRC_T) * 8;
  const int total_bits = 32 + in_bits;
  unsigned char crc_bytes[sizeof (DST_T)];
  unsigned char in_bytes[sizeof (SRC_T)];
  unsigned char in_shifted[MAX_BUF];
  unsigned char acc[MAX_BUF];
  unsigned int rem;

  memcpy (crc_bytes, &crc, sizeof (DST_T));
  memcpy (in_bytes, &inp, sizeof (SRC_T));

  reverse_bits (crc_bytes, 4);
  reverse_bits (in_bytes, sizeof (SRC_T));

  shift_mem (in_bytes, in_shifted, sizeof (SRC_T), 32);
  shift_mem (crc_bytes, acc, 4, in_bits);
  xor_mem (in_shifted, acc, total_bits);

  rem = calc_rem (acc, total_bits);
  /* Mirror the remainder through its bytes, as was done for the inputs.  */
  reverse_bits ((unsigned char *) &rem, 4);

  return (DST_T) rem;
}
/* Number of random (crc, input) pairs checked by sse4_2_test.  */
#define NUM 1024
/* Test body: build NUM random (crc, input) pairs and abort if CRC32
   disagrees with the software reference compute_crc32 on any of them.
   CRC32, DST_T and SRC_T are not defined in this file.
   NOTE(review): presumably the including test supplies them, one
   combination per crc32 operand width -- confirm against the includers.  */
static void
sse4_2_test (void)
{
  DST_T dst[NUM];
  SRC_T src[NUM];
  int i;

  for (i = 0; i < NUM; i++)
    {
      /* A single rand () call is not assumed to fill a type wider than
	 four bytes, so a second value is OR-ed into the upper half.  */
      dst[i] = rand ();
      if (sizeof (DST_T) > 4)
	dst[i] |= (DST_T)rand () << (DST_T)(sizeof (DST_T) * 4);

      src[i] = rand ();
      /* The shift is derived from SRC_T's own width, so the upper half
	 of SRC is populated whatever the width of DST_T.  */
      if (sizeof (SRC_T) > 4)
	src[i] |= (SRC_T)rand () << (SRC_T)(sizeof (SRC_T) * 4);
    }

  for (i = 0; i < NUM; i++)
    if (CRC32 (dst[i], src[i]) != compute_crc32 (dst[i], src[i]))
      abort ();
}
|