blob: 996f78b13a37fd709eb381948042886b4780f715 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
#include <math.h>
#include <stdint.h>
double fmod(double x, double y) {
union {double f; uint64_t i;} ux = {x}, uy = {y};
int ex = ux.i>>52 & 0x7ff;
int ey = uy.i>>52 & 0x7ff;
int sx = ux.i>>63;
uint64_t i;
/* in the followings uxi should be ux.i, but then gcc wrongly adds */
/* float load/store to inner loops ruining performance and code size */
uint64_t uxi = ux.i;
if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
return (x*y)/(x*y);
if (uxi<<1 <= uy.i<<1) {
if (uxi<<1 == uy.i<<1)
return 0*x;
return x;
}
/* normalize x and y */
if (!ex) {
for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
uxi <<= -ex + 1;
} else {
uxi &= -1ULL >> 12;
uxi |= 1ULL << 52;
}
if (!ey) {
for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
uy.i <<= -ey + 1;
} else {
uy.i &= -1ULL >> 12;
uy.i |= 1ULL << 52;
}
/* x mod y */
for (; ex > ey; ex--) {
i = uxi - uy.i;
if (i >> 63 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
uxi <<= 1;
}
i = uxi - uy.i;
if (i >> 63 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
for (; uxi>>52 == 0; uxi <<= 1, ex--);
/* scale result */
if (ex > 0) {
uxi -= 1ULL << 52;
uxi |= (uint64_t)ex << 52;
} else {
uxi >>= -ex + 1;
}
uxi |= (uint64_t)sx << 63;
ux.i = uxi;
return ux.f;
}
float fmodf(float x, float y) {
union {float f; uint32_t i;} ux = {x}, uy = {y};
int ex = ux.i>>23 & 0xff;
int ey = uy.i>>23 & 0xff;
uint32_t sx = ux.i & 0x80000000;
uint32_t i;
uint32_t uxi = ux.i;
if (uy.i<<1 == 0 || isnan(y) || ex == 0xff)
return (x*y)/(x*y);
if (uxi<<1 <= uy.i<<1) {
if (uxi<<1 == uy.i<<1)
return 0*x;
return x;
}
/* normalize x and y */
if (!ex) {
for (i = uxi<<9; i>>31 == 0; ex--, i <<= 1);
uxi <<= -ex + 1;
} else {
uxi &= -1U >> 9;
uxi |= 1U << 23;
}
if (!ey) {
for (i = uy.i<<9; i>>31 == 0; ey--, i <<= 1);
uy.i <<= -ey + 1;
} else {
uy.i &= -1U >> 9;
uy.i |= 1U << 23;
}
/* x mod y */
for (; ex > ey; ex--) {
i = uxi - uy.i;
if (i >> 31 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
uxi <<= 1;
}
i = uxi - uy.i;
if (i >> 31 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
for (; uxi>>23 == 0; uxi <<= 1, ex--);
/* scale result up */
if (ex > 0) {
uxi -= 1U << 23;
uxi |= (uint32_t)ex << 23;
} else {
uxi >>= -ex + 1;
}
uxi |= sx;
ux.i = uxi;
return ux.f;
}
|