diff options
Diffstat (limited to 'libk/src/math/pow.c')
-rw-r--r-- | libk/src/math/pow.c | 152 |
1 files changed, 152 insertions, 0 deletions
diff --git a/libk/src/math/pow.c b/libk/src/math/pow.c new file mode 100644 index 0000000..c850db7 --- /dev/null +++ b/libk/src/math/pow.c @@ -0,0 +1,152 @@ +#include <stdint.h> +#include <string.h> +#include <math.h> + +uint32_t float_as_uint32 (float a) { + uint32_t r; + memcpy (&r, &a, sizeof r); + return r; +} + +float uint32_as_float (uint32_t a) { + float r; + memcpy (&r, &a, sizeof r); + return r; +} + +void logf_ext (float a, float *loghi, float *loglo) { + const float LOG2_HI = 6.93147182e-1f; + const float LOG2_LO = -1.90465421e-9f; + const float SQRT_HALF = 0.70710678f; + float m, r, i, s, t, p, qhi, qlo; + int e; + + const float POW_TWO_M23 = 1.19209290e-7f; + const float POW_TWO_P23 = 8388608.0f; + const float FP32_MIN_NORM = 1.175494351e-38f; + i = 0.0f; + + if (a < FP32_MIN_NORM) { + a = a * POW_TWO_P23; + i = -23.0f; + } + + e = (float_as_uint32 (a) - float_as_uint32 (SQRT_HALF)) & 0xff800000; + m = uint32_as_float (float_as_uint32 (a) - e); + i = fmaf ((float)e, POW_TWO_M23, i); + + p = m + 1.0f; + m = m - 1.0f; + r = 1.0f / p; + qhi = r * m; + qlo = r * fmaf (qhi, -m, fmaf (qhi, -2.0f, m)); + + s = qhi * qhi; + r = 0.1293334961f; + r = fmaf (r, s, 0.1419928074f); + r = fmaf (r, s, 0.2000148296f); + r = fmaf (r, s, 0.3333332539f); + t = fmaf (qhi, qlo + qlo, fmaf (qhi, qhi, -s)); + p = s * qhi; + t = fmaf (s, qlo, fmaf (t, qhi, fmaf (s, qhi, -p))); + s = fmaf (r, p, fmaf (r, t, qlo)); + r = 2 * qhi; + + t = fmaf ( LOG2_HI, i, r); + p = fmaf (-LOG2_HI, i, t); + s = fmaf ( LOG2_LO, i, fmaf (2.f, s, r - p)); + *loghi = p = t + s; + *loglo = (t - p) + s; +} + +float expf_unchecked (float a) { + float f, j, r; + int i; + + j = fmaf (1.442695f, a, 12582912.f) - 12582912.f; + f = fmaf (j, -6.93145752e-1f, a); + f = fmaf (j, -1.42860677e-6f, f); + i = (int)j; + + r = 1.37805939e-3f; + r = fmaf (r, f, 8.37312452e-3f); + r = fmaf (r, f, 4.16695364e-2f); + r = fmaf (r, f, 1.66664720e-1f); + r = fmaf (r, f, 4.99999851e-1f); + r = fmaf (r, f, 1.00000000e+0f); + r = fmaf (r, f, 1.00000000e+0f); + + float s, t; + uint32_t ia = (i > 0) ? 0u : 0x83000000u; + s = uint32_as_float (0x7f000000u + ia); + t = uint32_as_float (((uint32_t)i << 23) - ia); + r = r * s; + r = r * t; + + return r; +} + +float powf_core (float a, float b) { + const float MAX_IEEE754_FLT = uint32_as_float (0x7f7fffff); + const float EXP_OVFL_BOUND = 88.7228394f; // 0x1.62e430p+6f; + const float EXP_OVFL_UNFL_F = 104.0f; + const float MY_INF_F = uint32_as_float (0x7f800000); + float lhi, llo, thi, tlo, phi, plo, r; + + logf_ext (a, &lhi, &llo); + + thi = lhi * b; + + if (fabsf (thi) > EXP_OVFL_UNFL_F) { + r = (thi < 0.0f) ? 0.0f : MY_INF_F; + } else { + tlo = fmaf (lhi, b, -thi); + tlo = fmaf (llo, b, +tlo); + phi = thi + tlo; + + if (phi == EXP_OVFL_BOUND) + phi = uint32_as_float (float_as_uint32 (phi) - 1); + + plo = (thi - phi) + tlo; + r = expf_unchecked (phi); + if (fabsf (r) <= MAX_IEEE754_FLT) { + r = fmaf (plo, r, r); + } + } + return r; +} + +float powf (float a, float b) { + const float MY_INF_F = uint32_as_float (0x7f800000); + const float MY_NAN_F = uint32_as_float (0xffc00000); + int expo_odd_int; + float r; + + expo_odd_int = fmaf (-2.0f, floorf (0.5f * b), b) == 1.0f; + if ((a == 1.0f) || (b == 0.0f)) { + r = 1.0f; + } else if (isnan (a) || isnan (b)) { + r = a + b; + } else if (isinf (b)) { + r = ((fabsf (a) < 1.0f) != (b < 0.0f)) ? 0.0f : MY_INF_F; + if (a == -1.0f) r = 1.0f; + } else if (isinf (a)) { + r = (b < 0.0f) ? 0.0f : MY_INF_F; + if ((a < 0.0f) && expo_odd_int) r = -r; + } else if (a == 0.0f) { + r = (expo_odd_int) ? (a + a) : 0.0f; + if (b < 0.0f) r = copysignf (MY_INF_F, r); + } else if ((a < 0.0f) && (b != floorf (b))) { + r = MY_NAN_F; + } else { + r = powf_core (fabsf (a), b); + if ((a < 0.0f) && expo_odd_int) { + r = -r; + } + } + return r; +} + +double pow (double a, double b) { + return powf(a, b); // who cares about precision in a kernel :3 +} |