summaryrefslogtreecommitdiff
path: root/libk/src/math/pow.c
diff options
context:
space:
mode:
Diffstat (limited to 'libk/src/math/pow.c')
-rw-r--r--libk/src/math/pow.c152
1 files changed, 152 insertions, 0 deletions
diff --git a/libk/src/math/pow.c b/libk/src/math/pow.c
new file mode 100644
index 0000000..c850db7
--- /dev/null
+++ b/libk/src/math/pow.c
@@ -0,0 +1,152 @@
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+
+uint32_t float_as_uint32 (float a) {
+ uint32_t r;
+ memcpy (&r, &a, sizeof r);
+ return r;
+}
+
+float uint32_as_float (uint32_t a) {
+ float r;
+ memcpy (&r, &a, sizeof r);
+ return r;
+}
+
+void logf_ext (float a, float *loghi, float *loglo) {
+ const float LOG2_HI = 6.93147182e-1f;
+ const float LOG2_LO = -1.90465421e-9f;
+ const float SQRT_HALF = 0.70710678f;
+ float m, r, i, s, t, p, qhi, qlo;
+ int e;
+
+ const float POW_TWO_M23 = 1.19209290e-7f;
+ const float POW_TWO_P23 = 8388608.0f;
+ const float FP32_MIN_NORM = 1.175494351e-38f;
+ i = 0.0f;
+
+ if (a < FP32_MIN_NORM) {
+ a = a * POW_TWO_P23;
+ i = -23.0f;
+ }
+
+ e = (float_as_uint32 (a) - float_as_uint32 (SQRT_HALF)) & 0xff800000;
+ m = uint32_as_float (float_as_uint32 (a) - e);
+ i = fmaf ((float)e, POW_TWO_M23, i);
+
+ p = m + 1.0f;
+ m = m - 1.0f;
+ r = 1.0f / p;
+ qhi = r * m;
+ qlo = r * fmaf (qhi, -m, fmaf (qhi, -2.0f, m));
+
+ s = qhi * qhi;
+ r = 0.1293334961f;
+ r = fmaf (r, s, 0.1419928074f);
+ r = fmaf (r, s, 0.2000148296f);
+ r = fmaf (r, s, 0.3333332539f);
+ t = fmaf (qhi, qlo + qlo, fmaf (qhi, qhi, -s));
+ p = s * qhi;
+ t = fmaf (s, qlo, fmaf (t, qhi, fmaf (s, qhi, -p)));
+ s = fmaf (r, p, fmaf (r, t, qlo));
+ r = 2 * qhi;
+
+ t = fmaf ( LOG2_HI, i, r);
+ p = fmaf (-LOG2_HI, i, t);
+ s = fmaf ( LOG2_LO, i, fmaf (2.f, s, r - p));
+ *loghi = p = t + s;
+ *loglo = (t - p) + s;
+}
+
+float expf_unchecked (float a) {
+ float f, j, r;
+ int i;
+
+ j = fmaf (1.442695f, a, 12582912.f) - 12582912.f;
+ f = fmaf (j, -6.93145752e-1f, a);
+ f = fmaf (j, -1.42860677e-6f, f);
+ i = (int)j;
+
+ r = 1.37805939e-3f;
+ r = fmaf (r, f, 8.37312452e-3f);
+ r = fmaf (r, f, 4.16695364e-2f);
+ r = fmaf (r, f, 1.66664720e-1f);
+ r = fmaf (r, f, 4.99999851e-1f);
+ r = fmaf (r, f, 1.00000000e+0f);
+ r = fmaf (r, f, 1.00000000e+0f);
+
+ float s, t;
+ uint32_t ia = (i > 0) ? 0u : 0x83000000u;
+ s = uint32_as_float (0x7f000000u + ia);
+ t = uint32_as_float (((uint32_t)i << 23) - ia);
+ r = r * s;
+ r = r * t;
+
+ return r;
+}
+
+float powf_core (float a, float b) {
+ const float MAX_IEEE754_FLT = uint32_as_float (0x7f7fffff);
+ const float EXP_OVFL_BOUND = 88.7228394f; // 0x1.62e430p+6f;
+ const float EXP_OVFL_UNFL_F = 104.0f;
+ const float MY_INF_F = uint32_as_float (0x7f800000);
+ float lhi, llo, thi, tlo, phi, plo, r;
+
+ logf_ext (a, &lhi, &llo);
+
+ thi = lhi * b;
+
+ if (fabsf (thi) > EXP_OVFL_UNFL_F) {
+ r = (thi < 0.0f) ? 0.0f : MY_INF_F;
+ } else {
+ tlo = fmaf (lhi, b, -thi);
+ tlo = fmaf (llo, b, +tlo);
+ phi = thi + tlo;
+
+ if (phi == EXP_OVFL_BOUND)
+ phi = uint32_as_float (float_as_uint32 (phi) - 1);
+
+ plo = (thi - phi) + tlo;
+ r = expf_unchecked (phi);
+ if (fabsf (r) <= MAX_IEEE754_FLT) {
+ r = fmaf (plo, r, r);
+ }
+ }
+ return r;
+}
+
+float powf (float a, float b) {
+ const float MY_INF_F = uint32_as_float (0x7f800000);
+ const float MY_NAN_F = uint32_as_float (0xffc00000);
+ int expo_odd_int;
+ float r;
+
+ expo_odd_int = fmaf (-2.0f, floorf (0.5f * b), b) == 1.0f;
+ if ((a == 1.0f) || (b == 0.0f)) {
+ r = 1.0f;
+ } else if (isnan (a) || isnan (b)) {
+ r = a + b;
+ } else if (isinf (b)) {
+ r = ((fabsf (a) < 1.0f) != (b < 0.0f)) ? 0.0f : MY_INF_F;
+ if (a == -1.0f) r = 1.0f;
+ } else if (isinf (a)) {
+ r = (b < 0.0f) ? 0.0f : MY_INF_F;
+ if ((a < 0.0f) && expo_odd_int) r = -r;
+ } else if (a == 0.0f) {
+ r = (expo_odd_int) ? (a + a) : 0.0f;
+ if (b < 0.0f) r = copysignf (MY_INF_F, r);
+ } else if ((a < 0.0f) && (b != floorf (b))) {
+ r = MY_NAN_F;
+ } else {
+ r = powf_core (fabsf (a), b);
+ if ((a < 0.0f) && expo_odd_int) {
+ r = -r;
+ }
+ }
+ return r;
+}
+
+double pow (double a, double b) {
+ return powf(a, b); // who cares about precision in a kernel :3
+}