/** * * @file FR_math.c - c implementation file for basic fixed * radix math routines * * @copy Copyright (C) <2001-2026> * @author M A Chatterjee * * This file contains integer math settable fixed point radix math routines for * use on systems in which floating point is not desired or unavailable. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages * arising from the use of this software. * * Permission is granted to anyone to use this software for any purpose, * including commercial applications, and to alter it and redistribute it * freely, subject to the following restrictions: * * 1. The origin of this software must not be misrepresented; you must not * claim that you wrote the original software. If you use this software * in a product, please place an acknowledgment in the product documentation. * * 2. Altered source versions must be plainly marked as such, and must not be * misrepresented as being the original software. * * 3. This notice may not be removed or altered from any source * distribution. * */ #include "FR_math.h" #ifndef FR_NO_STDINT #include #endif /*======================================================= * Trig lookup tables (inlined — no separate header needed) * * Sine quadrant table: 129 entries covering [0, pi/2] in u0.15 format. * Tangent octant table: 65 entries covering [0, pi/4] in u0.15 format. * Generated by tools/coef-gen.py — do not hand-edit. */ #define FR_TRIG_TABLE_BITS (7) #define FR_TRIG_TABLE_SIZE ((1 << FR_TRIG_TABLE_BITS) + 1) #define FR_TRIG_FRAC_BITS (14 - FR_TRIG_TABLE_BITS) #define FR_TRIG_FRAC_MAX (1 << FR_TRIG_FRAC_BITS) #define FR_TRIG_FRAC_MASK (FR_TRIG_FRAC_MAX - 1) #define FR_TRIG_FRAC_HALF (FR_TRIG_FRAC_MAX >> 1) #define FR_TRIG_QUADRANT (1 << 14) static const unsigned short gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE] = { 0, 402, 804, 1206, 1608, 2009, 2411, 2811, 3212, 3612, 4011, 4410, 4808, 5205, 5602, 5998, 6393, 6787, 7180, 7571, 7962, 8351, 8740, 9127, 9512, 9896, 10279, 10660, 11039, 11417, 11793, 12167, 12540, 12910, 13279, 13646, 14010, 14373, 14733, 15091, 15447, 15800, 16151, 16500, 16846, 17190, 17531, 17869, 18205, 18538, 18868, 19195, 19520, 19841, 20160, 20475, 20788, 21097, 21403, 21706, 22006, 22302, 22595, 22884, 23170, 23453, 23732, 24008, 24279, 24548, 24812, 25073, 25330, 25583, 25833, 26078, 26320, 26557, 26791, 27020, 27246, 27467, 27684, 27897, 28106, 28311, 28511, 28707, 28899, 29086, 29269, 29448, 29622, 29792, 29957, 30118, 30274, 30425, 30572, 30715, 30853, 30986, 31114, 31238, 31357, 31471, 31581, 31686, 31786, 31881, 31972, 32058, 32138, 32214, 32286, 32352, 32413, 32470, 32522, 32568, 32610, 32647, 32679, 32706, 32729, 32746, 32758, 32766, 32768 }; #define FR_TAN_TABLE_BITS (6) #define FR_TAN_TABLE_SIZE ((1 << FR_TAN_TABLE_BITS) + 1) #define FR_TAN_FRAC_BITS (13 - FR_TAN_TABLE_BITS) #define FR_TAN_FRAC_MAX (1 << FR_TAN_FRAC_BITS) #define FR_TAN_FRAC_MASK (FR_TAN_FRAC_MAX - 1) #define FR_TAN_FRAC_HALF (FR_TAN_FRAC_MAX >> 1) #define FR_TAN_OCTANT (1 << 13) static const unsigned short gFR_TAN_TAB_O[FR_TAN_TABLE_SIZE] = { 0, 402, 804, 1207, 1610, 2013, 2417, 2822, 3227, 3634, 4042, 4450, 4861, 5272, 5686, 6101, 6518, 6937, 7358, 7782, 8208, 8637, 9068, 9503, 9940, 10381, 10825, 11273, 11725, 12180, 12640, 13104, 13573, 14046, 14525, 15009, 15498, 15993, 16494, 17001, 17515, 18035, 18563, 19098, 19640, 20191, 20750, 21318, 21895, 22481, 23078, 23685, 24302, 24931, 25572, 26226, 26892, 27572, 28266, 28975, 29699, 30440, 31198, 31973, 32768 }; /*======================================================= * Full-precision radian/degree → BAM conversion helpers * * rad_to_bam_full(r) returns a full s32 BAM value where: * upper 16 bits = integer BAM (the u16 table index) * lower 16 bits = sub-BAM fractional part * Input r must already be normalized to radix 16 and reduced to [-pi, pi]. * * The shift terms match FR_RAD2BAM (10 terms, ~21-bit accuracy) but are * reordered so intermediate sums stay within s32 for |r| <= pi at r16. */ static s32 rad_to_bam_full(s32 r) { /* 10 terms: 65536/(2*pi) ≈ 10430.37835... * 2^13 + 2^11 + 2^7 + 2^6 - 2 + 0.5 - 0.125 + 2^-8 - 2^-11 - 2^-14 * = 10430.378357 (~21-bit accuracy) * Terms reordered: interleave negatives early to keep all intermediate * sums within s32 for |r| <= pi at r16 (max result ≈ 2^31 - 4K). */ return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14); } #ifndef FR_LEAN /* deg_to_bam_full(d) — same idea for degrees. * Input d must already be normalized to radix 16 and reduced to [-90, 90). * Returns full s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM). * 7 terms, ~18-bit accuracy matching FR_DEG2BAM. */ static s32 deg_to_bam_full(s32 d) { return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9); } #endif /* Normalize a fixed-radix value to radix 16. */ static s32 normalize_to_r16(s32 val, u16 radix) { return (radix > 16) ? (val >> (radix - 16)) : (radix < 16) ? (val << (16 - radix)) : val; } /* Reduce non-negative radian (at r16) to [0, 2*pi). */ static s32 reduce_to_2pi(s32 r) { const s32 two_pi = FR_TWO_PI(16); /* 411775 */ if (r > (two_pi << 1)) r -= (r / two_pi) * two_pi; else if (r > two_pi) r -= two_pi; return r; } /* rad_r16_to_bam — convert radian (at r16) in [0, 2π) to u16 BAM. * Uses quadrant decomposition to keep rad_to_bam_full in its safe * [-π/2, π/2) range, mirroring the approach in fr_deg_to_bam. */ static u16 rad_r16_to_bam(s32 r) { const s32 half_pi = FR_HALF_PI(16); /* 102944 */ const s32 three_half_pi = FR_THREE_HALF_PI(16); /* 308831 */ const s32 pi = FR_PI(16); /* 205887 */ const s32 two_pi = FR_TWO_PI(16); /* 411775 */ u16 offset = 0; if (r >= half_pi && r < three_half_pi) { r -= pi; offset = 0x8000u; } else if (r >= three_half_pi) { r -= two_pi; /* r is now in [-π/2, 0), no offset needed (u16 wraps naturally) */ } return (u16)(offset + (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16)); } /* (rad_r16_to_bam32 removed — sub-BAM interpolation approach abandoned) */ /* fr_rad_to_bam — overflow-safe radian to u16 BAM conversion. * Normalizes to r16, reduces to [0, 2π), uses quadrant decomposition. */ u16 fr_rad_to_bam(s32 rad, u16 radix) { s32 r = normalize_to_r16(rad, radix); /* Normalize to [0, 2π) */ if (r < 0) { r += ((-r) / FR_TWO_PI(16)) * FR_TWO_PI(16); if (r < 0) r += FR_TWO_PI(16); } r = reduce_to_2pi(r); return rad_r16_to_bam(r); } #ifndef FR_LEAN /* fr_deg_to_bam — overflow-safe degree to u16 BAM conversion. * Normalizes to r16, reduces to [-90, 90) with quadrant offset. */ u16 fr_deg_to_bam(s32 deg, u16 radix) { s32 d = normalize_to_r16(deg, radix); /* Reduce to [-180, 180) */ if (d >= FR_D360_R16 || d < -FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; } if (d >= FR_D180_R16) d -= FR_D360_R16; if (d < -FR_D180_R16) d += FR_D360_R16; /* Reduce to [-90, 90) with BAM quadrant offset */ u16 offset = 0; if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; } else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; } return (u16)(offset + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16)); } #endif /*======================================================= * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan * * Internal model: every angle is reduced to a u16 BAM value. The top 2 bits * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd * quadrants (1, 3) reverse the in-quadrant index so the table is always read * in the same direction. * * The table is a 129-entry SINE quadrant (ascending: 0 at index 0, 32768 at * index 128). After mirroring, small full_pos → small output (near zero), * which enables a cheap small-angle approximation: sin(θ) ≈ θ for angles * below one table step (~0.7°). This eliminates table quantization error * in the region where it matters most. * * Sign rule: quadrants 2 and 3 negate the result. * Mirror rule: quadrants 1 and 3 flip the in-quadrant position. */ s32 fr_sin_bam(u16 bam) { u32 q = ((u32)bam >> 14) & 0x3; /* top 2 bits = quadrant */ u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1); /* bottom 14 bits */ /* Exact cardinal angles */ if (inq == 0) { if (q == 0 || q == 2) return 0; /* 0° or 180° → 0 */ if (q == 1) return FR_TRIG_ONE; /* 90° → 1.0 */ return -FR_TRIG_ONE; /* 270° → -1.0 */ } /* Odd quadrants mirror: read table from the far end */ if (q == 1 || q == 3) inq = FR_TRIG_QUADRANT - inq; s32 v; /* Small-angle approximation: sin(θ) ≈ θ for inq < 128 (one table step). * θ_rad = inq * (π/2) / 16384. Output = θ * 65536 = inq * FR_kQ2RAD / 16384. * Max inq=127: 127 * 102944 / 16384 = 798. Error: θ³/6 < 3e-7 << 1 LSB. */ if (inq < FR_TRIG_FRAC_MAX) { v = (s32)(((u32)inq * 102944u + 8192u) >> 14); } else { /* Table lookup with 7-bit interpolation fraction */ u32 idx = inq >> FR_TRIG_FRAC_BITS; u32 frac = inq & FR_TRIG_FRAC_MASK; s32 lo = (s32)gFR_SIN_TAB_Q[idx]; s32 hi = (s32)gFR_SIN_TAB_Q[idx + 1]; v = lo + (((hi - lo) * (s32)frac + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); v <<= 1; /* u0.15 → s15.16 */ } return (q >= 2) ? -v : v; } s32 fr_cos_bam(u16 bam) { /* cos(x) = sin(x + pi/2) = sin(bam + 16384). u16 wraparound is free. */ return fr_sin_bam((u16)(bam + FR_BAM_QUADRANT)); } s32 fr_cos(s32 rad, u16 radix) { if (rad == 0) return FR_TRIG_ONE; s32 r = normalize_to_r16(rad, radix); if (r < 0) r = -r; r = reduce_to_2pi(r); /* Near π/2 or 3π/2 (cos=0 crossings): cos(π/2+δ) = -sin(δ) ≈ -δ, * cos(3π/2+δ) = sin(δ) ≈ δ. */ s32 delta = r - FR_HALF_PI(16); if (delta >= -256 && delta <= 256) return -delta; delta = r - FR_THREE_HALF_PI(16); if (delta >= -256 && delta <= 256) return delta; return fr_cos_bam(fr_rad_to_bam(rad, radix)); } s32 fr_sin(s32 rad, u16 radix) { if (rad == 0) return 0; s32 r = normalize_to_r16(rad, radix); s32 sign = 1; if (r < 0) { r = -r; sign = -1; } r = reduce_to_2pi(r); /* Near 0 after reduction: sin(δ) ≈ δ */ if (r < 256) { s32 v = r; return (sign < 0) ? -v : v; } /* Near π: sin(π + δ) = -sin(δ) ≈ -δ */ s32 delta = r - FR_PI(16); if (delta >= -256 && delta <= 256) { s32 v = -delta; return (sign < 0) ? -v : v; } /* Near 2π: sin(2π - δ) = -sin(δ) ≈ -δ, but δ = 2π - r */ delta = FR_TWO_PI(16) - r; if (delta >= 0 && delta < 256) { s32 v = -delta; return (sign < 0) ? -v : v; } /* Main path: reduce to [-π, π], convert to u16 BAM, table lookup */ if (r > FR_PI(16)) r -= FR_TWO_PI(16); u16 bam = (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16); s32 v = fr_sin_bam(bam); return (sign < 0) ? -v : v; } #ifndef FR_LEAN /*======================================================= * BAM-native tangent: fr_tan_bam * * Uses a 65-entry octant table (gFR_TAN_TAB_O) for the first octant * [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°-x) for the * second octant (45°, 90°). Result is s15.16 with saturation at the * poles. * * No 64-bit intermediates. One 32-bit division only in the >45° path. */ s32 fr_tan_bam(u16 bam) { u32 q = ((u32)bam >> 14) & 0x3; /* quadrant (top 2 bits) */ u32 inq = (u32)bam & 0x3FFFu; /* in-quadrant (14 bits) */ s32 sign = 1; u32 idx, frac; s32 lo, hi, raw; /* Exact zeros: bam lands exactly on 0° or 180° */ if (inq == 0 && (q == 0 || q == 2)) return 0; /* Poles: bam lands exactly on 90° or 270° */ if (inq == 0 && (q == 1 || q == 3)) return (q == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; /* Q1 (90°..180°) and Q3 (270°..360°): reflect and negate */ if (q == 1 || q == 3) { inq = 0x4000u - inq; sign = -1; } /* Now inq is in (0, 0x4000) = (0°, 90°) exclusive. * Split into first octant [0, 45°) and second octant [45°, 90°). */ if (inq < FR_TAN_OCTANT) { /* First octant: direct table lookup + lerp. * inq is 13 bits; top FR_TAN_TABLE_BITS index the table, * bottom FR_TAN_FRAC_BITS drive interpolation. */ idx = inq >> FR_TAN_FRAC_BITS; frac = inq & FR_TAN_FRAC_MASK; lo = (s32)gFR_TAN_TAB_O[idx]; hi = (s32)gFR_TAN_TAB_O[idx + 1]; raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); if (raw < 0x40) { /* Near zero: redo interpolation with 4 extra bits of * precision to reduce rounding error when result is small. */ s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */ } else { raw <<= 1; /* u0.15 → s15.16 */ } } else { /* Second octant: tan(x) = 1 / tan(90° - x). * complement is in (0, 0x2000] = (0°, 45°]. */ u32 comp = 0x4000u - inq; /* Look up tan(complement) from the table */ idx = comp >> FR_TAN_FRAC_BITS; frac = comp & FR_TAN_FRAC_MASK; lo = (s32)gFR_TAN_TAB_O[idx]; hi = (s32)gFR_TAN_TAB_O[idx + 1]; raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); if (raw < 0x40) { /* Near pole: redo interpolation with 4 extra bits of * precision. The reciprocal amplifies small interpolation * errors, so extra precision significantly helps here. * Result: (2^31 / raw_hp) << 4 = 2^35 / raw_hp. */ s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); if (raw_hp < 32) { raw = FR_TRIG_MAXVAL; } else { raw = (s32)((0x80000000u / (u32)raw_hp) << 4); } } else { raw = (s32)(0x80000000u / (u32)raw); } } return (sign < 0) ? -raw : raw; } #endif /* FR_LEAN */ /* fr_tan — radian-input tangent. * * Normalize to [0, 2π], extract quadrant sign, convert rad→u16 BAM, * then do direct octant table lookup + interpolation inline. * Small-angle bypass at zero crossings: tan(x) ≈ x. * Near poles: use radian distance directly (cot(δ) ≈ 1/δ) to avoid * BAM quantization error amplified by the reciprocal. */ s32 fr_tan(s32 rad, u16 radix) { if (rad == 0) return 0; s32 r = normalize_to_r16(rad, radix); /* tan(-x) = -tan(x): extract sign, work with |r| */ s32 sign = 1; if (r < 0) { r = -r; sign = -1; } r = reduce_to_2pi(r); /* Small-angle bypass at zero crossings: tan(δ) ≈ δ */ if (r < 256) return (sign < 0) ? -r : r; { s32 delta = r - FR_PI(16); if (delta >= -256 && delta <= 256) return (sign < 0) ? -delta : delta; } { s32 delta = FR_TWO_PI(16) - r; if (delta >= 0 && delta < 256) return (sign < 0) ? delta : -delta; } /* Near-pole bypass: within POLE_THRESH r16 of π/2 or 3π/2, * use cot(δ) ≈ 1/δ from the radian distance directly. * Compute δ at r24 using precise pole constants (8× less rounding * error than the r16 FR_HALF_PI/FR_THREE_HALF_PI constants). * At δ=2048 r16 (1.79°), 1/δ error is ~0.03%. */ { const s32 pole_thresh = 2048; /* r16 units (~1.79°) */ /* Precise pole positions at r24: * π/2 × 2^24 = 26353589.76 → 26353590 * 3π/2 × 2^24 = 79060769.28 → 79060769 */ const s32 half_pi_r24 = 26353590; const s32 three_half_pi_r24 = 79060769; s32 d1 = r - FR_HALF_PI(16); /* coarse check at r16 */ s32 d2 = r - FR_THREE_HALF_PI(16); s32 pole_delta_r24 = 0; if (d1 >= -pole_thresh && d1 <= pole_thresh) { s32 r24 = r << 8; s32 dd = r24 - half_pi_r24; pole_delta_r24 = (dd < 0) ? -dd : dd; } else if (d2 >= -pole_thresh && d2 <= pole_thresh) { s32 r24 = r << 8; s32 dd = r24 - three_half_pi_r24; pole_delta_r24 = (dd < 0) ? -dd : dd; } if (pole_delta_r24 > 0) { /* Determine sign from radian quadrant */ s32 pole_sign; if (r < FR_HALF_PI(16)) pole_sign = 1; /* before π/2: → +∞ */ else if (r < FR_PI(16)) pole_sign = -1; /* past π/2: → -∞ */ else if (r <= FR_THREE_HALF_PI(16)) pole_sign = 1; /* before 3π/2: → +∞ */ else pole_sign = -1; /* past 3π/2: → -∞ */ s32 raw; if (pole_delta_r24 < 512) { raw = FR_TRIG_MAXVAL; /* δ < 2 at r16 → saturate */ } else { /* cot(δ) ≈ 1/δ. In s15.16: (2^40) / δ_r24 */ raw = (s32)((1ULL << 40) / (u32)pole_delta_r24); if (raw > FR_TRIG_MAXVAL) raw = FR_TRIG_MAXVAL; } s32 v = (pole_sign < 0) ? -raw : raw; return (sign < 0) ? -v : v; } } /* Convert radian to u16 BAM */ u16 bam = rad_r16_to_bam(r); /* Decompose BAM into quadrant + in-quadrant */ u32 q = ((u32)bam >> 14) & 0x3; u32 inq = (u32)bam & 0x3FFFu; s32 tsign = 1; /* tan sign from quadrant */ /* Exact zeros: bam lands on 0° or 180° */ if (inq == 0 && (q == 0 || q == 2)) return 0; /* Q1/Q3: reflect and negate */ if (q == 1 || q == 3) { inq = 0x4000u - inq; tsign = -1; } /* Octant table lookup + interpolation (same logic as fr_tan_bam) */ u32 idx, frac; s32 raw; if (inq < FR_TAN_OCTANT) { /* First octant [0°, 45°): direct lookup */ idx = inq >> FR_TAN_FRAC_BITS; frac = inq & FR_TAN_FRAC_MASK; s32 lo = (s32)gFR_TAN_TAB_O[idx]; s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); if (raw < 0x40) { s32 lo4 = lo << 4; s32 hi4 = hi << 4; raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); raw = (raw + 4) >> 3; } else { raw <<= 1; } } else { /* Second octant [45°, 90°): reciprocal identity */ u32 comp = 0x4000u - inq; idx = comp >> FR_TAN_FRAC_BITS; frac = comp & FR_TAN_FRAC_MASK; s32 lo = (s32)gFR_TAN_TAB_O[idx]; s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); if (raw < 0x40) { s32 lo4 = lo << 4; s32 hi4 = hi << 4; s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); if (raw_hp < 32) raw = FR_TRIG_MAXVAL; else raw = (s32)((0x80000000u / (u32)raw_hp) << 4); } else { raw = (s32)(0x80000000u / (u32)raw); } } /* Combine quadrant sign and input sign */ s32 v = (tsign < 0) ? -raw : raw; return (sign < 0) ? -v : v; } #ifndef FR_LEAN /*======================================================= * Degree-input trig: convert to u16 BAM via fr_deg_to_bam, then * call the BAM-native functions. Cardinal angles are exact. */ s32 fr_cos_deg(s32 deg, u16 radix) { if (radix == 0) return fr_cos_bam(FR_DEG2BAM_I(deg)); if (deg < 0) deg = -deg; /* Exact cardinal angles */ s32 frac_mask = (1 << radix) - 1; if ((deg & frac_mask) == 0) { s32 rem = (deg >> radix) % 360; if (rem == 0) return FR_TRIG_ONE; if (rem == 90) return 0; if (rem == 180) return -FR_TRIG_ONE; if (rem == 270) return 0; } /* Near 90° or 270° (cos=0 crossings): cos(90+δ) = -sin(δ) ≈ -δ·π/180, * cos(270+δ) = sin(δ) ≈ δ·π/180. Avoids BAM rounding error at zero. */ s32 d = normalize_to_r16(deg, radix); if (d >= FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; } { const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */ s32 delta = d - FR_D90_R16; if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); return -dr; } delta = d - (FR_D90_R16 + FR_D180_R16); if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); return dr; } } return fr_cos_bam(fr_deg_to_bam(deg, radix)); } s32 fr_sin_deg(s32 deg, u16 radix) { if (radix == 0) return fr_sin_bam(FR_DEG2BAM_I(deg)); s32 sign = 1; if (deg < 0) { deg = -deg; sign = -1; } /* Exact cardinal angles */ s32 frac_mask = (1 << radix) - 1; if ((deg & frac_mask) == 0) { s32 rem = (deg >> radix) % 360; if (rem == 0) return 0; if (rem == 90) return (sign < 0) ? -FR_TRIG_ONE : FR_TRIG_ONE; if (rem == 180) return 0; if (rem == 270) return (sign < 0) ? FR_TRIG_ONE : -FR_TRIG_ONE; } s32 v = fr_sin_bam(fr_deg_to_bam(deg, radix)); return (sign < 0) ? -v : v; } s32 FR_TanI(s32 deg) { /* Exact pole: deg mod 180 == ±90. Sign matches input sign. */ s32 rem = deg % 180; if (rem == 90 || rem == -90) return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; return fr_tan_bam(FR_DEG2BAM_I(deg)); } s32 fr_tan_deg(s32 deg, u16 radix) { if (radix == 0) return FR_TanI(deg); s32 deg_orig = deg; /* Normalize to [0, 360°) at caller radix */ s32 d360 = 360 << radix; if (deg < 0) { deg += ((-deg) / d360) * d360; if (deg < 0) deg += d360; } if (deg >= d360) { deg -= (deg / d360) * d360; } /* Exact cardinal angles */ s32 frac_mask = (1 << radix) - 1; if ((deg & frac_mask) == 0) { s32 ideg = deg >> radix; if (ideg == 0 || ideg == 180) return 0; if (ideg == 90 || ideg == 270) return (deg_orig >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; } /* Near 0° or 180° (tan=0 crossings): tan(δ) ≈ δ in radians */ s32 d = normalize_to_r16(deg, radix); { const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */ s32 delta; /* Near 0° */ if (d < DEG_THRESH) { s32 up = d << 8; return (FR_DEG2RAD(up) + (1 << 7)) >> 8; } /* Near 180° */ delta = d - FR_D180_R16; if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { s32 up = delta << 8; return (FR_DEG2RAD(up) + (1 << 7)) >> 8; } /* Near 360° */ delta = FR_D360_R16 - d; if (delta >= 0 && delta < DEG_THRESH) { s32 up = delta << 8; return -((FR_DEG2RAD(up) + (1 << 7)) >> 8); } } /* Main path: convert to u16 BAM, table lookup */ u16 bam = fr_deg_to_bam(deg, radix); s32 v = fr_tan_bam(bam); /* Near-pole BAM alias: determine sign from normalized angle position */ if (bam == 0x4000u || bam == 0xC000u) { s32 pole_d = (bam == 0x4000u) ? FR_D90_R16 : (FR_D90_R16 + FR_D180_R16); v = (d < pole_d) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; } return v; } #endif /* FR_LEAN */ /*======================================================= * FR_FixMuls (x*y signed, NOT saturated, round-to-nearest) * * Treats x and y as fixed-point values at the same radix r and returns * (x*y) >> r at radix r. The user is responsible for tracking the radix * point and for guaranteeing the product fits in 32 bits. * * Adds 0.5 LSB (0x8000) before the shift so the result rounds to * nearest instead of truncating toward zero. */ s32 FR_FixMuls(s32 x, s32 y) { int64_t v = (int64_t)x * (int64_t)y; return (s32)((v + 0x8000) >> 16); } /*======================================================= * FR_FixMulSat (x*y signed, SATURATED, round-to-nearest) * * Same semantics as FR_FixMuls but clamps to [INT32_MIN, INT32_MAX] on * overflow instead of wrapping. The fixed-point radix is fixed at 16 bits * (sM.16 inputs and output). Rounds to nearest (adds 0.5 LSB before shift). */ s32 FR_FixMulSat(s32 x, s32 y) { int64_t v = ((int64_t)x * (int64_t)y + 0x8000) >> 16; if (v > (int64_t)0x7fffffff) return FR_OVERFLOW_POS; if (v < -(int64_t)0x80000000) return FR_OVERFLOW_NEG; return (s32)v; } /*======================================================= FR_FixAddSat (x+y saturated add) programmer must align radix points before using this function */ s32 FR_FixAddSat(s32 x, s32 y) { s32 sum = x + y; if (x < 0) { if (y < 0) return (sum >= 0) ? FR_OVERFLOW_NEG : sum; } else { if (y >= 0) return (sum <= 0) ? FR_OVERFLOW_POS : sum; } return sum; } /* Inverse Trig * acos with binary search of the BAM-native quadrant table. * * Algorithm: bring `input` into s0.15, then binary-search the first-quadrant * cos table for the table entry closest to |input|. Apply quadrant mirror * if input was negative. */ /* FR_acos — returns radians at out_radix. * Range: [0, pi]. Input is a cosine value at the given radix. * * Uses the 129-entry sine table in reverse: binary-search the ascending * table to find asin(|input|), then acos = pi/2 - asin (with sign handling * for the second quadrant). */ s32 FR_acos(s32 input, u16 radix, u16 out_radix) { s32 v; s16 sign; s32 lo, hi, mid; s32 idx, d, num, frac; s32 input_abs; /* Work with absolute value at the caller's radix */ sign = (s16)((input < 0) ? 1 : 0); input_abs = sign ? -input : input; /* Clamp at the caller's radix */ { s32 one = (s32)1 << radix; if (input_abs >= one) return sign ? FR_CHRDX(FR_kPI, FR_kPREC, out_radix) : 0; } v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */ /* Small-angle fast path: when cos(θ) is close to 1.0, the sine table * has poor resolution near the top (entries close together). * Use acos(x) ≈ sqrt(2*(1-x)) instead. Threshold: v > sin_tab[121] * means the input is > cos(7*π/256) ≈ 0.9975. */ if (v > gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE - 8]) { s32 one = (s32)1 << radix; s32 one_minus_x = one - input_abs; /* 1-|x| at caller radix */ s32 two_omx = one_minus_x << 1; /* 2(1-|x|) at caller radix */ s32 rad_native = FR_sqrt(two_omx, radix); /* radians at caller radix */ s32 rad_out = FR_CHRDX(rad_native, radix, out_radix); if (sign) rad_out = FR_CHRDX(FR_kPI, FR_kPREC, out_radix) - rad_out; return rad_out; } /* Binary search on the ascending sine table. * gFR_SIN_TAB_Q[0] = 0 (sin 0°), gFR_SIN_TAB_Q[128] = 32768 (sin 90°). * * Find the first index where table[idx] >= v. */ lo = 0; hi = FR_TRIG_TABLE_SIZE; while (lo < hi) { mid = (lo + hi) >> 1; if ((s32)gFR_SIN_TAB_Q[mid] < v) lo = mid + 1; else hi = mid; } /* lo is now the first index where table[lo] >= v. * The bracketing interval is [lo-1, lo] with table[lo-1] < v <= table[lo]. * This gives us the asin angle; acos = pi/2 - asin. */ idx = lo; if (idx <= 0) { idx = 0; frac = 0; } else if (idx >= FR_TRIG_TABLE_SIZE) { idx = FR_TRIG_TABLE_SIZE - 1; frac = 0; } else { /* Interpolate between table[idx-1] and table[idx]. * d = table[idx] - table[idx-1] (>= 0, sin increasing) * num = v - table[idx-1] (how far past table[idx-1]) */ d = (s32)gFR_SIN_TAB_Q[idx] - (s32)gFR_SIN_TAB_Q[idx - 1]; num = v - (s32)gFR_SIN_TAB_Q[idx - 1]; if (d > 0) frac = ((num << FR_TRIG_FRAC_BITS) + (d >> 1)) / d; else frac = 0; idx = idx - 1; } { /* asin_bam is the angle in first-quadrant BAM whose sin = v */ u16 asin_bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac); /* acos = pi/2 - asin (in BAM: quadrant - asin_bam) */ u16 bam = (u16)(FR_TRIG_QUADRANT - asin_bam); if (sign) bam = (u16)(FR_BAM_HALF - bam); /* mirror: pi - angle */ return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); } } /* FR_asin — returns radians at out_radix. Range: [-pi/2, pi/2]. */ s32 FR_asin(s32 input, u16 radix, u16 out_radix) { /* asin(x) = pi/2 - acos(x) */ s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); return half_pi - FR_acos(input, radix, out_radix); } /* FR_atan2(y, x, out_radix) — full-circle arctangent, returns radians * at the specified output radix (s32). * * Range: [-pi, pi]. Returns 0 for atan2(0,0). * * Implementation: normalise (x,y) via FR_hypot_fast8, then recover the * angle with FR_asin or FR_acos (both use the 129-entry cosine table). * To stay in the well-conditioned region of each inverse function we * switch at 45°: * |y| <= |x| → use asin(y/h) — asin stable near 0 * |y| > |x| → use acos(x/h) — acos stable near pi/2 * This keeps the derivative amplification factor below 1.414x everywhere. */ s32 FR_atan2(s32 y, s32 x, u16 out_radix) { s32 ax, ay, h, q1_angle; /* Axis cases — exact angles, no divide. */ if (x == 0) { if (y > 0) return FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* pi/2 */ if (y < 0) return -FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* -pi/2 */ return 0; } if (y == 0) return (x > 0) ? 0 : FR_CHRDX(FR_kPI, FR_kPREC, out_radix); /* 0 or pi */ ax = (x < 0) ? -x : x; ay = (y < 0) ? -y : y; /* Normalise so max(ax,ay) sits in [2^14, 2^15). This gives * FR_hypot_fast8 enough integer bits for the shift-only segments * to produce an accurate ratio — critical when the raw inputs are * small (e.g. atan2(1,1) at radix 0). Scaling both by the same * power of two doesn't change the angle. */ { s32 mx = (ax > ay) ? ax : ay; while (mx < (1L << 14)) { ax <<= 1; ay <<= 1; mx <<= 1; } while (mx >= (1L << 16)) { ax >>= 1; ay >>= 1; mx >>= 1; } } h = FR_hypot_fast8((s32)ax, (s32)ay); if (h == 0) return 0; /* degenerate */ /* Compute the first-quadrant angle (positive, [0..pi/2]). * Divide produces a value in [0,1] at radix FR_TRIG_PREC (s0.15). * * Small-angle fast path: when the minor-axis ratio is small, * asin(x) ≈ x (error < x³/6). Below ~5° the cubic term is * smaller than the table-lookup error, so the direct identity * is both faster and more accurate. Threshold 2753 at r15 * corresponds to sin(~4.8°) = 0.084. */ #define FR_ATAN2_SMALL 2753 if (ay <= ax) { /* angle in [0°..45°]: use asin(ay/h) — well-conditioned near 0 */ s32 sin_val = (s32)(((int64_t)ay << FR_TRIG_PREC) / h); if (sin_val < FR_ATAN2_SMALL) q1_angle = FR_CHRDX(sin_val, FR_TRIG_PREC, out_radix); else q1_angle = FR_asin(sin_val, FR_TRIG_PREC, out_radix); } else { /* angle in [45°..90°]: use acos(ax/h) — well-conditioned near pi/2 */ s32 cos_val = (s32)(((int64_t)ax << FR_TRIG_PREC) / h); if (cos_val < FR_ATAN2_SMALL) { /* angle ≈ pi/2 - cos_val (symmetric small-angle identity) */ s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); q1_angle = half_pi - FR_CHRDX(cos_val, FR_TRIG_PREC, out_radix); } else q1_angle = FR_acos(cos_val, FR_TRIG_PREC, out_radix); } /* Apply quadrant from signs of x and y. * q1_angle is always positive [0..pi/2]. */ { s32 pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix); if (x > 0) return (y > 0) ? q1_angle : -q1_angle; /* x < 0: mirror across y-axis */ return (y > 0) ? (pi - q1_angle) : (q1_angle - pi); } } /* FR_atan(input, radix, out_radix) — arctangent of a single argument. * Returns radians at out_radix, range [-pi/2, pi/2]. */ s32 FR_atan(s32 input, u16 radix, u16 out_radix) { s32 one = (s32)1 << radix; return FR_atan2(input, one, out_radix); } /* 2^f table for f in [0, 1] in 65 entries (64 segments), output in s.16 * fixed point. Entry i = round(2^(i/64) * 65536). Size: 260 bytes. * Used by FR_pow2 to look up the fractional power of 2 with linear * interpolation. */ static const u32 gFR_POW2_FRAC_TAB[65] = { 65536, 66250, 66971, 67700, 68438, 69183, 69936, 70698, 71468, 72246, 73032, 73828, 74632, 75444, 76266, 77096, 77936, 78785, 79642, 80510, 81386, 82273, 83169, 84074, 84990, 85915, 86851, 87796, 88752, 89719, 90696, 91684, 92682, 93691, 94711, 95743, 96785, 97839, 98905, 99982, 101070, 102171, 103283, 104408, 105545, 106694, 107856, 109031, 110218, 111418, 112631, 113858, 115098, 116351, 117618, 118899, 120194, 121502, 122825, 124163, 125515, 126882, 128263, 129660, 131072 }; /* FR_pow2(input, radix) — computes 2^(input/2^radix), result at same radix. * * Algorithm: split input into floor(integer) and fractional part. The * fractional part is in [0, 1) by construction (Euclidean / mathematical * floor — the fractional part of -2.3 is +0.7, not -0.3). Then * 2^(int + frac) = 2^int * 2^frac * where 2^frac is looked up from a 65-entry table at radix 16, and 2^int * is a shift. * * Worst-case absolute error: ~1e-5 over [-8, 8] (65-entry table). * Linear interpolation leaves a small concavity error in each interval. */ s32 FR_pow2(s32 input, u16 radix) { s32 flr, frac_full, idx, frac_lo, lo, hi, mant, result; u32 mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0; /* Mathematical floor: for positive input it's input>>radix; for * negative input we need to round toward -infinity, not toward zero. */ if (input >= 0) { flr = (s32)((u32)input >> radix); frac_full = (s32)((u32)input & mask); } else { s32 neg = -input; s32 nflr = (s32)((u32)neg >> radix); s32 nfrc = (s32)((u32)neg & mask); if (nfrc == 0) { flr = -nflr; frac_full = 0; } else { flr = -nflr - 1; /* floor toward -inf */ frac_full = (s32)((1L << radix) - nfrc); } } /* frac_full is in [0, 2^radix). Re-radix it to s.16 for table lookup. */ if (radix > 16) frac_full >>= (radix - 16); else if (radix < 16) frac_full <<= (16 - radix); /* now frac_full is in [0, 65536) representing fractional in s.16. */ /* Top 6 bits index the table; bottom 10 are the interpolation fraction. */ idx = frac_full >> 10; frac_lo = frac_full & ((1L << 10) - 1); lo = (s32)gFR_POW2_FRAC_TAB[idx]; hi = (s32)gFR_POW2_FRAC_TAB[idx + 1]; mant = lo + (((hi - lo) * frac_lo) >> 10); /* mant in s.16, in [1.0, 2.0) */ /* Apply integer shift. mant is at radix 16. We want output at `radix`. * If radix == 16: just shift mant. * Otherwise re-radix mant first. */ if (flr >= 0) { /* result = mant << flr, then re-radix to caller's radix. */ if (flr >= 30) return FR_OVERFLOW_POS; result = mant << flr; return FR_CHRDX(result, 16, radix); } else { /* mant >> -flr at radix 16, then re-radix. */ s32 sh = -flr; if (sh >= 30) return 0; /* underflow */ result = mant >> sh; return FR_CHRDX(result, 16, radix); } } /* log2 mantissa table for m in [1, 2), m = 1 + i/64, returning log2(m) * in s.16 fixed point. 65 entries (last is log2(2) = 1.0 = 65536) so the * interpolation between idx and idx+1 never reads out of bounds. * Size: 260 bytes. Entry i = round(log2(1 + i/64) * 65536). */ static const u32 gFR_LOG2_MANT_TAB[65] = { 0, 1466, 2909, 4331, 5732, 7112, 8473, 9814, 11136, 12440, 13727, 14996, 16248, 17484, 18704, 19909, 21098, 22272, 23433, 24579, 25711, 26830, 27936, 29029, 30109, 31178, 32234, 33279, 34312, 35334, 36346, 37346, 38336, 39316, 40286, 41246, 42196, 43137, 44068, 44990, 45904, 46809, 47705, 48593, 49472, 50344, 51207, 52063, 52911, 53751, 54584, 55410, 56229, 57040, 57845, 58643, 59434, 60219, 60997, 61769, 62534, 63294, 64047, 64794, 65536 }; /* FR_log2(input, radix, output_radix) — log base 2 of a fixed-point number. * * input : value to take log2 of, treated as a positive sM.radix value. * radix : number of fractional bits in `input`. * output_radix : number of fractional bits in the result. * * Returns FR_LOG2MIN for input <= 0 (log of zero/negative is undefined; we * return a large negative sentinel rather than crash). * * Algorithm: * 1. Find p, the position of the leading 1 bit of `input`. * log2(input) = p + log2(input / 2^p), where the second term is in * [0, 1) because (input / 2^p) is in [1, 2). * 2. Normalize the mantissa to s1.31 by shifting `input` so its top bit * sits at bit 31 (so bits 30..25 are the upper 6 bits of m-1). * 3. Look up log2(m) in the 65-entry table with linear interpolation * across the next 24 bits. Result is in s.16. * 4. integer_part = (p - radix), then result = (integer_part << 16) + * mantissa_log2. * 5. Re-radix to the requested output_radix via FR_CHRDX. * * Worst-case absolute error: ~6e-5 in log2 units (65-entry table). */ s32 FR_log2(s32 input, u16 radix, u16 output_radix) { s32 p, integer_part, idx, frac, lo, hi, mant_log2, result; u32 m, u; if (input <= 0) return FR_LOG2MIN; /* Step 1: find the position of the leading 1 bit. */ u = (u32)input; p = 0; while (u > 1) { u >>= 1; p++; } /* Step 2: shift input so the leading 1 bit is at bit 30 (s1.30 mantissa). * Equivalently: m = input << (30 - p), where m is in [2^30, 2^31). * The fractional part of m / 2^30 is in [0, 1), and that's what we look * up in the table. */ if (p >= 30) m = (u32)input >> (p - 30); else m = (u32)input << (30 - p); /* m is now in [2^30, 2^31). Subtract 2^30 to get the fractional part * (m_frac in [0, 2^30)). Index into the 64-entry table is the top 6 * bits of m_frac; the lower 24 bits are the interpolation fraction. */ m -= (1u << 30); idx = (s32)(m >> 24); /* 6 bits */ frac = (s32)(m & ((1u << 24) - 1)); /* 24 bits */ lo = (s32)gFR_LOG2_MANT_TAB[idx]; hi = (s32)gFR_LOG2_MANT_TAB[idx + 1]; mant_log2 = lo + (s32)(((int64_t)(hi - lo) * frac) >> 24); /* Step 3: assemble. integer_part = p - radix. */ integer_part = p - (s32)radix; result = (integer_part << 16) + mant_log2; /* Step 4: re-radix to output_radix. */ return FR_CHRDX(result, 16, output_radix); } s32 FR_ln(s32 input, u16 radix, u16 output_radix) { s32 r = FR_log2(input, radix, output_radix); return FR_MULK28(r, FR_krLOG2E_28); } #ifndef FR_LEAN s32 FR_log10(s32 input, u16 radix, u16 output_radix) { s32 r = FR_log2(input, radix, output_radix); return FR_MULK28(r, FR_krLOG2_10_28); } #endif #ifndef FR_NO_PRINT /*************************************** * FR_printNumD - write a decimal integer with space padding. * * Equivalent to "%*d" in printf, modulo the return convention. * * f : per-character output function (e.g. putchar). Must not be NULL. * n : signed integer to print. * pad : minimum field width; spaces are prepended to reach this width. * * Returns the number of characters written on success, or -1 if `f` is NULL. */ int FR_printNumD(int (*f)(char), int n, int pad) { unsigned int mag; int written = 0, neg = 0; int digits = 1; unsigned int t; if (!f) return -1; if (n < 0) { neg = 1; mag = (unsigned int)(-(long)n); /* safe for INT_MIN */ } else { mag = (unsigned int)n; } /* Count decimal digits in mag (always at least 1 for n=0). */ t = mag; while (t >= 10) { t /= 10; digits++; } /* Pad with spaces. The total width includes the sign. */ { int total = digits + (neg ? 1 : 0); while (pad-- > total) { f(' '); written++; } } if (neg) { f('-'); written++; } /* Print digits MSB first by computing the largest power of 10 <= mag. */ { unsigned int p = 1; int i; for (i = 1; i < digits; i++) p *= 10; while (p > 0) { f((char)('0' + (mag / p) % 10)); written++; if (p == 1) break; p /= 10; } } return written; } /*************************************** * FR_printNumF - write a fixed-point number as a decimal floating-point string. * * f : per-character output function. Must not be NULL. * n : signed fixed-point value at the given radix. * radix : number of fractional bits in `n`. * pad : minimum field width (including sign and decimal point). * prec : number of fractional digits to print. * * Returns the number of characters written on success, -1 if `f` is NULL. * * Rounding policy: truncates fractional digits beyond `prec` (no rounding). */ int FR_printNumF(int (*f)(char), s32 n, int radix, int pad, int prec) { unsigned int mag_int; u32 mag_frac; u32 frac_mask; int written = 0, neg = 0; int int_digits = 1; int total; unsigned int t; if (!f) return -1; frac_mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0; if (n < 0) { neg = 1; /* Negate as unsigned to avoid INT_MIN overflow. */ u32 un = (u32)(-(int64_t)n); mag_int = (unsigned int)(un >> radix); mag_frac = un & frac_mask; } else { mag_int = (unsigned int)((u32)n >> radix); mag_frac = (u32)n & frac_mask; } /* Count integer digits. */ t = mag_int; while (t >= 10) { t /= 10; int_digits++; } /* Total visible width = sign + int + (dot + prec digits if prec>0). */ total = int_digits + (neg ? 1 : 0) + ((prec > 0) ? (1 + prec) : 0); while (pad-- > total) { f(' '); written++; } if (neg) { f('-'); written++; } /* Print integer part. */ { unsigned int p = 1; int i; for (i = 1; i < int_digits; i++) p *= 10; while (p > 0) { f((char)('0' + (mag_int / p) % 10)); written++; if (p == 1) break; p /= 10; } } /* Print fractional part. Extract one decimal digit at a time: * frac' = frac * 10 * digit = frac' >> radix * frac = frac' & frac_mask */ if (prec > 0) { f('.'); written++; while (prec-- > 0) { u32 scaled; int digit; scaled = (u32)(((uint64_t)mag_frac * 10)); digit = (int)(scaled >> radix); mag_frac = scaled & frac_mask; f((char)('0' + (digit % 10))); written++; } } return written; } /*************************************** * FR_printNumH - write an integer as hexadecimal. * * f : per-character output function. Must not be NULL. * n : integer to print (interpreted as unsigned for the digits). * showPrefix : if non-zero, prepend "0x". * * Returns the number of characters written on success, -1 if f is NULL. */ int FR_printNumH(int (*f)(char), int n, int showPrefix) { unsigned int u = (unsigned int)n; int written = 0; int x = (int)((sizeof(int) << 1) - 1); int d; if (!f) return -1; if (showPrefix) { f('0'); f('x'); written += 2; } do { d = (int)((u >> (x << 2)) & 0xf); d = (d > 9) ? (d - 0xa + 'a') : (d + '0'); f((char)d); written++; } while (x--); return written; } /*======================================================= * FR_numstr — parse a decimal string into a fixed-point value. * * This is the runtime inverse of FR_printNumF: given a string like * "12.34" or "-0.05" and a radix (number of fractional bits), it * returns the s32 fixed-point representation. * * Features: * - Leading whitespace is skipped. * - Optional sign ('+' or '-'). * - Up to 9 fractional digits are used (s32 range). * - No malloc, no strtod, no libm. * * Returns 0 for NULL or empty input. */ s32 FR_numstr(const char *s, u16 radix) { static const s32 pow10[10] = { 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L, 1000000000L }; s32 int_part = 0, frac_part = 0; int frac_digits = 0, neg = 0; s32 result; if (!s || !*s) return 0; while (*s == ' ' || *s == '\t') s++; /* skip whitespace */ if (*s == '-') { neg = 1; s++; } /* sign */ else if (*s == '+') { s++; } while (*s >= '0' && *s <= '9') /* integer part */ { int_part = int_part * 10 + (*s - '0'); s++; } if (*s == '.') { /* fractional part */ s++; while (*s >= '0' && *s <= '9') { if (frac_digits < 9) { frac_part = frac_part * 10 + (*s - '0'); frac_digits++; } s++; } } result = int_part << radix; if (frac_digits > 0) result += (s32)(((int64_t)frac_part << radix) / pow10[frac_digits]); return neg ? -result : result; } #endif /* FR_NO_PRINT */ /*======================================================= * Square root and hypot * * fr_isqrt64 is a private helper implementing the digit-by-digit * ("shift-and-subtract") integer square root. The core loop computes * floor(sqrt(n)), then a final remainder check rounds to nearest. * Uses no division. At most 32 iterations. */ static u32 fr_isqrt64(uint64_t n) { uint64_t root = 0; uint64_t bit = (uint64_t)1 << 62; while (bit > n) bit >>= 2; while (bit != 0) { uint64_t trial = root + bit; if (n >= trial) { n -= trial; root = (root >> 1) + bit; } else { root >>= 1; } bit >>= 2; } /* round to nearest: if remainder > root, (root+1)^2 is closer */ if (n > root) root++; return (u32)root; } /*======================================================= * FR_sqrt - fixed-radix square root. * * input : value at radix `radix`. Must be >= 0. * radix : fractional bits of input AND result. * return : sqrt(input) at radix `radix`, or FR_DOMAIN_ERROR if input < 0. * * Math: sqrt(input_fp / 2^r) at radix r is * result_fp = sqrt(input_fp / 2^r) * 2^r = sqrt(input_fp * 2^r) * so we compute isqrt(input_fp << radix) on a 64-bit accumulator. This * works for any input that fits in s32 and any radix in [0, 30]. * * Precision: round-to-nearest sqrt. Worst-case absolute error is * <= 0.5 LSB at the requested radix. * Always non-negative for non-negative input. Result is monotone in * input. * * Saturation: input < 0 returns FR_DOMAIN_ERROR (= INT32_MIN). Caller * can test `result == FR_DOMAIN_ERROR` to detect domain errors. * * Side effects: none. Pure function. */ s32 FR_sqrt(s32 input, u16 radix) { uint64_t n; if (input < 0) return FR_DOMAIN_ERROR; if (input == 0) return 0; n = (uint64_t)(u32)input << radix; return (s32)fr_isqrt64(n); } /*======================================================= * FR_hypot - sqrt(x*x + y*y) without intermediate overflow. * * x, y : values at radix `radix` * radix : fractional bits of inputs AND result * return : sqrt(x*x + y*y) at radix `radix`. * * Math: x*x + y*y is naturally at radix 2*radix; isqrt of a 2r-radix * value yields an r-radix result, so no extra shifting is needed. The * u64 accumulator can hold (INT32_MAX^2)*2 = ~2^63, so (x*x + y*y) never * overflows for any s32 inputs. * * Precision: round-to-nearest. Worst-case absolute error <= 0.5 LSB * at the requested radix. * * Side effects: none. Pure function. */ #ifndef FR_LEAN s32 FR_hypot(s32 x, s32 y, u16 radix) { uint64_t xx = (uint64_t)((int64_t)x * (int64_t)x); uint64_t yy = (uint64_t)((int64_t)y * (int64_t)y); (void)radix; /* the 2*radix in xx+yy cancels with isqrt's halving */ return (s32)fr_isqrt64(xx + yy); } #endif /*======================================================= * FR_hypot_fast8 — 8-segment piecewise-linear magnitude approximation. * * Shift-only, no multiply, no 64-bit. Based on the piecewise-linear * method described in US Patent 6,567,777 B1 (Chatterjee, expired). * Peak error: ~0.10%. */ s32 FR_hypot_fast8(s32 x, s32 y) { s32 hi, lo; /* absolute values (clamp INT32_MIN to INT32_MAX to avoid UB) */ if (x < 0) x = (x == (s32)0x80000000) ? 0x7FFFFFFF : -x; if (y < 0) y = (y == (s32)0x80000000) ? 0x7FFFFFFF : -y; /* hi = max(|x|,|y|), lo = min(|x|,|y|) */ if (x > y) { hi = x; lo = y; } else { hi = y; lo = x; } if (hi == 0) return 0; /* 8 piecewise-linear segments: dist ≈ a*hi + b*lo. * Boundaries at β = 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875. */ if ((hi >> 1) < lo) { /* β in (0.5, 1.0] */ if (lo > hi - (hi >> 2)) { /* β in (0.75, 1.0] */ if (lo > hi - (hi >> 3)) /* β > 0.875 */ /* a≈0.7305, b≈0.6836 */ return hi - (hi >> 2) - (hi >> 6) - (hi >> 8) + lo - (lo >> 2) - (lo >> 4) - (lo >> 8); else /* β in (0.75, 0.875] */ /* a≈0.7803, b≈0.6262 */ return hi - (hi >> 2) + (hi >> 5) - (hi >> 10) + (lo >> 1) + (lo >> 3) + (lo >> 10) + (lo >> 12); } else { /* β in (0.5, 0.75] */ if (lo > hi - (hi >> 1) + (hi >> 3)) /* β > 0.625 */ /* a≈0.8281, b≈0.5630 */ return hi - (hi >> 2) + (hi >> 4) + (hi >> 6) + (lo >> 1) + (lo >> 4) + (lo >> 11); else /* β in (0.5, 0.625] */ /* a≈0.8728, b≈0.4893 */ return hi - (hi >> 3) - (hi >> 9) - (hi >> 12) + (lo >> 1) - (lo >> 6) + (lo >> 8) + (lo >> 10); } } else { /* β in [0, 0.5] */ if ((hi >> 2) < lo) { /* β in (0.25, 0.5] */ if ((hi >> 1) - (hi >> 3) < lo) /* β > 0.375 */ /* a≈0.9180, b≈0.3984 */ return hi - (hi >> 4) - (hi >> 6) - (hi >> 8) + (lo >> 1) - (lo >> 3) + (lo >> 5) - (lo >> 7); else /* β in (0.25, 0.375] */ /* a≈0.9551, b≈0.2988 */ return hi - (hi >> 4) + (hi >> 6) + (hi >> 9) + (lo >> 2) + (lo >> 4) - (lo >> 6) + (lo >> 9); } else { /* β in [0, 0.25] */ if ((hi >> 3) < lo) /* β in (0.125, 0.25] */ /* a≈0.9839, b≈0.1838 */ return hi - (hi >> 6) - (hi >> 11) + (lo >> 2) - (lo >> 4) - (lo >> 8) + (lo >> 12); else /* β in [0, 0.125] */ /* a≈0.9990, b≈0.0620 */ return hi - (hi >> 10) + (lo >> 4) - (lo >> 11); } } } #ifndef FR_NO_WAVES /*======================================================= * Wave generators — synth-style fixed-shape waveforms. * * All wave functions take a u16 BAM phase in [0, 65535] (a full cycle) * and return s16 in s0.15 format, clamped to [-32767, +32767] to match * the trig amplitude convention used by fr_cos_bam / fr_sin_bam. * * Use FR_HZ2BAM_INC(hz, sample_rate) to compute a phase increment for * a given output frequency, then accumulate it (mod 2^16) per sample. * * Side effects: pure functions (except fr_wave_noise which advances a * caller-provided LFSR state pointer). */ /* fr_wave_sqr - 50%-duty square wave. * phase < pi (BAM<0x8000) → +full; phase >= pi → -full. */ s16 fr_wave_sqr(u16 phase) { return (phase < 0x8000) ? (s16)32767 : (s16)-32767; } /* fr_wave_pwm - variable-duty pulse. * `duty` is the BAM threshold: phase < duty → high, else low. * duty = 0 → always low * duty = 0x8000 → 50% duty (same as fr_wave_sqr) * duty = 0xffff → high almost everywhere (one BAM step low) */ s16 fr_wave_pwm(u16 phase, u16 duty) { return (phase < duty) ? (s16)32767 : (s16)-32767; } /* fr_wave_saw - rising sawtooth. * Linear ramp from -32767 (just after phase=0) to +32767 (at phase=0xffff), * passing through 0 at phase=0x8000. The single boundary case phase=0 * (which would naturally produce -32768) is clamped to -32767 to keep the * amplitude symmetric. */ s16 fr_wave_saw(u16 phase) { s32 v = (s32)phase - (s32)0x8000; if (v < -32767) v = -32767; return (s16)v; } /* fr_wave_tri - symmetric triangle. * Four linear segments: * Q1 [0, 0x4000) : rising 0 → +peak * Q2 [0x4000, 0x8000): falling +peak → 0 * Q3 [0x8000, 0xc000): falling 0 → -peak * Q4 [0xc000, 0x10000): rising -peak → 0 * Peaks are clamped to +/-32767 (the natural unclamped formula gives * +/-32768 at the exact peak BAM). */ s16 fr_wave_tri(u16 phase) { s32 t; if (phase < 0x8000) { /* First half: 0 -> +peak -> 0 */ if (phase < 0x4000) t = (s32)phase << 1; /* 0 .. 0x7ffe */ else t = (s32)(0x8000 - phase) << 1; /* 0x8000 .. 2 */ if (t > 32767) t = 32767; return (s16)t; } else { /* Second half: 0 -> -peak -> 0 */ if (phase < 0xc000) t = (s32)(phase - 0x8000) << 1; /* 0 .. 0x7ffe */ else t = (s32)(0x10000 - phase) << 1;/* 0x8000 .. 2 */ if (t > 32767) t = 32767; return (s16)-t; } } /* fr_wave_tri_morph - variable-symmetry triangle. * * phase : u16 BAM * break_point : u16 BAM where the wave reaches its positive peak. * * Going from 0 to +peak in [0, break_point), then from +peak back to 0 * in [break_point, 0xffff]. The result is a triangle whose rising and * falling slopes can differ. * * break_point = 0x8000 → symmetric triangle * break_point = 0xffff → rising sawtooth (instant fall) * break_point = 0x0001 → falling sawtooth (instant rise) * break_point = 0 → degenerate; treated as 1 to avoid div-by-zero * * Note that this version returns values in [0, 32767] only (not bipolar). * Caller can subtract 16384 and double if a bipolar version is desired. * * Costs: one 32-bit divide per sample. On Cortex-M3+ this is ~10-20 * cycles. On 8051 / MSP430 this is much slower; pre-compute slopes if * those targets matter to you. */ s16 fr_wave_tri_morph(u16 phase, u16 break_point) { u32 t; if (break_point == 0) break_point = 1; if (phase < break_point) { /* rising: 0 at phase=0, 32767 at phase=break_point */ t = (u32)(((u32)phase * 32767UL) / (u32)break_point); } else { /* falling: 32767 at phase=break_point, 0 at phase=0xffff */ u32 span = (u32)0xffff - (u32)break_point; if (span == 0) return 32767; t = (u32)(((u32)((u32)0xffff - (u32)phase) * 32767UL) / span); } if (t > 32767) t = 32767; return (s16)t; } /* fr_wave_noise - LFSR-based pseudorandom noise. * * state : pointer to a u32 the caller maintains. Initial value must * be non-zero (zero is a fixed point of the LFSR). A common * seed is 0xACE1u or any other non-zero constant. * * Returns the next s16 sample in s0.15 (full ±32767 range, white-ish). * Implementation: 32-bit Galois LFSR with the standard maximal-period * tap polynomial 0xD0000001 (period 2^32 - 1 samples). * * Quality: this is "fast white noise" suitable for synth use. It is NOT * cryptographically secure. For better statistical properties (FFT * flatness etc.) layer a longer LFSR or use a separate PRNG. */ s16 fr_wave_noise(u32 *state) { u32 lsb; if (!state) return 0; lsb = *state & 1u; *state >>= 1; if (lsb) *state ^= 0xD0000001u; /* Take the top 16 bits and re-bias to s16 range, clamp to ±32767. */ { s32 v = (s32)((*state >> 16) & 0xffffu) - 32768; if (v < -32767) v = -32767; return (s16)v; } } /*======================================================= * ADSR envelope generator * * Linear-segment Attack-Decay-Sustain-Release envelope. State is held * in caller-allocated fr_adsr_t struct (no global state, no malloc). * * Lifecycle: * 1. Caller allocates an fr_adsr_t (stack or static). * 2. fr_adsr_init() once per patch with attack/decay/release durations * in samples and a sustain level in s0.15. * 3. fr_adsr_trigger() on note-on. Output rises 0 -> peak over `atk` * samples, falls peak -> sustain over `dec` samples, then holds. * 4. fr_adsr_release() on note-off. Output falls current -> 0 over a * time controlled by the release rate (rate, not duration: the * time depends on where in the envelope we are). * 5. fr_adsr_step() once per audio sample to read the current value. * * Internal precision: levels are stored as s32 in s1.30 format so even * very long envelopes (e.g. 48000-sample attack at 48 kHz = 1 second) * have a non-zero per-sample increment. Output is converted to s0.15. * * Saturation: the envelope state machine is self-clamping; level cannot * escape [0, 1<<30]. Output is in [0, 32767]. */ #define FR_ADSR_PEAK_S130 ((s32)1 << 30) void fr_adsr_init(fr_adsr_t *env, u32 attack_samples, u32 decay_samples, s16 sustain_level_s015, u32 release_samples) { if (!env) return; env->state = FR_ADSR_IDLE; env->level = 0; /* sustain_level_s015 is s16 so its upper bound (32767) is already the * type's max; only the lower bound needs an explicit clamp. */ if (sustain_level_s015 < 0) sustain_level_s015 = 0; /* Convert s0.15 -> s1.30 by shifting left 15. */ env->sustain = (s32)sustain_level_s015 << 15; env->attack_inc = (attack_samples > 0) ? (s32)(FR_ADSR_PEAK_S130 / attack_samples) : FR_ADSR_PEAK_S130; env->decay_dec = (decay_samples > 0) ? (s32)((FR_ADSR_PEAK_S130 - env->sustain) / (s32)decay_samples) : (FR_ADSR_PEAK_S130 - env->sustain); env->release_dec = (release_samples > 0) ? (s32)(FR_ADSR_PEAK_S130 / release_samples) : FR_ADSR_PEAK_S130; } void fr_adsr_trigger(fr_adsr_t *env) { if (!env) return; env->state = FR_ADSR_ATTACK; env->level = 0; } void fr_adsr_release(fr_adsr_t *env) { if (!env) return; env->state = FR_ADSR_RELEASE; } s16 fr_adsr_step(fr_adsr_t *env) { if (!env) return 0; switch (env->state) { case FR_ADSR_ATTACK: env->level += env->attack_inc; if (env->level >= FR_ADSR_PEAK_S130) { env->level = FR_ADSR_PEAK_S130; env->state = FR_ADSR_DECAY; } break; case FR_ADSR_DECAY: env->level -= env->decay_dec; if (env->level <= env->sustain) { env->level = env->sustain; env->state = FR_ADSR_SUSTAIN; } break; case FR_ADSR_SUSTAIN: env->level = env->sustain; break; case FR_ADSR_RELEASE: env->level -= env->release_dec; if (env->level <= 0) { env->level = 0; env->state = FR_ADSR_IDLE; } break; case FR_ADSR_IDLE: default: env->level = 0; break; } /* s1.30 -> s0.15: shift right 15. Clamp for safety. */ { s32 out = env->level >> 15; if (out < 0) out = 0; if (out > 32767) out = 32767; return (s16)out; } } #endif /* FR_NO_WAVES */