From 17c021a2a4ea54a3881ec5467f08f6c855d33147 Mon Sep 17 00:00:00 2001 From: Alessandro Mauri Date: Thu, 14 May 2026 21:30:51 +0200 Subject: [PATCH] hand rolled fixed point math --- fw/Makefile | 9 +- fw/fpmath.h | 119 +++ fw/fr_math/FR_defs.h | 120 --- fw/fr_math/FR_math.c | 1837 ------------------------------------------ fw/fr_math/FR_math.h | 653 --------------- fw/main.c | 59 +- 6 files changed, 149 insertions(+), 2648 deletions(-) create mode 100644 fw/fpmath.h delete mode 100644 fw/fr_math/FR_defs.h delete mode 100644 fw/fr_math/FR_math.c delete mode 100644 fw/fr_math/FR_math.h diff --git a/fw/Makefile b/fw/Makefile index 7cd0dd5..8ada67c 100644 --- a/fw/Makefile +++ b/fw/Makefile @@ -32,19 +32,14 @@ U8G2_OBJS := $(addprefix $(BUILD_DIR)/, $(notdir $(U8G2_SRC:.c=.o))) $(BUILD_DIR)/%.o : $(U8G2_DIR)/%.c | $(BUILD_DIR) $(PREFIX)-gcc $(CFLAGS) -c $< -o $@ -# build fr_math -FRMATH_OBJS := $(addprefix $(BUILD_DIR)/, $(notdir $(FRMATH_SRC:.c=.o))) -$(BUILD_DIR)/%.o : $(FRMATH_DIR)/%.c | $(BUILD_DIR) - $(PREFIX)-gcc $(CFLAGS) -c $< -o $@ - # build target TARGET_OBJS := $(addprefix $(BUILD_DIR)/, $(filter-out ch32fun.o, $(notdir $(FILES_TO_COMPILE:.c=.o)))) $(BUILD_DIR)/%.o : %.c $(HEADER_FILES) | $(BUILD_DIR) $(PREFIX)-gcc $(CFLAGS) -c $< -o $@ # link target, the rest of the build is specified in ch32fun.mk -$(TARGET).elf : $(FILES_TO_COMPILE) $(LINKER_SCRIPT) $(EXTRA_ELF_DEPENDENCIES) $(U8G2_OBJS) $(FRMATH_OBJS) $(TARGET_OBJS) ch32fun.o - $(PREFIX)-gcc -o $@ $(U8G2_OBJS) $(TARGET_OBJS) $(FRMATH_OBJS) ch32fun.o $(CFLAGS) $(LDFLAGS) +$(TARGET).elf : $(FILES_TO_COMPILE) $(LINKER_SCRIPT) $(EXTRA_ELF_DEPENDENCIES) $(U8G2_OBJS) $(TARGET_OBJS) ch32fun.o + $(PREFIX)-gcc -o $@ $(U8G2_OBJS) $(TARGET_OBJS) ch32fun.o $(CFLAGS) $(LDFLAGS) flash : cv_flash diff --git a/fw/fpmath.h b/fw/fpmath.h new file mode 100644 index 0000000..927dace --- /dev/null +++ b/fw/fpmath.h @@ -0,0 +1,119 @@ +#ifndef _FPMATH_H +#define _FPMATH_H + +#include +#include + +// This library depends on sign extension +static_assert(-4 >> 1 == -2, ">> doesn't do sign extension"); + + +// Fixed point signed type in 16.16 format +#define FP_R 16 +typedef int32_t fp16_t; + + +#define POW10(d) ( \ + ((d) == 0) ? 1L : \ + ((d) == 1) ? 10L : \ + ((d) == 2) ? 100L : \ + ((d) == 3) ? 1000L : \ + ((d) == 4) ? 10000L : \ + ((d) == 5) ? 100000L : \ + ((d) == 6) ? 1000000L : \ + ((d) == 7) ? 10000000L : \ + ((d) == 8) ? 100000000L : 1000000000L) + + +static inline fp16_t i2fp(int16_t i) +{ + return (int32_t)i << FP_R; +} + + +static inline int16_t fp2i(fp16_t f) +{ + return (int16_t)(f >> FP_R); +} + + +static inline fp16_t num2fp(int16_t i, int16_t f, uint8_t d) +{ + return i2fp(i) + (i < 0 ? -(i2fp(f)/POW10(d)) : i2fp(f)/POW10(d)); +} + + +// Saturating addition +static inline fp16_t fp_add(fp16_t a, fp16_t b) +{ + fp16_t r; + if (__builtin_add_overflow(a, b, &r)) { + // Positive overflow + if (a > 0 && b > 0) { + r = INT32_MAX; + } else { + r = INT32_MIN; + } + } + return r; +} + + +static inline fp16_t fp_sub(fp16_t a, fp16_t b) +{ + fp16_t r; + if (__builtin_sub_overflow(a, b, &r)) { + if (a >= 0 && b < 0) { + r = INT32_MAX; + } else { + r = INT32_MIN; + } + } + return r; +} + + +// Saturating multiplication +static inline fp16_t fp_mul(fp16_t a, fp16_t b) +{ + int64_t r = ((int64_t)a * (int64_t)b + 0x8000) >> 16; + if (r > (int64_t)0x7fffffff) return INT32_MAX; + if (r < -(int64_t)0x80000000) return INT32_MIN; + return r; +} + + +static inline fp16_t fp_div(fp16_t a, fp16_t b) +{ + if (b == 0) { + return (a >= 0) ? INT32_MAX : INT32_MIN; + } + + int64_t r = ((int64_t)a << 16) / b; + + // Saturate to 32-bit bounds + if (r > (int64_t)0x7fffffff) return INT32_MAX; + if (r < -(int64_t)0x80000000) return INT32_MIN; + + return (fp16_t)r; +} + + +static inline fp16_t fp_clamp(fp16_t x, fp16_t min, fp16_t max) +{ + return x < min ? min : (x > max ? max : x); +} + + +static inline fp16_t fp_map(fp16_t x, fp16_t src_start, fp16_t src_end, fp16_t dst_start, fp16_t dst_end) +{ + fp16_t x_offset = fp_sub(x, src_start); + fp16_t dst_range = fp_sub(dst_end, dst_start); + fp16_t src_range = fp_sub(src_end, src_start); + fp16_t scaled = fp_mul(x_offset, dst_range); + fp16_t mapped_offset = fp_div(scaled, src_range); + return fp_add(mapped_offset, dst_start); +} + + +#endif // _FPMATH_H diff --git a/fw/fr_math/FR_defs.h b/fw/fr_math/FR_defs.h deleted file mode 100644 index 631126e..0000000 --- a/fw/fr_math/FR_defs.h +++ /dev/null @@ -1,120 +0,0 @@ -/** - * @file FR_Defs.h - type definitions used in Fixed-Radix math lib - * - * @copy Copyright (C) <2001-2026> - * @author M A Chatterjee - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * - * 3. This notice may not be removed or altered from any source - * distribution. - * - */ - -#ifndef __FR_Platform_Defs_H__ -#define __FR_Platform_Defs_H__ - -/* - * Fixed-width integer typedefs. - * - * Prefer C99 when available (gcc, clang, MSVC, IAR, Keil, sdcc, - * MSP430-gcc, AVR-gcc, RISC-V, ARM toolchains). For bare-metal toolchains - * or pre-C99 compilers that lack , define FR_NO_STDINT before - * including this header and the types are provided via sizeof()-based - * fallback definitions that cover the common 8/16/32/64-bit layouts. - */ -#ifndef FR_NO_STDINT -#include -#else -/* ---- fallback: no ------------------------------------ */ -/* Works on any toolchain where char=8, short=16, int/long=32 bits, - * which covers virtually all embedded targets (AVR, MSP430, ARM, - * 68HC11, 68k, PPC, RISC-V, Xtensa, x86). Adjust if your platform - * differs. - */ -typedef unsigned char uint8_t; -typedef signed char int8_t; -typedef unsigned short uint16_t; -typedef signed short int16_t; -#if defined(__AVR__) || defined(__MSP430__) || defined(__m68hc1x__) - /* On these targets int is 16-bit; use long for 32-bit */ - typedef unsigned long uint32_t; - typedef signed long int32_t; -#else - typedef unsigned int uint32_t; - typedef signed int int32_t; -#endif -/* 64-bit: available on most 32-bit+ GCC targets via long long */ -typedef unsigned long long uint64_t; -typedef signed long long int64_t; -#endif /* FR_NO_STDINT */ - -/* - * Arduino's USBAPI.h typedefs u8 and u16 as unsigned char / unsigned short. - * On AVR, uint8_t/uint16_t resolve to unsigned int types, which are the same - * width but different C++ types — causing a redefinition error. Skip those - * two typedefs when building in an Arduino environment; the Arduino-provided - * types are the same width and work identically. - * - * The guard checks __cplusplus too because Arduino.h (which pulls in - * USBAPI.h) is only auto-included in .ino/.cpp translation units. - * Plain-C files (.c) compiled by the Arduino build system need our - * typedefs even though the ARDUINO macro is defined for them. - */ -#if defined(ARDUINO) && defined(__cplusplus) - /* Arduino C++ TU — USBAPI.h already provides u8 and u16 */ -#else -typedef uint8_t u8; -typedef uint16_t u16; -#endif -typedef int8_t s8; -typedef int16_t s16; -typedef uint32_t u32; -typedef int32_t s32; -typedef uint64_t u64; -typedef int64_t s64; - -typedef short FR_bool; - -#define FR_SWAP_BYTES(x) (((x >> 8) & 0xff) | ((x << 8) & 0xff00)) - -/*======================================================= - * Sentinel values for math errors. - * - * Functions that can hit a domain error (sqrt of negative, log of <=0, etc.) - * return one of these sentinels rather than raising an error code. Callers - * that care can check `result == FR_DOMAIN_ERROR` before using the value. - * - * FR_OVERFLOW_POS and FR_OVERFLOW_NEG are the saturating-overflow values - * returned by FR_FixMulSat / FR_FixAddSat etc. They are deliberately the - * extremes of s32 so that they compare correctly under signed comparison. - * - * Note: FR_DOMAIN_ERROR equals INT32_MIN (the same value as FR_OVERFLOW_NEG - * would naturally be), so a single check `result <= FR_OVERFLOW_NEG` cannot - * distinguish "saturating-negative-overflow" from "domain error". In - * practice the two never occur in the same call: saturating ops never go - * to a domain-error state, and domain-error ops never saturate. The names - * exist to make caller intent self-documenting. - */ -#define FR_DOMAIN_ERROR ((s32)0x80000000) /* INT32_MIN: e.g. sqrt(<0) */ -#define FR_OVERFLOW_POS ((s32)0x7fffffff) /* INT32_MAX: e.g. FR_FixMulSat + */ -#define FR_OVERFLOW_NEG ((s32)0x80000000) /* INT32_MIN: e.g. FR_FixMulSat - */ - -#define FR_FALSE (0) -#define FR_TRUE (!FR_FALSE) - -#endif /* __FR_Platform_Defs_H__ */ diff --git a/fw/fr_math/FR_math.c b/fw/fr_math/FR_math.c deleted file mode 100644 index 95809f8..0000000 --- a/fw/fr_math/FR_math.c +++ /dev/null @@ -1,1837 +0,0 @@ -/** - * - * @file FR_math.c - c implementation file for basic fixed - * radix math routines - * - * @copy Copyright (C) <2001-2026> - * @author M A Chatterjee - * - * This file contains integer math settable fixed point radix math routines for - * use on systems in which floating point is not desired or unavailable. - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, please place an acknowledgment in the product documentation. - * - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * - * 3. This notice may not be removed or altered from any source - * distribution. - * - */ - -#include "FR_math.h" - -#ifndef FR_NO_STDINT -#include -#endif - -/*======================================================= - * Trig lookup tables (inlined — no separate header needed) - * - * Sine quadrant table: 129 entries covering [0, pi/2] in u0.15 format. - * Tangent octant table: 65 entries covering [0, pi/4] in u0.15 format. - * Generated by tools/coef-gen.py — do not hand-edit. - */ - -#define FR_TRIG_TABLE_BITS (7) -#define FR_TRIG_TABLE_SIZE ((1 << FR_TRIG_TABLE_BITS) + 1) - -#define FR_TRIG_FRAC_BITS (14 - FR_TRIG_TABLE_BITS) -#define FR_TRIG_FRAC_MAX (1 << FR_TRIG_FRAC_BITS) -#define FR_TRIG_FRAC_MASK (FR_TRIG_FRAC_MAX - 1) -#define FR_TRIG_FRAC_HALF (FR_TRIG_FRAC_MAX >> 1) -#define FR_TRIG_QUADRANT (1 << 14) - -static const unsigned short gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE] = { - 0, 402, 804, 1206, 1608, 2009, 2411, 2811, - 3212, 3612, 4011, 4410, 4808, 5205, 5602, 5998, - 6393, 6787, 7180, 7571, 7962, 8351, 8740, 9127, - 9512, 9896, 10279, 10660, 11039, 11417, 11793, 12167, - 12540, 12910, 13279, 13646, 14010, 14373, 14733, 15091, - 15447, 15800, 16151, 16500, 16846, 17190, 17531, 17869, - 18205, 18538, 18868, 19195, 19520, 19841, 20160, 20475, - 20788, 21097, 21403, 21706, 22006, 22302, 22595, 22884, - 23170, 23453, 23732, 24008, 24279, 24548, 24812, 25073, - 25330, 25583, 25833, 26078, 26320, 26557, 26791, 27020, - 27246, 27467, 27684, 27897, 28106, 28311, 28511, 28707, - 28899, 29086, 29269, 29448, 29622, 29792, 29957, 30118, - 30274, 30425, 30572, 30715, 30853, 30986, 31114, 31238, - 31357, 31471, 31581, 31686, 31786, 31881, 31972, 32058, - 32138, 32214, 32286, 32352, 32413, 32470, 32522, 32568, - 32610, 32647, 32679, 32706, 32729, 32746, 32758, 32766, - 32768 -}; - -#define FR_TAN_TABLE_BITS (6) -#define FR_TAN_TABLE_SIZE ((1 << FR_TAN_TABLE_BITS) + 1) -#define FR_TAN_FRAC_BITS (13 - FR_TAN_TABLE_BITS) -#define FR_TAN_FRAC_MAX (1 << FR_TAN_FRAC_BITS) -#define FR_TAN_FRAC_MASK (FR_TAN_FRAC_MAX - 1) -#define FR_TAN_FRAC_HALF (FR_TAN_FRAC_MAX >> 1) -#define FR_TAN_OCTANT (1 << 13) - -static const unsigned short gFR_TAN_TAB_O[FR_TAN_TABLE_SIZE] = { - 0, 402, 804, 1207, 1610, 2013, 2417, 2822, - 3227, 3634, 4042, 4450, 4861, 5272, 5686, 6101, - 6518, 6937, 7358, 7782, 8208, 8637, 9068, 9503, - 9940, 10381, 10825, 11273, 11725, 12180, 12640, 13104, - 13573, 14046, 14525, 15009, 15498, 15993, 16494, 17001, - 17515, 18035, 18563, 19098, 19640, 20191, 20750, 21318, - 21895, 22481, 23078, 23685, 24302, 24931, 25572, 26226, - 26892, 27572, 28266, 28975, 29699, 30440, 31198, 31973, - 32768 -}; - -/*======================================================= - * Full-precision radian/degree → BAM conversion helpers - * - * rad_to_bam_full(r) returns a full s32 BAM value where: - * upper 16 bits = integer BAM (the u16 table index) - * lower 16 bits = sub-BAM fractional part - * Input r must already be normalized to radix 16 and reduced to [-pi, pi]. - * - * The shift terms match FR_RAD2BAM (10 terms, ~21-bit accuracy) but are - * reordered so intermediate sums stay within s32 for |r| <= pi at r16. - */ -static s32 rad_to_bam_full(s32 r) -{ - /* 10 terms: 65536/(2*pi) ≈ 10430.37835... - * 2^13 + 2^11 + 2^7 + 2^6 - 2 + 0.5 - 0.125 + 2^-8 - 2^-11 - 2^-14 - * = 10430.378357 (~21-bit accuracy) - * Terms reordered: interleave negatives early to keep all intermediate - * sums within s32 for |r| <= pi at r16 (max result ≈ 2^31 - 4K). */ - return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14); -} - -#ifndef FR_LEAN -/* deg_to_bam_full(d) — same idea for degrees. - * Input d must already be normalized to radix 16 and reduced to [-90, 90). - * Returns full s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM). - * 7 terms, ~18-bit accuracy matching FR_DEG2BAM. */ -static s32 deg_to_bam_full(s32 d) -{ - return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9); -} -#endif - -/* Normalize a fixed-radix value to radix 16. */ -static s32 normalize_to_r16(s32 val, u16 radix) -{ - return (radix > 16) ? (val >> (radix - 16)) - : (radix < 16) ? (val << (16 - radix)) - : val; -} - -/* Reduce non-negative radian (at r16) to [0, 2*pi). */ -static s32 reduce_to_2pi(s32 r) -{ - const s32 two_pi = FR_TWO_PI(16); /* 411775 */ - if (r > (two_pi << 1)) - r -= (r / two_pi) * two_pi; - else if (r > two_pi) - r -= two_pi; - return r; -} - - -/* rad_r16_to_bam — convert radian (at r16) in [0, 2π) to u16 BAM. - * Uses quadrant decomposition to keep rad_to_bam_full in its safe - * [-π/2, π/2) range, mirroring the approach in fr_deg_to_bam. */ -static u16 rad_r16_to_bam(s32 r) -{ - const s32 half_pi = FR_HALF_PI(16); /* 102944 */ - const s32 three_half_pi = FR_THREE_HALF_PI(16); /* 308831 */ - const s32 pi = FR_PI(16); /* 205887 */ - const s32 two_pi = FR_TWO_PI(16); /* 411775 */ - - u16 offset = 0; - if (r >= half_pi && r < three_half_pi) { - r -= pi; - offset = 0x8000u; - } else if (r >= three_half_pi) { - r -= two_pi; - /* r is now in [-π/2, 0), no offset needed (u16 wraps naturally) */ - } - return (u16)(offset + (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16)); -} - -/* (rad_r16_to_bam32 removed — sub-BAM interpolation approach abandoned) */ - -/* fr_rad_to_bam — overflow-safe radian to u16 BAM conversion. - * Normalizes to r16, reduces to [0, 2π), uses quadrant decomposition. */ -u16 fr_rad_to_bam(s32 rad, u16 radix) -{ - s32 r = normalize_to_r16(rad, radix); - /* Normalize to [0, 2π) */ - if (r < 0) { - r += ((-r) / FR_TWO_PI(16)) * FR_TWO_PI(16); - if (r < 0) r += FR_TWO_PI(16); - } - r = reduce_to_2pi(r); - return rad_r16_to_bam(r); -} - -#ifndef FR_LEAN -/* fr_deg_to_bam — overflow-safe degree to u16 BAM conversion. - * Normalizes to r16, reduces to [-90, 90) with quadrant offset. */ -u16 fr_deg_to_bam(s32 deg, u16 radix) -{ - s32 d = normalize_to_r16(deg, radix); - - /* Reduce to [-180, 180) */ - if (d >= FR_D360_R16 || d < -FR_D360_R16) { - s32 n = d / FR_D360_R16; - d -= n * FR_D360_R16; - } - if (d >= FR_D180_R16) d -= FR_D360_R16; - if (d < -FR_D180_R16) d += FR_D360_R16; - - /* Reduce to [-90, 90) with BAM quadrant offset */ - u16 offset = 0; - if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; } - else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; } - - return (u16)(offset + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16)); -} -#endif - -/*======================================================= - * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan - * - * Internal model: every angle is reduced to a u16 BAM value. The top 2 bits - * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd - * quadrants (1, 3) reverse the in-quadrant index so the table is always read - * in the same direction. - * - * The table is a 129-entry SINE quadrant (ascending: 0 at index 0, 32768 at - * index 128). After mirroring, small full_pos → small output (near zero), - * which enables a cheap small-angle approximation: sin(θ) ≈ θ for angles - * below one table step (~0.7°). This eliminates table quantization error - * in the region where it matters most. - * - * Sign rule: quadrants 2 and 3 negate the result. - * Mirror rule: quadrants 1 and 3 flip the in-quadrant position. - */ -s32 fr_sin_bam(u16 bam) -{ - u32 q = ((u32)bam >> 14) & 0x3; /* top 2 bits = quadrant */ - u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1); /* bottom 14 bits */ - - /* Exact cardinal angles */ - if (inq == 0) { - if (q == 0 || q == 2) return 0; /* 0° or 180° → 0 */ - if (q == 1) return FR_TRIG_ONE; /* 90° → 1.0 */ - return -FR_TRIG_ONE; /* 270° → -1.0 */ - } - - /* Odd quadrants mirror: read table from the far end */ - if (q == 1 || q == 3) - inq = FR_TRIG_QUADRANT - inq; - - s32 v; - - /* Small-angle approximation: sin(θ) ≈ θ for inq < 128 (one table step). - * θ_rad = inq * (π/2) / 16384. Output = θ * 65536 = inq * FR_kQ2RAD / 16384. - * Max inq=127: 127 * 102944 / 16384 = 798. Error: θ³/6 < 3e-7 << 1 LSB. */ - if (inq < FR_TRIG_FRAC_MAX) { - v = (s32)(((u32)inq * 102944u + 8192u) >> 14); - } else { - /* Table lookup with 7-bit interpolation fraction */ - u32 idx = inq >> FR_TRIG_FRAC_BITS; - u32 frac = inq & FR_TRIG_FRAC_MASK; - s32 lo = (s32)gFR_SIN_TAB_Q[idx]; - s32 hi = (s32)gFR_SIN_TAB_Q[idx + 1]; - v = lo + (((hi - lo) * (s32)frac + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); - v <<= 1; /* u0.15 → s15.16 */ - } - - return (q >= 2) ? -v : v; -} - -s32 fr_cos_bam(u16 bam) -{ - /* cos(x) = sin(x + pi/2) = sin(bam + 16384). u16 wraparound is free. */ - return fr_sin_bam((u16)(bam + FR_BAM_QUADRANT)); -} - -s32 fr_cos(s32 rad, u16 radix) -{ - if (rad == 0) return FR_TRIG_ONE; - s32 r = normalize_to_r16(rad, radix); - if (r < 0) r = -r; - r = reduce_to_2pi(r); - /* Near π/2 or 3π/2 (cos=0 crossings): cos(π/2+δ) = -sin(δ) ≈ -δ, - * cos(3π/2+δ) = sin(δ) ≈ δ. */ - s32 delta = r - FR_HALF_PI(16); - if (delta >= -256 && delta <= 256) - return -delta; - delta = r - FR_THREE_HALF_PI(16); - if (delta >= -256 && delta <= 256) - return delta; - return fr_cos_bam(fr_rad_to_bam(rad, radix)); -} - -s32 fr_sin(s32 rad, u16 radix) -{ - if (rad == 0) return 0; - s32 r = normalize_to_r16(rad, radix); - s32 sign = 1; - if (r < 0) { r = -r; sign = -1; } - r = reduce_to_2pi(r); - /* Near 0 after reduction: sin(δ) ≈ δ */ - if (r < 256) { - s32 v = r; - return (sign < 0) ? -v : v; - } - /* Near π: sin(π + δ) = -sin(δ) ≈ -δ */ - s32 delta = r - FR_PI(16); - if (delta >= -256 && delta <= 256) { - s32 v = -delta; - return (sign < 0) ? -v : v; - } - /* Near 2π: sin(2π - δ) = -sin(δ) ≈ -δ, but δ = 2π - r */ - delta = FR_TWO_PI(16) - r; - if (delta >= 0 && delta < 256) { - s32 v = -delta; - return (sign < 0) ? -v : v; - } - /* Main path: reduce to [-π, π], convert to u16 BAM, table lookup */ - if (r > FR_PI(16)) r -= FR_TWO_PI(16); - u16 bam = (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16); - s32 v = fr_sin_bam(bam); - return (sign < 0) ? -v : v; -} - -#ifndef FR_LEAN -/*======================================================= - * BAM-native tangent: fr_tan_bam - * - * Uses a 65-entry octant table (gFR_TAN_TAB_O) for the first octant - * [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°-x) for the - * second octant (45°, 90°). Result is s15.16 with saturation at the - * poles. - * - * No 64-bit intermediates. One 32-bit division only in the >45° path. - */ -s32 fr_tan_bam(u16 bam) -{ - u32 q = ((u32)bam >> 14) & 0x3; /* quadrant (top 2 bits) */ - u32 inq = (u32)bam & 0x3FFFu; /* in-quadrant (14 bits) */ - s32 sign = 1; - u32 idx, frac; - s32 lo, hi, raw; - - /* Exact zeros: bam lands exactly on 0° or 180° */ - if (inq == 0 && (q == 0 || q == 2)) - return 0; - - /* Poles: bam lands exactly on 90° or 270° */ - if (inq == 0 && (q == 1 || q == 3)) - return (q == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - - /* Q1 (90°..180°) and Q3 (270°..360°): reflect and negate */ - if (q == 1 || q == 3) { - inq = 0x4000u - inq; - sign = -1; - } - - /* Now inq is in (0, 0x4000) = (0°, 90°) exclusive. - * Split into first octant [0, 45°) and second octant [45°, 90°). */ - if (inq < FR_TAN_OCTANT) { - /* First octant: direct table lookup + lerp. - * inq is 13 bits; top FR_TAN_TABLE_BITS index the table, - * bottom FR_TAN_FRAC_BITS drive interpolation. */ - idx = inq >> FR_TAN_FRAC_BITS; - frac = inq & FR_TAN_FRAC_MASK; - lo = (s32)gFR_TAN_TAB_O[idx]; - hi = (s32)gFR_TAN_TAB_O[idx + 1]; - raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); - - if (raw < 0x40) { - /* Near zero: redo interpolation with 4 extra bits of - * precision to reduce rounding error when result is small. */ - s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; - s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; - raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); - raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */ - } else { - raw <<= 1; /* u0.15 → s15.16 */ - } - } else { - /* Second octant: tan(x) = 1 / tan(90° - x). - * complement is in (0, 0x2000] = (0°, 45°]. */ - u32 comp = 0x4000u - inq; - - /* Look up tan(complement) from the table */ - idx = comp >> FR_TAN_FRAC_BITS; - frac = comp & FR_TAN_FRAC_MASK; - lo = (s32)gFR_TAN_TAB_O[idx]; - hi = (s32)gFR_TAN_TAB_O[idx + 1]; - raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); - - if (raw < 0x40) { - /* Near pole: redo interpolation with 4 extra bits of - * precision. The reciprocal amplifies small interpolation - * errors, so extra precision significantly helps here. - * Result: (2^31 / raw_hp) << 4 = 2^35 / raw_hp. */ - s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; - s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; - s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); - if (raw_hp < 32) { - raw = FR_TRIG_MAXVAL; - } else { - raw = (s32)((0x80000000u / (u32)raw_hp) << 4); - } - } else { - raw = (s32)(0x80000000u / (u32)raw); - } - } - - return (sign < 0) ? -raw : raw; -} -#endif /* FR_LEAN */ - -/* fr_tan — radian-input tangent. - * - * Normalize to [0, 2π], extract quadrant sign, convert rad→u16 BAM, - * then do direct octant table lookup + interpolation inline. - * Small-angle bypass at zero crossings: tan(x) ≈ x. - * Near poles: use radian distance directly (cot(δ) ≈ 1/δ) to avoid - * BAM quantization error amplified by the reciprocal. */ -s32 fr_tan(s32 rad, u16 radix) -{ - if (rad == 0) return 0; - s32 r = normalize_to_r16(rad, radix); - - /* tan(-x) = -tan(x): extract sign, work with |r| */ - s32 sign = 1; - if (r < 0) { r = -r; sign = -1; } - r = reduce_to_2pi(r); - - /* Small-angle bypass at zero crossings: tan(δ) ≈ δ */ - if (r < 256) - return (sign < 0) ? -r : r; - { - s32 delta = r - FR_PI(16); - if (delta >= -256 && delta <= 256) - return (sign < 0) ? -delta : delta; - } - { - s32 delta = FR_TWO_PI(16) - r; - if (delta >= 0 && delta < 256) - return (sign < 0) ? delta : -delta; - } - - /* Near-pole bypass: within POLE_THRESH r16 of π/2 or 3π/2, - * use cot(δ) ≈ 1/δ from the radian distance directly. - * Compute δ at r24 using precise pole constants (8× less rounding - * error than the r16 FR_HALF_PI/FR_THREE_HALF_PI constants). - * At δ=2048 r16 (1.79°), 1/δ error is ~0.03%. */ - { - const s32 pole_thresh = 2048; /* r16 units (~1.79°) */ - /* Precise pole positions at r24: - * π/2 × 2^24 = 26353589.76 → 26353590 - * 3π/2 × 2^24 = 79060769.28 → 79060769 */ - const s32 half_pi_r24 = 26353590; - const s32 three_half_pi_r24 = 79060769; - - s32 d1 = r - FR_HALF_PI(16); /* coarse check at r16 */ - s32 d2 = r - FR_THREE_HALF_PI(16); - s32 pole_delta_r24 = 0; - - if (d1 >= -pole_thresh && d1 <= pole_thresh) { - s32 r24 = r << 8; - s32 dd = r24 - half_pi_r24; - pole_delta_r24 = (dd < 0) ? -dd : dd; - } else if (d2 >= -pole_thresh && d2 <= pole_thresh) { - s32 r24 = r << 8; - s32 dd = r24 - three_half_pi_r24; - pole_delta_r24 = (dd < 0) ? -dd : dd; - } - - if (pole_delta_r24 > 0) { - /* Determine sign from radian quadrant */ - s32 pole_sign; - if (r < FR_HALF_PI(16)) - pole_sign = 1; /* before π/2: → +∞ */ - else if (r < FR_PI(16)) - pole_sign = -1; /* past π/2: → -∞ */ - else if (r <= FR_THREE_HALF_PI(16)) - pole_sign = 1; /* before 3π/2: → +∞ */ - else - pole_sign = -1; /* past 3π/2: → -∞ */ - - s32 raw; - if (pole_delta_r24 < 512) { - raw = FR_TRIG_MAXVAL; /* δ < 2 at r16 → saturate */ - } else { - /* cot(δ) ≈ 1/δ. In s15.16: (2^40) / δ_r24 */ - raw = (s32)((1ULL << 40) / (u32)pole_delta_r24); - if (raw > FR_TRIG_MAXVAL) raw = FR_TRIG_MAXVAL; - } - s32 v = (pole_sign < 0) ? -raw : raw; - return (sign < 0) ? -v : v; - } - } - - /* Convert radian to u16 BAM */ - u16 bam = rad_r16_to_bam(r); - - /* Decompose BAM into quadrant + in-quadrant */ - u32 q = ((u32)bam >> 14) & 0x3; - u32 inq = (u32)bam & 0x3FFFu; - s32 tsign = 1; /* tan sign from quadrant */ - - /* Exact zeros: bam lands on 0° or 180° */ - if (inq == 0 && (q == 0 || q == 2)) - return 0; - - /* Q1/Q3: reflect and negate */ - if (q == 1 || q == 3) { - inq = 0x4000u - inq; - tsign = -1; - } - - /* Octant table lookup + interpolation (same logic as fr_tan_bam) */ - u32 idx, frac; - s32 raw; - - if (inq < FR_TAN_OCTANT) { - /* First octant [0°, 45°): direct lookup */ - idx = inq >> FR_TAN_FRAC_BITS; - frac = inq & FR_TAN_FRAC_MASK; - s32 lo = (s32)gFR_TAN_TAB_O[idx]; - s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; - raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); - - if (raw < 0x40) { - s32 lo4 = lo << 4; - s32 hi4 = hi << 4; - raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); - raw = (raw + 4) >> 3; - } else { - raw <<= 1; - } - } else { - /* Second octant [45°, 90°): reciprocal identity */ - u32 comp = 0x4000u - inq; - idx = comp >> FR_TAN_FRAC_BITS; - frac = comp & FR_TAN_FRAC_MASK; - s32 lo = (s32)gFR_TAN_TAB_O[idx]; - s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; - raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); - - if (raw < 0x40) { - s32 lo4 = lo << 4; - s32 hi4 = hi << 4; - s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); - if (raw_hp < 32) - raw = FR_TRIG_MAXVAL; - else - raw = (s32)((0x80000000u / (u32)raw_hp) << 4); - } else { - raw = (s32)(0x80000000u / (u32)raw); - } - } - - /* Combine quadrant sign and input sign */ - s32 v = (tsign < 0) ? -raw : raw; - return (sign < 0) ? -v : v; -} - -#ifndef FR_LEAN -/*======================================================= - * Degree-input trig: convert to u16 BAM via fr_deg_to_bam, then - * call the BAM-native functions. Cardinal angles are exact. - */ - -s32 fr_cos_deg(s32 deg, u16 radix) -{ - if (radix == 0) return fr_cos_bam(FR_DEG2BAM_I(deg)); - if (deg < 0) deg = -deg; - /* Exact cardinal angles */ - s32 frac_mask = (1 << radix) - 1; - if ((deg & frac_mask) == 0) { - s32 rem = (deg >> radix) % 360; - if (rem == 0) return FR_TRIG_ONE; - if (rem == 90) return 0; - if (rem == 180) return -FR_TRIG_ONE; - if (rem == 270) return 0; - } - /* Near 90° or 270° (cos=0 crossings): cos(90+δ) = -sin(δ) ≈ -δ·π/180, - * cos(270+δ) = sin(δ) ≈ δ·π/180. Avoids BAM rounding error at zero. */ - s32 d = normalize_to_r16(deg, radix); - if (d >= FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; } - { - const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */ - s32 delta = d - FR_D90_R16; - if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { - s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); - return -dr; - } - delta = d - (FR_D90_R16 + FR_D180_R16); - if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { - s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); - return dr; - } - } - return fr_cos_bam(fr_deg_to_bam(deg, radix)); -} - -s32 fr_sin_deg(s32 deg, u16 radix) -{ - if (radix == 0) return fr_sin_bam(FR_DEG2BAM_I(deg)); - s32 sign = 1; - if (deg < 0) { deg = -deg; sign = -1; } - /* Exact cardinal angles */ - s32 frac_mask = (1 << radix) - 1; - if ((deg & frac_mask) == 0) { - s32 rem = (deg >> radix) % 360; - if (rem == 0) return 0; - if (rem == 90) return (sign < 0) ? -FR_TRIG_ONE : FR_TRIG_ONE; - if (rem == 180) return 0; - if (rem == 270) return (sign < 0) ? FR_TRIG_ONE : -FR_TRIG_ONE; - } - s32 v = fr_sin_bam(fr_deg_to_bam(deg, radix)); - return (sign < 0) ? -v : v; -} - -s32 FR_TanI(s32 deg) -{ - /* Exact pole: deg mod 180 == ±90. Sign matches input sign. */ - s32 rem = deg % 180; - if (rem == 90 || rem == -90) - return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - return fr_tan_bam(FR_DEG2BAM_I(deg)); -} - -s32 fr_tan_deg(s32 deg, u16 radix) -{ - if (radix == 0) return FR_TanI(deg); - s32 deg_orig = deg; - /* Normalize to [0, 360°) at caller radix */ - s32 d360 = 360 << radix; - if (deg < 0) { - deg += ((-deg) / d360) * d360; - if (deg < 0) deg += d360; - } - if (deg >= d360) { - deg -= (deg / d360) * d360; - } - /* Exact cardinal angles */ - s32 frac_mask = (1 << radix) - 1; - if ((deg & frac_mask) == 0) { - s32 ideg = deg >> radix; - if (ideg == 0 || ideg == 180) return 0; - if (ideg == 90 || ideg == 270) - return (deg_orig >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - } - /* Near 0° or 180° (tan=0 crossings): tan(δ) ≈ δ in radians */ - s32 d = normalize_to_r16(deg, radix); - { - const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */ - s32 delta; - /* Near 0° */ - if (d < DEG_THRESH) { - s32 up = d << 8; - return (FR_DEG2RAD(up) + (1 << 7)) >> 8; - } - /* Near 180° */ - delta = d - FR_D180_R16; - if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { - s32 up = delta << 8; - return (FR_DEG2RAD(up) + (1 << 7)) >> 8; - } - /* Near 360° */ - delta = FR_D360_R16 - d; - if (delta >= 0 && delta < DEG_THRESH) { - s32 up = delta << 8; - return -((FR_DEG2RAD(up) + (1 << 7)) >> 8); - } - } - /* Main path: convert to u16 BAM, table lookup */ - u16 bam = fr_deg_to_bam(deg, radix); - s32 v = fr_tan_bam(bam); - /* Near-pole BAM alias: determine sign from normalized angle position */ - if (bam == 0x4000u || bam == 0xC000u) { - s32 pole_d = (bam == 0x4000u) ? FR_D90_R16 : (FR_D90_R16 + FR_D180_R16); - v = (d < pole_d) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - } - return v; -} -#endif /* FR_LEAN */ - -/*======================================================= - * FR_FixMuls (x*y signed, NOT saturated, round-to-nearest) - * - * Treats x and y as fixed-point values at the same radix r and returns - * (x*y) >> r at radix r. The user is responsible for tracking the radix - * point and for guaranteeing the product fits in 32 bits. - * - * Adds 0.5 LSB (0x8000) before the shift so the result rounds to - * nearest instead of truncating toward zero. - */ -s32 FR_FixMuls(s32 x, s32 y) -{ - int64_t v = (int64_t)x * (int64_t)y; - return (s32)((v + 0x8000) >> 16); -} - -/*======================================================= - * FR_FixMulSat (x*y signed, SATURATED, round-to-nearest) - * - * Same semantics as FR_FixMuls but clamps to [INT32_MIN, INT32_MAX] on - * overflow instead of wrapping. The fixed-point radix is fixed at 16 bits - * (sM.16 inputs and output). Rounds to nearest (adds 0.5 LSB before shift). - */ -s32 FR_FixMulSat(s32 x, s32 y) -{ - int64_t v = ((int64_t)x * (int64_t)y + 0x8000) >> 16; - if (v > (int64_t)0x7fffffff) return FR_OVERFLOW_POS; - if (v < -(int64_t)0x80000000) return FR_OVERFLOW_NEG; - return (s32)v; -} - -/*======================================================= - FR_FixAddSat (x+y saturated add) - programmer must align radix points before using this function - */ -s32 FR_FixAddSat(s32 x, s32 y) -{ - s32 sum = x + y; - if (x < 0) - { - if (y < 0) - return (sum >= 0) ? FR_OVERFLOW_NEG : sum; - } - else - { - if (y >= 0) - return (sum <= 0) ? FR_OVERFLOW_POS : sum; - } - return sum; -} - -/* Inverse Trig - * acos with binary search of the BAM-native quadrant table. - * - * Algorithm: bring `input` into s0.15, then binary-search the first-quadrant - * cos table for the table entry closest to |input|. Apply quadrant mirror - * if input was negative. - */ -/* FR_acos — returns radians at out_radix. - * Range: [0, pi]. Input is a cosine value at the given radix. - * - * Uses the 129-entry sine table in reverse: binary-search the ascending - * table to find asin(|input|), then acos = pi/2 - asin (with sign handling - * for the second quadrant). - */ -s32 FR_acos(s32 input, u16 radix, u16 out_radix) -{ - s32 v; - s16 sign; - s32 lo, hi, mid; - s32 idx, d, num, frac; - s32 input_abs; - - /* Work with absolute value at the caller's radix */ - sign = (s16)((input < 0) ? 1 : 0); - input_abs = sign ? -input : input; - - /* Clamp at the caller's radix */ - { - s32 one = (s32)1 << radix; - if (input_abs >= one) - return sign ? FR_CHRDX(FR_kPI, FR_kPREC, out_radix) : 0; - } - - v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */ - - /* Small-angle fast path: when cos(θ) is close to 1.0, the sine table - * has poor resolution near the top (entries close together). - * Use acos(x) ≈ sqrt(2*(1-x)) instead. Threshold: v > sin_tab[121] - * means the input is > cos(7*π/256) ≈ 0.9975. */ - if (v > gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE - 8]) - { - s32 one = (s32)1 << radix; - s32 one_minus_x = one - input_abs; /* 1-|x| at caller radix */ - s32 two_omx = one_minus_x << 1; /* 2(1-|x|) at caller radix */ - s32 rad_native = FR_sqrt(two_omx, radix); /* radians at caller radix */ - s32 rad_out = FR_CHRDX(rad_native, radix, out_radix); - if (sign) - rad_out = FR_CHRDX(FR_kPI, FR_kPREC, out_radix) - rad_out; - return rad_out; - } - - /* Binary search on the ascending sine table. - * gFR_SIN_TAB_Q[0] = 0 (sin 0°), gFR_SIN_TAB_Q[128] = 32768 (sin 90°). - * - * Find the first index where table[idx] >= v. */ - lo = 0; - hi = FR_TRIG_TABLE_SIZE; - while (lo < hi) - { - mid = (lo + hi) >> 1; - if ((s32)gFR_SIN_TAB_Q[mid] < v) - lo = mid + 1; - else - hi = mid; - } - - /* lo is now the first index where table[lo] >= v. - * The bracketing interval is [lo-1, lo] with table[lo-1] < v <= table[lo]. - * This gives us the asin angle; acos = pi/2 - asin. */ - idx = lo; - if (idx <= 0) - { - idx = 0; - frac = 0; - } - else if (idx >= FR_TRIG_TABLE_SIZE) - { - idx = FR_TRIG_TABLE_SIZE - 1; - frac = 0; - } - else - { - /* Interpolate between table[idx-1] and table[idx]. - * d = table[idx] - table[idx-1] (>= 0, sin increasing) - * num = v - table[idx-1] (how far past table[idx-1]) - */ - d = (s32)gFR_SIN_TAB_Q[idx] - (s32)gFR_SIN_TAB_Q[idx - 1]; - num = v - (s32)gFR_SIN_TAB_Q[idx - 1]; - if (d > 0) - frac = ((num << FR_TRIG_FRAC_BITS) + (d >> 1)) / d; - else - frac = 0; - idx = idx - 1; - } - - { - /* asin_bam is the angle in first-quadrant BAM whose sin = v */ - u16 asin_bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac); - /* acos = pi/2 - asin (in BAM: quadrant - asin_bam) */ - u16 bam = (u16)(FR_TRIG_QUADRANT - asin_bam); - if (sign) - bam = (u16)(FR_BAM_HALF - bam); /* mirror: pi - angle */ - return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); - } -} - -/* FR_asin — returns radians at out_radix. Range: [-pi/2, pi/2]. */ -s32 FR_asin(s32 input, u16 radix, u16 out_radix) -{ - /* asin(x) = pi/2 - acos(x) */ - s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); - return half_pi - FR_acos(input, radix, out_radix); -} - -/* FR_atan2(y, x, out_radix) — full-circle arctangent, returns radians - * at the specified output radix (s32). - * - * Range: [-pi, pi]. Returns 0 for atan2(0,0). - * - * Implementation: normalise (x,y) via FR_hypot_fast8, then recover the - * angle with FR_asin or FR_acos (both use the 129-entry cosine table). - * To stay in the well-conditioned region of each inverse function we - * switch at 45°: - * |y| <= |x| → use asin(y/h) — asin stable near 0 - * |y| > |x| → use acos(x/h) — acos stable near pi/2 - * This keeps the derivative amplification factor below 1.414x everywhere. - */ -s32 FR_atan2(s32 y, s32 x, u16 out_radix) -{ - s32 ax, ay, h, q1_angle; - - /* Axis cases — exact angles, no divide. */ - if (x == 0) - { - if (y > 0) return FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* pi/2 */ - if (y < 0) return -FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* -pi/2 */ - return 0; - } - if (y == 0) - return (x > 0) ? 0 : FR_CHRDX(FR_kPI, FR_kPREC, out_radix); /* 0 or pi */ - - ax = (x < 0) ? -x : x; - ay = (y < 0) ? -y : y; - - /* Normalise so max(ax,ay) sits in [2^14, 2^15). This gives - * FR_hypot_fast8 enough integer bits for the shift-only segments - * to produce an accurate ratio — critical when the raw inputs are - * small (e.g. atan2(1,1) at radix 0). Scaling both by the same - * power of two doesn't change the angle. */ - { - s32 mx = (ax > ay) ? ax : ay; - while (mx < (1L << 14)) { ax <<= 1; ay <<= 1; mx <<= 1; } - while (mx >= (1L << 16)) { ax >>= 1; ay >>= 1; mx >>= 1; } - } - - h = FR_hypot_fast8((s32)ax, (s32)ay); - if (h == 0) return 0; /* degenerate */ - - /* Compute the first-quadrant angle (positive, [0..pi/2]). - * Divide produces a value in [0,1] at radix FR_TRIG_PREC (s0.15). - * - * Small-angle fast path: when the minor-axis ratio is small, - * asin(x) ≈ x (error < x³/6). Below ~5° the cubic term is - * smaller than the table-lookup error, so the direct identity - * is both faster and more accurate. Threshold 2753 at r15 - * corresponds to sin(~4.8°) = 0.084. */ - #define FR_ATAN2_SMALL 2753 - if (ay <= ax) - { - /* angle in [0°..45°]: use asin(ay/h) — well-conditioned near 0 */ - s32 sin_val = (s32)(((int64_t)ay << FR_TRIG_PREC) / h); - if (sin_val < FR_ATAN2_SMALL) - q1_angle = FR_CHRDX(sin_val, FR_TRIG_PREC, out_radix); - else - q1_angle = FR_asin(sin_val, FR_TRIG_PREC, out_radix); - } - else - { - /* angle in [45°..90°]: use acos(ax/h) — well-conditioned near pi/2 */ - s32 cos_val = (s32)(((int64_t)ax << FR_TRIG_PREC) / h); - if (cos_val < FR_ATAN2_SMALL) - { - /* angle ≈ pi/2 - cos_val (symmetric small-angle identity) */ - s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); - q1_angle = half_pi - FR_CHRDX(cos_val, FR_TRIG_PREC, out_radix); - } - else - q1_angle = FR_acos(cos_val, FR_TRIG_PREC, out_radix); - } - - /* Apply quadrant from signs of x and y. - * q1_angle is always positive [0..pi/2]. */ - { - s32 pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix); - if (x > 0) - return (y > 0) ? q1_angle : -q1_angle; - /* x < 0: mirror across y-axis */ - return (y > 0) ? (pi - q1_angle) : (q1_angle - pi); - } -} - -/* FR_atan(input, radix, out_radix) — arctangent of a single argument. - * Returns radians at out_radix, range [-pi/2, pi/2]. - */ -s32 FR_atan(s32 input, u16 radix, u16 out_radix) -{ - s32 one = (s32)1 << radix; - return FR_atan2(input, one, out_radix); -} - -/* 2^f table for f in [0, 1] in 65 entries (64 segments), output in s.16 - * fixed point. Entry i = round(2^(i/64) * 65536). Size: 260 bytes. - * Used by FR_pow2 to look up the fractional power of 2 with linear - * interpolation. - */ -static const u32 gFR_POW2_FRAC_TAB[65] = { - 65536, 66250, 66971, 67700, 68438, 69183, 69936, 70698, - 71468, 72246, 73032, 73828, 74632, 75444, 76266, 77096, - 77936, 78785, 79642, 80510, 81386, 82273, 83169, 84074, - 84990, 85915, 86851, 87796, 88752, 89719, 90696, 91684, - 92682, 93691, 94711, 95743, 96785, 97839, 98905, 99982, - 101070, 102171, 103283, 104408, 105545, 106694, 107856, 109031, - 110218, 111418, 112631, 113858, 115098, 116351, 117618, 118899, - 120194, 121502, 122825, 124163, 125515, 126882, 128263, 129660, - 131072 -}; - -/* FR_pow2(input, radix) — computes 2^(input/2^radix), result at same radix. - * - * Algorithm: split input into floor(integer) and fractional part. The - * fractional part is in [0, 1) by construction (Euclidean / mathematical - * floor — the fractional part of -2.3 is +0.7, not -0.3). Then - * 2^(int + frac) = 2^int * 2^frac - * where 2^frac is looked up from a 65-entry table at radix 16, and 2^int - * is a shift. - * - * Worst-case absolute error: ~1e-5 over [-8, 8] (65-entry table). - * Linear interpolation leaves a small concavity error in each interval. - */ -s32 FR_pow2(s32 input, u16 radix) -{ - s32 flr, frac_full, idx, frac_lo, lo, hi, mant, result; - u32 mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0; - - /* Mathematical floor: for positive input it's input>>radix; for - * negative input we need to round toward -infinity, not toward zero. - */ - if (input >= 0) - { - flr = (s32)((u32)input >> radix); - frac_full = (s32)((u32)input & mask); - } - else - { - s32 neg = -input; - s32 nflr = (s32)((u32)neg >> radix); - s32 nfrc = (s32)((u32)neg & mask); - if (nfrc == 0) - { - flr = -nflr; - frac_full = 0; - } - else - { - flr = -nflr - 1; /* floor toward -inf */ - frac_full = (s32)((1L << radix) - nfrc); - } - } - - /* frac_full is in [0, 2^radix). Re-radix it to s.16 for table lookup. */ - if (radix > 16) - frac_full >>= (radix - 16); - else if (radix < 16) - frac_full <<= (16 - radix); - /* now frac_full is in [0, 65536) representing fractional in s.16. */ - - /* Top 6 bits index the table; bottom 10 are the interpolation fraction. */ - idx = frac_full >> 10; - frac_lo = frac_full & ((1L << 10) - 1); - lo = (s32)gFR_POW2_FRAC_TAB[idx]; - hi = (s32)gFR_POW2_FRAC_TAB[idx + 1]; - mant = lo + (((hi - lo) * frac_lo) >> 10); /* mant in s.16, in [1.0, 2.0) */ - - /* Apply integer shift. mant is at radix 16. We want output at `radix`. - * If radix == 16: just shift mant. - * Otherwise re-radix mant first. - */ - if (flr >= 0) - { - /* result = mant << flr, then re-radix to caller's radix. */ - if (flr >= 30) - return FR_OVERFLOW_POS; - result = mant << flr; - return FR_CHRDX(result, 16, radix); - } - else - { - /* mant >> -flr at radix 16, then re-radix. */ - s32 sh = -flr; - if (sh >= 30) - return 0; /* underflow */ - result = mant >> sh; - return FR_CHRDX(result, 16, radix); - } -} - -/* log2 mantissa table for m in [1, 2), m = 1 + i/64, returning log2(m) - * in s.16 fixed point. 65 entries (last is log2(2) = 1.0 = 65536) so the - * interpolation between idx and idx+1 never reads out of bounds. - * Size: 260 bytes. Entry i = round(log2(1 + i/64) * 65536). - */ -static const u32 gFR_LOG2_MANT_TAB[65] = { - 0, 1466, 2909, 4331, 5732, 7112, 8473, 9814, - 11136, 12440, 13727, 14996, 16248, 17484, 18704, 19909, - 21098, 22272, 23433, 24579, 25711, 26830, 27936, 29029, - 30109, 31178, 32234, 33279, 34312, 35334, 36346, 37346, - 38336, 39316, 40286, 41246, 42196, 43137, 44068, 44990, - 45904, 46809, 47705, 48593, 49472, 50344, 51207, 52063, - 52911, 53751, 54584, 55410, 56229, 57040, 57845, 58643, - 59434, 60219, 60997, 61769, 62534, 63294, 64047, 64794, - 65536 -}; - -/* FR_log2(input, radix, output_radix) — log base 2 of a fixed-point number. - * - * input : value to take log2 of, treated as a positive sM.radix value. - * radix : number of fractional bits in `input`. - * output_radix : number of fractional bits in the result. - * - * Returns FR_LOG2MIN for input <= 0 (log of zero/negative is undefined; we - * return a large negative sentinel rather than crash). - * - * Algorithm: - * 1. Find p, the position of the leading 1 bit of `input`. - * log2(input) = p + log2(input / 2^p), where the second term is in - * [0, 1) because (input / 2^p) is in [1, 2). - * 2. Normalize the mantissa to s1.31 by shifting `input` so its top bit - * sits at bit 31 (so bits 30..25 are the upper 6 bits of m-1). - * 3. Look up log2(m) in the 65-entry table with linear interpolation - * across the next 24 bits. Result is in s.16. - * 4. integer_part = (p - radix), then result = (integer_part << 16) + - * mantissa_log2. - * 5. Re-radix to the requested output_radix via FR_CHRDX. - * - * Worst-case absolute error: ~6e-5 in log2 units (65-entry table). - */ -s32 FR_log2(s32 input, u16 radix, u16 output_radix) -{ - s32 p, integer_part, idx, frac, lo, hi, mant_log2, result; - u32 m, u; - - if (input <= 0) - return FR_LOG2MIN; - - /* Step 1: find the position of the leading 1 bit. */ - u = (u32)input; - p = 0; - while (u > 1) - { - u >>= 1; - p++; - } - - /* Step 2: shift input so the leading 1 bit is at bit 30 (s1.30 mantissa). - * Equivalently: m = input << (30 - p), where m is in [2^30, 2^31). - * The fractional part of m / 2^30 is in [0, 1), and that's what we look - * up in the table. - */ - if (p >= 30) - m = (u32)input >> (p - 30); - else - m = (u32)input << (30 - p); - - /* m is now in [2^30, 2^31). Subtract 2^30 to get the fractional part - * (m_frac in [0, 2^30)). Index into the 64-entry table is the top 6 - * bits of m_frac; the lower 24 bits are the interpolation fraction. - */ - m -= (1u << 30); - idx = (s32)(m >> 24); /* 6 bits */ - frac = (s32)(m & ((1u << 24) - 1)); /* 24 bits */ - lo = (s32)gFR_LOG2_MANT_TAB[idx]; - hi = (s32)gFR_LOG2_MANT_TAB[idx + 1]; - mant_log2 = lo + (s32)(((int64_t)(hi - lo) * frac) >> 24); - - /* Step 3: assemble. integer_part = p - radix. */ - integer_part = p - (s32)radix; - result = (integer_part << 16) + mant_log2; - - /* Step 4: re-radix to output_radix. */ - return FR_CHRDX(result, 16, output_radix); -} - -s32 FR_ln(s32 input, u16 radix, u16 output_radix) -{ - s32 r = FR_log2(input, radix, output_radix); - return FR_MULK28(r, FR_krLOG2E_28); -} - -#ifndef FR_LEAN -s32 FR_log10(s32 input, u16 radix, u16 output_radix) -{ - s32 r = FR_log2(input, radix, output_radix); - return FR_MULK28(r, FR_krLOG2_10_28); -} -#endif - -#ifndef FR_NO_PRINT -/*************************************** - * FR_printNumD - write a decimal integer with space padding. - * - * Equivalent to "%*d" in printf, modulo the return convention. - * - * f : per-character output function (e.g. putchar). Must not be NULL. - * n : signed integer to print. - * pad : minimum field width; spaces are prepended to reach this width. - * - * Returns the number of characters written on success, or -1 if `f` is NULL. - */ -int FR_printNumD(int (*f)(char), int n, int pad) -{ - unsigned int mag; - int written = 0, neg = 0; - int digits = 1; - unsigned int t; - - if (!f) - return -1; - - if (n < 0) - { - neg = 1; - mag = (unsigned int)(-(long)n); /* safe for INT_MIN */ - } - else - { - mag = (unsigned int)n; - } - - /* Count decimal digits in mag (always at least 1 for n=0). */ - t = mag; - while (t >= 10) - { - t /= 10; - digits++; - } - - /* Pad with spaces. The total width includes the sign. */ - { - int total = digits + (neg ? 1 : 0); - while (pad-- > total) - { - f(' '); - written++; - } - } - - if (neg) - { - f('-'); - written++; - } - - /* Print digits MSB first by computing the largest power of 10 <= mag. */ - { - unsigned int p = 1; - int i; - for (i = 1; i < digits; i++) - p *= 10; - while (p > 0) - { - f((char)('0' + (mag / p) % 10)); - written++; - if (p == 1) - break; - p /= 10; - } - } - - return written; -} - -/*************************************** - * FR_printNumF - write a fixed-point number as a decimal floating-point string. - * - * f : per-character output function. Must not be NULL. - * n : signed fixed-point value at the given radix. - * radix : number of fractional bits in `n`. - * pad : minimum field width (including sign and decimal point). - * prec : number of fractional digits to print. - * - * Returns the number of characters written on success, -1 if `f` is NULL. - * - * Rounding policy: truncates fractional digits beyond `prec` (no rounding). - */ -int FR_printNumF(int (*f)(char), s32 n, int radix, int pad, int prec) -{ - unsigned int mag_int; - u32 mag_frac; - u32 frac_mask; - int written = 0, neg = 0; - int int_digits = 1; - int total; - unsigned int t; - - if (!f) - return -1; - - frac_mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0; - - if (n < 0) - { - neg = 1; - /* Negate as unsigned to avoid INT_MIN overflow. */ - u32 un = (u32)(-(int64_t)n); - mag_int = (unsigned int)(un >> radix); - mag_frac = un & frac_mask; - } - else - { - mag_int = (unsigned int)((u32)n >> radix); - mag_frac = (u32)n & frac_mask; - } - - /* Count integer digits. */ - t = mag_int; - while (t >= 10) - { - t /= 10; - int_digits++; - } - - /* Total visible width = sign + int + (dot + prec digits if prec>0). */ - total = int_digits + (neg ? 1 : 0) + ((prec > 0) ? (1 + prec) : 0); - while (pad-- > total) - { - f(' '); - written++; - } - - if (neg) - { - f('-'); - written++; - } - - /* Print integer part. */ - { - unsigned int p = 1; - int i; - for (i = 1; i < int_digits; i++) - p *= 10; - while (p > 0) - { - f((char)('0' + (mag_int / p) % 10)); - written++; - if (p == 1) - break; - p /= 10; - } - } - - /* Print fractional part. Extract one decimal digit at a time: - * frac' = frac * 10 - * digit = frac' >> radix - * frac = frac' & frac_mask - */ - if (prec > 0) - { - f('.'); - written++; - while (prec-- > 0) - { - u32 scaled; - int digit; - scaled = (u32)(((uint64_t)mag_frac * 10)); - digit = (int)(scaled >> radix); - mag_frac = scaled & frac_mask; - f((char)('0' + (digit % 10))); - written++; - } - } - - return written; -} - -/*************************************** - * FR_printNumH - write an integer as hexadecimal. - * - * f : per-character output function. Must not be NULL. - * n : integer to print (interpreted as unsigned for the digits). - * showPrefix : if non-zero, prepend "0x". - * - * Returns the number of characters written on success, -1 if f is NULL. - */ -int FR_printNumH(int (*f)(char), int n, int showPrefix) -{ - unsigned int u = (unsigned int)n; - int written = 0; - int x = (int)((sizeof(int) << 1) - 1); - int d; - - if (!f) - return -1; - - if (showPrefix) - { - f('0'); - f('x'); - written += 2; - } - - do - { - d = (int)((u >> (x << 2)) & 0xf); - d = (d > 9) ? (d - 0xa + 'a') : (d + '0'); - f((char)d); - written++; - } while (x--); - - return written; -} - -/*======================================================= - * FR_numstr — parse a decimal string into a fixed-point value. - * - * This is the runtime inverse of FR_printNumF: given a string like - * "12.34" or "-0.05" and a radix (number of fractional bits), it - * returns the s32 fixed-point representation. - * - * Features: - * - Leading whitespace is skipped. - * - Optional sign ('+' or '-'). - * - Up to 9 fractional digits are used (s32 range). - * - No malloc, no strtod, no libm. - * - * Returns 0 for NULL or empty input. - */ -s32 FR_numstr(const char *s, u16 radix) -{ - static const s32 pow10[10] = { - 1L, 10L, 100L, 1000L, 10000L, - 100000L, 1000000L, 10000000L, 100000000L, 1000000000L - }; - s32 int_part = 0, frac_part = 0; - int frac_digits = 0, neg = 0; - s32 result; - - if (!s || !*s) return 0; - - while (*s == ' ' || *s == '\t') s++; /* skip whitespace */ - if (*s == '-') { neg = 1; s++; } /* sign */ - else if (*s == '+') { s++; } - - while (*s >= '0' && *s <= '9') /* integer part */ - { int_part = int_part * 10 + (*s - '0'); s++; } - - if (*s == '.') { /* fractional part */ - s++; - while (*s >= '0' && *s <= '9') { - if (frac_digits < 9) - { frac_part = frac_part * 10 + (*s - '0'); frac_digits++; } - s++; - } - } - - result = int_part << radix; - if (frac_digits > 0) - result += (s32)(((int64_t)frac_part << radix) / pow10[frac_digits]); - - return neg ? -result : result; -} -#endif /* FR_NO_PRINT */ - -/*======================================================= - * Square root and hypot - * - * fr_isqrt64 is a private helper implementing the digit-by-digit - * ("shift-and-subtract") integer square root. The core loop computes - * floor(sqrt(n)), then a final remainder check rounds to nearest. - * Uses no division. At most 32 iterations. - */ -static u32 fr_isqrt64(uint64_t n) -{ - uint64_t root = 0; - uint64_t bit = (uint64_t)1 << 62; - while (bit > n) bit >>= 2; - while (bit != 0) - { - uint64_t trial = root + bit; - if (n >= trial) - { - n -= trial; - root = (root >> 1) + bit; - } - else - { - root >>= 1; - } - bit >>= 2; - } - /* round to nearest: if remainder > root, (root+1)^2 is closer */ - if (n > root) - root++; - return (u32)root; -} - -/*======================================================= - * FR_sqrt - fixed-radix square root. - * - * input : value at radix `radix`. Must be >= 0. - * radix : fractional bits of input AND result. - * return : sqrt(input) at radix `radix`, or FR_DOMAIN_ERROR if input < 0. - * - * Math: sqrt(input_fp / 2^r) at radix r is - * result_fp = sqrt(input_fp / 2^r) * 2^r = sqrt(input_fp * 2^r) - * so we compute isqrt(input_fp << radix) on a 64-bit accumulator. This - * works for any input that fits in s32 and any radix in [0, 30]. - * - * Precision: round-to-nearest sqrt. Worst-case absolute error is - * <= 0.5 LSB at the requested radix. - * Always non-negative for non-negative input. Result is monotone in - * input. - * - * Saturation: input < 0 returns FR_DOMAIN_ERROR (= INT32_MIN). Caller - * can test `result == FR_DOMAIN_ERROR` to detect domain errors. - * - * Side effects: none. Pure function. - */ -s32 FR_sqrt(s32 input, u16 radix) -{ - uint64_t n; - - if (input < 0) - return FR_DOMAIN_ERROR; - if (input == 0) - return 0; - - n = (uint64_t)(u32)input << radix; - return (s32)fr_isqrt64(n); -} - -/*======================================================= - * FR_hypot - sqrt(x*x + y*y) without intermediate overflow. - * - * x, y : values at radix `radix` - * radix : fractional bits of inputs AND result - * return : sqrt(x*x + y*y) at radix `radix`. - * - * Math: x*x + y*y is naturally at radix 2*radix; isqrt of a 2r-radix - * value yields an r-radix result, so no extra shifting is needed. The - * u64 accumulator can hold (INT32_MAX^2)*2 = ~2^63, so (x*x + y*y) never - * overflows for any s32 inputs. - * - * Precision: round-to-nearest. Worst-case absolute error <= 0.5 LSB - * at the requested radix. - * - * Side effects: none. Pure function. - */ -#ifndef FR_LEAN -s32 FR_hypot(s32 x, s32 y, u16 radix) -{ - uint64_t xx = (uint64_t)((int64_t)x * (int64_t)x); - uint64_t yy = (uint64_t)((int64_t)y * (int64_t)y); - (void)radix; /* the 2*radix in xx+yy cancels with isqrt's halving */ - return (s32)fr_isqrt64(xx + yy); -} -#endif - -/*======================================================= - * FR_hypot_fast8 — 8-segment piecewise-linear magnitude approximation. - * - * Shift-only, no multiply, no 64-bit. Based on the piecewise-linear - * method described in US Patent 6,567,777 B1 (Chatterjee, expired). - * Peak error: ~0.10%. - */ -s32 FR_hypot_fast8(s32 x, s32 y) -{ - s32 hi, lo; - - /* absolute values (clamp INT32_MIN to INT32_MAX to avoid UB) */ - if (x < 0) x = (x == (s32)0x80000000) ? 0x7FFFFFFF : -x; - if (y < 0) y = (y == (s32)0x80000000) ? 0x7FFFFFFF : -y; - - /* hi = max(|x|,|y|), lo = min(|x|,|y|) */ - if (x > y) { hi = x; lo = y; } - else { hi = y; lo = x; } - - if (hi == 0) return 0; - - /* 8 piecewise-linear segments: dist ≈ a*hi + b*lo. - * Boundaries at β = 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875. */ - if ((hi >> 1) < lo) { - /* β in (0.5, 1.0] */ - if (lo > hi - (hi >> 2)) { - /* β in (0.75, 1.0] */ - if (lo > hi - (hi >> 3)) /* β > 0.875 */ - /* a≈0.7305, b≈0.6836 */ - return hi - (hi >> 2) - (hi >> 6) - (hi >> 8) - + lo - (lo >> 2) - (lo >> 4) - (lo >> 8); - else /* β in (0.75, 0.875] */ - /* a≈0.7803, b≈0.6262 */ - return hi - (hi >> 2) + (hi >> 5) - (hi >> 10) - + (lo >> 1) + (lo >> 3) + (lo >> 10) + (lo >> 12); - } else { - /* β in (0.5, 0.75] */ - if (lo > hi - (hi >> 1) + (hi >> 3)) /* β > 0.625 */ - /* a≈0.8281, b≈0.5630 */ - return hi - (hi >> 2) + (hi >> 4) + (hi >> 6) - + (lo >> 1) + (lo >> 4) + (lo >> 11); - else /* β in (0.5, 0.625] */ - /* a≈0.8728, b≈0.4893 */ - return hi - (hi >> 3) - (hi >> 9) - (hi >> 12) - + (lo >> 1) - (lo >> 6) + (lo >> 8) + (lo >> 10); - } - } else { - /* β in [0, 0.5] */ - if ((hi >> 2) < lo) { - /* β in (0.25, 0.5] */ - if ((hi >> 1) - (hi >> 3) < lo) /* β > 0.375 */ - /* a≈0.9180, b≈0.3984 */ - return hi - (hi >> 4) - (hi >> 6) - (hi >> 8) - + (lo >> 1) - (lo >> 3) + (lo >> 5) - (lo >> 7); - else /* β in (0.25, 0.375] */ - /* a≈0.9551, b≈0.2988 */ - return hi - (hi >> 4) + (hi >> 6) + (hi >> 9) - + (lo >> 2) + (lo >> 4) - (lo >> 6) + (lo >> 9); - } else { - /* β in [0, 0.25] */ - if ((hi >> 3) < lo) /* β in (0.125, 0.25] */ - /* a≈0.9839, b≈0.1838 */ - return hi - (hi >> 6) - (hi >> 11) - + (lo >> 2) - (lo >> 4) - (lo >> 8) + (lo >> 12); - else /* β in [0, 0.125] */ - /* a≈0.9990, b≈0.0620 */ - return hi - (hi >> 10) - + (lo >> 4) - (lo >> 11); - } - } -} - -#ifndef FR_NO_WAVES -/*======================================================= - * Wave generators — synth-style fixed-shape waveforms. - * - * All wave functions take a u16 BAM phase in [0, 65535] (a full cycle) - * and return s16 in s0.15 format, clamped to [-32767, +32767] to match - * the trig amplitude convention used by fr_cos_bam / fr_sin_bam. - * - * Use FR_HZ2BAM_INC(hz, sample_rate) to compute a phase increment for - * a given output frequency, then accumulate it (mod 2^16) per sample. - * - * Side effects: pure functions (except fr_wave_noise which advances a - * caller-provided LFSR state pointer). - */ - -/* fr_wave_sqr - 50%-duty square wave. - * phase < pi (BAM<0x8000) → +full; phase >= pi → -full. - */ -s16 fr_wave_sqr(u16 phase) -{ - return (phase < 0x8000) ? (s16)32767 : (s16)-32767; -} - -/* fr_wave_pwm - variable-duty pulse. - * `duty` is the BAM threshold: phase < duty → high, else low. - * duty = 0 → always low - * duty = 0x8000 → 50% duty (same as fr_wave_sqr) - * duty = 0xffff → high almost everywhere (one BAM step low) - */ -s16 fr_wave_pwm(u16 phase, u16 duty) -{ - return (phase < duty) ? (s16)32767 : (s16)-32767; -} - -/* fr_wave_saw - rising sawtooth. - * Linear ramp from -32767 (just after phase=0) to +32767 (at phase=0xffff), - * passing through 0 at phase=0x8000. The single boundary case phase=0 - * (which would naturally produce -32768) is clamped to -32767 to keep the - * amplitude symmetric. - */ -s16 fr_wave_saw(u16 phase) -{ - s32 v = (s32)phase - (s32)0x8000; - if (v < -32767) v = -32767; - return (s16)v; -} - -/* fr_wave_tri - symmetric triangle. - * Four linear segments: - * Q1 [0, 0x4000) : rising 0 → +peak - * Q2 [0x4000, 0x8000): falling +peak → 0 - * Q3 [0x8000, 0xc000): falling 0 → -peak - * Q4 [0xc000, 0x10000): rising -peak → 0 - * Peaks are clamped to +/-32767 (the natural unclamped formula gives - * +/-32768 at the exact peak BAM). - */ -s16 fr_wave_tri(u16 phase) -{ - s32 t; - if (phase < 0x8000) - { - /* First half: 0 -> +peak -> 0 */ - if (phase < 0x4000) - t = (s32)phase << 1; /* 0 .. 0x7ffe */ - else - t = (s32)(0x8000 - phase) << 1; /* 0x8000 .. 2 */ - if (t > 32767) t = 32767; - return (s16)t; - } - else - { - /* Second half: 0 -> -peak -> 0 */ - if (phase < 0xc000) - t = (s32)(phase - 0x8000) << 1; /* 0 .. 0x7ffe */ - else - t = (s32)(0x10000 - phase) << 1;/* 0x8000 .. 2 */ - if (t > 32767) t = 32767; - return (s16)-t; - } -} - -/* fr_wave_tri_morph - variable-symmetry triangle. - * - * phase : u16 BAM - * break_point : u16 BAM where the wave reaches its positive peak. - * - * Going from 0 to +peak in [0, break_point), then from +peak back to 0 - * in [break_point, 0xffff]. The result is a triangle whose rising and - * falling slopes can differ. - * - * break_point = 0x8000 → symmetric triangle - * break_point = 0xffff → rising sawtooth (instant fall) - * break_point = 0x0001 → falling sawtooth (instant rise) - * break_point = 0 → degenerate; treated as 1 to avoid div-by-zero - * - * Note that this version returns values in [0, 32767] only (not bipolar). - * Caller can subtract 16384 and double if a bipolar version is desired. - * - * Costs: one 32-bit divide per sample. On Cortex-M3+ this is ~10-20 - * cycles. On 8051 / MSP430 this is much slower; pre-compute slopes if - * those targets matter to you. - */ -s16 fr_wave_tri_morph(u16 phase, u16 break_point) -{ - u32 t; - if (break_point == 0) - break_point = 1; - if (phase < break_point) - { - /* rising: 0 at phase=0, 32767 at phase=break_point */ - t = (u32)(((u32)phase * 32767UL) / (u32)break_point); - } - else - { - /* falling: 32767 at phase=break_point, 0 at phase=0xffff */ - u32 span = (u32)0xffff - (u32)break_point; - if (span == 0) - return 32767; - t = (u32)(((u32)((u32)0xffff - (u32)phase) * 32767UL) / span); - } - if (t > 32767) t = 32767; - return (s16)t; -} - -/* fr_wave_noise - LFSR-based pseudorandom noise. - * - * state : pointer to a u32 the caller maintains. Initial value must - * be non-zero (zero is a fixed point of the LFSR). A common - * seed is 0xACE1u or any other non-zero constant. - * - * Returns the next s16 sample in s0.15 (full ±32767 range, white-ish). - * Implementation: 32-bit Galois LFSR with the standard maximal-period - * tap polynomial 0xD0000001 (period 2^32 - 1 samples). - * - * Quality: this is "fast white noise" suitable for synth use. It is NOT - * cryptographically secure. For better statistical properties (FFT - * flatness etc.) layer a longer LFSR or use a separate PRNG. - */ -s16 fr_wave_noise(u32 *state) -{ - u32 lsb; - if (!state) - return 0; - lsb = *state & 1u; - *state >>= 1; - if (lsb) - *state ^= 0xD0000001u; - /* Take the top 16 bits and re-bias to s16 range, clamp to ±32767. */ - { - s32 v = (s32)((*state >> 16) & 0xffffu) - 32768; - if (v < -32767) v = -32767; - return (s16)v; - } -} - -/*======================================================= - * ADSR envelope generator - * - * Linear-segment Attack-Decay-Sustain-Release envelope. State is held - * in caller-allocated fr_adsr_t struct (no global state, no malloc). - * - * Lifecycle: - * 1. Caller allocates an fr_adsr_t (stack or static). - * 2. fr_adsr_init() once per patch with attack/decay/release durations - * in samples and a sustain level in s0.15. - * 3. fr_adsr_trigger() on note-on. Output rises 0 -> peak over `atk` - * samples, falls peak -> sustain over `dec` samples, then holds. - * 4. fr_adsr_release() on note-off. Output falls current -> 0 over a - * time controlled by the release rate (rate, not duration: the - * time depends on where in the envelope we are). - * 5. fr_adsr_step() once per audio sample to read the current value. - * - * Internal precision: levels are stored as s32 in s1.30 format so even - * very long envelopes (e.g. 48000-sample attack at 48 kHz = 1 second) - * have a non-zero per-sample increment. Output is converted to s0.15. - * - * Saturation: the envelope state machine is self-clamping; level cannot - * escape [0, 1<<30]. Output is in [0, 32767]. - */ - -#define FR_ADSR_PEAK_S130 ((s32)1 << 30) - -void fr_adsr_init(fr_adsr_t *env, - u32 attack_samples, - u32 decay_samples, - s16 sustain_level_s015, - u32 release_samples) -{ - if (!env) - return; - env->state = FR_ADSR_IDLE; - env->level = 0; - - /* sustain_level_s015 is s16 so its upper bound (32767) is already the - * type's max; only the lower bound needs an explicit clamp. */ - if (sustain_level_s015 < 0) - sustain_level_s015 = 0; - /* Convert s0.15 -> s1.30 by shifting left 15. */ - env->sustain = (s32)sustain_level_s015 << 15; - - env->attack_inc = (attack_samples > 0) - ? (s32)(FR_ADSR_PEAK_S130 / attack_samples) - : FR_ADSR_PEAK_S130; - env->decay_dec = (decay_samples > 0) - ? (s32)((FR_ADSR_PEAK_S130 - env->sustain) / (s32)decay_samples) - : (FR_ADSR_PEAK_S130 - env->sustain); - env->release_dec = (release_samples > 0) - ? (s32)(FR_ADSR_PEAK_S130 / release_samples) - : FR_ADSR_PEAK_S130; -} - -void fr_adsr_trigger(fr_adsr_t *env) -{ - if (!env) - return; - env->state = FR_ADSR_ATTACK; - env->level = 0; -} - -void fr_adsr_release(fr_adsr_t *env) -{ - if (!env) - return; - env->state = FR_ADSR_RELEASE; -} - -s16 fr_adsr_step(fr_adsr_t *env) -{ - if (!env) - return 0; - switch (env->state) - { - case FR_ADSR_ATTACK: - env->level += env->attack_inc; - if (env->level >= FR_ADSR_PEAK_S130) - { - env->level = FR_ADSR_PEAK_S130; - env->state = FR_ADSR_DECAY; - } - break; - case FR_ADSR_DECAY: - env->level -= env->decay_dec; - if (env->level <= env->sustain) - { - env->level = env->sustain; - env->state = FR_ADSR_SUSTAIN; - } - break; - case FR_ADSR_SUSTAIN: - env->level = env->sustain; - break; - case FR_ADSR_RELEASE: - env->level -= env->release_dec; - if (env->level <= 0) - { - env->level = 0; - env->state = FR_ADSR_IDLE; - } - break; - case FR_ADSR_IDLE: - default: - env->level = 0; - break; - } - /* s1.30 -> s0.15: shift right 15. Clamp for safety. */ - { - s32 out = env->level >> 15; - if (out < 0) out = 0; - if (out > 32767) out = 32767; - return (s16)out; - } -} -#endif /* FR_NO_WAVES */ diff --git a/fw/fr_math/FR_math.h b/fw/fr_math/FR_math.h deleted file mode 100644 index a2db262..0000000 --- a/fw/fr_math/FR_math.h +++ /dev/null @@ -1,653 +0,0 @@ -/** - * @FR_math.h - header definition file for fixed radix math routines - * - * @copy Copyright (C) <2001-2026> - * @author M A Chatterjee - * - * This file contains integer math settable fixed point radix math routines for - * use on systems in which floating point is not desired or unavailable. - * - * @license: - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * - * 3. This notice may not be removed or altered from any source - * distribution. - * - */ - -#ifndef __FR_Math_h__ -#define __FR_Math_h__ - -#define FR_MATH_VERSION "2.0.8" -#define FR_MATH_VERSION_HEX 0x020008 /* major << 16 | minor << 8 | patch */ - -#ifdef FR_CORE_ONLY -#define FR_NO_PRINT -#define FR_NO_WAVES -#endif - -#ifdef FR_LEAN -#define FR_NO_WAVES -#endif - -#ifdef __cplusplus -extern "C" -{ -#endif - -#ifndef __FR_Platform_Defs_H__ -#include "FR_defs.h" -#endif - -/* Quick note on macro parameter wrapping: - * Arguments are parenthesized in expansions, e.g. - * #define MACRO_X_SQUARED(x) ((x)*(x)) // inner parens around each x - * Macros substitute text as-is. If a parameter is an expression like 3+4*5 - * and the body mixes operators without extra parentheses, precedence errors - * follow. Parenthesize parameters (and fragile subexpressions) in the macro body. - * Example: MACRO_X_SQUARED_BAD(x) (x*x) -> 3+4*5*3+4*5 == 83 (wrong). - * MACRO_X_SQUARED(x) ((x)*(x)) -> (3+4*5)*(3+4*5) == 529 (right). - */ - -/*absolute value for integer and fixed radix types*/ -#define FR_ABS(x) (((x) < 0) ? (-(x)) : (x)) - -/*sign of x. Not as good as The Sign of Four, but I digress */ -#define FR_SGN(x) ((x) >> ((((signed)sizeof(x)) << 3) - 1)) - -/*=============================================== - * Simple Fixed Point Math Conversions - * r is radix precision in bits, converts to/from integer w truncation */ -#define I2FR(x, r) ((x) << (r)) -#define FR2I(x, r) ((x) >> (r)) - -/*=============================================== - * Make a fixed radix number from integer + base-10 fractional parts. - * r = output radix (fractional bits) - * i = integer part (signed) - * f = decimal-fraction digits as written, e.g. for "12.34" pass f=34 - * d = number of decimal digits in f, e.g. for "12.34" pass d=2 - * - * FR_NUM(12, 34, 2, 10) ≈ 12.34 in s.10 = (12<<10) + (34<<10)/100 = 12636 - * FR_NUM(-3, 5, 1, 16) ≈ -3.5 in s.16 = (-3<<16) - (5<<16)/10 - * - * The fraction is rounded toward zero. For round-to-nearest, add half an LSB - * before scaling at the call site. Sign of the fractional part follows the - * sign of i (for i==0 the result is positive, matching "+0.5" intuition). - */ -#define FR_NUM_POW10(d) ( \ - ((d) == 0) ? 1L : \ - ((d) == 1) ? 10L : \ - ((d) == 2) ? 100L : \ - ((d) == 3) ? 1000L : \ - ((d) == 4) ? 10000L : \ - ((d) == 5) ? 100000L : \ - ((d) == 6) ? 1000000L : \ - ((d) == 7) ? 10000000L : \ - ((d) == 8) ? 100000000L : 1000000000L) -#define FR_NUM(i, f, d, r) ( \ - ((s32)(i) << (r)) + \ - (((i) < 0) \ - ? -((s32)(((s32)(f) << (r)) / FR_NUM_POW10(d))) \ - : ((s32)(((s32)(f) << (r)) / FR_NUM_POW10(d))))) -/* -FR_INT(x,r) convert a fixed radix variable x of radix r to an integer -*/ -#define FR_INT(x, r) (((x) < 0) ? -((-(x)) >> (r)) : ((x) >> (r))) - -/* Change Radix (x,current_radix, new_radix) - * change number from its current fixed radix (can be 0 or integer) to a new fixed radix - * Useful when dealing with numbers with mixed radixes. This is a MACRO so - * this code expands in place and x is modified. - */ -#define FR_CHRDX(x, r_cur, r_new) (((r_cur) - (r_new)) >= 0 ? ((x) >> ((r_cur) - (r_new))) : ((x) << ((r_new) - (r_cur)))) - -/* return only the fractional part of x */ -#define FR_FRAC(x, r) ((FR_ABS(x)) & (((1 << (r)) - 1))) - -/* return the fractional part of number x with radix xr scaled to radix nr bits */ -#define FR_FRACS(x, xr, nr) (FR_CHRDX(FR_FRAC((x), (xr)), (xr), (nr))) - -/****************************************************** - Add (sub) to fixed point numbers by converting the second number to the - same radix as the first. If yr < xr then possibility of overflow is increased. - Note: for two vars, i, j, of prec ir, jr, it is not necessarily true that - FR_ADD(i,ir,j,jr) == FR_ADD(j,jr,i,ir) -*/ -#define FR_ADD(x, xr, y, yr) ((x) += FR_CHRDX(y, yr, xr)) -#define FR_SUB(x, xr, y, yr) ((x) -= FR_CHRDX(y, yr, xr)) - -/* Fixed-radix division with round-to-nearest: x (at radix xr) / y (at radix yr), - * result at radix xr. Uses a 64-bit intermediate so the full Q16.16 range works - * correctly. Adds half the divisor before truncation to achieve ≤ 0.5 LSB error. */ -static inline s32 FR_div_rnd(s64 num, s32 den) { - if ((num ^ den) >= 0) /* same sign: positive quotient */ - return (s32)((num + den / 2) / den); - else /* negative quotient */ - return (s32)((num - den / 2) / den); -} -#define FR_DIV(x, xr, y, yr) FR_div_rnd((s64)(x) << (yr), (s32)(y)) - -/* FR_DIV_TRUNC: truncating division (old FR_DIV behavior). Useful when the - * caller knows the quotient is exact or truncation is acceptable. */ -#define FR_DIV_TRUNC(x, xr, y, yr) ((s32)(((s64)(x) << (yr)) / (s32)(y))) - -/* FR_DIV32: 32-bit-only division. Requires |x| < 2^(31-yr) to avoid - * overflow in the intermediate (x << yr). Use FR_DIV for full-range - * division with 64-bit intermediate. */ -#define FR_DIV32(x, xr, y, yr) (((s32)(x) << (yr)) / (s32)(y)) - -/* Remainder: both operands should be at the same radix. */ -#define FR_MOD(x, y) ((x) % (y)) - -/* min, max, clamp */ -#define FR_MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define FR_MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define FR_CLAMP(x, lo, hi) (FR_MIN(FR_MAX((x), (lo)), (hi))) - -/* Check if x is a power of 2. */ -#define FR_ISPOW2(x) (!((x) & ((x) - 1))) - -/* floor and ceiling in current radix, leaving current radix intact - * this means the lower radix number of bits are set to 0 - */ -#define FR_FLOOR(x, r) ((x) & (~((1 << r) - 1))) -#define FR_CEIL(x, r) (FR_FLOOR(x, r) + ((FR_FRAC(x, r) ? (1 << r) : 0))) - -/******************************************************* - Interpolate between 2 fixed point values (x0,x1) of any radix, - with a fractional delta, of a supplied precision. - If delta is outside 0<= delta<=1 then extrapolation - does work but programmer should watch for possible overflow. - x0,x1 need not have same radix as delta -*/ -#define FR_INTERP(x0, x1, delta, prec) ((x0) + ((((x1) - (x0)) * (delta)) >> (prec))) - -/****************************************************** - FR_INTERPI is the same as FR_INTERP except that insures the - range is btw [0<= delta < 1] --> ((0 ... (1<= 0 -*/ -#define FR_INTERPI(x0, x1, delta, prec) ((x0) + ((((x1) - (x0)) * ((delta) & ((1 << (prec)) - 1))) >> (prec))) - -/****************************************************** - Convert to double, this is for debug only and WILL NOT compile under many embedded systems. - Fixed Radix to Floating Point Double Conversion - since this is a MACRO it will not be compiled or instantiated unless it is actually called in code. -*/ -#define FR2D(x, r) ((double)(((double)(x)) / ((double)(1 << (r))))) -#define D2FR(d, r) ((s32)(d * (1 << r))) - -/****************************************************** - Useful Constants - - FR_kXXXX "k" denotes constant to help when reading macros used in source code - FR_krXXX "r" == reciprocal e.g. 1/XXX - As these are MACROS, they take only compiled code space if actually used. - Consts here calculated by multiply the natural base10 value by (1<> 28)) - -// common sqrts -#define FR_kSQRT2 (92682) /* 1.414213562373 */ -#define FR_krSQRT2 (46341) /* 0.707106781186 */ -#define FR_kSQRT3 (113512) /* 1.732050807568 */ -#define FR_krSQRT3 (37837) /* 0.577350269189 */ -#define FR_kSQRT5 (146543) /* 2.236067977599 */ -#define FR_krSQRT5 (29309) /* 0.447213595499 */ -#define FR_kSQRT10 (207243) /* 3.162277660168 */ -#define FR_krSQRT10 (20724) /* 0.316227766016 */ - - /*=============================================== - * Arithmetic operations - */ - s32 FR_FixMuls(s32 x, s32 y); // mul signed, round-to-nearest, NOT saturated - s32 FR_FixMulSat(s32 x, s32 y); // mul signed, round-to-nearest, saturated - s32 FR_FixAddSat(s32 x, s32 y); // add signed, saturated - -/*================================================ - * Constants used in Trig tables, definitions - * - * FR_TRIG_PREC — internal table precision (u0.15, sine table) - * FR_TRIG_OUT_PREC — output precision of sin/cos/tan (s15.16 since v2.0.1) - * FR_TRIG_ONE — exact 1.0 in output format (1 << 16 = 65536) - * - * sin/cos return s32 at radix 16 (s15.16). This matches libfixmath Q16.16 - * precision and allows exact representation of 1.0 at the poles. - * tan returns s32 at radix 16 (s15.16). Saturates at ±FR_TRIG_MAXVAL. - */ -#define FR_TRIG_PREC (15) -#define FR_TRIG_OUT_PREC (16) -#define FR_TRIG_MASK ((1 << (FR_TRIG_PREC)) - 1) -#define FR_TRIG_ONE (1L << FR_TRIG_OUT_PREC) /* 65536 = 1.0 */ -#define FR_TRIG_MAXVAL ((s32)0x7fffffff) /* tan saturation max */ -#define FR_TRIG_MINVAL (-FR_TRIG_MAXVAL) /* tan saturation min */ - -/* Bit Shift Scaling macros. Useful on some platforms with poor MUL performance. - * Also can be useful if you need to scale numbers with - * large portions of bits_in_use and a larger register size is not available. - * For example, suppose you need to scale a large 32bit number say z=0xa4239323 - * from degrees to radians. Ideally this number would be multiplied by 0.017 - * but using a (z*FR_kDEG2RAD) >> FR_kPREC type operation is likely to overflow - * due the MUL result being large. The FR_RAD2DEG MACRO below doesn't require - * accumulator headroom bits and so has no chance for loss of precision. - * Another benefit is some low end micros (8051, 68xx, MSP430(low end), - * PIC-8 family) have no multiplier. So these allow the programmer an option - * to see if performance or precision are better expressed as shifts rather than - * scaled multiplies. As always, mileage may vary depending on architecture, - * compiler and other considerations. - */ - -/* scale by 10s */ -#define FR_SMUL10(x) (((x) << 3) + (((x) << 1))) -#define FR_SDIV10(x) (((x) >> 3) - ((x) >> 5) + ((x) >> 7) - ((x) >> 9) + ((x) >> 11)) - -/* scale by 1/log2(e) 0.693147180560 used for converting log2() to ln() */ -#define FR_SrLOG2E(x) (((x) >> 1) + ((x) >> 2) - ((x) >> 3) + ((x) >> 4) + ((x) >> 7) - ((x) >> 9) - ((x) >> 12) + ((x) >> 15)) - -/* scale by log2(e) 1.442695040889 used for converting pow2() to exp() */ -#define FR_SLOG2E(x) ((x) + ((x) >> 1) - ((x) >> 4) + ((x) >> 8) + ((x) >> 10) + ((x) >> 12) + ((x) >> 14)) - -/* scale by 1/log2(10) 0.30102999566 used for converting log2() to log10 */ -#define FR_SrLOG2_10(x) (((x) >> 2) + ((x) >> 4) - ((x) >> 6) + ((x) >> 7) - ((x) >> 8) + ((x) >> 12)) - -/* scale by log2(10) 3.32192809489 used for converting pow2() to pow10 */ -#define FR_SLOG2_10(x) (((x) << 1) + (x) + ((x) >> 2) + ((x) >> 4) + ((x) >> 7) + ((x) >> 10) + ((x) >> 11) + ((x) >> 13)) - -/* Shift-only angular conversion macros - * - * All are pure constant multipliers expressed as shifts — no multiply, no - * divide, no 64-bit intermediates, no accumulators. Work at any radix: if - * your input is degrees at radix 8, the output is the target unit at radix 8. - * The caller shifts as needed. - * - * Angular units: - * degrees = 360 per revolution - * radians = 2*pi per revolution - * BAM = 65536 per revolution (Binary Angular Measure, u16) - * quadrants = 4 per revolution (= BAM >> 14) - * - * Side-effect note: x is referenced multiple times in each macro — do not - * pass expressions with side effects. - */ - -/* FR_DEG2RAD(x): multiply by pi/180 ≈ 0.017453 (5 terms, ~17 bits) */ -#define FR_DEG2RAD(x) (((x) >> 6) + ((x) >> 9) - ((x) >> 13) - ((x) >> 19) - ((x) >> 20)) - -/* FR_RAD2DEG(x): multiply by 180/pi ≈ 57.29578 (7 terms, ~19 bits) */ -#define FR_RAD2DEG(x) (((x) << 6) - ((x) << 3) + (x) + ((x) >> 2) + (((x) >> 4) - ((x) >> 6)) - ((x) >> 10)) - -/* FR_DEG2BAM(x): multiply by 65536/360 ≈ 182.0449 (7 terms, ~18 bits). - * Intermediate terms overflow s32 when |x| > ~256 deg at s15.16 (x<<7 term), - * but the overflow is harmless when the result is truncated to u16 BAM - * (two's complement wrapping preserves modular correctness). - * For full-precision s32 BAM (sub-BAM interpolation), use fr_deg_to_bam(). */ -#define FR_DEG2BAM(x) (((x)<<7)+((x)<<6)-((x)<<3)-((x)<<1)+((x)>>5)+((x)>>6)-((x)>>9)) - -/* FR_BAM2DEG(x): multiply by 360/65536 = 0.00549316 (4 terms, exact) */ -#define FR_BAM2DEG(x) (((x)>>8)+((x)>>9)-((x)>>12)-((x)>>13)) - -/* FR_RAD2BAM(x): multiply by 65536/(2*pi) ≈ 10430.378 (7 terms, ~21 bits). - * CAUTION: overflows s32 when |x| > ~4 rad at s15.16 (x<<13 term). - * For safe conversion at any radix, use fr_rad_to_bam() instead. - * #define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)) */ -#define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)+((x)>>8)-((x)>>11)-((x)>>14)) -/* ── Overflow-safe rad/deg to BAM conversion functions ───────────── - * - * These replace the FR_RAD2BAM / FR_DEG2BAM macros for callers that - * need the full ±2*pi or ±360° range at any radix. - * - * Strategy: normalize input to radix 16, conditionally reduce into - * a safe zone, apply the full-precision shift-only multiply, then - * extract the u16 BAM. No precision loss from halving/quartering. - * - * fr_rad_to_bam: reduce to [-pi, pi], reordered terms. ±2*pi safe. - * fr_deg_to_bam: reduce to [-90, 90) + quadrant offset. ±360° safe. - */ - -/* Pi constants at any radix: FR_PI(r) = round(pi * 2^r), etc. - * Compiler evaluates at compile time when r is a constant. - * Max safe radix: FR_PI r<=29, FR_TWO_PI r<=28, FR_HALF_PI r<=30. */ -#define FR_PI(r) ((s32)(3.14159265358979323846 * (1LL << (r)) + 0.5)) -#define FR_TWO_PI(r) ((s32)(6.28318530717958647692 * (1LL << (r)) + 0.5)) -#define FR_HALF_PI(r) ((s32)(1.57079632679489661923 * (1LL << (r)) + 0.5)) -#define FR_THREE_HALF_PI(r) ((s32)(4.71238898038468985769 * (1LL << (r)) + 0.5)) - -/* Convenience aliases at radix 16 */ -#define FR_PI_R16 FR_PI(16) -#define FR_TWO_PI_R16 FR_TWO_PI(16) - -/* Degree constants at radix 16 (exact — no truncation) */ -#define FR_D90_R16 ((s32)90 << 16) -#define FR_D180_R16 ((s32)180 << 16) -#define FR_D360_R16 ((s32)360 << 16) - - u16 fr_rad_to_bam(s32 rad, u16 radix); -#ifndef FR_LEAN - u16 fr_deg_to_bam(s32 deg, u16 radix); -#endif - -/* FR_BAM2RAD(x): multiply by 2*pi/65536 ≈ 0.0000959 (5 terms, ~18 bits) */ -#define FR_BAM2RAD(x) (((x)>>13)-((x)>>15)+((x)>>18)+((x)>>21)+((x)>>25)) - -/* Legacy quadrant macros (quadrants = BAM >> 14) */ -#define FR_RAD2Q(x) (((x) >> 1) + ((x) >> 3) + ((x) >> 7) + ((x) >> 8) - ((x) >> 14)) -#define FR_Q2RAD(x) ((x) + ((x) >> 1) + ((x) >> 4) + ((x) >> 7) + ((x) >> 11)) -#define FR_DEG2Q(x) (((x) >> 6) - ((x) >> 8) - ((x) >> 11) - ((x) >> 13)) -#define FR_Q2DEG(x) (((x) << 6) + ((x) << 4) + ((x) << 3) + ((x) << 1)) - -/*=============================================== - * BAM (Binary Angular Measure) — internal angle representation - * - * One full circle = 2^16 BAM units. So: - * 0 = 0 deg = 0 rad - * 16384 = 90 deg = pi/2 rad (FR_BAM_QUADRANT) - * 32768 = 180 deg = pi rad - * 49152 = 270 deg = 3pi/2 rad - * 65536 wraps to 0 (because BAM is u16) - * - * BAM is the natural representation for fixed-point trig because: - * - The top 2 bits select the quadrant (no `% 360` modulo needed). - * - The next 7 bits index the 128-entry quadrant table directly. - * - The bottom 7 bits give linear-interpolation precision. - */ -#define FR_BAM_BITS (16) -#define FR_BAM_FULL (1L << FR_BAM_BITS) /* 65536 */ -#define FR_BAM_QUADRANT (FR_BAM_FULL >> 2) /* 16384 */ -#define FR_BAM_HALF (FR_BAM_FULL >> 1) /* 32768 */ - -/*=============================================== - * Radian-native and BAM-native trig (recommended) - * - * All sin/cos functions return s32 at radix 16 (s15.16). - * 1.0 is represented exactly as FR_TRIG_ONE (65536). - * Poles (0, 90, 180, 270 deg) produce exact ±FR_TRIG_ONE or 0. - * - * fr_cos_bam(bam) — cos of a BAM angle, s15.16 result - * fr_sin_bam(bam) — sin of a BAM angle, s15.16 result - * fr_cos(rad, radix) — cos of radians at radix, s15.16 result - * fr_sin(rad, radix) — sin of radians at radix, s15.16 result - * fr_tan(rad, radix) — tan of radians at radix, s15.16 result - * fr_cos_deg(deg, radix) — cos of fixed-radix degrees, s15.16 result - * fr_sin_deg(deg, radix) — sin of fixed-radix degrees, s15.16 result - * fr_tan_deg(deg, radix) — tan of fixed-radix degrees, s15.16 result - * - * All go through the same 129-entry quadrant table with linear interpolation. - * Worst-case error: ~2 LSB in s15.16 (~3e-5 absolute), except at the four - * cardinal angles where the result is exact. - * - * The radian and degree wrappers (fr_sin, fr_cos, fr_tan, etc.) range-reduce - * their input, convert to u16 BAM, and call the BAM-native functions. Small- - * angle bypasses at the zero crossings (sin≈0, cos≈0, tan≈0) use the linear - * approximation sin(δ)≈δ to avoid BAM quantization error where it matters most. - */ - s32 fr_cos_bam(u16 bam); - s32 fr_sin_bam(u16 bam); -#ifndef FR_LEAN - s32 fr_tan_bam(u16 bam); -#endif - s32 fr_cos(s32 rad, u16 radix); - s32 fr_sin(s32 rad, u16 radix); - s32 fr_tan(s32 rad, u16 radix); - -/* Integer degrees -> BAM using division (exact at all multiples of 45 deg). */ -#define FR_DEG2BAM_I(deg) ((u16)((((s32)(deg) << 16) + ((deg) >= 0 ? 180 : -180)) / 360)) - -/* Legacy single-arg integer-degree macros — use FR_CosI / FR_SinI instead */ -/* #define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */ -/* #define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */ - -#ifndef FR_LEAN -/*=============================================== - * Degree-input trig API - * - * FR_CosI(deg) — cos of integer degrees, s15.16 result - * FR_SinI(deg) — sin of integer degrees, s15.16 result - * FR_TanI(deg) — tan of integer degrees, s15.16 result - * fr_cos_deg(deg, radix) — cos of fixed-radix degrees, s15.16 result - * fr_sin_deg(deg, radix) — sin of fixed-radix degrees, s15.16 result - * fr_tan_deg(deg, radix) — tan of fixed-radix degrees, s15.16 result - */ -#define FR_CosI(deg) fr_cos_bam(FR_DEG2BAM_I(deg)) -#define FR_SinI(deg) fr_sin_bam(FR_DEG2BAM_I(deg)) - - s32 fr_cos_deg(s32 deg, u16 radix); - s32 fr_sin_deg(s32 deg, u16 radix); - s32 FR_TanI(s32 deg); - s32 fr_tan_deg(s32 deg, u16 radix); - - /* Legacy macros — use fr_sin_deg/fr_cos_deg/fr_tan_deg in new code */ - #define FR_Sin fr_sin_deg - #define FR_Cos fr_cos_deg - #define FR_Tan fr_tan_deg -#endif /* FR_LEAN */ - - /* Inverse trig — output in radians at caller-specified radix (s32). - * FR_atan2 returns radians at radix 16 (s15.16). - * Range: acos [0, pi], asin [-pi/2, pi/2], - * atan [-pi/2, pi/2], atan2 [-pi, pi]. - */ - s32 FR_acos(s32 input, u16 radix, u16 out_radix); - s32 FR_asin(s32 input, u16 radix, u16 out_radix); - s32 FR_atan(s32 input, u16 radix, u16 out_radix); - s32 FR_atan2(s32 y, s32 x, u16 out_radix); - -/* Logarithms */ -#define FR_LOG2MIN (-(32767 << 16)) /* returned instead of "negative infinity" */ - - s32 FR_log2(s32 input, u16 radix, u16 output_radix); - s32 FR_ln(s32 input, u16 radix, u16 output_radix); -#ifndef FR_LEAN - s32 FR_log10(s32 input, u16 radix, u16 output_radix); -#endif - - /* Power */ - s32 FR_pow2(s32 input, u16 radix); -#define FR_EXP(input, radix) (FR_pow2(FR_MULK28((input), FR_kLOG2E_28), (radix))) -#define FR_POW10(input, radix) (FR_pow2(FR_MULK28((input), FR_kLOG2_10_28), (radix))) - -/* Shift-only (multiply-free) base-conversion variants. - * Lower accuracy (~5-10 LSB at Q16.16) but no multiply instruction. - * Use these on targets where 32x32->64 multiply is expensive. - */ -#define FR_EXP_FAST(input, radix) (FR_pow2(FR_SLOG2E(input), radix)) -#define FR_POW10_FAST(input, radix) (FR_pow2(FR_SLOG2_10(input), radix)) - -/*=============================================== - * Formatted output and string parsing - * - * Define FR_NO_PRINT before including this header to exclude all - * print/format functions from compilation. This saves ~1.7 KB of ROM - * on targets that don't need human-readable output (e.g. headless - * sensor nodes, DSP-only firmware). - * - * #define FR_NO_PRINT - * #include "FR_math.h" - */ -#ifndef FR_NO_PRINT - - /* printing family of functions */ - int FR_printNumF(int (*f)(char), s32 n, int radix, int pad, int prec); /* print fixed radix num as floating point e.g. -12.34" */ - int FR_printNumD(int (*f)(char), int n, int pad); /* print decimal number with optional padding e.g. " 12" */ - int FR_printNumH(int (*f)(char), int n, int showPrefix); /* print num as a hexidecimal e.g. "0x12ab" */ - - /* string-to-fixed-point parser (inverse of FR_printNumF) */ - s32 FR_numstr(const char *s, u16 radix); - -#endif /* FR_NO_PRINT */ - -/*=============================================== - * Square root and hypot - * - * Both take fixed-radix inputs and return a result at the same radix. - * Algorithm: digit-by-digit isqrt on a 64-bit accumulator (no division, - * at most 32 iterations). Rounds to nearest. - * - * Domain error sentinel: input < 0 (sqrt) returns FR_DOMAIN_ERROR. Caller - * can check `result == FR_DOMAIN_ERROR` to detect domain errors. - */ - s32 FR_sqrt(s32 input, u16 radix); -#ifndef FR_LEAN - s32 FR_hypot(s32 x, s32 y, u16 radix); -#endif - - /* Fast approximate magnitude — shift-only, no multiply, no 64-bit. - * Based on piecewise-linear approximation of sqrt(x*x + y*y). - * See US Patent 6,567,777 B1 (Chatterjee, expired). - * - * FR_hypot_fast8(x, y) 8-segment, ~0.10% peak error - * - * Inputs are raw signed integers (or fixed-point at any radix — the - * result is at the same radix as the inputs, just like FR_hypot). - * No radix parameter needed because the algorithm is scale-invariant. - */ - s32 FR_hypot_fast8(s32 x, s32 y); - -/*=============================================== - * Wave generators and ADSR envelope - * - * Define FR_NO_WAVES before including this header to exclude all - * waveform generators (square, pulse, triangle, saw, noise) and the - * ADSR envelope from compilation. This saves ~400 B of ROM on targets - * that only need math/trig and don't do audio synthesis. - * - * #define FR_NO_WAVES - * #include "FR_math.h" - */ -#ifndef FR_NO_WAVES - -/*=============================================== - * Wave generators — synth-style fixed-shape waveforms. - * - * All take a u16 BAM phase in [0, 65535] (a full cycle) and return s16 - * in s0.15 in [-32767, +32767]. Use FR_HZ2BAM_INC below to compute a - * phase increment for a target frequency. - * - * fr_wave_sqr(phase) 50% square - * fr_wave_pwm(phase, duty) variable-duty pulse - * fr_wave_tri(phase) symmetric triangle - * fr_wave_saw(phase) rising sawtooth - * fr_wave_tri_morph(phase, brk) variable-symmetry triangle (morphs to saw) - * fr_wave_noise(state*) LFSR pseudorandom noise - * - * fr_wave_tri_morph returns [0, 32767] (unipolar) — caller can re-bias - * if a bipolar form is desired. The other waves are bipolar [-32767, +32767]. - */ - s16 fr_wave_sqr(u16 phase); - s16 fr_wave_pwm(u16 phase, u16 duty); - s16 fr_wave_tri(u16 phase); - s16 fr_wave_saw(u16 phase); - s16 fr_wave_tri_morph(u16 phase, u16 break_point); - s16 fr_wave_noise(u32 *state); - -/* FR_HZ2BAM_INC(hz, sample_rate) - * Compute the per-sample BAM phase increment for a target frequency in Hz - * given a sample rate in Hz. Result is a u16 to add to the running phase - * each sample (the running phase wraps mod 2^16 naturally because it's u16). - * - * u16 phase = 0; - * u16 inc = FR_HZ2BAM_INC(440, 48000); - * for (...) { sample = fr_sin_bam(phase); phase += inc; } - * - * Range: hz must be < sample_rate / 2 (Nyquist) for a meaningful tone; - * higher hz aliases. The macro does not enforce this. - * - * Side-effect note: hz and sample_rate are evaluated once each. - */ -#define FR_HZ2BAM_INC(hz, sample_rate) ((u16)(((u32)(hz) * 65536UL) / (u32)(sample_rate))) - -/*=============================================== - * ADSR envelope generator - * - * Linear-segment Attack-Decay-Sustain-Release envelope. Caller-allocated - * struct, no malloc, no global state. Internal level is held in s1.30 so - * very long envelopes (e.g. 48000-sample attack at 48 kHz) still get a - * non-zero per-sample increment. Output is s0.15 in [0, 32767]. - * - * Lifecycle: - * fr_adsr_t env; - * fr_adsr_init(&env, atk_samples, dec_samples, sustain_s015, rel_samples); - * fr_adsr_trigger(&env); // note-on - * for (...) sample = fr_adsr_step(&env); // per-audio-sample - * fr_adsr_release(&env); // note-off - * for (...) sample = fr_adsr_step(&env); // until env.state == FR_ADSR_IDLE - */ -#define FR_ADSR_IDLE (0) -#define FR_ADSR_ATTACK (1) -#define FR_ADSR_DECAY (2) -#define FR_ADSR_SUSTAIN (3) -#define FR_ADSR_RELEASE (4) - -typedef struct fr_adsr_s { - u8 state; /* FR_ADSR_* */ - s32 level; /* current envelope value, s1.30 */ - s32 sustain; /* sustain target, s1.30 */ - s32 attack_inc; /* per-sample increment during attack */ - s32 decay_dec; /* per-sample decrement during decay */ - s32 release_dec; /* per-sample decrement during release */ -} fr_adsr_t; - - void fr_adsr_init(fr_adsr_t *env, - u32 attack_samples, - u32 decay_samples, - s16 sustain_level_s015, - u32 release_samples); - void fr_adsr_trigger(fr_adsr_t *env); - void fr_adsr_release(fr_adsr_t *env); - s16 fr_adsr_step(fr_adsr_t *env); - -#endif /* FR_NO_WAVES */ - -#ifdef __cplusplus - -} // extern "C" -#endif - -#endif /* __FR_Math_h__ */ diff --git a/fw/main.c b/fw/main.c index 38c6c39..a75947c 100644 --- a/fw/main.c +++ b/fw/main.c @@ -3,19 +3,14 @@ #include #include -#define FR_LEAN -// #define FR_CORE_ONLY -#include - #include "funconfig.h" #include "lib_i2c.h" #include "display.h" #include "filter.h" #include "sc7a20.h" #include "pd.h" +#include "fpmath.h" -// Radix for fixed point operations -#define R 16 // constants // LUT for converting NTC readings to degrees celsius @@ -45,8 +40,8 @@ static inline int16_t get_temp_c(uint16_t adc_reading) uint8_t index = adc_reading / ntc_step_size; uint8_t remainder = adc_reading % ntc_step_size; int16_t temp_base = index < 64 ? ntc_lut[index] : 0; - int16_t temp_next = ntc_lut[index + 1]; - return temp_base + ((temp_next - temp_base) * remainder)/ntc_step_size; + int16_t temp_next = ntc_lut[index + 1]; + return temp_base + ((temp_next - temp_base) * remainder)/ntc_step_size; } @@ -369,39 +364,41 @@ static inline uint16_t isqrt(uint32_t x) uint16_t pid(int16_t delta, int16_t max_duty) { // PID coefficients - const s32 Kp = FR_NUM( 1, 1700, 4, R); - const s32 Ti = FR_NUM( 5, 0000, 4, R); - const s32 Td = FR_NUM( 0, 450, 4, R); + const fp16_t Kp = num2fp( 1, 1700, 4); + const fp16_t Ti = num2fp(10, 0000, 4); + const fp16_t Td = num2fp( 0, 700, 4); - static s32 err_p, err_i, intgrt, err_d, dt, prev_err; + static fp16_t err_p, err_i, intgrt, err_d, dt, prev_err; static u32 t, prev_t; t = funSysTick32(); - dt = FR_DIV(I2FR((t-prev_t)/DELAY_MS_TIME, R), R, I2FR(1000, R), R); + dt = fp_div(i2fp((t-prev_t)/DELAY_MS_TIME), i2fp(1000)); - s32 err = I2FR(delta, R); // temperature delta as fixed point number + fp16_t err = i2fp(delta); // temperature delta as fixed point number err_p = err; - err_i = FR_FixMulSat(FR_DIV(err, R, Ti, R), dt); - err_d = FR_FixMulSat(FR_DIV(FR_FixAddSat(err, -prev_err), R, dt, R), Td); + err_i = fp_mul(fp_div(err, Ti), dt); + err_d = fp_mul(fp_div(fp_sub(err, prev_err), dt), Td); prev_err = err; prev_t = t; - s32 e = 0; - e = FR_FixAddSat(e, err_p); - e = FR_FixAddSat(FR_FixAddSat(e, err_i), intgrt); - e = FR_FixAddSat(e, err_d); - e = FR_FixMulSat(e, Kp); + fp16_t e = 0; + e = fp_add(e, err_p); + e = fp_add(fp_add(e, err_i), intgrt); + e = fp_add(e, err_d); + e = fp_mul(e, Kp); + // TODO: use a back calculation anti windup strategy // only integrate if the output is less then max - if (e < I2FR(100, R)) { - FR_FixAddSat(intgrt, err_i); + if (e < i2fp(100)) { + intgrt = fp_add(intgrt, err_i); } - e = FR_CLAMP(e, I2FR(0, R), I2FR(100, R)); + e = fp_clamp(e, i2fp(0), i2fp(100)); - return FR2I(FR_FixMulSat(FR_DIV(e, R, I2FR(100, R), R), I2FR(max_duty, R)), R); + // TODO: implement minimum duty cycle + return fp2i(fp_map(e, 0, i2fp(100), 0, i2fp(max_duty))); } @@ -543,16 +540,16 @@ __attribute__((noreturn)) int main(void) if (!pwm || !enabled) { Delay_Ms(TURN_OFF_DELAY); adc_injection_conversion(); - u32 tip_mv = ((u32)injection_results[0]*VCC_MV)/4096; + u16 tip_mv = ((u32)injection_results[0]*VCC_MV)/4096; // Tip calibration factors - const s32 tip_k = FR_NUM(0, 14473, 5, R); - const s32 tip_off = FR_NUM(0, 0, 0, R); - int16_t tt_now = FR2I(FR_FixAddSat(FR_FixMulSat(I2FR(tip_mv, R), tip_k), tip_off), R); - tip_temp_c = I16_FP_EMA_K4(tip_temp_c, tt_now + temp_c); + const fp16_t tip_k = num2fp(0, 14473, 5); + const fp16_t tip_off = num2fp(0, 0, 0); + int16_t tt_now = fp2i(fp_add(fp_mul(i2fp(tip_mv), tip_k), tip_off)); + tip_temp_c = I16_FP_EMA_K4(tip_temp_c, tt_now + temp_c); if (enabled) { duty = pid((int16_t)pd_profile.set_temp - tip_temp_c, pd_profile.max_duty); } else { - duty = 0; + duty = 0; } }