From 204e750b747fe4423b268918b37cfd5eeea8bed8 Mon Sep 17 00:00:00 2001
From: Alessandro Mauri <alemauri001@gmail.com>
Date: Sun, 10 May 2026 12:08:56 +0200
Subject: [PATCH] use FR_math for fixed point arithmetic

---
 fw/Makefile          |   13 +-
 fw/filter.h          |   53 --
 fw/fr_math/FR_defs.h |  120 +++
 fw/fr_math/FR_math.c | 1837 ++++++++++++++++++++++++++++++++++++++++++
 fw/fr_math/FR_math.h |  653 +++++++++++++++
 fw/main.c            |   22 +-
 6 files changed, 2635 insertions(+), 63 deletions(-)
 create mode 100644 fw/fr_math/FR_defs.h
 create mode 100644 fw/fr_math/FR_math.c
 create mode 100644 fw/fr_math/FR_math.h

diff --git a/fw/Makefile b/fw/Makefile
index f9c9dc2..6e76936 100644
--- a/fw/Makefile
+++ b/fw/Makefile
@@ -11,7 +11,11 @@ U8G2_DIR:=u8g2/csrc
 U8G2_SRC:=$(U8G2_DIR)/u8x8_d_$(DISPLAY).c $(filter-out $(U8G2_DIR)/u8x8_d_%.c, \
 	$(filter-out $(U8G2_DIR)/mui%.c, $(wildcard $(U8G2_DIR)/*.c)))
 
-EXTRA_CFLAGS += -I$(U8G2_DIR)
+# include fr_math
+FRMATH_DIR:=fr_math
+FRMATH_SRC:=$(wildcard $(FRMATH_DIR)/*.c)
+
+EXTRA_CFLAGS += -I$(U8G2_DIR) -I$(FRMATH_DIR)
 ADDITIONAL_C_FILES += lib_i2c.c display.c sc7a20.c pd.c
 HEADER_FILES := $(wildcard *.h)
 
@@ -28,13 +32,18 @@ U8G2_OBJS := $(addprefix $(BUILD_DIR)/, $(notdir $(U8G2_SRC:.c=.o)))
 $(BUILD_DIR)/%.o : $(U8G2_DIR)/%.c | $(BUILD_DIR)
 	$(PREFIX)-gcc $(CFLAGS) -c $< -o $@
 
+# build fr_math
+FRMATH_OBJS := $(addprefix $(BUILD_DIR)/, $(notdir $(FRMATH_SRC:.c=.o)))
+$(BUILD_DIR)/%.o : $(FRMATH_DIR)/%.c | $(BUILD_DIR)
+	$(PREFIX)-gcc $(CFLAGS) -c $< -o $@
+
 # build target
 TARGET_OBJS := $(addprefix $(BUILD_DIR)/, $(filter-out ch32fun.o, $(notdir $(FILES_TO_COMPILE:.c=.o))))
 $(BUILD_DIR)/%.o : %.c $(HEADER_FILES) | $(BUILD_DIR)
 	$(PREFIX)-gcc $(CFLAGS) -c $< -o $@
 
 # link target, the rest of the build is specified in ch32fun.mk
-$(TARGET).elf : $(FILES_TO_COMPILE) $(LINKER_SCRIPT) $(EXTRA_ELF_DEPENDENCIES) $(U8G2_OBJS) $(TARGET_OBJS) ch32fun.o
+$(TARGET).elf : $(FILES_TO_COMPILE) $(LINKER_SCRIPT) $(EXTRA_ELF_DEPENDENCIES) $(U8G2_OBJS) $(FRMATH_OBJS) $(TARGET_OBJS) ch32fun.o
 	$(PREFIX)-gcc -o $@ $(U8G2_OBJS) $(TARGET_OBJS) ch32fun.o $(CFLAGS) $(LDFLAGS)
 
 
diff --git a/fw/filter.h b/fw/filter.h
index 7084f42..e7055fa 100644
--- a/fw/filter.h
+++ b/fw/filter.h
@@ -23,57 +23,4 @@ static_assert(-4 >> 1 == -2, ">> doesn't do sign extension");
 #define I16_FP_EMA_K8(x, s)  (int16_t)((((int32_t)(x)<<8) - (x) + (s)) >> 8)
 #define I16_FP_EMA_K16(x, s) (int16_t)((((int32_t)(x)<<16) - (x) + (s)) >> 16)
 
-/* ------------------------- FIXED POINT OPERATIONS ------------------------- */
-
-// Fixed-point number in 8.8 format
-typedef int16_t fp16_t;
-#define F32_TO_FP16(f) ((fp16_t)((f) * 256.0f))
-#define I16_TO_FP16(i) ((fp16_t)((i) << 8))
-#define U16_TO_FP16(u) ((fp16_t)((u) << 8))
-
-// Intger part of fp16_t
-#define I(f) ((f) >> 8)
-// Decimal part of fp16_t
-#define D(f) ((f) & 0xFF)
-
-static inline int16_t i16_mul_fp16(int16_t v, fp16_t f)
-{
-	return (((int32_t)(v)<<8) * f) >> 8;
-}
-
-static inline uint16_t u16_mul_fp16(uint16_t v, fp16_t f)
-{
-	return (((uint32_t)(v)<<8) * f) >> 8;
-}
-
-// v * a + b for signed 16-bit
-#define I16_LINCAL(v, a, b) ((int16_t)((((int32_t)(v)<<8) * (a) + (b)) >> 8))
-// v * a + b for unsigned 16-bit
-#define U16_LINCAL(v, a, b) ((uint16_t)((((uint32_t)(v)<<8) * (a) + (b)) >> 8))
-
-
-// Fixed-point number in 24.8 format
-typedef uint32_t fp24_8_t;
-
-static inline fp24_8_t f32_to_fp24_8(float f)
-{
-	return ((fp24_8_t)((f) * 256.0f));
-}
-
-static inline fp24_8_t i16_to_fp24_8(int16_t i)
-{
-	return ((fp24_8_t)(i)) << 8;
-}
-
-static inline fp24_8_t u16_to_fp24_8(uint16_t u)
-{
-	return ((fp24_8_t)(u)) << 8;
-}
-
-static inline fp24_8_t fp24_8_mul(fp24_8_t a, fp24_8_t b)
-{
-	return ((fp24_8_t)((uint32_t)a * b) >> 8);
-}
-
-
 #endif // _FILTER_H
diff --git a/fw/fr_math/FR_defs.h b/fw/fr_math/FR_defs.h
new file mode 100644
index 0000000..631126e
--- /dev/null
+++ b/fw/fr_math/FR_defs.h
@@ -0,0 +1,120 @@
+/**
+ *	@file FR_Defs.h - type definitions used in Fixed-Radix math lib
+ *
+ *	@copy Copyright (C) <2001-2026>  <M. A. Chatterjee>
+ *  @author M A Chatterjee <deftio [at] deftio [dot] com>
+ *
+ *	This software is provided 'as-is', without any express or implied
+ *	warranty. In no event will the authors be held liable for any damages
+ *	arising from the use of this software.
+ *
+ *	Permission is granted to anyone to use this software for any purpose,
+ *	including commercial applications, and to alter it and redistribute it
+ *	freely, subject to the following restrictions:
+ *
+ *	1. The origin of this software must not be misrepresented; you must not
+ *	claim that you wrote the original software. If you use this software
+ *	in a product, an acknowledgment in the product documentation would be
+ *	appreciated but is not required.
+ *
+ *	2. Altered source versions must be plainly marked as such, and must not be
+ *	misrepresented as being the original software.
+ *
+ *	3. This notice may not be removed or altered from any source
+ *	distribution.
+ *
+ */
+
+#ifndef __FR_Platform_Defs_H__
+#define __FR_Platform_Defs_H__
+
+/*
+ * Fixed-width integer typedefs.
+ *
+ * Prefer C99 <stdint.h> when available (gcc, clang, MSVC, IAR, Keil, sdcc,
+ * MSP430-gcc, AVR-gcc, RISC-V, ARM toolchains).  For bare-metal toolchains
+ * or pre-C99 compilers that lack <stdint.h>, define FR_NO_STDINT before
+ * including this header and the types are provided via sizeof()-based
+ * fallback definitions that cover the common 8/16/32/64-bit layouts.
+ */
+#ifndef FR_NO_STDINT
+#include <stdint.h>
+#else
+/* ---- fallback: no <stdint.h> ------------------------------------ */
+/* Works on any toolchain where char=8, short=16, int/long=32 bits,
+ * which covers virtually all embedded targets (AVR, MSP430, ARM,
+ * 68HC11, 68k, PPC, RISC-V, Xtensa, x86).  Adjust if your platform
+ * differs.
+ */
+typedef unsigned char      uint8_t;
+typedef signed   char      int8_t;
+typedef unsigned short     uint16_t;
+typedef signed   short     int16_t;
+#if defined(__AVR__) || defined(__MSP430__) || defined(__m68hc1x__)
+  /* On these targets int is 16-bit; use long for 32-bit */
+  typedef unsigned long    uint32_t;
+  typedef signed   long    int32_t;
+#else
+  typedef unsigned int     uint32_t;
+  typedef signed   int     int32_t;
+#endif
+/* 64-bit: available on most 32-bit+ GCC targets via long long */
+typedef unsigned long long uint64_t;
+typedef signed   long long int64_t;
+#endif /* FR_NO_STDINT */
+
+/*
+ * Arduino's USBAPI.h typedefs u8 and u16 as unsigned char / unsigned short.
+ * On AVR, uint8_t/uint16_t resolve to unsigned int types, which are the same
+ * width but different C++ types — causing a redefinition error.  Skip those
+ * two typedefs when building in an Arduino environment; the Arduino-provided
+ * types are the same width and work identically.
+ *
+ * The guard checks __cplusplus too because Arduino.h (which pulls in
+ * USBAPI.h) is only auto-included in .ino/.cpp translation units.
+ * Plain-C files (.c) compiled by the Arduino build system need our
+ * typedefs even though the ARDUINO macro is defined for them.
+ */
+#if defined(ARDUINO) && defined(__cplusplus)
+  /* Arduino C++ TU — USBAPI.h already provides u8 and u16 */
+#else
+typedef uint8_t  u8;
+typedef uint16_t u16;
+#endif
+typedef int8_t   s8;
+typedef int16_t  s16;
+typedef uint32_t u32;
+typedef int32_t  s32;
+typedef uint64_t u64;
+typedef int64_t  s64;
+
+typedef short FR_bool;
+
+#define FR_SWAP_BYTES(x) (((x >> 8) & 0xff) | ((x << 8) & 0xff00))
+
+/*=======================================================
+ * Sentinel values for math errors.
+ *
+ * Functions that can hit a domain error (sqrt of negative, log of <=0, etc.)
+ * return one of these sentinels rather than raising an error code. Callers
+ * that care can check `result == FR_DOMAIN_ERROR` before using the value.
+ *
+ * FR_OVERFLOW_POS and FR_OVERFLOW_NEG are the saturating-overflow values
+ * returned by FR_FixMulSat / FR_FixAddSat etc. They are deliberately the
+ * extremes of s32 so that they compare correctly under signed comparison.
+ *
+ * Note: FR_DOMAIN_ERROR equals INT32_MIN (the same value as FR_OVERFLOW_NEG
+ * would naturally be), so a single check `result <= FR_OVERFLOW_NEG` cannot
+ * distinguish "saturating-negative-overflow" from "domain error". In
+ * practice the two never occur in the same call: saturating ops never go
+ * to a domain-error state, and domain-error ops never saturate. The names
+ * exist to make caller intent self-documenting.
+ */
+#define FR_DOMAIN_ERROR  ((s32)0x80000000) /* INT32_MIN: e.g. sqrt(<0)        */
+#define FR_OVERFLOW_POS  ((s32)0x7fffffff) /* INT32_MAX: e.g. FR_FixMulSat +  */
+#define FR_OVERFLOW_NEG  ((s32)0x80000000) /* INT32_MIN: e.g. FR_FixMulSat -  */
+
+#define FR_FALSE (0)
+#define FR_TRUE (!FR_FALSE)
+
+#endif /* __FR_Platform_Defs_H__ */
diff --git a/fw/fr_math/FR_math.c b/fw/fr_math/FR_math.c
new file mode 100644
index 0000000..95809f8
--- /dev/null
+++ b/fw/fr_math/FR_math.c
@@ -0,0 +1,1837 @@
+/**
+ *
+ *	@file FR_math.c - c implementation file for basic fixed
+ *                              radix math routines
+ *
+ *	@copy Copyright (C) <2001-2026>  <M. A. Chatterjee>
+ *  @author M A Chatterjee <deftio [at] deftio [dot] com>
+ *
+ *  This file contains integer math settable fixed point radix math routines for
+ *  use on systems in which floating point is not desired or unavailable.
+ *
+ *	This software is provided 'as-is', without any express or implied
+ *	warranty. In no event will the authors be held liable for any damages
+ *	arising from the use of this software.
+ *
+ *	Permission is granted to anyone to use this software for any purpose,
+ *	including commercial applications, and to alter it and redistribute it
+ *	freely, subject to the following restrictions:
+ *
+ *	1. The origin of this software must not be misrepresented; you must not
+ *	claim that you wrote the original software. If you use this software
+ *	in a product, please place an acknowledgment in the product documentation.
+ *
+ *	2. Altered source versions must be plainly marked as such, and must not be
+ *	misrepresented as being the original software.
+ *
+ *	3. This notice may not be removed or altered from any source
+ *	distribution.
+ *
+ */
+
+#include "FR_math.h"
+
+#ifndef FR_NO_STDINT
+#include <stdint.h>
+#endif
+
+/*=======================================================
+ * Trig lookup tables (inlined — no separate header needed)
+ *
+ * Sine quadrant table: 129 entries covering [0, pi/2] in u0.15 format.
+ * Tangent octant table: 65 entries covering [0, pi/4] in u0.15 format.
+ * Generated by tools/coef-gen.py — do not hand-edit.
+ */
+
+#define FR_TRIG_TABLE_BITS  (7)
+#define FR_TRIG_TABLE_SIZE  ((1 << FR_TRIG_TABLE_BITS) + 1)
+
+#define FR_TRIG_FRAC_BITS   (14 - FR_TRIG_TABLE_BITS)
+#define FR_TRIG_FRAC_MAX    (1 << FR_TRIG_FRAC_BITS)
+#define FR_TRIG_FRAC_MASK   (FR_TRIG_FRAC_MAX - 1)
+#define FR_TRIG_FRAC_HALF   (FR_TRIG_FRAC_MAX >> 1)
+#define FR_TRIG_QUADRANT    (1 << 14)
+
+static const unsigned short gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE] = {
+        0,   402,   804,  1206,  1608,  2009,  2411,  2811,
+     3212,  3612,  4011,  4410,  4808,  5205,  5602,  5998,
+     6393,  6787,  7180,  7571,  7962,  8351,  8740,  9127,
+     9512,  9896, 10279, 10660, 11039, 11417, 11793, 12167,
+    12540, 12910, 13279, 13646, 14010, 14373, 14733, 15091,
+    15447, 15800, 16151, 16500, 16846, 17190, 17531, 17869,
+    18205, 18538, 18868, 19195, 19520, 19841, 20160, 20475,
+    20788, 21097, 21403, 21706, 22006, 22302, 22595, 22884,
+    23170, 23453, 23732, 24008, 24279, 24548, 24812, 25073,
+    25330, 25583, 25833, 26078, 26320, 26557, 26791, 27020,
+    27246, 27467, 27684, 27897, 28106, 28311, 28511, 28707,
+    28899, 29086, 29269, 29448, 29622, 29792, 29957, 30118,
+    30274, 30425, 30572, 30715, 30853, 30986, 31114, 31238,
+    31357, 31471, 31581, 31686, 31786, 31881, 31972, 32058,
+    32138, 32214, 32286, 32352, 32413, 32470, 32522, 32568,
+    32610, 32647, 32679, 32706, 32729, 32746, 32758, 32766,
+    32768
+};
+
+#define FR_TAN_TABLE_BITS  (6)
+#define FR_TAN_TABLE_SIZE  ((1 << FR_TAN_TABLE_BITS) + 1)
+#define FR_TAN_FRAC_BITS   (13 - FR_TAN_TABLE_BITS)
+#define FR_TAN_FRAC_MAX    (1 << FR_TAN_FRAC_BITS)
+#define FR_TAN_FRAC_MASK   (FR_TAN_FRAC_MAX - 1)
+#define FR_TAN_FRAC_HALF   (FR_TAN_FRAC_MAX >> 1)
+#define FR_TAN_OCTANT      (1 << 13)
+
+static const unsigned short gFR_TAN_TAB_O[FR_TAN_TABLE_SIZE] = {
+        0,   402,   804,  1207,  1610,  2013,  2417,  2822,
+     3227,  3634,  4042,  4450,  4861,  5272,  5686,  6101,
+     6518,  6937,  7358,  7782,  8208,  8637,  9068,  9503,
+     9940, 10381, 10825, 11273, 11725, 12180, 12640, 13104,
+    13573, 14046, 14525, 15009, 15498, 15993, 16494, 17001,
+    17515, 18035, 18563, 19098, 19640, 20191, 20750, 21318,
+    21895, 22481, 23078, 23685, 24302, 24931, 25572, 26226,
+    26892, 27572, 28266, 28975, 29699, 30440, 31198, 31973,
+    32768
+};
+
+/*=======================================================
+ * Full-precision radian/degree → BAM conversion helpers
+ *
+ * rad_to_bam_full(r) returns a full s32 BAM value where:
+ *   upper 16 bits = integer BAM (the u16 table index)
+ *   lower 16 bits = sub-BAM fractional part
+ * Input r must already be normalized to radix 16 and reduced to [-pi, pi].
+ *
+ * The shift terms match FR_RAD2BAM (10 terms, ~21-bit accuracy) but are
+ * reordered so intermediate sums stay within s32 for |r| <= pi at r16.
+ */
+static s32 rad_to_bam_full(s32 r)
+{
+    /* 10 terms: 65536/(2*pi) ≈ 10430.37835...
+     * 2^13 + 2^11 + 2^7 + 2^6 - 2 + 0.5 - 0.125 + 2^-8 - 2^-11 - 2^-14
+     * = 10430.378357 (~21-bit accuracy)
+     * Terms reordered: interleave negatives early to keep all intermediate
+     * sums within s32 for |r| <= pi at r16 (max result ≈ 2^31 - 4K). */
+    return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14);
+}
+
+#ifndef FR_LEAN
+/* deg_to_bam_full(d) — same idea for degrees.
+ * Input d must already be normalized to radix 16 and reduced to [-90, 90).
+ * Returns full s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM).
+ * 7 terms, ~18-bit accuracy matching FR_DEG2BAM. */
+static s32 deg_to_bam_full(s32 d)
+{
+    return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9);
+}
+#endif
+
+/* Normalize a fixed-radix value to radix 16. */
+static s32 normalize_to_r16(s32 val, u16 radix)
+{
+    return (radix > 16) ? (val >> (radix - 16))
+         : (radix < 16) ? (val << (16 - radix))
+         : val;
+}
+
+/* Reduce non-negative radian (at r16) to [0, 2*pi). */
+static s32 reduce_to_2pi(s32 r)
+{
+    const s32 two_pi = FR_TWO_PI(16);  /* 411775 */
+    if (r > (two_pi << 1))
+        r -= (r / two_pi) * two_pi;
+    else if (r > two_pi)
+        r -= two_pi;
+    return r;
+}
+
+
+/* rad_r16_to_bam — convert radian (at r16) in [0, 2π) to u16 BAM.
+ * Uses quadrant decomposition to keep rad_to_bam_full in its safe
+ * [-π/2, π/2) range, mirroring the approach in fr_deg_to_bam. */
+static u16 rad_r16_to_bam(s32 r)
+{
+    const s32 half_pi       = FR_HALF_PI(16);       /* 102944 */
+    const s32 three_half_pi = FR_THREE_HALF_PI(16); /* 308831 */
+    const s32 pi            = FR_PI(16);             /* 205887 */
+    const s32 two_pi        = FR_TWO_PI(16);         /* 411775 */
+
+    u16 offset = 0;
+    if (r >= half_pi && r < three_half_pi) {
+        r -= pi;
+        offset = 0x8000u;
+    } else if (r >= three_half_pi) {
+        r -= two_pi;
+        /* r is now in [-π/2, 0), no offset needed (u16 wraps naturally) */
+    }
+    return (u16)(offset + (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16));
+}
+
+/* (rad_r16_to_bam32 removed — sub-BAM interpolation approach abandoned) */
+
+/* fr_rad_to_bam — overflow-safe radian to u16 BAM conversion.
+ * Normalizes to r16, reduces to [0, 2π), uses quadrant decomposition. */
+u16 fr_rad_to_bam(s32 rad, u16 radix)
+{
+    s32 r = normalize_to_r16(rad, radix);
+    /* Normalize to [0, 2π) */
+    if (r < 0) {
+        r += ((-r) / FR_TWO_PI(16)) * FR_TWO_PI(16);
+        if (r < 0) r += FR_TWO_PI(16);
+    }
+    r = reduce_to_2pi(r);
+    return rad_r16_to_bam(r);
+}
+
+#ifndef FR_LEAN
+/* fr_deg_to_bam — overflow-safe degree to u16 BAM conversion.
+ * Normalizes to r16, reduces to [-90, 90) with quadrant offset. */
+u16 fr_deg_to_bam(s32 deg, u16 radix)
+{
+    s32 d = normalize_to_r16(deg, radix);
+
+    /* Reduce to [-180, 180) */
+    if (d >= FR_D360_R16 || d < -FR_D360_R16) {
+        s32 n = d / FR_D360_R16;
+        d -= n * FR_D360_R16;
+    }
+    if (d >=  FR_D180_R16) d -= FR_D360_R16;
+    if (d <  -FR_D180_R16) d += FR_D360_R16;
+
+    /* Reduce to [-90, 90) with BAM quadrant offset */
+    u16 offset = 0;
+    if (d >= FR_D90_R16)      { d -= FR_D180_R16; offset = 32768; }
+    else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; }
+
+    return (u16)(offset + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16));
+}
+#endif
+
+/*=======================================================
+ * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan
+ *
+ * Internal model: every angle is reduced to a u16 BAM value. The top 2 bits
+ * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd
+ * quadrants (1, 3) reverse the in-quadrant index so the table is always read
+ * in the same direction.
+ *
+ * The table is a 129-entry SINE quadrant (ascending: 0 at index 0, 32768 at
+ * index 128). After mirroring, small full_pos → small output (near zero),
+ * which enables a cheap small-angle approximation: sin(θ) ≈ θ for angles
+ * below one table step (~0.7°). This eliminates table quantization error
+ * in the region where it matters most.
+ *
+ * Sign rule: quadrants 2 and 3 negate the result.
+ * Mirror rule: quadrants 1 and 3 flip the in-quadrant position.
+ */
+s32 fr_sin_bam(u16 bam)
+{
+	u32 q   = ((u32)bam >> 14) & 0x3;              /* top 2 bits = quadrant */
+	u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1);  /* bottom 14 bits        */
+
+	/* Exact cardinal angles */
+	if (inq == 0) {
+		if (q == 0 || q == 2) return 0;            /*   0° or 180° → 0   */
+		if (q == 1) return  FR_TRIG_ONE;           /*  90° →  1.0        */
+		return -FR_TRIG_ONE;                       /* 270° → -1.0        */
+	}
+
+	/* Odd quadrants mirror: read table from the far end */
+	if (q == 1 || q == 3)
+		inq = FR_TRIG_QUADRANT - inq;
+
+	s32 v;
+
+	/* Small-angle approximation: sin(θ) ≈ θ for inq < 128 (one table step).
+	 * θ_rad = inq * (π/2) / 16384.  Output = θ * 65536 = inq * FR_kQ2RAD / 16384.
+	 * Max inq=127: 127 * 102944 / 16384 = 798. Error: θ³/6 < 3e-7 << 1 LSB. */
+	if (inq < FR_TRIG_FRAC_MAX) {
+		v = (s32)(((u32)inq * 102944u + 8192u) >> 14);
+	} else {
+		/* Table lookup with 7-bit interpolation fraction */
+		u32 idx  = inq >> FR_TRIG_FRAC_BITS;
+		u32 frac = inq &  FR_TRIG_FRAC_MASK;
+		s32 lo = (s32)gFR_SIN_TAB_Q[idx];
+		s32 hi = (s32)gFR_SIN_TAB_Q[idx + 1];
+		v = lo + (((hi - lo) * (s32)frac + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS);
+		v <<= 1;   /* u0.15 → s15.16 */
+	}
+
+	return (q >= 2) ? -v : v;
+}
+
+s32 fr_cos_bam(u16 bam)
+{
+	/* cos(x) = sin(x + pi/2) = sin(bam + 16384). u16 wraparound is free. */
+	return fr_sin_bam((u16)(bam + FR_BAM_QUADRANT));
+}
+
+s32 fr_cos(s32 rad, u16 radix)
+{
+	if (rad == 0) return FR_TRIG_ONE;
+	s32 r = normalize_to_r16(rad, radix);
+	if (r < 0) r = -r;
+	r = reduce_to_2pi(r);
+	/* Near π/2 or 3π/2 (cos=0 crossings): cos(π/2+δ) = -sin(δ) ≈ -δ,
+	 * cos(3π/2+δ) = sin(δ) ≈ δ. */
+	s32 delta = r - FR_HALF_PI(16);
+	if (delta >= -256 && delta <= 256)
+		return -delta;
+	delta = r - FR_THREE_HALF_PI(16);
+	if (delta >= -256 && delta <= 256)
+		return delta;
+	return fr_cos_bam(fr_rad_to_bam(rad, radix));
+}
+
+s32 fr_sin(s32 rad, u16 radix)
+{
+	if (rad == 0) return 0;
+	s32 r = normalize_to_r16(rad, radix);
+	s32 sign = 1;
+	if (r < 0) { r = -r; sign = -1; }
+	r = reduce_to_2pi(r);
+	/* Near 0 after reduction: sin(δ) ≈ δ */
+	if (r < 256) {
+		s32 v = r;
+		return (sign < 0) ? -v : v;
+	}
+	/* Near π: sin(π + δ) = -sin(δ) ≈ -δ */
+	s32 delta = r - FR_PI(16);
+	if (delta >= -256 && delta <= 256) {
+		s32 v = -delta;
+		return (sign < 0) ? -v : v;
+	}
+	/* Near 2π: sin(2π - δ) = -sin(δ) ≈ -δ, but δ = 2π - r */
+	delta = FR_TWO_PI(16) - r;
+	if (delta >= 0 && delta < 256) {
+		s32 v = -delta;
+		return (sign < 0) ? -v : v;
+	}
+	/* Main path: reduce to [-π, π], convert to u16 BAM, table lookup */
+	if (r > FR_PI(16)) r -= FR_TWO_PI(16);
+	u16 bam = (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16);
+	s32 v = fr_sin_bam(bam);
+	return (sign < 0) ? -v : v;
+}
+
+#ifndef FR_LEAN
+/*=======================================================
+ * BAM-native tangent: fr_tan_bam
+ *
+ * Uses a 65-entry octant table (gFR_TAN_TAB_O) for the first octant
+ * [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°-x) for the
+ * second octant (45°, 90°). Result is s15.16 with saturation at the
+ * poles.
+ *
+ * No 64-bit intermediates. One 32-bit division only in the >45° path.
+ */
+s32 fr_tan_bam(u16 bam)
+{
+	u32 q   = ((u32)bam >> 14) & 0x3;          /* quadrant (top 2 bits) */
+	u32 inq = (u32)bam & 0x3FFFu;              /* in-quadrant (14 bits) */
+	s32 sign = 1;
+	u32 idx, frac;
+	s32 lo, hi, raw;
+
+	/* Exact zeros: bam lands exactly on 0° or 180° */
+	if (inq == 0 && (q == 0 || q == 2))
+		return 0;
+
+	/* Poles: bam lands exactly on 90° or 270° */
+	if (inq == 0 && (q == 1 || q == 3))
+		return (q == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+
+	/* Q1 (90°..180°) and Q3 (270°..360°): reflect and negate */
+	if (q == 1 || q == 3) {
+		inq = 0x4000u - inq;
+		sign = -1;
+	}
+
+	/* Now inq is in (0, 0x4000) = (0°, 90°) exclusive.
+	 * Split into first octant [0, 45°) and second octant [45°, 90°). */
+	if (inq < FR_TAN_OCTANT) {
+		/* First octant: direct table lookup + lerp.
+		 * inq is 13 bits; top FR_TAN_TABLE_BITS index the table,
+		 * bottom FR_TAN_FRAC_BITS drive interpolation. */
+		idx  = inq >> FR_TAN_FRAC_BITS;
+		frac = inq &  FR_TAN_FRAC_MASK;
+		lo = (s32)gFR_TAN_TAB_O[idx];
+		hi = (s32)gFR_TAN_TAB_O[idx + 1];
+		raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+		if (raw < 0x40) {
+			/* Near zero: redo interpolation with 4 extra bits of
+			 * precision to reduce rounding error when result is small. */
+			s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+			s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+			raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+			raw = (raw + 4) >> 3;        /* u0.19 → s15.16 with rounding */
+		} else {
+			raw <<= 1;                   /* u0.15 → s15.16              */
+		}
+	} else {
+		/* Second octant: tan(x) = 1 / tan(90° - x).
+		 * complement is in (0, 0x2000] = (0°, 45°]. */
+		u32 comp = 0x4000u - inq;
+
+		/* Look up tan(complement) from the table */
+		idx  = comp >> FR_TAN_FRAC_BITS;
+		frac = comp &  FR_TAN_FRAC_MASK;
+		lo = (s32)gFR_TAN_TAB_O[idx];
+		hi = (s32)gFR_TAN_TAB_O[idx + 1];
+		raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+		if (raw < 0x40) {
+			/* Near pole: redo interpolation with 4 extra bits of
+			 * precision. The reciprocal amplifies small interpolation
+			 * errors, so extra precision significantly helps here.
+			 * Result: (2^31 / raw_hp) << 4 = 2^35 / raw_hp. */
+			s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+			s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+			s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+			if (raw_hp < 32) {
+				raw = FR_TRIG_MAXVAL;
+			} else {
+				raw = (s32)((0x80000000u / (u32)raw_hp) << 4);
+			}
+		} else {
+			raw = (s32)(0x80000000u / (u32)raw);
+		}
+	}
+
+	return (sign < 0) ? -raw : raw;
+}
+#endif /* FR_LEAN */
+
+/* fr_tan — radian-input tangent.
+ *
+ * Normalize to [0, 2π], extract quadrant sign, convert rad→u16 BAM,
+ * then do direct octant table lookup + interpolation inline.
+ * Small-angle bypass at zero crossings: tan(x) ≈ x.
+ * Near poles: use radian distance directly (cot(δ) ≈ 1/δ) to avoid
+ * BAM quantization error amplified by the reciprocal. */
+s32 fr_tan(s32 rad, u16 radix)
+{
+	if (rad == 0) return 0;
+	s32 r = normalize_to_r16(rad, radix);
+
+	/* tan(-x) = -tan(x): extract sign, work with |r| */
+	s32 sign = 1;
+	if (r < 0) { r = -r; sign = -1; }
+	r = reduce_to_2pi(r);
+
+	/* Small-angle bypass at zero crossings: tan(δ) ≈ δ */
+	if (r < 256)
+		return (sign < 0) ? -r : r;
+	{
+		s32 delta = r - FR_PI(16);
+		if (delta >= -256 && delta <= 256)
+			return (sign < 0) ? -delta : delta;
+	}
+	{
+		s32 delta = FR_TWO_PI(16) - r;
+		if (delta >= 0 && delta < 256)
+			return (sign < 0) ? delta : -delta;
+	}
+
+	/* Near-pole bypass: within POLE_THRESH r16 of π/2 or 3π/2,
+	 * use cot(δ) ≈ 1/δ from the radian distance directly.
+	 * Compute δ at r24 using precise pole constants (8× less rounding
+	 * error than the r16 FR_HALF_PI/FR_THREE_HALF_PI constants).
+	 * At δ=2048 r16 (1.79°), 1/δ error is ~0.03%. */
+	{
+		const s32 pole_thresh = 2048;       /* r16 units (~1.79°) */
+		/* Precise pole positions at r24:
+		 * π/2  × 2^24 = 26353589.76 → 26353590
+		 * 3π/2 × 2^24 = 79060769.28 → 79060769 */
+		const s32 half_pi_r24       = 26353590;
+		const s32 three_half_pi_r24 = 79060769;
+
+		s32 d1 = r - FR_HALF_PI(16);       /* coarse check at r16 */
+		s32 d2 = r - FR_THREE_HALF_PI(16);
+		s32 pole_delta_r24 = 0;
+
+		if (d1 >= -pole_thresh && d1 <= pole_thresh) {
+			s32 r24 = r << 8;
+			s32 dd = r24 - half_pi_r24;
+			pole_delta_r24 = (dd < 0) ? -dd : dd;
+		} else if (d2 >= -pole_thresh && d2 <= pole_thresh) {
+			s32 r24 = r << 8;
+			s32 dd = r24 - three_half_pi_r24;
+			pole_delta_r24 = (dd < 0) ? -dd : dd;
+		}
+
+		if (pole_delta_r24 > 0) {
+			/* Determine sign from radian quadrant */
+			s32 pole_sign;
+			if (r < FR_HALF_PI(16))
+				pole_sign = 1;          /* before π/2: → +∞ */
+			else if (r < FR_PI(16))
+				pole_sign = -1;         /* past π/2: → -∞ */
+			else if (r <= FR_THREE_HALF_PI(16))
+				pole_sign = 1;          /* before 3π/2: → +∞ */
+			else
+				pole_sign = -1;         /* past 3π/2: → -∞ */
+
+			s32 raw;
+			if (pole_delta_r24 < 512) {
+				raw = FR_TRIG_MAXVAL;   /* δ < 2 at r16 → saturate */
+			} else {
+				/* cot(δ) ≈ 1/δ.  In s15.16: (2^40) / δ_r24 */
+				raw = (s32)((1ULL << 40) / (u32)pole_delta_r24);
+				if (raw > FR_TRIG_MAXVAL) raw = FR_TRIG_MAXVAL;
+			}
+			s32 v = (pole_sign < 0) ? -raw : raw;
+			return (sign < 0) ? -v : v;
+		}
+	}
+
+	/* Convert radian to u16 BAM */
+	u16 bam = rad_r16_to_bam(r);
+
+	/* Decompose BAM into quadrant + in-quadrant */
+	u32 q   = ((u32)bam >> 14) & 0x3;
+	u32 inq = (u32)bam & 0x3FFFu;
+	s32 tsign = 1;  /* tan sign from quadrant */
+
+	/* Exact zeros: bam lands on 0° or 180° */
+	if (inq == 0 && (q == 0 || q == 2))
+		return 0;
+
+	/* Q1/Q3: reflect and negate */
+	if (q == 1 || q == 3) {
+		inq = 0x4000u - inq;
+		tsign = -1;
+	}
+
+	/* Octant table lookup + interpolation (same logic as fr_tan_bam) */
+	u32 idx, frac;
+	s32 raw;
+
+	if (inq < FR_TAN_OCTANT) {
+		/* First octant [0°, 45°): direct lookup */
+		idx  = inq >> FR_TAN_FRAC_BITS;
+		frac = inq &  FR_TAN_FRAC_MASK;
+		s32 lo = (s32)gFR_TAN_TAB_O[idx];
+		s32 hi = (s32)gFR_TAN_TAB_O[idx + 1];
+		raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+		if (raw < 0x40) {
+			s32 lo4 = lo << 4;
+			s32 hi4 = hi << 4;
+			raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+			raw = (raw + 4) >> 3;
+		} else {
+			raw <<= 1;
+		}
+	} else {
+		/* Second octant [45°, 90°): reciprocal identity */
+		u32 comp = 0x4000u - inq;
+		idx  = comp >> FR_TAN_FRAC_BITS;
+		frac = comp &  FR_TAN_FRAC_MASK;
+		s32 lo = (s32)gFR_TAN_TAB_O[idx];
+		s32 hi = (s32)gFR_TAN_TAB_O[idx + 1];
+		raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+		if (raw < 0x40) {
+			s32 lo4 = lo << 4;
+			s32 hi4 = hi << 4;
+			s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+			if (raw_hp < 32)
+				raw = FR_TRIG_MAXVAL;
+			else
+				raw = (s32)((0x80000000u / (u32)raw_hp) << 4);
+		} else {
+			raw = (s32)(0x80000000u / (u32)raw);
+		}
+	}
+
+	/* Combine quadrant sign and input sign */
+	s32 v = (tsign < 0) ? -raw : raw;
+	return (sign < 0) ? -v : v;
+}
+
+#ifndef FR_LEAN
+/*=======================================================
+ * Degree-input trig: convert to u16 BAM via fr_deg_to_bam, then
+ * call the BAM-native functions. Cardinal angles are exact.
+ */
+
+s32 fr_cos_deg(s32 deg, u16 radix)
+{
+	if (radix == 0) return fr_cos_bam(FR_DEG2BAM_I(deg));
+	if (deg < 0) deg = -deg;
+	/* Exact cardinal angles */
+	s32 frac_mask = (1 << radix) - 1;
+	if ((deg & frac_mask) == 0) {
+		s32 rem = (deg >> radix) % 360;
+		if (rem == 0)   return  FR_TRIG_ONE;
+		if (rem == 90)  return  0;
+		if (rem == 180) return -FR_TRIG_ONE;
+		if (rem == 270) return  0;
+	}
+	/* Near 90° or 270° (cos=0 crossings): cos(90+δ) = -sin(δ) ≈ -δ·π/180,
+	 * cos(270+δ) = sin(δ) ≈ δ·π/180. Avoids BAM rounding error at zero. */
+	s32 d = normalize_to_r16(deg, radix);
+	if (d >= FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; }
+	{
+		const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */
+		s32 delta = d - FR_D90_R16;
+		if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+			s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16);
+			return -dr;
+		}
+		delta = d - (FR_D90_R16 + FR_D180_R16);
+		if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+			s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16);
+			return dr;
+		}
+	}
+	return fr_cos_bam(fr_deg_to_bam(deg, radix));
+}
+
+s32 fr_sin_deg(s32 deg, u16 radix)
+{
+	if (radix == 0) return fr_sin_bam(FR_DEG2BAM_I(deg));
+	s32 sign = 1;
+	if (deg < 0) { deg = -deg; sign = -1; }
+	/* Exact cardinal angles */
+	s32 frac_mask = (1 << radix) - 1;
+	if ((deg & frac_mask) == 0) {
+		s32 rem = (deg >> radix) % 360;
+		if (rem == 0)   return  0;
+		if (rem == 90)  return (sign < 0) ? -FR_TRIG_ONE :  FR_TRIG_ONE;
+		if (rem == 180) return  0;
+		if (rem == 270) return (sign < 0) ?  FR_TRIG_ONE : -FR_TRIG_ONE;
+	}
+	s32 v = fr_sin_bam(fr_deg_to_bam(deg, radix));
+	return (sign < 0) ? -v : v;
+}
+
+s32 FR_TanI(s32 deg)
+{
+	/* Exact pole: deg mod 180 == ±90. Sign matches input sign. */
+	s32 rem = deg % 180;
+	if (rem == 90 || rem == -90)
+		return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+	return fr_tan_bam(FR_DEG2BAM_I(deg));
+}
+
+s32 fr_tan_deg(s32 deg, u16 radix)
+{
+	if (radix == 0) return FR_TanI(deg);
+	s32 deg_orig = deg;
+	/* Normalize to [0, 360°) at caller radix */
+	s32 d360 = 360 << radix;
+	if (deg < 0) {
+		deg += ((-deg) / d360) * d360;
+		if (deg < 0) deg += d360;
+	}
+	if (deg >= d360) {
+		deg -= (deg / d360) * d360;
+	}
+	/* Exact cardinal angles */
+	s32 frac_mask = (1 << radix) - 1;
+	if ((deg & frac_mask) == 0) {
+		s32 ideg = deg >> radix;
+		if (ideg == 0 || ideg == 180) return 0;
+		if (ideg == 90 || ideg == 270)
+			return (deg_orig >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+	}
+	/* Near 0° or 180° (tan=0 crossings): tan(δ) ≈ δ in radians */
+	s32 d = normalize_to_r16(deg, radix);
+	{
+		const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */
+		s32 delta;
+		/* Near 0° */
+		if (d < DEG_THRESH) {
+			s32 up = d << 8;
+			return (FR_DEG2RAD(up) + (1 << 7)) >> 8;
+		}
+		/* Near 180° */
+		delta = d - FR_D180_R16;
+		if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+			s32 up = delta << 8;
+			return (FR_DEG2RAD(up) + (1 << 7)) >> 8;
+		}
+		/* Near 360° */
+		delta = FR_D360_R16 - d;
+		if (delta >= 0 && delta < DEG_THRESH) {
+			s32 up = delta << 8;
+			return -((FR_DEG2RAD(up) + (1 << 7)) >> 8);
+		}
+	}
+	/* Main path: convert to u16 BAM, table lookup */
+	u16 bam = fr_deg_to_bam(deg, radix);
+	s32 v = fr_tan_bam(bam);
+	/* Near-pole BAM alias: determine sign from normalized angle position */
+	if (bam == 0x4000u || bam == 0xC000u) {
+		s32 pole_d = (bam == 0x4000u) ? FR_D90_R16 : (FR_D90_R16 + FR_D180_R16);
+		v = (d < pole_d) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+	}
+	return v;
+}
+#endif /* FR_LEAN */
+
+/*=======================================================
+ * FR_FixMuls (x*y signed, NOT saturated, round-to-nearest)
+ *
+ * Treats x and y as fixed-point values at the same radix r and returns
+ * (x*y) >> r at radix r. The user is responsible for tracking the radix
+ * point and for guaranteeing the product fits in 32 bits.
+ *
+ * Adds 0.5 LSB (0x8000) before the shift so the result rounds to
+ * nearest instead of truncating toward zero.
+ */
+s32 FR_FixMuls(s32 x, s32 y)
+{
+	int64_t v = (int64_t)x * (int64_t)y;
+	return (s32)((v + 0x8000) >> 16);
+}
+
+/*=======================================================
+ * FR_FixMulSat (x*y signed, SATURATED, round-to-nearest)
+ *
+ * Same semantics as FR_FixMuls but clamps to [INT32_MIN, INT32_MAX] on
+ * overflow instead of wrapping. The fixed-point radix is fixed at 16 bits
+ * (sM.16 inputs and output). Rounds to nearest (adds 0.5 LSB before shift).
+ */
+s32 FR_FixMulSat(s32 x, s32 y)
+{
+	int64_t v = ((int64_t)x * (int64_t)y + 0x8000) >> 16;
+	if (v >  (int64_t)0x7fffffff) return  FR_OVERFLOW_POS;
+	if (v < -(int64_t)0x80000000) return  FR_OVERFLOW_NEG;
+	return (s32)v;
+}
+
+/*=======================================================
+  FR_FixAddSat (x+y saturated add)
+  programmer must align radix points before using this function
+ */
+s32 FR_FixAddSat(s32 x, s32 y)
+{
+	s32 sum = x + y;
+	if (x < 0)
+	{
+		if (y < 0)
+			return (sum >= 0) ? FR_OVERFLOW_NEG : sum;
+	}
+	else
+	{
+		if (y >= 0)
+			return (sum <= 0) ? FR_OVERFLOW_POS : sum;
+	}
+	return sum;
+}
+
+/* Inverse Trig
+ * acos with binary search of the BAM-native quadrant table.
+ *
+ * Algorithm: bring `input` into s0.15, then binary-search the first-quadrant
+ * cos table for the table entry closest to |input|. Apply quadrant mirror
+ * if input was negative.
+ */
+/* FR_acos — returns radians at out_radix.
+ * Range: [0, pi].  Input is a cosine value at the given radix.
+ *
+ * Uses the 129-entry sine table in reverse: binary-search the ascending
+ * table to find asin(|input|), then acos = pi/2 - asin (with sign handling
+ * for the second quadrant).
+ */
+s32 FR_acos(s32 input, u16 radix, u16 out_radix)
+{
+	s32 v;
+	s16 sign;
+	s32 lo, hi, mid;
+	s32 idx, d, num, frac;
+	s32 input_abs;
+
+	/* Work with absolute value at the caller's radix */
+	sign = (s16)((input < 0) ? 1 : 0);
+	input_abs = sign ? -input : input;
+
+	/* Clamp at the caller's radix */
+	{
+		s32 one = (s32)1 << radix;
+		if (input_abs >= one)
+			return sign ? FR_CHRDX(FR_kPI, FR_kPREC, out_radix) : 0;
+	}
+
+	v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */
+
+	/* Small-angle fast path: when cos(θ) is close to 1.0, the sine table
+	 * has poor resolution near the top (entries close together).
+	 * Use acos(x) ≈ sqrt(2*(1-x)) instead. Threshold: v > sin_tab[121]
+	 * means the input is > cos(7*π/256) ≈ 0.9975. */
+	if (v > gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE - 8])
+	{
+		s32 one = (s32)1 << radix;
+		s32 one_minus_x = one - input_abs;           /* 1-|x| at caller radix */
+		s32 two_omx     = one_minus_x << 1;          /* 2(1-|x|) at caller radix */
+		s32 rad_native  = FR_sqrt(two_omx, radix);   /* radians at caller radix */
+		s32 rad_out     = FR_CHRDX(rad_native, radix, out_radix);
+		if (sign)
+			rad_out = FR_CHRDX(FR_kPI, FR_kPREC, out_radix) - rad_out;
+		return rad_out;
+	}
+
+	/* Binary search on the ascending sine table.
+	 * gFR_SIN_TAB_Q[0] = 0 (sin 0°), gFR_SIN_TAB_Q[128] = 32768 (sin 90°).
+	 *
+	 * Find the first index where table[idx] >= v. */
+	lo = 0;
+	hi = FR_TRIG_TABLE_SIZE;
+	while (lo < hi)
+	{
+		mid = (lo + hi) >> 1;
+		if ((s32)gFR_SIN_TAB_Q[mid] < v)
+			lo = mid + 1;
+		else
+			hi = mid;
+	}
+
+	/* lo is now the first index where table[lo] >= v.
+	 * The bracketing interval is [lo-1, lo] with table[lo-1] < v <= table[lo].
+	 * This gives us the asin angle; acos = pi/2 - asin. */
+	idx = lo;
+	if (idx <= 0)
+	{
+		idx = 0;
+		frac = 0;
+	}
+	else if (idx >= FR_TRIG_TABLE_SIZE)
+	{
+		idx = FR_TRIG_TABLE_SIZE - 1;
+		frac = 0;
+	}
+	else
+	{
+		/* Interpolate between table[idx-1] and table[idx].
+		 * d = table[idx] - table[idx-1]  (>= 0, sin increasing)
+		 * num = v - table[idx-1]          (how far past table[idx-1])
+		 */
+		d   = (s32)gFR_SIN_TAB_Q[idx] - (s32)gFR_SIN_TAB_Q[idx - 1];
+		num = v - (s32)gFR_SIN_TAB_Q[idx - 1];
+		if (d > 0)
+			frac = ((num << FR_TRIG_FRAC_BITS) + (d >> 1)) / d;
+		else
+			frac = 0;
+		idx = idx - 1;
+	}
+
+	{
+		/* asin_bam is the angle in first-quadrant BAM whose sin = v */
+		u16 asin_bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac);
+		/* acos = pi/2 - asin (in BAM: quadrant - asin_bam) */
+		u16 bam = (u16)(FR_TRIG_QUADRANT - asin_bam);
+		if (sign)
+			bam = (u16)(FR_BAM_HALF - bam);  /* mirror: pi - angle */
+		return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix);
+	}
+}
+
+/* FR_asin — returns radians at out_radix. Range: [-pi/2, pi/2]. */
+s32 FR_asin(s32 input, u16 radix, u16 out_radix)
+{
+	/* asin(x) = pi/2 - acos(x) */
+	s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);
+	return half_pi - FR_acos(input, radix, out_radix);
+}
+
+/* FR_atan2(y, x, out_radix) — full-circle arctangent, returns radians
+ * at the specified output radix (s32).
+ *
+ * Range: [-pi, pi]. Returns 0 for atan2(0,0).
+ *
+ * Implementation: normalise (x,y) via FR_hypot_fast8, then recover the
+ * angle with FR_asin or FR_acos (both use the 129-entry cosine table).
+ * To stay in the well-conditioned region of each inverse function we
+ * switch at 45°:
+ *   |y| <= |x|  →  use asin(y/h)   — asin stable near 0
+ *   |y| >  |x|  →  use acos(x/h)   — acos stable near pi/2
+ * This keeps the derivative amplification factor below 1.414x everywhere.
+ */
+s32 FR_atan2(s32 y, s32 x, u16 out_radix)
+{
+	s32 ax, ay, h, q1_angle;
+
+	/* Axis cases — exact angles, no divide. */
+	if (x == 0)
+	{
+		if (y > 0) return  FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);     /*  pi/2 */
+		if (y < 0) return -FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);     /* -pi/2 */
+		return 0;
+	}
+	if (y == 0)
+		return (x > 0) ? 0 : FR_CHRDX(FR_kPI, FR_kPREC, out_radix);      /* 0 or pi */
+
+	ax = (x < 0) ? -x : x;
+	ay = (y < 0) ? -y : y;
+
+	/* Normalise so max(ax,ay) sits in [2^14, 2^15).  This gives
+	 * FR_hypot_fast8 enough integer bits for the shift-only segments
+	 * to produce an accurate ratio — critical when the raw inputs are
+	 * small (e.g. atan2(1,1) at radix 0).  Scaling both by the same
+	 * power of two doesn't change the angle. */
+	{
+		s32 mx = (ax > ay) ? ax : ay;
+		while (mx < (1L << 14)) { ax <<= 1; ay <<= 1; mx <<= 1; }
+		while (mx >= (1L << 16)) { ax >>= 1; ay >>= 1; mx >>= 1; }
+	}
+
+	h = FR_hypot_fast8((s32)ax, (s32)ay);
+	if (h == 0) return 0;  /* degenerate */
+
+	/* Compute the first-quadrant angle (positive, [0..pi/2]).
+	 * Divide produces a value in [0,1] at radix FR_TRIG_PREC (s0.15).
+	 *
+	 * Small-angle fast path: when the minor-axis ratio is small,
+	 * asin(x) ≈ x (error < x³/6).  Below ~5° the cubic term is
+	 * smaller than the table-lookup error, so the direct identity
+	 * is both faster and more accurate.  Threshold 2753 at r15
+	 * corresponds to sin(~4.8°) = 0.084. */
+	#define FR_ATAN2_SMALL  2753
+	if (ay <= ax)
+	{
+		/* angle in [0°..45°]: use asin(ay/h) — well-conditioned near 0 */
+		s32 sin_val = (s32)(((int64_t)ay << FR_TRIG_PREC) / h);
+		if (sin_val < FR_ATAN2_SMALL)
+			q1_angle = FR_CHRDX(sin_val, FR_TRIG_PREC, out_radix);
+		else
+			q1_angle = FR_asin(sin_val, FR_TRIG_PREC, out_radix);
+	}
+	else
+	{
+		/* angle in [45°..90°]: use acos(ax/h) — well-conditioned near pi/2 */
+		s32 cos_val = (s32)(((int64_t)ax << FR_TRIG_PREC) / h);
+		if (cos_val < FR_ATAN2_SMALL)
+		{
+			/* angle ≈ pi/2 - cos_val (symmetric small-angle identity) */
+			s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);
+			q1_angle = half_pi - FR_CHRDX(cos_val, FR_TRIG_PREC, out_radix);
+		}
+		else
+			q1_angle = FR_acos(cos_val, FR_TRIG_PREC, out_radix);
+	}
+
+	/* Apply quadrant from signs of x and y.
+	 * q1_angle is always positive [0..pi/2]. */
+	{
+		s32 pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix);
+		if (x > 0)
+			return (y > 0) ? q1_angle : -q1_angle;
+		/* x < 0: mirror across y-axis */
+		return (y > 0) ? (pi - q1_angle) : (q1_angle - pi);
+	}
+}
+
+/* FR_atan(input, radix, out_radix) — arctangent of a single argument.
+ * Returns radians at out_radix, range [-pi/2, pi/2].
+ */
+s32 FR_atan(s32 input, u16 radix, u16 out_radix)
+{
+	s32 one = (s32)1 << radix;
+	return FR_atan2(input, one, out_radix);
+}
+
+/* 2^f table for f in [0, 1] in 65 entries (64 segments), output in s.16
+ * fixed point. Entry i = round(2^(i/64) * 65536).  Size: 260 bytes.
+ * Used by FR_pow2 to look up the fractional power of 2 with linear
+ * interpolation.
+ */
+static const u32 gFR_POW2_FRAC_TAB[65] = {
+     65536,  66250,  66971,  67700,  68438,  69183,  69936,  70698,
+     71468,  72246,  73032,  73828,  74632,  75444,  76266,  77096,
+     77936,  78785,  79642,  80510,  81386,  82273,  83169,  84074,
+     84990,  85915,  86851,  87796,  88752,  89719,  90696,  91684,
+     92682,  93691,  94711,  95743,  96785,  97839,  98905,  99982,
+    101070, 102171, 103283, 104408, 105545, 106694, 107856, 109031,
+    110218, 111418, 112631, 113858, 115098, 116351, 117618, 118899,
+    120194, 121502, 122825, 124163, 125515, 126882, 128263, 129660,
+    131072
+};
+
+/* FR_pow2(input, radix) — computes 2^(input/2^radix), result at same radix.
+ *
+ * Algorithm: split input into floor(integer) and fractional part. The
+ * fractional part is in [0, 1) by construction (Euclidean / mathematical
+ * floor — the fractional part of -2.3 is +0.7, not -0.3). Then
+ *   2^(int + frac) = 2^int * 2^frac
+ * where 2^frac is looked up from a 65-entry table at radix 16, and 2^int
+ * is a shift.
+ *
+ * Worst-case absolute error: ~1e-5 over [-8, 8] (65-entry table).
+ * Linear interpolation leaves a small concavity error in each interval.
+ */
+s32 FR_pow2(s32 input, u16 radix)
+{
+	s32 flr, frac_full, idx, frac_lo, lo, hi, mant, result;
+	u32 mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0;
+
+	/* Mathematical floor: for positive input it's input>>radix; for
+	 * negative input we need to round toward -infinity, not toward zero.
+	 */
+	if (input >= 0)
+	{
+		flr = (s32)((u32)input >> radix);
+		frac_full = (s32)((u32)input & mask);
+	}
+	else
+	{
+		s32 neg = -input;
+		s32 nflr = (s32)((u32)neg >> radix);
+		s32 nfrc = (s32)((u32)neg & mask);
+		if (nfrc == 0)
+		{
+			flr = -nflr;
+			frac_full = 0;
+		}
+		else
+		{
+			flr = -nflr - 1;          /* floor toward -inf */
+			frac_full = (s32)((1L << radix) - nfrc);
+		}
+	}
+
+	/* frac_full is in [0, 2^radix). Re-radix it to s.16 for table lookup. */
+	if (radix > 16)
+		frac_full >>= (radix - 16);
+	else if (radix < 16)
+		frac_full <<= (16 - radix);
+	/* now frac_full is in [0, 65536) representing fractional in s.16. */
+
+	/* Top 6 bits index the table; bottom 10 are the interpolation fraction. */
+	idx     = frac_full >> 10;
+	frac_lo = frac_full & ((1L << 10) - 1);
+	lo = (s32)gFR_POW2_FRAC_TAB[idx];
+	hi = (s32)gFR_POW2_FRAC_TAB[idx + 1];
+	mant = lo + (((hi - lo) * frac_lo) >> 10);  /* mant in s.16, in [1.0, 2.0) */
+
+	/* Apply integer shift. mant is at radix 16. We want output at `radix`.
+	 * If radix == 16: just shift mant.
+	 * Otherwise re-radix mant first.
+	 */
+	if (flr >= 0)
+	{
+		/* result = mant << flr, then re-radix to caller's radix. */
+		if (flr >= 30)
+			return FR_OVERFLOW_POS;
+		result = mant << flr;
+		return FR_CHRDX(result, 16, radix);
+	}
+	else
+	{
+		/* mant >> -flr at radix 16, then re-radix. */
+		s32 sh = -flr;
+		if (sh >= 30)
+			return 0;                       /* underflow */
+		result = mant >> sh;
+		return FR_CHRDX(result, 16, radix);
+	}
+}
+
+/* log2 mantissa table for m in [1, 2), m = 1 + i/64, returning log2(m)
+ * in s.16 fixed point. 65 entries (last is log2(2) = 1.0 = 65536) so the
+ * interpolation between idx and idx+1 never reads out of bounds.
+ * Size: 260 bytes.  Entry i = round(log2(1 + i/64) * 65536).
+ */
+static const u32 gFR_LOG2_MANT_TAB[65] = {
+        0,  1466,  2909,  4331,  5732,  7112,  8473,  9814,
+    11136, 12440, 13727, 14996, 16248, 17484, 18704, 19909,
+    21098, 22272, 23433, 24579, 25711, 26830, 27936, 29029,
+    30109, 31178, 32234, 33279, 34312, 35334, 36346, 37346,
+    38336, 39316, 40286, 41246, 42196, 43137, 44068, 44990,
+    45904, 46809, 47705, 48593, 49472, 50344, 51207, 52063,
+    52911, 53751, 54584, 55410, 56229, 57040, 57845, 58643,
+    59434, 60219, 60997, 61769, 62534, 63294, 64047, 64794,
+    65536
+};
+
+/* FR_log2(input, radix, output_radix) — log base 2 of a fixed-point number.
+ *
+ *   input        : value to take log2 of, treated as a positive sM.radix value.
+ *   radix        : number of fractional bits in `input`.
+ *   output_radix : number of fractional bits in the result.
+ *
+ * Returns FR_LOG2MIN for input <= 0 (log of zero/negative is undefined; we
+ * return a large negative sentinel rather than crash).
+ *
+ * Algorithm:
+ *   1. Find p, the position of the leading 1 bit of `input`.
+ *      log2(input) = p + log2(input / 2^p), where the second term is in
+ *      [0, 1) because (input / 2^p) is in [1, 2).
+ *   2. Normalize the mantissa to s1.31 by shifting `input` so its top bit
+ *      sits at bit 31 (so bits 30..25 are the upper 6 bits of m-1).
+ *   3. Look up log2(m) in the 65-entry table with linear interpolation
+ *      across the next 24 bits. Result is in s.16.
+ *   4. integer_part = (p - radix), then result = (integer_part << 16) +
+ *      mantissa_log2.
+ *   5. Re-radix to the requested output_radix via FR_CHRDX.
+ *
+ * Worst-case absolute error: ~6e-5 in log2 units (65-entry table).
+ */
+s32 FR_log2(s32 input, u16 radix, u16 output_radix)
+{
+	s32 p, integer_part, idx, frac, lo, hi, mant_log2, result;
+	u32 m, u;
+
+	if (input <= 0)
+		return FR_LOG2MIN;
+
+	/* Step 1: find the position of the leading 1 bit. */
+	u = (u32)input;
+	p = 0;
+	while (u > 1)
+	{
+		u >>= 1;
+		p++;
+	}
+
+	/* Step 2: shift input so the leading 1 bit is at bit 30 (s1.30 mantissa).
+	 * Equivalently: m = input << (30 - p), where m is in [2^30, 2^31).
+	 * The fractional part of m / 2^30 is in [0, 1), and that's what we look
+	 * up in the table.
+	 */
+	if (p >= 30)
+		m = (u32)input >> (p - 30);
+	else
+		m = (u32)input << (30 - p);
+
+	/* m is now in [2^30, 2^31). Subtract 2^30 to get the fractional part
+	 * (m_frac in [0, 2^30)). Index into the 64-entry table is the top 6
+	 * bits of m_frac; the lower 24 bits are the interpolation fraction.
+	 */
+	m -= (1u << 30);
+	idx  = (s32)(m >> 24);                    /* 6 bits  */
+	frac = (s32)(m & ((1u << 24) - 1));       /* 24 bits */
+	lo = (s32)gFR_LOG2_MANT_TAB[idx];
+	hi = (s32)gFR_LOG2_MANT_TAB[idx + 1];
+	mant_log2 = lo + (s32)(((int64_t)(hi - lo) * frac) >> 24);
+
+	/* Step 3: assemble. integer_part = p - radix. */
+	integer_part = p - (s32)radix;
+	result = (integer_part << 16) + mant_log2;
+
+	/* Step 4: re-radix to output_radix. */
+	return FR_CHRDX(result, 16, output_radix);
+}
+
+s32 FR_ln(s32 input, u16 radix, u16 output_radix)
+{
+	s32 r = FR_log2(input, radix, output_radix);
+	return FR_MULK28(r, FR_krLOG2E_28);
+}
+
+#ifndef FR_LEAN
+s32 FR_log10(s32 input, u16 radix, u16 output_radix)
+{
+	s32 r = FR_log2(input, radix, output_radix);
+	return FR_MULK28(r, FR_krLOG2_10_28);
+}
+#endif
+
+#ifndef FR_NO_PRINT
+/***************************************
+ * FR_printNumD - write a decimal integer with space padding.
+ *
+ * Equivalent to "%*d" in printf, modulo the return convention.
+ *
+ *   f       : per-character output function (e.g. putchar). Must not be NULL.
+ *   n       : signed integer to print.
+ *   pad     : minimum field width; spaces are prepended to reach this width.
+ *
+ * Returns the number of characters written on success, or -1 if `f` is NULL.
+ */
+int FR_printNumD(int (*f)(char), int n, int pad)
+{
+	unsigned int mag;
+	int written = 0, neg = 0;
+	int digits = 1;
+	unsigned int t;
+
+	if (!f)
+		return -1;
+
+	if (n < 0)
+	{
+		neg = 1;
+		mag = (unsigned int)(-(long)n); /* safe for INT_MIN */
+	}
+	else
+	{
+		mag = (unsigned int)n;
+	}
+
+	/* Count decimal digits in mag (always at least 1 for n=0). */
+	t = mag;
+	while (t >= 10)
+	{
+		t /= 10;
+		digits++;
+	}
+
+	/* Pad with spaces. The total width includes the sign. */
+	{
+		int total = digits + (neg ? 1 : 0);
+		while (pad-- > total)
+		{
+			f(' ');
+			written++;
+		}
+	}
+
+	if (neg)
+	{
+		f('-');
+		written++;
+	}
+
+	/* Print digits MSB first by computing the largest power of 10 <= mag. */
+	{
+		unsigned int p = 1;
+		int i;
+		for (i = 1; i < digits; i++)
+			p *= 10;
+		while (p > 0)
+		{
+			f((char)('0' + (mag / p) % 10));
+			written++;
+			if (p == 1)
+				break;
+			p /= 10;
+		}
+	}
+
+	return written;
+}
+
+/***************************************
+ * FR_printNumF - write a fixed-point number as a decimal floating-point string.
+ *
+ *   f      : per-character output function. Must not be NULL.
+ *   n      : signed fixed-point value at the given radix.
+ *   radix  : number of fractional bits in `n`.
+ *   pad    : minimum field width (including sign and decimal point).
+ *   prec   : number of fractional digits to print.
+ *
+ * Returns the number of characters written on success, -1 if `f` is NULL.
+ *
+ * Rounding policy: truncates fractional digits beyond `prec` (no rounding).
+ */
+int FR_printNumF(int (*f)(char), s32 n, int radix, int pad, int prec)
+{
+	unsigned int mag_int;
+	u32 mag_frac;
+	u32 frac_mask;
+	int written = 0, neg = 0;
+	int int_digits = 1;
+	int total;
+	unsigned int t;
+
+	if (!f)
+		return -1;
+
+	frac_mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0;
+
+	if (n < 0)
+	{
+		neg = 1;
+		/* Negate as unsigned to avoid INT_MIN overflow. */
+		u32 un = (u32)(-(int64_t)n);
+		mag_int  = (unsigned int)(un >> radix);
+		mag_frac = un & frac_mask;
+	}
+	else
+	{
+		mag_int  = (unsigned int)((u32)n >> radix);
+		mag_frac = (u32)n & frac_mask;
+	}
+
+	/* Count integer digits. */
+	t = mag_int;
+	while (t >= 10)
+	{
+		t /= 10;
+		int_digits++;
+	}
+
+	/* Total visible width = sign + int + (dot + prec digits if prec>0). */
+	total = int_digits + (neg ? 1 : 0) + ((prec > 0) ? (1 + prec) : 0);
+	while (pad-- > total)
+	{
+		f(' ');
+		written++;
+	}
+
+	if (neg)
+	{
+		f('-');
+		written++;
+	}
+
+	/* Print integer part. */
+	{
+		unsigned int p = 1;
+		int i;
+		for (i = 1; i < int_digits; i++)
+			p *= 10;
+		while (p > 0)
+		{
+			f((char)('0' + (mag_int / p) % 10));
+			written++;
+			if (p == 1)
+				break;
+			p /= 10;
+		}
+	}
+
+	/* Print fractional part. Extract one decimal digit at a time:
+	 * frac' = frac * 10
+	 * digit = frac' >> radix
+	 * frac  = frac' & frac_mask
+	 */
+	if (prec > 0)
+	{
+		f('.');
+		written++;
+		while (prec-- > 0)
+		{
+			u32 scaled;
+			int digit;
+			scaled = (u32)(((uint64_t)mag_frac * 10));
+			digit = (int)(scaled >> radix);
+			mag_frac = scaled & frac_mask;
+			f((char)('0' + (digit % 10)));
+			written++;
+		}
+	}
+
+	return written;
+}
+
+/***************************************
+ * FR_printNumH - write an integer as hexadecimal.
+ *
+ *   f          : per-character output function. Must not be NULL.
+ *   n          : integer to print (interpreted as unsigned for the digits).
+ *   showPrefix : if non-zero, prepend "0x".
+ *
+ * Returns the number of characters written on success, -1 if f is NULL.
+ */
+int FR_printNumH(int (*f)(char), int n, int showPrefix)
+{
+	unsigned int u = (unsigned int)n;
+	int written = 0;
+	int x = (int)((sizeof(int) << 1) - 1);
+	int d;
+
+	if (!f)
+		return -1;
+
+	if (showPrefix)
+	{
+		f('0');
+		f('x');
+		written += 2;
+	}
+
+	do
+	{
+		d = (int)((u >> (x << 2)) & 0xf);
+		d = (d > 9) ? (d - 0xa + 'a') : (d + '0');
+		f((char)d);
+		written++;
+	} while (x--);
+
+	return written;
+}
+
+/*=======================================================
+ * FR_numstr — parse a decimal string into a fixed-point value.
+ *
+ * This is the runtime inverse of FR_printNumF: given a string like
+ * "12.34" or "-0.05" and a radix (number of fractional bits), it
+ * returns the s32 fixed-point representation.
+ *
+ * Features:
+ *   - Leading whitespace is skipped.
+ *   - Optional sign ('+' or '-').
+ *   - Up to 9 fractional digits are used (s32 range).
+ *   - No malloc, no strtod, no libm.
+ *
+ * Returns 0 for NULL or empty input.
+ */
+s32 FR_numstr(const char *s, u16 radix)
+{
+    static const s32 pow10[10] = {
+        1L, 10L, 100L, 1000L, 10000L,
+        100000L, 1000000L, 10000000L, 100000000L, 1000000000L
+    };
+    s32 int_part = 0, frac_part = 0;
+    int frac_digits = 0, neg = 0;
+    s32 result;
+
+    if (!s || !*s) return 0;
+
+    while (*s == ' ' || *s == '\t') s++;          /* skip whitespace */
+    if (*s == '-') { neg = 1; s++; }              /* sign            */
+    else if (*s == '+') { s++; }
+
+    while (*s >= '0' && *s <= '9')                /* integer part    */
+        { int_part = int_part * 10 + (*s - '0'); s++; }
+
+    if (*s == '.') {                              /* fractional part */
+        s++;
+        while (*s >= '0' && *s <= '9') {
+            if (frac_digits < 9)
+                { frac_part = frac_part * 10 + (*s - '0'); frac_digits++; }
+            s++;
+        }
+    }
+
+    result = int_part << radix;
+    if (frac_digits > 0)
+        result += (s32)(((int64_t)frac_part << radix) / pow10[frac_digits]);
+
+    return neg ? -result : result;
+}
+#endif /* FR_NO_PRINT */
+
+/*=======================================================
+ * Square root and hypot
+ *
+ * fr_isqrt64 is a private helper implementing the digit-by-digit
+ * ("shift-and-subtract") integer square root. The core loop computes
+ * floor(sqrt(n)), then a final remainder check rounds to nearest.
+ * Uses no division. At most 32 iterations.
+ */
+static u32 fr_isqrt64(uint64_t n)
+{
+	uint64_t root = 0;
+	uint64_t bit  = (uint64_t)1 << 62;
+	while (bit > n) bit >>= 2;
+	while (bit != 0)
+	{
+		uint64_t trial = root + bit;
+		if (n >= trial)
+		{
+			n -= trial;
+			root = (root >> 1) + bit;
+		}
+		else
+		{
+			root >>= 1;
+		}
+		bit >>= 2;
+	}
+	/* round to nearest: if remainder > root, (root+1)^2 is closer */
+	if (n > root)
+		root++;
+	return (u32)root;
+}
+
+/*=======================================================
+ * FR_sqrt - fixed-radix square root.
+ *
+ *   input  : value at radix `radix`. Must be >= 0.
+ *   radix  : fractional bits of input AND result.
+ *   return : sqrt(input) at radix `radix`, or FR_DOMAIN_ERROR if input < 0.
+ *
+ * Math: sqrt(input_fp / 2^r) at radix r is
+ *   result_fp = sqrt(input_fp / 2^r) * 2^r = sqrt(input_fp * 2^r)
+ * so we compute isqrt(input_fp << radix) on a 64-bit accumulator. This
+ * works for any input that fits in s32 and any radix in [0, 30].
+ *
+ * Precision: round-to-nearest sqrt. Worst-case absolute error is
+ * <= 0.5 LSB at the requested radix.
+ * Always non-negative for non-negative input. Result is monotone in
+ * input.
+ *
+ * Saturation: input < 0 returns FR_DOMAIN_ERROR (= INT32_MIN). Caller
+ * can test `result == FR_DOMAIN_ERROR` to detect domain errors.
+ *
+ * Side effects: none. Pure function.
+ */
+s32 FR_sqrt(s32 input, u16 radix)
+{
+	uint64_t n;
+
+	if (input < 0)
+		return FR_DOMAIN_ERROR;
+	if (input == 0)
+		return 0;
+
+	n = (uint64_t)(u32)input << radix;
+	return (s32)fr_isqrt64(n);
+}
+
+/*=======================================================
+ * FR_hypot - sqrt(x*x + y*y) without intermediate overflow.
+ *
+ *   x, y   : values at radix `radix`
+ *   radix  : fractional bits of inputs AND result
+ *   return : sqrt(x*x + y*y) at radix `radix`.
+ *
+ * Math: x*x + y*y is naturally at radix 2*radix; isqrt of a 2r-radix
+ * value yields an r-radix result, so no extra shifting is needed. The
+ * u64 accumulator can hold (INT32_MAX^2)*2 = ~2^63, so (x*x + y*y) never
+ * overflows for any s32 inputs.
+ *
+ * Precision: round-to-nearest. Worst-case absolute error <= 0.5 LSB
+ * at the requested radix.
+ *
+ * Side effects: none. Pure function.
+ */
+#ifndef FR_LEAN
+s32 FR_hypot(s32 x, s32 y, u16 radix)
+{
+	uint64_t xx = (uint64_t)((int64_t)x * (int64_t)x);
+	uint64_t yy = (uint64_t)((int64_t)y * (int64_t)y);
+	(void)radix; /* the 2*radix in xx+yy cancels with isqrt's halving */
+	return (s32)fr_isqrt64(xx + yy);
+}
+#endif
+
+/*=======================================================
+ * FR_hypot_fast8 — 8-segment piecewise-linear magnitude approximation.
+ *
+ * Shift-only, no multiply, no 64-bit.  Based on the piecewise-linear
+ * method described in US Patent 6,567,777 B1 (Chatterjee, expired).
+ * Peak error: ~0.10%.
+ */
+s32 FR_hypot_fast8(s32 x, s32 y)
+{
+    s32 hi, lo;
+
+    /* absolute values (clamp INT32_MIN to INT32_MAX to avoid UB) */
+    if (x < 0) x = (x == (s32)0x80000000) ? 0x7FFFFFFF : -x;
+    if (y < 0) y = (y == (s32)0x80000000) ? 0x7FFFFFFF : -y;
+
+    /* hi = max(|x|,|y|), lo = min(|x|,|y|) */
+    if (x > y) { hi = x; lo = y; }
+    else       { hi = y; lo = x; }
+
+    if (hi == 0) return 0;
+
+    /* 8 piecewise-linear segments: dist ≈ a*hi + b*lo.
+     * Boundaries at β = 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875. */
+    if ((hi >> 1) < lo) {
+        /* β in (0.5, 1.0] */
+        if (lo > hi - (hi >> 2)) {
+            /* β in (0.75, 1.0] */
+            if (lo > hi - (hi >> 3))                   /* β > 0.875 */
+                /* a≈0.7305, b≈0.6836 */
+                return hi - (hi >> 2) - (hi >> 6) - (hi >> 8)
+                     + lo - (lo >> 2) - (lo >> 4) - (lo >> 8);
+            else                                        /* β in (0.75, 0.875] */
+                /* a≈0.7803, b≈0.6262 */
+                return hi - (hi >> 2) + (hi >> 5) - (hi >> 10)
+                     + (lo >> 1) + (lo >> 3) + (lo >> 10) + (lo >> 12);
+        } else {
+            /* β in (0.5, 0.75] */
+            if (lo > hi - (hi >> 1) + (hi >> 3))       /* β > 0.625 */
+                /* a≈0.8281, b≈0.5630 */
+                return hi - (hi >> 2) + (hi >> 4) + (hi >> 6)
+                     + (lo >> 1) + (lo >> 4) + (lo >> 11);
+            else                                        /* β in (0.5, 0.625] */
+                /* a≈0.8728, b≈0.4893 */
+                return hi - (hi >> 3) - (hi >> 9) - (hi >> 12)
+                     + (lo >> 1) - (lo >> 6) + (lo >> 8) + (lo >> 10);
+        }
+    } else {
+        /* β in [0, 0.5] */
+        if ((hi >> 2) < lo) {
+            /* β in (0.25, 0.5] */
+            if ((hi >> 1) - (hi >> 3) < lo)             /* β > 0.375 */
+                /* a≈0.9180, b≈0.3984 */
+                return hi - (hi >> 4) - (hi >> 6) - (hi >> 8)
+                     + (lo >> 1) - (lo >> 3) + (lo >> 5) - (lo >> 7);
+            else                                        /* β in (0.25, 0.375] */
+                /* a≈0.9551, b≈0.2988 */
+                return hi - (hi >> 4) + (hi >> 6) + (hi >> 9)
+                     + (lo >> 2) + (lo >> 4) - (lo >> 6) + (lo >> 9);
+        } else {
+            /* β in [0, 0.25] */
+            if ((hi >> 3) < lo)                         /* β in (0.125, 0.25] */
+                /* a≈0.9839, b≈0.1838 */
+                return hi - (hi >> 6) - (hi >> 11)
+                     + (lo >> 2) - (lo >> 4) - (lo >> 8) + (lo >> 12);
+            else                                        /* β in [0, 0.125] */
+                /* a≈0.9990, b≈0.0620 */
+                return hi - (hi >> 10)
+                     + (lo >> 4) - (lo >> 11);
+        }
+    }
+}
+
+#ifndef FR_NO_WAVES
+/*=======================================================
+ * Wave generators — synth-style fixed-shape waveforms.
+ *
+ * All wave functions take a u16 BAM phase in [0, 65535] (a full cycle)
+ * and return s16 in s0.15 format, clamped to [-32767, +32767] to match
+ * the trig amplitude convention used by fr_cos_bam / fr_sin_bam.
+ *
+ * Use FR_HZ2BAM_INC(hz, sample_rate) to compute a phase increment for
+ * a given output frequency, then accumulate it (mod 2^16) per sample.
+ *
+ * Side effects: pure functions (except fr_wave_noise which advances a
+ * caller-provided LFSR state pointer).
+ */
+
+/* fr_wave_sqr - 50%-duty square wave.
+ * phase < pi (BAM<0x8000) → +full; phase >= pi → -full.
+ */
+s16 fr_wave_sqr(u16 phase)
+{
+	return (phase < 0x8000) ? (s16)32767 : (s16)-32767;
+}
+
+/* fr_wave_pwm - variable-duty pulse.
+ * `duty` is the BAM threshold: phase < duty → high, else low.
+ *   duty = 0      → always low
+ *   duty = 0x8000 → 50% duty (same as fr_wave_sqr)
+ *   duty = 0xffff → high almost everywhere (one BAM step low)
+ */
+s16 fr_wave_pwm(u16 phase, u16 duty)
+{
+	return (phase < duty) ? (s16)32767 : (s16)-32767;
+}
+
+/* fr_wave_saw - rising sawtooth.
+ * Linear ramp from -32767 (just after phase=0) to +32767 (at phase=0xffff),
+ * passing through 0 at phase=0x8000. The single boundary case phase=0
+ * (which would naturally produce -32768) is clamped to -32767 to keep the
+ * amplitude symmetric.
+ */
+s16 fr_wave_saw(u16 phase)
+{
+	s32 v = (s32)phase - (s32)0x8000;
+	if (v < -32767) v = -32767;
+	return (s16)v;
+}
+
+/* fr_wave_tri - symmetric triangle.
+ * Four linear segments:
+ *   Q1 [0, 0x4000)  : rising  0 → +peak
+ *   Q2 [0x4000, 0x8000): falling +peak → 0
+ *   Q3 [0x8000, 0xc000): falling 0 → -peak
+ *   Q4 [0xc000, 0x10000): rising  -peak → 0
+ * Peaks are clamped to +/-32767 (the natural unclamped formula gives
+ * +/-32768 at the exact peak BAM).
+ */
+s16 fr_wave_tri(u16 phase)
+{
+	s32 t;
+	if (phase < 0x8000)
+	{
+		/* First half: 0 -> +peak -> 0 */
+		if (phase < 0x4000)
+			t = (s32)phase << 1;          /* 0 .. 0x7ffe */
+		else
+			t = (s32)(0x8000 - phase) << 1; /* 0x8000 .. 2 */
+		if (t > 32767) t = 32767;
+		return (s16)t;
+	}
+	else
+	{
+		/* Second half: 0 -> -peak -> 0 */
+		if (phase < 0xc000)
+			t = (s32)(phase - 0x8000) << 1; /* 0 .. 0x7ffe */
+		else
+			t = (s32)(0x10000 - phase) << 1;/* 0x8000 .. 2 */
+		if (t > 32767) t = 32767;
+		return (s16)-t;
+	}
+}
+
+/* fr_wave_tri_morph - variable-symmetry triangle.
+ *
+ *   phase       : u16 BAM
+ *   break_point : u16 BAM where the wave reaches its positive peak.
+ *
+ * Going from 0 to +peak in [0, break_point), then from +peak back to 0
+ * in [break_point, 0xffff]. The result is a triangle whose rising and
+ * falling slopes can differ.
+ *
+ *   break_point = 0x8000  → symmetric triangle
+ *   break_point = 0xffff  → rising sawtooth (instant fall)
+ *   break_point = 0x0001  → falling sawtooth (instant rise)
+ *   break_point = 0       → degenerate; treated as 1 to avoid div-by-zero
+ *
+ * Note that this version returns values in [0, 32767] only (not bipolar).
+ * Caller can subtract 16384 and double if a bipolar version is desired.
+ *
+ * Costs: one 32-bit divide per sample. On Cortex-M3+ this is ~10-20
+ * cycles. On 8051 / MSP430 this is much slower; pre-compute slopes if
+ * those targets matter to you.
+ */
+s16 fr_wave_tri_morph(u16 phase, u16 break_point)
+{
+	u32 t;
+	if (break_point == 0)
+		break_point = 1;
+	if (phase < break_point)
+	{
+		/* rising: 0 at phase=0, 32767 at phase=break_point */
+		t = (u32)(((u32)phase * 32767UL) / (u32)break_point);
+	}
+	else
+	{
+		/* falling: 32767 at phase=break_point, 0 at phase=0xffff */
+		u32 span = (u32)0xffff - (u32)break_point;
+		if (span == 0)
+			return 32767;
+		t = (u32)(((u32)((u32)0xffff - (u32)phase) * 32767UL) / span);
+	}
+	if (t > 32767) t = 32767;
+	return (s16)t;
+}
+
+/* fr_wave_noise - LFSR-based pseudorandom noise.
+ *
+ *   state : pointer to a u32 the caller maintains. Initial value must
+ *           be non-zero (zero is a fixed point of the LFSR). A common
+ *           seed is 0xACE1u or any other non-zero constant.
+ *
+ * Returns the next s16 sample in s0.15 (full ±32767 range, white-ish).
+ * Implementation: 32-bit Galois LFSR with the standard maximal-period
+ * tap polynomial 0xD0000001 (period 2^32 - 1 samples).
+ *
+ * Quality: this is "fast white noise" suitable for synth use. It is NOT
+ * cryptographically secure. For better statistical properties (FFT
+ * flatness etc.) layer a longer LFSR or use a separate PRNG.
+ */
+s16 fr_wave_noise(u32 *state)
+{
+	u32 lsb;
+	if (!state)
+		return 0;
+	lsb = *state & 1u;
+	*state >>= 1;
+	if (lsb)
+		*state ^= 0xD0000001u;
+	/* Take the top 16 bits and re-bias to s16 range, clamp to ±32767. */
+	{
+		s32 v = (s32)((*state >> 16) & 0xffffu) - 32768;
+		if (v < -32767) v = -32767;
+		return (s16)v;
+	}
+}
+
+/*=======================================================
+ * ADSR envelope generator
+ *
+ * Linear-segment Attack-Decay-Sustain-Release envelope. State is held
+ * in caller-allocated fr_adsr_t struct (no global state, no malloc).
+ *
+ * Lifecycle:
+ *   1. Caller allocates an fr_adsr_t (stack or static).
+ *   2. fr_adsr_init() once per patch with attack/decay/release durations
+ *      in samples and a sustain level in s0.15.
+ *   3. fr_adsr_trigger() on note-on. Output rises 0 -> peak over `atk`
+ *      samples, falls peak -> sustain over `dec` samples, then holds.
+ *   4. fr_adsr_release() on note-off. Output falls current -> 0 over a
+ *      time controlled by the release rate (rate, not duration: the
+ *      time depends on where in the envelope we are).
+ *   5. fr_adsr_step() once per audio sample to read the current value.
+ *
+ * Internal precision: levels are stored as s32 in s1.30 format so even
+ * very long envelopes (e.g. 48000-sample attack at 48 kHz = 1 second)
+ * have a non-zero per-sample increment. Output is converted to s0.15.
+ *
+ * Saturation: the envelope state machine is self-clamping; level cannot
+ * escape [0, 1<<30]. Output is in [0, 32767].
+ */
+
+#define FR_ADSR_PEAK_S130 ((s32)1 << 30)
+
+void fr_adsr_init(fr_adsr_t *env,
+                  u32 attack_samples,
+                  u32 decay_samples,
+                  s16 sustain_level_s015,
+                  u32 release_samples)
+{
+	if (!env)
+		return;
+	env->state   = FR_ADSR_IDLE;
+	env->level   = 0;
+
+	/* sustain_level_s015 is s16 so its upper bound (32767) is already the
+	 * type's max; only the lower bound needs an explicit clamp. */
+	if (sustain_level_s015 < 0)
+		sustain_level_s015 = 0;
+	/* Convert s0.15 -> s1.30 by shifting left 15. */
+	env->sustain = (s32)sustain_level_s015 << 15;
+
+	env->attack_inc  = (attack_samples  > 0)
+	    ? (s32)(FR_ADSR_PEAK_S130 / attack_samples)
+	    : FR_ADSR_PEAK_S130;
+	env->decay_dec   = (decay_samples   > 0)
+	    ? (s32)((FR_ADSR_PEAK_S130 - env->sustain) / (s32)decay_samples)
+	    : (FR_ADSR_PEAK_S130 - env->sustain);
+	env->release_dec = (release_samples > 0)
+	    ? (s32)(FR_ADSR_PEAK_S130 / release_samples)
+	    : FR_ADSR_PEAK_S130;
+}
+
+void fr_adsr_trigger(fr_adsr_t *env)
+{
+	if (!env)
+		return;
+	env->state = FR_ADSR_ATTACK;
+	env->level = 0;
+}
+
+void fr_adsr_release(fr_adsr_t *env)
+{
+	if (!env)
+		return;
+	env->state = FR_ADSR_RELEASE;
+}
+
+s16 fr_adsr_step(fr_adsr_t *env)
+{
+	if (!env)
+		return 0;
+	switch (env->state)
+	{
+	case FR_ADSR_ATTACK:
+		env->level += env->attack_inc;
+		if (env->level >= FR_ADSR_PEAK_S130)
+		{
+			env->level = FR_ADSR_PEAK_S130;
+			env->state = FR_ADSR_DECAY;
+		}
+		break;
+	case FR_ADSR_DECAY:
+		env->level -= env->decay_dec;
+		if (env->level <= env->sustain)
+		{
+			env->level = env->sustain;
+			env->state = FR_ADSR_SUSTAIN;
+		}
+		break;
+	case FR_ADSR_SUSTAIN:
+		env->level = env->sustain;
+		break;
+	case FR_ADSR_RELEASE:
+		env->level -= env->release_dec;
+		if (env->level <= 0)
+		{
+			env->level = 0;
+			env->state = FR_ADSR_IDLE;
+		}
+		break;
+	case FR_ADSR_IDLE:
+	default:
+		env->level = 0;
+		break;
+	}
+	/* s1.30 -> s0.15: shift right 15. Clamp for safety. */
+	{
+		s32 out = env->level >> 15;
+		if (out < 0) out = 0;
+		if (out > 32767) out = 32767;
+		return (s16)out;
+	}
+}
+#endif /* FR_NO_WAVES */
diff --git a/fw/fr_math/FR_math.h b/fw/fr_math/FR_math.h
new file mode 100644
index 0000000..a2db262
--- /dev/null
+++ b/fw/fr_math/FR_math.h
@@ -0,0 +1,653 @@
+/**
+ *	@FR_math.h - header definition file for fixed radix math routines
+ *
+ *	@copy Copyright (C) <2001-2026>  <M. A. Chatterjee>
+ *  @author M A Chatterjee <deftio [at] deftio [dot] com>
+ *
+ *  This file contains integer math settable fixed point radix math routines for
+ *  use on systems in which floating point is not desired or unavailable.
+ *
+ *  @license:
+ *	This software is provided 'as-is', without any express or implied
+ *	warranty. In no event will the authors be held liable for any damages
+ *	arising from the use of this software.
+ *
+ *	Permission is granted to anyone to use this software for any purpose,
+ *	including commercial applications, and to alter it and redistribute it
+ *	freely, subject to the following restrictions:
+ *
+ *	1. The origin of this software must not be misrepresented; you must not
+ *	claim that you wrote the original software. If you use this software
+ *	in a product, an acknowledgment in the product documentation would be
+ *	appreciated but is not required.
+ *
+ *	2. Altered source versions must be plainly marked as such, and must not be
+ *	misrepresented as being the original software.
+ *
+ *	3. This notice may not be removed or altered from any source
+ *	distribution.
+ *
+ */
+
+#ifndef __FR_Math_h__
+#define __FR_Math_h__
+
+#define FR_MATH_VERSION     "2.0.8"
+#define FR_MATH_VERSION_HEX  0x020008  /* major << 16 | minor << 8 | patch */
+
+#ifdef FR_CORE_ONLY
+#define FR_NO_PRINT
+#define FR_NO_WAVES
+#endif
+
+#ifdef FR_LEAN
+#define FR_NO_WAVES
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifndef __FR_Platform_Defs_H__
+#include "FR_defs.h"
+#endif
+
+/* Quick note on macro parameter wrapping:
+ * Arguments are parenthesized in expansions, e.g.
+ *   #define MACRO_X_SQUARED(x)  ((x)*(x))   // inner parens around each x
+ * Macros substitute text as-is. If a parameter is an expression like 3+4*5
+ * and the body mixes operators without extra parentheses, precedence errors
+ * follow. Parenthesize parameters (and fragile subexpressions) in the macro body.
+ * Example: MACRO_X_SQUARED_BAD(x)  (x*x)  ->  3+4*5*3+4*5  == 83 (wrong).
+ *          MACRO_X_SQUARED(x)  ((x)*(x))  ->  (3+4*5)*(3+4*5)  == 529 (right).
+ */
+
+/*absolute value for integer and fixed radix types*/
+#define FR_ABS(x) (((x) < 0) ? (-(x)) : (x))
+
+/*sign of x.  Not as good as The Sign of Four, but I digress           */
+#define FR_SGN(x) ((x) >> ((((signed)sizeof(x)) << 3) - 1))
+
+/*===============================================
+ * Simple Fixed Point Math Conversions
+ * r is radix precision in bits, converts to/from integer w truncation */
+#define I2FR(x, r) ((x) << (r))
+#define FR2I(x, r) ((x) >> (r))
+
+/*===============================================
+ * Make a fixed radix number from integer + base-10 fractional parts.
+ * r  = output radix (fractional bits)
+ * i  = integer part (signed)
+ * f  = decimal-fraction digits as written, e.g. for "12.34" pass f=34
+ * d  = number of decimal digits in f, e.g. for "12.34" pass d=2
+ *
+ * FR_NUM(12, 34, 2, 10)  ≈ 12.34 in s.10  = (12<<10) + (34<<10)/100 = 12636
+ * FR_NUM(-3, 5,  1, 16)  ≈ -3.5  in s.16  = (-3<<16) - (5<<16)/10
+ *
+ * The fraction is rounded toward zero. For round-to-nearest, add half an LSB
+ * before scaling at the call site. Sign of the fractional part follows the
+ * sign of i (for i==0 the result is positive, matching "+0.5" intuition).
+ */
+#define FR_NUM_POW10(d) (                                              \
+    ((d) == 0) ? 1L :                                                  \
+    ((d) == 1) ? 10L :                                                 \
+    ((d) == 2) ? 100L :                                                \
+    ((d) == 3) ? 1000L :                                               \
+    ((d) == 4) ? 10000L :                                              \
+    ((d) == 5) ? 100000L :                                             \
+    ((d) == 6) ? 1000000L :                                            \
+    ((d) == 7) ? 10000000L :                                           \
+    ((d) == 8) ? 100000000L : 1000000000L)
+#define FR_NUM(i, f, d, r) (                                           \
+    ((s32)(i) << (r)) +                                                \
+    (((i) < 0)                                                         \
+        ? -((s32)(((s32)(f) << (r)) / FR_NUM_POW10(d)))                \
+        :  ((s32)(((s32)(f) << (r)) / FR_NUM_POW10(d)))))
+/*
+FR_INT(x,r) convert a fixed radix variable x of radix r to an integer
+*/
+#define FR_INT(x, r) (((x) < 0) ? -((-(x)) >> (r)) : ((x) >> (r)))
+
+/* Change Radix (x,current_radix, new_radix)
+ * change number from its current fixed radix (can be 0 or integer) to a new fixed radix
+ * Useful when dealing with numbers with mixed radixes.  This is a MACRO so
+ * this code expands in place and x is modified.
+ */
+#define FR_CHRDX(x, r_cur, r_new) (((r_cur) - (r_new)) >= 0 ? ((x) >> ((r_cur) - (r_new))) : ((x) << ((r_new) - (r_cur))))
+
+/* return only the fractional part of x */
+#define FR_FRAC(x, r) ((FR_ABS(x)) & (((1 << (r)) - 1)))
+
+/* return the fractional part of number x with radix xr scaled to radix nr bits */
+#define FR_FRACS(x, xr, nr) (FR_CHRDX(FR_FRAC((x), (xr)), (xr), (nr)))
+
+/******************************************************
+  Add (sub) to fixed point numbers by converting the second number to the
+  same radix as the first. If yr < xr then possibility of overflow is increased.
+  Note: for two vars, i, j, of prec ir, jr, it is not necessarily true that
+  FR_ADD(i,ir,j,jr) == FR_ADD(j,jr,i,ir)
+*/
+#define FR_ADD(x, xr, y, yr) ((x) += FR_CHRDX(y, yr, xr))
+#define FR_SUB(x, xr, y, yr) ((x) -= FR_CHRDX(y, yr, xr))
+
+/* Fixed-radix division with round-to-nearest: x (at radix xr) / y (at radix yr),
+ * result at radix xr. Uses a 64-bit intermediate so the full Q16.16 range works
+ * correctly. Adds half the divisor before truncation to achieve ≤ 0.5 LSB error. */
+static inline s32 FR_div_rnd(s64 num, s32 den) {
+    if ((num ^ den) >= 0)                   /* same sign: positive quotient */
+        return (s32)((num + den / 2) / den);
+    else                                     /* negative quotient */
+        return (s32)((num - den / 2) / den);
+}
+#define FR_DIV(x, xr, y, yr) FR_div_rnd((s64)(x) << (yr), (s32)(y))
+
+/* FR_DIV_TRUNC: truncating division (old FR_DIV behavior). Useful when the
+ * caller knows the quotient is exact or truncation is acceptable. */
+#define FR_DIV_TRUNC(x, xr, y, yr) ((s32)(((s64)(x) << (yr)) / (s32)(y)))
+
+/* FR_DIV32: 32-bit-only division. Requires |x| < 2^(31-yr) to avoid
+ * overflow in the intermediate (x << yr). Use FR_DIV for full-range
+ * division with 64-bit intermediate. */
+#define FR_DIV32(x, xr, y, yr) (((s32)(x) << (yr)) / (s32)(y))
+
+/* Remainder: both operands should be at the same radix. */
+#define FR_MOD(x, y) ((x) % (y))
+
+/* min, max, clamp */
+#define FR_MIN(a, b)         (((a) < (b)) ? (a) : (b))
+#define FR_MAX(a, b)         (((a) > (b)) ? (a) : (b))
+#define FR_CLAMP(x, lo, hi)  (FR_MIN(FR_MAX((x), (lo)), (hi)))
+
+/* Check if x is a power of 2. */
+#define FR_ISPOW2(x) (!((x) & ((x) - 1)))
+
+/* floor and ceiling in current radix, leaving current radix intact
+ * this means the lower radix number of bits are set to 0
+ */
+#define FR_FLOOR(x, r) ((x) & (~((1 << r) - 1)))
+#define FR_CEIL(x, r) (FR_FLOOR(x, r) + ((FR_FRAC(x, r) ? (1 << r) : 0)))
+
+/*******************************************************
+  Interpolate between 2 fixed point values (x0,x1) of any radix,
+  with a fractional delta, of a supplied precision.
+  If delta is outside 0<= delta<=1 then extrapolation
+  does work but programmer should watch for possible overflow.
+  x0,x1 need not have same radix as delta
+*/
+#define FR_INTERP(x0, x1, delta, prec) ((x0) + ((((x1) - (x0)) * (delta)) >> (prec)))
+
+/******************************************************
+   FR_INTERPI is the same as FR_INTERP except that  insures the
+   range is btw [0<= delta < 1] --> ((0 ... (1<<prec)-1)
+   Note: delta should be >= 0
+*/
+#define FR_INTERPI(x0, x1, delta, prec) ((x0) + ((((x1) - (x0)) * ((delta) & ((1 << (prec)) - 1))) >> (prec)))
+
+/******************************************************
+  Convert to double, this is for debug only and WILL NOT compile under many embedded systems.
+  Fixed Radix to Floating Point Double Conversion
+  since this is a MACRO it will not be compiled or instantiated unless it is actually called in code.
+*/
+#define FR2D(x, r) ((double)(((double)(x)) / ((double)(1 << (r)))))
+#define D2FR(d, r) ((s32)(d * (1 << r)))
+
+/******************************************************
+  Useful Constants
+
+  FR_kXXXX "k" denotes constant to help when reading macros used in source code
+  FR_krXXX "r" == reciprocal e.g. 1/XXX
+  As these are MACROS, they take only compiled code space if actually used.
+  Consts here calculated by multiply the natural base10 value by (1<<FR_kPREC)
+ */
+#define FR_kPREC (16)         /* bits of precision in constants listed below */
+#define FR_kE (178145)        /* 2.718281828459 */
+#define FR_krE (24109)        /* 0.367879441171 */
+#define FR_kPI (205887)       /* 3.141592653589 */
+#define FR_krPI (20861)       /* 0.318309886183 */
+#define FR_kDEG2RAD (1144)    /* 0.017453292519 */
+#define FR_kRAD2DEG (3754936) /*57.295779513082 */
+
+#define FR_kQ2RAD (102944) /* 1.570796326794 */
+#define FR_kRAD2Q (41722)  /* 0.636619772367 */
+
+/*log2 to ln conversions (see MACROS) */
+#define FR_kLOG2E (94548)  /* 1.442695040890 */
+#define FR_krLOG2E (45426) /* 0.693147180560 */
+
+/*log2 to log10 conversions (see MACROS) */
+#define FR_kLOG2_10 (217706) /* 3.32192809489 */
+#define FR_krLOG2_10 (19728) /* 0.30102999566 */
+
+/* High-precision scaling constants at radix 28.
+ * Used by FR_EXP, FR_ln, FR_log10 for base conversion.
+ * At radix 28 these have ~9 decimal digits of precision, far exceeding
+ * the ~4.8 digits of Q16.16.
+ */
+#define FR_kLOG2E_28    (387270501)   /* log2(e)   = 1.4426950408889634  */
+#define FR_krLOG2E_28   (186065279)   /* ln(2)     = 0.6931471805599453  */
+#define FR_kLOG2_10_28  (891723283)   /* log2(10)  = 3.3219280948873622  */
+#define FR_krLOG2_10_28  (80807124)   /* log10(2)  = 0.3010299956639812  */
+
+/* Multiply fixed-point value x (any radix) by a radix-28 constant k.
+ * Result stays at x's radix. Uses 64-bit intermediate.
+ * Rounds to nearest (adds 0.5 LSB before shift).
+ */
+#define FR_MULK28(x, k) ((s32)((((int64_t)(x) * (int64_t)(k)) + (1 << 27)) >> 28))
+
+// common sqrts
+#define FR_kSQRT2 (92682)   /* 1.414213562373 */
+#define FR_krSQRT2 (46341)  /* 0.707106781186 */
+#define FR_kSQRT3 (113512)  /* 1.732050807568 */
+#define FR_krSQRT3 (37837)  /* 0.577350269189 */
+#define FR_kSQRT5 (146543)  /* 2.236067977599 */
+#define FR_krSQRT5 (29309)  /* 0.447213595499 */
+#define FR_kSQRT10 (207243) /* 3.162277660168 */
+#define FR_krSQRT10 (20724) /* 0.316227766016 */
+
+  /*===============================================
+   * Arithmetic operations
+   */
+  s32 FR_FixMuls(s32 x, s32 y);   // mul signed, round-to-nearest, NOT saturated
+  s32 FR_FixMulSat(s32 x, s32 y); // mul signed, round-to-nearest, saturated
+  s32 FR_FixAddSat(s32 x, s32 y); // add signed, saturated
+
+/*================================================
+ * Constants used in Trig tables, definitions
+ *
+ * FR_TRIG_PREC     — internal table precision (u0.15, sine table)
+ * FR_TRIG_OUT_PREC — output precision of sin/cos/tan (s15.16 since v2.0.1)
+ * FR_TRIG_ONE      — exact 1.0 in output format (1 << 16 = 65536)
+ *
+ * sin/cos return s32 at radix 16 (s15.16). This matches libfixmath Q16.16
+ * precision and allows exact representation of 1.0 at the poles.
+ * tan returns s32 at radix 16 (s15.16). Saturates at ±FR_TRIG_MAXVAL.
+ */
+#define FR_TRIG_PREC     (15)
+#define FR_TRIG_OUT_PREC (16)
+#define FR_TRIG_MASK     ((1 << (FR_TRIG_PREC)) - 1)
+#define FR_TRIG_ONE      (1L << FR_TRIG_OUT_PREC)         /* 65536 = 1.0 */
+#define FR_TRIG_MAXVAL   ((s32)0x7fffffff)                 /* tan saturation max */
+#define FR_TRIG_MINVAL   (-FR_TRIG_MAXVAL)                  /* tan saturation min */
+
+/* Bit Shift Scaling macros.  Useful on some platforms with poor MUL performance.
+ * Also can be useful if you need to scale numbers with
+ * large portions of bits_in_use and a larger register size is not available.
+ * For example, suppose you need to scale a large 32bit number say z=0xa4239323
+ * from degrees to radians.  Ideally this number would be multiplied by 0.017
+ * but using a (z*FR_kDEG2RAD) >> FR_kPREC type operation is likely to overflow
+ * due the MUL result being large.  The FR_RAD2DEG MACRO below doesn't require
+ * accumulator headroom bits and so has no chance for loss of precision.
+ * Another benefit is  some low end micros (8051, 68xx, MSP430(low end),
+ * PIC-8 family) have no multiplier.  So these allow the programmer an option
+ * to see if performance or precision are better expressed as shifts rather than
+ * scaled multiplies.  As always, mileage may vary depending on architecture,
+ * compiler and other considerations.
+ */
+
+/* scale by 10s */
+#define FR_SMUL10(x) (((x) << 3) + (((x) << 1)))
+#define FR_SDIV10(x) (((x) >> 3) - ((x) >> 5) + ((x) >> 7) - ((x) >> 9) + ((x) >> 11))
+
+/* scale by 1/log2(e)  0.693147180560 used for converting log2() to ln()  */
+#define FR_SrLOG2E(x) (((x) >> 1) + ((x) >> 2) - ((x) >> 3) + ((x) >> 4) + ((x) >> 7) - ((x) >> 9) - ((x) >> 12) + ((x) >> 15))
+
+/* scale by log2(e)    1.442695040889 used for converting pow2() to exp() */
+#define FR_SLOG2E(x) ((x) + ((x) >> 1) - ((x) >> 4) + ((x) >> 8) + ((x) >> 10) + ((x) >> 12) + ((x) >> 14))
+
+/* scale by 1/log2(10) 0.30102999566 used for converting log2() to log10  */
+#define FR_SrLOG2_10(x) (((x) >> 2) + ((x) >> 4) - ((x) >> 6) + ((x) >> 7) - ((x) >> 8) + ((x) >> 12))
+
+/* scale by log2(10)   3.32192809489 used for converting pow2() to pow10 */
+#define FR_SLOG2_10(x) (((x) << 1) + (x) + ((x) >> 2) + ((x) >> 4) + ((x) >> 7) + ((x) >> 10) + ((x) >> 11) + ((x) >> 13))
+
+/* Shift-only angular conversion macros
+ *
+ * All are pure constant multipliers expressed as shifts — no multiply, no
+ * divide, no 64-bit intermediates, no accumulators. Work at any radix: if
+ * your input is degrees at radix 8, the output is the target unit at radix 8.
+ * The caller shifts as needed.
+ *
+ * Angular units:
+ *   degrees   = 360  per revolution
+ *   radians   = 2*pi per revolution
+ *   BAM       = 65536 per revolution (Binary Angular Measure, u16)
+ *   quadrants = 4 per revolution (= BAM >> 14)
+ *
+ * Side-effect note: x is referenced multiple times in each macro — do not
+ * pass expressions with side effects.
+ */
+
+/* FR_DEG2RAD(x): multiply by pi/180 ≈ 0.017453 (5 terms, ~17 bits) */
+#define FR_DEG2RAD(x) (((x) >> 6) + ((x) >> 9) - ((x) >> 13) - ((x) >> 19) - ((x) >> 20))
+
+/* FR_RAD2DEG(x): multiply by 180/pi ≈ 57.29578 (7 terms, ~19 bits) */
+#define FR_RAD2DEG(x) (((x) << 6) - ((x) << 3) + (x) + ((x) >> 2) + (((x) >> 4) - ((x) >> 6)) - ((x) >> 10))
+
+/* FR_DEG2BAM(x): multiply by 65536/360 ≈ 182.0449 (7 terms, ~18 bits).
+ * Intermediate terms overflow s32 when |x| > ~256 deg at s15.16 (x<<7 term),
+ * but the overflow is harmless when the result is truncated to u16 BAM
+ * (two's complement wrapping preserves modular correctness).
+ * For full-precision s32 BAM (sub-BAM interpolation), use fr_deg_to_bam(). */
+#define FR_DEG2BAM(x) (((x)<<7)+((x)<<6)-((x)<<3)-((x)<<1)+((x)>>5)+((x)>>6)-((x)>>9))
+
+/* FR_BAM2DEG(x): multiply by 360/65536 = 0.00549316 (4 terms, exact) */
+#define FR_BAM2DEG(x) (((x)>>8)+((x)>>9)-((x)>>12)-((x)>>13))
+
+/* FR_RAD2BAM(x): multiply by 65536/(2*pi) ≈ 10430.378 (7 terms, ~21 bits).
+ * CAUTION: overflows s32 when |x| > ~4 rad at s15.16 (x<<13 term).
+ * For safe conversion at any radix, use fr_rad_to_bam() instead. 
+ * #define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)) */
+#define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)+((x)>>8)-((x)>>11)-((x)>>14))
+/* ── Overflow-safe rad/deg to BAM conversion functions ─────────────
+ *
+ * These replace the FR_RAD2BAM / FR_DEG2BAM macros for callers that
+ * need the full ±2*pi or ±360° range at any radix.
+ *
+ * Strategy: normalize input to radix 16, conditionally reduce into
+ * a safe zone, apply the full-precision shift-only multiply, then
+ * extract the u16 BAM.  No precision loss from halving/quartering.
+ *
+ * fr_rad_to_bam: reduce to [-pi, pi], reordered terms.  ±2*pi safe.
+ * fr_deg_to_bam: reduce to [-90, 90) + quadrant offset.  ±360° safe.
+ */
+
+/* Pi constants at any radix: FR_PI(r) = round(pi * 2^r), etc.
+ * Compiler evaluates at compile time when r is a constant.
+ * Max safe radix: FR_PI r<=29, FR_TWO_PI r<=28, FR_HALF_PI r<=30. */
+#define FR_PI(r)             ((s32)(3.14159265358979323846  * (1LL << (r)) + 0.5))
+#define FR_TWO_PI(r)         ((s32)(6.28318530717958647692  * (1LL << (r)) + 0.5))
+#define FR_HALF_PI(r)        ((s32)(1.57079632679489661923  * (1LL << (r)) + 0.5))
+#define FR_THREE_HALF_PI(r)  ((s32)(4.71238898038468985769  * (1LL << (r)) + 0.5))
+
+/* Convenience aliases at radix 16 */
+#define FR_PI_R16       FR_PI(16)
+#define FR_TWO_PI_R16   FR_TWO_PI(16)
+
+/* Degree constants at radix 16 (exact — no truncation) */
+#define FR_D90_R16      ((s32)90  << 16)
+#define FR_D180_R16     ((s32)180 << 16)
+#define FR_D360_R16     ((s32)360 << 16)
+
+  u16 fr_rad_to_bam(s32 rad, u16 radix);
+#ifndef FR_LEAN
+  u16 fr_deg_to_bam(s32 deg, u16 radix);
+#endif
+
+/* FR_BAM2RAD(x): multiply by 2*pi/65536 ≈ 0.0000959 (5 terms, ~18 bits) */
+#define FR_BAM2RAD(x) (((x)>>13)-((x)>>15)+((x)>>18)+((x)>>21)+((x)>>25))
+
+/* Legacy quadrant macros (quadrants = BAM >> 14) */
+#define FR_RAD2Q(x) (((x) >> 1) + ((x) >> 3) + ((x) >> 7) + ((x) >> 8) - ((x) >> 14))
+#define FR_Q2RAD(x) ((x) + ((x) >> 1) + ((x) >> 4) + ((x) >> 7) + ((x) >> 11))
+#define FR_DEG2Q(x) (((x) >> 6) - ((x) >> 8) - ((x) >> 11) - ((x) >> 13))
+#define FR_Q2DEG(x) (((x) << 6) + ((x) << 4) + ((x) << 3) + ((x) << 1))
+
+/*===============================================
+ * BAM (Binary Angular Measure) — internal angle representation
+ *
+ * One full circle = 2^16 BAM units. So:
+ *   0       = 0 deg     = 0 rad
+ *   16384   = 90 deg    = pi/2 rad   (FR_BAM_QUADRANT)
+ *   32768   = 180 deg   = pi rad
+ *   49152   = 270 deg   = 3pi/2 rad
+ *   65536   wraps to 0  (because BAM is u16)
+ *
+ * BAM is the natural representation for fixed-point trig because:
+ *   - The top 2 bits select the quadrant (no `% 360` modulo needed).
+ *   - The next 7 bits index the 128-entry quadrant table directly.
+ *   - The bottom 7 bits give linear-interpolation precision.
+ */
+#define FR_BAM_BITS         (16)
+#define FR_BAM_FULL         (1L << FR_BAM_BITS)         /* 65536 */
+#define FR_BAM_QUADRANT     (FR_BAM_FULL >> 2)          /* 16384 */
+#define FR_BAM_HALF         (FR_BAM_FULL >> 1)          /* 32768 */
+
+/*===============================================
+ * Radian-native and BAM-native trig (recommended)
+ *
+ * All sin/cos functions return s32 at radix 16 (s15.16).
+ * 1.0 is represented exactly as FR_TRIG_ONE (65536).
+ * Poles (0, 90, 180, 270 deg) produce exact ±FR_TRIG_ONE or 0.
+ *
+ *   fr_cos_bam(bam)         — cos of a BAM angle,        s15.16 result
+ *   fr_sin_bam(bam)         — sin of a BAM angle,        s15.16 result
+ *   fr_cos(rad, radix)      — cos of radians at radix,   s15.16 result
+ *   fr_sin(rad, radix)      — sin of radians at radix,   s15.16 result
+ *   fr_tan(rad, radix)      — tan of radians at radix,   s15.16 result
+ *   fr_cos_deg(deg, radix)  — cos of fixed-radix degrees, s15.16 result
+ *   fr_sin_deg(deg, radix)  — sin of fixed-radix degrees, s15.16 result
+ *   fr_tan_deg(deg, radix)  — tan of fixed-radix degrees, s15.16 result
+ *
+ * All go through the same 129-entry quadrant table with linear interpolation.
+ * Worst-case error: ~2 LSB in s15.16 (~3e-5 absolute), except at the four
+ * cardinal angles where the result is exact.
+ *
+ * The radian and degree wrappers (fr_sin, fr_cos, fr_tan, etc.) range-reduce
+ * their input, convert to u16 BAM, and call the BAM-native functions. Small-
+ * angle bypasses at the zero crossings (sin≈0, cos≈0, tan≈0) use the linear
+ * approximation sin(δ)≈δ to avoid BAM quantization error where it matters most.
+ */
+  s32 fr_cos_bam(u16 bam);
+  s32 fr_sin_bam(u16 bam);
+#ifndef FR_LEAN
+  s32 fr_tan_bam(u16 bam);
+#endif
+  s32 fr_cos(s32 rad, u16 radix);
+  s32 fr_sin(s32 rad, u16 radix);
+  s32 fr_tan(s32 rad, u16 radix);
+
+/* Integer degrees -> BAM using division (exact at all multiples of 45 deg). */
+#define FR_DEG2BAM_I(deg) ((u16)((((s32)(deg) << 16) + ((deg) >= 0 ? 180 : -180)) / 360))
+
+/* Legacy single-arg integer-degree macros — use FR_CosI / FR_SinI instead */
+/* #define fr_cos_deg(deg)  fr_cos_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */
+/* #define fr_sin_deg(deg)  fr_sin_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */
+
+#ifndef FR_LEAN
+/*===============================================
+ * Degree-input trig API
+ *
+ *   FR_CosI(deg)              — cos of integer degrees,       s15.16 result
+ *   FR_SinI(deg)              — sin of integer degrees,       s15.16 result
+ *   FR_TanI(deg)              — tan of integer degrees,       s15.16 result
+ *   fr_cos_deg(deg, radix)    — cos of fixed-radix degrees,   s15.16 result
+ *   fr_sin_deg(deg, radix)    — sin of fixed-radix degrees,   s15.16 result
+ *   fr_tan_deg(deg, radix)    — tan of fixed-radix degrees,   s15.16 result
+ */
+#define FR_CosI(deg)  fr_cos_bam(FR_DEG2BAM_I(deg))
+#define FR_SinI(deg)  fr_sin_bam(FR_DEG2BAM_I(deg))
+
+  s32 fr_cos_deg(s32 deg, u16 radix);
+  s32 fr_sin_deg(s32 deg, u16 radix);
+  s32 FR_TanI(s32 deg);
+  s32 fr_tan_deg(s32 deg, u16 radix);
+
+  /* Legacy macros — use fr_sin_deg/fr_cos_deg/fr_tan_deg in new code */
+  #define FR_Sin  fr_sin_deg
+  #define FR_Cos  fr_cos_deg
+  #define FR_Tan  fr_tan_deg
+#endif /* FR_LEAN */
+
+  /* Inverse trig — output in radians at caller-specified radix (s32).
+   * FR_atan2 returns radians at radix 16 (s15.16).
+   * Range: acos [0, pi], asin [-pi/2, pi/2],
+   *        atan [-pi/2, pi/2], atan2 [-pi, pi].
+   */
+  s32 FR_acos(s32 input, u16 radix, u16 out_radix);
+  s32 FR_asin(s32 input, u16 radix, u16 out_radix);
+  s32 FR_atan(s32 input, u16 radix, u16 out_radix);
+  s32 FR_atan2(s32 y, s32 x, u16 out_radix);
+
+/* Logarithms */
+#define FR_LOG2MIN (-(32767 << 16)) /* returned instead of "negative infinity" */
+
+  s32 FR_log2(s32 input, u16 radix, u16 output_radix);
+  s32 FR_ln(s32 input, u16 radix, u16 output_radix);
+#ifndef FR_LEAN
+  s32 FR_log10(s32 input, u16 radix, u16 output_radix);
+#endif
+
+  /* Power */
+  s32 FR_pow2(s32 input, u16 radix);
+#define FR_EXP(input, radix)   (FR_pow2(FR_MULK28((input), FR_kLOG2E_28), (radix)))
+#define FR_POW10(input, radix) (FR_pow2(FR_MULK28((input), FR_kLOG2_10_28), (radix)))
+
+/* Shift-only (multiply-free) base-conversion variants.
+ * Lower accuracy (~5-10 LSB at Q16.16) but no multiply instruction.
+ * Use these on targets where 32x32->64 multiply is expensive.
+ */
+#define FR_EXP_FAST(input, radix)   (FR_pow2(FR_SLOG2E(input), radix))
+#define FR_POW10_FAST(input, radix) (FR_pow2(FR_SLOG2_10(input), radix))
+
+/*===============================================
+ * Formatted output and string parsing
+ *
+ * Define FR_NO_PRINT before including this header to exclude all
+ * print/format functions from compilation. This saves ~1.7 KB of ROM
+ * on targets that don't need human-readable output (e.g. headless
+ * sensor nodes, DSP-only firmware).
+ *
+ *   #define FR_NO_PRINT
+ *   #include "FR_math.h"
+ */
+#ifndef FR_NO_PRINT
+
+  /* printing family of functions */
+  int FR_printNumF(int (*f)(char), s32 n, int radix, int pad, int prec); /* print fixed radix num as floating point e.g.  -12.34" */
+  int FR_printNumD(int (*f)(char), int n, int pad);                      /* print decimal number with optional padding e.g. " 12" */
+  int FR_printNumH(int (*f)(char), int n, int showPrefix);               /* print num as a hexidecimal e.g. "0x12ab"              */
+
+  /* string-to-fixed-point parser (inverse of FR_printNumF) */
+  s32 FR_numstr(const char *s, u16 radix);
+
+#endif /* FR_NO_PRINT */
+
+/*===============================================
+ * Square root and hypot
+ *
+ * Both take fixed-radix inputs and return a result at the same radix.
+ * Algorithm: digit-by-digit isqrt on a 64-bit accumulator (no division,
+ * at most 32 iterations). Rounds to nearest.
+ *
+ * Domain error sentinel: input < 0 (sqrt) returns FR_DOMAIN_ERROR. Caller
+ * can check `result == FR_DOMAIN_ERROR` to detect domain errors.
+ */
+  s32 FR_sqrt(s32 input, u16 radix);
+#ifndef FR_LEAN
+  s32 FR_hypot(s32 x, s32 y, u16 radix);
+#endif
+
+  /* Fast approximate magnitude — shift-only, no multiply, no 64-bit.
+   * Based on piecewise-linear approximation of sqrt(x*x + y*y).
+   * See US Patent 6,567,777 B1 (Chatterjee, expired).
+   *
+   *   FR_hypot_fast8(x, y)  8-segment, ~0.10% peak error
+   *
+   * Inputs are raw signed integers (or fixed-point at any radix — the
+   * result is at the same radix as the inputs, just like FR_hypot).
+   * No radix parameter needed because the algorithm is scale-invariant.
+   */
+  s32 FR_hypot_fast8(s32 x, s32 y);
+
+/*===============================================
+ * Wave generators and ADSR envelope
+ *
+ * Define FR_NO_WAVES before including this header to exclude all
+ * waveform generators (square, pulse, triangle, saw, noise) and the
+ * ADSR envelope from compilation. This saves ~400 B of ROM on targets
+ * that only need math/trig and don't do audio synthesis.
+ *
+ *   #define FR_NO_WAVES
+ *   #include "FR_math.h"
+ */
+#ifndef FR_NO_WAVES
+
+/*===============================================
+ * Wave generators — synth-style fixed-shape waveforms.
+ *
+ * All take a u16 BAM phase in [0, 65535] (a full cycle) and return s16
+ * in s0.15 in [-32767, +32767]. Use FR_HZ2BAM_INC below to compute a
+ * phase increment for a target frequency.
+ *
+ *   fr_wave_sqr(phase)              50% square
+ *   fr_wave_pwm(phase, duty)        variable-duty pulse
+ *   fr_wave_tri(phase)              symmetric triangle
+ *   fr_wave_saw(phase)              rising sawtooth
+ *   fr_wave_tri_morph(phase, brk)   variable-symmetry triangle (morphs to saw)
+ *   fr_wave_noise(state*)           LFSR pseudorandom noise
+ *
+ * fr_wave_tri_morph returns [0, 32767] (unipolar) — caller can re-bias
+ * if a bipolar form is desired. The other waves are bipolar [-32767, +32767].
+ */
+  s16 fr_wave_sqr(u16 phase);
+  s16 fr_wave_pwm(u16 phase, u16 duty);
+  s16 fr_wave_tri(u16 phase);
+  s16 fr_wave_saw(u16 phase);
+  s16 fr_wave_tri_morph(u16 phase, u16 break_point);
+  s16 fr_wave_noise(u32 *state);
+
+/* FR_HZ2BAM_INC(hz, sample_rate)
+ * Compute the per-sample BAM phase increment for a target frequency in Hz
+ * given a sample rate in Hz. Result is a u16 to add to the running phase
+ * each sample (the running phase wraps mod 2^16 naturally because it's u16).
+ *
+ *   u16 phase = 0;
+ *   u16 inc   = FR_HZ2BAM_INC(440, 48000);
+ *   for (...) { sample = fr_sin_bam(phase); phase += inc; }
+ *
+ * Range: hz must be < sample_rate / 2 (Nyquist) for a meaningful tone;
+ * higher hz aliases. The macro does not enforce this.
+ *
+ * Side-effect note: hz and sample_rate are evaluated once each.
+ */
+#define FR_HZ2BAM_INC(hz, sample_rate)  ((u16)(((u32)(hz) * 65536UL) / (u32)(sample_rate)))
+
+/*===============================================
+ * ADSR envelope generator
+ *
+ * Linear-segment Attack-Decay-Sustain-Release envelope. Caller-allocated
+ * struct, no malloc, no global state. Internal level is held in s1.30 so
+ * very long envelopes (e.g. 48000-sample attack at 48 kHz) still get a
+ * non-zero per-sample increment. Output is s0.15 in [0, 32767].
+ *
+ * Lifecycle:
+ *   fr_adsr_t env;
+ *   fr_adsr_init(&env, atk_samples, dec_samples, sustain_s015, rel_samples);
+ *   fr_adsr_trigger(&env);                 // note-on
+ *   for (...) sample = fr_adsr_step(&env); // per-audio-sample
+ *   fr_adsr_release(&env);                 // note-off
+ *   for (...) sample = fr_adsr_step(&env); // until env.state == FR_ADSR_IDLE
+ */
+#define FR_ADSR_IDLE     (0)
+#define FR_ADSR_ATTACK   (1)
+#define FR_ADSR_DECAY    (2)
+#define FR_ADSR_SUSTAIN  (3)
+#define FR_ADSR_RELEASE  (4)
+
+typedef struct fr_adsr_s {
+    u8  state;        /* FR_ADSR_* */
+    s32 level;        /* current envelope value, s1.30 */
+    s32 sustain;      /* sustain target, s1.30 */
+    s32 attack_inc;   /* per-sample increment during attack */
+    s32 decay_dec;    /* per-sample decrement during decay */
+    s32 release_dec;  /* per-sample decrement during release */
+} fr_adsr_t;
+
+  void fr_adsr_init(fr_adsr_t *env,
+                    u32 attack_samples,
+                    u32 decay_samples,
+                    s16 sustain_level_s015,
+                    u32 release_samples);
+  void fr_adsr_trigger(fr_adsr_t *env);
+  void fr_adsr_release(fr_adsr_t *env);
+  s16  fr_adsr_step(fr_adsr_t *env);
+
+#endif /* FR_NO_WAVES */
+
+#ifdef __cplusplus
+
+} // extern "C"
+#endif
+
+#endif /* __FR_Math_h__ */
diff --git a/fw/main.c b/fw/main.c
index 2653cf3..5282d14 100644
--- a/fw/main.c
+++ b/fw/main.c
@@ -3,6 +3,10 @@
 #include <stdio.h>
 #include <fsusb.h>
 
+#define FR_LEAN
+// #define FR_CORE_ONLY
+#include <FR_math.h>
+
 #include "funconfig.h"
 #include "lib_i2c.h"
 #include "display.h"
@@ -10,6 +14,8 @@
 #include "sc7a20.h"
 #include "pd.h"
 
+// Radix for fixed point operations
+#define R 16
 
 // constants
 // LUT for converting NTC readings to degrees celsius
@@ -483,7 +489,7 @@ __attribute__((noreturn)) int main(void)
 			static uint16_t vbus_mv, current_ma;
 			static int16_t temp_c, tip_temp_c;
 			static uint16_t power;
-			static fp24_8_t e;
+			static s32 e;
 			vbus_mv = U16_FP_EMA_K4(vbus_mv, ((u32)adc_buffer[0]*VCC_MV*11)/4096);
 			current_ma = U16_FP_EMA_K4(current_ma, get_current_ma(adc_buffer[1]));
 			temp_c = I16_FP_EMA_K4(temp_c, get_temp_c(adc_buffer[2]));
@@ -567,13 +573,13 @@ __attribute__((noreturn)) int main(void)
 						err_d = delta - prev_delta;
 						prev_delta = delta;
 
-						const fp24_8_t kp = f32_to_fp24_8(0.8f);
-						const fp24_8_t ki = f32_to_fp24_8(0.15f);
-						const fp24_8_t kd = f32_to_fp24_8(0.0f);
-						e = fp24_8_mul(i16_to_fp24_8(err_p), kp) +
-							fp24_8_mul(i16_to_fp24_8(err_i), ki) +
-							fp24_8_mul(i16_to_fp24_8(err_d), kd);
-						uint16_t duty = MAX(0, MIN(I(e), pd_profile.max_duty));
+						const s32 kp = FR_NUM(0, 8,  1, R);
+						const s32 ki = FR_NUM(0, 15, 2, R);
+						const s32 kd = FR_NUM(0, 0,  1, R);
+						e = FR_FIxAddSat(e, FR_FixMulSat(I2FR(err_p, R), kp));
+						e = FR_FIxAddSat(e, FR_FixMulSat(I2FR(err_i, R), ki));
+						e = FR_FIxAddSat(e, FR_FixMulSat(I2FR(err_f, R), kd));
+						uint16_t duty = MAX(0, MIN(FR2I(e, R), pd_profile.max_duty));
 
 						pwm_set(((u32)duty*tim_max)/100);
 						u8g2_DrawBox(u8g2, x_off+92, y_off+12, 4, 4);