sound, prepare FM filtering

2025-10-26 16:29:37 -04:00 · 2022-03-31 17:27:49 +00:00 · 2022-03-31 17:27:49 +00:00 · e2e2b6ad1b
commit e2e2b6ad1b
parent 882f697ad4
8 changed files with 1180 additions and 3 deletions
--- a/pico/sound/blipper.c
+++ b/pico/sound/blipper.c
@ -0,0 +1,540 @@
+/*
+ * Copyright (C) 2013 - Hans-Kristian Arntzen
+ *
+ * Permission is hereby granted, free of charge, 
+ * to any person obtaining a copy of this software and
+ * associated documentation files (the "Software"),
+ * to deal in the Software without restriction,
+ * including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ *
+ * 03-2022 kub: modified for arbitrary decimation rates
+ * 03-2022 kub: modified for 32 bit sample size
+ */
+
+#include "blipper.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#define BLIPPER_FILTER_AMP 0.75
+
+#if BLIPPER_LOG_PERFORMANCE
+#include <time.h>
+/* Reads the monotonic clock and returns it as seconds (performance log only). */
+static double get_time(void)
+{
+   struct timespec now;
+   double seconds;
+
+   clock_gettime(CLOCK_MONOTONIC, &now);
+   seconds = now.tv_sec;
+   seconds += now.tv_nsec / 1000000000.0;
+   return seconds;
+}
+#endif
+
+struct blipper
+{
+   blipper_long_sample_t *output_buffer; /* accumulated filter responses, consumed by _blipper_read() */
+   unsigned output_avail;                /* value reported by blipper_read_avail() */
+   unsigned output_buffer_samples;       /* allocated length: buffer_samples + taps */
+
+   blipper_sample_t *filter_bank;        /* indexed as filter_bank[filter_phase * taps + tap] */
+
+   unsigned phase;                       /* input clocks accumulated since the last read */
+   unsigned phases;                      /* set from the decimation argument of blipper_new() */
+   unsigned phases_div;                  /* 2^32 / phases, used to divide by multiply-and-shift */
+   unsigned taps;                        /* coefficients per filter phase */
+
+   blipper_long_sample_t integrator;     /* running sum carried over between reads */
+   blipper_long_sample_t ramp;           /* added to every output sample while integrating */
+   blipper_long_sample_t last_sample;    /* previous input value, used to form deltas */
+
+#if BLIPPER_LOG_PERFORMANCE
+   double total_time;                    /* seconds spent in _blipper_push_samples() */
+   double integrator_time;               /* seconds spent in _blipper_read() */
+   unsigned long total_samples;          /* input samples pushed so far */
+#endif
+
+   int owns_filter;                      /* non-zero when blipper_free() must free filter_bank */
+};
+
+/* Releases a blipper and the buffers it owns. A NULL argument is ignored. */
+void blipper_free(blipper_t *blip)
+{
+   if (!blip)
+      return;
+
+#if BLIPPER_LOG_PERFORMANCE
+   fprintf(stderr, "[blipper]: Processed %lu samples, using %.6f seconds blipping and %.6f seconds integrating.\n", blip->total_samples, blip->total_time, blip->integrator_time);
+#endif
+
+   /* A caller-supplied filter bank is only borrowed; free it only if we built it. */
+   if (blip->owns_filter)
+      free(blip->filter_bank);
+
+   free(blip->output_buffer);
+   free(blip);
+}
+
+/* Truncated power series sum over k of (x^2 / 4)^k / (k!)^2, using the
+ * first 18 terms. Used to evaluate the Kaiser window. */
+static double besseli0(double x)
+{
+   const double x_squared = x * x;
+   double numerator = 1.0;    /* x^(2k) */
+   double quarter_pow = 1.0;  /* (1/4)^k */
+   double fact = 1.0;         /* k! */
+   double k = 0.0;
+   double acc = 0.0;
+   unsigned term;
+
+   for (term = 0; term != 18; ++term)
+   {
+      acc += numerator * quarter_pow / (fact * fact);
+
+      /* Advance all running factors to the next term. */
+      k += 1.0;
+      fact *= k;
+      quarter_pow *= 0.25;
+      numerator *= x_squared;
+   }
+
+   return acc;
+}
+
+/* Unnormalised sinc, sin(v) / v; returns 1 for |v| below 1e-5 to avoid 0/0. */
+static double sinc(double v)
+{
+   return (fabs(v) < 0.00001) ? 1.0 : sin(v) / v;
+}
+
+/* Unnormalised Kaiser window value at position index, index in [-1, 1).
+ * The caller divides by kaiser_window(0, beta) to normalise. */
+static double kaiser_window(double index, double beta)
+{
+   const double radicand = 1.0 - index * index;
+
+   return besseli0(beta * sqrt(radicand));
+}
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+/* Builds a Kaiser-windowed sinc of phases * taps coefficients in natural
+ * (not yet interleaved) order. Returns a malloc'ed array owned by the
+ * caller, or NULL if the allocation fails. */
+static blipper_real_t *blipper_create_sinc(unsigned phases, unsigned taps,
+      double cutoff, double beta)
+{
+   const unsigned length = phases * taps;
+   const double half_taps = taps / 2.0;
+   double window_norm;
+   blipper_real_t *coeffs;
+   unsigned n;
+
+   coeffs = (blipper_real_t*)malloc(phases * taps * sizeof(*coeffs));
+   if (!coeffs)
+      return NULL;
+
+   /* Scale so that the window is 1 at its centre. */
+   window_norm = 1.0 / kaiser_window(0.0, beta);
+
+   for (n = 0; n < length; n++)
+   {
+      const double unit_pos = (double)n / length;    /* [0, 1) */
+      const double pos = 2.0 * unit_pos - 1.0;       /* [-1, 1) */
+      const double arg = pos * half_taps;            /* [-taps / 2, taps / 2) */
+
+      coeffs[n] = cutoff * sinc(M_PI * arg * cutoff) *
+         kaiser_window(pos, beta) * window_norm;
+   }
+
+   return coeffs;
+}
+
+/* Stores the ramp that _blipper_read() adds to every output sample.
+ * The value is BLIPPER_FILTER_AMP * delta * phases / clocks; the fixed
+ * point build keeps it scaled by 0x8000 and rounded to nearest. */
+void blipper_set_ramp(blipper_t *blip, blipper_long_sample_t delta,
+      unsigned clocks)
+{
+   const blipper_real_t per_output = BLIPPER_FILTER_AMP * delta * blip->phases / clocks;
+
+#if BLIPPER_FIXED_POINT
+   blip->ramp = (blipper_long_sample_t)floor(per_output * 0x8000 + 0.5);
+#else
+   blip->ramp = per_output;
+#endif
+}
+
+/* Differentiation and integration run at different sample rates.
+ * Convolving delta impulses differentiates with D(z) = 1 - z^-1, while the
+ * integration after decimation by D is 1 / (1 - z^-D).
+ *
+ * With a sinc filter S(z) the response after blipping would therefore be
+ * S(z) * (1 - z^-1) / (1 - z^-D).
+ *
+ * To compensate, S(z) is prefiltered here with the inverse,
+ * (1 - z^-D) / (1 - z^-1). The result is still finite, one tap longer.
+ *
+ * phases is the same as the decimation rate. The input array is consumed:
+ * it is either returned (reallocated) or freed on failure. */
+static blipper_real_t *blipper_prefilter_sinc(blipper_real_t *filter, unsigned phases,
+      unsigned taps)
+{
+   const unsigned in_len = phases * taps;
+   const unsigned out_len = in_len + phases;
+   const float gain = BLIPPER_FILTER_AMP / phases;
+   blipper_real_t *running;
+   blipper_real_t *grown;
+   unsigned n;
+
+   running = (blipper_real_t*)malloc(out_len * sizeof(*filter));
+   if (!running)
+   {
+      free(filter);
+      return NULL;
+   }
+
+   grown = (blipper_real_t*)realloc(filter, out_len * sizeof(*filter));
+   if (!grown)
+   {
+      /* realloc failure leaves the original array valid; release both. */
+      free(running);
+      free(filter);
+      return NULL;
+   }
+   filter = grown;
+
+   /* Integrate: running sum, held constant over the extra tap. */
+   running[0] = filter[0];
+   for (n = 1; n < in_len; n++)
+      running[n] = running[n - 1] + filter[n];
+   for (n = in_len; n < out_len; n++)
+      running[n] = running[in_len - 1];
+
+   /* Differentiate with an offset of D (= phases). */
+   memcpy(filter, running, phases * sizeof(*filter));
+   for (n = phases; n < out_len; n++)
+      filter[n] = running[n] - running[n - phases];
+
+   /* The prefilter boosts the gain of the sinc. Undo that, and attenuate
+    * a little further so the fixed point path does not clip. */
+   for (n = 0; n < out_len; n++)
+      filter[n] *= gain;
+
+   free(running);
+   return filter;
+}
+
+/* Rearranges the filter into a polyphase bank: coefficient t of phase p
+ * moves from filter[t * phases + p] to bank[p * taps + t], so each phase
+ * is contiguous. The input array is always freed; returns the new array
+ * or NULL on allocation failure. */
+static blipper_real_t *blipper_interleave_sinc(blipper_real_t *filter, unsigned phases,
+      unsigned taps)
+{
+   unsigned phase, tap;
+   blipper_real_t *bank = (blipper_real_t*)malloc(phases * taps * sizeof(*filter));
+
+   if (bank)
+   {
+      for (phase = 0; phase < phases; phase++)
+      {
+         blipper_real_t *row = bank + phase * taps;
+
+         for (tap = 0; tap < taps; tap++)
+            row[tap] = filter[tap * phases + phase];
+      }
+   }
+
+   free(filter);
+   return bank;
+}
+
+#if BLIPPER_FIXED_POINT
+/* Converts taps real coefficients to fixed point (scaled by 0x7fff and
+ * rounded to nearest). The input array is always freed; returns the
+ * quantized array or NULL on allocation failure. */
+static blipper_sample_t *blipper_quantize_sinc(blipper_real_t *filter, unsigned taps)
+{
+   unsigned n;
+   blipper_sample_t *quantized = (blipper_sample_t*)malloc(taps * sizeof(*quantized));
+
+   if (quantized)
+   {
+      for (n = 0; n < taps; n++)
+         quantized[n] = (blipper_sample_t)floor(filter[n] * 0x7fff + 0.5);
+   }
+
+   free(filter);
+   return quantized;
+}
+#endif
+
+/* Builds the complete filter bank: windowed sinc, prefilter, interleave
+ * and (fixed point build only) quantization. Returns NULL when taps <= 1
+ * or when any step fails; the caller owns the returned array. */
+blipper_sample_t *blipper_create_filter_bank(unsigned phases, unsigned taps,
+      double cutoff, double beta)
+{
+   blipper_real_t *bank = NULL;
+
+   /* blipper_prefilter_sinc() adds one tap, so design the sinc with one
+    * tap less to end up with the number of taps that was asked for. */
+   if (taps > 1)
+      bank = blipper_create_sinc(phases, taps - 1, cutoff, beta);
+   if (bank)
+      bank = blipper_prefilter_sinc(bank, phases, taps - 1);
+   if (bank)
+      bank = blipper_interleave_sinc(bank, phases, taps);
+   if (!bank)
+      return NULL;
+
+#if BLIPPER_FIXED_POINT
+   return blipper_quantize_sinc(bank, phases * taps);
+#else
+   return bank;
+#endif
+}
+
+/* Returns the blipper to its initial state: clears the part of the output
+ * buffer that may hold data and zeroes phase, integrator, ramp and the
+ * remembered last input sample. */
+void blipper_reset(blipper_t *blip)
+{
+   const size_t used_bytes =
+      (blip->output_avail + blip->taps) * sizeof(*blip->output_buffer);
+
+   memset(blip->output_buffer, 0, used_bytes);
+
+   blip->output_avail = 0;
+   blip->phase = 0;
+   blip->integrator = 0;
+   blip->ramp = 0;
+   blip->last_sample = 0;
+}
+
+/* Creates a blipper.
+ *
+ * taps, cutoff, beta: filter design parameters, passed to
+ *    blipper_create_filter_bank() when no filter_bank is given.
+ * decimation: number of filter phases; must be non-zero.
+ * buffer_samples: output samples that can be buffered (taps extra entries
+ *    are allocated on top of this).
+ * filter_bank: optional prebuilt bank; it is borrowed, not owned.
+ *
+ * Returns NULL on invalid arguments, allocation failure, or when the
+ * compiler does not implement arithmetic right shift. */
+blipper_t *blipper_new(unsigned taps, double cutoff, double beta,
+      unsigned decimation, unsigned buffer_samples,
+      const blipper_sample_t *filter_bank)
+{
+   blipper_t *blip = NULL;
+   unsigned long long reciprocal;
+
+   /* Sanity check. Not strictly required to be supported in C. */
+   if ((-3 >> 2) != -1)
+   {
+      fprintf(stderr, "Integer right shift not supported.\n");
+      return NULL;
+   }
+
+   /* decimation is used as a divisor below. */
+   if (decimation == 0)
+      return NULL;
+
+   blip = (blipper_t*)calloc(1, sizeof(*blip));
+   if (!blip)
+      return NULL;
+
+   blip->phases = decimation;
+
+   /* 2^32 / decimation. For decimation == 1 the quotient does not fit in
+    * 32 bits and would truncate to 0, so saturate it instead. The resulting
+    * one-too-low quotient is corrected by the rounding fix-ups applied
+    * wherever phases_div is used. */
+   reciprocal = 0x100000000ULL / decimation;
+   if (reciprocal > 0xffffffffULL)
+      reciprocal = 0xffffffffULL;
+   blip->phases_div = (unsigned)reciprocal;
+
+   blip->taps = taps;
+
+   if (!filter_bank)
+   {
+      blip->filter_bank = blipper_create_filter_bank(blip->phases, taps, cutoff, beta);
+      if (!blip->filter_bank)
+         goto error;
+      blip->owns_filter = 1;
+   }
+   else
+      blip->filter_bank = (blipper_sample_t*)filter_bank;
+
+   /* taps extra entries: a filter response may extend that far past the
+    * last complete output sample. */
+   blip->output_buffer = (blipper_long_sample_t*)calloc(buffer_samples + blip->taps,
+         sizeof(*blip->output_buffer));
+   if (!blip->output_buffer)
+      goto error;
+   blip->output_buffer_samples = buffer_samples + blip->taps;
+
+   return blip;
+
+error:
+   blipper_free(blip);
+   return NULL;
+}
+
+/* Advances the input position by clocks_step and adds the filter response
+ * for the matching phase, scaled by delta, into the output buffer. */
+inline void blipper_push_delta(blipper_t *blip, blipper_long_sample_t delta, unsigned clocks_step)
+{
+   const unsigned phases = blip->phases;
+   const unsigned taps = blip->taps;
+   unsigned out_index, bank, t;
+   const blipper_sample_t *coeffs;
+   blipper_long_sample_t *dst;
+
+   blip->phase += clocks_step;
+
+   /* phase / phases through the 2^32 reciprocal; may come out one too low. */
+   out_index = ((unsigned long long)blip->phase * blip->phases_div) >> 32;
+
+   /* Wraps to a huge unsigned value whenever out_index * phases < phase;
+    * in that case move on to the next output sample. */
+   bank = (out_index * phases) - blip->phase;
+   if (bank >= phases)
+   {
+      bank += phases;
+      out_index++;
+   }
+
+   blip->output_avail = out_index;
+
+   coeffs = blip->filter_bank + taps * bank;
+   dst = blip->output_buffer + out_index;
+
+   for (t = 0; t < taps; t++)
+      dst[t] += delta * coeffs[t];
+}
+
+/* Walks samples input values (read through get, stride bytes apart) and
+ * pushes a delta for each change of value. Runs of equal values only
+ * accumulate clocks. Updates phase, output_avail and last_sample. */
+static inline void _blipper_push_samples(blipper_t *blip,
+      const char *data, blipper_long_sample_t (*get)(const char *),
+      unsigned samples, unsigned stride, unsigned clocks_step)
+{
+   const char *cursor = data;
+   unsigned remaining;
+   unsigned pending_clocks = 0;
+   unsigned ready;
+   blipper_long_sample_t prev = blip->last_sample;
+
+#if BLIPPER_LOG_PERFORMANCE
+   double t0 = get_time();
+#endif
+
+   for (remaining = samples; remaining != 0; remaining--, cursor += stride)
+   {
+      const blipper_long_sample_t cur = get(cursor);
+
+      pending_clocks += clocks_step;
+      if (cur == prev)
+         continue;
+
+      blipper_push_delta(blip, cur - prev, pending_clocks);
+      pending_clocks = 0;
+      prev = cur;
+   }
+
+   /* Account for the clocks after the last change of value. */
+   blip->phase += pending_clocks;
+
+   ready = ((unsigned long long)blip->phase * blip->phases_div) >> 32;
+   if ((ready + 1) * blip->phases <= blip->phase)
+      ready++; /* the reciprocal multiply rounded down one too far */
+   blip->output_avail = ready;
+
+   blip->last_sample = prev;
+
+#if BLIPPER_LOG_PERFORMANCE
+   blip->total_time += get_time() - t0;
+   blip->total_samples += samples;
+#endif
+}
+
+/* Loads one blipper_sample_t from data, widened to the long sample type. */
+static inline blipper_long_sample_t _blipper_get_short(const char *data)
+{
+   blipper_sample_t *src = (blipper_sample_t *)data;
+
+   return src[0];
+}
+
+/* Loads one blipper_long_sample_t from data. */
+static inline blipper_long_sample_t _blipper_get_long(const char *data)
+{
+   blipper_long_sample_t *src = (blipper_long_sample_t *)data;
+
+   return src[0];
+}
+
+/* Pushes samples values of type blipper_sample_t; stride is counted in
+ * elements and converted to bytes here. */
+void blipper_push_samples(blipper_t *blip, const blipper_sample_t *data,
+      unsigned samples, unsigned stride, unsigned clocks_step)
+{
+   const unsigned byte_stride = stride * sizeof(*data);
+
+   _blipper_push_samples(blip, (const char *)data, _blipper_get_short,
+         samples, byte_stride, clocks_step);
+}
+
+/* Pushes samples values of type blipper_long_sample_t; stride is counted
+ * in elements and converted to bytes here. */
+void blipper_push_long_samples(blipper_t *blip, const blipper_long_sample_t *data,
+      unsigned samples, unsigned stride, unsigned clocks_step)
+{
+   const unsigned byte_stride = stride * sizeof(*data);
+
+   _blipper_push_samples(blip, (const char *)data, _blipper_get_long,
+         samples, byte_stride, clocks_step);
+}
+
+/* Returns the stored input phase counter. */
+unsigned blipper_read_phase(blipper_t *blip)
+{
+   const unsigned current_phase = blip->phase;
+
+   return current_phase;
+}
+
+/* Returns the stored count of output samples available for reading. */
+unsigned blipper_read_avail(blipper_t *blip)
+{
+   const unsigned ready = blip->output_avail;
+
+   return ready;
+}
+
+/* Stores val at data as a blipper_sample_t. */
+static inline void _blipper_put_short(char *data, blipper_long_sample_t val)
+{
+   blipper_sample_t *dst = (blipper_sample_t *)data;
+
+   dst[0] = val;
+}
+
+/* Stores val at data as a blipper_long_sample_t. */
+static inline void _blipper_put_long(char *data, blipper_long_sample_t val)
+{
+   blipper_long_sample_t *dst = (blipper_long_sample_t *)data;
+
+   dst[0] = val;
+}
+
+/* Integrates the first samples entries of the output buffer and writes
+ * them through put, stride bytes apart. Afterwards the buffer contents
+ * are shifted down by samples entries and output_avail and phase are
+ * reduced accordingly.
+ *
+ * clamp: fixed point build only; when non-zero, results that do not fit
+ * blipper_sample_t are saturated and the integrator is reset to match.
+ * The caller must not ask for more samples than are available. */
+static inline void _blipper_read(blipper_t *blip, int clamp, char *output,
+      void (*put)(char *, blipper_long_sample_t), unsigned samples, unsigned stride)
+{
+   unsigned s;
+   blipper_long_sample_t sum = blip->integrator;
+   const blipper_long_sample_t *out = blip->output_buffer;
+   blipper_long_sample_t ramp = blip->ramp;
+
+#if BLIPPER_LOG_PERFORMANCE
+   double t0 = get_time();
+#endif
+
+#if BLIPPER_FIXED_POINT
+   for (s = 0; s < samples; s++, output += stride)
+   {
+      blipper_long_sample_t quant;
+
+      /* Cannot overflow. Also add a leaky integrator.
+         Mitigates DC shift numerical instability which is
+         inherent for integrators. */
+      sum += ((out[s] + ramp) >> 1) - (sum >> 9);
+
+      /* Rounded. With leaky integrator, this cannot overflow. */
+      quant = (sum + 0x4000) >> 15;
+
+      /* Clamp. quant can potentially have range [-0x10000, 0xffff] here.
+       * In both cases, top 16-bits will have a uniform bit pattern which can be exploited. */
+      if (clamp && (blipper_sample_t)quant != quant)
+      {
+         quant = (quant >> 16) ^ 0x7fff;
+         /* Multiply instead of shifting: quant can be -0x8000 here and
+          * left-shifting a negative value is undefined behaviour. */
+         sum = quant * 0x8000;
+      }
+
+      put(output, quant);
+   }
+#else
+   for (s = 0; s < samples; s++, output += stride)
+   {
+      /* Leaky integrator, same as fixed point (1.0f / 512.0f) */
+      sum += out[s] + ramp - sum * 0.00195f;
+      put(output, sum);
+   }
+#endif
+
+   /* Don't bother with ring buffering.
+    * The entire buffer should be read out ideally anyways. */
+   memmove(blip->output_buffer, blip->output_buffer + samples,
+         (blip->output_avail + blip->taps - samples) * sizeof(*out));
+   memset(blip->output_buffer + blip->output_avail + blip->taps - samples, 0, samples * sizeof(*out));
+   blip->output_avail -= samples;
+   blip->phase -= samples * blip->phases;
+
+   blip->integrator = sum;
+
+#if BLIPPER_LOG_PERFORMANCE
+   blip->integrator_time += get_time() - t0;
+#endif
+}
+
+/* Reads samples output values as blipper_sample_t, with clamping enabled.
+ * stride is counted in elements and converted to bytes here. */
+void blipper_read(blipper_t *blip, blipper_sample_t *output, unsigned samples,
+      unsigned stride)
+{
+   const unsigned byte_stride = stride * sizeof(*output);
+
+   _blipper_read(blip, 1, (char *)output, _blipper_put_short, samples,
+         byte_stride);
+}
+
+/* Reads samples output values as blipper_long_sample_t, without clamping.
+ * stride is counted in elements and converted to bytes here. */
+void blipper_read_long(blipper_t *blip, blipper_long_sample_t *output, unsigned samples,
+      unsigned stride)
+{
+   const unsigned byte_stride = stride * sizeof(*output);
+
+   _blipper_read(blip, 0, (char *)output, _blipper_put_long, samples,
+         byte_stride);
+}
--- a/pico/sound/blipper.h
+++ b/pico/sound/blipper.h
@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2013 - Hans-Kristian Arntzen
+ *
+ * Permission is hereby granted, free of charge, 
+ * to any person obtaining a copy of this software and
+ * associated documentation files (the "Software"),
+ * to deal in the Software without restriction,
+ * including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BLIPPER_H__
+#define BLIPPER_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Compile time configurables. */
+#ifndef BLIPPER_LOG_PERFORMANCE
+#define BLIPPER_LOG_PERFORMANCE 0
+#endif
+
+#ifndef BLIPPER_FIXED_POINT
+#define BLIPPER_FIXED_POINT 1
+#endif
+
+/* Set to float or double.
+ * long double is unlikely to provide any improved precision. */
+#ifndef BLIPPER_REAL_T
+#define BLIPPER_REAL_T float
+#endif
+
+/* Allows including several implementations in one lib. */
+#if BLIPPER_FIXED_POINT
+#define BLIPPER_MANGLE(x) x##_fixed
+#else
+#define BLIPPER_CONCAT2(a, b) a ## b
+#define BLIPPER_CONCAT(a, b) BLIPPER_CONCAT2(a, b)
+#define BLIPPER_MANGLE(x) BLIPPER_CONCAT(x##_, BLIPPER_REAL_T)
+#endif
+
+#include <limits.h>
+
+typedef struct blipper blipper_t;
+typedef BLIPPER_REAL_T blipper_real_t;
+
+#if BLIPPER_FIXED_POINT
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+typedef int16_t blipper_sample_t;
+typedef int32_t blipper_long_sample_t;
+#else
+#if SHRT_MAX == 0x7fff
+typedef short blipper_sample_t;
+#elif INT_MAX == 0x7fff
+typedef int blipper_sample_t;
+#else
+#error "Cannot find suitable type for blipper_sampler_t."
+#endif
+
+#if INT_MAX == 0x7fffffffl
+typedef int blipper_long_sample_t;
+#elif LONG_MAX == 0x7fffffffl
+typedef long blipper_long_sample_t;
+#else
+#error "Cannot find suitable type for blipper_long_sample_t."
+#endif
+#endif
+#else
+typedef BLIPPER_REAL_T blipper_sample_t;
+typedef BLIPPER_REAL_T blipper_long_sample_t; /* Meaningless for float version. */
+#endif
+
+/* Create a new blipper.
+ * taps: Number of filter taps per impulse.
+ *
+ * cutoff: Cutoff frequency in the passband. Has a range of [0, 1].
+ *
+ * beta: Beta used for Kaiser window.
+ *
+ * decimation: Sets decimation rate.
+ * The input sampling rate is then output_rate * decimation.
+ * buffer_samples: The maximum number of processed output samples that can be
+ * buffered up by blipper.
+ *
+ * filter_bank: An optional filter which has already been created by
+ * blipper_create_filter_bank(). blipper_new() does not take ownership
+ * of the buffer and must be freed by caller.
+ * If non-NULL, cutoff and beta will be ignored.
+ *
+ * Some sane values:
+ * taps = 64, cutoff = 0.85, beta = 8.0
+ */
+#define blipper_new BLIPPER_MANGLE(blipper_new)
+blipper_t *blipper_new(unsigned taps, double cutoff, double beta,
+      unsigned decimation, unsigned buffer_samples, const blipper_sample_t *filter_bank);
+
+/* Reset the blipper to its initial state. */
+#define blipper_reset BLIPPER_MANGLE(blipper_reset)
+void blipper_reset(blipper_t *blip);
+
+/* Create a filter which can be passed to blipper_new() in filter_bank.
+ * Arguments to decimation and taps must match. */
+#define blipper_create_filter_bank BLIPPER_MANGLE(blipper_create_filter_bank)
+blipper_sample_t *blipper_create_filter_bank(unsigned decimation,
+      unsigned taps, double cutoff, double beta);
+
+/* Frees the blipper. blip can be NULL (no-op). */
+#define blipper_free BLIPPER_MANGLE(blipper_free)
+void blipper_free(blipper_t *blip);
+
+/* Add a ramp to the synthesized wave. The ramp is added to the integrator
+ * on every input sample.
+ * The amount added is delta / clocks per input sample.
+ * The interface is fractional to have better accuracy with fixed point.
+ * This can be combined with a delta train to synthesize e.g. sawtooth waves.
+ * When using a ramp, care must be taken to ensure that the integrator does not saturate.
+ * It is recommended to use floating point implementation when using the ramp. */
+#define blipper_set_ramp BLIPPER_MANGLE(blipper_set_ramp)
+void blipper_set_ramp(blipper_t *blip, blipper_long_sample_t delta,
+      unsigned clocks);
+
+/* Data pushing interfaces. One of these should be used exclusively. */
+
+/* Push a single delta, which occurs clocks_step input samples after the
+ * last time a delta was pushed. The delta value is the difference signal
+ * between the new sample and the previous.
+ * It is unnecessary to pass a delta of 0.
+ * If the deltas are known beforehand (e.g. when synthesizing a waveform),
+ * this is a more efficient interface than blipper_push_samples().
+ *
+ * The caller must ensure not to push deltas in a way that can destabilize
+ * the final integration.
+ */
+#define blipper_push_delta BLIPPER_MANGLE(blipper_push_delta)
+void blipper_push_delta(blipper_t *blip, blipper_long_sample_t delta, unsigned clocks_step);
+
+/* Push raw samples. blipper will find the deltas itself and push them.
+ * stride is the number of samples between each sample to be used.
+ * This can be used to push interleaved stereo data to two independent
+ * blippers.
+ */
+#define blipper_push_samples BLIPPER_MANGLE(blipper_push_samples)
+void blipper_push_samples(blipper_t *blip, const blipper_sample_t *delta,
+      unsigned samples, unsigned stride, unsigned clocks_step);
+#define blipper_push_long_samples BLIPPER_MANGLE(blipper_push_long_samples)
+void blipper_push_long_samples(blipper_t *blip, const blipper_long_sample_t *delta,
+      unsigned samples, unsigned stride, unsigned clocks_step);
+
+/* Returns the number of samples available for reading using
+ * blipper_read().
+ */
+#define blipper_read_avail BLIPPER_MANGLE(blipper_read_avail)
+unsigned blipper_read_avail(blipper_t *blip);
+
+/* Returns the current filter phase
+ */
+#define blipper_read_phase BLIPPER_MANGLE(blipper_read_phase)
+unsigned blipper_read_phase(blipper_t *blip);
+
+/* Reads processed samples. The caller must ensure to not read
+ * more than what is returned from blipper_read_avail().
+ * As in blipper_push_samples(), stride is the number of samples
+ * between each output sample in output.
+ * Can be used to write to an interleaved stereo buffer.
+ */
+#define blipper_read BLIPPER_MANGLE(blipper_read)
+void blipper_read(blipper_t *blip, blipper_sample_t *output, unsigned samples,
+      unsigned stride);
+#define blipper_read_long BLIPPER_MANGLE(blipper_long_read)
+void blipper_read_long(blipper_t *blip, blipper_long_sample_t *output, unsigned samples,
+      unsigned stride);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
--- a/pico/sound/resampler.c
+++ b/pico/sound/resampler.c
@ -0,0 +1,261 @@
+/* Configurable fixed point resampling SINC filter for mono and stereo audio.
+ *
+ * (C) 2022 kub
+ *
+ * This work is licensed under the terms of any of these licenses
+ * (at your option):
+ *  - GNU GPL, version 2 or later.
+ *  - MAME license.
+ * See COPYING file in the top-level directory.
+ */
+
+
+/* SINC filter generation taken from the blipper library, its license is:
+ *
+ * Copyright (C) 2013 - Hans-Kristian Arntzen
+ *
+ * Permission is hereby granted, free of charge, 
+ * to any person obtaining a copy of this software and
+ * associated documentation files (the "Software"),
+ * to deal in the Software without restriction,
+ * including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <math.h>
+
+#include "../pico_types.h"
+#include "resampler.h"
+
+/* Truncated power series sum over k of (x^2 / 4)^k / (k!)^2, using the
+ * first 18 terms. Used to evaluate the Kaiser window. */
+static double besseli0(double x)
+{
+   const double x_squared = x * x;
+   double numerator = 1.0;    /* x^(2k) */
+   double quarter_pow = 1.0;  /* (1/4)^k */
+   double fact = 1.0;         /* k! */
+   double k = 0.0;
+   double acc = 0.0;
+   unsigned term;
+
+   for (term = 0; term != 18; ++term)
+   {
+      acc += numerator * quarter_pow / (fact * fact);
+
+      /* Advance all running factors to the next term. */
+      k += 1.0;
+      fact *= k;
+      quarter_pow *= 0.25;
+      numerator *= x_squared;
+   }
+
+   return acc;
+}
+
+/* Unnormalised sinc, sin(v) / v; returns 1 for |v| below 1e-5 to avoid 0/0. */
+static double sinc(double v)
+{
+   return (fabs(v) < 0.00001) ? 1.0 : sin(v) / v;
+}
+
+/* Unnormalised Kaiser window value at position index, index in [-1, 1).
+ * The caller divides by kaiser_window(0, beta) to normalise. */
+static double kaiser_window(double index, double beta)
+{
+   const double radicand = 1.0 - index * index;
+
+   return besseli0(beta * sqrt(radicand));
+}
+
+/* Creates a polyphase SINC filter (:phases banks with :taps each)
+ * Interleaves the filter for cache coherency and possibilities for SIMD */
+static s16 *create_sinc(unsigned phases, unsigned taps, double cutoff, double beta)
+{
+   unsigned i, filter_len;
+   double sidelobes, window_mod, window_phase, sinc_phase;
+   s16 *filter;
+   double tap;
+
+   filter = (s16*)malloc(phases * taps * sizeof(*filter));
+   if (!filter)
+      return NULL;
+
+   sidelobes = taps / 2.0;
+   window_mod = 1.0 / kaiser_window(0.0, beta);
+   filter_len = phases * taps;
+
+   for (i = 0; i < filter_len; i++)
+   {
+      window_phase = (double)i / filter_len; /* [0, 1) */
+      window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */
+      sinc_phase = window_phase * sidelobes; /* [-taps / 2, taps / 2) */
+
+      tap = (cutoff * sinc(M_PI * sinc_phase * cutoff) *
+         kaiser_window(window_phase, beta) * window_mod);
+      /* assign taking filter bank interleaving into account:
+       * :phases banks of length :taps */
+      filter[(i%phases)*taps + (i/phases)] = tap * 0x7fff + 0.5;
+   }
+
+   return filter;
+}
+
+/* Public interface */
+
+/* Release a resampler together with its filter and buffer.
+ * A NULL argument is ignored. */
+void resampler_free(resampler_t *rs)
+{
+   if (!rs)
+      return;
+
+   free(rs->buffer);
+   free(rs->filter);
+   free(rs);
+}
+
+/* Create a resampler with upsampling factor :interpolation and downsampling
+ * factor :decimation, Kaiser windowed SINC polyphase FIR with bank size :taps.
+ * The created filter has a size of :taps*:interpolation for upsampling and
+ * :taps*:decimation for downsampling. :taps is limiting the cost per sample and
+ * should be big enough to avoid inaccuracy (>= 8, higher is more accurate).
+ * :cutoff is in [0..1] with 1 representing the Nyquist rate after decimation.
+ * :beta is the Kaiser window beta.
+ * :max_input is the maximum length in a resampler_update call.
+ * Returns NULL on invalid parameters or if out of memory. */
+resampler_t *resampler_new(unsigned taps, unsigned interpolation, unsigned decimation,
+      double cutoff, double beta, unsigned max_input, int stereo)
+{
+   resampler_t *rs = NULL;
+   unsigned long long reciprocal;
+
+   if (taps == 0 || interpolation == 0 || decimation == 0 || max_input == 0)
+      return NULL; /* invalid parameters */
+
+   rs = (resampler_t*)calloc(1, sizeof(*rs));
+   if (!rs)
+      return NULL; /* out of memory */
+
+   /* :cutoff is relative to the decimated frequency, but filtering is taking
+    * place at the interpolated frequency. It needs to be adapted if resampled
+    * rate is lower. Also needs more taps to keep the transition band width */
+   if (decimation > interpolation) {
+      cutoff = cutoff * interpolation/decimation;
+      taps = taps * decimation/interpolation;
+   }
+
+   rs->interpolation = interpolation;
+   rs->decimation = decimation;
+   rs->taps = taps;
+   /* optimizers for resampler_update: */
+   /* 2^32/interpolation does not fit in 32 bits for interpolation == 1 and
+    * would truncate to 0; saturate it so the reciprocal stays usable. */
+   reciprocal = 0x100000000ULL / interpolation;
+   if (reciprocal > 0xffffffffULL)
+      reciprocal = 0xffffffffULL;
+   rs->interp_inv = (u32)reciprocal;
+   rs->ratio_int = decimation / interpolation;
+
+   rs->filter = create_sinc(interpolation, taps, cutoff, beta);
+   if (!rs->filter)
+      goto error;
+
+   rs->stereo = !!stereo;
+   /* Room for the input of one update call plus the :taps frames of filter
+    * history that resampler_update keeps in front of the new input. */
+   rs->buffer_sz = (max_input * decimation/interpolation) + decimation + 1 + taps;
+   rs->buffer = calloc((size_t)rs->buffer_sz * (stereo ? 2:1), sizeof(*rs->buffer));
+   if (!rs->buffer)
+      goto error;
+
+   return rs;
+
+error:
+   free(rs->filter);
+   free(rs->buffer);
+   free(rs);
+   return NULL;
+}
+
+/* Obtain :length resampled audio frames in :buffer. Use :get_samples to obtain
+ * the needed amount of input samples.
+ * The internal buffer holds :taps frames of history followed by the newly
+ * fetched input. Each output frame is the dot product of the filter bank
+ * selected by the current phase with :taps consecutive input frames; the
+ * sums are scaled down by 16 bits when stored. */
+void resampler_update(resampler_t *rs, s32 *buffer, int length,
+       void (*get_samples)(s32 *buffer, int length, int stereo))
+{
+  s16 *u;
+  s32 *p, *q = buffer;
+  int spf = (rs->stereo?2:1);
+  s32 inlen;
+  s32 l, r;
+  int n, i;
+
+  if (length <= 0) return;
+
+  /* compute samples needed on input side. The loops below advance the input
+   * position by exactly (n-1)/interpolation frames in total, with
+   * n = length*decimation + interpolation-phase, so fetch just that many.
+   * Exact division (once per call) is used because a 32 bit reciprocal is
+   * off by one for power-of-two factors and unusable for interpolation 1. */
+  n = length*rs->decimation + rs->interpolation-rs->phase;
+  inlen = (n - 1) / rs->interpolation;
+
+  /* reset buffer to start if the input doesn't fit into the buffer.
+   * memmove, since source and destination overlap if buffer_idx < taps */
+  if (rs->buffer_idx + inlen+rs->taps >= rs->buffer_sz) {
+    memmove(rs->buffer, rs->buffer + rs->buffer_idx*spf, rs->taps*spf*sizeof(*rs->buffer));
+    rs->buffer_idx = 0;
+  }
+  p = rs->buffer + rs->buffer_idx*spf;
+
+  /* generate input samples */
+  if (inlen > 0)
+    get_samples(p + rs->taps*spf, inlen, rs->stereo);
+
+  if (rs->stereo) {
+    while (--length >= 0) {
+      /* compute filter output */
+      u = rs->filter + (rs->phase * rs->taps);
+      for (i = 0, l = r = 0; i < rs->taps-1; i += 2)
+        { n = *u++; l += n * p[2*i  ]; r += n * p[2*i+1];
+          n = *u++; l += n * p[2*i+2]; r += n * p[2*i+3]; }
+      if (i < rs->taps)
+        { n = *u++; l += n * p[2*i  ]; r += n * p[2*i+1]; }
+      *q++ = l >> 16, *q++ = r >> 16;
+      /* advance position to next sample: always by the integer part of
+       * decimation/interpolation, plus one more frame on phase underflow */
+      rs->phase -= rs->decimation;
+      rs->phase += rs->ratio_int*rs->interpolation;
+      p += 2*rs->ratio_int;
+      rs->buffer_idx += rs->ratio_int;
+      if (rs->phase < 0)
+        { rs->phase += rs->interpolation, p += 2, rs->buffer_idx ++; }
+    }
+  } else {
+    while (--length >= 0) {
+      /* compute filter output */
+      u = rs->filter + (rs->phase * rs->taps);
+      for (i = 0, l = r = 0; i < rs->taps-1; i += 2)
+        { n = *u++; l += n * p[  i  ];
+          n = *u++; l += n * p[  i+1]; }
+      if (i < rs->taps)
+        { n = *u++; l += n * p[  i  ]; }
+      *q++ = l >> 16;
+      /* advance position to next sample: always by the integer part of
+       * decimation/interpolation, plus one more frame on phase underflow */
+      rs->phase -= rs->decimation;
+      rs->phase += rs->ratio_int*rs->interpolation;
+      p +=   rs->ratio_int;
+      rs->buffer_idx += rs->ratio_int;
+      if (rs->phase < 0)
+        { rs->phase += rs->interpolation, p += 1, rs->buffer_idx ++; }
+    }
+  }
+}
--- a/pico/sound/resampler.h
+++ b/pico/sound/resampler.h
@ -0,0 +1,44 @@
+/* Configurable fixed point resampling SINC filter for mono and stereo audio.
+ *
+ * (C) 2022 kub
+ *
+ * This work is licensed under the terms of any of these licenses
+ * (at your option):
+ *  - GNU GPL, version 2 or later.
+ *  - MAME license.
+ * See COPYING file in the top-level directory.
+ */
+
+struct resampler {
+  int	stereo;         // non-zero: interleaved stereo frames (left, right), 0: mono
+  int   taps;           // taps to compute per output sample
+  int   interpolation;  // upsampling factor (numerator), added to phase when it drops below 0
+  int   decimation;     // downsampling factor (denominator), subtracted from phase per output sample
+  int   ratio_int;      // floor(decimation/interpolation), whole input frames skipped per output sample
+  u32   interp_inv;     // Q16, 1.0/interpolation
+  s16   *filter;        // filter taps, the bank for a phase starts at filter[phase*taps]
+  s32   *buffer;        // filter history and input buffer (w/o zero stuffing)
+  int   buffer_sz;      // buffer size in frames
+  int   buffer_idx;     // buffer offset in frames where the current filter window starts
+  int	phase;          // filter phase for last output sample
+};
+typedef struct resampler resampler_t;
+
+
+/* Release a resampler created by resampler_new.
+ * NOTE(review): sound.c calls this with a NULL pointer before the first
+ * resampler has been created - confirm that resampler.c accepts NULL. */
+void resampler_free(resampler_t *r);
+/* Create a resampler with upsampling factor :interpolation and downsampling
+ * factor :decimation, Kaiser windowed SINC polyphase FIR with bank size :taps.
+ * The created filter has a size of :taps*:interpolation for upsampling and
+ * :taps*:decimation for downsampling. :taps is limiting the cost per sample and
+ * should be big enough to avoid inaccuracy (>= 8, higher is more accurate).
+ * :cutoff is in [0..1] with 1 representing the Nyquist rate after decimation.
+ * :beta is the Kaiser window beta.
+ * :max_input is the maximum length in a resampler_update call.
+ * :stereo is non-zero for interleaved stereo frames and 0 for mono.
+ * The returned resampler is to be released with resampler_free.
+ * NOTE(review): the result on allocation failure is not visible here
+ * (presumably NULL) - confirm against resampler.c. */
+resampler_t *resampler_new(unsigned taps, unsigned interpolation, unsigned decimation,
+       double cutoff, double beta, unsigned max_input, int stereo);
+/* Obtain :length resampled audio frames in :buffer. :generate_samples is called
+ * to produce the needed amount of input frames; it receives the destination
+ * buffer, the number of frames to generate and the stereo flag of the
+ * resampler. It isn't called if no new input is needed for this update. */
+void resampler_update(resampler_t *r, s32 *buffer, int length,
+       void (*generate_samples)(s32 *buffer, int length, int stereo));
+
--- a/pico/sound/sound.c
+++ b/pico/sound/sound.c
@ -14,6 +14,12 @@
 #include "mix.h"
 #include "emu2413/emu2413.h"

+#ifdef USE_BLIPPER
+#include "blipper.h"
+#else
+#include "resampler.h"
+#endif
+
 void (*PsndMix_32_to_16l)(s16 *dest, s32 *src, int count) = mix_32_to_16l_stereo;

 // master int buffer to mix to
@ -32,6 +38,11 @@ OPLL old_opll;
 static OPLL *opll = NULL;
 unsigned YM2413_reg;

+#ifdef USE_BLIPPER
+static blipper_t *fmlblip, *fmrblip; // FM resamplers: left (also used for mono) and right (stereo only)
+#else
+static resampler_t *fmresampler; // FM resampler, (re)created in YM2612_setup_FIR
+#endif

 PICO_INTERNAL void PsndInit(void)
 {
@ -44,6 +55,13 @@ PICO_INTERNAL void PsndExit(void)
 {
  OPLL_delete(opll);
  opll = NULL;
+
+#ifdef USE_BLIPPER
+  blipper_free(fmlblip); fmlblip = NULL;
+  blipper_free(fmrblip); fmrblip = NULL;
+#else
+  resampler_free(fmresampler); fmresampler = NULL;
+#endif
 }

 PICO_INTERNAL void PsndReset(void)
@ -53,6 +71,111 @@ PICO_INTERNAL void PsndReset(void)
  timers_reset();
 }

+int (*PsndFMUpdate)(s32 *buffer, int length, int stereo, int is_buf_empty); // FM render hook, set by PsndRerate to YM2612UpdateFIR or YM2612UpdateOne
+
+// FM polyphase FIR resampling
+
+#ifdef USE_BLIPPER
+#define FMFIR_TAPS	11
+
+// resample FM from its native 53267Hz/52781Hz with the blipper library
+static u32 ymmulinv;
+
+/* Render :length output frames of FM sound into :buffer, resampled from the
+ * native YM2612 rate with the blipper filter(s) at the ratio
+ * Pico.snd.fm_fir_mul / Pico.snd.fm_fir_div.
+ * The needed native samples are rendered in place by YM2612UpdateOne, pushed
+ * into the blipper(s) and read back resampled into the same buffer.
+ * :stereo selects interleaved stereo (both blippers) or mono (left blipper).
+ * :is_buf_empty is handed through to YM2612UpdateOne.
+ * Returns the result of YM2612UpdateOne, or 0 if it wasn't called. */
+int YM2612UpdateFIR(s32 *buffer, int length, int stereo, int is_buf_empty)
+{
+  int mul = Pico.snd.fm_fir_mul, div = Pico.snd.fm_fir_div;
+  s32 *p = buffer, *q = buffer;
+  int ymlen;
+  int ret = 0;
+
+  if (length <= 0) return ret;
+
+  // FM samples needed: (length*div + div-blipper_read_phase(fmlblip)) / mul
+  // The division is a multiplication with the Q32 reciprocal of mul. The
+  // product must be computed in 64 bit: in 32 bit the upper half is lost, and
+  // shifting a 32 bit value by 32 is undefined.
+  // NOTE(review): ymmulinv is rounded down, so the result is one less than the
+  // exact quotient whenever the dividend is a multiple of mul - confirm that
+  // this is intended.
+  ymlen = ((unsigned long long)(length*div + div-blipper_read_phase(fmlblip)) * ymmulinv) >> 32;
+  if (ymlen > 0)
+    ret = YM2612UpdateOne(p, ymlen, stereo, is_buf_empty);
+
+  if (stereo) {
+    // left and right samples are interleaved, hence a stride of 2
+    blipper_push_long_samples(fmlblip, p  , ymlen, 2, mul);
+    blipper_push_long_samples(fmrblip, p+1, ymlen, 2, mul);
+    blipper_read_long(fmlblip, q  , blipper_read_avail(fmlblip), 2);
+    blipper_read_long(fmrblip, q+1, blipper_read_avail(fmrblip), 2);
+  } else {
+    blipper_push_long_samples(fmlblip, p  , ymlen, 1, mul);
+    blipper_read_long(fmlblip, q  , blipper_read_avail(fmlblip), 1);
+  }
+
+  return ret;
+}
+
+/* Select the resampling ratio for converting FM sound from :inrate to :outrate
+ * and (re)create the blipper filter(s) for it. The ratio is stored in
+ * Pico.snd.fm_fir_mul and Pico.snd.fm_fir_div, its Q32 reciprocal in ymmulinv.
+ * The right channel blipper is only created if :stereo is non-zero. */
+static void YM2612_setup_FIR(int inrate, int outrate, int stereo)
+{
+  int mindiff = 999;
+  int diff, mul, div;
+  int maxdecim = 1500/FMFIR_TAPS;
+
+  // compute filter ratio with smallest error for a decent number of taps
+  for (div = maxdecim/2; div <= maxdecim; div++) {
+    mul = (outrate*div + inrate/2) / inrate;
+    diff = outrate*div/mul - inrate;
+    if (abs(diff) < abs(mindiff)) {
+      mindiff = diff;
+      Pico.snd.fm_fir_mul = mul;
+      Pico.snd.fm_fir_div = div;
+    }
+  }
+  // the reciprocal must belong to the selected ratio. The loop variable mul
+  // holds the last candidate tried, which isn't necessarily the selected one.
+  ymmulinv = 0x100000000ULL / Pico.snd.fm_fir_mul; /* 1/mul in Q32 */
+  printf("FM polyphase FIR ratio=%d/%d error=%.3f%%\n",
+        Pico.snd.fm_fir_mul, Pico.snd.fm_fir_div, 100.0*mindiff/inrate);
+
+  // create blipper (modified for polyphase resampling). Not really perfect for
+  // FM, but has SINC generator, a good window, and computes the filter in Q16.
+  blipper_free(fmlblip);
+  blipper_free(fmrblip);
+  // the right blipper isn't recreated in mono mode. Clear the pointer, so that
+  // it isn't released again by the next setup or by PsndExit.
+  fmrblip = NULL;
+  fmlblip = blipper_new(FMFIR_TAPS, 0.85, 8.5, Pico.snd.fm_fir_div, 1000, NULL);
+  if (!stereo) return;
+  fmrblip = blipper_new(FMFIR_TAPS, 0.85, 8.5, Pico.snd.fm_fir_div, 1000, NULL);
+}
+#else
+#define FMFIR_TAPS	8
+
+// resample FM from its native 53267Hz/52781Hz with polyphase FIR filter
+static int ymchans; // result of the latest YM2612UpdateOne call made by YM2612Update
+static void YM2612Update(s32 *buffer, int length, int stereo) // input callback handed to resampler_update
+{
+  ymchans = YM2612UpdateOne(buffer, length, stereo, 1); // last argument (the is_buf_empty slot of PsndFMUpdate) is fixed to 1
+}
+
+int YM2612UpdateFIR(s32 *buffer, int length, int stereo, int is_buf_empty) // :stereo and :is_buf_empty aren't used here
+{
+  resampler_update(fmresampler, buffer, length, YM2612Update); // fills :buffer with :length resampled frames, rendering FM through YM2612Update
+  return ymchans; // result of the latest YM2612UpdateOne call; may stem from an earlier update if no new input was needed
+}
+
+/* Choose the interpolation/decimation ratio for resampling FM sound from
+ * :inrate to :outrate, store it in Pico.snd.fm_fir_mul/fm_fir_div and replace
+ * the FM resampler with one created for this ratio. */
+static void YM2612_setup_FIR(int inrate, int outrate, int stereo)
+{
+  const int max_interp = 30; // max interpolation factor
+  int best_err = 999;
+  int interp;
+
+  // scan the upper half of the interpolation factors. On equal error the
+  // later (larger) factor replaces the earlier one.
+  for (interp = max_interp/2; interp <= max_interp; interp++) {
+    int decim = (inrate*interp + outrate/2) / outrate;
+    int err = outrate*decim/interp - inrate;
+
+    if (abs(err) > abs(best_err))
+      continue;
+    best_err = err;
+    Pico.snd.fm_fir_mul = interp;
+    Pico.snd.fm_fir_div = decim;
+  }
+  printf("FM polyphase FIR ratio=%d/%d error=%.3f%%\n",
+        Pico.snd.fm_fir_mul, Pico.snd.fm_fir_div, 100.0*best_err/inrate);
+
+  // drop the previous resampler before creating the one for the new ratio
+  resampler_free(fmresampler);
+  fmresampler = resampler_new(FMFIR_TAPS, Pico.snd.fm_fir_mul, Pico.snd.fm_fir_div,
+        0.85, 2.35, 2*inrate/50, stereo);
+}
+#endif

 // to be called after changing sound rate or chips
 void PsndRerate(int preserve_state)
@ -60,6 +183,7 @@ void PsndRerate(int preserve_state)
  void *state = NULL;
  int target_fps = Pico.m.pal ? 50 : 60;
  int target_lines = Pico.m.pal ? 313 : 262;
+  int ym2612_clock = Pico.m.pal ? OSC_PAL/7 : OSC_NTSC/7;

  if (preserve_state) {
    state = malloc(0x204);
@ -67,9 +191,19 @@ void PsndRerate(int preserve_state)
    ym2612_pack_state();
    memcpy(state, YM2612GetRegs(), 0x204);
  }
-  YM2612Init(Pico.m.pal ? OSC_PAL/7 : OSC_NTSC/7, PicoIn.sndRate,
+  if (PicoIn.opt & POPT_EN_FM_FILTER) {
+    int ym2612_rate = (ym2612_clock+(6*24)/2) / (6*24);
+    YM2612Init(ym2612_clock, ym2612_rate,
        ((PicoIn.opt&POPT_DIS_FM_SSGEG) ? 0 : ST_SSG) |
        ((PicoIn.opt&POPT_EN_FM_DAC)    ? ST_DAC : 0));
+    YM2612_setup_FIR(ym2612_rate, PicoIn.sndRate, PicoIn.opt & POPT_EN_STEREO);
+    PsndFMUpdate = YM2612UpdateFIR;
+  } else {
+    YM2612Init(ym2612_clock, PicoIn.sndRate,
+        ((PicoIn.opt&POPT_DIS_FM_SSGEG) ? 0 : ST_SSG) |
+        ((PicoIn.opt&POPT_EN_FM_DAC)    ? ST_DAC : 0));
+    PsndFMUpdate = YM2612UpdateOne;
+  }
  if (preserve_state) {
    // feed it back it's own registers, just like after loading state
    memcpy(YM2612GetRegs(), state, 0x204);
@ -267,7 +401,7 @@ PICO_INTERNAL void PsndDoFM(int cyc_to)
    pos <<= 1;
  }
  if (PicoIn.opt & POPT_EN_FM)
-    YM2612UpdateOne(PsndBuffer + pos, len, stereo, 1);
+    PsndFMUpdate(PsndBuffer + pos, len, stereo, 1);
 }

 // cdda
@ -383,7 +517,7 @@ static int PsndRender(int offset, int length)
    s32 *fmbuf = buf32 + ((fmlen-offset) << stereo);
    Pico.snd.fm_pos += (length-fmlen) << 20;
    if (PicoIn.opt & POPT_EN_FM)
-      YM2612UpdateOne(fmbuf, length-fmlen, stereo, 1);
+      PsndFMUpdate(fmbuf, length-fmlen, stereo, 1);
  }

  // CD: PCM sound