mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-04 23:07:46 -04:00
540 lines
15 KiB
C
540 lines
15 KiB
C
/*
|
|
* Copyright (C) 2013 - Hans-Kristian Arntzen
|
|
*
|
|
* Permission is hereby granted, free of charge,
|
|
* to any person obtaining a copy of this software and
|
|
* associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction,
|
|
* including without limitation the rights to
|
|
* use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software,
|
|
* and to permit persons to whom the Software is furnished to do so,
|
|
* subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
|
* DAMAGES OR OTHER LIABILITY,
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*
|
|
*
|
|
* 03-2022 kub: modified for arbitrary decimation rates
|
|
* 03-2022 kub: modified for 32 bit sample size
|
|
*/
|
|
|
|
#include "blipper.h"
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <math.h>
|
|
|
|
#define BLIPPER_FILTER_AMP 0.75
|
|
|
|
#if BLIPPER_LOG_PERFORMANCE
|
|
#include <time.h>
|
|
static double get_time(void)
|
|
{
|
|
struct timespec tv;
|
|
clock_gettime(CLOCK_MONOTONIC, &tv);
|
|
return tv.tv_sec + tv.tv_nsec / 1000000000.0;
|
|
}
|
|
#endif
|
|
|
|
struct blipper
|
|
{
|
|
blipper_long_sample_t *output_buffer;
|
|
unsigned output_avail;
|
|
unsigned output_buffer_samples;
|
|
|
|
blipper_sample_t *filter_bank;
|
|
|
|
unsigned phase;
|
|
unsigned phases;
|
|
unsigned phases_div;
|
|
unsigned taps;
|
|
|
|
blipper_long_sample_t integrator;
|
|
blipper_long_sample_t ramp;
|
|
blipper_long_sample_t last_sample;
|
|
|
|
#if BLIPPER_LOG_PERFORMANCE
|
|
double total_time;
|
|
double integrator_time;
|
|
unsigned long total_samples;
|
|
#endif
|
|
|
|
int owns_filter;
|
|
};
|
|
|
|
void blipper_free(blipper_t *blip)
|
|
{
|
|
if (blip)
|
|
{
|
|
#if BLIPPER_LOG_PERFORMANCE
|
|
fprintf(stderr, "[blipper]: Processed %lu samples, using %.6f seconds blipping and %.6f seconds integrating.\n", blip->total_samples, blip->total_time, blip->integrator_time);
|
|
#endif
|
|
|
|
if (blip->owns_filter)
|
|
free(blip->filter_bank);
|
|
free(blip->output_buffer);
|
|
free(blip);
|
|
}
|
|
}
|
|
|
|
static double besseli0(double x)
|
|
{
|
|
unsigned i;
|
|
double sum = 0.0;
|
|
|
|
double factorial = 1.0;
|
|
double factorial_mult = 0.0;
|
|
double x_pow = 1.0;
|
|
double two_div_pow = 1.0;
|
|
double x_sqr = x * x;
|
|
|
|
/* Approximate. This is an infinite sum.
|
|
* Luckily, it converges rather fast. */
|
|
for (i = 0; i < 18; i++)
|
|
{
|
|
sum += x_pow * two_div_pow / (factorial * factorial);
|
|
|
|
factorial_mult += 1.0;
|
|
x_pow *= x_sqr;
|
|
two_div_pow *= 0.25;
|
|
factorial *= factorial_mult;
|
|
}
|
|
|
|
return sum;
|
|
}
|
|
|
|
static double sinc(double v)
|
|
{
|
|
if (fabs(v) < 0.00001)
|
|
return 1.0;
|
|
else
|
|
return sin(v) / v;
|
|
}
|
|
|
|
/* index range = [-1, 1) */
|
|
static double kaiser_window(double index, double beta)
|
|
{
|
|
return besseli0(beta * sqrt(1.0 - index * index));
|
|
}
|
|
|
|
#ifndef M_PI
|
|
#define M_PI 3.14159265358979323846
|
|
#endif
|
|
|
|
static blipper_real_t *blipper_create_sinc(unsigned phases, unsigned taps,
|
|
double cutoff, double beta)
|
|
{
|
|
unsigned i, filter_len;
|
|
double sidelobes, window_mod, window_phase, sinc_phase;
|
|
blipper_real_t *filter;
|
|
|
|
filter = (blipper_real_t*)malloc(phases * taps * sizeof(*filter));
|
|
if (!filter)
|
|
return NULL;
|
|
|
|
sidelobes = taps / 2.0;
|
|
window_mod = 1.0 / kaiser_window(0.0, beta);
|
|
filter_len = phases * taps;
|
|
for (i = 0; i < filter_len; i++)
|
|
{
|
|
window_phase = (double)i / filter_len; /* [0, 1) */
|
|
window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */
|
|
sinc_phase = window_phase * sidelobes; /* [-taps / 2, taps / 2) */
|
|
|
|
filter[i] = cutoff * sinc(M_PI * sinc_phase * cutoff) *
|
|
kaiser_window(window_phase, beta) * window_mod;
|
|
}
|
|
|
|
return filter;
|
|
}
|
|
|
|
void blipper_set_ramp(blipper_t *blip, blipper_long_sample_t delta,
|
|
unsigned clocks)
|
|
{
|
|
blipper_real_t ramp = BLIPPER_FILTER_AMP * delta * blip->phases / clocks;
|
|
#if BLIPPER_FIXED_POINT
|
|
blip->ramp = (blipper_long_sample_t)floor(ramp * 0x8000 + 0.5);
|
|
#else
|
|
blip->ramp = ramp;
|
|
#endif
|
|
}
|
|
|
|
/* We differentiate and integrate at different sample rates.
|
|
* Differentiation is D(z) = 1 - z^-1 and happens when delta impulses
|
|
* are convolved. Integration step after decimation by D is 1 / (1 - z^-D).
|
|
*
|
|
* If our sinc filter is S(z) we'd have a response of
|
|
* S(z) * (1 - z^-1) / (1 - z^-D) after blipping.
|
|
*
|
|
* Compensate by prefiltering S(z) with the inverse (1 - z^-D) / (1 - z^-1).
|
|
* This filtering creates a finite length filter, albeit slightly longer.
|
|
*
|
|
* phases is the same as decimation rate. */
|
|
static blipper_real_t *blipper_prefilter_sinc(blipper_real_t *filter, unsigned phases,
|
|
unsigned taps)
|
|
{
|
|
unsigned i;
|
|
float filter_amp = BLIPPER_FILTER_AMP / phases;
|
|
blipper_real_t *tmp_filter;
|
|
blipper_real_t *new_filter = (blipper_real_t*)malloc((phases * taps + phases) * sizeof(*filter));
|
|
if (!new_filter)
|
|
goto error;
|
|
|
|
tmp_filter = (blipper_real_t*)realloc(filter, (phases * taps + phases) * sizeof(*filter));
|
|
if (!tmp_filter)
|
|
goto error;
|
|
filter = tmp_filter;
|
|
|
|
/* Integrate. */
|
|
new_filter[0] = filter[0];
|
|
for (i = 1; i < phases * taps; i++)
|
|
new_filter[i] = new_filter[i - 1] + filter[i];
|
|
for (i = phases * taps; i < phases * taps + phases; i++)
|
|
new_filter[i] = new_filter[phases * taps - 1];
|
|
|
|
taps++;
|
|
|
|
/* Differentiate with offset of D. */
|
|
memcpy(filter, new_filter, phases * sizeof(*filter));
|
|
for (i = phases; i < phases * taps; i++)
|
|
filter[i] = new_filter[i] - new_filter[i - phases];
|
|
|
|
/* blipper_prefilter_sinc() boosts the gain of the sinc.
|
|
* Have to compensate for this. Attenuate a bit more to ensure
|
|
* we don't clip, especially in fixed point. */
|
|
for (i = 0; i < phases * taps; i++)
|
|
filter[i] *= filter_amp;
|
|
|
|
free(new_filter);
|
|
return filter;
|
|
|
|
error:
|
|
free(new_filter);
|
|
free(filter);
|
|
return NULL;
|
|
}
|
|
|
|
/* Creates a polyphase filter bank.
|
|
* Interleaves the filter for cache coherency and possibilities
|
|
* for SIMD processing. */
|
|
static blipper_real_t *blipper_interleave_sinc(blipper_real_t *filter, unsigned phases,
|
|
unsigned taps)
|
|
{
|
|
unsigned t, p;
|
|
blipper_real_t *new_filter = (blipper_real_t*)malloc(phases * taps * sizeof(*filter));
|
|
if (!new_filter)
|
|
goto error;
|
|
|
|
for (t = 0; t < taps; t++)
|
|
for (p = 0; p < phases; p++)
|
|
new_filter[p * taps + t] = filter[t * phases + p];
|
|
|
|
free(filter);
|
|
return new_filter;
|
|
|
|
error:
|
|
free(new_filter);
|
|
free(filter);
|
|
return NULL;
|
|
}
|
|
|
|
#if BLIPPER_FIXED_POINT
|
|
static blipper_sample_t *blipper_quantize_sinc(blipper_real_t *filter, unsigned taps)
|
|
{
|
|
unsigned t;
|
|
blipper_sample_t *filt = (blipper_sample_t*)malloc(taps * sizeof(*filt));
|
|
if (!filt)
|
|
goto error;
|
|
|
|
for (t = 0; t < taps; t++)
|
|
filt[t] = (blipper_sample_t)floor(filter[t] * 0x7fff + 0.5);
|
|
|
|
free(filter);
|
|
return filt;
|
|
|
|
error:
|
|
free(filter);
|
|
free(filt);
|
|
return NULL;
|
|
}
|
|
#endif
|
|
|
|
blipper_sample_t *blipper_create_filter_bank(unsigned phases, unsigned taps,
|
|
double cutoff, double beta)
|
|
{
|
|
blipper_real_t *sinc_filter;
|
|
|
|
/* blipper_prefilter_sinc() will add one tap.
|
|
* To keep number of taps as expected, compensate for it here
|
|
* to keep the interface more obvious. */
|
|
if (taps <= 1)
|
|
return 0;
|
|
taps--;
|
|
|
|
sinc_filter = blipper_create_sinc(phases, taps, cutoff, beta);
|
|
if (!sinc_filter)
|
|
return 0;
|
|
|
|
sinc_filter = blipper_prefilter_sinc(sinc_filter, phases, taps);
|
|
if (!sinc_filter)
|
|
return 0;
|
|
taps++;
|
|
|
|
sinc_filter = blipper_interleave_sinc(sinc_filter, phases, taps);
|
|
if (!sinc_filter)
|
|
return 0;
|
|
|
|
#if BLIPPER_FIXED_POINT
|
|
return blipper_quantize_sinc(sinc_filter, phases * taps);
|
|
#else
|
|
return sinc_filter;
|
|
#endif
|
|
}
|
|
|
|
void blipper_reset(blipper_t *blip)
|
|
{
|
|
blip->phase = 0;
|
|
memset(blip->output_buffer, 0,
|
|
(blip->output_avail + blip->taps) * sizeof(*blip->output_buffer));
|
|
blip->output_avail = 0;
|
|
blip->last_sample = 0;
|
|
blip->integrator = 0;
|
|
blip->ramp = 0;
|
|
}
|
|
|
|
blipper_t *blipper_new(unsigned taps, double cutoff, double beta,
|
|
unsigned decimation, unsigned buffer_samples,
|
|
const blipper_sample_t *filter_bank)
|
|
{
|
|
blipper_t *blip = NULL;
|
|
|
|
/* Sanity check. Not strictly required to be supported in C. */
|
|
if ((-3 >> 2) != -1)
|
|
{
|
|
fprintf(stderr, "Integer right shift not supported.\n");
|
|
return NULL;
|
|
}
|
|
|
|
blip = (blipper_t*)calloc(1, sizeof(*blip));
|
|
if (!blip)
|
|
return NULL;
|
|
|
|
blip->phases = decimation;
|
|
blip->phases_div = 0x100000000ULL/decimation;
|
|
|
|
blip->taps = taps;
|
|
|
|
if (!filter_bank)
|
|
{
|
|
blip->filter_bank = blipper_create_filter_bank(blip->phases, taps, cutoff, beta);
|
|
if (!blip->filter_bank)
|
|
goto error;
|
|
blip->owns_filter = 1;
|
|
}
|
|
else
|
|
blip->filter_bank = (blipper_sample_t*)filter_bank;
|
|
|
|
blip->output_buffer = (blipper_long_sample_t*)calloc(buffer_samples + blip->taps,
|
|
sizeof(*blip->output_buffer));
|
|
if (!blip->output_buffer)
|
|
goto error;
|
|
blip->output_buffer_samples = buffer_samples + blip->taps;
|
|
|
|
return blip;
|
|
|
|
error:
|
|
blipper_free(blip);
|
|
return NULL;
|
|
}
|
|
|
|
inline void blipper_push_delta(blipper_t *blip, blipper_long_sample_t delta, unsigned clocks_step)
|
|
{
|
|
unsigned target_output, filter_phase, taps, i;
|
|
const blipper_sample_t *response;
|
|
blipper_long_sample_t *target;
|
|
|
|
blip->phase += clocks_step;
|
|
|
|
target_output = ((unsigned long long)blip->phase * blip->phases_div) >> 32;
|
|
|
|
filter_phase = (target_output * blip->phases) - blip->phase;
|
|
if (filter_phase >= blip->phases) // rounding error for *(1/phases)
|
|
filter_phase += blip->phases, target_output ++;
|
|
response = blip->filter_bank + blip->taps * filter_phase;
|
|
|
|
target = blip->output_buffer + target_output;
|
|
taps = blip->taps;
|
|
|
|
blip->output_avail = target_output;
|
|
|
|
for (i = 1; i < taps; i += 2) {
|
|
target[i-1] += delta * response[i-1];
|
|
target[i ] += delta * response[i ];
|
|
}
|
|
if (taps & 1)
|
|
target[i-1] += delta * response[i-1];
|
|
}
|
|
|
|
static inline void _blipper_push_samples(blipper_t *blip,
|
|
const char *data, blipper_long_sample_t (*get)(const char *),
|
|
unsigned samples, unsigned stride, unsigned clocks_step)
|
|
{
|
|
unsigned s;
|
|
unsigned clocks_skip = 0;
|
|
blipper_long_sample_t last = blip->last_sample;
|
|
|
|
#if BLIPPER_LOG_PERFORMANCE
|
|
double t0 = get_time();
|
|
#endif
|
|
|
|
for (s = 0; s < samples; s++, data += stride)
|
|
{
|
|
blipper_long_sample_t val = get(data);
|
|
clocks_skip += clocks_step;
|
|
if (val != last)
|
|
{
|
|
blipper_push_delta(blip, val - last, clocks_skip);
|
|
clocks_skip = 0;
|
|
last = val;
|
|
}
|
|
}
|
|
|
|
blip->phase += clocks_skip;
|
|
blip->output_avail = ((unsigned long long)blip->phase * blip->phases_div) >> 32;
|
|
if ((blip->output_avail+1) * blip->phases <= blip->phase)
|
|
blip->output_avail++; // rounding error for *(1/phases)
|
|
blip->last_sample = last;
|
|
|
|
#if BLIPPER_LOG_PERFORMANCE
|
|
blip->total_time += get_time() - t0;
|
|
blip->total_samples += samples;
|
|
#endif
|
|
}
|
|
|
|
static inline blipper_long_sample_t _blipper_get_short(const char *data)
|
|
{
|
|
return *(blipper_sample_t *)data;
|
|
}
|
|
|
|
static inline blipper_long_sample_t _blipper_get_long(const char *data)
|
|
{
|
|
return *(blipper_long_sample_t *)data;
|
|
}
|
|
|
|
void blipper_push_samples(blipper_t *blip, const blipper_sample_t *data,
|
|
unsigned samples, unsigned stride, unsigned clocks_step)
|
|
{
|
|
_blipper_push_samples(blip, (const char *)data, _blipper_get_short, samples,
|
|
stride * sizeof(*data), clocks_step);
|
|
}
|
|
|
|
void blipper_push_long_samples(blipper_t *blip, const blipper_long_sample_t *data,
|
|
unsigned samples, unsigned stride, unsigned clocks_step)
|
|
{
|
|
_blipper_push_samples(blip, (const char *)data, _blipper_get_long, samples,
|
|
stride * sizeof(*data), clocks_step);
|
|
}
|
|
|
|
unsigned blipper_read_phase(blipper_t *blip)
|
|
{
|
|
return blip->phase;
|
|
}
|
|
|
|
unsigned blipper_read_avail(blipper_t *blip)
|
|
{
|
|
return blip->output_avail;
|
|
}
|
|
|
|
static inline void _blipper_put_short(char *data, blipper_long_sample_t val)
|
|
{
|
|
*(blipper_sample_t *)data = val;
|
|
}
|
|
|
|
static inline void _blipper_put_long(char *data, blipper_long_sample_t val)
|
|
{
|
|
*(blipper_long_sample_t *)data = val;
|
|
}
|
|
|
|
static inline void _blipper_read(blipper_t *blip, int clamp, char *output,
|
|
void (*put)(char *, blipper_long_sample_t), unsigned samples, unsigned stride)
|
|
{
|
|
unsigned s;
|
|
blipper_long_sample_t sum = blip->integrator;
|
|
const blipper_long_sample_t *out = blip->output_buffer;
|
|
blipper_long_sample_t ramp = blip->ramp;
|
|
|
|
#if BLIPPER_LOG_PERFORMANCE
|
|
double t0 = get_time();
|
|
#endif
|
|
|
|
#if BLIPPER_FIXED_POINT
|
|
for (s = 0; s < samples; s++, output += stride)
|
|
{
|
|
blipper_long_sample_t quant;
|
|
|
|
/* Cannot overflow. Also add a leaky integrator.
|
|
Mitigates DC shift numerical instability which is
|
|
inherent for integrators. */
|
|
sum += ((out[s] + ramp) >> 1) - (sum >> 9);
|
|
|
|
/* Rounded. With leaky integrator, this cannot overflow. */
|
|
quant = (sum + 0x4000) >> 15;
|
|
|
|
/* Clamp. quant can potentially have range [-0x10000, 0xffff] here.
|
|
* In both cases, top 16-bits will have a uniform bit pattern which can be exploited. */
|
|
if (clamp && (blipper_sample_t)quant != quant)
|
|
{
|
|
quant = (quant >> 16) ^ 0x7fff;
|
|
sum = quant << 15;
|
|
}
|
|
|
|
put(output, quant);
|
|
}
|
|
#else
|
|
for (s = 0; s < samples; s++, output += stride)
|
|
{
|
|
/* Leaky integrator, same as fixed point (1.0f / 512.0f) */
|
|
sum += out[s] + ramp - sum * 0.00195f;
|
|
put(output, sum);
|
|
}
|
|
#endif
|
|
|
|
/* Don't bother with ring buffering.
|
|
* The entire buffer should be read out ideally anyways. */
|
|
memmove(blip->output_buffer, blip->output_buffer + samples,
|
|
(blip->output_avail + blip->taps - samples) * sizeof(*out));
|
|
memset(blip->output_buffer + blip->output_avail + blip->taps - samples, 0, samples * sizeof(*out));
|
|
blip->output_avail -= samples;
|
|
blip->phase -= samples * blip->phases;
|
|
|
|
blip->integrator = sum;
|
|
|
|
#if BLIPPER_LOG_PERFORMANCE
|
|
blip->integrator_time += get_time() - t0;
|
|
#endif
|
|
}
|
|
|
|
void blipper_read(blipper_t *blip, blipper_sample_t *output, unsigned samples,
|
|
unsigned stride)
|
|
{
|
|
_blipper_read(blip, 1, (char *)output, _blipper_put_short, samples,
|
|
stride * sizeof(*output));
|
|
}
|
|
|
|
void blipper_read_long(blipper_t *blip, blipper_long_sample_t *output, unsigned samples,
|
|
unsigned stride)
|
|
{
|
|
_blipper_read(blip, 0, (char *)output, _blipper_put_long, samples,
|
|
stride * sizeof(*output));
|
|
}
|