Resampler: Decouple the filtering step from the main resampler object.

The idea here is to have a more clearly defined data dependency
separation between the resampler and the filtering state, which I'm
hoping might open up more optimization opportunities. The problem with
this theory is that this commit makes the GCC build slower on the s16
mono upsampling path. It appears to be slightly faster with Clang though.
This commit is contained in:
David Reid
2026-02-14 07:10:34 +10:00
parent d4382ce478
commit 459fc19304
+53 -60
View File
@@ -59989,13 +59989,11 @@ static MA_INLINE void ma_linear_resampler_filter_s16_4_stereo(ma_linear_resample
} }
#endif #endif
static MA_INLINE void ma_linear_resampler_filter_s32(ma_linear_resampler* pResampler, ma_uint32 channels, ma_uint32 lpfCount, ma_int32* pFrame) static MA_INLINE void ma_linear_resampler_filter_s32(ma_int32* pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int32* pFrame)
{ {
ma_int32* pLPF;
ma_uint32 iLPF; ma_uint32 iLPF;
ma_uint32 iChannel; ma_uint32 iChannel;
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
ma_int32 b1 = pLPF[0]; ma_int32 b1 = pLPF[0];
ma_int32 b2 = pLPF[1]; ma_int32 b2 = pLPF[1];
@@ -60024,13 +60022,11 @@ static MA_INLINE void ma_linear_resampler_filter_s32(ma_linear_resampler* pResam
} }
} }
static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrame) static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrame)
{ {
ma_int32* pLPF;
ma_uint32 iLPF; ma_uint32 iLPF;
ma_uint32 iChannel; ma_uint32 iChannel;
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
ma_int32 b1 = pLPF[0]; ma_int32 b1 = pLPF[0];
ma_int32 b2 = pLPF[1]; ma_int32 b2 = pLPF[1];
@@ -60059,13 +60055,11 @@ static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_linear_resampler* p
} }
} }
static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrame) static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrame)
{ {
ma_int32* pLPF;
ma_uint32 iLPF; ma_uint32 iLPF;
ma_uint32 iChannel; ma_uint32 iChannel;
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
ma_int32 b1 = pLPF[0]; ma_int32 b1 = pLPF[0];
ma_int32 b2 = pLPF[1]; ma_int32 b2 = pLPF[1];
@@ -60094,46 +60088,44 @@ static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_linear_resampler*
} }
} }
static MA_INLINE void ma_linear_resampler_filter_s32_4(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames) static MA_INLINE void ma_linear_resampler_filter_s32_4(ma_int32* pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int32* pFrames)
{ {
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (0 * pResampler->channels)); ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (0 * channels));
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (1 * pResampler->channels)); ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (1 * channels));
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (2 * pResampler->channels)); ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (2 * channels));
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (3 * pResampler->channels)); ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (3 * channels));
} }
static MA_INLINE void ma_linear_resampler_filter_s32_4_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames) static MA_INLINE void ma_linear_resampler_filter_s32_4_mono(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrames)
{ {
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 0); ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 0);
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 1); ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 1);
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 2); ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 2);
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 3); ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 3);
} }
static MA_INLINE void ma_linear_resampler_filter_s32_4_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames) static MA_INLINE void ma_linear_resampler_filter_s32_4_stereo(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrames)
{ {
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 0); ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 0);
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 2); ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 2);
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 4); ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 4);
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 6); ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 6);
} }
static MA_INLINE void ma_linear_resampler_filter_f32(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame) static MA_INLINE void ma_linear_resampler_filter_f32(float* pLPF, ma_uint32 lpfCount, ma_uint32 channels, float* pFrame)
{ {
float* pLPF;
ma_uint32 iLPF; ma_uint32 iLPF;
ma_uint32 iChannel; ma_uint32 iChannel;
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
float b1 = pLPF[0]; float b1 = pLPF[0];
float b2 = pLPF[1]; float b2 = pLPF[1];
float a1 = pLPF[2]; float a1 = pLPF[2];
float a2 = pLPF[3]; float a2 = pLPF[3];
float* pR0 = pLPF + 4; float* pR0 = pLPF + 4;
float* pR1 = pLPF + 4 + pResampler->channels; float* pR1 = pLPF + 4 + channels;
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) { for (iChannel = 0; iChannel < channels; iChannel += 1) {
float r0 = pR0[iChannel]; float r0 = pR0[iChannel];
float r1 = pR1[iChannel]; float r1 = pR1[iChannel];
float x = pFrame[iChannel]; float x = pFrame[iChannel];
@@ -60149,17 +60141,15 @@ static MA_INLINE void ma_linear_resampler_filter_f32(ma_linear_resampler* pResam
} }
/* Go do the next LPF state. */ /* Go do the next LPF state. */
pLPF += 4 + (pResampler->channels * 2); pLPF += 4 + (channels * 2);
} }
} }
static MA_INLINE void ma_linear_resampler_filter_f32_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame) static MA_INLINE void ma_linear_resampler_filter_f32_mono(float* pLPF, ma_uint32 lpfCount, float* pFrame)
{ {
float* pLPF;
ma_uint32 iLPF; ma_uint32 iLPF;
ma_uint32 iChannel; ma_uint32 iChannel;
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
float b1 = pLPF[0]; float b1 = pLPF[0];
float b2 = pLPF[1]; float b2 = pLPF[1];
@@ -60188,13 +60178,11 @@ static MA_INLINE void ma_linear_resampler_filter_f32_mono(ma_linear_resampler* p
} }
} }
static MA_INLINE void ma_linear_resampler_filter_f32_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame) static MA_INLINE void ma_linear_resampler_filter_f32_stereo(float* pLPF, ma_uint32 lpfCount, float* pFrame)
{ {
float* pLPF;
ma_uint32 iLPF; ma_uint32 iLPF;
ma_uint32 iChannel; ma_uint32 iChannel;
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
float b1 = pLPF[0]; float b1 = pLPF[0];
float b2 = pLPF[1]; float b2 = pLPF[1];
@@ -60223,28 +60211,28 @@ static MA_INLINE void ma_linear_resampler_filter_f32_stereo(ma_linear_resampler*
} }
} }
static MA_INLINE void ma_linear_resampler_filter_f32_4(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames) static MA_INLINE void ma_linear_resampler_filter_f32_4(float* pLPF, ma_uint32 lpfCount, ma_uint32 channels, float* pFrames)
{ {
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (0 * pResampler->channels)); ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (0 * channels));
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (1 * pResampler->channels)); ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (1 * channels));
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (2 * pResampler->channels)); ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (2 * channels));
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (3 * pResampler->channels)); ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (3 * channels));
} }
static MA_INLINE void ma_linear_resampler_filter_f32_4_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames) static MA_INLINE void ma_linear_resampler_filter_f32_4_mono(float* pLPF, ma_uint32 lpfCount, float* pFrames)
{ {
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 0); ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 0);
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 1); ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 1);
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 2); ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 2);
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 3); ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 3);
} }
static MA_INLINE void ma_linear_resampler_filter_f32_4_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames) static MA_INLINE void ma_linear_resampler_filter_f32_4_stereo(float* pLPF, ma_uint32 lpfCount, float* pFrames)
{ {
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 0); ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 0);
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 2); ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 2);
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 4); ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 4);
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 6); ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 6);
} }
@@ -60361,6 +60349,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
return ma_linear_resampler_process_pcm_frames_s16_no_lpf(pResampler, pFramesInS16, pFrameCountIn, pFramesOutS16, pFrameCountOut, invSampleRateOut); return ma_linear_resampler_process_pcm_frames_s16_no_lpf(pResampler, pFramesInS16, pFrameCountIn, pFramesOutS16, pFrameCountOut, invSampleRateOut);
} else { } else {
/* Slow path. Need LPF. */ /* Slow path. Need LPF. */
ma_int32* pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
#if 1 #if 1
/* If there's a cached frame we need to process it. */ /* If there's a cached frame we need to process it. */
@@ -60473,7 +60462,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
r[2] = x[2] + (n[2] >> MA_LINEAR_RESAMPLER_LERP_SHIFT); r[2] = x[2] + (n[2] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
r[3] = x[3] + (n[3] >> MA_LINEAR_RESAMPLER_LERP_SHIFT); r[3] = x[3] + (n[3] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
ma_linear_resampler_filter_s32_4_mono(pResampler, lpfCount, r); ma_linear_resampler_filter_s32_4_mono(pLPF, lpfCount, r);
pFramesOutS16[0] = (ma_int16)r[0]; pFramesOutS16[0] = (ma_int16)r[0];
pFramesOutS16[1] = (ma_int16)r[1]; pFramesOutS16[1] = (ma_int16)r[1];
@@ -60541,7 +60530,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
r[6] = x[6] + (n[6] >> MA_LINEAR_RESAMPLER_LERP_SHIFT); r[6] = x[6] + (n[6] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
r[7] = x[7] + (n[7] >> MA_LINEAR_RESAMPLER_LERP_SHIFT); r[7] = x[7] + (n[7] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
ma_linear_resampler_filter_s32_4_stereo(pResampler, lpfCount, r); ma_linear_resampler_filter_s32_4_stereo(pLPF, lpfCount, r);
pFramesOutS16[0] = (ma_int16)r[0]; pFramesOutS16[0] = (ma_int16)r[0];
pFramesOutS16[1] = (ma_int16)r[1]; pFramesOutS16[1] = (ma_int16)r[1];
@@ -60760,6 +60749,8 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear
return ma_linear_resampler_process_pcm_frames_f32_no_lpf(pResampler, pFramesInF32, pFrameCountIn, pFramesOutF32, pFrameCountOut, invSampleRateOut); return ma_linear_resampler_process_pcm_frames_f32_no_lpf(pResampler, pFramesInF32, pFrameCountIn, pFramesOutF32, pFrameCountOut, invSampleRateOut);
} else { } else {
/* Slow path. Need LPF. */ /* Slow path. Need LPF. */
float* pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
/*printf("DOWN\n");*/ /*printf("DOWN\n");*/
while (framesProcessedOut < frameCountOut) { while (framesProcessedOut < frameCountOut) {
@@ -60774,7 +60765,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear
pFramesInF32 += pResampler->channels; pFramesInF32 += pResampler->channels;
/* Filter. */ /* Filter. */
ma_linear_resampler_filter_f32(pResampler, lpfCount, pResampler->x1.f32); ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pResampler->x1.f32);
framesProcessedIn += 1; framesProcessedIn += 1;
pResampler->inTimeInt -= 1; pResampler->inTimeInt -= 1;
@@ -60833,11 +60824,13 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
invSampleRateOut = (1.0 / pResampler->sampleRateOut); invSampleRateOut = (1.0 / pResampler->sampleRateOut);
lpfCount = pResampler->lpfOrder >> 1; lpfCount = pResampler->lpfOrder >> 1;
if (pResampler->lpfOrder == 0) { if (lpfCount == 0) {
/* Fast path. No LPF needed. */ /* Fast path. No LPF needed. */
return ma_linear_resampler_process_pcm_frames_f32_no_lpf(pResampler, pFramesInF32, pFrameCountIn, pFramesOutF32, pFrameCountOut, invSampleRateOut); return ma_linear_resampler_process_pcm_frames_f32_no_lpf(pResampler, pFramesInF32, pFrameCountIn, pFramesOutF32, pFrameCountOut, invSampleRateOut);
} else { } else {
/* Slow path. Need LPF. */ /* Slow path. Need LPF. */
float* pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
/*printf("UP %u\n", pResampler->inTimeInt);*/ /*printf("UP %u\n", pResampler->inTimeInt);*/
#if 1 #if 1
@@ -60851,7 +60844,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
for (c = 0; c < pResampler->channels; c += 1) { for (c = 0; c < pResampler->channels; c += 1) {
pFramesOutF32[c] = ma_mix_f32_fast(pResampler->x0.f32[c], pFramesInF32[c], a); pFramesOutF32[c] = ma_mix_f32_fast(pResampler->x0.f32[c], pFramesInF32[c], a);
} }
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFramesOutF32); ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pFramesOutF32);
pFramesOutF32 += pResampler->channels; pFramesOutF32 += pResampler->channels;
@@ -60951,7 +60944,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
r[2] = x[2] + n[2]; r[2] = x[2] + n[2];
r[3] = x[3] + n[3]; r[3] = x[3] + n[3];
ma_linear_resampler_filter_f32_4_mono(pResampler, lpfCount, r); ma_linear_resampler_filter_f32_4_mono(pLPF, lpfCount, r);
pFramesOutF32[0] = r[0]; pFramesOutF32[0] = r[0];
pFramesOutF32[1] = r[1]; pFramesOutF32[1] = r[1];
@@ -61019,7 +61012,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
r[6] = x[6] + n[6]; r[6] = x[6] + n[6];
r[7] = x[7] + n[7]; r[7] = x[7] + n[7];
ma_linear_resampler_filter_f32_4_stereo(pResampler, lpfCount, r); ma_linear_resampler_filter_f32_4_stereo(pLPF, lpfCount, r);
pFramesOutF32[0] = r[0]; pFramesOutF32[0] = r[0];
pFramesOutF32[1] = r[1]; pFramesOutF32[1] = r[1];
@@ -61074,7 +61067,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
pFramesOutF32[(3 * channels) + c] = r[3]; pFramesOutF32[(3 * channels) + c] = r[3];
} }
ma_linear_resampler_filter_f32_4(pResampler, lpfCount, pFramesOutF32); ma_linear_resampler_filter_f32_4(pLPF, lpfCount, pResampler->channels, pFramesOutF32);
} }
pFramesOutF32 += 4 * channels; pFramesOutF32 += 4 * channels;
@@ -61091,7 +61084,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
for (c = 0; c < pResampler->channels; c += 1) { for (c = 0; c < pResampler->channels; c += 1) {
pFramesOutF32[c] = ma_mix_f32_fast(pFramesInF32[((pResampler->inTimeInt + 0) * pResampler->channels) + c], pFramesInF32[((pResampler->inTimeInt + 1) * pResampler->channels) + c], a); pFramesOutF32[c] = ma_mix_f32_fast(pFramesInF32[((pResampler->inTimeInt + 0) * pResampler->channels) + c], pFramesInF32[((pResampler->inTimeInt + 1) * pResampler->channels) + c], a);
} }
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFramesOutF32); ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pFramesOutF32);
pFramesOutF32 += pResampler->channels; pFramesOutF32 += pResampler->channels;