From 459fc19304b81a72ec0dd6336159deaebc7bd214 Mon Sep 17 00:00:00 2001 From: David Reid Date: Sat, 14 Feb 2026 07:10:34 +1000 Subject: [PATCH] Resampler: Decouple the filtering step from the main resampler object. The idea here is to have a more clearly defined data dependency separation between the resampler and the filtering state which I'm hoping might open up more optimization opportunities. The problem with this theory is that this commit makes the GCC build slower on the s16 mono upsampling path. It appears to be slightly faster with Clang though. --- miniaudio.h | 113 ++++++++++++++++++++++++---------------------------- 1 file changed, 53 insertions(+), 60 deletions(-) diff --git a/miniaudio.h b/miniaudio.h index 10fc428f..7bceb9d1 100644 --- a/miniaudio.h +++ b/miniaudio.h @@ -59989,13 +59989,11 @@ static MA_INLINE void ma_linear_resampler_filter_s16_4_stereo(ma_linear_resample } #endif -static MA_INLINE void ma_linear_resampler_filter_s32(ma_linear_resampler* pResampler, ma_uint32 channels, ma_uint32 lpfCount, ma_int32* pFrame) +static MA_INLINE void ma_linear_resampler_filter_s32(ma_int32* pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int32* pFrame) { - ma_int32* pLPF; ma_uint32 iLPF; ma_uint32 iChannel; - pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0); for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { ma_int32 b1 = pLPF[0]; ma_int32 b2 = pLPF[1]; @@ -60024,13 +60022,11 @@ static MA_INLINE void ma_linear_resampler_filter_s32(ma_linear_resampler* pResam } } -static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrame) +static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrame) { - ma_int32* pLPF; ma_uint32 iLPF; ma_uint32 iChannel; - pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0); for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { ma_int32 b1 = pLPF[0]; ma_int32 b2 = pLPF[1]; @@ -60059,13 +60055,11 @@ static MA_INLINE 
void ma_linear_resampler_filter_s32_mono(ma_linear_resampler* p } } -static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrame) +static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrame) { - ma_int32* pLPF; ma_uint32 iLPF; ma_uint32 iChannel; - pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0); for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { ma_int32 b1 = pLPF[0]; ma_int32 b2 = pLPF[1]; @@ -60094,46 +60088,44 @@ static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_linear_resampler* } } -static MA_INLINE void ma_linear_resampler_filter_s32_4(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames) +static MA_INLINE void ma_linear_resampler_filter_s32_4(ma_int32* pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int32* pFrames) { - ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (0 * pResampler->channels)); - ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (1 * pResampler->channels)); - ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (2 * pResampler->channels)); - ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (3 * pResampler->channels)); + ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (0 * channels)); + ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (1 * channels)); + ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (2 * channels)); + ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (3 * channels)); } -static MA_INLINE void ma_linear_resampler_filter_s32_4_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames) +static MA_INLINE void ma_linear_resampler_filter_s32_4_mono(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrames) { - 
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 0); - ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 1); - ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 2); - ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 3); + ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 0); + ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 1); + ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 2); + ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 3); } -static MA_INLINE void ma_linear_resampler_filter_s32_4_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames) +static MA_INLINE void ma_linear_resampler_filter_s32_4_stereo(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrames) { - ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 0); - ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 2); - ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 4); - ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 6); + ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 0); + ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 2); + ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 4); + ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 6); } -static MA_INLINE void ma_linear_resampler_filter_f32(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame) +static MA_INLINE void ma_linear_resampler_filter_f32(float* pLPF, ma_uint32 lpfCount, ma_uint32 channels, float* pFrame) { - float* pLPF; ma_uint32 iLPF; ma_uint32 iChannel; - pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0); for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { float b1 = pLPF[0]; float b2 = pLPF[1]; float a1 = pLPF[2]; float a2 = pLPF[3]; float* pR0 = pLPF + 4; - float* pR1 = pLPF + 4 + pResampler->channels; + float* pR1 = pLPF + 4 + 
channels; - for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) { + for (iChannel = 0; iChannel < channels; iChannel += 1) { float r0 = pR0[iChannel]; float r1 = pR1[iChannel]; float x = pFrame[iChannel]; @@ -60149,17 +60141,15 @@ static MA_INLINE void ma_linear_resampler_filter_f32(ma_linear_resampler* pResam } /* Go do the next LPF state. */ - pLPF += 4 + (pResampler->channels * 2); + pLPF += 4 + (channels * 2); } } -static MA_INLINE void ma_linear_resampler_filter_f32_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame) +static MA_INLINE void ma_linear_resampler_filter_f32_mono(float* pLPF, ma_uint32 lpfCount, float* pFrame) { - float* pLPF; ma_uint32 iLPF; ma_uint32 iChannel; - pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0); for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { float b1 = pLPF[0]; float b2 = pLPF[1]; @@ -60188,13 +60178,11 @@ static MA_INLINE void ma_linear_resampler_filter_f32_mono(ma_linear_resampler* p } } -static MA_INLINE void ma_linear_resampler_filter_f32_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame) +static MA_INLINE void ma_linear_resampler_filter_f32_stereo(float* pLPF, ma_uint32 lpfCount, float* pFrame) { - float* pLPF; ma_uint32 iLPF; ma_uint32 iChannel; - pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0); for (iLPF = 0; iLPF < lpfCount; iLPF += 1) { float b1 = pLPF[0]; float b2 = pLPF[1]; @@ -60223,28 +60211,28 @@ static MA_INLINE void ma_linear_resampler_filter_f32_stereo(ma_linear_resampler* } } -static MA_INLINE void ma_linear_resampler_filter_f32_4(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames) +static MA_INLINE void ma_linear_resampler_filter_f32_4(float* pLPF, ma_uint32 lpfCount, ma_uint32 channels, float* pFrames) { - ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (0 * pResampler->channels)); - ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (1 * pResampler->channels)); - 
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (2 * pResampler->channels)); - ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (3 * pResampler->channels)); + ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (0 * channels)); + ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (1 * channels)); + ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (2 * channels)); + ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (3 * channels)); } -static MA_INLINE void ma_linear_resampler_filter_f32_4_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames) +static MA_INLINE void ma_linear_resampler_filter_f32_4_mono(float* pLPF, ma_uint32 lpfCount, float* pFrames) { - ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 0); - ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 1); - ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 2); - ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 3); + ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 0); + ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 1); + ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 2); + ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 3); } -static MA_INLINE void ma_linear_resampler_filter_f32_4_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames) +static MA_INLINE void ma_linear_resampler_filter_f32_4_stereo(float* pLPF, ma_uint32 lpfCount, float* pFrames) { - ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 0); - ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 2); - ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 4); - ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 6); + ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 0); + 
ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 2); + ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 4); + ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 6); } @@ -60361,6 +60349,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r return ma_linear_resampler_process_pcm_frames_s16_no_lpf(pResampler, pFramesInS16, pFrameCountIn, pFramesOutS16, pFrameCountOut, invSampleRateOut); } else { /* Slow path. Need LPF. */ + ma_int32* pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0); #if 1 /* If there's a cached frame we need to process it. */ @@ -60473,7 +60462,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r r[2] = x[2] + (n[2] >> MA_LINEAR_RESAMPLER_LERP_SHIFT); r[3] = x[3] + (n[3] >> MA_LINEAR_RESAMPLER_LERP_SHIFT); - ma_linear_resampler_filter_s32_4_mono(pResampler, lpfCount, r); + ma_linear_resampler_filter_s32_4_mono(pLPF, lpfCount, r); pFramesOutS16[0] = (ma_int16)r[0]; pFramesOutS16[1] = (ma_int16)r[1]; @@ -60541,7 +60530,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r r[6] = x[6] + (n[6] >> MA_LINEAR_RESAMPLER_LERP_SHIFT); r[7] = x[7] + (n[7] >> MA_LINEAR_RESAMPLER_LERP_SHIFT); - ma_linear_resampler_filter_s32_4_stereo(pResampler, lpfCount, r); + ma_linear_resampler_filter_s32_4_stereo(pLPF, lpfCount, r); pFramesOutS16[0] = (ma_int16)r[0]; pFramesOutS16[1] = (ma_int16)r[1]; @@ -60760,6 +60749,8 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear return ma_linear_resampler_process_pcm_frames_f32_no_lpf(pResampler, pFramesInF32, pFrameCountIn, pFramesOutF32, pFrameCountOut, invSampleRateOut); } else { /* Slow path. Need LPF. 
*/ + float* pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0); + /*printf("DOWN\n");*/ while (framesProcessedOut < frameCountOut) { @@ -60774,7 +60765,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear pFramesInF32 += pResampler->channels; /* Filter. */ - ma_linear_resampler_filter_f32(pResampler, lpfCount, pResampler->x1.f32); + ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pResampler->x1.f32); framesProcessedIn += 1; pResampler->inTimeInt -= 1; @@ -60833,11 +60824,13 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r invSampleRateOut = (1.0 / pResampler->sampleRateOut); lpfCount = pResampler->lpfOrder >> 1; - if (pResampler->lpfOrder == 0) { + if (lpfCount == 0) { /* Fast path. No LPF needed. */ return ma_linear_resampler_process_pcm_frames_f32_no_lpf(pResampler, pFramesInF32, pFrameCountIn, pFramesOutF32, pFrameCountOut, invSampleRateOut); } else { /* Slow path. Need LPF. */ + float* pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0); + /*printf("UP %u\n", pResampler->inTimeInt);*/ #if 1 @@ -60851,7 +60844,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r for (c = 0; c < pResampler->channels; c += 1) { pFramesOutF32[c] = ma_mix_f32_fast(pResampler->x0.f32[c], pFramesInF32[c], a); } - ma_linear_resampler_filter_f32(pResampler, lpfCount, pFramesOutF32); + ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pFramesOutF32); pFramesOutF32 += pResampler->channels; @@ -60951,7 +60944,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r r[2] = x[2] + n[2]; r[3] = x[3] + n[3]; - ma_linear_resampler_filter_f32_4_mono(pResampler, lpfCount, r); + ma_linear_resampler_filter_f32_4_mono(pLPF, lpfCount, r); pFramesOutF32[0] = r[0]; pFramesOutF32[1] = r[1]; @@ -61019,7 +61012,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r r[6] = x[6] + n[6]; r[7] = 
x[7] + n[7]; - ma_linear_resampler_filter_f32_4_stereo(pResampler, lpfCount, r); + ma_linear_resampler_filter_f32_4_stereo(pLPF, lpfCount, r); pFramesOutF32[0] = r[0]; pFramesOutF32[1] = r[1]; @@ -61074,7 +61067,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r pFramesOutF32[(3 * channels) + c] = r[3]; } - ma_linear_resampler_filter_f32_4(pResampler, lpfCount, pFramesOutF32); + ma_linear_resampler_filter_f32_4(pLPF, lpfCount, pResampler->channels, pFramesOutF32); } pFramesOutF32 += 4 * channels; @@ -61091,7 +61084,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r for (c = 0; c < pResampler->channels; c += 1) { pFramesOutF32[c] = ma_mix_f32_fast(pFramesInF32[((pResampler->inTimeInt + 0) * pResampler->channels) + c], pFramesInF32[((pResampler->inTimeInt + 1) * pResampler->channels) + c], a); } - ma_linear_resampler_filter_f32(pResampler, lpfCount, pFramesOutF32); + ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pFramesOutF32); pFramesOutF32 += pResampler->channels;