From 2ea55eaeaf6c72ed636213e6f29061378de2fb5f Mon Sep 17 00:00:00 2001 From: David Reid Date: Sat, 7 Feb 2026 15:40:10 +1000 Subject: [PATCH] Resampler: Optimization to f32, no LPF code path. --- miniaudio.h | 289 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 202 insertions(+), 87 deletions(-) diff --git a/miniaudio.h b/miniaudio.h index bcccb3b3..f75fa5d1 100644 --- a/miniaudio.h +++ b/miniaudio.h @@ -59325,6 +59325,109 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_s16_no_lpf(ma_ return MA_SUCCESS; } +static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_f32_no_lpf(ma_linear_resampler* pResampler, const float* pFramesInF32, ma_uint64* pFrameCountIn, float* pFramesOutF32, ma_uint64* pFrameCountOut, float invSampleRateOut) +{ + ma_uint64 frameCountIn; + ma_uint64 frameCountOut; + ma_uint64 framesProcessedIn; + ma_uint64 framesProcessedOut; + ma_uint32 c; + + MA_ASSERT(pResampler != NULL); + MA_ASSERT(pFramesInF32 != NULL); + MA_ASSERT(pFrameCountIn != NULL); + MA_ASSERT(pFramesOutF32 != NULL); + MA_ASSERT(pFrameCountOut != NULL); + + frameCountIn = *pFrameCountIn; + frameCountOut = *pFrameCountOut; + framesProcessedIn = 0; + framesProcessedOut = 0; + + /* If there's a cached frame we need to process it. */ + if (pResampler->inTimeInt == 0) { + MA_ASSERT(pResampler->cachedFrameCount <= 1); /* There is at most one cached frame. */ + + while (pResampler->cachedFrameCount > 0 && frameCountIn > 0 && framesProcessedOut < frameCountOut) { + float a = pResampler->inTimeFrac * invSampleRateOut; + + for (c = 0; c < pResampler->channels; c += 1) { + pFramesOutF32[c] = ma_mix_f32_fast(pResampler->x0.f32[c], pFramesInF32[c], a); + } + pFramesOutF32 += pResampler->channels; + + framesProcessedOut += 1; + + /* Advance time forward. */ + pResampler->inTimeInt += pResampler->inAdvanceInt; + pResampler->inTimeFrac += pResampler->inAdvanceFrac; + if (pResampler->inTimeFrac >= pResampler->sampleRateOut) { + pResampler->inTimeFrac -= pResampler->sampleRateOut; + pResampler->inTimeInt += 1; + } + + /* Subtract one from the time to account for the cached frame, but only if the entire frame was processed. */ + if (pResampler->inTimeInt > 0) { + pResampler->inTimeInt -= 1; + pResampler->cachedFrameCount = 0; + } + } + } else { + /* The rate must have changed between calls. Ignore the cached frame. */ + } + + while (framesProcessedOut < frameCountOut && pResampler->inTimeInt < frameCountIn) { + if (pResampler->inTimeInt + 1 < frameCountIn) { + float a = pResampler->inTimeFrac * invSampleRateOut; + + for (c = 0; c < pResampler->channels; c += 1) { + pFramesOutF32[c] = ma_mix_f32_fast(pFramesInF32[(pResampler->inTimeInt * pResampler->channels) + c], pFramesInF32[((pResampler->inTimeInt + 1) * pResampler->channels) + c], a); + } + pFramesOutF32 += pResampler->channels; + + framesProcessedOut += 1; + + /* Advance time forward. */ + pResampler->inTimeInt += pResampler->inAdvanceInt; + pResampler->inTimeFrac += pResampler->inAdvanceFrac; + if (pResampler->inTimeFrac >= pResampler->sampleRateOut) { + pResampler->inTimeFrac -= pResampler->sampleRateOut; + pResampler->inTimeInt += 1; + } + } else { + /* + There is not enough input frames to interpolate. We'll need to stop here. But it's important that we cache + the frame to ensure we make some forward progress. + */ + for (c = 0; c < pResampler->channels; c += 1) { + pResampler->x0.f32[c] = pFramesInF32[(pResampler->inTimeInt * pResampler->channels) + c]; + } + + pResampler->cachedFrameCount = 1; + pResampler->inTimeInt += 1; + + break; + } + } + + /* The number of frames we processed is simply the difference between our current time and previous time, clamped. */ + framesProcessedIn = pResampler->inTimeInt; + if (framesProcessedIn > frameCountIn) { /* Should never overshoot when upsampling. Downsampling could overshoot. */ + framesProcessedIn = frameCountIn; + } + + if (pResampler->inTimeInt >= framesProcessedIn) { + pResampler->inTimeInt -= framesProcessedIn; + } else { + pResampler->inTimeInt = 0; + } + + *pFrameCountIn = framesProcessedIn; + *pFrameCountOut = framesProcessedOut; + + return MA_SUCCESS; +} + static ma_result ma_linear_resampler_process_pcm_frames_s16_downsample(ma_linear_resampler* pResampler, const void* pFramesIn, ma_uint64* pFrameCountIn, void* pFramesOut, ma_uint64* pFrameCountOut) { const ma_int16* pFramesInS16; @@ -59528,60 +59631,66 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear framesProcessedOut = 0; invSampleRateOut = 1.0f / pResampler->sampleRateOut; - while (framesProcessedOut < frameCountOut) { - /* Before interpolating we need to load the buffers. When doing this we need to ensure we run every input sample through the filter. */ - while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) { - ma_uint32 iChannel; + if (pResampler->lpfOrder == 0) { + /* Fast path. No LPF needed. */ + return ma_linear_resampler_process_pcm_frames_f32_no_lpf(pResampler, pFramesInF32, pFrameCountIn, pFramesOutF32, pFrameCountOut, invSampleRateOut); + } else { + /* Slow path. Need LPF. */ + while (framesProcessedOut < frameCountOut) { + /* Before interpolating we need to load the buffers. When doing this we need to ensure we run every input sample through the filter. */ + while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) { + ma_uint32 iChannel; - if (pFramesInF32 != NULL) { - for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) { - pResampler->x0.f32[iChannel] = pResampler->x1.f32[iChannel]; - pResampler->x1.f32[iChannel] = pFramesInF32[iChannel]; + if (pFramesInF32 != NULL) { + for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) { + pResampler->x0.f32[iChannel] = pResampler->x1.f32[iChannel]; + pResampler->x1.f32[iChannel] = pFramesInF32[iChannel]; + } + pFramesInF32 += pResampler->channels; + } else { + for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) { + pResampler->x0.f32[iChannel] = pResampler->x1.f32[iChannel]; + pResampler->x1.f32[iChannel] = 0; + } } - pFramesInF32 += pResampler->channels; - } else { - for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) { - pResampler->x0.f32[iChannel] = pResampler->x1.f32[iChannel]; - pResampler->x1.f32[iChannel] = 0; + + /* Filter. Do not apply filtering if sample rates are the same or else you'll get dangerous glitching. */ + if (pResampler->lpfOrder > 0 && (pResampler->inAdvanceInt != 1 || pResampler->inAdvanceFrac != 0)) { + ma_lpf_process_pcm_frame_f32(&pResampler->lpf, pResampler->x1.f32, pResampler->x1.f32); } + + framesProcessedIn += 1; + pResampler->inTimeInt -= 1; } - /* Filter. Do not apply filtering if sample rates are the same or else you'll get dangerous glitching. */ - if (pResampler->lpfOrder > 0 && (pResampler->inAdvanceInt != 1 || pResampler->inAdvanceFrac != 0)) { - ma_lpf_process_pcm_frame_f32(&pResampler->lpf, pResampler->x1.f32, pResampler->x1.f32); + if (pResampler->inTimeInt > 0) { + break; /* Ran out of input data. */ } - framesProcessedIn += 1; - pResampler->inTimeInt -= 1; + /* Getting here means the frames have been loaded and filtered and we can generate the next output frame. */ + if (pFramesOutF32 != NULL) { + MA_ASSERT(pResampler->inTimeInt == 0); + ma_linear_resampler_interpolate_frame_f32(pResampler, invSampleRateOut, pFramesOutF32); + + pFramesOutF32 += pResampler->channels; + } + + framesProcessedOut += 1; + + /* Advance time forward. */ + pResampler->inTimeInt += pResampler->inAdvanceInt; + pResampler->inTimeFrac += pResampler->inAdvanceFrac; + if (pResampler->inTimeFrac >= pResampler->sampleRateOut) { + pResampler->inTimeFrac -= pResampler->sampleRateOut; + pResampler->inTimeInt += 1; + } } - if (pResampler->inTimeInt > 0) { - break; /* Ran out of input data. */ - } + *pFrameCountIn = framesProcessedIn; + *pFrameCountOut = framesProcessedOut; - /* Getting here means the frames have been loaded and filtered and we can generate the next output frame. */ - if (pFramesOutF32 != NULL) { - MA_ASSERT(pResampler->inTimeInt == 0); - ma_linear_resampler_interpolate_frame_f32(pResampler, invSampleRateOut, pFramesOutF32); - - pFramesOutF32 += pResampler->channels; - } - - framesProcessedOut += 1; - - /* Advance time forward. */ - pResampler->inTimeInt += pResampler->inAdvanceInt; - pResampler->inTimeFrac += pResampler->inAdvanceFrac; - if (pResampler->inTimeFrac >= pResampler->sampleRateOut) { - pResampler->inTimeFrac -= pResampler->sampleRateOut; - pResampler->inTimeInt += 1; - } + return MA_SUCCESS; } - - *pFrameCountIn = framesProcessedIn; - *pFrameCountOut = framesProcessedOut; - - return MA_SUCCESS; } static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_resampler* pResampler, const void* pFramesIn, ma_uint64* pFrameCountIn, void* pFramesOut, ma_uint64* pFrameCountOut) @@ -59606,60 +59715,66 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r framesProcessedOut = 0; invSampleRateOut = 1.0f / pResampler->sampleRateOut; - while (framesProcessedOut < frameCountOut) { - /* Before interpolating we need to load the buffers. */ - while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) { - ma_uint32 iChannel; + if (pResampler->lpfOrder == 0) { + /* Fast path. No LPF needed. */ + return ma_linear_resampler_process_pcm_frames_f32_no_lpf(pResampler, pFramesInF32, pFrameCountIn, pFramesOutF32, pFrameCountOut, invSampleRateOut); + } else { + /* Slow path. Need LPF. */ + while (framesProcessedOut < frameCountOut) { + /* Before interpolating we need to load the buffers. */ + while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) { + ma_uint32 iChannel; - if (pFramesInF32 != NULL) { - for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) { - pResampler->x0.f32[iChannel] = pResampler->x1.f32[iChannel]; - pResampler->x1.f32[iChannel] = pFramesInF32[iChannel]; - } - pFramesInF32 += pResampler->channels; - } else { - for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) { - pResampler->x0.f32[iChannel] = pResampler->x1.f32[iChannel]; - pResampler->x1.f32[iChannel] = 0; + if (pFramesInF32 != NULL) { + for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) { + pResampler->x0.f32[iChannel] = pResampler->x1.f32[iChannel]; + pResampler->x1.f32[iChannel] = pFramesInF32[iChannel]; + } + pFramesInF32 += pResampler->channels; + } else { + for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) { + pResampler->x0.f32[iChannel] = pResampler->x1.f32[iChannel]; + pResampler->x1.f32[iChannel] = 0; + } } + + framesProcessedIn += 1; + pResampler->inTimeInt -= 1; } - framesProcessedIn += 1; - pResampler->inTimeInt -= 1; - } - - if (pResampler->inTimeInt > 0) { - break; /* Ran out of input data. */ - } - - /* Getting here means the frames have been loaded and we can generate the next output frame. */ - if (pFramesOutF32 != NULL) { - MA_ASSERT(pResampler->inTimeInt == 0); - ma_linear_resampler_interpolate_frame_f32(pResampler, invSampleRateOut, pFramesOutF32); - - /* Filter. Do not apply filtering if sample rates are the same or else you'll get dangerous glitching. */ - if (pResampler->lpfOrder > 0 && (pResampler->inAdvanceInt != 1 || pResampler->inAdvanceFrac != 0)) { - ma_lpf_process_pcm_frame_f32(&pResampler->lpf, pFramesOutF32, pFramesOutF32); + if (pResampler->inTimeInt > 0) { + break; /* Ran out of input data. */ } - pFramesOutF32 += pResampler->channels; + /* Getting here means the frames have been loaded and we can generate the next output frame. */ + if (pFramesOutF32 != NULL) { + MA_ASSERT(pResampler->inTimeInt == 0); + ma_linear_resampler_interpolate_frame_f32(pResampler, invSampleRateOut, pFramesOutF32); + + /* Filter. Do not apply filtering if sample rates are the same or else you'll get dangerous glitching. */ + if (pResampler->lpfOrder > 0 && (pResampler->inAdvanceInt != 1 || pResampler->inAdvanceFrac != 0)) { + ma_lpf_process_pcm_frame_f32(&pResampler->lpf, pFramesOutF32, pFramesOutF32); + } + + pFramesOutF32 += pResampler->channels; + } + + framesProcessedOut += 1; + + /* Advance time forward. */ + pResampler->inTimeInt += pResampler->inAdvanceInt; + pResampler->inTimeFrac += pResampler->inAdvanceFrac; + if (pResampler->inTimeFrac >= pResampler->sampleRateOut) { + pResampler->inTimeFrac -= pResampler->sampleRateOut; + pResampler->inTimeInt += 1; + } } - framesProcessedOut += 1; + *pFrameCountIn = framesProcessedIn; + *pFrameCountOut = framesProcessedOut; - /* Advance time forward. */ - pResampler->inTimeInt += pResampler->inAdvanceInt; - pResampler->inTimeFrac += pResampler->inAdvanceFrac; - if (pResampler->inTimeFrac >= pResampler->sampleRateOut) { - pResampler->inTimeFrac -= pResampler->sampleRateOut; - pResampler->inTimeInt += 1; - } + return MA_SUCCESS; } - - *pFrameCountIn = framesProcessedIn; - *pFrameCountOut = framesProcessedOut; - - return MA_SUCCESS; } static ma_result ma_linear_resampler_process_pcm_frames_f32(ma_linear_resampler* pResampler, const void* pFramesIn, ma_uint64* pFrameCountIn, void* pFramesOut, ma_uint64* pFrameCountOut)