Resampler: More work on filter decoupling.

For reasons not yet understood, this makes the s16 mono upsampling path
slower. That path appears to be the one problem code path: the other paths
do not seem to be as sensitive to seemingly harmless changes.
This commit is contained in:
David Reid
2026-02-14 07:54:01 +10:00
parent 459fc19304
commit 6d20358df1
+97 -100
View File
@@ -59328,6 +59328,7 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_s16_no_lpf(ma_
ma_uint64 frameCountOut;
ma_uint64 framesProcessedIn;
ma_uint64 framesProcessedOut;
ma_uint32 channels;
ma_uint32 c;
MA_ASSERT(pResampler != NULL);
@@ -59340,6 +59341,7 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_s16_no_lpf(ma_
frameCountOut = *pFrameCountOut;
framesProcessedIn = 0;
framesProcessedOut = 0;
channels = pResampler->channels;
/* If there's a cached frame we need to process it. */
if (pResampler->inTimeInt == 0) {
@@ -59348,10 +59350,10 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_s16_no_lpf(ma_
while (pResampler->cachedFrameCount > 0 && frameCountIn > 0 && framesProcessedOut < frameCountOut) {
ma_uint32 a = pResampler->inTimeFrac * invSampleRateOut;
for (c = 0; c < pResampler->channels; c += 1) {
for (c = 0; c < channels; c += 1) {
pFramesOutS16[c] = ma_linear_resampler_mix_s16(pResampler->x0.s16[c], pFramesInS16[c], a);
}
pFramesOutS16 += pResampler->channels;
pFramesOutS16 += channels;
framesProcessedOut += 1;
@@ -59376,8 +59378,6 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_s16_no_lpf(ma_
/* Experimental loop unrolling to make it easier for SIMD-ification. */
#if 1
{
ma_uint32 channels = pResampler->channels;
while (framesProcessedOut + 4 <= frameCountOut) {
ma_uint32 inTimeIntTemp;
ma_uint32 inTimeFracTemp;
@@ -59580,10 +59580,10 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_s16_no_lpf(ma_
if (pResampler->inTimeInt + 1 < frameCountIn) {
ma_uint32 a = pResampler->inTimeFrac * invSampleRateOut;
for (c = 0; c < pResampler->channels; c += 1) {
pFramesOutS16[c] = ma_linear_resampler_mix_s16(pFramesInS16[(pResampler->inTimeInt * pResampler->channels) + c], pFramesInS16[((pResampler->inTimeInt + 1) * pResampler->channels) + c], a);
for (c = 0; c < channels; c += 1) {
pFramesOutS16[c] = ma_linear_resampler_mix_s16(pFramesInS16[(pResampler->inTimeInt * channels) + c], pFramesInS16[((pResampler->inTimeInt + 1) * channels) + c], a);
}
pFramesOutS16 += pResampler->channels;
pFramesOutS16 += channels;
framesProcessedOut += 1;
@@ -59599,8 +59599,8 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_s16_no_lpf(ma_
There are not enough input frames to interpolate. We'll need to stop here. But it's important that we cache
the frame to ensure we make some forward progress.
*/
for (c = 0; c < pResampler->channels; c += 1) {
pResampler->x0.s16[c] = pFramesInS16[(pResampler->inTimeInt * pResampler->channels) + c];
for (c = 0; c < channels; c += 1) {
pResampler->x0.s16[c] = pFramesInS16[(pResampler->inTimeInt * channels) + c];
}
pResampler->cachedFrameCount = 1;
@@ -59634,6 +59634,7 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_f32_no_lpf(ma_
ma_uint64 frameCountOut;
ma_uint64 framesProcessedIn;
ma_uint64 framesProcessedOut;
ma_uint32 channels;
ma_uint32 c;
MA_ASSERT(pResampler != NULL);
@@ -59646,6 +59647,7 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_f32_no_lpf(ma_
frameCountOut = *pFrameCountOut;
framesProcessedIn = 0;
framesProcessedOut = 0;
channels = pResampler->channels;
/* If there's a cached frame we need to process it. */
if (pResampler->inTimeInt == 0) {
@@ -59654,10 +59656,10 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_f32_no_lpf(ma_
while (pResampler->cachedFrameCount > 0 && frameCountIn > 0 && framesProcessedOut < frameCountOut) {
float a = (double)(pResampler->inTimeFrac * invSampleRateOut);
for (c = 0; c < pResampler->channels; c += 1) {
for (c = 0; c < channels; c += 1) {
pFramesOutF32[c] = ma_mix_f32_fast(pResampler->x0.f32[c], pFramesInF32[c], a);
}
pFramesOutF32 += pResampler->channels;
pFramesOutF32 += channels;
framesProcessedOut += 1;
@@ -59682,8 +59684,6 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_f32_no_lpf(ma_
/* Experimental loop unrolling to make it easier for SIMD-ification. */
#if 1
{
ma_uint32 channels = pResampler->channels;
while (framesProcessedOut + 4 <= frameCountOut) {
ma_uint32 inTimeIntTemp;
ma_uint32 inTimeFracTemp;
@@ -59886,10 +59886,10 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_f32_no_lpf(ma_
if (pResampler->inTimeInt + 1 < frameCountIn) {
float a = (float)(pResampler->inTimeFrac * invSampleRateOut);
for (c = 0; c < pResampler->channels; c += 1) {
pFramesOutF32[c] = ma_mix_f32_fast(pFramesInF32[((pResampler->inTimeInt + 0) * pResampler->channels) + c], pFramesInF32[((pResampler->inTimeInt + 1) * pResampler->channels) + c], a);
for (c = 0; c < channels; c += 1) {
pFramesOutF32[c] = ma_mix_f32_fast(pFramesInF32[((pResampler->inTimeInt + 0) * channels) + c], pFramesInF32[((pResampler->inTimeInt + 1) * channels) + c], a);
}
pFramesOutF32 += pResampler->channels;
pFramesOutF32 += channels;
framesProcessedOut += 1;
@@ -59905,8 +59905,8 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_f32_no_lpf(ma_
There are not enough input frames to interpolate. We'll need to stop here. But it's important that we cache
the frame to ensure we make some forward progress.
*/
for (c = 0; c < pResampler->channels; c += 1) {
pResampler->x0.f32[c] = pFramesInF32[((pResampler->inTimeInt + 0) * pResampler->channels) + c];
for (c = 0; c < channels; c += 1) {
pResampler->x0.f32[c] = pFramesInF32[((pResampler->inTimeInt + 0) * channels) + c];
}
pResampler->cachedFrameCount = 1;
@@ -59935,23 +59935,20 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_f32_no_lpf(ma_
}
static MA_INLINE void ma_linear_resampler_filter_s16(ma_linear_resampler* pResampler, ma_int16* pFrame)
static MA_INLINE void ma_linear_resampler_filter_s16(ma_int32* MA_RESTRICT pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int16* MA_RESTRICT pFrame)
{
ma_int32* pLPF;
ma_uint32 iLPF;
ma_uint32 lpfCount = pResampler->lpfOrder >> 1;
ma_uint32 iChannel;
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
ma_int32 b1 = pLPF[0];
ma_int32 b2 = pLPF[1];
ma_int32 a1 = pLPF[2];
ma_int32 a2 = pLPF[3];
ma_int32* pR0 = pLPF + 4;
ma_int32* pR1 = pLPF + 4 + pResampler->channels;
ma_int32* pR1 = pLPF + 4 + channels;
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
for (iChannel = 0; iChannel < channels; iChannel += 1) {
ma_int32 r0 = pR0[iChannel];
ma_int32 r1 = pR1[iChannel];
ma_int32 x = pFrame[iChannel];
@@ -59967,29 +59964,29 @@ static MA_INLINE void ma_linear_resampler_filter_s16(ma_linear_resampler* pResam
}
/* Go do the next LPF state. */
pLPF += 4 + (pResampler->channels * 2);
pLPF += 4 + (channels * 2);
}
}
static MA_INLINE void ma_linear_resampler_filter_s16_4(ma_linear_resampler* pResampler, ma_int16* pFrames)
static MA_INLINE void ma_linear_resampler_filter_s16_4(ma_int32* MA_RESTRICT pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int16* MA_RESTRICT pFrames)
{
ma_linear_resampler_filter_s16(pResampler, pFrames + (0 * pResampler->channels));
ma_linear_resampler_filter_s16(pResampler, pFrames + (1 * pResampler->channels));
ma_linear_resampler_filter_s16(pResampler, pFrames + (2 * pResampler->channels));
ma_linear_resampler_filter_s16(pResampler, pFrames + (3 * pResampler->channels));
ma_linear_resampler_filter_s16(pLPF, lpfCount, channels, pFrames + (0 * channels));
ma_linear_resampler_filter_s16(pLPF, lpfCount, channels, pFrames + (1 * channels));
ma_linear_resampler_filter_s16(pLPF, lpfCount, channels, pFrames + (2 * channels));
ma_linear_resampler_filter_s16(pLPF, lpfCount, channels, pFrames + (3 * channels));
}
#if 1
static MA_INLINE void ma_linear_resampler_filter_s16_4_stereo(ma_linear_resampler* pResampler, ma_int16* pFrames)
static MA_INLINE void ma_linear_resampler_filter_s16_4_stereo(ma_int32* MA_RESTRICT pLPF, ma_uint32 lpfCount, ma_int16* MA_RESTRICT pFrames)
{
ma_linear_resampler_filter_s16(pResampler, pFrames + 0);
ma_linear_resampler_filter_s16(pResampler, pFrames + 2);
ma_linear_resampler_filter_s16(pResampler, pFrames + 4);
ma_linear_resampler_filter_s16(pResampler, pFrames + 6);
ma_linear_resampler_filter_s16(pLPF, lpfCount, 2, pFrames + 0);
ma_linear_resampler_filter_s16(pLPF, lpfCount, 2, pFrames + 2);
ma_linear_resampler_filter_s16(pLPF, lpfCount, 2, pFrames + 4);
ma_linear_resampler_filter_s16(pLPF, lpfCount, 2, pFrames + 6);
}
#endif
static MA_INLINE void ma_linear_resampler_filter_s32(ma_int32* pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int32* pFrame)
static MA_INLINE void ma_linear_resampler_filter_s32(ma_int32* MA_RESTRICT pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int32* MA_RESTRICT pFrame)
{
ma_uint32 iLPF;
ma_uint32 iChannel;
@@ -60022,7 +60019,7 @@ static MA_INLINE void ma_linear_resampler_filter_s32(ma_int32* pLPF, ma_uint32 l
}
}
static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrame)
static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_int32* MA_RESTRICT pLPF, ma_uint32 lpfCount, ma_int32* MA_RESTRICT pFrame)
{
ma_uint32 iLPF;
ma_uint32 iChannel;
@@ -60055,7 +60052,7 @@ static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_int32* pLPF, ma_uin
}
}
static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrame)
static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_int32* MA_RESTRICT pLPF, ma_uint32 lpfCount, ma_int32* MA_RESTRICT pFrame)
{
ma_uint32 iLPF;
ma_uint32 iChannel;
@@ -60088,7 +60085,7 @@ static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_int32* pLPF, ma_u
}
}
static MA_INLINE void ma_linear_resampler_filter_s32_4(ma_int32* pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int32* pFrames)
static MA_INLINE void ma_linear_resampler_filter_s32_4(ma_int32* MA_RESTRICT pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int32* MA_RESTRICT pFrames)
{
ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (0 * channels));
ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (1 * channels));
@@ -60096,7 +60093,7 @@ static MA_INLINE void ma_linear_resampler_filter_s32_4(ma_int32* pLPF, ma_uint32
ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (3 * channels));
}
static MA_INLINE void ma_linear_resampler_filter_s32_4_mono(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrames)
static MA_INLINE void ma_linear_resampler_filter_s32_4_mono(ma_int32* MA_RESTRICT pLPF, ma_uint32 lpfCount, ma_int32* MA_RESTRICT pFrames)
{
ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 0);
ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 1);
@@ -60104,7 +60101,7 @@ static MA_INLINE void ma_linear_resampler_filter_s32_4_mono(ma_int32* pLPF, ma_u
ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 3);
}
static MA_INLINE void ma_linear_resampler_filter_s32_4_stereo(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrames)
static MA_INLINE void ma_linear_resampler_filter_s32_4_stereo(ma_int32* MA_RESTRICT pLPF, ma_uint32 lpfCount, ma_int32* MA_RESTRICT pFrames)
{
ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 0);
ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 2);
@@ -60112,7 +60109,7 @@ static MA_INLINE void ma_linear_resampler_filter_s32_4_stereo(ma_int32* pLPF, ma
ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 6);
}
static MA_INLINE void ma_linear_resampler_filter_f32(float* pLPF, ma_uint32 lpfCount, ma_uint32 channels, float* pFrame)
static MA_INLINE void ma_linear_resampler_filter_f32(float* MA_RESTRICT pLPF, ma_uint32 lpfCount, ma_uint32 channels, float* MA_RESTRICT pFrame)
{
ma_uint32 iLPF;
ma_uint32 iChannel;
@@ -60145,7 +60142,7 @@ static MA_INLINE void ma_linear_resampler_filter_f32(float* pLPF, ma_uint32 lpfC
}
}
static MA_INLINE void ma_linear_resampler_filter_f32_mono(float* pLPF, ma_uint32 lpfCount, float* pFrame)
static MA_INLINE void ma_linear_resampler_filter_f32_mono(float* MA_RESTRICT pLPF, ma_uint32 lpfCount, float* MA_RESTRICT pFrame)
{
ma_uint32 iLPF;
ma_uint32 iChannel;
@@ -60178,7 +60175,7 @@ static MA_INLINE void ma_linear_resampler_filter_f32_mono(float* pLPF, ma_uint32
}
}
static MA_INLINE void ma_linear_resampler_filter_f32_stereo(float* pLPF, ma_uint32 lpfCount, float* pFrame)
static MA_INLINE void ma_linear_resampler_filter_f32_stereo(float* MA_RESTRICT pLPF, ma_uint32 lpfCount, float* MA_RESTRICT pFrame)
{
ma_uint32 iLPF;
ma_uint32 iChannel;
@@ -60211,7 +60208,7 @@ static MA_INLINE void ma_linear_resampler_filter_f32_stereo(float* pLPF, ma_uint
}
}
static MA_INLINE void ma_linear_resampler_filter_f32_4(float* pLPF, ma_uint32 lpfCount, ma_uint32 channels, float* pFrames)
static MA_INLINE void ma_linear_resampler_filter_f32_4(float* MA_RESTRICT pLPF, ma_uint32 lpfCount, ma_uint32 channels, float* MA_RESTRICT pFrames)
{
ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (0 * channels));
ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (1 * channels));
@@ -60219,7 +60216,7 @@ static MA_INLINE void ma_linear_resampler_filter_f32_4(float* pLPF, ma_uint32 lp
ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (3 * channels));
}
static MA_INLINE void ma_linear_resampler_filter_f32_4_mono(float* pLPF, ma_uint32 lpfCount, float* pFrames)
static MA_INLINE void ma_linear_resampler_filter_f32_4_mono(float* MA_RESTRICT pLPF, ma_uint32 lpfCount, float* MA_RESTRICT pFrames)
{
ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 0);
ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 1);
@@ -60227,7 +60224,7 @@ static MA_INLINE void ma_linear_resampler_filter_f32_4_mono(float* pLPF, ma_uint
ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 3);
}
static MA_INLINE void ma_linear_resampler_filter_f32_4_stereo(float* pLPF, ma_uint32 lpfCount, float* pFrames)
static MA_INLINE void ma_linear_resampler_filter_f32_4_stereo(float* MA_RESTRICT pLPF, ma_uint32 lpfCount, float* MA_RESTRICT pFrames)
{
ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 0);
ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 2);
@@ -60247,6 +60244,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_downsample(ma_linear
ma_uint32 c;
ma_uint32 invSampleRateOut;
ma_uint32 lpfCount;
ma_uint32 channels;
MA_ASSERT(pResampler != NULL);
MA_ASSERT(pFrameCountIn != NULL);
@@ -60260,29 +60258,26 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_downsample(ma_linear
framesProcessedOut = 0;
invSampleRateOut = (1 << MA_LINEAR_RESAMPLER_LERP_SHIFT) / pResampler->sampleRateOut;
lpfCount = pResampler->lpfOrder >> 1;
channels = pResampler->channels;
/*
Not currently using lpfCount. If the `filter_s16()` function is updated to accept an lpfCount parameter, the mono
s16 upsampling path runs twice as slow when compiled with GCC.
*/
(void)lpfCount;
if (pResampler->lpfOrder == 0) {
if (lpfCount == 0) {
return ma_linear_resampler_process_pcm_frames_s16_no_lpf(pResampler, pFramesInS16, pFrameCountIn, pFramesOutS16, pFrameCountOut, invSampleRateOut);
} else {
ma_int32* pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
while (framesProcessedOut < frameCountOut) {
ma_uint32 a = pResampler->inTimeFrac * invSampleRateOut;
/* Before interpolating we need to load the buffers. When doing this we need to ensure we run every input sample through the filter. */
while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) {
for (c = 0; c < pResampler->channels; c += 1) {
for (c = 0; c < channels; c += 1) {
pResampler->x0.s16[c] = pResampler->x1.s16[c];
pResampler->x1.s16[c] = pFramesInS16[c];
}
pFramesInS16 += pResampler->channels;
pFramesInS16 += channels;
/* Filter. */
ma_linear_resampler_filter_s16(pResampler, pResampler->x1.s16);
ma_linear_resampler_filter_s16(pLPF, lpfCount, channels, pResampler->x1.s16);
framesProcessedIn += 1;
pResampler->inTimeInt -= 1;
@@ -60293,10 +60288,10 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_downsample(ma_linear
}
/* Getting here means the frames have been loaded and filtered and we can generate the next output frame. */
for (c = 0; c < pResampler->channels; c += 1) {
for (c = 0; c < channels; c += 1) {
pFramesOutS16[c] = ma_linear_resampler_mix_s16(pResampler->x0.s16[c], pResampler->x1.s16[c], a);
}
pFramesOutS16 += pResampler->channels;
pFramesOutS16 += channels;
framesProcessedOut += 1;
@@ -60327,6 +60322,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
ma_uint32 c;
ma_uint32 invSampleRateOut;
ma_uint32 lpfCount;
ma_uint32 channels;
MA_ASSERT(pResampler != NULL);
MA_ASSERT(pFramesIn != NULL);
@@ -60342,9 +60338,10 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
framesProcessedOut = 0;
invSampleRateOut = (1 << MA_LINEAR_RESAMPLER_LERP_SHIFT) / pResampler->sampleRateOut;
lpfCount = pResampler->lpfOrder >> 1;
channels = pResampler->channels;
/* We can run an optimized path when the low-pass filter is not being used. */
if (pResampler->lpfOrder == 0) {
if (lpfCount == 0) {
/* Fast path. No LPF needed. */
return ma_linear_resampler_process_pcm_frames_s16_no_lpf(pResampler, pFramesInS16, pFrameCountIn, pFramesOutS16, pFrameCountOut, invSampleRateOut);
} else {
@@ -60359,12 +60356,12 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
while (pResampler->cachedFrameCount > 0 && frameCountIn > 0 && framesProcessedOut < frameCountOut) {
ma_uint32 a = pResampler->inTimeFrac * invSampleRateOut;
for (c = 0; c < pResampler->channels; c += 1) {
for (c = 0; c < channels; c += 1) {
pFramesOutS16[c] = ma_linear_resampler_mix_s16(pResampler->x0.s16[c], pFramesInS16[c], a);
}
ma_linear_resampler_filter_s16(pResampler, pFramesOutS16);
ma_linear_resampler_filter_s16(pLPF, lpfCount, channels, pFramesOutS16);
pFramesOutS16 += pResampler->channels;
pFramesOutS16 += channels;
framesProcessedOut += 1;
@@ -60389,8 +60386,6 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
/* Experimental loop unrolling to make it easier for SIMD-ification. */
#if 1
{
ma_uint32 channels = pResampler->channels;
while (framesProcessedOut + 4 <= frameCountOut) {
ma_uint32 inTimeIntTemp;
ma_uint32 inTimeFracTemp;
@@ -60585,7 +60580,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
pFramesOutS16[(3 * channels) + c] = (ma_int16)r[3];
}
ma_linear_resampler_filter_s16_4(pResampler, pFramesOutS16);
ma_linear_resampler_filter_s16_4(pLPF, lpfCount, channels, pFramesOutS16);
}
pFramesOutS16 += 4 * channels;
@@ -60599,12 +60594,12 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
if (pResampler->inTimeInt + 1 < frameCountIn) {
ma_uint32 a = pResampler->inTimeFrac * invSampleRateOut;
for (c = 0; c < pResampler->channels; c += 1) {
pFramesOutS16[c] = ma_linear_resampler_mix_s16(pFramesInS16[((pResampler->inTimeInt + 0) * pResampler->channels) + c], pFramesInS16[((pResampler->inTimeInt + 1) * pResampler->channels) + c], a);
for (c = 0; c < channels; c += 1) {
pFramesOutS16[c] = ma_linear_resampler_mix_s16(pFramesInS16[((pResampler->inTimeInt + 0) * channels) + c], pFramesInS16[((pResampler->inTimeInt + 1) * channels) + c], a);
}
ma_linear_resampler_filter_s16(pResampler, pFramesOutS16);
ma_linear_resampler_filter_s16(pLPF, lpfCount, channels, pFramesOutS16);
pFramesOutS16 += pResampler->channels;
pFramesOutS16 += channels;
framesProcessedOut += 1;
@@ -60620,8 +60615,8 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
There are not enough input frames to interpolate. We'll need to stop here. But it's important that we cache
the frame to ensure we make some forward progress.
*/
for (c = 0; c < pResampler->channels; c += 1) {
pResampler->x0.s16[c] = pFramesInS16[((pResampler->inTimeInt + 0) * pResampler->channels) + c];
for (c = 0; c < channels; c += 1) {
pResampler->x0.s16[c] = pFramesInS16[((pResampler->inTimeInt + 0) * channels) + c];
}
pResampler->cachedFrameCount = 1;
@@ -60648,8 +60643,8 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
downsampling process will be expecting an input frame.
*/
if (framesProcessedIn > 0) {
for (c = 0; c < pResampler->channels; c += 1) {
pResampler->x1.s16[c] = pFramesInS16[((framesProcessedIn - 1) * pResampler->channels) + c];
for (c = 0; c < channels; c += 1) {
pResampler->x1.s16[c] = pFramesInS16[((framesProcessedIn - 1) * channels) + c];
}
}
@@ -60663,11 +60658,11 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) {
ma_uint32 iChannel;
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
for (iChannel = 0; iChannel < channels; iChannel += 1) {
pResampler->x0.s16[iChannel] = pResampler->x1.s16[iChannel];
pResampler->x1.s16[iChannel] = pFramesInS16[iChannel];
}
pFramesInS16 += pResampler->channels;
pFramesInS16 += channels;
framesProcessedIn += 1;
pResampler->inTimeInt -= 1;
@@ -60685,7 +60680,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
/* Filter. */
ma_linear_resampler_filter_s16(pResampler, pFramesOutS16);
pFramesOutS16 += pResampler->channels;
pFramesOutS16 += channels;
}
framesProcessedOut += 1;
@@ -60730,6 +60725,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear
ma_uint32 c;
float invSampleRateOut;
ma_uint32 lpfCount;
ma_uint32 channels;
MA_ASSERT(pResampler != NULL);
MA_ASSERT(pFrameCountIn != NULL);
@@ -60743,8 +60739,9 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear
framesProcessedOut = 0;
invSampleRateOut = 1.0f / pResampler->sampleRateOut;
lpfCount = pResampler->lpfOrder >> 1;
channels = pResampler->channels;
if (pResampler->lpfOrder == 0) {
if (lpfCount == 0) {
/* Fast path. No LPF needed. */
return ma_linear_resampler_process_pcm_frames_f32_no_lpf(pResampler, pFramesInF32, pFrameCountIn, pFramesOutF32, pFrameCountOut, invSampleRateOut);
} else {
@@ -60758,14 +60755,14 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear
/* Before interpolating we need to load the buffers. When doing this we need to ensure we run every input sample through the filter. */
while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) {
for (c = 0; c < pResampler->channels; c += 1) {
for (c = 0; c < channels; c += 1) {
pResampler->x0.f32[c] = pResampler->x1.f32[c];
pResampler->x1.f32[c] = pFramesInF32[c];
}
pFramesInF32 += pResampler->channels;
pFramesInF32 += channels;
/* Filter. */
ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pResampler->x1.f32);
ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pResampler->x1.f32);
framesProcessedIn += 1;
pResampler->inTimeInt -= 1;
@@ -60776,10 +60773,10 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear
}
/* Getting here means the frames have been loaded and filtered and we can generate the next output frame. */
for (c = 0; c < pResampler->channels; c += 1) {
for (c = 0; c < channels; c += 1) {
pFramesOutF32[c] = ma_mix_f32_fast(pResampler->x0.f32[c], pResampler->x1.f32[c], a);
}
pFramesOutF32 += pResampler->channels;
pFramesOutF32 += channels;
framesProcessedOut += 1;
@@ -60810,6 +60807,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
ma_uint32 c;
double invSampleRateOut;
ma_uint32 lpfCount;
ma_uint32 channels;
MA_ASSERT(pResampler != NULL);
MA_ASSERT(pFrameCountIn != NULL);
@@ -60823,6 +60821,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
framesProcessedOut = 0;
invSampleRateOut = (1.0 / pResampler->sampleRateOut);
lpfCount = pResampler->lpfOrder >> 1;
channels = pResampler->channels;
if (lpfCount == 0) {
/* Fast path. No LPF needed. */
@@ -60841,12 +60840,12 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
while (pResampler->cachedFrameCount > 0 && frameCountIn > 0 && framesProcessedOut < frameCountOut) {
float a = (float)(pResampler->inTimeFrac * invSampleRateOut);
for (c = 0; c < pResampler->channels; c += 1) {
for (c = 0; c < channels; c += 1) {
pFramesOutF32[c] = ma_mix_f32_fast(pResampler->x0.f32[c], pFramesInF32[c], a);
}
ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pFramesOutF32);
ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFramesOutF32);
pFramesOutF32 += pResampler->channels;
pFramesOutF32 += channels;
framesProcessedOut += 1;
@@ -60871,8 +60870,6 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
/* Experimental loop unrolling to make it easier for SIMD-ification. */
#if 1
{
ma_uint32 channels = pResampler->channels;
while (framesProcessedOut + 4 <= frameCountOut) {
ma_uint32 inTimeIntTemp;
ma_uint32 inTimeFracTemp;
@@ -61067,7 +61064,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
pFramesOutF32[(3 * channels) + c] = r[3];
}
ma_linear_resampler_filter_f32_4(pLPF, lpfCount, pResampler->channels, pFramesOutF32);
ma_linear_resampler_filter_f32_4(pLPF, lpfCount, channels, pFramesOutF32);
}
pFramesOutF32 += 4 * channels;
@@ -61081,12 +61078,12 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
if (pResampler->inTimeInt + 1 < frameCountIn) {
float a = (float)(pResampler->inTimeFrac * invSampleRateOut);
for (c = 0; c < pResampler->channels; c += 1) {
pFramesOutF32[c] = ma_mix_f32_fast(pFramesInF32[((pResampler->inTimeInt + 0) * pResampler->channels) + c], pFramesInF32[((pResampler->inTimeInt + 1) * pResampler->channels) + c], a);
for (c = 0; c < channels; c += 1) {
pFramesOutF32[c] = ma_mix_f32_fast(pFramesInF32[((pResampler->inTimeInt + 0) * channels) + c], pFramesInF32[((pResampler->inTimeInt + 1) * channels) + c], a);
}
ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pFramesOutF32);
ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFramesOutF32);
pFramesOutF32 += pResampler->channels;
pFramesOutF32 += channels;
framesProcessedOut += 1;
@@ -61102,8 +61099,8 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
There are not enough input frames to interpolate. We'll need to stop here. But it's important that we cache
the frame to ensure we make some forward progress.
*/
for (c = 0; c < pResampler->channels; c += 1) {
pResampler->x0.f32[c] = pFramesInF32[((pResampler->inTimeInt + 0) * pResampler->channels) + c];
for (c = 0; c < channels; c += 1) {
pResampler->x0.f32[c] = pFramesInF32[((pResampler->inTimeInt + 0) * channels) + c];
}
pResampler->cachedFrameCount = 1;
@@ -61130,8 +61127,8 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
downsampling process will be expecting an input frame.
*/
if (framesProcessedIn > 0) {
for (c = 0; c < pResampler->channels; c += 1) {
pResampler->x1.f32[c] = pFramesInF32[((framesProcessedIn - 1) * pResampler->channels) + c];
for (c = 0; c < channels; c += 1) {
pResampler->x1.f32[c] = pFramesInF32[((framesProcessedIn - 1) * channels) + c];
}
}
@@ -61145,11 +61142,11 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) {
ma_uint32 iChannel;
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
for (iChannel = 0; iChannel < channels; iChannel += 1) {
pResampler->x0.f32[iChannel] = pResampler->x1.f32[iChannel];
pResampler->x1.f32[iChannel] = pFramesInF32[iChannel];
}
pFramesInF32 += pResampler->channels;
pFramesInF32 += channels;
framesProcessedIn += 1;
pResampler->inTimeInt -= 1;
@@ -61167,7 +61164,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
/* Filter. */
ma_linear_resampler_filter_f32(pResampler, pFramesOutF32);
pFramesOutF32 += pResampler->channels;
pFramesOutF32 += channels;
}
framesProcessedOut += 1;