mirror of
https://github.com/mackron/miniaudio.git
synced 2026-04-21 15:56:58 +02:00
Resampler: Optimization to the filtering stage.
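Passing the LPF count (`lpfOrder >> 1`) into the filter functions as a parameter, rather than reading it from the resampler inside each one, seems to trigger a faster optimization strategy when compiling with GCC. With this change the filtered s16 mono path is almost 2x faster.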
This commit is contained in:
+58
-50
@@ -59989,11 +59989,10 @@ static MA_INLINE void ma_linear_resampler_filter_s16_4_stereo(ma_linear_resample
|
||||
}
|
||||
#endif
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32(ma_linear_resampler* pResampler, ma_uint32 channels, ma_int32* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32(ma_linear_resampler* pResampler, ma_uint32 channels, ma_uint32 lpfCount, ma_int32* pFrame)
|
||||
{
|
||||
ma_int32* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 lpfCount = pResampler->lpfOrder >> 1;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
|
||||
@@ -60025,11 +60024,10 @@ static MA_INLINE void ma_linear_resampler_filter_s32(ma_linear_resampler* pResam
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_linear_resampler* pResampler, ma_int32* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrame)
|
||||
{
|
||||
ma_int32* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 lpfCount = pResampler->lpfOrder >> 1;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
|
||||
@@ -60061,11 +60059,10 @@ static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_linear_resampler* p
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_linear_resampler* pResampler, ma_int32* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrame)
|
||||
{
|
||||
ma_int32* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 lpfCount = pResampler->lpfOrder >> 1;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
|
||||
@@ -60097,35 +60094,34 @@ static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_linear_resampler*
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4(ma_linear_resampler* pResampler, ma_int32* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, pFrames + (0 * pResampler->channels));
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, pFrames + (1 * pResampler->channels));
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, pFrames + (2 * pResampler->channels));
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, pFrames + (3 * pResampler->channels));
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (0 * pResampler->channels));
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (1 * pResampler->channels));
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (2 * pResampler->channels));
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (3 * pResampler->channels));
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4_mono(ma_linear_resampler* pResampler, ma_int32* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, pFrames + 0);
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, pFrames + 1);
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, pFrames + 2);
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, pFrames + 3);
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 1);
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 3);
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4_stereo(ma_linear_resampler* pResampler, ma_int32* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, pFrames + 0);
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, pFrames + 2);
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, pFrames + 4);
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, pFrames + 6);
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 4);
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 6);
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32(ma_linear_resampler* pResampler, float* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame)
|
||||
{
|
||||
float* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 lpfCount = pResampler->lpfOrder >> 1;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
|
||||
@@ -60157,11 +60153,10 @@ static MA_INLINE void ma_linear_resampler_filter_f32(ma_linear_resampler* pResam
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_mono(ma_linear_resampler* pResampler, float* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame)
|
||||
{
|
||||
float* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 lpfCount = pResampler->lpfOrder >> 1;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
|
||||
@@ -60193,11 +60188,10 @@ static MA_INLINE void ma_linear_resampler_filter_f32_mono(ma_linear_resampler* p
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_stereo(ma_linear_resampler* pResampler, float* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame)
|
||||
{
|
||||
float* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 lpfCount = pResampler->lpfOrder >> 1;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
|
||||
@@ -60229,28 +60223,28 @@ static MA_INLINE void ma_linear_resampler_filter_f32_stereo(ma_linear_resampler*
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4(ma_linear_resampler* pResampler, float* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_f32(pResampler, pFrames + (0 * pResampler->channels));
|
||||
ma_linear_resampler_filter_f32(pResampler, pFrames + (1 * pResampler->channels));
|
||||
ma_linear_resampler_filter_f32(pResampler, pFrames + (2 * pResampler->channels));
|
||||
ma_linear_resampler_filter_f32(pResampler, pFrames + (3 * pResampler->channels));
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (0 * pResampler->channels));
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (1 * pResampler->channels));
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (2 * pResampler->channels));
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (3 * pResampler->channels));
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4_mono(ma_linear_resampler* pResampler, float* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, pFrames + 0);
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, pFrames + 1);
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, pFrames + 2);
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, pFrames + 3);
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 1);
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 3);
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4_stereo(ma_linear_resampler* pResampler, float* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, pFrames + 0);
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, pFrames + 2);
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, pFrames + 4);
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, pFrames + 6);
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 4);
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 6);
|
||||
}
|
||||
|
||||
|
||||
@@ -60264,6 +60258,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_downsample(ma_linear
|
||||
ma_uint64 framesProcessedOut;
|
||||
ma_uint32 c;
|
||||
ma_uint32 invSampleRateOut;
|
||||
ma_uint32 lpfCount;
|
||||
|
||||
MA_ASSERT(pResampler != NULL);
|
||||
MA_ASSERT(pFrameCountIn != NULL);
|
||||
@@ -60276,6 +60271,13 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_downsample(ma_linear
|
||||
framesProcessedIn = 0;
|
||||
framesProcessedOut = 0;
|
||||
invSampleRateOut = (1 << MA_LINEAR_RESAMPLER_LERP_SHIFT) / pResampler->sampleRateOut;
|
||||
lpfCount = pResampler->lpfOrder >> 1;
|
||||
|
||||
/*
|
||||
Not currently using lpfCount. If I update the `filter_s16()` function to accept a lpfCount parameter it results
|
||||
in the mono s16 upsampling path running twice as slow when compiled with GCC.
|
||||
*/
|
||||
(void)lpfCount;
|
||||
|
||||
if (pResampler->lpfOrder == 0) {
|
||||
return ma_linear_resampler_process_pcm_frames_s16_no_lpf(pResampler, pFramesInS16, pFrameCountIn, pFramesOutS16, pFrameCountOut, invSampleRateOut);
|
||||
@@ -60336,6 +60338,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
|
||||
ma_uint64 framesProcessedOut;
|
||||
ma_uint32 c;
|
||||
ma_uint32 invSampleRateOut;
|
||||
ma_uint32 lpfCount;
|
||||
|
||||
MA_ASSERT(pResampler != NULL);
|
||||
MA_ASSERT(pFramesIn != NULL);
|
||||
@@ -60350,6 +60353,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
|
||||
framesProcessedIn = 0;
|
||||
framesProcessedOut = 0;
|
||||
invSampleRateOut = (1 << MA_LINEAR_RESAMPLER_LERP_SHIFT) / pResampler->sampleRateOut;
|
||||
lpfCount = pResampler->lpfOrder >> 1;
|
||||
|
||||
/* We can run an optimized path when the low-pass filter is not being used. */
|
||||
if (pResampler->lpfOrder == 0) {
|
||||
@@ -60469,7 +60473,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
|
||||
r[2] = x[2] + (n[2] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
|
||||
r[3] = x[3] + (n[3] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
|
||||
|
||||
ma_linear_resampler_filter_s32_4_mono(pResampler, r);
|
||||
ma_linear_resampler_filter_s32_4_mono(pResampler, lpfCount, r);
|
||||
|
||||
pFramesOutS16[0] = (ma_int16)r[0];
|
||||
pFramesOutS16[1] = (ma_int16)r[1];
|
||||
@@ -60537,7 +60541,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
|
||||
r[6] = x[6] + (n[6] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
|
||||
r[7] = x[7] + (n[7] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
|
||||
|
||||
ma_linear_resampler_filter_s32_4_stereo(pResampler, r);
|
||||
ma_linear_resampler_filter_s32_4_stereo(pResampler, lpfCount, r);
|
||||
|
||||
pFramesOutS16[0] = (ma_int16)r[0];
|
||||
pFramesOutS16[1] = (ma_int16)r[1];
|
||||
@@ -60736,6 +60740,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear
|
||||
ma_uint64 framesProcessedOut;
|
||||
ma_uint32 c;
|
||||
float invSampleRateOut;
|
||||
ma_uint32 lpfCount;
|
||||
|
||||
MA_ASSERT(pResampler != NULL);
|
||||
MA_ASSERT(pFrameCountIn != NULL);
|
||||
@@ -60748,6 +60753,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear
|
||||
framesProcessedIn = 0;
|
||||
framesProcessedOut = 0;
|
||||
invSampleRateOut = 1.0f / pResampler->sampleRateOut;
|
||||
lpfCount = pResampler->lpfOrder >> 1;
|
||||
|
||||
if (pResampler->lpfOrder == 0) {
|
||||
/* Fast path. No LPF needed. */
|
||||
@@ -60768,7 +60774,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear
|
||||
pFramesInF32 += pResampler->channels;
|
||||
|
||||
/* Filter. */
|
||||
ma_linear_resampler_filter_f32(pResampler, pResampler->x1.f32);
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pResampler->x1.f32);
|
||||
|
||||
framesProcessedIn += 1;
|
||||
pResampler->inTimeInt -= 1;
|
||||
@@ -60812,6 +60818,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
ma_uint64 framesProcessedOut;
|
||||
ma_uint32 c;
|
||||
double invSampleRateOut;
|
||||
ma_uint32 lpfCount;
|
||||
|
||||
MA_ASSERT(pResampler != NULL);
|
||||
MA_ASSERT(pFrameCountIn != NULL);
|
||||
@@ -60824,6 +60831,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
framesProcessedIn = 0;
|
||||
framesProcessedOut = 0;
|
||||
invSampleRateOut = (1.0 / pResampler->sampleRateOut);
|
||||
lpfCount = pResampler->lpfOrder >> 1;
|
||||
|
||||
if (pResampler->lpfOrder == 0) {
|
||||
/* Fast path. No LPF needed. */
|
||||
@@ -60843,7 +60851,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
for (c = 0; c < pResampler->channels; c += 1) {
|
||||
pFramesOutF32[c] = ma_mix_f32_fast(pResampler->x0.f32[c], pFramesInF32[c], a);
|
||||
}
|
||||
ma_linear_resampler_filter_f32(pResampler, pFramesOutF32);
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFramesOutF32);
|
||||
|
||||
pFramesOutF32 += pResampler->channels;
|
||||
|
||||
@@ -60943,7 +60951,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
r[2] = x[2] + n[2];
|
||||
r[3] = x[3] + n[3];
|
||||
|
||||
ma_linear_resampler_filter_f32_4_mono(pResampler, r);
|
||||
ma_linear_resampler_filter_f32_4_mono(pResampler, lpfCount, r);
|
||||
|
||||
pFramesOutF32[0] = r[0];
|
||||
pFramesOutF32[1] = r[1];
|
||||
@@ -61011,7 +61019,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
r[6] = x[6] + n[6];
|
||||
r[7] = x[7] + n[7];
|
||||
|
||||
ma_linear_resampler_filter_f32_4_stereo(pResampler, r);
|
||||
ma_linear_resampler_filter_f32_4_stereo(pResampler, lpfCount, r);
|
||||
|
||||
pFramesOutF32[0] = r[0];
|
||||
pFramesOutF32[1] = r[1];
|
||||
@@ -61066,7 +61074,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
pFramesOutF32[(3 * channels) + c] = r[3];
|
||||
}
|
||||
|
||||
ma_linear_resampler_filter_f32_4(pResampler, pFramesOutF32);
|
||||
ma_linear_resampler_filter_f32_4(pResampler, lpfCount, pFramesOutF32);
|
||||
}
|
||||
|
||||
pFramesOutF32 += 4 * channels;
|
||||
@@ -61083,7 +61091,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
for (c = 0; c < pResampler->channels; c += 1) {
|
||||
pFramesOutF32[c] = ma_mix_f32_fast(pFramesInF32[((pResampler->inTimeInt + 0) * pResampler->channels) + c], pFramesInF32[((pResampler->inTimeInt + 1) * pResampler->channels) + c], a);
|
||||
}
|
||||
ma_linear_resampler_filter_f32(pResampler, pFramesOutF32);
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFramesOutF32);
|
||||
|
||||
pFramesOutF32 += pResampler->channels;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user