mirror of
https://github.com/mackron/miniaudio.git
synced 2026-04-22 00:06:59 +02:00
Resampler: Decouple the filtering step from the main resampler object.
The idea here is to have a more clearly defined data dependency separation between the resampler and the filtering state, which I'm hoping might open up more optimization opportunities. The problem with this theory is that this commit makes the GCC build slower on the s16 mono upsampling path. It appears to be slightly faster with Clang, though.
This commit is contained in:
+53
-60
@@ -59989,13 +59989,11 @@ static MA_INLINE void ma_linear_resampler_filter_s16_4_stereo(ma_linear_resample
|
||||
}
|
||||
#endif
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32(ma_linear_resampler* pResampler, ma_uint32 channels, ma_uint32 lpfCount, ma_int32* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32(ma_int32* pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int32* pFrame)
|
||||
{
|
||||
ma_int32* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
|
||||
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
|
||||
ma_int32 b1 = pLPF[0];
|
||||
ma_int32 b2 = pLPF[1];
|
||||
@@ -60024,13 +60022,11 @@ static MA_INLINE void ma_linear_resampler_filter_s32(ma_linear_resampler* pResam
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrame)
|
||||
{
|
||||
ma_int32* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
|
||||
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
|
||||
ma_int32 b1 = pLPF[0];
|
||||
ma_int32 b2 = pLPF[1];
|
||||
@@ -60059,13 +60055,11 @@ static MA_INLINE void ma_linear_resampler_filter_s32_mono(ma_linear_resampler* p
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrame)
|
||||
{
|
||||
ma_int32* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
|
||||
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
|
||||
ma_int32 b1 = pLPF[0];
|
||||
ma_int32 b2 = pLPF[1];
|
||||
@@ -60094,46 +60088,44 @@ static MA_INLINE void ma_linear_resampler_filter_s32_stereo(ma_linear_resampler*
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4(ma_int32* pLPF, ma_uint32 lpfCount, ma_uint32 channels, ma_int32* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (0 * pResampler->channels));
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (1 * pResampler->channels));
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (2 * pResampler->channels));
|
||||
ma_linear_resampler_filter_s32(pResampler, pResampler->channels, lpfCount, pFrames + (3 * pResampler->channels));
|
||||
ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (0 * channels));
|
||||
ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (1 * channels));
|
||||
ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (2 * channels));
|
||||
ma_linear_resampler_filter_s32(pLPF, lpfCount, channels, pFrames + (3 * channels));
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4_mono(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 1);
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_s32_mono(pResampler, lpfCount, pFrames + 3);
|
||||
ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 1);
|
||||
ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_s32_mono(pLPF, lpfCount, pFrames + 3);
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, ma_int32* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_s32_4_stereo(ma_int32* pLPF, ma_uint32 lpfCount, ma_int32* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 4);
|
||||
ma_linear_resampler_filter_s32_stereo(pResampler, lpfCount, pFrames + 6);
|
||||
ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 4);
|
||||
ma_linear_resampler_filter_s32_stereo(pLPF, lpfCount, pFrames + 6);
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32(float* pLPF, ma_uint32 lpfCount, ma_uint32 channels, float* pFrame)
|
||||
{
|
||||
float* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
|
||||
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
|
||||
float b1 = pLPF[0];
|
||||
float b2 = pLPF[1];
|
||||
float a1 = pLPF[2];
|
||||
float a2 = pLPF[3];
|
||||
float* pR0 = pLPF + 4;
|
||||
float* pR1 = pLPF + 4 + pResampler->channels;
|
||||
float* pR1 = pLPF + 4 + channels;
|
||||
|
||||
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
|
||||
for (iChannel = 0; iChannel < channels; iChannel += 1) {
|
||||
float r0 = pR0[iChannel];
|
||||
float r1 = pR1[iChannel];
|
||||
float x = pFrame[iChannel];
|
||||
@@ -60149,17 +60141,15 @@ static MA_INLINE void ma_linear_resampler_filter_f32(ma_linear_resampler* pResam
|
||||
}
|
||||
|
||||
/* Go do the next LPF state. */
|
||||
pLPF += 4 + (pResampler->channels * 2);
|
||||
pLPF += 4 + (channels * 2);
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_mono(float* pLPF, ma_uint32 lpfCount, float* pFrame)
|
||||
{
|
||||
float* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
|
||||
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
|
||||
float b1 = pLPF[0];
|
||||
float b2 = pLPF[1];
|
||||
@@ -60188,13 +60178,11 @@ static MA_INLINE void ma_linear_resampler_filter_f32_mono(ma_linear_resampler* p
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrame)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_stereo(float* pLPF, ma_uint32 lpfCount, float* pFrame)
|
||||
{
|
||||
float* pLPF;
|
||||
ma_uint32 iLPF;
|
||||
ma_uint32 iChannel;
|
||||
|
||||
pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
|
||||
for (iLPF = 0; iLPF < lpfCount; iLPF += 1) {
|
||||
float b1 = pLPF[0];
|
||||
float b2 = pLPF[1];
|
||||
@@ -60223,28 +60211,28 @@ static MA_INLINE void ma_linear_resampler_filter_f32_stereo(ma_linear_resampler*
|
||||
}
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4(float* pLPF, ma_uint32 lpfCount, ma_uint32 channels, float* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (0 * pResampler->channels));
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (1 * pResampler->channels));
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (2 * pResampler->channels));
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFrames + (3 * pResampler->channels));
|
||||
ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (0 * channels));
|
||||
ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (1 * channels));
|
||||
ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (2 * channels));
|
||||
ma_linear_resampler_filter_f32(pLPF, lpfCount, channels, pFrames + (3 * channels));
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4_mono(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4_mono(float* pLPF, ma_uint32 lpfCount, float* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 1);
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_f32_mono(pResampler, lpfCount, pFrames + 3);
|
||||
ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 1);
|
||||
ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_f32_mono(pLPF, lpfCount, pFrames + 3);
|
||||
}
|
||||
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4_stereo(ma_linear_resampler* pResampler, ma_uint32 lpfCount, float* pFrames)
|
||||
static MA_INLINE void ma_linear_resampler_filter_f32_4_stereo(float* pLPF, ma_uint32 lpfCount, float* pFrames)
|
||||
{
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 4);
|
||||
ma_linear_resampler_filter_f32_stereo(pResampler, lpfCount, pFrames + 6);
|
||||
ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 0);
|
||||
ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 2);
|
||||
ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 4);
|
||||
ma_linear_resampler_filter_f32_stereo(pLPF, lpfCount, pFrames + 6);
|
||||
}
|
||||
|
||||
|
||||
@@ -60361,6 +60349,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
|
||||
return ma_linear_resampler_process_pcm_frames_s16_no_lpf(pResampler, pFramesInS16, pFrameCountIn, pFramesOutS16, pFrameCountOut, invSampleRateOut);
|
||||
} else {
|
||||
/* Slow path. Need LPF. */
|
||||
ma_int32* pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_S32(pResampler, 0);
|
||||
|
||||
#if 1
|
||||
/* If there's a cached frame we need to process it. */
|
||||
@@ -60473,7 +60462,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
|
||||
r[2] = x[2] + (n[2] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
|
||||
r[3] = x[3] + (n[3] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
|
||||
|
||||
ma_linear_resampler_filter_s32_4_mono(pResampler, lpfCount, r);
|
||||
ma_linear_resampler_filter_s32_4_mono(pLPF, lpfCount, r);
|
||||
|
||||
pFramesOutS16[0] = (ma_int16)r[0];
|
||||
pFramesOutS16[1] = (ma_int16)r[1];
|
||||
@@ -60541,7 +60530,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
|
||||
r[6] = x[6] + (n[6] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
|
||||
r[7] = x[7] + (n[7] >> MA_LINEAR_RESAMPLER_LERP_SHIFT);
|
||||
|
||||
ma_linear_resampler_filter_s32_4_stereo(pResampler, lpfCount, r);
|
||||
ma_linear_resampler_filter_s32_4_stereo(pLPF, lpfCount, r);
|
||||
|
||||
pFramesOutS16[0] = (ma_int16)r[0];
|
||||
pFramesOutS16[1] = (ma_int16)r[1];
|
||||
@@ -60760,6 +60749,8 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear
|
||||
return ma_linear_resampler_process_pcm_frames_f32_no_lpf(pResampler, pFramesInF32, pFrameCountIn, pFramesOutF32, pFrameCountOut, invSampleRateOut);
|
||||
} else {
|
||||
/* Slow path. Need LPF. */
|
||||
float* pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
|
||||
|
||||
/*printf("DOWN\n");*/
|
||||
|
||||
while (framesProcessedOut < frameCountOut) {
|
||||
@@ -60774,7 +60765,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_downsample(ma_linear
|
||||
pFramesInF32 += pResampler->channels;
|
||||
|
||||
/* Filter. */
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pResampler->x1.f32);
|
||||
ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pResampler->x1.f32);
|
||||
|
||||
framesProcessedIn += 1;
|
||||
pResampler->inTimeInt -= 1;
|
||||
@@ -60833,11 +60824,13 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
invSampleRateOut = (1.0 / pResampler->sampleRateOut);
|
||||
lpfCount = pResampler->lpfOrder >> 1;
|
||||
|
||||
if (pResampler->lpfOrder == 0) {
|
||||
if (lpfCount == 0) {
|
||||
/* Fast path. No LPF needed. */
|
||||
return ma_linear_resampler_process_pcm_frames_f32_no_lpf(pResampler, pFramesInF32, pFrameCountIn, pFramesOutF32, pFrameCountOut, invSampleRateOut);
|
||||
} else {
|
||||
/* Slow path. Need LPF. */
|
||||
float* pLPF = MA_LINEAR_RESAMPLER_GET_LPF_STATE_F32(pResampler, 0);
|
||||
|
||||
/*printf("UP %u\n", pResampler->inTimeInt);*/
|
||||
|
||||
#if 1
|
||||
@@ -60851,7 +60844,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
for (c = 0; c < pResampler->channels; c += 1) {
|
||||
pFramesOutF32[c] = ma_mix_f32_fast(pResampler->x0.f32[c], pFramesInF32[c], a);
|
||||
}
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFramesOutF32);
|
||||
ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pFramesOutF32);
|
||||
|
||||
pFramesOutF32 += pResampler->channels;
|
||||
|
||||
@@ -60951,7 +60944,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
r[2] = x[2] + n[2];
|
||||
r[3] = x[3] + n[3];
|
||||
|
||||
ma_linear_resampler_filter_f32_4_mono(pResampler, lpfCount, r);
|
||||
ma_linear_resampler_filter_f32_4_mono(pLPF, lpfCount, r);
|
||||
|
||||
pFramesOutF32[0] = r[0];
|
||||
pFramesOutF32[1] = r[1];
|
||||
@@ -61019,7 +61012,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
r[6] = x[6] + n[6];
|
||||
r[7] = x[7] + n[7];
|
||||
|
||||
ma_linear_resampler_filter_f32_4_stereo(pResampler, lpfCount, r);
|
||||
ma_linear_resampler_filter_f32_4_stereo(pLPF, lpfCount, r);
|
||||
|
||||
pFramesOutF32[0] = r[0];
|
||||
pFramesOutF32[1] = r[1];
|
||||
@@ -61074,7 +61067,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
pFramesOutF32[(3 * channels) + c] = r[3];
|
||||
}
|
||||
|
||||
ma_linear_resampler_filter_f32_4(pResampler, lpfCount, pFramesOutF32);
|
||||
ma_linear_resampler_filter_f32_4(pLPF, lpfCount, pResampler->channels, pFramesOutF32);
|
||||
}
|
||||
|
||||
pFramesOutF32 += 4 * channels;
|
||||
@@ -61091,7 +61084,7 @@ static ma_result ma_linear_resampler_process_pcm_frames_f32_upsample(ma_linear_r
|
||||
for (c = 0; c < pResampler->channels; c += 1) {
|
||||
pFramesOutF32[c] = ma_mix_f32_fast(pFramesInF32[((pResampler->inTimeInt + 0) * pResampler->channels) + c], pFramesInF32[((pResampler->inTimeInt + 1) * pResampler->channels) + c], a);
|
||||
}
|
||||
ma_linear_resampler_filter_f32(pResampler, lpfCount, pFramesOutF32);
|
||||
ma_linear_resampler_filter_f32(pLPF, lpfCount, pResampler->channels, pFramesOutF32);
|
||||
|
||||
pFramesOutF32 += pResampler->channels;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user