Resampler: Optimization for floating point stereo.

This applies only to the f32 no-LPF code path. Other code paths will be integrated later.
2026-07-22 21:02:42 +02:00 · 2026-02-08 22:01:23 +10:00
parent cd02ebe39c
commit 172f8beae6
1 changed file with 82 additions and 3 deletions
@@ -59387,7 +59387,7 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_f32_no_lpf(ma_
        /* The rate must have changed between calls. Ignore the cached frame. */
    }
-    /* Experimental loop unrolling for some SIMD experiments. */
+    /* Experimental loop unrolling to make SIMD-ification easier. */
    #if 1
    {
        ma_uint32 channels = pResampler->channels;
@@ -59423,9 +59423,8 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_f32_no_lpf(ma_
            pResampler->inTimeInt  = inTimeIntTemp;
            pResampler->inTimeFrac = inTimeFracTemp;
-            /* We should now be able to SIMD-ify the rest. */
+            /* We should now be able to SIMD-ify the rest. For now I am trusting the compiler to vectorize this, but I'll experiment with some manual stuff later. */
            {
                /* TODO: Experiment with a stereo specialization. */
                if (channels == 1) {
                    float x[4];
                    float y[4];
@@ -59466,6 +59465,86 @@ static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_f32_no_lpf(ma_
                    pFramesOutF32[1] = r[1];
                    pFramesOutF32[2] = r[2];
                    pFramesOutF32[3] = r[3];
                } else if (channels == 2) {
                    float x[8];
                    float y[8];
                    float a[8];
                    float d[8];
                    float n[8];
                    float r[8];
                    /* Frame 0, Channel 0 */
                    x[0] = pFramesInF32[((inTimeInt[0] + 0) * 2) + 0];
                    y[0] = pFramesInF32[((inTimeInt[0] + 1) * 2) + 0];
                    a[0] = inTimeFrac[0] * invSampleRateOut;
                    d[0] = y[0] - x[0];
                    n[0] = d[0] * a[0];
                    r[0] = x[0] + n[0];
                    /* Frame 0, Channel 1 */
                    x[1] = pFramesInF32[((inTimeInt[0] + 0) * 2) + 1];
                    y[1] = pFramesInF32[((inTimeInt[0] + 1) * 2) + 1];
                    a[1] = inTimeFrac[0] * invSampleRateOut;
                    d[1] = y[1] - x[1];
                    n[1] = d[1] * a[1];
                    r[1] = x[1] + n[1];
                    /* Frame 1, Channel 0 */
                    x[2] = pFramesInF32[((inTimeInt[1] + 0) * 2) + 0];
                    y[2] = pFramesInF32[((inTimeInt[1] + 1) * 2) + 0];
                    a[2] = inTimeFrac[1] * invSampleRateOut;
                    d[2] = y[2] - x[2];
                    n[2] = d[2] * a[2];
                    r[2] = x[2] + n[2];
                    /* Frame 1, Channel 1 */
                    x[3] = pFramesInF32[((inTimeInt[1] + 0) * 2) + 1];
                    y[3] = pFramesInF32[((inTimeInt[1] + 1) * 2) + 1];
                    a[3] = inTimeFrac[1] * invSampleRateOut;
                    d[3] = y[3] - x[3];
                    n[3] = d[3] * a[3];
                    r[3] = x[3] + n[3];
                    /* Frame 2, Channel 0 */
                    x[4] = pFramesInF32[((inTimeInt[2] + 0) * 2) + 0];
                    y[4] = pFramesInF32[((inTimeInt[2] + 1) * 2) + 0];
                    a[4] = inTimeFrac[2] * invSampleRateOut;
                    d[4] = y[4] - x[4];
                    n[4] = d[4] * a[4];
                    r[4] = x[4] + n[4];
                    /* Frame 2, Channel 1 */
                    x[5] = pFramesInF32[((inTimeInt[2] + 0) * 2) + 1];
                    y[5] = pFramesInF32[((inTimeInt[2] + 1) * 2) + 1];
                    a[5] = inTimeFrac[2] * invSampleRateOut;
                    d[5] = y[5] - x[5];
                    n[5] = d[5] * a[5];
                    r[5] = x[5] + n[5];
                    /* Frame 3, Channel 0 */
                    x[6] = pFramesInF32[((inTimeInt[3] + 0) * 2) + 0];
                    y[6] = pFramesInF32[((inTimeInt[3] + 1) * 2) + 0];
                    a[6] = inTimeFrac[3] * invSampleRateOut;
                    d[6] = y[6] - x[6];
                    n[6] = d[6] * a[6];
                    r[6] = x[6] + n[6];
                    /* Frame 3, Channel 1 */
                    x[7] = pFramesInF32[((inTimeInt[3] + 0) * 2) + 1];
                    y[7] = pFramesInF32[((inTimeInt[3] + 1) * 2) + 1];
                    a[7] = inTimeFrac[3] * invSampleRateOut;
                    d[7] = y[7] - x[7];
                    n[7] = d[7] * a[7];
                    r[7] = x[7] + n[7];
                    pFramesOutF32[0] = r[0];
                    pFramesOutF32[1] = r[1];
                    pFramesOutF32[2] = r[2];
                    pFramesOutF32[3] = r[3];
                    pFramesOutF32[4] = r[4];
                    pFramesOutF32[5] = r[5];
                    pFramesOutF32[6] = r[6];
                    pFramesOutF32[7] = r[7];
                } else {
                    for (c = 0; c < channels; c += 1) {
                        float x[4];