Fix SSE2 sample swapping in mono expansion.

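The SSE2 code paths for mono expansion introduced in version 0.11.15
passed the arguments of `_mm_shuffle_ps()` in the wrong order (`in1, in0`
instead of `in0, in1`). Because `_mm_shuffle_ps(a, b, ...)` takes its two
low output lanes from `a` and its two high output lanes from `b`, this
caused adjacent PCM frames to be swapped in the channel-expanded output.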
This commit is contained in:
nmlgc
2023-08-26 21:24:58 +02:00
committed by David Reid
parent 9d461f6d5d
commit 70bf42392d
2 changed files with 4 additions and 4 deletions
+2 -2
View File
@@ -41417,7 +41417,7 @@ static ma_result ma_channel_map_apply_mono_in_f32(float* MA_RESTRICT pFramesOut,
for (iFrame = 0; iFrame < unrolledFrameCount; iFrame += 1) { for (iFrame = 0; iFrame < unrolledFrameCount; iFrame += 1) {
__m128 in0 = _mm_set1_ps(pFramesIn[iFrame*2 + 0]); __m128 in0 = _mm_set1_ps(pFramesIn[iFrame*2 + 0]);
__m128 in1 = _mm_set1_ps(pFramesIn[iFrame*2 + 1]); __m128 in1 = _mm_set1_ps(pFramesIn[iFrame*2 + 1]);
_mm_storeu_ps(&pFramesOut[iFrame*4 + 0], _mm_shuffle_ps(in1, in0, _MM_SHUFFLE(0, 0, 0, 0))); _mm_storeu_ps(&pFramesOut[iFrame*4 + 0], _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(0, 0, 0, 0)));
} }
/* Tail. */ /* Tail. */
@@ -41443,7 +41443,7 @@ static ma_result ma_channel_map_apply_mono_in_f32(float* MA_RESTRICT pFramesOut,
__m128 in1 = _mm_set1_ps(pFramesIn[iFrame*2 + 1]); __m128 in1 = _mm_set1_ps(pFramesIn[iFrame*2 + 1]);
_mm_storeu_ps(&pFramesOut[iFrame*12 + 0], in0); _mm_storeu_ps(&pFramesOut[iFrame*12 + 0], in0);
_mm_storeu_ps(&pFramesOut[iFrame*12 + 4], _mm_shuffle_ps(in1, in0, _MM_SHUFFLE(0, 0, 0, 0))); _mm_storeu_ps(&pFramesOut[iFrame*12 + 4], _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(0, 0, 0, 0)));
_mm_storeu_ps(&pFramesOut[iFrame*12 + 8], in1); _mm_storeu_ps(&pFramesOut[iFrame*12 + 8], in1);
} }
+2 -2
View File
@@ -52835,7 +52835,7 @@ static ma_result ma_channel_map_apply_mono_in_f32(float* MA_RESTRICT pFramesOut,
for (iFrame = 0; iFrame < unrolledFrameCount; iFrame += 1) { for (iFrame = 0; iFrame < unrolledFrameCount; iFrame += 1) {
__m128 in0 = _mm_set1_ps(pFramesIn[iFrame*2 + 0]); __m128 in0 = _mm_set1_ps(pFramesIn[iFrame*2 + 0]);
__m128 in1 = _mm_set1_ps(pFramesIn[iFrame*2 + 1]); __m128 in1 = _mm_set1_ps(pFramesIn[iFrame*2 + 1]);
_mm_storeu_ps(&pFramesOut[iFrame*4 + 0], _mm_shuffle_ps(in1, in0, _MM_SHUFFLE(0, 0, 0, 0))); _mm_storeu_ps(&pFramesOut[iFrame*4 + 0], _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(0, 0, 0, 0)));
} }
/* Tail. */ /* Tail. */
@@ -52861,7 +52861,7 @@ static ma_result ma_channel_map_apply_mono_in_f32(float* MA_RESTRICT pFramesOut,
__m128 in1 = _mm_set1_ps(pFramesIn[iFrame*2 + 1]); __m128 in1 = _mm_set1_ps(pFramesIn[iFrame*2 + 1]);
_mm_storeu_ps(&pFramesOut[iFrame*12 + 0], in0); _mm_storeu_ps(&pFramesOut[iFrame*12 + 0], in0);
_mm_storeu_ps(&pFramesOut[iFrame*12 + 4], _mm_shuffle_ps(in1, in0, _MM_SHUFFLE(0, 0, 0, 0))); _mm_storeu_ps(&pFramesOut[iFrame*12 + 4], _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(0, 0, 0, 0)));
_mm_storeu_ps(&pFramesOut[iFrame*12 + 8], in1); _mm_storeu_ps(&pFramesOut[iFrame*12 + 8], in1);
} }