diff --git a/mini_al.h b/mini_al.h index f4da2a2a..049ffbfa 100644 --- a/mini_al.h +++ b/mini_al.h @@ -18806,8 +18806,8 @@ void mal_pcm_f32_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_ x0 = _mm_mul_ps(x0, _mm_set1_ps(32767.0f)); x1 = _mm_mul_ps(x1, _mm_set1_ps(32767.0f)); - *((__m128i*)(dst_s16 + i)) = _mm_packs_epi32(_mm_cvttps_epi32(x0), _mm_cvttps_epi32(x1)); - + _mm_stream_si128(((__m128i*)(dst_s16 + i)), _mm_packs_epi32(_mm_cvttps_epi32(x0), _mm_cvttps_epi32(x1))); + i += 8; } @@ -18906,7 +18906,8 @@ void mal_pcm_f32_to_s16__avx2(void* dst, const void* src, mal_uint64 count, mal_ __m256i p1 = _mm256_permute2x128_si256(i0, i1, 1 | 48); __m256i r = _mm256_packs_epi32(p0, p1); - *((__m256i*)(dst_s16 + i)) = r; + _mm256_stream_si256(((__m256i*)(dst_s16 + i)), r); + i += 16; } diff --git a/tests/mal_profiling.c b/tests/mal_profiling.c index 053ff8bb..5acff0ff 100644 --- a/tests/mal_profiling.c +++ b/tests/mal_profiling.c @@ -580,7 +580,7 @@ int do_profiling__format_conversion__profile_individual(mal_format formatIn, mal int do_profiling__format_conversion__profile_set(mal_format formatIn, mal_format formatOut, mal_dither_mode ditherMode) { // Generate our base data to begin with. This is generated from an f32 sine wave which is converted to formatIn. That then becomes our base data. - mal_uint32 sampleCount = 1000000; + mal_uint32 sampleCount = 10000000; float* pSourceData = (float*)mal_aligned_malloc(sampleCount*sizeof(*pSourceData), MAL_SIMD_ALIGNMENT); if (pSourceData == NULL) {