diff --git a/miniaudio.h b/miniaudio.h index 19f9746e..40889a1e 100644 --- a/miniaudio.h +++ b/miniaudio.h @@ -1480,77 +1480,6 @@ struct ma_channel_router }; - -typedef struct ma_src ma_src; -typedef ma_uint32 (* ma_src_read_deinterleaved_proc)(ma_src* pSRC, ma_uint32 frameCount, void** ppSamplesOut, void* pUserData); /* Returns the number of frames that were read. */ - -typedef enum -{ - ma_src_algorithm_linear = 0, - ma_src_algorithm_sinc, - ma_src_algorithm_none, - ma_src_algorithm_default = ma_src_algorithm_linear -} ma_src_algorithm; - -typedef enum -{ - ma_src_sinc_window_function_hann = 0, - ma_src_sinc_window_function_rectangular, - ma_src_sinc_window_function_default = ma_src_sinc_window_function_hann -} ma_src_sinc_window_function; - -typedef struct -{ - ma_src_sinc_window_function windowFunction; - ma_uint32 windowWidth; -} ma_src_config_sinc; - -typedef struct -{ - ma_uint32 sampleRateIn; - ma_uint32 sampleRateOut; - ma_uint32 channels; - ma_src_algorithm algorithm; - ma_bool32 neverConsumeEndOfInput : 1; - ma_bool32 noSSE2 : 1; - ma_bool32 noAVX2 : 1; - ma_bool32 noAVX512 : 1; - ma_bool32 noNEON : 1; - ma_src_read_deinterleaved_proc onReadDeinterleaved; - void* pUserData; - ma_src_config_sinc sinc; -} ma_src_config; - -struct ma_src -{ - union - { - struct - { - MA_ALIGN(MA_SIMD_ALIGNMENT) float input[MA_MAX_CHANNELS][MA_SRC_INPUT_BUFFER_SIZE_IN_SAMPLES]; - float timeIn; - ma_uint32 leftoverFrames; - } linear; - - struct - { - MA_ALIGN(MA_SIMD_ALIGNMENT) float input[MA_MAX_CHANNELS][MA_SRC_SINC_MAX_WINDOW_WIDTH*2 + MA_SRC_INPUT_BUFFER_SIZE_IN_SAMPLES]; - float timeIn; - ma_uint32 inputFrameCount; /* The number of frames sitting in the input buffer, not including the first half of the window. */ - ma_uint32 windowPosInSamples; /* An offset of . */ - float table[MA_SRC_SINC_MAX_WINDOW_WIDTH*1 * MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION]; /* Precomputed lookup table. The +1 is used to avoid the need for an overflow check. */ - } sinc; - }; - - ma_src_config config; - ma_bool32 isEndOfInputLoaded : 1; - ma_bool32 useSSE2 : 1; - ma_bool32 useAVX2 : 1; - ma_bool32 useAVX512 : 1; - ma_bool32 useNEON : 1; -}; - - /************************************************************************************************************************************************************ ************************************************************************************************************************************************************* @@ -1808,40 +1737,6 @@ Helper for initializing a channel router config. ma_channel_router_config ma_channel_router_config_init(ma_uint32 channelsIn, const ma_channel channelMapIn[MA_MAX_CHANNELS], ma_uint32 channelsOut, const ma_channel channelMapOut[MA_MAX_CHANNELS], ma_channel_mix_mode mixingMode, ma_channel_router_read_deinterleaved_proc onRead, void* pUserData); -/************************************************************************************************************************************************************ - -Sample Rate Conversion -====================== - -************************************************************************************************************************************************************/ - -/* -Initializes a sample rate conversion object. -*/ -ma_result ma_src_init(const ma_src_config* pConfig, ma_src* pSRC); - -/* -Dynamically adjusts the sample rate. - -This is useful for dynamically adjust pitch. Keep in mind, however, that this will speed up or slow down the sound. If this -is not acceptable you will need to use your own algorithm. -*/ -ma_result ma_src_set_sample_rate(ma_src* pSRC, ma_uint32 sampleRateIn, ma_uint32 sampleRateOut); - -/* -Reads a number of frames. - -Returns the number of frames actually read. -*/ -ma_uint64 ma_src_read_deinterleaved(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData); - -/* -Helper for creating a sample rate conversion config. -*/ -ma_src_config ma_src_config_init_new(void); -ma_src_config ma_src_config_init(ma_uint32 sampleRateIn, ma_uint32 sampleRateOut, ma_uint32 channels, ma_src_read_deinterleaved_proc onReadDeinterleaved, void* pUserData); - - /************************************************************************************************************************************************************ Conversion @@ -4430,11 +4325,15 @@ Standard Library Stuff #define ma_copy_memory MA_COPY_MEMORY #define ma_assert MA_ASSERT -#define ma_countof(x) (sizeof(x) / sizeof(x[0])) -#define ma_max(x, y) (((x) > (y)) ? (x) : (y)) -#define ma_min(x, y) (((x) < (y)) ? (x) : (y)) -#define ma_clamp(x, lo, hi) (ma_max(lo, ma_min(x, hi))) -#define ma_offset_ptr(p, offset) (((ma_uint8*)(p)) + (offset)) +#define ma_countof(x) (sizeof(x) / sizeof(x[0])) +#define ma_max(x, y) (((x) > (y)) ? (x) : (y)) +#define ma_min(x, y) (((x) < (y)) ? (x) : (y)) +#define ma_clamp(x, lo, hi) (ma_max(lo, ma_min(x, hi))) +#define ma_offset_ptr(p, offset) (((ma_uint8*)(p)) + (offset)) + +#define ma_floorf(x) ((float)floor((double)(x))) +#define ma_sinf(x) ((float)sin((double)(x))) +#define ma_cosf(x) ((float)cos((double)(x))) #define ma_buffer_frame_capacity(buffer, channels, format) (sizeof(buffer) / ma_get_bytes_per_sample(format) / (channels)) @@ -34910,943 +34809,6 @@ ma_channel_router_config ma_channel_router_config_init(ma_uint32 channelsIn, con } - -/************************************************************************************************************************************************************** - -SRC - -**************************************************************************************************************************************************************/ -#define ma_floorf(x) ((float)floor((double)(x))) -#define ma_sinf(x) ((float)sin((double)(x))) -#define ma_cosf(x) ((float)cos((double)(x))) - -static MA_INLINE double ma_sinc(double x) -{ - if (x != 0) { - return sin(MA_PI_D*x) / (MA_PI_D*x); - } else { - return 1; - } -} - -#define ma_sincf(x) ((float)ma_sinc((double)(x))) - - -ma_uint64 ma_src_read_deinterleaved__passthrough(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData); -ma_uint64 ma_src_read_deinterleaved__linear(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData); -ma_uint64 ma_src_read_deinterleaved__sinc(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData); - -void ma_src__build_sinc_table__sinc(ma_src* pSRC) -{ - ma_uint32 i; - - ma_assert(pSRC != NULL); - - pSRC->sinc.table[0] = 1.0f; - for (i = 1; i < ma_countof(pSRC->sinc.table); i += 1) { - double x = i*MA_PI_D / MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION; - pSRC->sinc.table[i] = (float)(sin(x)/x); - } -} - -void ma_src__build_sinc_table__rectangular(ma_src* pSRC) -{ - /* This is the same as the base sinc table. */ - ma_src__build_sinc_table__sinc(pSRC); -} - -void ma_src__build_sinc_table__hann(ma_src* pSRC) -{ - ma_uint32 i; - - ma_src__build_sinc_table__sinc(pSRC); - - for (i = 0; i < ma_countof(pSRC->sinc.table); i += 1) { - double x = pSRC->sinc.table[i]; - double N = MA_SRC_SINC_MAX_WINDOW_WIDTH*2; - double n = ((double)(i) / MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION) + MA_SRC_SINC_MAX_WINDOW_WIDTH; - double w = 0.5 * (1 - cos((2*MA_PI_D*n) / (N))); - - pSRC->sinc.table[i] = (float)(x * w); - } -} - -ma_result ma_src_init(const ma_src_config* pConfig, ma_src* pSRC) -{ - if (pSRC == NULL) { - return MA_INVALID_ARGS; - } - - ma_zero_object(pSRC); - - if (pConfig == NULL || pConfig->onReadDeinterleaved == NULL) { - return MA_INVALID_ARGS; - } - if (pConfig->channels == 0 || pConfig->channels > MA_MAX_CHANNELS) { - return MA_INVALID_ARGS; - } - - pSRC->config = *pConfig; - - /* SIMD */ - pSRC->useSSE2 = ma_has_sse2() && !pConfig->noSSE2; - pSRC->useAVX2 = ma_has_avx2() && !pConfig->noAVX2; - pSRC->useAVX512 = ma_has_avx512f() && !pConfig->noAVX512; - pSRC->useNEON = ma_has_neon() && !pConfig->noNEON; - - if (pSRC->config.algorithm == ma_src_algorithm_sinc) { - /* Make sure the window width within bounds. */ - if (pSRC->config.sinc.windowWidth == 0) { - pSRC->config.sinc.windowWidth = MA_SRC_SINC_DEFAULT_WINDOW_WIDTH; - } - if (pSRC->config.sinc.windowWidth < MA_SRC_SINC_MIN_WINDOW_WIDTH) { - pSRC->config.sinc.windowWidth = MA_SRC_SINC_MIN_WINDOW_WIDTH; - } - if (pSRC->config.sinc.windowWidth > MA_SRC_SINC_MAX_WINDOW_WIDTH) { - pSRC->config.sinc.windowWidth = MA_SRC_SINC_MAX_WINDOW_WIDTH; - } - - /* Set up the lookup table. */ - switch (pSRC->config.sinc.windowFunction) { - case ma_src_sinc_window_function_hann: ma_src__build_sinc_table__hann(pSRC); break; - case ma_src_sinc_window_function_rectangular: ma_src__build_sinc_table__rectangular(pSRC); break; - default: return MA_INVALID_ARGS; /* <-- Hitting this means the window function is unknown to miniaudio. */ - } - } - - return MA_SUCCESS; -} - -ma_result ma_src_set_sample_rate(ma_src* pSRC, ma_uint32 sampleRateIn, ma_uint32 sampleRateOut) -{ - if (pSRC == NULL) { - return MA_INVALID_ARGS; - } - - /* Must have a sample rate of > 0. */ - if (sampleRateIn == 0 || sampleRateOut == 0) { - return MA_INVALID_ARGS; - } - - ma_atomic_exchange_32(&pSRC->config.sampleRateIn, sampleRateIn); - ma_atomic_exchange_32(&pSRC->config.sampleRateOut, sampleRateOut); - - return MA_SUCCESS; -} - -ma_uint64 ma_src_read_deinterleaved(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData) -{ - ma_src_algorithm algorithm; - - if (pSRC == NULL || frameCount == 0 || ppSamplesOut == NULL) { - return 0; - } - - algorithm = pSRC->config.algorithm; - - /* Can use a function pointer for this. */ - switch (algorithm) { - case ma_src_algorithm_none: return ma_src_read_deinterleaved__passthrough(pSRC, frameCount, ppSamplesOut, pUserData); - case ma_src_algorithm_linear: return ma_src_read_deinterleaved__linear( pSRC, frameCount, ppSamplesOut, pUserData); - case ma_src_algorithm_sinc: return ma_src_read_deinterleaved__sinc( pSRC, frameCount, ppSamplesOut, pUserData); - default: break; - } - - /* Should never get here. */ - return 0; -} - -ma_uint64 ma_src_read_deinterleaved__passthrough(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData) -{ - if (frameCount <= 0xFFFFFFFF) { - return pSRC->config.onReadDeinterleaved(pSRC, (ma_uint32)frameCount, ppSamplesOut, pUserData); - } else { - ma_uint32 iChannel; - ma_uint64 totalFramesRead; - float* ppNextSamplesOut[MA_MAX_CHANNELS]; - - for (iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) { - ppNextSamplesOut[iChannel] = (float*)ppSamplesOut[iChannel]; - } - - totalFramesRead = 0; - while (totalFramesRead < frameCount) { - ma_uint32 framesJustRead; - ma_uint64 framesRemaining = frameCount - totalFramesRead; - ma_uint64 framesToReadRightNow = framesRemaining; - if (framesToReadRightNow > 0xFFFFFFFF) { - framesToReadRightNow = 0xFFFFFFFF; - } - - framesJustRead = (ma_uint32)pSRC->config.onReadDeinterleaved(pSRC, (ma_uint32)framesToReadRightNow, (void**)ppNextSamplesOut, pUserData); - if (framesJustRead == 0) { - break; - } - - totalFramesRead += framesJustRead; - for (iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) { - ppNextSamplesOut[iChannel] += framesJustRead; - } - - if (framesJustRead < framesToReadRightNow) { - break; - } - } - - return totalFramesRead; - } -} - -ma_uint64 ma_src_read_deinterleaved__linear(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData) -{ - float* ppNextSamplesOut[MA_MAX_CHANNELS]; - float factor; - ma_uint32 maxFrameCountPerChunkIn; - ma_uint64 totalFramesRead; - - ma_assert(pSRC != NULL); - ma_assert(frameCount > 0); - ma_assert(ppSamplesOut != NULL); - - ma_copy_memory(ppNextSamplesOut, ppSamplesOut, sizeof(void*) * pSRC->config.channels); - - factor = (float)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut; - maxFrameCountPerChunkIn = ma_countof(pSRC->linear.input[0]); - - totalFramesRead = 0; - while (totalFramesRead < frameCount) { - ma_uint32 iChannel; - float tBeg; - float tEnd; - float tAvailable; - float tNext; - float* ppSamplesFromClient[MA_MAX_CHANNELS]; - ma_uint32 iNextFrame; - ma_uint32 maxOutputFramesToRead; - ma_uint32 maxOutputFramesToRead4; - ma_uint32 framesToReadFromClient; - ma_uint32 framesReadFromClient; - ma_uint64 framesRemaining = frameCount - totalFramesRead; - ma_uint64 framesToRead = framesRemaining; - if (framesToRead > 16384) { - framesToRead = 16384; /* <-- Keep this small because we're using 32-bit floats for calculating sample positions and I don't want to run out of precision with huge sample counts. */ - } - - - /* Read Input Data */ - tBeg = pSRC->linear.timeIn; - tEnd = tBeg + ((ma_int64)framesToRead*factor); /* Cast to int64 required for VC6. */ - - framesToReadFromClient = (ma_uint32)(tEnd) + 1 + 1; /* +1 to make tEnd 1-based and +1 because we always need to an extra sample for interpolation. */ - if (framesToReadFromClient >= maxFrameCountPerChunkIn) { - framesToReadFromClient = maxFrameCountPerChunkIn; - } - - for (iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) { - ppSamplesFromClient[iChannel] = pSRC->linear.input[iChannel] + pSRC->linear.leftoverFrames; - } - - framesReadFromClient = 0; - if (framesToReadFromClient > pSRC->linear.leftoverFrames) { - framesReadFromClient = (ma_uint32)pSRC->config.onReadDeinterleaved(pSRC, (ma_uint32)framesToReadFromClient - pSRC->linear.leftoverFrames, (void**)ppSamplesFromClient, pUserData); - } - - framesReadFromClient += pSRC->linear.leftoverFrames; /* <-- You can sort of think of it as though we've re-read the leftover samples from the client. */ - if (framesReadFromClient < 2) { - break; - } - - for (iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) { - ppSamplesFromClient[iChannel] = pSRC->linear.input[iChannel]; - } - - - /* Write Output Data */ - - /* - At this point we have a bunch of frames that the client has given to us for processing. From this we can determine the maximum number of output frames - that can be processed from this input. We want to output as many samples as possible from our input data. - */ - tAvailable = framesReadFromClient - tBeg - 1; /* Subtract 1 because the last input sample is needed for interpolation and cannot be included in the output sample count calculation. */ - - maxOutputFramesToRead = (ma_uint32)(tAvailable / factor); - if (maxOutputFramesToRead == 0) { - maxOutputFramesToRead = 1; - } - if (maxOutputFramesToRead > framesToRead) { - maxOutputFramesToRead = (ma_uint32)framesToRead; - } - - /* Output frames are always read in groups of 4 because I'm planning on using this as a reference for some SIMD-y stuff later. */ - maxOutputFramesToRead4 = maxOutputFramesToRead/4; - for (iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) { - ma_uint32 iFrameOut; - float t0 = pSRC->linear.timeIn + factor*0; - float t1 = pSRC->linear.timeIn + factor*1; - float t2 = pSRC->linear.timeIn + factor*2; - float t3 = pSRC->linear.timeIn + factor*3; - float t; - - for (iFrameOut = 0; iFrameOut < maxOutputFramesToRead4; iFrameOut += 1) { - float iPrevSample0 = (float)floor(t0); - float iPrevSample1 = (float)floor(t1); - float iPrevSample2 = (float)floor(t2); - float iPrevSample3 = (float)floor(t3); - - float iNextSample0 = iPrevSample0 + 1; - float iNextSample1 = iPrevSample1 + 1; - float iNextSample2 = iPrevSample2 + 1; - float iNextSample3 = iPrevSample3 + 1; - - float alpha0 = t0 - iPrevSample0; - float alpha1 = t1 - iPrevSample1; - float alpha2 = t2 - iPrevSample2; - float alpha3 = t3 - iPrevSample3; - - float prevSample0 = ppSamplesFromClient[iChannel][(ma_uint32)iPrevSample0]; - float prevSample1 = ppSamplesFromClient[iChannel][(ma_uint32)iPrevSample1]; - float prevSample2 = ppSamplesFromClient[iChannel][(ma_uint32)iPrevSample2]; - float prevSample3 = ppSamplesFromClient[iChannel][(ma_uint32)iPrevSample3]; - - float nextSample0 = ppSamplesFromClient[iChannel][(ma_uint32)iNextSample0]; - float nextSample1 = ppSamplesFromClient[iChannel][(ma_uint32)iNextSample1]; - float nextSample2 = ppSamplesFromClient[iChannel][(ma_uint32)iNextSample2]; - float nextSample3 = ppSamplesFromClient[iChannel][(ma_uint32)iNextSample3]; - - ppNextSamplesOut[iChannel][iFrameOut*4 + 0] = ma_mix_f32_fast(prevSample0, nextSample0, alpha0); - ppNextSamplesOut[iChannel][iFrameOut*4 + 1] = ma_mix_f32_fast(prevSample1, nextSample1, alpha1); - ppNextSamplesOut[iChannel][iFrameOut*4 + 2] = ma_mix_f32_fast(prevSample2, nextSample2, alpha2); - ppNextSamplesOut[iChannel][iFrameOut*4 + 3] = ma_mix_f32_fast(prevSample3, nextSample3, alpha3); - - t0 += factor*4; - t1 += factor*4; - t2 += factor*4; - t3 += factor*4; - } - - t = pSRC->linear.timeIn + (factor*maxOutputFramesToRead4*4); - for (iFrameOut = (maxOutputFramesToRead4*4); iFrameOut < maxOutputFramesToRead; iFrameOut += 1) { - float iPrevSample = (float)floor(t); - float iNextSample = iPrevSample + 1; - float alpha = t - iPrevSample; - float prevSample; - float nextSample; - - ma_assert(iPrevSample < ma_countof(pSRC->linear.input[iChannel])); - ma_assert(iNextSample < ma_countof(pSRC->linear.input[iChannel])); - - prevSample = ppSamplesFromClient[iChannel][(ma_uint32)iPrevSample]; - nextSample = ppSamplesFromClient[iChannel][(ma_uint32)iNextSample]; - - ppNextSamplesOut[iChannel][iFrameOut] = ma_mix_f32_fast(prevSample, nextSample, alpha); - - t += factor; - } - - ppNextSamplesOut[iChannel] += maxOutputFramesToRead; - } - - totalFramesRead += maxOutputFramesToRead; - - - /* Residual */ - tNext = pSRC->linear.timeIn + (maxOutputFramesToRead*factor); - - pSRC->linear.timeIn = tNext; - ma_assert(tNext <= framesReadFromClient+1); - - iNextFrame = (ma_uint32)floor(tNext); - pSRC->linear.leftoverFrames = framesReadFromClient - iNextFrame; - pSRC->linear.timeIn = tNext - iNextFrame; - - for (iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) { - ma_uint32 iFrame; - for (iFrame = 0; iFrame < pSRC->linear.leftoverFrames; ++iFrame) { - float sample = ppSamplesFromClient[iChannel][framesReadFromClient-pSRC->linear.leftoverFrames + iFrame]; - ppSamplesFromClient[iChannel][iFrame] = sample; - } - } - - - /* Exit the loop if we've found everything from the client. */ - if (framesReadFromClient < framesToReadFromClient) { - break; - } - } - - return totalFramesRead; -} - - -ma_src_config ma_src_config_init_new() -{ - ma_src_config config; - ma_zero_object(&config); - - return config; -} - -ma_src_config ma_src_config_init(ma_uint32 sampleRateIn, ma_uint32 sampleRateOut, ma_uint32 channels, ma_src_read_deinterleaved_proc onReadDeinterleaved, void* pUserData) -{ - ma_src_config config = ma_src_config_init_new(); - config.sampleRateIn = sampleRateIn; - config.sampleRateOut = sampleRateOut; - config.channels = channels; - config.onReadDeinterleaved = onReadDeinterleaved; - config.pUserData = pUserData; - - return config; -} - - -/************************************************************************************************************************************************************** - -Sinc Sample Rate Conversion -=========================== - -The sinc SRC algorithm uses a windowed sinc to perform interpolation of samples. Currently, miniaudio's implementation supports rectangular and Hann window -methods. - -Whenever an output sample is being computed, it looks at a sub-section of the input samples. I've called this sub-section in the code below the "window", -which I realize is a bit ambigous with the mathematical "window", but it works for me when I need to conceptualize things in my head. The window is made up -of two halves. The first half contains past input samples (initialized to zero), and the second half contains future input samples. As time moves forward -and input samples are consumed, the window moves forward. The larger the window, the better the quality at the expense of slower processing. The window is -limited the range [MA_SRC_SINC_MIN_WINDOW_WIDTH, MA_SRC_SINC_MAX_WINDOW_WIDTH] and defaults to MA_SRC_SINC_DEFAULT_WINDOW_WIDTH. - -Input samples are cached for efficiency (to prevent frequently requesting tiny numbers of samples from the client). When the window gets to the end of the -cache, it's moved back to the start, and more samples are read from the client. If the client has no more data to give, the cache is filled with zeros and -the last of the input samples will be consumed. Once the last of the input samples have been consumed, no more samples will be output. - - -When reading output samples, we always first read whatever is already in the input cache. Only when the cache has been fully consumed do we read more data -from the client. - -To access samples in the input buffer you do so relative to the window. When the window itself is at position 0, the first item in the buffer is accessed -with "windowPos + windowWidth". Generally, to access any sample relative to the window you do "windowPos + windowWidth + sampleIndexRelativeToWindow". - -**************************************************************************************************************************************************************/ - -/* Comment this to disable interpolation of table lookups. Less accurate, but faster. */ -#define MA_USE_SINC_TABLE_INTERPOLATION - -/* Retrieves a sample from the input buffer's window. Values >= 0 retrieve future samples. Negative values return past samples. */ -static MA_INLINE float ma_src_sinc__get_input_sample_from_window(const ma_src* pSRC, ma_uint32 channel, ma_uint32 windowPosInSamples, ma_int32 sampleIndex) -{ - ma_assert(pSRC != NULL); - ma_assert(channel < pSRC->config.channels); - ma_assert(sampleIndex >= -(ma_int32)pSRC->config.sinc.windowWidth); - ma_assert(sampleIndex < (ma_int32)pSRC->config.sinc.windowWidth); - - /* The window should always be contained within the input cache. */ - ma_assert(windowPosInSamples < ma_countof(pSRC->sinc.input[0]) - pSRC->config.sinc.windowWidth); - - return pSRC->sinc.input[channel][windowPosInSamples + pSRC->config.sinc.windowWidth + sampleIndex]; -} - -static MA_INLINE float ma_src_sinc__interpolation_factor(const ma_src* pSRC, float x) -{ - float xabs; - ma_int32 ixabs; - - ma_assert(pSRC != NULL); - - xabs = (float)fabs(x); - xabs = xabs * MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION; - ixabs = (ma_int32)xabs; - -#if defined(MA_USE_SINC_TABLE_INTERPOLATION) - { - float a = xabs - ixabs; - return ma_mix_f32_fast(pSRC->sinc.table[ixabs], pSRC->sinc.table[ixabs+1], a); - } -#else - return pSRC->sinc.table[ixabs]; -#endif -} - -#if defined(MA_SUPPORT_SSE2) -static MA_INLINE __m128 ma_fabsf_sse2(__m128 x) -{ - return _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)), x); -} - -static MA_INLINE __m128 ma_truncf_sse2(__m128 x) -{ - return _mm_cvtepi32_ps(_mm_cvttps_epi32(x)); -} - -static MA_INLINE __m128 ma_src_sinc__interpolation_factor__sse2(const ma_src* pSRC, __m128 x) -{ - __m128 resolution128; - __m128 xabs; - __m128i ixabs; - __m128 lo; - __m128 hi; - __m128 a; - __m128 r; - int* ixabsv; - - resolution128 = _mm_set1_ps(MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION); - xabs = ma_fabsf_sse2(x); - xabs = _mm_mul_ps(xabs, resolution128); - ixabs = _mm_cvttps_epi32(xabs); - - ixabsv = (int*)&ixabs; - - lo = _mm_set_ps( - pSRC->sinc.table[ixabsv[3]], - pSRC->sinc.table[ixabsv[2]], - pSRC->sinc.table[ixabsv[1]], - pSRC->sinc.table[ixabsv[0]] - ); - - hi = _mm_set_ps( - pSRC->sinc.table[ixabsv[3]+1], - pSRC->sinc.table[ixabsv[2]+1], - pSRC->sinc.table[ixabsv[1]+1], - pSRC->sinc.table[ixabsv[0]+1] - ); - - a = _mm_sub_ps(xabs, _mm_cvtepi32_ps(ixabs)); - r = ma_mix_f32_fast__sse2(lo, hi, a); - - return r; -} -#endif - -#if defined(MA_SUPPORT_AVX2) -static MA_INLINE __m256 ma_fabsf_avx2(__m256 x) -{ - return _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF)), x); -} - -#if 0 -static MA_INLINE __m256 ma_src_sinc__interpolation_factor__avx2(const ma_src* pSRC, __m256 x) -{ - __m256 resolution256 = _mm256_set1_ps(MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION); - __m256 xabs = ma_fabsf_avx2(x); - - xabs = _mm256_mul_ps(xabs, resolution256); - - __m256i ixabs = _mm256_cvttps_epi32(xabs); - __m256 a = _mm256_sub_ps(xabs, _mm256_cvtepi32_ps(ixabs)); - - - int* ixabsv = (int*)&ixabs; - - __m256 lo = _mm256_set_ps( - pSRC->sinc.table[ixabsv[7]], - pSRC->sinc.table[ixabsv[6]], - pSRC->sinc.table[ixabsv[5]], - pSRC->sinc.table[ixabsv[4]], - pSRC->sinc.table[ixabsv[3]], - pSRC->sinc.table[ixabsv[2]], - pSRC->sinc.table[ixabsv[1]], - pSRC->sinc.table[ixabsv[0]] - ); - - __m256 hi = _mm256_set_ps( - pSRC->sinc.table[ixabsv[7]+1], - pSRC->sinc.table[ixabsv[6]+1], - pSRC->sinc.table[ixabsv[5]+1], - pSRC->sinc.table[ixabsv[4]+1], - pSRC->sinc.table[ixabsv[3]+1], - pSRC->sinc.table[ixabsv[2]+1], - pSRC->sinc.table[ixabsv[1]+1], - pSRC->sinc.table[ixabsv[0]+1] - ); - - __m256 r = ma_mix_f32_fast__avx2(lo, hi, a); - - return r; -} -#endif - -#endif - -#if defined(MA_SUPPORT_NEON) -static MA_INLINE float32x4_t ma_fabsf_neon(float32x4_t x) -{ - return vabdq_f32(vmovq_n_f32(0), x); -} - -static MA_INLINE float32x4_t ma_src_sinc__interpolation_factor__neon(const ma_src* pSRC, float32x4_t x) -{ - float32x4_t xabs; - int32x4_t ixabs; - float32x4_t a; - float32x4_t r; - int* ixabsv; - float lo[4]; - float hi[4]; - - xabs = ma_fabsf_neon(x); - xabs = vmulq_n_f32(xabs, MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION); - ixabs = vcvtq_s32_f32(xabs); - - ixabsv = (int*)&ixabs; - - lo[0] = pSRC->sinc.table[ixabsv[0]]; - lo[1] = pSRC->sinc.table[ixabsv[1]]; - lo[2] = pSRC->sinc.table[ixabsv[2]]; - lo[3] = pSRC->sinc.table[ixabsv[3]]; - - hi[0] = pSRC->sinc.table[ixabsv[0]+1]; - hi[1] = pSRC->sinc.table[ixabsv[1]+1]; - hi[2] = pSRC->sinc.table[ixabsv[2]+1]; - hi[3] = pSRC->sinc.table[ixabsv[3]+1]; - - a = vsubq_f32(xabs, vcvtq_f32_s32(ixabs)); - r = ma_mix_f32_fast__neon(vld1q_f32(lo), vld1q_f32(hi), a); - - return r; -} -#endif - -ma_uint64 ma_src_read_deinterleaved__sinc(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData) -{ - float factor; - float inverseFactor; - ma_int32 windowWidth; - ma_int32 windowWidth2; - ma_int32 windowWidthSIMD; - ma_int32 windowWidthSIMD2; - float* ppNextSamplesOut[MA_MAX_CHANNELS]; - float _windowSamplesUnaligned[MA_SRC_SINC_MAX_WINDOW_WIDTH*2 + MA_SIMD_ALIGNMENT]; - float* windowSamples; - float _iWindowFUnaligned[MA_SRC_SINC_MAX_WINDOW_WIDTH*2 + MA_SIMD_ALIGNMENT]; - float* iWindowF; - ma_int32 i; - ma_uint64 totalOutputFramesRead; - - ma_assert(pSRC != NULL); - ma_assert(frameCount > 0); - ma_assert(ppSamplesOut != NULL); - - factor = (float)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut; - inverseFactor = 1/factor; - - windowWidth = (ma_int32)pSRC->config.sinc.windowWidth; - windowWidth2 = windowWidth*2; - - /* - There are cases where it's actually more efficient to increase the window width so that it's aligned with the respective - SIMD pipeline being used. - */ - windowWidthSIMD = windowWidth; - if (pSRC->useNEON) { - windowWidthSIMD = (windowWidthSIMD + 1) & ~(1); - } else if (pSRC->useAVX512) { - windowWidthSIMD = (windowWidthSIMD + 7) & ~(7); - } else if (pSRC->useAVX2) { - windowWidthSIMD = (windowWidthSIMD + 3) & ~(3); - } else if (pSRC->useSSE2) { - windowWidthSIMD = (windowWidthSIMD + 1) & ~(1); - } - - windowWidthSIMD2 = windowWidthSIMD*2; - (void)windowWidthSIMD2; /* <-- Silence a warning when SIMD is disabled. */ - - ma_copy_memory(ppNextSamplesOut, ppSamplesOut, sizeof(void*) * pSRC->config.channels); - - windowSamples = (float*)(((ma_uintptr)_windowSamplesUnaligned + MA_SIMD_ALIGNMENT-1) & ~(MA_SIMD_ALIGNMENT-1)); - ma_zero_memory(windowSamples, MA_SRC_SINC_MAX_WINDOW_WIDTH*2 * sizeof(float)); - - iWindowF = (float*)(((ma_uintptr)_iWindowFUnaligned + MA_SIMD_ALIGNMENT-1) & ~(MA_SIMD_ALIGNMENT-1)); - ma_zero_memory(iWindowF, MA_SRC_SINC_MAX_WINDOW_WIDTH*2 * sizeof(float)); - - for (i = 0; i < windowWidth2; ++i) { - iWindowF[i] = (float)(i - windowWidth); - } - - totalOutputFramesRead = 0; - while (totalOutputFramesRead < frameCount) { - ma_uint32 maxInputSamplesAvailableInCache; - float timeInBeg; - float timeInEnd; - ma_uint64 maxOutputFramesToRead; - ma_uint64 outputFramesRemaining; - ma_uint64 outputFramesToRead; - ma_uint32 iChannel; - ma_uint32 prevWindowPosInSamples; - ma_uint32 availableOutputFrames; - - /* - The maximum number of frames we can read this iteration depends on how many input samples we have available to us. This is the number - of input samples between the end of the window and the end of the cache. - */ - maxInputSamplesAvailableInCache = ma_countof(pSRC->sinc.input[0]) - (pSRC->config.sinc.windowWidth*2) - pSRC->sinc.windowPosInSamples; - if (maxInputSamplesAvailableInCache > pSRC->sinc.inputFrameCount) { - maxInputSamplesAvailableInCache = pSRC->sinc.inputFrameCount; - } - - /* Never consume the tail end of the input data if requested. */ - if (pSRC->config.neverConsumeEndOfInput) { - if (maxInputSamplesAvailableInCache >= pSRC->config.sinc.windowWidth) { - maxInputSamplesAvailableInCache -= pSRC->config.sinc.windowWidth; - } else { - maxInputSamplesAvailableInCache = 0; - } - } - - timeInBeg = pSRC->sinc.timeIn; - timeInEnd = (float)(pSRC->sinc.windowPosInSamples + maxInputSamplesAvailableInCache); - - ma_assert(timeInBeg >= 0); - ma_assert(timeInBeg <= timeInEnd); - - maxOutputFramesToRead = (ma_uint64)(((timeInEnd - timeInBeg) * inverseFactor)); - - outputFramesRemaining = frameCount - totalOutputFramesRead; - outputFramesToRead = outputFramesRemaining; - if (outputFramesToRead > maxOutputFramesToRead) { - outputFramesToRead = maxOutputFramesToRead; - } - - for (iChannel = 0; iChannel < pSRC->config.channels; iChannel += 1) { - /* Do SRC. */ - float timeIn = timeInBeg; - ma_uint32 iSample; - for (iSample = 0; iSample < outputFramesToRead; iSample += 1) { - float sampleOut = 0; - float iTimeInF = ma_floorf(timeIn); - ma_uint32 iTimeIn = (ma_uint32)iTimeInF; - ma_int32 iWindow = 0; - float tScalar; - - /* Pre-load the window samples into an aligned buffer to begin with. Need to put these into an aligned buffer to make SIMD easier. */ - windowSamples[0] = 0; /* <-- The first sample is always zero. */ - for (i = 1; i < windowWidth2; ++i) { - windowSamples[i] = pSRC->sinc.input[iChannel][iTimeIn + i]; - } - -#if defined(MA_SUPPORT_AVX2) || defined(MA_SUPPORT_AVX512) - if (pSRC->useAVX2 || pSRC->useAVX512) { - __m256i ixabs[MA_SRC_SINC_MAX_WINDOW_WIDTH*2/8]; - __m256 a[MA_SRC_SINC_MAX_WINDOW_WIDTH*2/8]; - __m256 resolution256; - __m256 t; - __m256 r; - ma_int32 windowWidth8; - ma_int32 iWindow8; - - resolution256 = _mm256_set1_ps(MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION); - - t = _mm256_set1_ps((timeIn - iTimeInF)); - r = _mm256_set1_ps(0); - - windowWidth8 = windowWidthSIMD2 >> 3; - for (iWindow8 = 0; iWindow8 < windowWidth8; iWindow8 += 1) { - __m256 w = *((__m256*)iWindowF + iWindow8); - - __m256 xabs = _mm256_sub_ps(t, w); - xabs = ma_fabsf_avx2(xabs); - xabs = _mm256_mul_ps(xabs, resolution256); - - ixabs[iWindow8] = _mm256_cvttps_epi32(xabs); - a[iWindow8] = _mm256_sub_ps(xabs, _mm256_cvtepi32_ps(ixabs[iWindow8])); - } - - for (iWindow8 = 0; iWindow8 < windowWidth8; iWindow8 += 1) { - int* ixabsv = (int*)&ixabs[iWindow8]; - - __m256 lo = _mm256_set_ps( - pSRC->sinc.table[ixabsv[7]], - pSRC->sinc.table[ixabsv[6]], - pSRC->sinc.table[ixabsv[5]], - pSRC->sinc.table[ixabsv[4]], - pSRC->sinc.table[ixabsv[3]], - pSRC->sinc.table[ixabsv[2]], - pSRC->sinc.table[ixabsv[1]], - pSRC->sinc.table[ixabsv[0]] - ); - - __m256 hi = _mm256_set_ps( - pSRC->sinc.table[ixabsv[7]+1], - pSRC->sinc.table[ixabsv[6]+1], - pSRC->sinc.table[ixabsv[5]+1], - pSRC->sinc.table[ixabsv[4]+1], - pSRC->sinc.table[ixabsv[3]+1], - pSRC->sinc.table[ixabsv[2]+1], - pSRC->sinc.table[ixabsv[1]+1], - pSRC->sinc.table[ixabsv[0]+1] - ); - - __m256 s = *((__m256*)windowSamples + iWindow8); - r = _mm256_add_ps(r, _mm256_mul_ps(s, ma_mix_f32_fast__avx2(lo, hi, a[iWindow8]))); - } - - /* Horizontal add. */ - __m256 x = _mm256_hadd_ps(r, _mm256_permute2f128_ps(r, r, 1)); - x = _mm256_hadd_ps(x, x); - x = _mm256_hadd_ps(x, x); - sampleOut += _mm_cvtss_f32(_mm256_castps256_ps128(x)); - - iWindow += windowWidth8 * 8; - } - else -#endif -#if defined(MA_SUPPORT_SSE2) - if (pSRC->useSSE2) { - __m128 t = _mm_set1_ps((timeIn - iTimeInF)); - __m128 r = _mm_set1_ps(0); - - ma_int32 windowWidth4 = windowWidthSIMD2 >> 2; - ma_int32 iWindow4; - for (iWindow4 = 0; iWindow4 < windowWidth4; iWindow4 += 1) { - __m128* s = (__m128*)windowSamples + iWindow4; - __m128* w = (__m128*)iWindowF + iWindow4; - - __m128 a = ma_src_sinc__interpolation_factor__sse2(pSRC, _mm_sub_ps(t, *w)); - r = _mm_add_ps(r, _mm_mul_ps(*s, a)); - } - - sampleOut += ((float*)(&r))[0]; - sampleOut += ((float*)(&r))[1]; - sampleOut += ((float*)(&r))[2]; - sampleOut += ((float*)(&r))[3]; - - iWindow += windowWidth4 * 4; - } - else -#endif -#if defined(MA_SUPPORT_NEON) - if (pSRC->useNEON) { - float32x4_t t = vmovq_n_f32((timeIn - iTimeInF)); - float32x4_t r = vmovq_n_f32(0); - - ma_int32 windowWidth4 = windowWidthSIMD2 >> 2; - ma_int32 iWindow4; - for (iWindow4 = 0; iWindow4 < windowWidth4; iWindow4 += 1) { - float32x4_t* s = (float32x4_t*)windowSamples + iWindow4; - float32x4_t* w = (float32x4_t*)iWindowF + iWindow4; - - float32x4_t a = ma_src_sinc__interpolation_factor__neon(pSRC, vsubq_f32(t, *w)); - r = vaddq_f32(r, vmulq_f32(*s, a)); - } - - sampleOut += ((float*)(&r))[0]; - sampleOut += ((float*)(&r))[1]; - sampleOut += ((float*)(&r))[2]; - sampleOut += ((float*)(&r))[3]; - - iWindow += windowWidth4 * 4; - } - else -#endif - { - iWindow += 1; /* The first one is a dummy for SIMD alignment purposes. Skip it. */ - } - - /* Non-SIMD/Reference implementation. */ - tScalar = (timeIn - iTimeIn); - for (; iWindow < windowWidth2; iWindow += 1) { - float s = windowSamples[iWindow]; - float w = iWindowF[iWindow]; - - float a = ma_src_sinc__interpolation_factor(pSRC, (tScalar - w)); - float r = s * a; - - sampleOut += r; - } - - ppNextSamplesOut[iChannel][iSample] = (float)sampleOut; - - timeIn += factor; - } - - ppNextSamplesOut[iChannel] += outputFramesToRead; - } - - totalOutputFramesRead += outputFramesToRead; - - prevWindowPosInSamples = pSRC->sinc.windowPosInSamples; - - pSRC->sinc.timeIn += ((ma_int64)outputFramesToRead * factor); /* Cast to int64 required for VC6. */ - pSRC->sinc.windowPosInSamples = (ma_uint32)pSRC->sinc.timeIn; - pSRC->sinc.inputFrameCount -= pSRC->sinc.windowPosInSamples - prevWindowPosInSamples; - - /* If the window has reached a point where we cannot read a whole output sample it needs to be moved back to the start. */ - availableOutputFrames = (ma_uint32)((timeInEnd - pSRC->sinc.timeIn) * inverseFactor); - - if (availableOutputFrames == 0) { - size_t samplesToMove = ma_countof(pSRC->sinc.input[0]) - pSRC->sinc.windowPosInSamples; - - pSRC->sinc.timeIn -= ma_floorf(pSRC->sinc.timeIn); - pSRC->sinc.windowPosInSamples = 0; - - /* Move everything from the end of the cache up to the front. */ - for (iChannel = 0; iChannel < pSRC->config.channels; iChannel += 1) { - memmove(pSRC->sinc.input[iChannel], pSRC->sinc.input[iChannel] + ma_countof(pSRC->sinc.input[iChannel]) - samplesToMove, samplesToMove * sizeof(*pSRC->sinc.input[iChannel])); - } - } - - /* Read more data from the client if required. */ - if (pSRC->isEndOfInputLoaded) { - pSRC->isEndOfInputLoaded = MA_FALSE; - break; - } - - /* - Everything beyond this point is reloading. If we're at the end of the input data we do _not_ want to try reading any more in this function call. If the - caller wants to keep trying, they can reload their internal data sources and call this function again. We should never be - */ - ma_assert(pSRC->isEndOfInputLoaded == MA_FALSE); - - if (pSRC->sinc.inputFrameCount <= pSRC->config.sinc.windowWidth || availableOutputFrames == 0) { - float* ppInputDst[MA_MAX_CHANNELS] = {0}; - ma_uint32 framesToReadFromClient; - ma_uint32 framesReadFromClient; - ma_uint32 leftoverFrames; - - for (iChannel = 0; iChannel < pSRC->config.channels; iChannel += 1) { - ppInputDst[iChannel] = pSRC->sinc.input[iChannel] + pSRC->config.sinc.windowWidth + pSRC->sinc.inputFrameCount; - } - - /* Now read data from the client. */ - framesToReadFromClient = ma_countof(pSRC->sinc.input[0]) - (pSRC->config.sinc.windowWidth + pSRC->sinc.inputFrameCount); - - framesReadFromClient = 0; - if (framesToReadFromClient > 0) { - framesReadFromClient = pSRC->config.onReadDeinterleaved(pSRC, framesToReadFromClient, (void**)ppInputDst, pUserData); - } - - if (framesReadFromClient != framesToReadFromClient) { - pSRC->isEndOfInputLoaded = MA_TRUE; - } else { - pSRC->isEndOfInputLoaded = MA_FALSE; - } - - if (framesReadFromClient != 0) { - pSRC->sinc.inputFrameCount += framesReadFromClient; - } else { - /* We couldn't get anything more from the client. If no more output samples can be computed from the available input samples we need to return. */ - if (pSRC->config.neverConsumeEndOfInput) { - if ((pSRC->sinc.inputFrameCount * inverseFactor) <= pSRC->config.sinc.windowWidth) { - break; - } - } else { - if ((pSRC->sinc.inputFrameCount * inverseFactor) < 1) { - break; - } - } - } - - /* Anything left over in the cache must be set to zero. */ - leftoverFrames = ma_countof(pSRC->sinc.input[0]) - (pSRC->config.sinc.windowWidth + pSRC->sinc.inputFrameCount); - if (leftoverFrames > 0) { - for (iChannel = 0; iChannel < pSRC->config.channels; iChannel += 1) { - ma_zero_memory(pSRC->sinc.input[iChannel] + pSRC->config.sinc.windowWidth + pSRC->sinc.inputFrameCount, leftoverFrames * sizeof(float)); - } - } - } - } - - return totalOutputFramesRead; -} - - - /************************************************************************************************************************************************************** Format Conversion