Resampler: Optimization to s16, no LPF code path.

This commit is contained in:
David Reid
2026-02-07 11:26:05 +10:00
parent 4b8eb8588d
commit c0b57c3aea
+239 -88
View File
@@ -5603,6 +5603,7 @@ typedef struct
ma_uint32 inAdvanceFrac;
ma_uint32 inTimeInt;
ma_uint32 inTimeFrac;
ma_uint32 cachedFrameCount;
union
{
float* f32;
@@ -59104,7 +59105,13 @@ MA_API ma_result ma_linear_resampler_init_preallocated(const ma_linear_resampler
return result;
}
pResampler->inTimeInt = 1; /* Set this to one to force an input sample to always be loaded for the first output frame. */
/* TODO: Remove the `inTimeInt = 1` thing when the new optimizations are done. */
if (pResampler->lpfOrder == 0) {
pResampler->inTimeInt = 0;
} else {
pResampler->inTimeInt = 1; /* Set this to one to force an input sample to always be loaded for the first output frame. */
}
pResampler->inTimeFrac = 0;
return MA_SUCCESS;
@@ -59155,6 +59162,16 @@ MA_API void ma_linear_resampler_uninit(ma_linear_resampler* pResampler, const ma
#define MA_LINEAR_RESAMPLER_LERP_SHIFT 12
/*
Linearly interpolates between two s16 samples using a fixed-point weight.

x - Sample returned when the weight is 0.
y - Sample that is approached as the weight grows towards (1 << MA_LINEAR_RESAMPLER_LERP_SHIFT).
a - Interpolation weight scaled by (1 << MA_LINEAR_RESAMPLER_LERP_SHIFT). It must not exceed that scale,
    which keeps the signed product below comfortably inside 32 bits (at most 65535 * 4096).

Returns x + (((y - x) * a) >> MA_LINEAR_RESAMPLER_LERP_SHIFT), truncated to ma_int16.
*/
static MA_INLINE ma_int16 ma_linear_resampler_mix_s16(ma_int16 x, ma_int16 y, ma_uint32 a)
{
    ma_int32 d;
    ma_int32 n;

    d = y - x;

    /*
    The weight is cast to a signed type so the multiplication is performed in signed arithmetic. Without the
    cast the usual arithmetic conversions turn a negative difference into a large unsigned value, and the
    result only comes out right because of the implementation-defined conversion back to ma_int32.
    */
    n = d * (ma_int32)a;

    /* NOTE(review): right-shifting a negative value is implementation-defined in C; this assumes an arithmetic shift. */
    return (ma_int16)(x + (n >> MA_LINEAR_RESAMPLER_LERP_SHIFT));
}
static MA_INLINE void ma_linear_resampler_interpolate_frame_s16(ma_linear_resampler* pResampler, ma_uint32 invSampleRateOut, ma_int16* MA_RESTRICT pFrameOut)
{
ma_uint32 c;
@@ -59205,6 +59222,109 @@ static MA_INLINE void ma_linear_resampler_interpolate_frame_f32(ma_linear_resamp
}
}
/*
Resamples interleaved s16 frames using plain linear interpolation with no low-pass filtering.

Each output frame is interpolated between input frame `inTimeInt` and the frame that follows it, weighted
by `inTimeFrac * invSampleRateOut`. When the last input frame of a call has no successor to interpolate
against, it is stored in `x0.s16` and `cachedFrameCount` is set to 1 so that it can be paired with the first
input frame of the next call.

pResampler       - Resampler state. inTimeInt, inTimeFrac, cachedFrameCount and x0.s16 are updated.
pFramesInS16     - Interleaved input frames. Must not be NULL.
pFrameCountIn    - [in] Number of input frames available. [out] Number of input frames consumed.
pFramesOutS16    - Interleaved output buffer. Must not be NULL.
pFrameCountOut   - [in] Capacity of the output buffer in frames. [out] Number of output frames written.
invSampleRateOut - Fixed-point factor that converts inTimeFrac into an interpolation weight. The callers
                   visible alongside this function pass (1 << MA_LINEAR_RESAMPLER_LERP_SHIFT) / sampleRateOut.

Always returns MA_SUCCESS.
*/
static MA_INLINE ma_result ma_linear_resampler_process_pcm_frames_s16_no_lpf(ma_linear_resampler* pResampler, const ma_int16* pFramesInS16, ma_uint64* pFrameCountIn, ma_int16* pFramesOutS16, ma_uint64* pFrameCountOut, ma_uint32 invSampleRateOut)
{
    ma_uint64 frameCountIn;
    ma_uint64 frameCountOut;
    ma_uint64 framesProcessedIn;
    ma_uint64 framesProcessedOut;
    ma_uint32 c;

    MA_ASSERT(pResampler != NULL);
    MA_ASSERT(pFramesInS16 != NULL);
    MA_ASSERT(pFrameCountIn != NULL);
    MA_ASSERT(pFramesOutS16 != NULL);
    MA_ASSERT(pFrameCountOut != NULL);

    frameCountIn       = *pFrameCountIn;
    frameCountOut      = *pFrameCountOut;
    framesProcessedOut = 0;

    if (pResampler->inTimeInt == 0) {
        /* A frame cached by a previous call sits one frame before the start of this input buffer. */
        MA_ASSERT(pResampler->cachedFrameCount <= 1); /* There is at most one cached frame. */

        while (pResampler->cachedFrameCount > 0 && frameCountIn > 0 && framesProcessedOut < frameCountOut) {
            ma_uint32 a = pResampler->inTimeFrac * invSampleRateOut;

            /* Interpolate between the cached frame and the first frame of the new input. */
            for (c = 0; c < pResampler->channels; c += 1) {
                pFramesOutS16[c] = ma_linear_resampler_mix_s16(pResampler->x0.s16[c], pFramesInS16[c], a);
            }

            pFramesOutS16      += pResampler->channels;
            framesProcessedOut += 1;

            /* Advance time forward. */
            pResampler->inTimeInt  += pResampler->inAdvanceInt;
            pResampler->inTimeFrac += pResampler->inAdvanceFrac;
            if (pResampler->inTimeFrac >= pResampler->sampleRateOut) {
                pResampler->inTimeFrac -= pResampler->sampleRateOut;
                pResampler->inTimeInt  += 1;
            }

            /*
            Once time has moved past the cached frame it is fully consumed. Subtract one so that inTimeInt
            becomes an index into pFramesInS16 rather than being relative to the cached frame.
            */
            if (pResampler->inTimeInt > 0) {
                pResampler->inTimeInt       -= 1;
                pResampler->cachedFrameCount = 0;
            }
        }
    } else {
        /*
        The rate must have changed between calls. The read position is already past the cached frame, so the
        cached frame can never be needed again. It must be explicitly dropped here, otherwise a later call that
        starts with inTimeInt == 0 would interpolate against stale data and wrongly subtract a frame of time.
        */
        pResampler->cachedFrameCount = 0;
    }

    while (framesProcessedOut < frameCountOut && pResampler->inTimeInt < frameCountIn) {
        /* Widen to 64 bits before adding or scaling so that neither the look-ahead nor the sample offset can wrap. */
        ma_uint64 iFrame = pResampler->inTimeInt;
        const ma_int16* pFrame0 = pFramesInS16 + (iFrame * pResampler->channels);

        if (iFrame + 1 < frameCountIn) {
            const ma_int16* pFrame1 = pFrame0 + pResampler->channels;
            ma_uint32 a = pResampler->inTimeFrac * invSampleRateOut;

            for (c = 0; c < pResampler->channels; c += 1) {
                pFramesOutS16[c] = ma_linear_resampler_mix_s16(pFrame0[c], pFrame1[c], a);
            }

            pFramesOutS16      += pResampler->channels;
            framesProcessedOut += 1;

            /* Advance time forward. */
            pResampler->inTimeInt  += pResampler->inAdvanceInt;
            pResampler->inTimeFrac += pResampler->inAdvanceFrac;
            if (pResampler->inTimeFrac >= pResampler->sampleRateOut) {
                pResampler->inTimeFrac -= pResampler->sampleRateOut;
                pResampler->inTimeInt  += 1;
            }
        } else {
            /*
            There are not enough input frames to interpolate. We'll need to stop here. But it's important that
            we cache the frame to ensure we make some forward progress.
            */
            for (c = 0; c < pResampler->channels; c += 1) {
                pResampler->x0.s16[c] = pFrame0[c];
            }

            pResampler->cachedFrameCount = 1;
            pResampler->inTimeInt       += 1;   /* The cached frame counts as consumed input. */
            break;
        }
    }

    /* The number of frames we processed is simply the difference between our current time and previous time, clamped. */
    framesProcessedIn = pResampler->inTimeInt;
    if (framesProcessedIn > frameCountIn) { /* Should never overshoot when upsampling. Downsampling could overshoot. */
        framesProcessedIn = frameCountIn;
    }

    /*
    Rebase the time so it is relative to the start of the next input buffer. framesProcessedIn never exceeds
    inTimeInt because it was clamped from it, so the subtraction cannot underflow and the cast cannot truncate.
    */
    pResampler->inTimeInt -= (ma_uint32)framesProcessedIn;

    *pFrameCountIn  = framesProcessedIn;
    *pFrameCountOut = framesProcessedOut;

    return MA_SUCCESS;
}
static ma_result ma_linear_resampler_process_pcm_frames_s16_downsample(ma_linear_resampler* pResampler, const void* pFramesIn, ma_uint64* pFrameCountIn, void* pFramesOut, ma_uint64* pFrameCountOut)
{
const ma_int16* pFramesInS16;
@@ -59227,60 +59347,64 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_downsample(ma_linear
framesProcessedOut = 0;
invSampleRateOut = (1 << MA_LINEAR_RESAMPLER_LERP_SHIFT) / pResampler->sampleRateOut;
while (framesProcessedOut < frameCountOut) {
/* Before interpolating we need to load the buffers. When doing this we need to ensure we run every input sample through the filter. */
while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) {
ma_uint32 iChannel;
if (pResampler->lpfOrder == 0) {
return ma_linear_resampler_process_pcm_frames_s16_no_lpf(pResampler, pFramesInS16, pFrameCountIn, pFramesOutS16, pFrameCountOut, invSampleRateOut);
} else {
while (framesProcessedOut < frameCountOut) {
/* Before interpolating we need to load the buffers. When doing this we need to ensure we run every input sample through the filter. */
while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) {
ma_uint32 iChannel;
if (pFramesInS16 != NULL) {
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
pResampler->x0.s16[iChannel] = pResampler->x1.s16[iChannel];
pResampler->x1.s16[iChannel] = pFramesInS16[iChannel];
if (pFramesInS16 != NULL) {
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
pResampler->x0.s16[iChannel] = pResampler->x1.s16[iChannel];
pResampler->x1.s16[iChannel] = pFramesInS16[iChannel];
}
pFramesInS16 += pResampler->channels;
} else {
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
pResampler->x0.s16[iChannel] = pResampler->x1.s16[iChannel];
pResampler->x1.s16[iChannel] = 0;
}
}
pFramesInS16 += pResampler->channels;
} else {
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
pResampler->x0.s16[iChannel] = pResampler->x1.s16[iChannel];
pResampler->x1.s16[iChannel] = 0;
/* Filter. Do not apply filtering if sample rates are the same or else you'll get dangerous glitching. */
if (pResampler->lpfOrder > 0 && (pResampler->inAdvanceInt != 1 || pResampler->inAdvanceFrac != 0)) {
ma_lpf_process_pcm_frame_s16(&pResampler->lpf, pResampler->x1.s16, pResampler->x1.s16);
}
framesProcessedIn += 1;
pResampler->inTimeInt -= 1;
}
/* Filter. Do not apply filtering if sample rates are the same or else you'll get dangerous glitching. */
if (pResampler->lpfOrder > 0 && (pResampler->inAdvanceInt != 1 || pResampler->inAdvanceFrac != 0)) {
ma_lpf_process_pcm_frame_s16(&pResampler->lpf, pResampler->x1.s16, pResampler->x1.s16);
if (pResampler->inTimeInt > 0) {
break; /* Ran out of input data. */
}
framesProcessedIn += 1;
pResampler->inTimeInt -= 1;
/* Getting here means the frames have been loaded and filtered and we can generate the next output frame. */
if (pFramesOutS16 != NULL) {
MA_ASSERT(pResampler->inTimeInt == 0);
ma_linear_resampler_interpolate_frame_s16(pResampler, invSampleRateOut, pFramesOutS16);
pFramesOutS16 += pResampler->channels;
}
framesProcessedOut += 1;
/* Advance time forward. */
pResampler->inTimeInt += pResampler->inAdvanceInt;
pResampler->inTimeFrac += pResampler->inAdvanceFrac;
if (pResampler->inTimeFrac >= pResampler->sampleRateOut) {
pResampler->inTimeFrac -= pResampler->sampleRateOut;
pResampler->inTimeInt += 1;
}
}
if (pResampler->inTimeInt > 0) {
break; /* Ran out of input data. */
}
*pFrameCountIn = framesProcessedIn;
*pFrameCountOut = framesProcessedOut;
/* Getting here means the frames have been loaded and filtered and we can generate the next output frame. */
if (pFramesOutS16 != NULL) {
MA_ASSERT(pResampler->inTimeInt == 0);
ma_linear_resampler_interpolate_frame_s16(pResampler, invSampleRateOut, pFramesOutS16);
pFramesOutS16 += pResampler->channels;
}
framesProcessedOut += 1;
/* Advance time forward. */
pResampler->inTimeInt += pResampler->inAdvanceInt;
pResampler->inTimeFrac += pResampler->inAdvanceFrac;
if (pResampler->inTimeFrac >= pResampler->sampleRateOut) {
pResampler->inTimeFrac -= pResampler->sampleRateOut;
pResampler->inTimeInt += 1;
}
return MA_SUCCESS;
}
*pFrameCountIn = framesProcessedIn;
*pFrameCountOut = framesProcessedOut;
return MA_SUCCESS;
}
static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_resampler* pResampler, const void* pFramesIn, ma_uint64* pFrameCountIn, void* pFramesOut, ma_uint64* pFrameCountOut)
@@ -59294,7 +59418,9 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
ma_uint32 invSampleRateOut;
MA_ASSERT(pResampler != NULL);
MA_ASSERT(pFramesIn != NULL);
MA_ASSERT(pFrameCountIn != NULL);
MA_ASSERT(pFramesOut != NULL);
MA_ASSERT(pFrameCountOut != NULL);
pFramesInS16 = (const ma_int16*)pFramesIn;
@@ -59305,60 +59431,67 @@ static ma_result ma_linear_resampler_process_pcm_frames_s16_upsample(ma_linear_r
framesProcessedOut = 0;
invSampleRateOut = (1 << MA_LINEAR_RESAMPLER_LERP_SHIFT) / pResampler->sampleRateOut;
while (framesProcessedOut < frameCountOut) {
/* Before interpolating we need to load the buffers. */
while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) {
ma_uint32 iChannel;
/* We can run an optimized path when the low-pass filter is not being used. */
if (pResampler->lpfOrder == 0) {
/* Fast path. No LPF needed. */
return ma_linear_resampler_process_pcm_frames_s16_no_lpf(pResampler, pFramesInS16, pFrameCountIn, pFramesOutS16, pFrameCountOut, invSampleRateOut);
} else {
/* Slow path. Need LPF. */
while (framesProcessedOut < frameCountOut) {
/* Before interpolating we need to load the buffers. */
while (pResampler->inTimeInt > 0 && frameCountIn > framesProcessedIn) {
ma_uint32 iChannel;
if (pFramesInS16 != NULL) {
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
pResampler->x0.s16[iChannel] = pResampler->x1.s16[iChannel];
pResampler->x1.s16[iChannel] = pFramesInS16[iChannel];
}
pFramesInS16 += pResampler->channels;
} else {
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
pResampler->x0.s16[iChannel] = pResampler->x1.s16[iChannel];
pResampler->x1.s16[iChannel] = 0;
if (pFramesInS16 != NULL) {
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
pResampler->x0.s16[iChannel] = pResampler->x1.s16[iChannel];
pResampler->x1.s16[iChannel] = pFramesInS16[iChannel];
}
pFramesInS16 += pResampler->channels;
} else {
for (iChannel = 0; iChannel < pResampler->channels; iChannel += 1) {
pResampler->x0.s16[iChannel] = pResampler->x1.s16[iChannel];
pResampler->x1.s16[iChannel] = 0;
}
}
framesProcessedIn += 1;
pResampler->inTimeInt -= 1;
}
framesProcessedIn += 1;
pResampler->inTimeInt -= 1;
}
if (pResampler->inTimeInt > 0) {
break; /* Ran out of input data. */
}
/* Getting here means the frames have been loaded and we can generate the next output frame. */
if (pFramesOutS16 != NULL) {
MA_ASSERT(pResampler->inTimeInt == 0);
ma_linear_resampler_interpolate_frame_s16(pResampler, invSampleRateOut, pFramesOutS16);
/* Filter. Do not apply filtering if sample rates are the same or else you'll get dangerous glitching. */
if (pResampler->lpfOrder > 0 && (pResampler->inAdvanceInt != 1 || pResampler->inAdvanceFrac != 0)) {
ma_lpf_process_pcm_frame_s16(&pResampler->lpf, pFramesOutS16, pFramesOutS16);
if (pResampler->inTimeInt > 0) {
break; /* Ran out of input data. */
}
pFramesOutS16 += pResampler->channels;
/* Getting here means the frames have been loaded and we can generate the next output frame. */
if (pFramesOutS16 != NULL) {
MA_ASSERT(pResampler->inTimeInt == 0);
ma_linear_resampler_interpolate_frame_s16(pResampler, invSampleRateOut, pFramesOutS16);
/* Filter. Do not apply filtering if sample rates are the same or else you'll get dangerous glitching. */
if (pResampler->lpfOrder > 0 && (pResampler->inAdvanceInt != 1 || pResampler->inAdvanceFrac != 0)) {
ma_lpf_process_pcm_frame_s16(&pResampler->lpf, pFramesOutS16, pFramesOutS16);
}
pFramesOutS16 += pResampler->channels;
}
framesProcessedOut += 1;
/* Advance time forward. */
pResampler->inTimeInt += pResampler->inAdvanceInt;
pResampler->inTimeFrac += pResampler->inAdvanceFrac;
if (pResampler->inTimeFrac >= pResampler->sampleRateOut) {
pResampler->inTimeFrac -= pResampler->sampleRateOut;
pResampler->inTimeInt += 1;
}
}
framesProcessedOut += 1;
*pFrameCountIn = framesProcessedIn;
*pFrameCountOut = framesProcessedOut;
/* Advance time forward. */
pResampler->inTimeInt += pResampler->inAdvanceInt;
pResampler->inTimeFrac += pResampler->inAdvanceFrac;
if (pResampler->inTimeFrac >= pResampler->sampleRateOut) {
pResampler->inTimeFrac -= pResampler->sampleRateOut;
pResampler->inTimeInt += 1;
}
return MA_SUCCESS;
}
*pFrameCountIn = framesProcessedIn;
*pFrameCountOut = framesProcessedOut;
return MA_SUCCESS;
}
static ma_result ma_linear_resampler_process_pcm_frames_s16(ma_linear_resampler* pResampler, const void* pFramesIn, ma_uint64* pFrameCountIn, void* pFramesOut, ma_uint64* pFrameCountOut)
@@ -59609,6 +59742,7 @@ MA_API ma_uint64 ma_linear_resampler_get_output_latency(const ma_linear_resample
MA_API ma_result ma_linear_resampler_get_required_input_frame_count(const ma_linear_resampler* pResampler, ma_uint64 outputFrameCount, ma_uint64* pInputFrameCount)
{
#if 0
ma_uint64 inputFrameCount;
if (pInputFrameCount == NULL) {
@@ -59636,6 +59770,14 @@ MA_API ma_result ma_linear_resampler_get_required_input_frame_count(const ma_lin
*pInputFrameCount = inputFrameCount;
return MA_SUCCESS;
#else
{
(void)pResampler;
(void)outputFrameCount;
(void)pInputFrameCount;
return MA_NOT_IMPLEMENTED;
}
#endif
}
static ma_result ma_linear_resampler_get_expected_output_frame_count_ex(ma_uint32 sampleRateIn, ma_uint32 sampleRateOut, ma_uint32 inTimeInt, ma_uint32 inTimeFrac, ma_uint32 inAdvanceInt, ma_uint32 inAdvanceFrac, ma_uint64 inputFrameCount, ma_uint64* pOutputFrameCount)
@@ -59681,6 +59823,7 @@ static ma_result ma_linear_resampler_get_expected_output_frame_count_ex(ma_uint3
MA_API ma_result ma_linear_resampler_get_expected_output_frame_count(const ma_linear_resampler* pResampler, ma_uint64 inputFrameCount, ma_uint64* pOutputFrameCount)
{
#if 0
if (pOutputFrameCount == NULL) {
return MA_INVALID_ARGS;
}
@@ -59692,6 +59835,14 @@ MA_API ma_result ma_linear_resampler_get_expected_output_frame_count(const ma_li
}
return ma_linear_resampler_get_expected_output_frame_count_ex(pResampler->sampleRateIn, pResampler->sampleRateOut, pResampler->inTimeInt, pResampler->inTimeFrac, pResampler->inAdvanceInt, pResampler->inAdvanceFrac, inputFrameCount, pOutputFrameCount);
#else
{
(void)pResampler;
(void)inputFrameCount;
(void)pOutputFrameCount;
return MA_NOT_IMPLEMENTED;
}
#endif
}
MA_API ma_result ma_linear_resampler_reset(ma_linear_resampler* pResampler)