diff --git a/extras/dr_flac.h b/extras/dr_flac.h index fa727aca..cb4c29cb 100644 --- a/extras/dr_flac.h +++ b/extras/dr_flac.h @@ -1,5 +1,5 @@ // FLAC audio decoder. Public domain. See "unlicense" statement at the end of this file. -// dr_flac - v0.10.0 - 2018-09-11 +// dr_flac - v0.11.0 - 2018-12-xx // // David Reid - mackron@gmail.com @@ -18,7 +18,7 @@ // } // // drflac_int32* pSamples = malloc(pFlac->totalSampleCount * sizeof(drflac_int32)); -// drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_s32(pFlac, pFlac->totalSampleCount, pSamples); +// drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalSampleCount, pSamples); // // The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of // channels and the bits per sample, should be directly accessible - just make sure you don't change their values. Samples are @@ -29,7 +29,7 @@ // the decoder will give you as many samples as it can, up to the amount requested. Later on when you need the next batch of // samples, just call it again. Example: // -// while (drflac_read_s32(pFlac, chunkSize, pChunkSamples) > 0) { +// while (drflac_read_pcm_frames_s32(pFlac, chunkSize, pChunkSamples) > 0) { // do_something(); // } // @@ -138,6 +138,16 @@ typedef drflac_uint32 drflac_bool32; #define DRFLAC_TRUE 1 #define DRFLAC_FALSE 0 +#if defined(_MSC_VER) && _MSC_VER >= 1700 // Visual Studio 2012 +#define DRFLAC_DEPRECATED __declspec(deprecated) +#elif (defined(__GNUC__) && __GNUC__ >= 4) +#define DRFLAC_DEPRECATED __attribute__((deprecated)) +#elif (defined(__clang__) && __has_feature(attribute_deprecated)) +#define DRFLAC_DEPRECATED __attribute__((deprecated)) +#else +#define DRFLAC_DEPRECATED +#endif + // As data is read from the client it is placed into an internal buffer for fast access. This controls the // size of that buffer. Larger values means more speed, but also more memory. In my testing there is diminishing // returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8. @@ -464,6 +474,7 @@ typedef struct // with each channel having a total of 4096, this value will be set to 2*4096 = 8192. Can be 0 in which case it's still a // valid stream, but just means the total sample count is unknown. Likely the case with streams like internet radio. drflac_uint64 totalSampleCount; + drflac_uint64 totalPCMFrameCount; // <-- Equal to totalSampleCount / channels. // The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. @@ -581,58 +592,34 @@ void drflac_close(drflac* pFlac); // Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM. // -// pFlac [in] The decoder. -// samplesToRead [in] The number of samples to read. -// pBufferOut [out, optional] A pointer to the buffer that will receive the decoded samples. +// pFlac [in] The decoder. +// framesToRead [in] The number of PCM frames to read. +// pBufferOut [out, optional] A pointer to the buffer that will receive the decoded samples. // -// Returns the number of samples actually read. +// Returns the number of PCM frames actually read. // -// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples +// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames // seeked. -drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* pBufferOut); +drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut); -// Same as drflac_read_s32(), except outputs samples as 16-bit integer PCM rather than 32-bit. -// -// pFlac [in] The decoder. -// samplesToRead [in] The number of samples to read. -// pBufferOut [out, optional] A pointer to the buffer that will receive the decoded samples. -// -// Returns the number of samples actually read. -// -// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples -// seeked. +// Same as drflac_read_pcm_frames_s32(), except outputs samples as 16-bit integer PCM rather than 32-bit. // // Note that this is lossy for streams where the bits per sample is larger than 16. -drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int16* pBufferOut); +drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut); -// Same as drflac_read_s32(), except outputs samples as 32-bit floating-point PCM. -// -// pFlac [in] The decoder. -// samplesToRead [in] The number of samples to read. -// pBufferOut [out, optional] A pointer to the buffer that will receive the decoded samples. -// -// Returns the number of samples actually read. -// -// pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of samples -// seeked. +// Same as drflac_read_pcm_frames_s32(), except outputs samples as 32-bit floating-point PCM. // // Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly // represent every possible number. -drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* pBufferOut); +drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut); -// Seeks to the sample at the given index. +// Seeks to the PCM frame at the given index. // -// pFlac [in] The decoder. -// sampleIndex [in] The index of the sample to seek to. See notes below. +// pFlac [in] The decoder. +// pcmFrameIndex [in] The index of the PCM frame to seek to. See notes below. // // Returns DRFLAC_TRUE if successful; DRFLAC_FALSE otherwise. -// -// The sample index is based on interleaving. In a stereo stream, for example, the sample at index 0 is the first sample -// in the left channel; the sample at index 1 is the first sample on the right channel, and so on. -// -// When seeking, you will likely want to ensure it's rounded to a multiple of the channel count. You can do this with -// something like drflac_seek_to_sample(pFlac, (mySampleIndex + (mySampleIndex % pFlac->channels))) -drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex); +drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex); @@ -680,33 +667,33 @@ drflac* drflac_open_memory_with_metadata(const void* data, size_t dataSize, drfl // read samples into a dynamically sized buffer on the heap until no samples are left. // // Do not call this function on a broadcast type of stream (like internet radio streams and whatnot). -drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount); -// Same as drflac_open_and_decode_s32(), except returns signed 16-bit integer samples. -drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +// Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. +drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount); -// Same as drflac_open_and_decode_s32(), except returns 32-bit floating-point samples. -float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +// Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. +float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount); #ifndef DR_FLAC_NO_STDIO -// Same as drflac_open_and_decode_s32() except opens the decoder from a file. -drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +// Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. +drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount); -// Same as drflac_open_and_decode_file_s32(), except returns signed 16-bit integer samples. -drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +// Same as drflac_open_file_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. +drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount); -// Same as drflac_open_and_decode_file_f32(), except returns 32-bit floating-point samples. -float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +// Same as drflac_open_file_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. +float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount); #endif -// Same as drflac_open_and_decode_s32() except opens the decoder from a block of memory. -drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +// Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. +drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount); -// Same as drflac_open_and_decode_memory_s32(), except returns signed 16-bit integer samples. -drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +// Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. +drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount); -// Same as drflac_open_and_decode_memory_s32(), except returns 32-bit floating-point samples. -float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); +// Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. +float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount); // Frees memory that was allocated internally by dr_flac. void drflac_free(void* p); @@ -764,6 +751,20 @@ void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack); +//// Deprecated APIs //// +DRFLAC_DEPRECATED drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* pBufferOut); // Use drflac_read_pcm_frames_s32() instead. +DRFLAC_DEPRECATED drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int16* pBufferOut); // Use drflac_read_pcm_frames_s16() instead. +DRFLAC_DEPRECATED drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* pBufferOut); // Use drflac_read_pcm_frames_f32() instead. +DRFLAC_DEPRECATED drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex); // Use drflac_seek_to_pcm_frame() instead. +DRFLAC_DEPRECATED drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); // Use drflac_open_and_read_pcm_frames_s32(). +DRFLAC_DEPRECATED drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); // Use drflac_open_and_read_pcm_frames_s16(). +DRFLAC_DEPRECATED float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); // Use drflac_open_and_read_pcm_frames_f32(). +DRFLAC_DEPRECATED drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); // Use drflac_open_file_and_read_pcm_frames_s32(). +DRFLAC_DEPRECATED drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); // Use drflac_open_file_and_read_pcm_frames_s16(). +DRFLAC_DEPRECATED float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); // Use drflac_open_file_and_read_pcm_frames_f32(). +DRFLAC_DEPRECATED drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); // Use drflac_open_memory_and_read_pcm_frames_s32(). +DRFLAC_DEPRECATED drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); // Use drflac_open_memory_and_read_pcm_frames_s16(). +DRFLAC_DEPRECATED float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount); // Use drflac_open_memory_and_read_pcm_frames_f32(). #ifdef __cplusplus } @@ -790,6 +791,16 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, #include #include +#ifdef _MSC_VER +#define DRFLAC_INLINE __forceinline +#else +#ifdef __GNUC__ +#define DRFLAC_INLINE inline __attribute__((always_inline)) +#else +#define DRFLAC_INLINE inline +#endif +#endif + // CPU architecture. #if defined(__x86_64__) || defined(_M_X64) #define DRFLAC_X64 @@ -799,6 +810,62 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, #define DRFLAC_ARM #endif +// Intrinsics Support +#if !defined(DR_FLAC_NO_SIMD) + #if defined(DRFLAC_X64) || defined(DRFLAC_X86) + #if defined(_MSC_VER) && !defined(__clang__) + // MSVC. + #if !defined(DRFLAC_NO_SSE2) // Assume all MSVC compilers support SSE2 intrinsics. + #define DRFLAC_SUPPORT_SSE2 + #endif + #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41) // 2010 + #define DRFLAC_SUPPORT_SSE41 + #endif + #else + // Assume GNUC-style. + #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2) + #define DRFLAC_SUPPORT_SSE2 + #endif + #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41) + #define DRFLAC_SUPPORT_SSE41 + #endif + #endif + + // If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. + #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include) + #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include() + #define DRFLAC_SUPPORT_SSE2 + #endif + #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include() + #define DRFLAC_SUPPORT_SSE41 + #endif + #endif + + #if defined(DRFLAC_SUPPORT_SSE41) + #include + #elif defined(DRFLAC_SUPPORT_SSE2) + #include + #endif + #endif + + #if defined(DRFLAC_ARM) + #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)) + #define DRFLAC_SUPPORT_NEON + #endif + + // Fall back to looking for the #include file. + #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include) + #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include() + #define DRFLAC_SUPPORT_NEON + #endif + #endif + + #if defined(DRFLAC_SUPPORT_NEON) + #include + #endif + #endif +#endif + // Compile-time CPU feature support. #if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) #if defined(_MSC_VER) && !defined(__clang__) @@ -841,6 +908,56 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, #define DRFLAC_NO_CPUID #endif +static DRFLAC_INLINE drflac_bool32 drflac_has_sse2() +{ +#if defined(DRFLAC_SUPPORT_SSE2) + #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2) + #if defined(DRFLAC_X64) + return DRFLAC_TRUE; // 64-bit targets always support SSE2. + #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__) + return DRFLAC_TRUE; // If the compiler is allowed to freely generate SSE2 code we can assume support. + #else + #if defined(DRFLAC_NO_CPUID) + return DRFLAC_FALSE; + #else + int info[4]; + drflac_cpuid(info, 1); + return (info[3] & (1 << 26)) != 0; + #endif + #endif + #else + return DRFLAC_FALSE; // SSE2 is only supported on x86 and x64 architectures. + #endif +#else + return DRFLAC_FALSE; // No compiler support. +#endif +} + +static DRFLAC_INLINE drflac_bool32 drflac_has_sse41() +{ +#if defined(DRFLAC_SUPPORT_SSE41) + #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41) + #if defined(DRFLAC_X64) + return DRFLAC_TRUE; // 64-bit targets always support SSE4.1. + #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__) + return DRFLAC_TRUE; // If the compiler is allowed to freely generate SSE41 code we can assume support. + #else + #if defined(DRFLAC_NO_CPUID) + return DRFLAC_FALSE; + #else + int info[4]; + drflac_cpuid(info, 1); + return (info[2] & (1 << 19)) != 0; + #endif + #endif + #else + return DRFLAC_FALSE; // SSE41 is only supported on x86 and x64 architectures. + #endif +#else + return DRFLAC_FALSE; // No compiler support. +#endif +} + #if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) #define DRFLAC_HAS_LZCNT_INTRINSIC @@ -900,16 +1017,6 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, #define DRFLAC_MAX_SIMD_VECTOR_SIZE 64 // 64 for AVX-512 in the future. -#ifdef _MSC_VER -#define DRFLAC_INLINE __forceinline -#else -#ifdef __GNUC__ -#define DRFLAC_INLINE inline __attribute__((always_inline)) -#else -#define DRFLAC_INLINE inline -#endif -#endif - typedef drflac_int32 drflac_result; #define DRFLAC_SUCCESS 0 #define DRFLAC_ERROR -1 // A generic error. @@ -931,6 +1038,10 @@ typedef drflac_int32 drflac_result; #define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE 9 #define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE 10 +// Keeps track of the number of leading samples for each sub-frame. This is required because the SSE pipeline will occasionally +// reference excess prior samples. +#define DRFLAC_LEADING_SAMPLES 32 + #define drflac_align(x, a) ((((x) + (a) - 1) / (a)) * (a)) #define drflac_assert DRFLAC_ASSERT @@ -941,18 +1052,21 @@ typedef drflac_int32 drflac_result; // CPU caps. static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE; #ifndef DRFLAC_NO_CPUID -static drflac_bool32 drflac__gIsSSE42Supported = DRFLAC_FALSE; +static drflac_bool32 drflac__gIsSSE2Supported = DRFLAC_FALSE; +static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE; static void drflac__init_cpu_caps() { int info[4] = {0}; // LZCNT drflac__cpuid(info, 0x80000001); - drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0; + drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0; - // SSE4.2 - drflac__cpuid(info, 1); - drflac__gIsSSE42Supported = (info[2] & (1 << 19)) != 0; + // SSE2 + drflac__gIsSSE2Supported = drflac_has_sse2(); + + // SSE4.1 + drflac__gIsSSE41Supported = drflac_has_sse41(); } #endif @@ -1761,6 +1875,10 @@ static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs) static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x) { + if (x == 0) { + return sizeof(x)*8; + } + static drflac_uint32 clz_table_4[] = { 0, 4, @@ -1808,6 +1926,9 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) #endif #else #if defined(__GNUC__) || defined(__clang__) + if (x == 0) { + return sizeof(x)*8; + } #ifdef DRFLAC_64BIT return (drflac_uint32)__builtin_clzll((unsigned long long)x); #else @@ -1826,6 +1947,10 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x) { + if (x == 0) { + return sizeof(x)*8; + } + drflac_uint32 n; #ifdef DRFLAC_64BIT _BitScanReverse64((unsigned long*)&n, x); @@ -1838,7 +1963,6 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x) static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x) { - // This function assumes at least one bit is set. Checking for 0 needs to be done at a higher level, outside this function. #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT if (drflac__is_lzcnt_supported()) { return drflac__clz_lzcnt(x); @@ -2204,6 +2328,435 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 return (drflac_int32)(prediction >> shift); } +static DRFLAC_INLINE void drflac__calculate_prediction_64_x4(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, const drflac_uint32 riceParamParts[4], drflac_int32* pDecodedSamples) +{ + drflac_assert(order <= 32); + + drflac_int64 prediction0 = 0; + drflac_int64 prediction1 = 0; + drflac_int64 prediction2 = 0; + drflac_int64 prediction3 = 0; + + switch (order) + { + case 32: + prediction0 += coefficients[31] * (drflac_int64)pDecodedSamples[-32]; + prediction1 += coefficients[31] * (drflac_int64)pDecodedSamples[-31]; + prediction2 += coefficients[31] * (drflac_int64)pDecodedSamples[-30]; + prediction3 += coefficients[31] * (drflac_int64)pDecodedSamples[-29]; + case 31: + prediction0 += coefficients[30] * (drflac_int64)pDecodedSamples[-31]; + prediction1 += coefficients[30] * (drflac_int64)pDecodedSamples[-30]; + prediction2 += coefficients[30] * (drflac_int64)pDecodedSamples[-29]; + prediction3 += coefficients[30] * (drflac_int64)pDecodedSamples[-28]; + case 30: + prediction0 += coefficients[29] * (drflac_int64)pDecodedSamples[-30]; + prediction1 += coefficients[29] * (drflac_int64)pDecodedSamples[-29]; + prediction2 += coefficients[29] * (drflac_int64)pDecodedSamples[-28]; + prediction3 += coefficients[29] * (drflac_int64)pDecodedSamples[-27]; + case 29: + prediction0 += coefficients[28] * (drflac_int64)pDecodedSamples[-29]; + prediction1 += coefficients[28] * (drflac_int64)pDecodedSamples[-28]; + prediction2 += coefficients[28] * (drflac_int64)pDecodedSamples[-27]; + prediction3 += coefficients[28] * (drflac_int64)pDecodedSamples[-26]; + case 28: + prediction0 += coefficients[27] * (drflac_int64)pDecodedSamples[-28]; + prediction1 += coefficients[27] * (drflac_int64)pDecodedSamples[-27]; + prediction2 += coefficients[27] * (drflac_int64)pDecodedSamples[-26]; + prediction3 += coefficients[27] * (drflac_int64)pDecodedSamples[-25]; + case 27: + prediction0 += coefficients[26] * (drflac_int64)pDecodedSamples[-27]; + prediction1 += coefficients[26] * (drflac_int64)pDecodedSamples[-26]; + prediction2 += coefficients[26] * (drflac_int64)pDecodedSamples[-25]; + prediction3 += coefficients[26] * (drflac_int64)pDecodedSamples[-24]; + case 26: + prediction0 += coefficients[25] * (drflac_int64)pDecodedSamples[-26]; + prediction1 += coefficients[25] * (drflac_int64)pDecodedSamples[-25]; + prediction2 += coefficients[25] * (drflac_int64)pDecodedSamples[-24]; + prediction3 += coefficients[25] * (drflac_int64)pDecodedSamples[-23]; + case 25: + prediction0 += coefficients[24] * (drflac_int64)pDecodedSamples[-25]; + prediction1 += coefficients[24] * (drflac_int64)pDecodedSamples[-24]; + prediction2 += coefficients[24] * (drflac_int64)pDecodedSamples[-23]; + prediction3 += coefficients[24] * (drflac_int64)pDecodedSamples[-22]; + case 24: + prediction0 += coefficients[23] * (drflac_int64)pDecodedSamples[-24]; + prediction1 += coefficients[23] * (drflac_int64)pDecodedSamples[-23]; + prediction2 += coefficients[23] * (drflac_int64)pDecodedSamples[-22]; + prediction3 += coefficients[23] * (drflac_int64)pDecodedSamples[-21]; + case 23: + prediction0 += coefficients[22] * (drflac_int64)pDecodedSamples[-23]; + prediction1 += coefficients[22] * (drflac_int64)pDecodedSamples[-22]; + prediction2 += coefficients[22] * (drflac_int64)pDecodedSamples[-21]; + prediction3 += coefficients[22] * (drflac_int64)pDecodedSamples[-20]; + case 22: + prediction0 += coefficients[21] * (drflac_int64)pDecodedSamples[-22]; + prediction1 += coefficients[21] * (drflac_int64)pDecodedSamples[-21]; + prediction2 += coefficients[21] * (drflac_int64)pDecodedSamples[-20]; + prediction3 += coefficients[21] * (drflac_int64)pDecodedSamples[-19]; + case 21: + prediction0 += coefficients[20] * (drflac_int64)pDecodedSamples[-21]; + prediction1 += coefficients[20] * (drflac_int64)pDecodedSamples[-20]; + prediction2 += coefficients[20] * (drflac_int64)pDecodedSamples[-19]; + prediction3 += coefficients[20] * (drflac_int64)pDecodedSamples[-18]; + case 20: + prediction0 += coefficients[19] * (drflac_int64)pDecodedSamples[-20]; + prediction1 += coefficients[19] * (drflac_int64)pDecodedSamples[-19]; + prediction2 += coefficients[19] * (drflac_int64)pDecodedSamples[-18]; + prediction3 += coefficients[19] * (drflac_int64)pDecodedSamples[-17]; + case 19: + prediction0 += coefficients[18] * (drflac_int64)pDecodedSamples[-19]; + prediction1 += coefficients[18] * (drflac_int64)pDecodedSamples[-18]; + prediction2 += coefficients[18] * (drflac_int64)pDecodedSamples[-17]; + prediction3 += coefficients[18] * (drflac_int64)pDecodedSamples[-16]; + case 18: + prediction0 += coefficients[17] * (drflac_int64)pDecodedSamples[-18]; + prediction1 += coefficients[17] * (drflac_int64)pDecodedSamples[-17]; + prediction2 += coefficients[17] * (drflac_int64)pDecodedSamples[-16]; + prediction3 += coefficients[17] * (drflac_int64)pDecodedSamples[-15]; + case 17: + prediction0 += coefficients[16] * (drflac_int64)pDecodedSamples[-17]; + prediction1 += coefficients[16] * (drflac_int64)pDecodedSamples[-16]; + prediction2 += coefficients[16] * (drflac_int64)pDecodedSamples[-15]; + prediction3 += coefficients[16] * (drflac_int64)pDecodedSamples[-14]; + + case 16: + prediction0 += coefficients[15] * (drflac_int64)pDecodedSamples[-16]; + prediction1 += coefficients[15] * (drflac_int64)pDecodedSamples[-15]; + prediction2 += coefficients[15] * (drflac_int64)pDecodedSamples[-14]; + prediction3 += coefficients[15] * (drflac_int64)pDecodedSamples[-13]; + case 15: + prediction0 += coefficients[14] * (drflac_int64)pDecodedSamples[-15]; + prediction1 += coefficients[14] * (drflac_int64)pDecodedSamples[-14]; + prediction2 += coefficients[14] * (drflac_int64)pDecodedSamples[-13]; + prediction3 += coefficients[14] * (drflac_int64)pDecodedSamples[-12]; + case 14: + prediction0 += coefficients[13] * (drflac_int64)pDecodedSamples[-14]; + prediction1 += coefficients[13] * (drflac_int64)pDecodedSamples[-13]; + prediction2 += coefficients[13] * (drflac_int64)pDecodedSamples[-12]; + prediction3 += coefficients[13] * (drflac_int64)pDecodedSamples[-11]; + case 13: + prediction0 += coefficients[12] * (drflac_int64)pDecodedSamples[-13]; + prediction1 += coefficients[12] * (drflac_int64)pDecodedSamples[-12]; + prediction2 += coefficients[12] * (drflac_int64)pDecodedSamples[-11]; + prediction3 += coefficients[12] * (drflac_int64)pDecodedSamples[-10]; + case 12: + prediction0 += coefficients[11] * (drflac_int64)pDecodedSamples[-12]; + prediction1 += coefficients[11] * (drflac_int64)pDecodedSamples[-11]; + prediction2 += coefficients[11] * (drflac_int64)pDecodedSamples[-10]; + prediction3 += coefficients[11] * (drflac_int64)pDecodedSamples[- 9]; + case 11: + prediction0 += coefficients[10] * (drflac_int64)pDecodedSamples[-11]; + prediction1 += coefficients[10] * (drflac_int64)pDecodedSamples[-10]; + prediction2 += coefficients[10] * (drflac_int64)pDecodedSamples[- 9]; + prediction3 += coefficients[10] * (drflac_int64)pDecodedSamples[- 8]; + case 10: + prediction0 += coefficients[9] * (drflac_int64)pDecodedSamples[-10]; + prediction1 += coefficients[9] * (drflac_int64)pDecodedSamples[- 9]; + prediction2 += coefficients[9] * (drflac_int64)pDecodedSamples[- 8]; + prediction3 += coefficients[9] * (drflac_int64)pDecodedSamples[- 7]; + case 9: + prediction0 += coefficients[8] * (drflac_int64)pDecodedSamples[- 9]; + prediction1 += coefficients[8] * (drflac_int64)pDecodedSamples[- 8]; + prediction2 += coefficients[8] * (drflac_int64)pDecodedSamples[- 7]; + prediction3 += coefficients[8] * (drflac_int64)pDecodedSamples[- 6]; + case 8: + prediction0 += coefficients[7] * (drflac_int64)pDecodedSamples[- 8]; + prediction1 += coefficients[7] * (drflac_int64)pDecodedSamples[- 7]; + prediction2 += coefficients[7] * (drflac_int64)pDecodedSamples[- 6]; + prediction3 += coefficients[7] * (drflac_int64)pDecodedSamples[- 5]; + case 7: + prediction0 += coefficients[6] * (drflac_int64)pDecodedSamples[- 7]; + prediction1 += coefficients[6] * (drflac_int64)pDecodedSamples[- 6]; + prediction2 += coefficients[6] * (drflac_int64)pDecodedSamples[- 5]; + prediction3 += coefficients[6] * (drflac_int64)pDecodedSamples[- 4]; + case 6: + prediction0 += coefficients[5] * (drflac_int64)pDecodedSamples[- 6]; + prediction1 += coefficients[5] * (drflac_int64)pDecodedSamples[- 5]; + prediction2 += coefficients[5] * (drflac_int64)pDecodedSamples[- 4]; + prediction3 += coefficients[5] * (drflac_int64)pDecodedSamples[- 3]; + case 5: + prediction0 += coefficients[4] * (drflac_int64)pDecodedSamples[- 5]; + prediction1 += coefficients[4] * (drflac_int64)pDecodedSamples[- 4]; + prediction2 += coefficients[4] * (drflac_int64)pDecodedSamples[- 3]; + prediction3 += coefficients[4] * (drflac_int64)pDecodedSamples[- 2]; + case 4: + prediction0 += coefficients[3] * (drflac_int64)pDecodedSamples[- 4]; + prediction1 += coefficients[3] * (drflac_int64)pDecodedSamples[- 3]; + prediction2 += coefficients[3] * (drflac_int64)pDecodedSamples[- 2]; + prediction3 += coefficients[3] * (drflac_int64)pDecodedSamples[- 1]; + order = 3; + } + + switch (order) + { + case 3: prediction0 += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3]; + case 2: prediction0 += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2]; + case 1: prediction0 += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1]; + } + pDecodedSamples[0] = riceParamParts[0] + (drflac_int32)(prediction0 >> shift); + + switch (order) + { + case 3: prediction1 += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 2]; + case 2: prediction1 += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 1]; + case 1: prediction1 += coefficients[ 0] * (drflac_int64)pDecodedSamples[ 0]; + } + pDecodedSamples[1] = riceParamParts[1] + (drflac_int32)(prediction1 >> shift); + + switch (order) + { + case 3: prediction2 += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 1]; + case 2: prediction2 += coefficients[ 1] * (drflac_int64)pDecodedSamples[ 0]; + case 1: prediction2 += coefficients[ 0] * (drflac_int64)pDecodedSamples[ 1]; + } + pDecodedSamples[2] = riceParamParts[2] + (drflac_int32)(prediction2 >> shift); + + switch (order) + { + case 3: prediction3 += coefficients[ 2] * (drflac_int64)pDecodedSamples[ 0]; + case 2: prediction3 += coefficients[ 1] * (drflac_int64)pDecodedSamples[ 1]; + case 1: prediction3 += coefficients[ 0] * (drflac_int64)pDecodedSamples[ 2]; + } + pDecodedSamples[3] = riceParamParts[3] + (drflac_int32)(prediction3 >> shift); +} + +#if defined(DRFLAC_SUPPORT_SSE41) +static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64__sse41(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) +{ + drflac_assert(order <= 32); + + __m128i prediction = _mm_setzero_si128(); + + switch (order) + { + case 32: + case 31: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[31], 0, coefficients[30]), _mm_set_epi32(0, pDecodedSamples[-32], 0, pDecodedSamples[-31]))); + case 30: + case 29: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[29], 0, coefficients[28]), _mm_set_epi32(0, pDecodedSamples[-30], 0, pDecodedSamples[-29]))); + case 28: + case 27: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[27], 0, coefficients[26]), _mm_set_epi32(0, pDecodedSamples[-28], 0, pDecodedSamples[-27]))); + case 26: + case 25: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[25], 0, coefficients[24]), _mm_set_epi32(0, pDecodedSamples[-26], 0, pDecodedSamples[-25]))); + case 24: + case 23: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[23], 0, coefficients[22]), _mm_set_epi32(0, pDecodedSamples[-24], 0, pDecodedSamples[-23]))); + case 22: + case 21: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[21], 0, coefficients[20]), _mm_set_epi32(0, pDecodedSamples[-22], 0, pDecodedSamples[-21]))); + case 20: + case 19: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[19], 0, coefficients[18]), _mm_set_epi32(0, pDecodedSamples[-20], 0, pDecodedSamples[-19]))); + case 18: + case 17: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[17], 0, coefficients[16]), _mm_set_epi32(0, pDecodedSamples[-18], 0, pDecodedSamples[-17]))); + case 16: + case 15: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[15], 0, coefficients[14]), _mm_set_epi32(0, pDecodedSamples[-16], 0, pDecodedSamples[-15]))); + case 14: + case 13: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[13], 0, coefficients[12]), _mm_set_epi32(0, pDecodedSamples[-14], 0, pDecodedSamples[-13]))); + case 12: + case 11: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[11], 0, coefficients[10]), _mm_set_epi32(0, pDecodedSamples[-12], 0, pDecodedSamples[-11]))); + case 10: + case 9: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 9], 0, coefficients[ 8]), _mm_set_epi32(0, pDecodedSamples[-10], 0, pDecodedSamples[- 9]))); + case 8: + case 7: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 7], 0, coefficients[ 6]), _mm_set_epi32(0, pDecodedSamples[- 8], 0, pDecodedSamples[- 7]))); + case 6: + case 5: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 5], 0, coefficients[ 4]), _mm_set_epi32(0, pDecodedSamples[- 6], 0, pDecodedSamples[- 5]))); + case 4: + case 3: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 3], 0, coefficients[ 2]), _mm_set_epi32(0, pDecodedSamples[- 4], 0, pDecodedSamples[- 3]))); + case 2: + case 1: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 1], 0, coefficients[ 0]), _mm_set_epi32(0, pDecodedSamples[- 2], 0, pDecodedSamples[- 1]))); + } + + return (drflac_int32)(( + ((drflac_uint64*)&prediction)[0] + + ((drflac_uint64*)&prediction)[1]) >> shift); +} + +static DRFLAC_INLINE void drflac__calculate_prediction_64_x2__sse41(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, const drflac_uint32 riceParamParts[4], drflac_int32* pDecodedSamples) +{ + drflac_assert(order <= 32); + + __m128i prediction = _mm_setzero_si128(); + drflac_int64 predictions[2] = {0, 0}; + + switch (order) + { + case 32: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[31], 0, coefficients[31]), _mm_set_epi32(0, pDecodedSamples[-31], 0, pDecodedSamples[-32]))); + case 31: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[30], 0, coefficients[30]), _mm_set_epi32(0, pDecodedSamples[-30], 0, pDecodedSamples[-31]))); + case 30: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[29], 0, coefficients[29]), _mm_set_epi32(0, pDecodedSamples[-29], 0, pDecodedSamples[-30]))); + case 29: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[28], 0, coefficients[28]), _mm_set_epi32(0, pDecodedSamples[-28], 0, pDecodedSamples[-29]))); + case 28: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[27], 0, coefficients[27]), _mm_set_epi32(0, pDecodedSamples[-27], 0, pDecodedSamples[-28]))); + case 27: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[26], 0, coefficients[26]), _mm_set_epi32(0, pDecodedSamples[-26], 0, pDecodedSamples[-27]))); + case 26: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[25], 0, coefficients[25]), _mm_set_epi32(0, pDecodedSamples[-25], 0, pDecodedSamples[-26]))); + case 25: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[24], 0, coefficients[24]), _mm_set_epi32(0, pDecodedSamples[-24], 0, pDecodedSamples[-25]))); + case 24: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[23], 0, coefficients[23]), _mm_set_epi32(0, pDecodedSamples[-23], 0, pDecodedSamples[-24]))); + case 23: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[22], 0, coefficients[22]), _mm_set_epi32(0, pDecodedSamples[-22], 0, pDecodedSamples[-23]))); + case 22: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[21], 0, coefficients[21]), _mm_set_epi32(0, pDecodedSamples[-21], 0, pDecodedSamples[-22]))); + case 21: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[20], 0, coefficients[20]), _mm_set_epi32(0, pDecodedSamples[-20], 0, pDecodedSamples[-21]))); + case 20: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[19], 0, coefficients[19]), _mm_set_epi32(0, pDecodedSamples[-19], 0, pDecodedSamples[-20]))); + case 19: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[18], 0, coefficients[18]), _mm_set_epi32(0, pDecodedSamples[-18], 0, pDecodedSamples[-19]))); + case 18: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[17], 0, coefficients[17]), _mm_set_epi32(0, pDecodedSamples[-17], 0, pDecodedSamples[-18]))); + case 17: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[16], 0, coefficients[16]), _mm_set_epi32(0, pDecodedSamples[-16], 0, pDecodedSamples[-17]))); + case 16: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[15], 0, coefficients[15]), _mm_set_epi32(0, pDecodedSamples[-15], 0, pDecodedSamples[-16]))); + case 15: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[14], 0, coefficients[14]), _mm_set_epi32(0, pDecodedSamples[-14], 0, pDecodedSamples[-15]))); + case 14: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[13], 0, coefficients[13]), _mm_set_epi32(0, pDecodedSamples[-13], 0, pDecodedSamples[-14]))); + case 13: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[12], 0, coefficients[12]), _mm_set_epi32(0, pDecodedSamples[-12], 0, pDecodedSamples[-13]))); + case 12: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[11], 0, coefficients[11]), _mm_set_epi32(0, pDecodedSamples[-11], 0, pDecodedSamples[-12]))); + case 11: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[10], 0, coefficients[10]), _mm_set_epi32(0, pDecodedSamples[-10], 0, pDecodedSamples[-11]))); + case 10: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 9], 0, coefficients[ 9]), _mm_set_epi32(0, pDecodedSamples[- 9], 0, pDecodedSamples[-10]))); + case 9: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 8], 0, coefficients[ 8]), _mm_set_epi32(0, pDecodedSamples[- 8], 0, pDecodedSamples[- 9]))); + case 8: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 7], 0, coefficients[ 7]), _mm_set_epi32(0, pDecodedSamples[- 7], 0, pDecodedSamples[- 8]))); + case 7: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 6], 0, coefficients[ 6]), _mm_set_epi32(0, pDecodedSamples[- 6], 0, pDecodedSamples[- 7]))); + case 6: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 5], 0, coefficients[ 5]), _mm_set_epi32(0, pDecodedSamples[- 5], 0, pDecodedSamples[- 6]))); + case 5: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 4], 0, coefficients[ 4]), _mm_set_epi32(0, pDecodedSamples[- 4], 0, pDecodedSamples[- 5]))); + case 4: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 3], 0, coefficients[ 3]), _mm_set_epi32(0, pDecodedSamples[- 3], 0, pDecodedSamples[- 4]))); + case 3: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 2], 0, coefficients[ 2]), _mm_set_epi32(0, pDecodedSamples[- 2], 0, pDecodedSamples[- 3]))); + case 2: prediction = _mm_add_epi64(prediction, _mm_mul_epi32(_mm_set_epi32(0, coefficients[ 1], 0, coefficients[ 1]), _mm_set_epi32(0, pDecodedSamples[- 1], 0, pDecodedSamples[- 2]))); + order = 1; + } + + _mm_storeu_si128((__m128i*)predictions, prediction); + + switch (order) + { + case 1: predictions[0] += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1]; + } + pDecodedSamples[0] = riceParamParts[0] + (drflac_int32)(predictions[0] >> shift); + + switch (order) + { + case 1: predictions[1] += coefficients[ 0] * (drflac_int64)pDecodedSamples[ 0]; + } + pDecodedSamples[1] = riceParamParts[1] + (drflac_int32)(predictions[1] >> shift); +} + + +static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a) +{ + return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); +} + +static DRFLAC_INLINE __m128i drflac__mm_slide1_epi32(__m128i a, __m128i b) +{ + // a3a2a1a0/b3b2b1b0 -> a2a1a0b3 + + // Result = a2a1a0b3 + __m128i b3a3b2a2 = _mm_unpackhi_epi32(a, b); + __m128i a2b3a2b3 = _mm_shuffle_epi32(b3a3b2a2, _MM_SHUFFLE(0, 3, 0, 3)); + __m128i a1a2a0b3 = _mm_unpacklo_epi32(a2b3a2b3, a); + __m128i a2a1a0b3 = _mm_shuffle_epi32(a1a2a0b3, _MM_SHUFFLE(2, 3, 1, 0)); + return a2a1a0b3; +} + +static DRFLAC_INLINE __m128i drflac__mm_slide2_epi32(__m128i a, __m128i b) +{ + // Result = a1a0b3b2 + __m128i b1b0b3b2 = _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)); + __m128i a1b3a0b2 = _mm_unpacklo_epi32(b1b0b3b2, a); + __m128i a1a0b3b2 = _mm_shuffle_epi32(a1b3a0b2, _MM_SHUFFLE(3, 1, 2, 0)); + return a1a0b3b2; +} + +static DRFLAC_INLINE __m128i drflac__mm_slide3_epi32(__m128i a, __m128i b) +{ + // Result = a0b3b2b1 + __m128i b1a1b0a0 = _mm_unpacklo_epi32(a, b); + __m128i a0b1a0b1 = _mm_shuffle_epi32(b1a1b0a0, _MM_SHUFFLE(0, 3, 0, 3)); + __m128i b3a0b2b1 = _mm_unpackhi_epi32(a0b1a0b1, b); + __m128i a0b3b2b1 = _mm_shuffle_epi32(b3a0b2b1, _MM_SHUFFLE(2, 3, 1, 0)); + return a0b3b2b1; +} + +static DRFLAC_INLINE void drflac__calculate_prediction_32_x4__sse41(drflac_uint32 order, drflac_int32 shift, const __m128i* coefficients128, const __m128i riceParamParts128, drflac_int32* pDecodedSamples) +{ + drflac_assert(order <= 32); + + // I don't think this is as efficient as it could be. More work needs to be done on this. + if (order > 0) { + __m128i s_09_10_11_12 = _mm_loadu_si128((const __m128i*)(pDecodedSamples - 12)); + __m128i s_05_06_07_08 = _mm_loadu_si128((const __m128i*)(pDecodedSamples - 8)); + __m128i s_01_02_03_04 = _mm_loadu_si128((const __m128i*)(pDecodedSamples - 4)); + + __m128i prediction = _mm_setzero_si128(); + + // The idea with this switch is to do do a single jump based on the value of "order". In my test library, "order" is never larger than 12, so + // I have decided to do a less optimal solution in the order > 12 case. + switch (order) + { + case 32: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[31], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 32)))); + case 31: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[30], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 31)))); + case 30: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[29], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 30)))); + case 29: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[28], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 29)))); + case 28: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[27], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 28)))); + case 27: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[26], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 27)))); + case 26: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[25], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 26)))); + case 25: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[24], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 25)))); + case 24: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[23], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 24)))); + case 23: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[22], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 23)))); + case 22: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[21], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 22)))); + case 21: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[20], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 21)))); + case 20: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[19], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 20)))); + case 19: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[18], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 19)))); + case 18: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[17], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 18)))); + case 17: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[16], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 17)))); + case 16: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[15], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 16)))); + case 15: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[14], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 15)))); + case 14: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[13], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 14)))); + case 13: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[12], _mm_loadu_si128((const __m128i*)(pDecodedSamples - 13)))); + + case 12: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[11], s_09_10_11_12)); + case 11: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[10], drflac__mm_slide3_epi32(s_05_06_07_08, s_09_10_11_12))); + case 10: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 9], drflac__mm_slide2_epi32(s_05_06_07_08, s_09_10_11_12))); + case 9: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 8], drflac__mm_slide1_epi32(s_05_06_07_08, s_09_10_11_12))); + case 8: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 7], s_05_06_07_08)); + case 7: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 6], drflac__mm_slide3_epi32(s_01_02_03_04, s_05_06_07_08))); + case 6: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 5], drflac__mm_slide2_epi32(s_01_02_03_04, s_05_06_07_08))); + case 5: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 4], drflac__mm_slide1_epi32(s_01_02_03_04, s_05_06_07_08))); + case 4: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 3], s_01_02_03_04)); order = 3; // <-- Don't forget to set order to 3 here! + case 3: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 2], drflac__mm_slide3_epi32(_mm_setzero_si128(), s_01_02_03_04))); + case 2: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 1], drflac__mm_slide2_epi32(_mm_setzero_si128(), s_01_02_03_04))); + case 1: prediction = _mm_add_epi32(prediction, _mm_mullo_epi32(coefficients128[ 0], drflac__mm_slide1_epi32(_mm_setzero_si128(), s_01_02_03_04))); + } + + drflac_int32 predictions[4]; + _mm_storeu_si128((__m128i*)predictions, prediction); + + drflac_uint32 riceParamParts[4]; + _mm_storeu_si128((__m128i*)riceParamParts, riceParamParts128); + + predictions[0] = riceParamParts[0] + (predictions[0] >> shift); + + switch (order) + { + case 3: predictions[3] += ((const drflac_int32*)&coefficients128[ 2])[0] * predictions[ 0]; + case 2: predictions[2] += ((const drflac_int32*)&coefficients128[ 1])[0] * predictions[ 0]; + case 1: predictions[1] += ((const drflac_int32*)&coefficients128[ 0])[0] * predictions[ 0]; + } + predictions[1] = riceParamParts[1] + (predictions[1] >> shift); + + switch (order) + { + case 3: + case 2: predictions[3] += ((const drflac_int32*)&coefficients128[ 1])[0] * predictions[ 1]; + case 1: predictions[2] += ((const drflac_int32*)&coefficients128[ 0])[0] * predictions[ 1]; + } + predictions[2] = riceParamParts[2] + (predictions[2] >> shift); + + switch (order) + { + case 3: + case 2: + case 1: predictions[3] += ((const drflac_int32*)&coefficients128[ 0])[0] * predictions[ 2]; + } + predictions[3] = riceParamParts[3] + (predictions[3] >> shift); + + pDecodedSamples[0] = predictions[0]; + pDecodedSamples[1] = predictions[1]; + pDecodedSamples[2] = predictions[2]; + pDecodedSamples[3] = predictions[3]; + } else { + _mm_storeu_si128((__m128i*)pDecodedSamples, riceParamParts128); + } +} +#endif + #if 0 // Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the // sake of readability and should only be used as a reference. @@ -2288,6 +2841,7 @@ static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_ui } #endif +#if 0 static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) { drflac_assert(riceParam > 0); // <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. @@ -2344,74 +2898,335 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac bs->cache <<= bitCountLo; } - *pZeroCounterOut = zeroCounter; - *pRiceParamPartOut = riceParamPart; + pZeroCounterOut[0] = zeroCounter; + pRiceParamPartOut[0] = riceParamPart; + return DRFLAC_TRUE; } +#endif -static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts__param_equals_zero(drflac_bs* bs, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) +static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) { - drflac_cache_t riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(0); + drflac_uint32 riceParamPlus1 = riceParam + 1; + //drflac_cache_t riceParamPlus1Mask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1); + drflac_uint32 riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1); + drflac_uint32 riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1; - drflac_uint32 zeroCounter = 0; - while (bs->cache == 0) { - zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs); - if (!drflac__reload_cache(bs)) { - return DRFLAC_FALSE; - } - } + // The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have + // no idea how this will work in practice... + drflac_cache_t bs_cache = bs->cache; + drflac_uint32 bs_consumedBits = bs->consumedBits; - drflac_uint32 setBitOffsetPlus1 = drflac__clz(bs->cache); - zeroCounter += setBitOffsetPlus1; - setBitOffsetPlus1 += 1; + // The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. + drflac_uint32 lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + pZeroCounterOut[0] = lzcount; + // It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting + // this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled + // outside of this function at a higher level. + extract_rice_param_part: + bs_cache <<= lzcount; + bs_consumedBits += lzcount; - drflac_uint32 riceParamPart; - drflac_uint32 riceLength = setBitOffsetPlus1; - if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { - riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength)); - - bs->consumedBits += riceLength; - bs->cache <<= riceLength; - } else { - // It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. - drflac_uint32 bitCountLo = riceLength + bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs); - - if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { -#ifndef DR_FLAC_NO_CRC - drflac__update_crc16(bs); -#endif - bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); - bs->consumedBits = 0; -#ifndef DR_FLAC_NO_CRC - bs->crc16Cache = bs->cache; -#endif + if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) { + // Getting here means the rice parameter part is wholly contained within the current cache line. + pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift); + bs_cache <<= riceParamPlus1; + bs_consumedBits += riceParamPlus1; } else { - // Slow path. We need to fetch more data from the client. - if (!drflac__reload_cache(bs)) { - return DRFLAC_FALSE; + // Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache + // line, reload the cache, and then combine it with the head of the next cache line. + + // Grab the high part of the rice parameter part. + drflac_uint32 riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift); + + // Before reloading the cache we need to grab the size in bits of the low part. + drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits; + drflac_assert(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32); + + // Now reload the cache. + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = riceParamPartLoBitCount; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + // Slow path. We need to fetch more data from the client. + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount; + } + + // We should now have enough information to construct the rice parameter part. + drflac_uint32 riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount))); + pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo; + + bs_cache <<= riceParamPartLoBitCount; + } + } else { + // Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call + // to drflac__clz() and we need to reload the cache. + drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits); + for (;;) { + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = 0; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + // Slow path. We need to fetch more data from the client. + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits; + } + + lzcount = drflac__clz(bs_cache); + zeroCounter += lzcount; + + if (lzcount < sizeof(bs_cache)*8) { + break; } } - riceParamPart = (drflac_uint32)(DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo)); - - bs->consumedBits += bitCountLo; - bs->cache <<= bitCountLo; + pZeroCounterOut[0] = zeroCounter; + goto extract_rice_param_part; } - *pZeroCounterOut = zeroCounter; - *pRiceParamPartOut = riceParamPart; + // Make sure the cache is restored at the end of it all. + bs->cache = bs_cache; + bs->consumedBits = bs_consumedBits; + + return DRFLAC_TRUE; +} + +static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x4(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) +{ + drflac_uint32 riceParamPlus1 = riceParam + 1; + //drflac_cache_t riceParamPlus1Mask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1); + drflac_uint32 riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1); + drflac_uint32 riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1; + + // The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have + // no idea how this will work in practice... + drflac_cache_t bs_cache = bs->cache; + drflac_uint32 bs_consumedBits = bs->consumedBits; + + // What this is doing is trying to efficiently extract 4 rice parts at a time, the idea being that we can exploit certain properties + // to our advantage to make things more efficient. + for (int i = 0; i < 4; ++i) { + // The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. + drflac_uint32 lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + pZeroCounterOut[i] = lzcount; + + // It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting + // this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled + // outside of this function at a higher level. + extract_rice_param_part: + bs_cache <<= lzcount; + bs_consumedBits += lzcount; + + if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) { + // Getting here means the rice parameter part is wholly contained within the current cache line. + pRiceParamPartOut[i] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift); + bs_cache <<= riceParamPlus1; + bs_consumedBits += riceParamPlus1; + } else { + // Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache + // line, reload the cache, and then combine it with the head of the next cache line. + + // Grab the high part of the rice parameter part. + drflac_uint32 riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift); + + // Before reloading the cache we need to grab the size in bits of the low part. + drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits; + + // Now reload the cache. + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = riceParamPartLoBitCount; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + // Slow path. We need to fetch more data from the client. + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount; + } + + // We should now have enough information to construct the rice parameter part. + drflac_uint32 riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount))); + pRiceParamPartOut[i] = riceParamPartHi | riceParamPartLo; + + bs_cache <<= riceParamPartLoBitCount; + } + } else { + // Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call + // to drflac__clz() and we need to reload the cache. + drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits); + for (;;) { + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = 0; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + // Slow path. We need to fetch more data from the client. + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits; + } + + lzcount = drflac__clz(bs_cache); + zeroCounter += lzcount; + + if (lzcount < sizeof(bs_cache)*8) { + break; + } + } + + pZeroCounterOut[i] = zeroCounter; + goto extract_rice_param_part; + } + } + + // Make sure the cache is restored at the end of it all. + bs->cache = bs_cache; + bs->consumedBits = bs_consumedBits; + + return DRFLAC_TRUE; +} + +static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam) +{ + drflac_uint32 riceParamPlus1 = riceParam + 1; + drflac_uint32 riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1; + + // The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have + // no idea how this will work in practice... + drflac_cache_t bs_cache = bs->cache; + drflac_uint32 bs_consumedBits = bs->consumedBits; + + // The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. + drflac_uint32 lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + // It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting + // this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled + // outside of this function at a higher level. + extract_rice_param_part: + bs_cache <<= lzcount; + bs_consumedBits += lzcount; + + if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) { + // Getting here means the rice parameter part is wholly contained within the current cache line. + bs_cache <<= riceParamPlus1; + bs_consumedBits += riceParamPlus1; + } else { + // Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache + // line, reload the cache, and then combine it with the head of the next cache line. + + // Before reloading the cache we need to grab the size in bits of the low part. + drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits; + drflac_assert(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32); + + // Now reload the cache. + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = riceParamPartLoBitCount; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + // Slow path. We need to fetch more data from the client. + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount; + } + + bs_cache <<= riceParamPartLoBitCount; + } + } else { + // Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call + // to drflac__clz() and we need to reload the cache. + for (;;) { + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = 0; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + // Slow path. We need to fetch more data from the client. + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits; + } + + lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + break; + } + } + + goto extract_rice_param_part; + } + + // Make sure the cache is restored at the end of it all. + bs->cache = bs_cache; + bs->consumedBits = bs_consumedBits; + return DRFLAC_TRUE; } -static drflac_bool32 drflac__decode_samples_with_residual__rice__simple(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { drflac_assert(bs != NULL); drflac_assert(count > 0); drflac_assert(pSamplesOut != NULL); - static drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; drflac_uint32 zeroCountPart0; drflac_uint32 zeroCountPart1; @@ -2421,57 +3236,92 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__simple(drflac_b drflac_uint32 riceParamPart1; drflac_uint32 riceParamPart2; drflac_uint32 riceParamPart3; - drflac_uint32 i4 = 0; - drflac_uint32 count4 = count >> 2; - while (i4 < count4) { - // Rice extraction. - if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart0, &riceParamPart0) || - !drflac__read_rice_parts(bs, riceParam, &zeroCountPart1, &riceParamPart1) || - !drflac__read_rice_parts(bs, riceParam, &zeroCountPart2, &riceParamPart2) || - !drflac__read_rice_parts(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { - return DRFLAC_FALSE; - } + drflac_uint32 riceParamMask = ~((~0UL) << riceParam); + const drflac_int32* pSamplesOutEnd = pSamplesOut + ((count >> 2) << 2); - riceParamPart0 |= (zeroCountPart0 << riceParam); - riceParamPart1 |= (zeroCountPart1 << riceParam); - riceParamPart2 |= (zeroCountPart2 << riceParam); - riceParamPart3 |= (zeroCountPart3 << riceParam); + if (bitsPerSample >= 24) { + while (pSamplesOut < pSamplesOutEnd) { + // Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version + // against an array. Not sure why, but perhaps it's making more efficient use of registers? + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { + return DRFLAC_FALSE; + } - riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; - riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; - riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; - riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; + riceParamPart0 &= riceParamMask; + riceParamPart1 &= riceParamMask; + riceParamPart2 &= riceParamMask; + riceParamPart3 &= riceParamMask; + + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart1 |= (zeroCountPart1 << riceParam); + riceParamPart2 |= (zeroCountPart2 << riceParam); + riceParamPart3 |= (zeroCountPart3 << riceParam); + + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; + riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; + riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; - if (bitsPerSample > 16) { pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1); pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2); pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3); - } else { + + pSamplesOut += 4; + } + } else { + while (pSamplesOut < pSamplesOutEnd) { + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { + return DRFLAC_FALSE; + } + + riceParamPart0 &= riceParamMask; + riceParamPart1 &= riceParamMask; + riceParamPart2 &= riceParamMask; + riceParamPart3 &= riceParamMask; + + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart1 |= (zeroCountPart1 << riceParam); + riceParamPart2 |= (zeroCountPart2 << riceParam); + riceParamPart3 |= (zeroCountPart3 << riceParam); + + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; + riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; + riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1); pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2); pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3); - } - i4 += 1; - pSamplesOut += 4; + pSamplesOut += 4; + } } - drflac_uint32 i = i4 << 2; + + + drflac_uint32 i = ((count >> 2) << 2); while (i < count) { // Rice extraction. - if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { return DRFLAC_FALSE; } // Rice reconstruction. + riceParamPart0 &= riceParamMask; riceParamPart0 |= (zeroCountPart0 << riceParam); riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; //riceParamPart0 = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1); // Sample reconstruction. - if (bitsPerSample > 16) { + if (bitsPerSample >= 24) { pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); } else { pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); @@ -2480,11 +3330,12 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__simple(drflac_b i += 1; pSamplesOut += 1; } - + return DRFLAC_TRUE; } -static drflac_bool32 drflac__decode_samples_with_residual__rice__param_equals_zero(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +#if defined(DRFLAC_SUPPORT_SSE41) +static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { drflac_assert(bs != NULL); drflac_assert(count > 0); @@ -2492,67 +3343,126 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__param_equals_ze static drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; - drflac_uint32 zeroCountPart0; - drflac_uint32 zeroCountPart1; - drflac_uint32 zeroCountPart2; - drflac_uint32 zeroCountPart3; - drflac_uint32 riceParamPart0; - drflac_uint32 riceParamPart1; - drflac_uint32 riceParamPart2; - drflac_uint32 riceParamPart3; - drflac_uint32 i4 = 0; - drflac_uint32 count4 = count >> 2; - while (i4 < count4) { - // Rice extraction. - if (!drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart0, &riceParamPart0) || - !drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart1, &riceParamPart1) || - !drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart2, &riceParamPart2) || - !drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart3, &riceParamPart3)) { - return DRFLAC_FALSE; + //drflac_uint32 zeroCountParts[4]; + //drflac_uint32 riceParamParts[4]; + + drflac_uint32 zeroCountParts0; + drflac_uint32 zeroCountParts1; + drflac_uint32 zeroCountParts2; + drflac_uint32 zeroCountParts3; + drflac_uint32 riceParamParts0; + drflac_uint32 riceParamParts1; + drflac_uint32 riceParamParts2; + drflac_uint32 riceParamParts3; + + drflac_uint32 riceParamMask = ~((~0UL) << riceParam); + __m128i riceParamMask128 = _mm_set1_epi32(riceParamMask); + __m128i one = _mm_set1_epi32(0x01); + + const drflac_int32* pSamplesOutEnd = pSamplesOut + ((count >> 2) << 2); + + if (bitsPerSample >= 24) { + while (pSamplesOut < pSamplesOutEnd) { + // Rice extraction. + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) { + return DRFLAC_FALSE; + } + + __m128i zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0); + __m128i riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0); + + riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128); + riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam)); + riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, one), _mm_set1_epi32(0xFFFFFFFF))); // <-- Only supported from SSE4.1 + //riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, one)), one)); // <-- SSE2 compatible + + drflac_uint32 riceParamParts[4]; + _mm_storeu_si128((__m128i*)riceParamParts, riceParamPart128); + + #if defined(DRFLAC_64BIT) + // The scalar implementation seems to be faster on 64-bit in my testing. + drflac__calculate_prediction_64_x4(order, shift, coefficients, riceParamParts, pSamplesOut); + #else + pSamplesOut[0] = riceParamParts[0] + drflac__calculate_prediction_64__sse41(order, shift, coefficients, pSamplesOut + 0); + pSamplesOut[1] = riceParamParts[1] + drflac__calculate_prediction_64__sse41(order, shift, coefficients, pSamplesOut + 1); + pSamplesOut[2] = riceParamParts[2] + drflac__calculate_prediction_64__sse41(order, shift, coefficients, pSamplesOut + 2); + pSamplesOut[3] = riceParamParts[3] + drflac__calculate_prediction_64__sse41(order, shift, coefficients, pSamplesOut + 3); + #endif + + pSamplesOut += 4; + } + } else { + drflac_int32 coefficientsUnaligned[32*4 + 4] = {0}; + drflac_int32* coefficients128 = (drflac_int32*)(((size_t)coefficientsUnaligned + 15) & ~15); + for (drflac_uint32 i = 0; i < order; ++i) { + coefficients128[i*4+0] = coefficients[i]; + coefficients128[i*4+1] = coefficients[i]; + coefficients128[i*4+2] = coefficients[i]; + coefficients128[i*4+3] = coefficients[i]; } - riceParamPart0 |= zeroCountPart0; - riceParamPart1 |= zeroCountPart1; - riceParamPart2 |= zeroCountPart2; - riceParamPart3 |= zeroCountPart3; + while (pSamplesOut < pSamplesOutEnd) { + // Rice extraction. +#if 1 + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) { + return DRFLAC_FALSE; + } - riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; - riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; - riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; - riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; + __m128i zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0); + __m128i riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0); +#else + if (!drflac__read_rice_parts_x4(bs, riceParam, zeroCountParts, riceParamParts)) { + return DRFLAC_FALSE; + } - if (bitsPerSample > 16) { - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); - pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1); - pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2); - pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3); - } else { - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); - pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1); - pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2); - pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3); + __m128i zeroCountPart128 = _mm_set_epi32(zeroCountParts[3], zeroCountParts[2], zeroCountParts[1], zeroCountParts[0]); + __m128i riceParamPart128 = _mm_set_epi32(riceParamParts[3], riceParamParts[2], riceParamParts[1], riceParamParts[0]); +#endif + + riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128); + riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam)); + riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, one), _mm_set1_epi32(0xFFFFFFFF))); + +#if 1 + drflac__calculate_prediction_32_x4__sse41(order, shift, (const __m128i*)coefficients128, riceParamPart128, pSamplesOut); +#else + drflac_int32 riceParamParts[4]; + _mm_storeu_si128((__m128i*)riceParamParts, riceParamPart128); + + pSamplesOut[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); + pSamplesOut[1] = riceParamParts[1] + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1); + pSamplesOut[2] = riceParamParts[2] + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2); + pSamplesOut[3] = riceParamParts[3] + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3); +#endif + + pSamplesOut += 4; } - - i4 += 1; - pSamplesOut += 4; } - drflac_uint32 i = i4 << 2; + + drflac_uint32 i = ((count >> 2) << 2); while (i < count) { // Rice extraction. - if (!drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart0, &riceParamPart0)) { + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) { return DRFLAC_FALSE; } // Rice reconstruction. - riceParamPart0 |= zeroCountPart0; - riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + riceParamParts0 &= riceParamMask; + riceParamParts0 |= (zeroCountParts0 << riceParam); + riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01]; // Sample reconstruction. - if (bitsPerSample > 16) { - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); + if (bitsPerSample >= 24) { + pSamplesOut[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); } else { - pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); + pSamplesOut[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); } i += 1; @@ -2561,18 +3471,23 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__param_equals_ze return DRFLAC_TRUE; } +#endif static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) { -#if 0 - return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); -#else - if (riceParam != 0) { - return drflac__decode_samples_with_residual__rice__simple(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); - } else { - return drflac__decode_samples_with_residual__rice__param_equals_zero(bs, bitsPerSample, count, order, shift, coefficients, pSamplesOut); - } +#if defined(DRFLAC_SUPPORT_SSE41) + if (drflac__gIsSSE41Supported) { + return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + } else #endif + { + // Scalar fallback. + #if 0 + return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + #else + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + #endif + } } // Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. @@ -2581,20 +3496,9 @@ static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_ drflac_assert(bs != NULL); drflac_assert(count > 0); - drflac_uint32 zeroCountPart; - drflac_uint32 riceParamPart; - - if (riceParam != 0) { - for (drflac_uint32 i = 0; i < count; ++i) { - if (!drflac__read_rice_parts(bs, riceParam, &zeroCountPart, &riceParamPart)) { - return DRFLAC_FALSE; - } - } - } else { - for (drflac_uint32 i = 0; i < count; ++i) { - if (!drflac__read_rice_parts__param_equals_zero(bs, &zeroCountPart, &riceParamPart)) { - return DRFLAC_FALSE; - } + for (drflac_uint32 i = 0; i < count; ++i) { + if (!drflac__seek_rice_parts(bs, riceParam)) { + return DRFLAC_FALSE; } } @@ -2833,7 +3737,7 @@ static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples) { - drflac_int32 lpcCoefficientsTable[5][4] = { + static drflac_int32 lpcCoefficientsTable[5][4] = { {0, 0, 0, 0}, {1, 0, 0, 0}, {2, -1, 0, 0}, @@ -2890,6 +3794,7 @@ static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 bl drflac_int32 coefficients[32]; + drflac_zero_memory(coefficients, sizeof(coefficients)); for (i = 0; i < lpcOrder; ++i) { if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) { return DRFLAC_FALSE; @@ -2904,7 +3809,7 @@ static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 bl } -static drflac_bool32 drflac__read_next_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header) +static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header) { drflac_assert(bs != NULL); drflac_assert(header != NULL); @@ -3275,7 +4180,7 @@ static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignm return lookup[channelAssignment]; } -static drflac_result drflac__decode_frame(drflac* pFlac) +static drflac_result drflac__decode_flac_frame(drflac* pFlac) { // This function should be called while the stream is sitting on the first byte after the frame header. drflac_zero_memory(pFlac->currentFrame.subframes, sizeof(pFlac->currentFrame.subframes)); @@ -3292,7 +4197,7 @@ static drflac_result drflac__decode_frame(drflac* pFlac) } for (int i = 0; i < channelCount; ++i) { - if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFrame, i, pFlac->pDecodedSamples + (pFlac->currentFrame.header.blockSize * i))) { + if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFrame, i, pFlac->pDecodedSamples + ((pFlac->currentFrame.header.blockSize+DRFLAC_LEADING_SAMPLES) * i) + DRFLAC_LEADING_SAMPLES)) { return DRFLAC_ERROR; } } @@ -3324,7 +4229,7 @@ static drflac_result drflac__decode_frame(drflac* pFlac) return DRFLAC_SUCCESS; } -static drflac_result drflac__seek_frame(drflac* pFlac) +static drflac_result drflac__seek_flac_frame(drflac* pFlac) { int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment); for (int i = 0; i < channelCount; ++i) { @@ -3356,16 +4261,16 @@ static drflac_result drflac__seek_frame(drflac* pFlac) return DRFLAC_SUCCESS; } -static drflac_bool32 drflac__read_and_decode_next_frame(drflac* pFlac) +static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac) { drflac_assert(pFlac != NULL); for (;;) { - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { return DRFLAC_FALSE; } - drflac_result result = drflac__decode_frame(pFlac); + drflac_result result = drflac__decode_flac_frame(pFlac); if (result != DRFLAC_SUCCESS) { if (result == DRFLAC_CRC_MISMATCH) { continue; // CRC mismatch. Skip to the next frame. @@ -3411,11 +4316,41 @@ static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac) return result; } -static DRFLAC_INLINE drflac_result drflac__seek_to_next_frame(drflac* pFlac) +static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac) { // This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. drflac_assert(pFlac != NULL); - return drflac__seek_frame(pFlac); + return drflac__seek_flac_frame(pFlac); +} + +drflac_uint64 drflac__seek_forward_by_samples(drflac* pFlac, drflac_uint64 samplesToRead) +{ + drflac_uint64 samplesRead = 0; + while (samplesToRead > 0) { + if (pFlac->currentFrame.samplesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; // Couldn't read the next frame, so just break from the loop and return. + } + } else { + if (pFlac->currentFrame.samplesRemaining > samplesToRead) { + samplesRead += samplesToRead; + pFlac->currentFrame.samplesRemaining -= (drflac_uint32)samplesToRead; // <-- Safe cast. Will always be < currentFrame.samplesRemaining < 65536. + samplesToRead = 0; + } else { + samplesRead += pFlac->currentFrame.samplesRemaining; + samplesToRead -= pFlac->currentFrame.samplesRemaining; + pFlac->currentFrame.samplesRemaining = 0; + } + } + } + + pFlac->currentSample += samplesRead; + return samplesRead; +} + +drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek) +{ + return drflac__seek_forward_by_samples(pFlac, pcmFramesToSeek*pFlac->channels); } static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_uint64 sampleIndex) @@ -3432,7 +4367,7 @@ static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_u // The frame header for the first frame may not yet have been read. We need to do that if necessary. if (pFlac->currentSample == 0 && pFlac->currentFrame.samplesRemaining == 0) { - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { return DRFLAC_FALSE; } } else { @@ -3448,7 +4383,7 @@ static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_u } // Decode the first frame in preparation for sample-exact seeking below. - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { return DRFLAC_FALSE; } } @@ -3467,10 +4402,10 @@ static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_u drflac_uint64 samplesToDecode = sampleIndex - runningSampleCount; if (!isMidFrame) { - drflac_result result = drflac__decode_frame(pFlac); + drflac_result result = drflac__decode_flac_frame(pFlac); if (result == DRFLAC_SUCCESS) { // The frame is valid. We just need to skip over some samples to ensure it's sample-exact. - return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; // <-- If this fails, something bad has happened (it should never fail). + return drflac__seek_forward_by_samples(pFlac, samplesToDecode) == samplesToDecode; // <-- If this fails, something bad has happened (it should never fail). } else { if (result == DRFLAC_CRC_MISMATCH) { goto next_iteration; // CRC mismatch. Pretend this frame never existed. @@ -3480,13 +4415,13 @@ static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_u } } else { // We started seeking mid-frame which means we need to skip the frame decoding part. - return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; + return drflac__seek_forward_by_samples(pFlac, samplesToDecode) == samplesToDecode; } } else { // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this // frame never existed and leave the running sample count untouched. if (!isMidFrame) { - drflac_result result = drflac__seek_to_next_frame(pFlac); + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); if (result == DRFLAC_SUCCESS) { runningSampleCount += sampleCountInThisFrame; } else { @@ -3498,7 +4433,7 @@ static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_u } } else { // We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with - // drflac__seek_to_next_frame() which only works if the decoder is sitting on the byte just after the frame header. + // drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header. runningSampleCount += pFlac->currentFrame.samplesRemaining; pFlac->currentFrame.samplesRemaining = 0; isMidFrame = DRFLAC_FALSE; @@ -3507,7 +4442,7 @@ static drflac_bool32 drflac__seek_to_sample__brute_force(drflac* pFlac, drflac_u next_iteration: // Grab the next frame in preparation for the next iteration. - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { return DRFLAC_FALSE; } } @@ -3544,7 +4479,7 @@ static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_ui // The frame header for the first frame may not yet have been read. We need to do that if necessary. if (pFlac->currentSample == 0 && pFlac->currentFrame.samplesRemaining == 0) { - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { return DRFLAC_FALSE; } } else { @@ -3559,7 +4494,7 @@ static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_ui } // Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { return DRFLAC_FALSE; } } @@ -3576,10 +4511,10 @@ static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_ui drflac_uint64 samplesToDecode = sampleIndex - runningSampleCount; if (!isMidFrame) { - drflac_result result = drflac__decode_frame(pFlac); + drflac_result result = drflac__decode_flac_frame(pFlac); if (result == DRFLAC_SUCCESS) { // The frame is valid. We just need to skip over some samples to ensure it's sample-exact. - return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; // <-- If this fails, something bad has happened (it should never fail). + return drflac__seek_forward_by_samples(pFlac, samplesToDecode) == samplesToDecode; // <-- If this fails, something bad has happened (it should never fail). } else { if (result == DRFLAC_CRC_MISMATCH) { goto next_iteration; // CRC mismatch. Pretend this frame never existed. @@ -3589,13 +4524,13 @@ static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_ui } } else { // We started seeking mid-frame which means we need to skip the frame decoding part. - return drflac_read_s32(pFlac, samplesToDecode, NULL) == samplesToDecode; + return drflac__seek_forward_by_samples(pFlac, samplesToDecode) == samplesToDecode; } } else { // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this // frame never existed and leave the running sample count untouched. if (!isMidFrame) { - drflac_result result = drflac__seek_to_next_frame(pFlac); + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); if (result == DRFLAC_SUCCESS) { runningSampleCount += sampleCountInThisFrame; } else { @@ -3607,7 +4542,7 @@ static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_ui } } else { // We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with - // drflac__seek_to_next_frame() which only works if the decoder is sitting on the byte just after the frame header. + // drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header. runningSampleCount += pFlac->currentFrame.samplesRemaining; pFlac->currentFrame.samplesRemaining = 0; isMidFrame = DRFLAC_FALSE; @@ -3616,7 +4551,7 @@ static drflac_bool32 drflac__seek_to_sample__seek_table(drflac* pFlac, drflac_ui next_iteration: // Grab the next frame in preparation for the next iteration. - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { return DRFLAC_FALSE; } } @@ -4086,7 +5021,7 @@ drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_ pInit->hasStreamInfoBlock = DRFLAC_FALSE; pInit->hasMetadataBlocks = DRFLAC_FALSE; - if (!drflac__read_next_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) { + if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) { return DRFLAC_FALSE; // Couldn't find a frame. } @@ -4112,7 +5047,7 @@ drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_ pInit->bitsPerSample = streaminfo.bitsPerSample; pInit->totalSampleCount = streaminfo.totalSampleCount; pInit->maxBlockSize = streaminfo.maxBlockSize; // Don't care about the min block size - only the max (used for determining the size of the memory allocation). - pInit->hasMetadataBlocks = !isLastBlock; + pInit->hasMetadataBlocks = !isLastBlock; if (onMeta) { drflac_metadata metadata; @@ -4695,14 +5630,14 @@ drflac_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, drflac_uint64 sampleInde // bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the // standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks // the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read - // using the native FLAC decoding APIs, such as drflac__read_next_frame_header(), need to be re-implemented so as to + // using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), need to be re-implemented so as to // avoid the use of the drflac_bs object. // // Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons: // 1) Seeking is already partially accelerated using Ogg's paging system in the code block above. // 2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon. // 3) Simplicity. - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { return DRFLAC_FALSE; } @@ -4714,14 +5649,14 @@ drflac_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, drflac_uint64 sampleInde if (sampleIndex < (runningSampleCount + sampleCountInThisFrame)) { // The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend // it never existed and keep iterating. - drflac_result result = drflac__decode_frame(pFlac); + drflac_result result = drflac__decode_flac_frame(pFlac); if (result == DRFLAC_SUCCESS) { // The frame is valid. We just need to skip over some samples to ensure it's sample-exact. drflac_uint64 samplesToDecode = (size_t)(sampleIndex - runningSampleCount); // <-- Safe cast because the maximum number of samples in a frame is 65535. if (samplesToDecode == 0) { return DRFLAC_TRUE; } - return drflac_read_s32(pFlac, samplesToDecode, NULL) != 0; // <-- If this fails, something bad has happened (it should never fail). + return drflac__seek_forward_by_samples(pFlac, samplesToDecode) == samplesToDecode; // <-- If this fails, something bad has happened (it should never fail). } else { if (result == DRFLAC_CRC_MISMATCH) { continue; // CRC mismatch. Pretend this frame never existed. @@ -4732,7 +5667,7 @@ drflac_bool32 drflac_ogg__seek_to_sample(drflac* pFlac, drflac_uint64 sampleInde } else { // It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this // frame never existed and leave the running sample count untouched. - drflac_result result = drflac__seek_to_next_frame(pFlac); + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); if (result == DRFLAC_SUCCESS) { runningSampleCount += sampleCountInThisFrame; } else { @@ -4986,15 +5921,16 @@ void drflac__init_from_info(drflac* pFlac, drflac_init_info* pInit) drflac_assert(pInit != NULL); drflac_zero_memory(pFlac, sizeof(*pFlac)); - pFlac->bs = pInit->bs; - pFlac->onMeta = pInit->onMeta; - pFlac->pUserDataMD = pInit->pUserDataMD; - pFlac->maxBlockSize = pInit->maxBlockSize; - pFlac->sampleRate = pInit->sampleRate; - pFlac->channels = (drflac_uint8)pInit->channels; - pFlac->bitsPerSample = (drflac_uint8)pInit->bitsPerSample; - pFlac->totalSampleCount = pInit->totalSampleCount; - pFlac->container = pInit->container; + pFlac->bs = pInit->bs; + pFlac->onMeta = pInit->onMeta; + pFlac->pUserDataMD = pInit->pUserDataMD; + pFlac->maxBlockSize = pInit->maxBlockSize; + pFlac->sampleRate = pInit->sampleRate; + pFlac->channels = (drflac_uint8)pInit->channels; + pFlac->bitsPerSample = (drflac_uint8)pInit->bitsPerSample; + pFlac->totalSampleCount = pInit->totalSampleCount; + pFlac->totalPCMFrameCount = pInit->totalSampleCount / pFlac->channels; + pFlac->container = pInit->container; } drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD) @@ -5021,10 +5957,10 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p // The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector // we are supporting. drflac_uint32 wholeSIMDVectorCountPerChannel; - if ((init.maxBlockSize % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) { - wholeSIMDVectorCountPerChannel = (init.maxBlockSize / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))); + if (((init.maxBlockSize+DRFLAC_LEADING_SAMPLES) % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) { + wholeSIMDVectorCountPerChannel = ((init.maxBlockSize+DRFLAC_LEADING_SAMPLES) / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))); } else { - wholeSIMDVectorCountPerChannel = (init.maxBlockSize / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1; + wholeSIMDVectorCountPerChannel = ((init.maxBlockSize+DRFLAC_LEADING_SAMPLES) / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1; } drflac_uint32 decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels; @@ -5151,12 +6087,12 @@ drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_p pFlac->currentFrame.header = init.firstFrameHeader; do { - drflac_result result = drflac__decode_frame(pFlac); + drflac_result result = drflac__decode_flac_frame(pFlac); if (result == DRFLAC_SUCCESS) { break; } else { if (result == DRFLAC_CRC_MISMATCH) { - if (!drflac__read_next_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFrame.header)) { DRFLAC_FREE(pFlac); return NULL; } @@ -5475,31 +6411,6 @@ drflac_uint64 drflac__read_s32__misaligned(drflac* pFlac, drflac_uint64 samplesT return samplesRead; } -drflac_uint64 drflac__seek_forward_by_samples(drflac* pFlac, drflac_uint64 samplesToRead) -{ - drflac_uint64 samplesRead = 0; - while (samplesToRead > 0) { - if (pFlac->currentFrame.samplesRemaining == 0) { - if (!drflac__read_and_decode_next_frame(pFlac)) { - break; // Couldn't read the next frame, so just break from the loop and return. - } - } else { - if (pFlac->currentFrame.samplesRemaining > samplesToRead) { - samplesRead += samplesToRead; - pFlac->currentFrame.samplesRemaining -= (drflac_uint32)samplesToRead; // <-- Safe cast. Will always be < currentFrame.samplesRemaining < 65536. - samplesToRead = 0; - } else { - samplesRead += pFlac->currentFrame.samplesRemaining; - samplesToRead -= pFlac->currentFrame.samplesRemaining; - pFlac->currentFrame.samplesRemaining = 0; - } - } - } - - pFlac->currentSample += samplesRead; - return samplesRead; -} - drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int32* bufferOut) { // Note that is allowed to be null, in which case this will act like a seek. @@ -5516,7 +6427,7 @@ drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac while (samplesToRead > 0) { // If we've run out of samples in this frame, go to the next. if (pFlac->currentFrame.samplesRemaining == 0) { - if (!drflac__read_and_decode_next_frame(pFlac)) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { break; // Couldn't read the next frame, so just break from the loop and return. } } else { @@ -5649,11 +6560,37 @@ drflac_uint64 drflac_read_s32(drflac* pFlac, drflac_uint64 samplesToRead, drflac return samplesRead; } +drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut) +{ +#if defined(_MSC_VER) && !defined(__clang__) + #pragma warning(push) + #pragma warning(disable:4996) // was declared deprecated +#elif defined(__GNUC__) || defined(__clang__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + return drflac_read_s32(pFlac, framesToRead*pFlac->channels, pBufferOut) / pFlac->channels; +#if defined(_MSC_VER) && !defined(__clang__) + #pragma warning(pop) +#elif defined(__GNUC__) || defined(__clang__) + #pragma GCC diagnostic pop +#endif +} + + drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac_int16* pBufferOut) { // This reads samples in 2 passes and can probably be optimized. drflac_uint64 totalSamplesRead = 0; +#if defined(_MSC_VER) && !defined(__clang__) + #pragma warning(push) + #pragma warning(disable:4996) // was declared deprecated +#elif defined(__GNUC__) || defined(__clang__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + while (samplesToRead > 0) { drflac_int32 samples32[4096]; drflac_uint64 samplesJustRead = drflac_read_s32(pFlac, (samplesToRead > 4096) ? 4096 : samplesToRead, samples32); @@ -5671,14 +6608,57 @@ drflac_uint64 drflac_read_s16(drflac* pFlac, drflac_uint64 samplesToRead, drflac pBufferOut += samplesJustRead; } +#if defined(_MSC_VER) && !defined(__clang__) + #pragma warning(pop) +#elif defined(__GNUC__) || defined(__clang__) + #pragma GCC diagnostic pop +#endif + return totalSamplesRead; } +drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut) +{ + // This reads samples in 2 passes and can probably be optimized. + drflac_uint64 totalPCMFramesRead = 0; + + while (framesToRead > 0) { + drflac_int32 samples32[4096]; + drflac_uint64 framesJustRead = drflac_read_pcm_frames_s32(pFlac, (framesToRead > 4096/pFlac->channels) ? 4096/pFlac->channels : framesToRead, samples32); + if (framesJustRead == 0) { + break; // Reached the end. + } + + // s32 -> s16 + for (drflac_uint64 iFrame = 0; iFrame < framesJustRead; ++iFrame) { + for (drflac_uint32 iChannel = 0; iChannel < pFlac->channels; ++iChannel) { + drflac_uint64 iSample = iFrame*pFlac->channels + iChannel; + pBufferOut[iSample] = (drflac_int16)(samples32[iSample] >> 16); + } + } + + totalPCMFramesRead += framesJustRead; + framesToRead -= framesJustRead; + pBufferOut += framesJustRead * pFlac->channels; + } + + return totalPCMFramesRead; +} + + drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* pBufferOut) { // This reads samples in 2 passes and can probably be optimized. drflac_uint64 totalSamplesRead = 0; +#if defined(_MSC_VER) && !defined(__clang__) + #pragma warning(push) + #pragma warning(disable:4996) // was declared deprecated +#elif defined(__GNUC__) || defined(__clang__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + while (samplesToRead > 0) { drflac_int32 samples32[4096]; drflac_uint64 samplesJustRead = drflac_read_s32(pFlac, (samplesToRead > 4096) ? 4096 : samplesToRead, samples32); @@ -5696,9 +6676,645 @@ drflac_uint64 drflac_read_f32(drflac* pFlac, drflac_uint64 samplesToRead, float* pBufferOut += samplesJustRead; } +#if defined(_MSC_VER) && !defined(__clang__) + #pragma warning(pop) +#elif defined(__GNUC__) || defined(__clang__) + #pragma GCC diagnostic pop +#endif + return totalSamplesRead; } +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + int left = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample); + int side = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample); + int right = left - side; + + pOutputSamples[i*2+0] = (float)(left / 2147483648.0); + pOutputSamples[i*2+1] = (float)(right / 2147483648.0); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 frameCount4 = frameCount >> 2; + + float factor = 1 / 2147483648.0; + + drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample; + drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample; + for (drflac_uint64 i = 0; i < frameCount4; ++i) { + drflac_int32 left0 = pInputSamples0[i*4+0] << shift0; + drflac_int32 left1 = pInputSamples0[i*4+1] << shift0; + drflac_int32 left2 = pInputSamples0[i*4+2] << shift0; + drflac_int32 left3 = pInputSamples0[i*4+3] << shift0; + + drflac_int32 side0 = pInputSamples1[i*4+0] << shift1; + drflac_int32 side1 = pInputSamples1[i*4+1] << shift1; + drflac_int32 side2 = pInputSamples1[i*4+2] << shift1; + drflac_int32 side3 = pInputSamples1[i*4+3] << shift1; + + drflac_int32 right0 = left0 - side0; + drflac_int32 right1 = left1 - side1; + drflac_int32 right2 = left2 - side2; + drflac_int32 right3 = left3 - side3; + + pOutputSamples[i*8+0] = left0 * factor; + pOutputSamples[i*8+1] = right0 * factor; + pOutputSamples[i*8+2] = left1 * factor; + pOutputSamples[i*8+3] = right1 * factor; + pOutputSamples[i*8+4] = left2 * factor; + pOutputSamples[i*8+5] = right2 * factor; + pOutputSamples[i*8+6] = left3 * factor; + pOutputSamples[i*8+7] = right3 * factor; + } + + for (drflac_uint64 i = (frameCount4 << 2); i < frameCount; ++i) { + int left = pInputSamples0[i] << shift0; + int side = pInputSamples1[i] << shift1; + int right = left - side; + + pOutputSamples[i*2+0] = (float)(left * factor); + pOutputSamples[i*2+1] = (float)(right * factor); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_assert(pFlac->bitsPerSample <= 24); + + drflac_uint64 frameCount4 = frameCount >> 2; + + __m128 factor = _mm_set1_ps(1.0f / 8388608.0f); + int shift0 = (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample) - 8; + int shift1 = (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample) - 8; + + for (drflac_uint64 i = 0; i < frameCount4; ++i) { + __m128i inputSample0 = _mm_loadu_si128((const __m128i*)pInputSamples0 + i); + __m128i inputSample1 = _mm_loadu_si128((const __m128i*)pInputSamples1 + i); + + __m128i left = _mm_slli_epi32(inputSample0, shift0); + __m128i side = _mm_slli_epi32(inputSample1, shift1); + __m128i right = _mm_sub_epi32(left, side); + __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor); + __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor); + + pOutputSamples[i*8+0] = ((float*)&leftf)[0]; + pOutputSamples[i*8+1] = ((float*)&rightf)[0]; + pOutputSamples[i*8+2] = ((float*)&leftf)[1]; + pOutputSamples[i*8+3] = ((float*)&rightf)[1]; + pOutputSamples[i*8+4] = ((float*)&leftf)[2]; + pOutputSamples[i*8+5] = ((float*)&rightf)[2]; + pOutputSamples[i*8+6] = ((float*)&leftf)[3]; + pOutputSamples[i*8+7] = ((float*)&rightf)[3]; + } + + for (drflac_uint64 i = (frameCount4 << 2); i < frameCount; ++i) { + int left = pInputSamples0[i] << shift0; + int side = pInputSamples1[i] << shift1; + int right = left - side; + + pOutputSamples[i*2+0] = (float)(left / 8388608.0f); + pOutputSamples[i*2+1] = (float)(right / 8388608.0f); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + // Scalar fallback. +#if 0 + drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + int side = pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample); + int right = pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample); + int left = right + side; + + pOutputSamples[i*2+0] = (float)(left / 2147483648.0); + pOutputSamples[i*2+1] = (float)(right / 2147483648.0); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 frameCount4 = frameCount >> 2; + + float factor = 1 / 2147483648.0; + + drflac_int32 shift0 = unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample; + drflac_int32 shift1 = unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample; + for (drflac_uint64 i = 0; i < frameCount4; ++i) { + drflac_int32 side0 = pInputSamples0[i*4+0] << shift0; + drflac_int32 side1 = pInputSamples0[i*4+1] << shift0; + drflac_int32 side2 = pInputSamples0[i*4+2] << shift0; + drflac_int32 side3 = pInputSamples0[i*4+3] << shift0; + + drflac_int32 right0 = pInputSamples1[i*4+0] << shift1; + drflac_int32 right1 = pInputSamples1[i*4+1] << shift1; + drflac_int32 right2 = pInputSamples1[i*4+2] << shift1; + drflac_int32 right3 = pInputSamples1[i*4+3] << shift1; + + drflac_int32 left0 = right0 + side0; + drflac_int32 left1 = right1 + side1; + drflac_int32 left2 = right2 + side2; + drflac_int32 left3 = right3 + side3; + + pOutputSamples[i*8+0] = left0 * factor; + pOutputSamples[i*8+1] = right0 * factor; + pOutputSamples[i*8+2] = left1 * factor; + pOutputSamples[i*8+3] = right1 * factor; + pOutputSamples[i*8+4] = left2 * factor; + pOutputSamples[i*8+5] = right2 * factor; + pOutputSamples[i*8+6] = left3 * factor; + pOutputSamples[i*8+7] = right3 * factor; + } + + for (drflac_uint64 i = (frameCount4 << 2); i < frameCount; ++i) { + int side = pInputSamples0[i] << shift0; + int right = pInputSamples1[i] << shift1; + int left = right + side; + + pOutputSamples[i*2+0] = (float)(left * factor); + pOutputSamples[i*2+1] = (float)(right * factor); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_assert(pFlac->bitsPerSample <= 24); + + drflac_uint64 frameCount4 = frameCount >> 2; + + __m128 factor = _mm_set1_ps(1.0f / 8388608.0f); + int shift0 = (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample) - 8; + int shift1 = (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample) - 8; + + for (drflac_uint64 i = 0; i < frameCount4; ++i) { + __m128i inputSample0 = _mm_loadu_si128((const __m128i*)pInputSamples0 + i); + __m128i inputSample1 = _mm_loadu_si128((const __m128i*)pInputSamples1 + i); + + __m128i side = _mm_slli_epi32(inputSample0, shift0); + __m128i right = _mm_slli_epi32(inputSample1, shift1); + __m128i left = _mm_add_epi32(right, side); + __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor); + __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor); + + pOutputSamples[i*8+0] = ((float*)&leftf)[0]; + pOutputSamples[i*8+1] = ((float*)&rightf)[0]; + pOutputSamples[i*8+2] = ((float*)&leftf)[1]; + pOutputSamples[i*8+3] = ((float*)&rightf)[1]; + pOutputSamples[i*8+4] = ((float*)&leftf)[2]; + pOutputSamples[i*8+5] = ((float*)&rightf)[2]; + pOutputSamples[i*8+6] = ((float*)&leftf)[3]; + pOutputSamples[i*8+7] = ((float*)&rightf)[3]; + } + + for (drflac_uint64 i = (frameCount4 << 2); i < frameCount; ++i) { + int side = pInputSamples0[i] << shift0; + int right = pInputSamples1[i] << shift1; + int left = right + side; + + pOutputSamples[i*2+0] = (float)(left / 8388608.0f); + pOutputSamples[i*2+1] = (float)(right / 8388608.0f); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + // Scalar fallback. +#if 0 + drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + int mid = pInputSamples0[i] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int side = pInputSamples1[i] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (float)((((mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0); + pOutputSamples[i*2+1] = (float)((((mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 frameCount4 = frameCount >> 2; + + float factor = 1 / 2147483648.0; + + int shift = unusedBitsPerSample; + if (shift > 0) { + shift -= 1; + for (drflac_uint64 i = 0; i < frameCount4; ++i) { + int mid0 = pInputSamples0[i*4+0] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int mid1 = pInputSamples0[i*4+1] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int mid2 = pInputSamples0[i*4+2] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int mid3 = pInputSamples0[i*4+3] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + + int side0 = pInputSamples1[i*4+0] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + int side1 = pInputSamples1[i*4+1] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + int side2 = pInputSamples1[i*4+2] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + int side3 = pInputSamples1[i*4+3] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + + mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); + mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); + mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); + mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); + + int temp0L = ((mid0 + side0) << shift); + int temp1L = ((mid1 + side1) << shift); + int temp2L = ((mid2 + side2) << shift); + int temp3L = ((mid3 + side3) << shift); + + int temp0R = ((mid0 - side0) << shift); + int temp1R = ((mid1 - side1) << shift); + int temp2R = ((mid2 - side2) << shift); + int temp3R = ((mid3 - side3) << shift); + + pOutputSamples[i*8+0] = (float)(temp0L * factor); + pOutputSamples[i*8+1] = (float)(temp0R * factor); + pOutputSamples[i*8+2] = (float)(temp1L * factor); + pOutputSamples[i*8+3] = (float)(temp1R * factor); + pOutputSamples[i*8+4] = (float)(temp2L * factor); + pOutputSamples[i*8+5] = (float)(temp2R * factor); + pOutputSamples[i*8+6] = (float)(temp3L * factor); + pOutputSamples[i*8+7] = (float)(temp3R * factor); + } + } else { + for (drflac_uint64 i = 0; i < frameCount4; ++i) { + int mid0 = pInputSamples0[i*4+0] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int mid1 = pInputSamples0[i*4+1] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int mid2 = pInputSamples0[i*4+2] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int mid3 = pInputSamples0[i*4+3] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + + int side0 = pInputSamples1[i*4+0] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + int side1 = pInputSamples1[i*4+1] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + int side2 = pInputSamples1[i*4+2] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + int side3 = pInputSamples1[i*4+3] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + + mid0 = (((drflac_uint32)mid0) << 1) | (side0 & 0x01); + mid1 = (((drflac_uint32)mid1) << 1) | (side1 & 0x01); + mid2 = (((drflac_uint32)mid2) << 1) | (side2 & 0x01); + mid3 = (((drflac_uint32)mid3) << 1) | (side3 & 0x01); + + int temp0L = ((mid0 + side0) >> 1); + int temp1L = ((mid1 + side1) >> 1); + int temp2L = ((mid2 + side2) >> 1); + int temp3L = ((mid3 + side3) >> 1); + + int temp0R = ((mid0 - side0) >> 1); + int temp1R = ((mid1 - side1) >> 1); + int temp2R = ((mid2 - side2) >> 1); + int temp3R = ((mid3 - side3) >> 1); + + pOutputSamples[i*8+0] = (float)(temp0L * factor); + pOutputSamples[i*8+1] = (float)(temp0R * factor); + pOutputSamples[i*8+2] = (float)(temp1L * factor); + pOutputSamples[i*8+3] = (float)(temp1R * factor); + pOutputSamples[i*8+4] = (float)(temp2L * factor); + pOutputSamples[i*8+5] = (float)(temp2R * factor); + pOutputSamples[i*8+6] = (float)(temp3L * factor); + pOutputSamples[i*8+7] = (float)(temp3R * factor); + } + } + + for (drflac_uint64 i = (frameCount4 << 2); i < frameCount; ++i) { + int mid = pInputSamples0[i] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int side = pInputSamples1[i] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (float)((((mid + side) >> 1) << unusedBitsPerSample) * factor); + pOutputSamples[i*2+1] = (float)((((mid - side) >> 1) << unusedBitsPerSample) * factor); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_assert(pFlac->bitsPerSample <= 24); + + drflac_uint64 frameCount4 = frameCount >> 2; + + float factor = 1.0f / 8388608.0f; + __m128 factor128 = _mm_set1_ps(1.0f / 8388608.0f); + + int shift = unusedBitsPerSample - 8; + if (shift == 0) { + for (drflac_uint64 i = 0; i < frameCount4; ++i) { + __m128i inputSample0 = _mm_loadu_si128((const __m128i*)pInputSamples0 + i); + __m128i inputSample1 = _mm_loadu_si128((const __m128i*)pInputSamples1 + i); + + __m128i mid = _mm_slli_epi32(inputSample0, pFlac->currentFrame.subframes[0].wastedBitsPerSample); + __m128i side = _mm_slli_epi32(inputSample1, pFlac->currentFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + __m128i tempL = _mm_add_epi32(mid, side); + __m128i tempR = _mm_sub_epi32(mid, side); + + // Signed bit shift. + tempL = _mm_or_si128(_mm_srli_epi32(tempL, 1), _mm_and_si128(tempL, _mm_set1_epi32(0x80000000))); + tempR = _mm_or_si128(_mm_srli_epi32(tempR, 1), _mm_and_si128(tempR, _mm_set1_epi32(0x80000000))); + + __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128); + __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128); + + pOutputSamples[i*8+0] = ((float*)&leftf)[0]; + pOutputSamples[i*8+1] = ((float*)&rightf)[0]; + pOutputSamples[i*8+2] = ((float*)&leftf)[1]; + pOutputSamples[i*8+3] = ((float*)&rightf)[1]; + pOutputSamples[i*8+4] = ((float*)&leftf)[2]; + pOutputSamples[i*8+5] = ((float*)&rightf)[2]; + pOutputSamples[i*8+6] = ((float*)&leftf)[3]; + pOutputSamples[i*8+7] = ((float*)&rightf)[3]; + } + + for (drflac_uint64 i = (frameCount4 << 2); i < frameCount; ++i) { + int mid = pInputSamples0[i] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int side = pInputSamples1[i] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (float)(((mid + side) >> 1) * factor); + pOutputSamples[i*2+1] = (float)(((mid - side) >> 1) * factor); + } + } else { + for (drflac_uint64 i = 0; i < frameCount4; ++i) { + __m128i inputSample0 = _mm_loadu_si128((const __m128i*)pInputSamples0 + i); + __m128i inputSample1 = _mm_loadu_si128((const __m128i*)pInputSamples1 + i); + + __m128i mid = _mm_slli_epi32(inputSample0, pFlac->currentFrame.subframes[0].wastedBitsPerSample); + __m128i side = _mm_slli_epi32(inputSample1, pFlac->currentFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + __m128i tempL = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(mid, side), 1), shift); + __m128i tempR = _mm_slli_epi32(_mm_srli_epi32(_mm_sub_epi32(mid, side), 1), shift); + + __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128); + __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128); + + pOutputSamples[i*8+0] = ((float*)&leftf)[0]; + pOutputSamples[i*8+1] = ((float*)&rightf)[0]; + pOutputSamples[i*8+2] = ((float*)&leftf)[1]; + pOutputSamples[i*8+3] = ((float*)&rightf)[1]; + pOutputSamples[i*8+4] = ((float*)&leftf)[2]; + pOutputSamples[i*8+5] = ((float*)&rightf)[2]; + pOutputSamples[i*8+6] = ((float*)&leftf)[3]; + pOutputSamples[i*8+7] = ((float*)&rightf)[3]; + } + + for (drflac_uint64 i = (frameCount4 << 2); i < frameCount; ++i) { + int mid = pInputSamples0[i] << pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int side = pInputSamples1[i] << pFlac->currentFrame.subframes[1].wastedBitsPerSample; + + mid = (((drflac_uint32)mid) << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (float)((((mid + side) >> 1) << shift) * factor); + pOutputSamples[i*2+1] = (float)((((mid - side) >> 1) << shift) * factor); + } + } +} +#endif + + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + // Scalar fallback. +#if 0 + drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + +#if 0 +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + for (drflac_uint64 i = 0; i < frameCount; ++i) { + pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample)) / 2147483648.0); + pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample)) / 2147483648.0); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 frameCount4 = frameCount >> 2; + + float factor = 1 / 2147483648.0; + + int shift0 = (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample); + int shift1 = (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample); + + for (drflac_uint64 i = 0; i < frameCount4; ++i) { + int tempL0 = pInputSamples0[i*4+0] << shift0; + int tempL1 = pInputSamples0[i*4+1] << shift0; + int tempL2 = pInputSamples0[i*4+2] << shift0; + int tempL3 = pInputSamples0[i*4+3] << shift0; + + int tempR0 = pInputSamples1[i*4+0] << shift1; + int tempR1 = pInputSamples1[i*4+1] << shift1; + int tempR2 = pInputSamples1[i*4+2] << shift1; + int tempR3 = pInputSamples1[i*4+3] << shift1; + + pOutputSamples[i*8+0] = (float)(tempL0 * factor); + pOutputSamples[i*8+1] = (float)(tempR0 * factor); + pOutputSamples[i*8+2] = (float)(tempL1 * factor); + pOutputSamples[i*8+3] = (float)(tempR1 * factor); + pOutputSamples[i*8+4] = (float)(tempL2 * factor); + pOutputSamples[i*8+5] = (float)(tempR2 * factor); + pOutputSamples[i*8+6] = (float)(tempL3 * factor); + pOutputSamples[i*8+7] = (float)(tempR3 * factor); + } + + for (drflac_uint64 i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << shift0) * factor); + pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << shift1) * factor); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 frameCount4 = frameCount >> 2; + + float factor = 1.0f / 8388608.0f; + __m128 factor128 = _mm_set1_ps(1.0f / 8388608.0f); + + int shift0 = (unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample) - 8; + int shift1 = (unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample) - 8; + + for (drflac_uint64 i = 0; i < frameCount4; ++i) { + __m128i inputSample0 = _mm_loadu_si128((const __m128i*)pInputSamples0 + i); + __m128i inputSample1 = _mm_loadu_si128((const __m128i*)pInputSamples1 + i); + + __m128i i32L = _mm_slli_epi32(inputSample0, shift0); + __m128i i32R = _mm_slli_epi32(inputSample1, shift1); + + __m128 f32L = _mm_mul_ps(_mm_cvtepi32_ps(i32L), factor128); + __m128 f32R = _mm_mul_ps(_mm_cvtepi32_ps(i32R), factor128); + + pOutputSamples[i*8+0] = ((float*)&f32L)[0]; + pOutputSamples[i*8+1] = ((float*)&f32R)[0]; + pOutputSamples[i*8+2] = ((float*)&f32L)[1]; + pOutputSamples[i*8+3] = ((float*)&f32R)[1]; + pOutputSamples[i*8+4] = ((float*)&f32L)[2]; + pOutputSamples[i*8+5] = ((float*)&f32R)[2]; + pOutputSamples[i*8+6] = ((float*)&f32L)[3]; + pOutputSamples[i*8+7] = ((float*)&f32R)[3]; + } + + for (drflac_uint64 i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (float)((pInputSamples0[i] << shift0) * factor); + pOutputSamples[i*2+1] = (float)((pInputSamples1[i] << shift1) * factor); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_int32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + // Scalar fallback. +#if 0 + drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#else + drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); +#endif + } +} + +drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut) +{ + if (pFlac == NULL || framesToRead == 0) { + return 0; + } + + if (pBufferOut == NULL) { + return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); + } + + drflac_uint64 framesRead = 0; + while (framesToRead > 0) { + // If we've run out of samples in this frame, go to the next. + if (pFlac->currentFrame.samplesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; // Couldn't read the next frame, so just break from the loop and return. + } + } else { + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment); + drflac_uint64 totalFramesInPacket = pFlac->currentFrame.header.blockSize; + drflac_uint64 framesReadFromPacketSoFar = totalFramesInPacket - (pFlac->currentFrame.samplesRemaining/channelCount); + drflac_uint64 iFirstPCMFrame = framesReadFromPacketSoFar; + drflac_int32 unusedBitsPerSample = 32 - pFlac->bitsPerSample; + + drflac_uint64 frameCountThisIteration = framesToRead; + if (frameCountThisIteration > pFlac->currentFrame.samplesRemaining / channelCount) { + frameCountThisIteration = pFlac->currentFrame.samplesRemaining / channelCount; + } + + if (channelCount == 2) { + const drflac_int32* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + iFirstPCMFrame; + const drflac_int32* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + iFirstPCMFrame; + + switch (pFlac->currentFrame.header.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + } + } else { + // Generic interleaving. + for (drflac_uint64 i = 0; i < frameCountThisIteration; ++i) { + for (unsigned int j = 0; j < channelCount; ++j) { + pBufferOut[(i*channelCount)+j] = (float)(((pFlac->currentFrame.subframes[j].pDecodedSamples[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFrame.subframes[j].wastedBitsPerSample)) / 2147483648.0); + } + } + } + + drflac_uint64 samplesReadThisIteration = frameCountThisIteration * channelCount; + framesRead += frameCountThisIteration; + framesReadFromPacketSoFar += frameCountThisIteration; + pBufferOut += samplesReadThisIteration; + framesToRead -= frameCountThisIteration; + pFlac->currentSample += samplesReadThisIteration; + pFlac->currentFrame.samplesRemaining -= (unsigned int)samplesReadThisIteration; + } + } + + return framesRead; +} + drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex) { if (pFlac == NULL) { @@ -5765,6 +7381,72 @@ drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex) } } +drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + if (pFlac == NULL) { + return DRFLAC_FALSE; + } + + // If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present + // when the decoder was opened. + if (pFlac->firstFramePos == 0) { + return DRFLAC_FALSE; + } + + if (pcmFrameIndex == 0) { + pFlac->currentSample = 0; + return drflac__seek_to_first_frame(pFlac); + } else { + drflac_bool32 wasSuccessful = DRFLAC_FALSE; + + // Clamp the sample to the end. + if (pcmFrameIndex >= pFlac->totalPCMFrameCount) { + pcmFrameIndex = pFlac->totalPCMFrameCount - 1; + } + + // If the target sample and the current sample are in the same frame we just move the position forward. + if (pcmFrameIndex*pFlac->channels > pFlac->currentSample) { + // Forward. + drflac_uint32 offset = (drflac_uint32)(pcmFrameIndex*pFlac->channels - pFlac->currentSample); + if (pFlac->currentFrame.samplesRemaining > offset) { + pFlac->currentFrame.samplesRemaining -= offset; + pFlac->currentSample = pcmFrameIndex*pFlac->channels; + return DRFLAC_TRUE; + } + } else { + // Backward. + drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentSample - pcmFrameIndex*pFlac->channels); + drflac_uint32 currentFrameSampleCount = pFlac->currentFrame.header.blockSize * drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.header.channelAssignment); + drflac_uint32 currentFrameSamplesConsumed = (drflac_uint32)(currentFrameSampleCount - pFlac->currentFrame.samplesRemaining); + if (currentFrameSamplesConsumed > offsetAbs) { + pFlac->currentFrame.samplesRemaining += offsetAbs; + pFlac->currentSample = pcmFrameIndex*pFlac->channels; + return DRFLAC_TRUE; + } + } + + // Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so + // we'll instead use Ogg's natural seeking facility. +#ifndef DR_FLAC_NO_OGG + if (pFlac->container == drflac_container_ogg) + { + wasSuccessful = drflac_ogg__seek_to_sample(pFlac, pcmFrameIndex*pFlac->channels); + } + else +#endif + { + // First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. + wasSuccessful = drflac__seek_to_sample__seek_table(pFlac, pcmFrameIndex*pFlac->channels); + if (!wasSuccessful) { + wasSuccessful = drflac__seek_to_sample__brute_force(pFlac, pcmFrameIndex*pFlac->channels); + } + } + + pFlac->currentSample = pcmFrameIndex*pFlac->channels; + return wasSuccessful; + } +} + //// High Level APIs //// @@ -5781,15 +7463,15 @@ drflac_bool32 drflac_seek_to_sample(drflac* pFlac, drflac_uint64 sampleIndex) // Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. -#define DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(extension, type) \ -static type* drflac__full_decode_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut)\ +#define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \ +static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\ { \ drflac_assert(pFlac != NULL); \ \ type* pSampleData = NULL; \ - drflac_uint64 totalSampleCount = pFlac->totalSampleCount; \ + drflac_uint64 totalPCMFrameCount = pFlac->totalPCMFrameCount; \ \ - if (totalSampleCount == 0) { \ + if (totalPCMFrameCount == 0) { \ type buffer[4096]; \ \ size_t sampleDataBufferSize = sizeof(buffer); \ @@ -5798,9 +7480,9 @@ static type* drflac__full_decode_and_close_ ## extension (drflac* pFlac, unsigne goto on_error; \ } \ \ - drflac_uint64 samplesRead; \ - while ((samplesRead = (drflac_uint64)drflac_read_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0]), buffer)) > 0) { \ - if (((totalSampleCount + samplesRead) * sizeof(type)) > sampleDataBufferSize) { \ + drflac_uint64 pcmFramesRead; \ + while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) { \ + if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) { \ sampleDataBufferSize *= 2; \ type* pNewSampleData = (type*)DRFLAC_REALLOC(pSampleData, sampleDataBufferSize); \ if (pNewSampleData == NULL) { \ @@ -5811,15 +7493,15 @@ static type* drflac__full_decode_and_close_ ## extension (drflac* pFlac, unsigne pSampleData = pNewSampleData; \ } \ \ - drflac_copy_memory(pSampleData + totalSampleCount, buffer, (size_t)(samplesRead*sizeof(type))); \ - totalSampleCount += samplesRead; \ + drflac_copy_memory(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type))); \ + totalPCMFrameCount += pcmFramesRead; \ } \ \ /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to \ protect those ears from random noise! */ \ - drflac_zero_memory(pSampleData + totalSampleCount, (size_t)(sampleDataBufferSize - totalSampleCount*sizeof(type))); \ + drflac_zero_memory(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type))); \ } else { \ - drflac_uint64 dataSize = totalSampleCount * sizeof(type); \ + drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type); \ if (dataSize > DRFLAC_SIZE_MAX) { \ goto on_error; /* The decoded data is too big. */ \ } \ @@ -5829,12 +7511,12 @@ static type* drflac__full_decode_and_close_ ## extension (drflac* pFlac, unsigne goto on_error; \ } \ \ - totalSampleCount = drflac_read_##extension(pFlac, pFlac->totalSampleCount, pSampleData); \ + totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData); \ } \ \ if (sampleRateOut) *sampleRateOut = pFlac->sampleRate; \ if (channelsOut) *channelsOut = pFlac->channels; \ - if (totalSampleCountOut) *totalSampleCountOut = totalSampleCount; \ + if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount; \ \ drflac_close(pFlac); \ return pSampleData; \ @@ -5844,141 +7526,356 @@ on_error: return NULL; \ } -DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(s32, drflac_int32) -DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(s16, drflac_int16) -DRFLAC_DEFINE_FULL_DECODE_AND_CLOSE(f32, float) +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32) +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16) +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, float) -drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut) { // Safety. - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalPCMFrameCountOut) *totalPCMFrameCountOut = 0; drflac* pFlac = drflac_open(onRead, onSeek, pUserData); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); } -drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int32* drflac_open_and_decode_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut) { // Safety. - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalSampleCountOut) *totalSampleCountOut = 0; - drflac* pFlac = drflac_open(onRead, onSeek, pUserData); - if (pFlac == NULL) { + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + + drflac_int32* pResult = drflac_open_and_read_pcm_frames_s32(onRead, onSeek, pUserData, &channels, &sampleRate, &totalPCMFrameCount); + if (pResult == NULL) { return NULL; } - return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount); + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalSampleCountOut) *totalSampleCountOut = totalPCMFrameCount * channels; + + return pResult; } -float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) + + +drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut) { // Safety. - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalPCMFrameCountOut) *totalPCMFrameCountOut = 0; drflac* pFlac = drflac_open(onRead, onSeek, pUserData); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); +} + +drflac_int16* drflac_open_and_decode_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut) +{ + // Safety. + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalSampleCountOut) *totalSampleCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + + drflac_int16* pResult = drflac_open_and_read_pcm_frames_s16(onRead, onSeek, pUserData, &channels, &sampleRate, &totalPCMFrameCount); + if (pResult == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalSampleCountOut) *totalSampleCountOut = totalPCMFrameCount * channels; + + return pResult; +} + + +float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut) +{ + // Safety. + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalPCMFrameCountOut) *totalPCMFrameCountOut = 0; + + drflac* pFlac = drflac_open(onRead, onSeek, pUserData); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); +} + +float* drflac_open_and_decode_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut) +{ + // Safety. + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalSampleCountOut) *totalSampleCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + + float* pResult = drflac_open_and_read_pcm_frames_f32(onRead, onSeek, pUserData, &channels, &sampleRate, &totalPCMFrameCount); + if (pResult == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalSampleCountOut) *totalSampleCountOut = totalPCMFrameCount * channels; + + return pResult; } #ifndef DR_FLAC_NO_STDIO -drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount) { if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + if (totalPCMFrameCount) *totalPCMFrameCount = 0; drflac* pFlac = drflac_open_file(filename); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount); } -drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int32* drflac_open_and_decode_file_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut) { - if (sampleRate) *sampleRate = 0; - if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + // Safety. + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalSampleCountOut) *totalSampleCountOut = 0; - drflac* pFlac = drflac_open_file(filename); - if (pFlac == NULL) { + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + + drflac_int32* pResult = drflac_open_file_and_read_pcm_frames_s32(filename, &channels, &sampleRate, &totalPCMFrameCount); + if (pResult == NULL) { return NULL; } - return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount); + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalSampleCountOut) *totalSampleCountOut = totalPCMFrameCount * channels; + + return pResult; } -float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) + +drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount) { if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + if (totalPCMFrameCount) *totalPCMFrameCount = 0; drflac* pFlac = drflac_open_file(filename); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount); +} + +drflac_int16* drflac_open_and_decode_file_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut) +{ + // Safety. + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalSampleCountOut) *totalSampleCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + + drflac_int16* pResult = drflac_open_file_and_read_pcm_frames_s16(filename, &channels, &sampleRate, &totalPCMFrameCount); + if (pResult == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalSampleCountOut) *totalSampleCountOut = totalPCMFrameCount * channels; + + return pResult; +} + + +float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount) +{ + if (sampleRate) *sampleRate = 0; + if (channels) *channels = 0; + if (totalPCMFrameCount) *totalPCMFrameCount = 0; + + drflac* pFlac = drflac_open_file(filename); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount); +} + +float* drflac_open_and_decode_file_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut) +{ + // Safety. + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalSampleCountOut) *totalSampleCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + + float* pResult = drflac_open_file_and_read_pcm_frames_f32(filename, &channels, &sampleRate, &totalPCMFrameCount); + if (pResult == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalSampleCountOut) *totalSampleCountOut = totalPCMFrameCount * channels; + + return pResult; } #endif -drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount) { if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + if (totalPCMFrameCount) *totalPCMFrameCount = 0; drflac* pFlac = drflac_open_memory(data, dataSize); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_s32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount); } -drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int32* drflac_open_and_decode_memory_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut) +{ + // Safety. + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalSampleCountOut) *totalSampleCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + + drflac_int32* pResult = drflac_open_memory_and_read_pcm_frames_s32(data, dataSize, &channels, &sampleRate, &totalPCMFrameCount); + if (pResult == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalSampleCountOut) *totalSampleCountOut = totalPCMFrameCount * channels; + + return pResult; +} + + +drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount) { if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + if (totalPCMFrameCount) *totalPCMFrameCount = 0; drflac* pFlac = drflac_open_memory(data, dataSize); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_s16(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount); } -float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalSampleCount) +drflac_int16* drflac_open_and_decode_memory_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut) +{ + // Safety. + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalSampleCountOut) *totalSampleCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + + drflac_int16* pResult = drflac_open_memory_and_read_pcm_frames_s16(data, dataSize, &channels, &sampleRate, &totalPCMFrameCount); + if (pResult == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalSampleCountOut) *totalSampleCountOut = totalPCMFrameCount * channels; + + return pResult; +} + + +float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount) { if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; - if (totalSampleCount) *totalSampleCount = 0; + if (totalPCMFrameCount) *totalPCMFrameCount = 0; drflac* pFlac = drflac_open_memory(data, dataSize); if (pFlac == NULL) { return NULL; } - return drflac__full_decode_and_close_f32(pFlac, channels, sampleRate, totalSampleCount); + return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount); } +float* drflac_open_and_decode_memory_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalSampleCountOut) +{ + // Safety. + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalSampleCountOut) *totalSampleCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + + float* pResult = drflac_open_memory_and_read_pcm_frames_f32(data, dataSize, &channels, &sampleRate, &totalPCMFrameCount); + if (pResult == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalSampleCountOut) *totalSampleCountOut = totalPCMFrameCount * channels; + + return pResult; +} + + void drflac_free(void* pSampleDataReturnedByOpenAndDecode) { DRFLAC_FREE(pSampleDataReturnedByOpenAndDecode); @@ -6061,6 +7958,17 @@ drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, // REVISION HISTORY // +// v0.11.0 - 2018-12-xx +// - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with +// drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take +// and return PCM frame counts instead of sample counts. To upgrade you will need to change the input count by +// dividing it by the channel count, and then do the same with the return value. +// - API_CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as +// the changes to drflac_read_*() apply. +// - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as +// the changes to drflac_read_*() apply. +// - Optimizations. +// // v0.10.0 - 2018-09-11 // - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you // need to do it yourself via the callback API. diff --git a/extras/dr_mp3.h b/extras/dr_mp3.h index cd8920a1..e951cbdb 100644 --- a/extras/dr_mp3.h +++ b/extras/dr_mp3.h @@ -1,5 +1,5 @@ // MP3 audio decoder. Public domain. See "unlicense" statement at the end of this file. -// dr_mp3 - v0.3.2 - 2018-09-11 +// dr_mp3 - v0.4.0 - 2018-xx-xx // // David Reid - mackron@gmail.com // @@ -22,7 +22,7 @@ // // ... // -// drmp3_uint64 framesRead = drmp3_read_f32(pMP3, framesToRead, pFrames); +// drmp3_uint64 framesRead = drmp3_read_pcm_frames_f32(pMP3, framesToRead, pFrames); // // The drmp3 object is transparent so you can get access to the channel count and sample rate like so: // @@ -35,12 +35,12 @@ // The example above initializes a decoder from a file, but you can also initialize it from a block of memory and read and seek // callbacks with drmp3_init_memory() and drmp3_init() respectively. // -// You do need to do any annoying memory management when reading PCM frames - this is all managed internally. You can request -// any number of PCM frames in each call to drmp3_read_f32() and it will return as many PCM frames as it can, up to the requested -// amount. +// You do not need to do any annoying memory management when reading PCM frames - this is all managed internally. You can request +// any number of PCM frames in each call to drmp3_read_pcm_frames_f32() and it will return as many PCM frames as it can, up to the +// requested amount. // -// You can also decode an entire file in one go with drmp3_open_and_decode_f32(), drmp3_open_and_decode_memory_f32() and -// drmp3_open_and_decode_file_f32(). +// You can also decode an entire file in one go with drmp3_open_and_read_f32(), drmp3_open_memory_and_read_f32() and +// drmp3_open_file_and_read_f32(). // // // OPTIONS @@ -52,11 +52,6 @@ // // #define DR_MP3_NO_SIMD // Disable SIMD optimizations. -// -// -// LIMITATIONS -// =========== -// - Seeking is extremely inefficient. #ifndef dr_mp3_h #define dr_mp3_h @@ -92,7 +87,8 @@ typedef drmp3_uint32 drmp3_bool32; #define DRMP3_TRUE 1 #define DRMP3_FALSE 0 -#define DRMP3_MAX_SAMPLES_PER_FRAME (1152*2) +#define DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME 1152 +#define DRMP3_MAX_SAMPLES_PER_FRAME (DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME*2) // Low Level Push API @@ -162,7 +158,7 @@ struct drmp3_src { struct { - float alpha; + double alpha; drmp3_bool32 isPrevFramesLoaded : 1; drmp3_bool32 isNextFramesLoaded : 1; } linear; @@ -175,6 +171,14 @@ typedef enum drmp3_seek_origin_current } drmp3_seek_origin; +typedef struct +{ + drmp3_uint64 seekPosInBytes; // Points to the first byte of an MP3 frame. + drmp3_uint64 pcmFrameIndex; // The index of the PCM frame this seek point targets. + drmp3_uint16 mp3FramesToDiscard; // The number of whole MP3 frames to be discarded before pcmFramesToDiscard. + drmp3_uint16 pcmFramesToDiscard; // The number of leading samples to read and discard. These are discarded after mp3FramesToDiscard. +} drmp3_seek_point; + // Callback for when data is read. Return value is the number of bytes actually read. // // pUserData [in] The user data that was passed to drmp3_init(), drmp3_open() and family. @@ -214,12 +218,16 @@ typedef struct drmp3_read_proc onRead; drmp3_seek_proc onSeek; void* pUserData; - drmp3_uint32 frameChannels; // The number of channels in the currently loaded MP3 frame. Internal use only. - drmp3_uint32 frameSampleRate; // The sample rate of the currently loaded MP3 frame. Internal use only. - drmp3_uint32 framesConsumed; - drmp3_uint32 framesRemaining; - drmp3_uint8 frames[sizeof(float)*DRMP3_MAX_SAMPLES_PER_FRAME]; // <-- Multipled by sizeof(float) to ensure there's enough room for DR_MP3_FLOAT_OUTPUT. + drmp3_uint32 mp3FrameChannels; // The number of channels in the currently loaded MP3 frame. Internal use only. + drmp3_uint32 mp3FrameSampleRate; // The sample rate of the currently loaded MP3 frame. Internal use only. + drmp3_uint32 pcmFramesConsumedInMP3Frame; + drmp3_uint32 pcmFramesRemainingInMP3Frame; + drmp3_uint8 pcmFrames[sizeof(float)*DRMP3_MAX_SAMPLES_PER_FRAME]; // <-- Multipled by sizeof(float) to ensure there's enough room for DR_MP3_FLOAT_OUTPUT. + drmp3_uint64 currentPCMFrame; // The current PCM frame, globally, based on the output sample rate. Mainly used for seeking. + drmp3_uint64 streamCursor; // The current byte the decoder is sitting on in the raw stream. drmp3_src src; + drmp3_seek_point* pSeekPoints; // NULL by default. Set with drmp3_bind_seek_table(). Memory is owned by the client. dr_mp3 will never attempt to free this pointer. + drmp3_uint32 seekPointCount; // The number of items in pSeekPoints. When set to 0 assumes to no seek table. Defaults to zero. size_t dataSize; size_t dataCapacity; drmp3_uint8* pData; @@ -268,12 +276,38 @@ void drmp3_uninit(drmp3* pMP3); // Reads PCM frames as interleaved 32-bit IEEE floating point PCM. // // Note that framesToRead specifies the number of PCM frames to read, _not_ the number of MP3 frames. -drmp3_uint64 drmp3_read_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut); +drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut); // Seeks to a specific frame. // // Note that this is _not_ an MP3 frame, but rather a PCM frame. -drmp3_bool32 drmp3_seek_to_frame(drmp3* pMP3, drmp3_uint64 frameIndex); +drmp3_bool32 drmp3_seek_to_pcm_frame(drmp3* pMP3, drmp3_uint64 frameIndex); + +// Calculates the total number of PCM frames in the MP3 stream. Cannot be used for infinite streams such as internet +// radio. Runs in linear time. Returns 0 on error. +drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3); + +// Calculates the total number of MP3 frames in the MP3 stream. Cannot be used for infinite streams such as internet +// radio. Runs in linear time. Returns 0 on error. +drmp3_uint64 drmp3_get_mp3_frame_count(drmp3* pMP3); + +// Calculates the seekpoints based on PCM frames. This is slow. +// +// pSeekpoint count is a pointer to a uint32 containing the seekpoint count. On input it contains the desired count. +// On output it contains the actual count. The reason for this design is that the client may request too many +// seekpoints, in which case dr_mp3 will return a corrected count. +// +// Note that seektable seeking is not quite sample exact when the MP3 stream contains inconsistent sample rates. +drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pSeekPointCount, drmp3_seek_point* pSeekPoints); + +// Binds a seek table to the decoder. +// +// This does _not_ make a copy of pSeekPoints - it only references it. It is up to the application to ensure this +// remains valid while it is bound to the decoder. +// +// Use drmp3_calculate_seek_points() to calculate the seek points. +drmp3_bool32 drmp3_bind_seek_table(drmp3* pMP3, drmp3_uint32 seekPointCount, drmp3_seek_point* pSeekPoints); + // Opens an decodes an entire MP3 stream as a single operation. @@ -281,10 +315,10 @@ drmp3_bool32 drmp3_seek_to_frame(drmp3* pMP3, drmp3_uint64 frameIndex); // pConfig is both an input and output. On input it contains what you want. On output it contains what you got. // // Free the returned pointer with drmp3_free(). -float* drmp3_open_and_decode_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount); -float* drmp3_open_and_decode_memory_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount); +float* drmp3_open_and_read_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount); +float* drmp3_open_memory_and_read_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount); #ifndef DR_MP3_NO_STDIO -float* drmp3_open_and_decode_file_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount); +float* drmp3_open_file_and_read_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount); #endif // Frees any memory that was allocated by a public drmp3 API. @@ -1958,11 +1992,6 @@ int drmp3dec_decode_frame(drmp3dec *dec, const unsigned char *mp3, int mp3_bytes info->layer = 4 - DRMP3_HDR_GET_LAYER(hdr); info->bitrate_kbps = drmp3_hdr_bitrate_kbps(hdr); - if (!pcm) - { - return drmp3_hdr_frame_samples(hdr); - } - drmp3_bs_init(bs_frame, hdr + DRMP3_HDR_SIZE, frame_size - DRMP3_HDR_SIZE); if (DRMP3_HDR_IS_CRC(hdr)) { @@ -1978,7 +2007,7 @@ int drmp3dec_decode_frame(drmp3dec *dec, const unsigned char *mp3, int mp3_bytes return 0; } success = drmp3_L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin); - if (success) + if (success && pcm != NULL) { for (igr = 0; igr < (DRMP3_HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*576*info->channels)) { @@ -1993,6 +2022,10 @@ int drmp3dec_decode_frame(drmp3dec *dec, const unsigned char *mp3, int mp3_bytes #ifdef DR_MP3_ONLY_MP3 return 0; #else + if (pcm == NULL) { + return drmp3_hdr_frame_samples(hdr); + } + drmp3_L12_scale_info sci[1]; drmp3_L12_read_scale_info(hdr, bs_frame, sci); @@ -2015,6 +2048,7 @@ int drmp3dec_decode_frame(drmp3dec *dec, const unsigned char *mp3, int mp3_bytes } #endif } + return success*drmp3_hdr_frame_samples(dec->header); } @@ -2097,10 +2131,13 @@ void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, int num_samples) // Options. #ifndef DR_MP3_DEFAULT_CHANNELS -#define DR_MP3_DEFAULT_CHANNELS 2 +#define DR_MP3_DEFAULT_CHANNELS 2 #endif #ifndef DR_MP3_DEFAULT_SAMPLE_RATE -#define DR_MP3_DEFAULT_SAMPLE_RATE 44100 +#define DR_MP3_DEFAULT_SAMPLE_RATE 44100 +#endif +#ifndef DRMP3_SEEK_LEADING_MP3_FRAMES +#define DRMP3_SEEK_LEADING_MP3_FRAMES 2 #endif @@ -2323,7 +2360,7 @@ drmp3_uint64 drmp3_src_read_frames_linear(drmp3_src* pSRC, drmp3_uint64 frameCou pSRC->algo.linear.isNextFramesLoaded = DRMP3_TRUE; } - float factor = (float)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut; + double factor = (double)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut; drmp3_uint64 totalFramesRead = 0; while (frameCount > 0) { @@ -2331,7 +2368,7 @@ drmp3_uint64 drmp3_src_read_frames_linear(drmp3_src* pSRC, drmp3_uint64 frameCou float* pPrevFrame = pSRC->bin; float* pNextFrame = pSRC->bin + pSRC->config.channels; - drmp3_blend_f32((float*)pFramesOut, pPrevFrame, pNextFrame, pSRC->algo.linear.alpha, pSRC->config.channels); + drmp3_blend_f32((float*)pFramesOut, pPrevFrame, pNextFrame, (float)pSRC->algo.linear.alpha, pSRC->config.channels); pSRC->algo.linear.alpha += factor; @@ -2376,35 +2413,91 @@ drmp3_uint64 drmp3_src_read_frames_linear(drmp3_src* pSRC, drmp3_uint64 frameCou } +static size_t drmp3__on_read(drmp3* pMP3, void* pBufferOut, size_t bytesToRead) +{ + size_t bytesRead = pMP3->onRead(pMP3->pUserData, pBufferOut, bytesToRead); + pMP3->streamCursor += bytesRead; + return bytesRead; +} -static drmp3_bool32 drmp3_decode_next_frame(drmp3* pMP3) +static drmp3_bool32 drmp3__on_seek(drmp3* pMP3, int offset, drmp3_seek_origin origin) +{ + drmp3_assert(offset >= 0); + + if (!pMP3->onSeek(pMP3->pUserData, offset, origin)) { + return DRMP3_FALSE; + } + + if (origin == drmp3_seek_origin_start) { + pMP3->streamCursor = (drmp3_uint64)offset; + } else { + pMP3->streamCursor += offset; + } + + return DRMP3_TRUE; +} + +static drmp3_bool32 drmp3__on_seek_64(drmp3* pMP3, drmp3_uint64 offset, drmp3_seek_origin origin) +{ + if (offset <= 0x7FFFFFFF) { + return drmp3__on_seek(pMP3, (int)offset, origin); + } + + + // Getting here "offset" is too large for a 32-bit integer. We just keep seeking forward until we hit the offset. + if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, drmp3_seek_origin_start)) { + return DRMP3_FALSE; + } + + offset -= 0x7FFFFFFF; + while (offset > 0) { + if (offset <= 0x7FFFFFFF) { + if (!drmp3__on_seek(pMP3, (int)offset, drmp3_seek_origin_current)) { + return DRMP3_FALSE; + } + offset = 0; + } else { + if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, drmp3_seek_origin_current)) { + return DRMP3_FALSE; + } + offset -= 0x7FFFFFFF; + } + } + + return DRMP3_TRUE; +} + + + + +static drmp3_uint32 drmp3_decode_next_frame_ex(drmp3* pMP3, drmp3d_sample_t* pPCMFrames, drmp3_bool32 discard) { drmp3_assert(pMP3 != NULL); drmp3_assert(pMP3->onRead != NULL); if (pMP3->atEnd) { - return DRMP3_FALSE; + return 0; } - do - { + drmp3_uint32 pcmFramesRead = 0; + do { // minimp3 recommends doing data submission in 16K chunks. If we don't have at least 16K bytes available, get more. if (pMP3->dataSize < DRMP3_DATA_CHUNK_SIZE) { if (pMP3->dataCapacity < DRMP3_DATA_CHUNK_SIZE) { pMP3->dataCapacity = DRMP3_DATA_CHUNK_SIZE; drmp3_uint8* pNewData = (drmp3_uint8*)drmp3_realloc(pMP3->pData, pMP3->dataCapacity); if (pNewData == NULL) { - return DRMP3_FALSE; // Out of memory. + return 0; // Out of memory. } pMP3->pData = pNewData; } - size_t bytesRead = pMP3->onRead(pMP3->pUserData, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize)); + size_t bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize)); if (bytesRead == 0) { if (pMP3->dataSize == 0) { pMP3->atEnd = DRMP3_TRUE; - return DRMP3_FALSE; // No data. + return 0; // No data. } } @@ -2413,51 +2506,81 @@ static drmp3_bool32 drmp3_decode_next_frame(drmp3* pMP3) if (pMP3->dataSize > INT_MAX) { pMP3->atEnd = DRMP3_TRUE; - return DRMP3_FALSE; // File too big. + return 0; // File too big. } drmp3dec_frame_info info; - drmp3_uint32 samplesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->pData, (int)pMP3->dataSize, (drmp3d_sample_t*)pMP3->frames, &info); // <-- Safe size_t -> int conversion thanks to the check above. - if (samplesRead != 0) { - size_t leftoverDataSize = (pMP3->dataSize - (size_t)info.frame_bytes); - for (size_t i = 0; i < leftoverDataSize; ++i) { - pMP3->pData[i] = pMP3->pData[i + (size_t)info.frame_bytes]; - } - + pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->pData, (int)pMP3->dataSize, pPCMFrames, &info); // <-- Safe size_t -> int conversion thanks to the check above. + + // Consume the data. + size_t leftoverDataSize = (pMP3->dataSize - (size_t)info.frame_bytes); + if (info.frame_bytes > 0) { + memmove(pMP3->pData, pMP3->pData + info.frame_bytes, leftoverDataSize); pMP3->dataSize = leftoverDataSize; - pMP3->framesConsumed = 0; - pMP3->framesRemaining = samplesRead; - pMP3->frameChannels = info.channels; - pMP3->frameSampleRate = info.hz; - drmp3_src_set_input_sample_rate(&pMP3->src, pMP3->frameSampleRate); + } + + // pcmFramesRead will be equal to 0 if decoding failed. If it is zero and info.frame_bytes > 0 then we have successfully + // decoded the frame. A special case is if we are wanting to discard the frame, in which case we return successfully. + if (pcmFramesRead > 0 || (info.frame_bytes > 0 && discard)) { + pcmFramesRead = drmp3_hdr_frame_samples(pMP3->decoder.header); + pMP3->pcmFramesConsumedInMP3Frame = 0; + pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead; + pMP3->mp3FrameChannels = info.channels; + pMP3->mp3FrameSampleRate = info.hz; + drmp3_src_set_input_sample_rate(&pMP3->src, pMP3->mp3FrameSampleRate); break; - } else { + } else if (info.frame_bytes == 0) { // Need more data. minimp3 recommends doing data submission in 16K chunks. if (pMP3->dataCapacity == pMP3->dataSize) { // No room. Expand. pMP3->dataCapacity += DRMP3_DATA_CHUNK_SIZE; drmp3_uint8* pNewData = (drmp3_uint8*)drmp3_realloc(pMP3->pData, pMP3->dataCapacity); if (pNewData == NULL) { - return DRMP3_FALSE; // Out of memory. + return 0; // Out of memory. } pMP3->pData = pNewData; } // Fill in a chunk. - size_t bytesRead = pMP3->onRead(pMP3->pUserData, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize)); + size_t bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize)); if (bytesRead == 0) { pMP3->atEnd = DRMP3_TRUE; - return DRMP3_FALSE; // Error reading more data. + return 0; // Error reading more data. } pMP3->dataSize += bytesRead; } } while (DRMP3_TRUE); - return DRMP3_TRUE; + return pcmFramesRead; } +static drmp3_uint32 drmp3_decode_next_frame(drmp3* pMP3) +{ + drmp3_assert(pMP3 != NULL); + return drmp3_decode_next_frame_ex(pMP3, (drmp3d_sample_t*)pMP3->pcmFrames, DRMP3_FALSE); +} + +#if 0 +static drmp3_uint32 drmp3_seek_next_frame(drmp3* pMP3) +{ + drmp3_assert(pMP3 != NULL); + + drmp3_uint32 pcmFrameCount = drmp3_decode_next_frame_ex(pMP3, NULL); + if (pcmFrameCount == 0) { + return 0; + } + + // We have essentially just skipped past the frame, so just set the remaining samples to 0. + pMP3->currentPCMFrame += pcmFrameCount; + pMP3->pcmFramesConsumedInMP3Frame = pcmFrameCount; + pMP3->pcmFramesRemainingInMP3Frame = 0; + + return pcmFrameCount; +} +#endif + static drmp3_uint64 drmp3_read_src(drmp3_src* pSRC, drmp3_uint64 frameCount, void* pFramesOut, void* pUserData) { drmp3* pMP3 = (drmp3*)pUserData; @@ -2465,64 +2588,64 @@ static drmp3_uint64 drmp3_read_src(drmp3_src* pSRC, drmp3_uint64 frameCount, voi drmp3_assert(pMP3->onRead != NULL); float* pFramesOutF = (float*)pFramesOut; - drmp3_uint32 totalFramesRead = 0; + drmp3_uint64 totalFramesRead = 0; while (frameCount > 0) { // Read from the in-memory buffer first. - while (pMP3->framesRemaining > 0 && frameCount > 0) { - drmp3d_sample_t* frames = (drmp3d_sample_t*)pMP3->frames; + while (pMP3->pcmFramesRemainingInMP3Frame > 0 && frameCount > 0) { + drmp3d_sample_t* frames = (drmp3d_sample_t*)pMP3->pcmFrames; #ifndef DR_MP3_FLOAT_OUTPUT - if (pMP3->frameChannels == 1) { + if (pMP3->mp3FrameChannels == 1) { if (pMP3->channels == 1) { // Mono -> Mono. - pFramesOutF[0] = frames[pMP3->framesConsumed] / 32768.0f; + pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame] / 32768.0f; } else { // Mono -> Stereo. - pFramesOutF[0] = frames[pMP3->framesConsumed] / 32768.0f; - pFramesOutF[1] = frames[pMP3->framesConsumed] / 32768.0f; + pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame] / 32768.0f; + pFramesOutF[1] = frames[pMP3->pcmFramesConsumedInMP3Frame] / 32768.0f; } } else { if (pMP3->channels == 1) { // Stereo -> Mono float sample = 0; - sample += frames[(pMP3->framesConsumed*pMP3->frameChannels)+0] / 32768.0f; - sample += frames[(pMP3->framesConsumed*pMP3->frameChannels)+1] / 32768.0f; + sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0] / 32768.0f; + sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1] / 32768.0f; pFramesOutF[0] = sample * 0.5f; } else { // Stereo -> Stereo - pFramesOutF[0] = frames[(pMP3->framesConsumed*pMP3->frameChannels)+0] / 32768.0f; - pFramesOutF[1] = frames[(pMP3->framesConsumed*pMP3->frameChannels)+1] / 32768.0f; + pFramesOutF[0] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0] / 32768.0f; + pFramesOutF[1] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1] / 32768.0f; } } #else - if (pMP3->frameChannels == 1) { + if (pMP3->mp3FrameChannels == 1) { if (pMP3->channels == 1) { // Mono -> Mono. - pFramesOutF[0] = frames[pMP3->framesConsumed]; + pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame]; } else { // Mono -> Stereo. - pFramesOutF[0] = frames[pMP3->framesConsumed]; - pFramesOutF[1] = frames[pMP3->framesConsumed]; + pFramesOutF[0] = frames[pMP3->pcmFramesConsumedInMP3Frame]; + pFramesOutF[1] = frames[pMP3->pcmFramesConsumedInMP3Frame]; } } else { if (pMP3->channels == 1) { // Stereo -> Mono float sample = 0; - sample += frames[(pMP3->framesConsumed*pMP3->frameChannels)+0]; - sample += frames[(pMP3->framesConsumed*pMP3->frameChannels)+1]; + sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0]; + sample += frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1]; pFramesOutF[0] = sample * 0.5f; } else { // Stereo -> Stereo - pFramesOutF[0] = frames[(pMP3->framesConsumed*pMP3->frameChannels)+0]; - pFramesOutF[1] = frames[(pMP3->framesConsumed*pMP3->frameChannels)+1]; + pFramesOutF[0] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+0]; + pFramesOutF[1] = frames[(pMP3->pcmFramesConsumedInMP3Frame*pMP3->mp3FrameChannels)+1]; } } #endif - pMP3->framesConsumed += 1; - pMP3->framesRemaining -= 1; - frameCount -= 1; + pMP3->pcmFramesConsumedInMP3Frame += 1; + pMP3->pcmFramesRemainingInMP3Frame -= 1; totalFramesRead += 1; + frameCount -= 1; pFramesOutF += pSRC->config.channels; } @@ -2530,11 +2653,11 @@ static drmp3_uint64 drmp3_read_src(drmp3_src* pSRC, drmp3_uint64 frameCount, voi break; } - drmp3_assert(pMP3->framesRemaining == 0); + drmp3_assert(pMP3->pcmFramesRemainingInMP3Frame == 0); // At this point we have exhausted our in-memory buffer so we need to re-fill. Note that the sample rate may have changed // at this point which means we'll also need to update our sample rate conversion pipeline. - if (!drmp3_decode_next_frame(pMP3)) { + if (drmp3_decode_next_frame(pMP3) == 0) { break; } } @@ -2710,7 +2833,9 @@ drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* filePath, const drmp3_conf void drmp3_uninit(drmp3* pMP3) { - if (pMP3 == NULL) return; + if (pMP3 == NULL) { + return; + } #ifndef DR_MP3_NO_STDIO if (pMP3->onRead == drmp3__on_read_stdio) { @@ -2721,9 +2846,11 @@ void drmp3_uninit(drmp3* pMP3) drmp3_free(pMP3->pData); } -drmp3_uint64 drmp3_read_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut) +drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBufferOut) { - if (pMP3 == NULL || pMP3->onRead == NULL) return 0; + if (pMP3 == NULL || pMP3->onRead == NULL) { + return 0; + } drmp3_uint64 totalFramesRead = 0; @@ -2735,7 +2862,7 @@ drmp3_uint64 drmp3_read_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBuff framesToReadRightNow = framesToRead; } - drmp3_uint64 framesJustRead = drmp3_read_f32(pMP3, framesToReadRightNow, temp); + drmp3_uint64 framesJustRead = drmp3_read_pcm_frames_f32(pMP3, framesToReadRightNow, temp); if (framesJustRead == 0) { break; } @@ -2745,40 +2872,471 @@ drmp3_uint64 drmp3_read_f32(drmp3* pMP3, drmp3_uint64 framesToRead, float* pBuff } } else { totalFramesRead = drmp3_src_read_frames_ex(&pMP3->src, framesToRead, pBufferOut, DRMP3_TRUE); + pMP3->currentPCMFrame += totalFramesRead; } return totalFramesRead; } -drmp3_bool32 drmp3_seek_to_frame(drmp3* pMP3, drmp3_uint64 frameIndex) +void drmp3_reset(drmp3* pMP3) { - if (pMP3 == NULL || pMP3->onSeek == NULL) return DRMP3_FALSE; + drmp3_assert(pMP3 != NULL); + + pMP3->pcmFramesConsumedInMP3Frame = 0; + pMP3->pcmFramesRemainingInMP3Frame = 0; + pMP3->currentPCMFrame = 0; + pMP3->dataSize = 0; + pMP3->atEnd = DRMP3_FALSE; + pMP3->src.bin[0] = 0; + pMP3->src.bin[1] = 0; + pMP3->src.bin[2] = 0; + pMP3->src.bin[3] = 0; + pMP3->src.cache.cachedFrameCount = 0; + pMP3->src.cache.iNextFrame = 0; + pMP3->src.algo.linear.alpha = 0; + pMP3->src.algo.linear.isNextFramesLoaded = 0; + pMP3->src.algo.linear.isPrevFramesLoaded = 0; + //drmp3_zero_object(&pMP3->decoder); + drmp3dec_init(&pMP3->decoder); +} + +drmp3_bool32 drmp3_seek_to_start_of_stream(drmp3* pMP3) +{ + drmp3_assert(pMP3 != NULL); + drmp3_assert(pMP3->onSeek != NULL); // Seek to the start of the stream to begin with. - if (!pMP3->onSeek(pMP3->pUserData, 0, drmp3_seek_origin_start)) { + if (!drmp3__on_seek(pMP3, 0, drmp3_seek_origin_start)) { return DRMP3_FALSE; } // Clear any cached data. - pMP3->framesConsumed = 0; - pMP3->framesRemaining = 0; - pMP3->dataSize = 0; - pMP3->atEnd = DRMP3_FALSE; + drmp3_reset(pMP3); + return DRMP3_TRUE; +} - // TODO: Optimize. - // - // This is inefficient. We simply read frames from the start of the stream. - drmp3_uint64 framesRead = drmp3_read_f32(pMP3, frameIndex, NULL); - if (framesRead != frameIndex) { +float drmp3_get_cached_pcm_frame_count_from_src(drmp3* pMP3) +{ + return (pMP3->src.cache.cachedFrameCount - pMP3->src.cache.iNextFrame) + (float)pMP3->src.algo.linear.alpha; +} + +float drmp3_get_pcm_frames_remaining_in_mp3_frame(drmp3* pMP3) +{ + float factor = (float)pMP3->src.config.sampleRateOut / (float)pMP3->src.config.sampleRateIn; + float frameCountPreSRC = drmp3_get_cached_pcm_frame_count_from_src(pMP3) + pMP3->pcmFramesRemainingInMP3Frame; + return frameCountPreSRC * factor; +} + +// NOTE ON SEEKING +// =============== +// The seeking code below is a complete mess and is broken for cases when the sample rate changes. The problem +// is with the resampling and the crappy resampler used by dr_mp3. What needs to happen is the following: +// +// 1) The resampler needs to be replaced. +// 2) The resampler has state which needs to be updated whenever an MP3 frame is decoded outside of +// drmp3_read_pcm_frames_f32(). The resampler needs an API to "flush" some imaginary input so that it's +// state is updated accordingly. + +drmp3_bool32 drmp3_seek_forward_by_pcm_frames__brute_force(drmp3* pMP3, drmp3_uint64 frameOffset) +{ +#if 0 + // MP3 is a bit annoying when it comes to seeking because of the bit reservoir. It basically means that an MP3 frame can possibly + // depend on some of the data of prior frames. This means it's not as simple as seeking to the first byte of the MP3 frame that + // contains the sample because that MP3 frame will need the data from the previous MP3 frame (which we just seeked past!). To + // resolve this we seek past a number of MP3 frames up to a point, and then read-and-discard the remainder. + drmp3_uint64 maxFramesToReadAndDiscard = (drmp3_uint64)(DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME * 3 * ((float)pMP3->src.config.sampleRateOut / (float)pMP3->src.config.sampleRateIn)); + + // Now get rid of leading whole frames. + while (frameOffset > maxFramesToReadAndDiscard) { + float pcmFramesRemainingInCurrentMP3FrameF = drmp3_get_pcm_frames_remaining_in_mp3_frame(pMP3); + drmp3_uint32 pcmFramesRemainingInCurrentMP3Frame = (drmp3_uint32)pcmFramesRemainingInCurrentMP3FrameF; + if (frameOffset > pcmFramesRemainingInCurrentMP3Frame) { + frameOffset -= pcmFramesRemainingInCurrentMP3Frame; + pMP3->currentPCMFrame += pcmFramesRemainingInCurrentMP3Frame; + pMP3->pcmFramesConsumedInMP3Frame += pMP3->pcmFramesRemainingInMP3Frame; + pMP3->pcmFramesRemainingInMP3Frame = 0; + } else { + break; + } + + drmp3_uint32 pcmFrameCount = drmp3_decode_next_frame_ex(pMP3, pMP3->pcmFrames, DRMP3_FALSE); + if (pcmFrameCount == 0) { + break; + } + } + + // The last step is to read-and-discard any remaining PCM frames to make it sample-exact. + drmp3_uint64 framesRead = drmp3_read_pcm_frames_f32(pMP3, frameOffset, NULL); + if (framesRead != frameOffset) { return DRMP3_FALSE; } +#else + // Just using a dumb read-and-discard for now pending updates to the resampler. + drmp3_uint64 framesRead = drmp3_read_pcm_frames_f32(pMP3, frameOffset, NULL); + if (framesRead != frameOffset) { + return DRMP3_FALSE; + } +#endif + + return DRMP3_TRUE; +} + +drmp3_bool32 drmp3_seek_to_pcm_frame__brute_force(drmp3* pMP3, drmp3_uint64 frameIndex) +{ + drmp3_assert(pMP3 != NULL); + + if (frameIndex == pMP3->currentPCMFrame) { + return DRMP3_TRUE; + } + + // If we're moving foward we just read from where we're at. Otherwise we need to move back to the start of + // the stream and read from the beginning. + //drmp3_uint64 framesToReadAndDiscard; + if (frameIndex < pMP3->currentPCMFrame) { + // Moving backward. Move to the start of the stream and then move forward. + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + } + + drmp3_assert(frameIndex >= pMP3->currentPCMFrame); + return drmp3_seek_forward_by_pcm_frames__brute_force(pMP3, (frameIndex - pMP3->currentPCMFrame)); +} + +drmp3_bool32 drmp3_find_closest_seek_point(drmp3* pMP3, drmp3_uint64 frameIndex, drmp3_uint32* pSeekPointIndex) +{ + drmp3_assert(pSeekPointIndex != NULL); + + if (frameIndex < pMP3->pSeekPoints[0].pcmFrameIndex) { + return DRMP3_FALSE; + } + + // Linear search for simplicity to begin with while I'm getting this thing working. Once it's all working change this to a binary search. + for (drmp3_uint32 iSeekPoint = 0; iSeekPoint < pMP3->seekPointCount; ++iSeekPoint) { + if (pMP3->pSeekPoints[iSeekPoint].pcmFrameIndex > frameIndex) { + break; // Found it. + } + + *pSeekPointIndex = iSeekPoint; + } + + return DRMP3_TRUE; +} + +drmp3_bool32 drmp3_seek_to_pcm_frame__seek_table(drmp3* pMP3, drmp3_uint64 frameIndex) +{ + drmp3_assert(pMP3 != NULL); + drmp3_assert(pMP3->pSeekPoints != NULL); + drmp3_assert(pMP3->seekPointCount > 0); + + drmp3_seek_point seekPoint; + + // If there is no prior seekpoint it means the target PCM frame comes before the first seek point. Just assume a seekpoint at the start of the file in this case. + drmp3_uint32 priorSeekPointIndex; + if (drmp3_find_closest_seek_point(pMP3, frameIndex, &priorSeekPointIndex)) { + seekPoint = pMP3->pSeekPoints[priorSeekPointIndex]; + } else { + seekPoint.seekPosInBytes = 0; + seekPoint.pcmFrameIndex = 0; + seekPoint.mp3FramesToDiscard = 0; + seekPoint.pcmFramesToDiscard = 0; + } + + // First thing to do is seek to the first byte of the relevant MP3 frame. + if (!drmp3__on_seek_64(pMP3, seekPoint.seekPosInBytes, drmp3_seek_origin_start)) { + return DRMP3_FALSE; // Failed to seek. + } + + // Clear any cached data. + drmp3_reset(pMP3); + + // Whole MP3 frames need to be discarded first. + for (drmp3_uint16 iMP3Frame = 0; iMP3Frame < seekPoint.mp3FramesToDiscard; ++iMP3Frame) { + // Pass in non-null for the last frame because we want to ensure the sample rate converter is preloaded correctly. + drmp3d_sample_t* pPCMFrames = NULL; + if (iMP3Frame == seekPoint.mp3FramesToDiscard-1) { + pPCMFrames = (drmp3d_sample_t*)pMP3->pcmFrames; + } + + // We first need to decode the next frame, and then we need to flush the resampler. + drmp3_uint32 pcmFramesReadPreSRC = drmp3_decode_next_frame_ex(pMP3, pPCMFrames, DRMP3_TRUE); + if (pcmFramesReadPreSRC == 0) { + return DRMP3_FALSE; + } + } + + // We seeked to an MP3 frame in the raw stream so we need to make sure the current PCM frame is set correctly. + pMP3->currentPCMFrame = seekPoint.pcmFrameIndex - seekPoint.pcmFramesToDiscard; + + // Update resampler. This is wrong. Need to instead update it on a per MP3 frame basis. Also broken for cases when + // the sample rate is being reduced in my testing. Should work fine when the input and output sample rate is the same + // or a clean multiple. + pMP3->src.algo.linear.alpha = pMP3->currentPCMFrame * ((double)pMP3->src.config.sampleRateIn / pMP3->src.config.sampleRateOut); + pMP3->src.algo.linear.alpha = pMP3->src.algo.linear.alpha - (drmp3_uint32)(pMP3->src.algo.linear.alpha); + if (pMP3->src.algo.linear.alpha > 0) { + pMP3->src.algo.linear.isPrevFramesLoaded = 1; + } + + // Now at this point we can follow the same process as the brute force technique where we just skip over unnecessary MP3 frames and then + // read-and-discard at least 2 whole MP3 frames. + drmp3_uint64 leftoverFrames = frameIndex - pMP3->currentPCMFrame; + return drmp3_seek_forward_by_pcm_frames__brute_force(pMP3, leftoverFrames); +} + +drmp3_bool32 drmp3_seek_to_pcm_frame(drmp3* pMP3, drmp3_uint64 frameIndex) +{ + if (pMP3 == NULL || pMP3->onSeek == NULL) { + return DRMP3_FALSE; + } + + if (frameIndex == 0) { + return drmp3_seek_to_start_of_stream(pMP3); + } + + // Use the seek table if we have one. + if (pMP3->pSeekPoints != NULL && pMP3->seekPointCount > 0) { + return drmp3_seek_to_pcm_frame__seek_table(pMP3, frameIndex); + } else { + return drmp3_seek_to_pcm_frame__brute_force(pMP3, frameIndex); + } +} + +drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint64* pMP3FrameCount, drmp3_uint64* pPCMFrameCount) +{ + if (pMP3 == NULL) { + return DRMP3_FALSE; + } + + // The way this works is we move back to the start of the stream, iterate over each MP3 frame and calculate the frame count based + // on our output sample rate, the seek back to the PCM frame we were sitting on before calling this function. + + // The stream must support seeking for this to work. + if (pMP3->onSeek == NULL) { + return DRMP3_FALSE; + } + + // We'll need to seek back to where we were, so grab the PCM frame we're currently sitting on so we can restore later. + drmp3_uint64 currentPCMFrame = pMP3->currentPCMFrame; + + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + + drmp3_uint64 totalPCMFrameCount = 0; + drmp3_uint64 totalMP3FrameCount = 0; + + float totalPCMFrameCountFractionalPart = 0; // <-- With resampling there will be a fractional part to each MP3 frame that we need to accumulate. + for (;;) { + drmp3_uint32 pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, DRMP3_FALSE); + if (pcmFramesInCurrentMP3FrameIn == 0) { + break; + } + + float srcRatio = (float)pMP3->mp3FrameSampleRate / (float)pMP3->sampleRate; + drmp3_assert(srcRatio > 0); + + float pcmFramesInCurrentMP3FrameOutF = totalPCMFrameCountFractionalPart + (pcmFramesInCurrentMP3FrameIn / srcRatio); + drmp3_uint32 pcmFramesInCurrentMP3FrameOut = (drmp3_uint32)pcmFramesInCurrentMP3FrameOutF; + totalPCMFrameCountFractionalPart = pcmFramesInCurrentMP3FrameOutF - pcmFramesInCurrentMP3FrameOut; + totalPCMFrameCount += pcmFramesInCurrentMP3FrameOut; + totalMP3FrameCount += 1; + } + + // Finally, we need to seek back to where we were. + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + + if (!drmp3_seek_to_pcm_frame(pMP3, currentPCMFrame)) { + return DRMP3_FALSE; + } + + if (pMP3FrameCount != NULL) { + *pMP3FrameCount = totalMP3FrameCount; + } + if (pPCMFrameCount != NULL) { + *pPCMFrameCount = totalPCMFrameCount; + } + + return DRMP3_TRUE; +} + +drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3) +{ + drmp3_uint64 totalPCMFrameCount; + if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, NULL, &totalPCMFrameCount)) { + return 0; + } + + return totalPCMFrameCount; +} + +drmp3_uint64 drmp3_get_mp3_frame_count(drmp3* pMP3) +{ + drmp3_uint64 totalMP3FrameCount; + if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, &totalMP3FrameCount, NULL)) { + return 0; + } + + return totalMP3FrameCount; +} + +void drmp3__accumulate_running_pcm_frame_count(drmp3* pMP3, drmp3_uint32 pcmFrameCountIn, drmp3_uint64* pRunningPCMFrameCount, float* pRunningPCMFrameCountFractionalPart) +{ + float srcRatio = (float)pMP3->mp3FrameSampleRate / (float)pMP3->sampleRate; + drmp3_assert(srcRatio > 0); + + float pcmFrameCountOutF = *pRunningPCMFrameCountFractionalPart + (pcmFrameCountIn / srcRatio); + drmp3_uint32 pcmFrameCountOut = (drmp3_uint32)pcmFrameCountOutF; + *pRunningPCMFrameCountFractionalPart = pcmFrameCountOutF - pcmFrameCountOut; + *pRunningPCMFrameCount += pcmFrameCountOut; +} + +typedef struct +{ + drmp3_uint64 bytePos; + drmp3_uint64 pcmFrameIndex; // <-- After sample rate conversion. +} drmp3__seeking_mp3_frame_info; + +drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pSeekPointCount, drmp3_seek_point* pSeekPoints) +{ + if (pMP3 == NULL || pSeekPointCount == NULL || pSeekPoints == NULL) { + return DRMP3_FALSE; // Invalid args. + } + + drmp3_uint32 seekPointCount = *pSeekPointCount; + if (seekPointCount == 0) { + return DRMP3_FALSE; // The client has requested no seek points. Consider this to be invalid arguments since the client has probably not intended this. + } + + // We'll need to seek back to the current sample after calculating the seekpoints so we need to go ahead and grab the current location at the top. + drmp3_uint64 currentPCMFrame = pMP3->currentPCMFrame; + + // We never do more than the total number of MP3 frames and we limit it to 32-bits. + drmp3_uint64 totalMP3FrameCount; + drmp3_uint64 totalPCMFrameCount; + if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, &totalMP3FrameCount, &totalPCMFrameCount)) { + return DRMP3_FALSE; + } + + // If there's less than DRMP3_SEEK_LEADING_MP3_FRAMES+1 frames we just report 1 seek point which will be the very start of the stream. + if (totalMP3FrameCount < DRMP3_SEEK_LEADING_MP3_FRAMES+1) { + seekPointCount = 1; + pSeekPoints[0].seekPosInBytes = 0; + pSeekPoints[0].pcmFrameIndex = 0; + pSeekPoints[0].mp3FramesToDiscard = 0; + pSeekPoints[0].pcmFramesToDiscard = 0; + } else { + if (seekPointCount > totalMP3FrameCount-1) { + seekPointCount = (drmp3_uint32)totalMP3FrameCount-1; + } + + drmp3_uint64 pcmFramesBetweenSeekPoints = totalPCMFrameCount / (seekPointCount+1); + + // Here is where we actually calculate the seek points. We need to start by moving the start of the stream. We then enumerate over each + // MP3 frame. + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + + // We need to cache the byte positions of the previous MP3 frames. As a new MP3 frame is iterated, we cycle the byte positions in this + // array. The value in the first item in this array is the byte position that will be reported in the next seek point. + drmp3__seeking_mp3_frame_info mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES+1]; + + drmp3_uint64 runningPCMFrameCount = 0; + float runningPCMFrameCountFractionalPart = 0; + + // We need to initialize the array of MP3 byte positions for the leading MP3 frames. + for (int iMP3Frame = 0; iMP3Frame < DRMP3_SEEK_LEADING_MP3_FRAMES+1; ++iMP3Frame) { + // The byte position of the next frame will be the stream's cursor position, minus whatever is sitting in the buffer. + drmp3_assert(pMP3->streamCursor >= pMP3->dataSize); + mp3FrameInfo[iMP3Frame].bytePos = pMP3->streamCursor - pMP3->dataSize; + mp3FrameInfo[iMP3Frame].pcmFrameIndex = runningPCMFrameCount; + + // We need to get information about this frame so we can know how many samples it contained. + drmp3_uint32 pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, DRMP3_FALSE); + if (pcmFramesInCurrentMP3FrameIn == 0) { + return DRMP3_FALSE; // This should never happen. + } + + drmp3__accumulate_running_pcm_frame_count(pMP3, pcmFramesInCurrentMP3FrameIn, &runningPCMFrameCount, &runningPCMFrameCountFractionalPart); + } + + // At this point we will have extracted the byte positions of the leading MP3 frames. We can now start iterating over each seek point and + // calculate them. + drmp3_uint64 nextTargetPCMFrame = 0; + for (drmp3_uint32 iSeekPoint = 0; iSeekPoint < seekPointCount; ++iSeekPoint) { + nextTargetPCMFrame += pcmFramesBetweenSeekPoints; + + for (;;) { + if (nextTargetPCMFrame < runningPCMFrameCount) { + // The next seek point is in the current MP3 frame. + pSeekPoints[iSeekPoint].seekPosInBytes = mp3FrameInfo[0].bytePos; + pSeekPoints[iSeekPoint].pcmFrameIndex = nextTargetPCMFrame; + pSeekPoints[iSeekPoint].mp3FramesToDiscard = DRMP3_SEEK_LEADING_MP3_FRAMES; + pSeekPoints[iSeekPoint].pcmFramesToDiscard = (drmp3_uint16)(nextTargetPCMFrame - mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES-1].pcmFrameIndex); + break; + } else { + // The next seek point is not in the current MP3 frame, so continue on to the next one. The first thing to do is cycle the cached + // MP3 frame info. + for (int i = 0; i < drmp3_countof(mp3FrameInfo)-1; ++i) { + mp3FrameInfo[i] = mp3FrameInfo[i+1]; + } + + // Cache previous MP3 frame info. + mp3FrameInfo[drmp3_countof(mp3FrameInfo)-1].bytePos = pMP3->streamCursor - pMP3->dataSize; + mp3FrameInfo[drmp3_countof(mp3FrameInfo)-1].pcmFrameIndex = runningPCMFrameCount; + + // Go to the next MP3 frame. This shouldn't ever fail, but just in case it does we just set the seek point and break. If it happens, it + // should only ever do it for the last seek point. + drmp3_uint32 pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, DRMP3_TRUE); + if (pcmFramesInCurrentMP3FrameIn == 0) { + pSeekPoints[iSeekPoint].seekPosInBytes = mp3FrameInfo[0].bytePos; + pSeekPoints[iSeekPoint].pcmFrameIndex = nextTargetPCMFrame; + pSeekPoints[iSeekPoint].mp3FramesToDiscard = DRMP3_SEEK_LEADING_MP3_FRAMES; + pSeekPoints[iSeekPoint].pcmFramesToDiscard = (drmp3_uint16)(nextTargetPCMFrame - mp3FrameInfo[DRMP3_SEEK_LEADING_MP3_FRAMES-1].pcmFrameIndex); + break; + } + + drmp3__accumulate_running_pcm_frame_count(pMP3, pcmFramesInCurrentMP3FrameIn, &runningPCMFrameCount, &runningPCMFrameCountFractionalPart); + } + } + } + + // Finally, we need to seek back to where we were. + if (!drmp3_seek_to_start_of_stream(pMP3)) { + return DRMP3_FALSE; + } + if (!drmp3_seek_to_pcm_frame(pMP3, currentPCMFrame)) { + return DRMP3_FALSE; + } + } + + *pSeekPointCount = seekPointCount; + return DRMP3_TRUE; +} + +drmp3_bool32 drmp3_bind_seek_table(drmp3* pMP3, drmp3_uint32 seekPointCount, drmp3_seek_point* pSeekPoints) +{ + if (pMP3 == NULL) { + return DRMP3_FALSE; + } + + if (seekPointCount == 0 || pSeekPoints == NULL) { + // Unbinding. + pMP3->seekPointCount = 0; + pMP3->pSeekPoints = NULL; + } else { + // Binding. + pMP3->seekPointCount = seekPointCount; + pMP3->pSeekPoints = pSeekPoints; + } return DRMP3_TRUE; } - -float* drmp3__full_decode_and_close_f32(drmp3* pMP3, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) +float* drmp3__full_read_and_close_f32(drmp3* pMP3, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) { drmp3_assert(pMP3 != NULL); @@ -2789,7 +3347,7 @@ float* drmp3__full_decode_and_close_f32(drmp3* pMP3, drmp3_config* pConfig, drmp float temp[4096]; for (;;) { drmp3_uint64 framesToReadRightNow = drmp3_countof(temp) / pMP3->channels; - drmp3_uint64 framesJustRead = drmp3_read_f32(pMP3, framesToReadRightNow, temp); + drmp3_uint64 framesJustRead = drmp3_read_pcm_frames_f32(pMP3, framesToReadRightNow, temp); if (framesJustRead == 0) { break; } @@ -2835,35 +3393,35 @@ float* drmp3__full_decode_and_close_f32(drmp3* pMP3, drmp3_config* pConfig, drmp return pFrames; } -float* drmp3_open_and_decode_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) +float* drmp3_open_and_read_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) { drmp3 mp3; if (!drmp3_init(&mp3, onRead, onSeek, pUserData, pConfig)) { return NULL; } - return drmp3__full_decode_and_close_f32(&mp3, pConfig, pTotalFrameCount); + return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount); } -float* drmp3_open_and_decode_memory_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) +float* drmp3_open_memory_and_read_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) { drmp3 mp3; if (!drmp3_init_memory(&mp3, pData, dataSize, pConfig)) { return NULL; } - return drmp3__full_decode_and_close_f32(&mp3, pConfig, pTotalFrameCount); + return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount); } #ifndef DR_MP3_NO_STDIO -float* drmp3_open_and_decode_file_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) +float* drmp3_open_file_and_read_f32(const char* filePath, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount) { drmp3 mp3; if (!drmp3_init_file(&mp3, filePath, pConfig)) { return NULL; } - return drmp3__full_decode_and_close_f32(&mp3, pConfig, pTotalFrameCount); + return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount); } #endif @@ -2890,7 +3448,18 @@ void drmp3_free(void* p) // REVISION HISTORY -// =============== +// ================ +// +// v0.4.0 - 2018-xx-xx +// - API CHANGE: Rename some APIs: +// - drmp3_read_f32 -> to drmp3_read_pcm_frames_f32 +// - drmp3_seek_to_frame -> drmp3_seek_to_pcm_frame +// - drmp3_open_and_decode_f32 -> drmp3_open_and_read_f32 +// - drmp3_open_and_decode_memory_f32 -> drmp3_open_memory_and_read_f32 +// - drmp3_open_and_decode_file_f32 -> drmp3_open_file_and_read_f32 +// - Add drmp3_get_pcm_frame_count(). +// - Add drmp3_get_mp3_frame_count(). +// - Improve seeking performance. // // v0.3.2 - 2018-09-11 // - Fix a couple of memory leaks. diff --git a/extras/dr_wav.h b/extras/dr_wav.h index 6aff80a9..c139472d 100644 --- a/extras/dr_wav.h +++ b/extras/dr_wav.h @@ -1,5 +1,5 @@ // WAV audio loader and writer. Public domain. See "unlicense" statement at the end of this file. -// dr_wav - v0.8.5 - 2018-09-11 +// dr_wav - v0.9.0-dev - 2018-xx-xx // // David Reid - mackron@gmail.com @@ -40,7 +40,7 @@ // unsigned int channels; // unsigned int sampleRate; // drwav_uint64 totalSampleCount; -// float* pSampleData = drwav_open_and_read_file_s32("my_song.wav", &channels, &sampleRate, &totalSampleCount); +// float* pSampleData = drwav_open_file_and_read_f32("my_song.wav", &channels, &sampleRate, &totalSampleCount); // if (pSampleData == NULL) { // // Error opening and reading WAV file. // } @@ -154,6 +154,14 @@ extern "C" { #define DR_WAVE_FORMAT_DVI_ADPCM 0x11 #define DR_WAVE_FORMAT_EXTENSIBLE 0xFFFE +// Constants. +#ifndef DRWAV_MAX_SMPL_LOOPS +#define DRWAV_MAX_SMPL_LOOPS 1 +#endif + +// Flags to pass into drwav_init_ex(), etc. +#define DRWAV_SEQUENTIAL 0x00000001 + typedef enum { drwav_seek_origin_start, @@ -166,6 +174,22 @@ typedef enum drwav_container_w64 } drwav_container; +typedef struct +{ + union + { + drwav_uint8 fourcc[4]; + drwav_uint8 guid[16]; + } id; + + // The size in bytes of the chunk. + drwav_uint64 sizeInBytes; + + // RIFF = 2 byte alignment. + // W64 = 8 byte alignment. + unsigned int paddingSize; +} drwav_chunk_header; + // Callback for when data is read. Return value is the number of bytes actually read. // // pUserData [in] The user data that was passed to drwav_init(), drwav_open() and family. @@ -201,6 +225,22 @@ typedef size_t (* drwav_write_proc)(void* pUserData, const void* pData, size_t b // will be either drwav_seek_origin_start or drwav_seek_origin_current. typedef drwav_bool32 (* drwav_seek_proc)(void* pUserData, int offset, drwav_seek_origin origin); +// Callback for when drwav_init_ex/drwav_open_ex finds a chunk. +// +// pChunkUserData [in] The user data that was passed to the pChunkUserData parameter of drwav_init_ex(), drwav_open_ex() and family. +// onRead [in] A pointer to the function to call when reading. +// onSeek [in] A pointer to the function to call when seeking. +// pReadSeekUserData [in] The user data that was passed to the pReadSeekUserData parameter of drwav_init_ex(), drwav_open_ex() and family. +// pChunkHeader [in] A pointer to an object containing basic header information about the chunk. Use this to identify the chunk. +// +// Returns the number of bytes read + seeked. +// +// To read data from the chunk, call onRead(), passing in pReadSeekUserData as the first parameter. Do the same +// for seeking with onSeek(). The return value must be the total number of bytes you have read _plus_ seeked. +// +// You must not attempt to read beyond the boundary of the chunk. +typedef drwav_uint64 (* drwav_chunk_proc)(void* pChunkUserData, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_chunk_header* pChunkHeader); + // Structure for internal use. Only used for loaders opened with drwav_open_memory(). typedef struct { @@ -264,6 +304,30 @@ typedef struct drwav_uint8 subFormat[16]; } drwav_fmt; +typedef struct +{ + drwav_uint32 cuePointId; + drwav_uint32 type; + drwav_uint32 start; + drwav_uint32 end; + drwav_uint32 fraction; + drwav_uint32 playCount; +} drwav_smpl_loop; + + typedef struct +{ + drwav_uint32 manufacturer; + drwav_uint32 product; + drwav_uint32 samplePeriod; + drwav_uint32 midiUnityNotes; + drwav_uint32 midiPitchFraction; + drwav_uint32 smpteFormat; + drwav_uint32 smpteOffset; + drwav_uint32 numSampleLoops; + drwav_uint32 samplerData; + drwav_smpl_loop loops[DRWAV_MAX_SMPL_LOOPS]; +} drwav_smpl; + typedef struct { // A pointer to the function to call when more data is needed. @@ -324,6 +388,10 @@ typedef struct drwav_bool32 isSequentialWrite; + // smpl chunk. + drwav_smpl smpl; + + // A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_open_memory(). drwav__memory_stream memoryStream; drwav__memory_stream_write memoryStreamWrite; @@ -359,9 +427,13 @@ typedef struct // Initializes a pre-allocated drwav object. // -// onRead [in] The function to call when data needs to be read from the client. -// onSeek [in] The function to call when the read position of the client data needs to move. -// pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek. +// pWav [out] A pointer to the drwav object being initialized. +// onRead [in] The function to call when data needs to be read from the client. +// onSeek [in] The function to call when the read position of the client data needs to move. +// onChunk [in, optional] The function to call when a chunk is enumerated at initialized time. +// pUserData, pReadSeekUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek. +// pChunkUserData [in, optional] A pointer to application defined data that will be passed to onChunk. +// flags [in, optional] A set of flags for controlling how things are loaded. // // Returns true if successful; false otherwise. // @@ -373,8 +445,18 @@ typedef struct // If you want dr_wav to manage the memory allocation for you, consider using drwav_open() instead. This will allocate // a drwav object on the heap and return a pointer to it. // +// Possible values for flags: +// DRWAV_SEQUENTIAL: Never perform a backwards seek while loading. This disables the chunk callback and will cause this function +// to return as soon as the data chunk is found. Any chunks after the data chunk will be ignored. +// +// drwav_init() is equivalent to "drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0);". +// +// The onChunk is callback is not called for the WAVE or FMT chunks. The contents of the FMT chunk can be read from pWav->fmt +// after the function returns. +// // See also: drwav_init_file(), drwav_init_memory(), drwav_uninit() drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData); +drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags); // Initializes a pre-allocated drwav object for writing. // @@ -421,8 +503,9 @@ void drwav_uninit(drwav* pWav); // This is different from drwav_init() in that it will allocate the drwav object for you via DRWAV_MALLOC() before // initializing it. // -// See also: drwav_open_file(), drwav_open_memory(), drwav_close() +// See also: drwav_init(), drwav_open_file(), drwav_open_memory(), drwav_close() drwav* drwav_open(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData); +drwav* drwav_open_ex(drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags); // Opens a wav file for writing using the given callbacks. // @@ -473,11 +556,18 @@ size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut); // using a compressed format consider using drwav_read_raw() or drwav_read_s16/s32/f32/etc(). drwav_uint64 drwav_read(drwav* pWav, drwav_uint64 samplesToRead, void* pBufferOut); +// Same as drwav_read(), except works on PCM frames instead of samples. Returns the number of PCM +// freames read. +drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut); + // Seeks to the given sample. // // Returns true if successful; false otherwise. drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample); +// Same as drwav_seek_to_sample() except workd on PCM frames. +drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex); + // Writes raw audio data. // @@ -489,6 +579,8 @@ size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData); // Returns the number of samples written. drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* pData); +// Same as drwav_write(), but works on PCM frames instead of samples. Returns the number of PCM frames written. +drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData); //// Conversion Utilities //// @@ -501,6 +593,9 @@ drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* p // If the return value is less than it means the end of the file has been reached. drwav_uint64 drwav_read_s16(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut); +// Same as drwav_read_s16(), except works on PCM frames instead of samples. +drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut); + // Low-level function for converting unsigned 8-bit PCM samples to signed 16-bit PCM samples. void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount); @@ -530,6 +625,9 @@ void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sample // If the return value is less than it means the end of the file has been reached. drwav_uint64 drwav_read_f32(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut); +// Same as drwav_read_f32(), except works on PCM frames instead of samples. +drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut); + // Low-level function for converting unsigned 8-bit PCM samples to IEEE 32-bit floating point samples. void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount); @@ -559,6 +657,9 @@ void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount) // If the return value is less than it means the end of the file has been reached. drwav_uint64 drwav_read_s32(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut); +// Same as drwav_read_s32(), except works on PCM frames instead of samples. +drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut); + // Low-level function for converting unsigned 8-bit PCM samples to signed 32-bit PCM samples. void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount); @@ -593,6 +694,7 @@ void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sample // objects because the operating system may restrict the number of file handles an application can have open at // any given time. drwav_bool32 drwav_init_file(drwav* pWav, const char* filename); +drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags); // Helper for initializing a wave file for writing using stdio. // @@ -608,6 +710,7 @@ drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, // objects because the operating system may restrict the number of file handles an application can have open at // any given time. drwav* drwav_open_file(const char* filename); +drwav* drwav_open_file_ex(const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags); // Helper for opening a wave file for writing using stdio. // @@ -626,6 +729,7 @@ drwav* drwav_open_file_write_sequential(const char* filename, const drwav_data_f // // The buffer should contain the contents of the entire wave file, not just the sample data. drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize); +drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags); // Helper for initializing a writer which outputs data to a memory buffer. // @@ -643,6 +747,7 @@ drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size // // The buffer should contain the contents of the entire wave file, not just the sample data. drwav* drwav_open_memory(const void* data, size_t dataSize); +drwav* drwav_open_memory_ex(const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags); // Helper for opening a writer which outputs data to a memory buffer. // @@ -657,19 +762,28 @@ drwav* drwav_open_memory_write_sequential(void** ppData, size_t* pDataSize, cons #ifndef DR_WAV_NO_CONVERSION_API // Opens and reads a wav file in a single operation. drwav_int16* drwav_open_and_read_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount); float* drwav_open_and_read_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount); drwav_int32* drwav_open_and_read_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount); #ifndef DR_WAV_NO_STDIO // Opens and decodes a wav file in a single operation. -drwav_int16* drwav_open_and_read_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); -float* drwav_open_and_read_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); -drwav_int32* drwav_open_and_read_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +drwav_int16* drwav_open_file_and_read_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount); +float* drwav_open_file_and_read_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount); +drwav_int32* drwav_open_file_and_read_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount); #endif // Opens and decodes a wav file from a block of memory in a single operation. -drwav_int16* drwav_open_and_read_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); -float* drwav_open_and_read_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); -drwav_int32* drwav_open_and_read_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +drwav_int16* drwav_open_memory_and_read_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount); +float* drwav_open_memory_and_read_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +float* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount); +drwav_int32* drwav_open_memory_and_read_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount); +drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount); #endif // Frees data that was allocated internally by dr_wav. @@ -727,6 +841,13 @@ void drwav_free(void* pDataReturnedByOpenAndRead); #define drwav_copy_memory DRWAV_COPY_MEMORY #define drwav_zero_memory DRWAV_ZERO_MEMORY +typedef drwav_int32 drwav_result; +#define DRWAV_SUCCESS 0 +#define DRWAV_ERROR -1 +#define DRWAV_INVALID_ARGS -2 +#define DRWAV_INVALID_OPERATION -3 +#define DRWAV_INVALID_FILE -100 +#define DRWAV_EOF -101 #define DRWAV_MAX_SIMD_VECTOR_SIZE 64 // 64 for AVX-512 in the future. @@ -756,6 +877,7 @@ static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xA static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; // 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A static const drwav_uint8 drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; // 74636166-ACF3-11D3-8CD1-00C04F8EDB8A static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; // 61746164-ACF3-11D3-8CD1-00C04F8EDB8A +static const drwav_uint8 drwavGUID_W64_SMPL[16] = {0x73,0x6D,0x70,0x6C, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A}; // 6C706D73-ACF3-11D3-8CD1-00C04F8EDB8A static DRWAV_INLINE drwav_bool32 drwav__guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16]) { @@ -823,39 +945,21 @@ static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 fo formatTag == DR_WAVE_FORMAT_DVI_ADPCM; } - drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut); drwav_uint64 drwav_read_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut); drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData); drwav* drwav_open_write__internal(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData); -typedef struct -{ - union - { - drwav_uint8 fourcc[4]; - drwav_uint8 guid[16]; - } id; - - // The size in bytes of the chunk. - drwav_uint64 sizeInBytes; - - // RIFF = 2 byte alignment. - // W64 = 8 byte alignment. - unsigned int paddingSize; - -} drwav__chunk_header; - -static drwav_bool32 drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav__chunk_header* pHeaderOut) +static drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_chunk_header* pHeaderOut) { if (container == drwav_container_riff) { if (onRead(pUserData, pHeaderOut->id.fourcc, 4) != 4) { - return DRWAV_FALSE; + return DRWAV_EOF; } unsigned char sizeInBytes[4]; if (onRead(pUserData, sizeInBytes, 4) != 4) { - return DRWAV_FALSE; + return DRWAV_INVALID_FILE; } pHeaderOut->sizeInBytes = drwav__bytes_to_u32(sizeInBytes); @@ -863,12 +967,12 @@ static drwav_bool32 drwav__read_chunk_header(drwav_read_proc onRead, void* pUser *pRunningBytesReadOut += 8; } else { if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) { - return DRWAV_FALSE; + return DRWAV_EOF; } unsigned char sizeInBytes[8]; if (onRead(pUserData, sizeInBytes, 8) != 8) { - return DRWAV_FALSE; + return DRWAV_INVALID_FILE; } pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24; // <-- Subtract 24 because w64 includes the size of the header. @@ -876,7 +980,7 @@ static drwav_bool32 drwav__read_chunk_header(drwav_read_proc onRead, void* pUser *pRunningBytesReadOut += 24; } - return DRWAV_TRUE; + return DRWAV_SUCCESS; } static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData) @@ -899,11 +1003,38 @@ static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 off return DRWAV_TRUE; } +static drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData) +{ + if (offset <= 0x7FFFFFFF) { + return onSeek(pUserData, (int)offset, drwav_seek_origin_start); + } + + // Larger than 32-bit seek. + if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_start)) { + return DRWAV_FALSE; + } + offset -= 0x7FFFFFFF; + + for (;;) { + if (offset <= 0x7FFFFFFF) { + return onSeek(pUserData, (int)offset, drwav_seek_origin_current); + } + + if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) { + return DRWAV_FALSE; + } + offset -= 0x7FFFFFFF; + } + + // Should never get here. + //return DRWAV_TRUE; +} + static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_fmt* fmtOut) { - drwav__chunk_header header; - if (!drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header)) { + drwav_chunk_header header; + if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) { return DRWAV_FALSE; } @@ -916,7 +1047,7 @@ static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSe *pRunningBytesReadOut += header.sizeInBytes + header.paddingSize; // Try the next header. - if (!drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header)) { + if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) { return DRWAV_FALSE; } } @@ -1041,13 +1172,18 @@ static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek } drwav_bool32 drwav_init_file(drwav* pWav, const char* filename) +{ + return drwav_init_file_ex(pWav, filename, NULL, NULL, 0); +} + +drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags) { FILE* pFile = drwav_fopen(filename, "rb"); if (pFile == NULL) { return DRWAV_FALSE; } - return drwav_init(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile); + return drwav_init_ex(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, onChunk, (void*)pFile, pChunkUserData, flags); } @@ -1072,13 +1208,18 @@ drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, } drwav* drwav_open_file(const char* filename) +{ + return drwav_open_file_ex(filename, NULL, NULL, 0); +} + +drwav* drwav_open_file_ex(const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags) { FILE* pFile = drwav_fopen(filename, "rb"); if (pFile == NULL) { return DRWAV_FALSE; } - drwav* pWav = drwav_open(drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile); + drwav* pWav = drwav_open_ex(drwav__on_read_stdio, drwav__on_seek_stdio, onChunk, (void*)pFile, pChunkUserData, flags); if (pWav == NULL) { fclose(pFile); return NULL; @@ -1232,6 +1373,11 @@ static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drw } drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize) +{ + return drwav_init_memory_ex(pWav, data, dataSize, NULL, NULL, 0); +} + +drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags) { if (data == NULL || dataSize == 0) { return DRWAV_FALSE; @@ -1243,7 +1389,7 @@ drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize) memoryStream.dataSize = dataSize; memoryStream.currentReadPos = 0; - if (!drwav_init(pWav, drwav__on_read_memory, drwav__on_seek_memory, (void*)&memoryStream)) { + if (!drwav_init_ex(pWav, drwav__on_read_memory, drwav__on_seek_memory, onChunk, (void*)&memoryStream, pChunkUserData, flags)) { return DRWAV_FALSE; } @@ -1291,6 +1437,11 @@ drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size drwav* drwav_open_memory(const void* data, size_t dataSize) +{ + return drwav_open_memory_ex(data, dataSize, NULL, NULL, 0); +} + +drwav* drwav_open_memory_ex(const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags) { if (data == NULL || dataSize == 0) { return NULL; @@ -1302,7 +1453,7 @@ drwav* drwav_open_memory(const void* data, size_t dataSize) memoryStream.dataSize = dataSize; memoryStream.currentReadPos = 0; - drwav* pWav = drwav_open(drwav__on_read_memory, drwav__on_seek_memory, (void*)&memoryStream); + drwav* pWav = drwav_open_ex(drwav__on_read_memory, drwav__on_seek_memory, onChunk, (void*)&memoryStream, pChunkUserData, flags); if (pWav == NULL) { return NULL; } @@ -1351,19 +1502,58 @@ drwav* drwav_open_memory_write_sequential(void** ppData, size_t* pDataSize, cons } +size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor) +{ + drwav_assert(onRead != NULL); + drwav_assert(pCursor != NULL); + + size_t bytesRead = onRead(pUserData, pBufferOut, bytesToRead); + *pCursor += bytesRead; + return bytesRead; +} + +drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserData, int offset, drwav_seek_origin origin, drwav_uint64* pCursor) +{ + drwav_assert(onSeek != NULL); + drwav_assert(pCursor != NULL); + + if (!onSeek(pUserData, offset, origin)) { + return DRWAV_FALSE; + } + + if (origin == drwav_seek_origin_start) { + *pCursor = offset; + } else { + *pCursor += offset; + } + + return DRWAV_TRUE; +} + + drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData) +{ + return drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0); +} + +drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags) { if (onRead == NULL || onSeek == NULL) { return DRWAV_FALSE; } - drwav_zero_memory(pWav, sizeof(*pWav)); + drwav_uint64 cursor = 0; // <-- Keeps track of the byte position so we can seek to specific locations. + drwav_bool32 sequential = (flags & DRWAV_SEQUENTIAL) != 0; + drwav_zero_memory(pWav, sizeof(*pWav)); + pWav->onRead = onRead; + pWav->onSeek = onSeek; + pWav->pUserData = pReadSeekUserData; // The first 4 bytes should be the RIFF identifier. unsigned char riff[4]; - if (onRead(pUserData, riff, sizeof(riff)) != sizeof(riff)) { - return DRWAV_FALSE; // Failed to read data. + if (drwav__on_read(onRead, pReadSeekUserData, riff, sizeof(riff), &cursor) != sizeof(riff)) { + return DRWAV_FALSE; } // The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for @@ -1375,7 +1565,7 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS // Check the rest of the GUID for validity. drwav_uint8 riff2[12]; - if (onRead(pUserData, riff2, sizeof(riff2)) != sizeof(riff2)) { + if (drwav__on_read(onRead, pReadSeekUserData, riff2, sizeof(riff2), &cursor) != sizeof(riff2)) { return DRWAV_FALSE; } @@ -1392,7 +1582,7 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS if (pWav->container == drwav_container_riff) { // RIFF/WAVE unsigned char chunkSizeBytes[4]; - if (onRead(pUserData, chunkSizeBytes, sizeof(chunkSizeBytes)) != sizeof(chunkSizeBytes)) { + if (drwav__on_read(onRead, pReadSeekUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) { return DRWAV_FALSE; } @@ -1402,19 +1592,17 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS } unsigned char wave[4]; - if (onRead(pUserData, wave, sizeof(wave)) != sizeof(wave)) { + if (drwav__on_read(onRead, pReadSeekUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) { return DRWAV_FALSE; } if (!drwav__fourcc_equal(wave, "WAVE")) { return DRWAV_FALSE; // Expecting "WAVE". } - - pWav->dataChunkDataPos = 4 + sizeof(chunkSizeBytes) + sizeof(wave); } else { // W64 unsigned char chunkSize[8]; - if (onRead(pUserData, chunkSize, sizeof(chunkSize)) != sizeof(chunkSize)) { + if (drwav__on_read(onRead, pReadSeekUserData, chunkSize, sizeof(chunkSize), &cursor) != sizeof(chunkSize)) { return DRWAV_FALSE; } @@ -1423,21 +1611,19 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS } drwav_uint8 wave[16]; - if (onRead(pUserData, wave, sizeof(wave)) != sizeof(wave)) { + if (drwav__on_read(onRead, pReadSeekUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) { return DRWAV_FALSE; } if (!drwav__guid_equal(wave, drwavGUID_W64_WAVE)) { return DRWAV_FALSE; } - - pWav->dataChunkDataPos = 16 + sizeof(chunkSize) + sizeof(wave); } // The next bytes should be the "fmt " chunk. drwav_fmt fmt; - if (!drwav__read_fmt(onRead, onSeek, pUserData, pWav->container, &pWav->dataChunkDataPos, &fmt)) { + if (!drwav__read_fmt(onRead, onSeek, pReadSeekUserData, pWav->container, &cursor, &fmt)) { return DRWAV_FALSE; // Failed to read the "fmt " chunk. } @@ -1454,37 +1640,80 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS } + drwav_uint64 sampleCountFromFactChunk = 0; + // We need to enumerate over each chunk for two reasons: + // 1) The "data" chunk may not be the next one + // 2) We may want to report each chunk back to the client + // + // In order to correctly report each chunk back to the client we will need to keep looping until the end of the file. + drwav_bool32 foundDataChunk = DRWAV_FALSE; + drwav_uint64 dataChunkSize = 0; + // The next chunk we care about is the "data" chunk. This is not necessarily the next chunk so we'll need to loop. - drwav_uint64 dataSize; + drwav_uint64 chunkSize = 0; for (;;) { - drwav__chunk_header header; - if (!drwav__read_chunk_header(onRead, pUserData, pWav->container, &pWav->dataChunkDataPos, &header)) { - return DRWAV_FALSE; + drwav_chunk_header header; + drwav_result result = drwav__read_chunk_header(onRead, pReadSeekUserData, pWav->container, &cursor, &header); + if (result != DRWAV_SUCCESS) { + if (!foundDataChunk) { + return DRWAV_FALSE; + } else { + break; // Probably at the end of the file. Get out of the loop. + } } - dataSize = header.sizeInBytes; + // Tell the client about this chunk. + if (!sequential && onChunk != NULL) { + drwav_uint64 callbackBytesRead = onChunk(pChunkUserData, onRead, onSeek, pReadSeekUserData, &header); + + // dr_wav may need to read the contents of the chunk, so we now need to seek back to the position before + // we called the callback. + if (callbackBytesRead > 0) { + if (!drwav__seek_from_start(onSeek, cursor, pReadSeekUserData)) { + return DRWAV_FALSE; + } + } + } + + + if (!foundDataChunk) { + pWav->dataChunkDataPos = cursor; + } + + chunkSize = header.sizeInBytes; if (pWav->container == drwav_container_riff) { if (drwav__fourcc_equal(header.id.fourcc, "data")) { - break; + foundDataChunk = DRWAV_TRUE; + dataChunkSize = chunkSize; } } else { if (drwav__guid_equal(header.id.guid, drwavGUID_W64_DATA)) { - break; + foundDataChunk = DRWAV_TRUE; + dataChunkSize = chunkSize; } } + // If at this point we have found the data chunk and we're running in sequential mode, we need to break out of this loop. The reason for + // this is that we would otherwise require a backwards seek which sequential mode forbids. + if (foundDataChunk && sequential) { + break; + } + // Optional. Get the total sample count from the FACT chunk. This is useful for compressed formats. if (pWav->container == drwav_container_riff) { if (drwav__fourcc_equal(header.id.fourcc, "fact")) { drwav_uint32 sampleCount; - if (onRead(pUserData, &sampleCount, 4) != 4) { + if (drwav__on_read(onRead, pReadSeekUserData, &sampleCount, 4, &cursor) != 4) { return DRWAV_FALSE; } - pWav->dataChunkDataPos += 4; - dataSize -= 4; + chunkSize -= 4; + + if (!foundDataChunk) { + pWav->dataChunkDataPos = cursor; + } // The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this // for Microsoft ADPCM formats. @@ -1496,52 +1725,118 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS } } else { if (drwav__guid_equal(header.id.guid, drwavGUID_W64_FACT)) { - if (onRead(pUserData, &sampleCountFromFactChunk, 8) != 8) { + if (drwav__on_read(onRead, pReadSeekUserData, &sampleCountFromFactChunk, 8, &cursor) != 8) { return DRWAV_FALSE; } - pWav->dataChunkDataPos += 8; - dataSize -= 8; + chunkSize -= 8; + + if (!foundDataChunk) { + pWav->dataChunkDataPos = cursor; + } } } - // If we get here it means we didn't find the "data" chunk. Seek past it. + // "smpl" chunk. + if (pWav->container == drwav_container_riff) { + if (drwav__fourcc_equal(header.id.fourcc, "smpl")) { + unsigned char smplHeaderData[36]; // 36 = size of the smpl header section, not including the loop data. + if (chunkSize >= sizeof(smplHeaderData)) { + drwav_uint64 bytesJustRead = drwav__on_read(onRead, pReadSeekUserData, smplHeaderData, sizeof(smplHeaderData), &cursor); + chunkSize -= bytesJustRead; + + if (bytesJustRead == sizeof(smplHeaderData)) { + pWav->smpl.manufacturer = drwav__bytes_to_u32(smplHeaderData+0); + pWav->smpl.product = drwav__bytes_to_u32(smplHeaderData+4); + pWav->smpl.samplePeriod = drwav__bytes_to_u32(smplHeaderData+8); + pWav->smpl.midiUnityNotes = drwav__bytes_to_u32(smplHeaderData+12); + pWav->smpl.midiPitchFraction = drwav__bytes_to_u32(smplHeaderData+16); + pWav->smpl.smpteFormat = drwav__bytes_to_u32(smplHeaderData+20); + pWav->smpl.smpteOffset = drwav__bytes_to_u32(smplHeaderData+24); + pWav->smpl.numSampleLoops = drwav__bytes_to_u32(smplHeaderData+28); + pWav->smpl.samplerData = drwav__bytes_to_u32(smplHeaderData+32); + + for (drwav_uint32 iLoop = 0; iLoop < pWav->smpl.numSampleLoops && iLoop < drwav_countof(pWav->smpl.loops); ++iLoop) { + unsigned char smplLoopData[24]; // 24 = size of a loop section in the smpl chunk. + bytesJustRead = drwav__on_read(onRead, pReadSeekUserData, smplLoopData, sizeof(smplLoopData), &cursor); + chunkSize -= bytesJustRead; + + if (bytesJustRead == sizeof(smplLoopData)) { + pWav->smpl.loops[iLoop].cuePointId = drwav__bytes_to_u32(smplLoopData+0); + pWav->smpl.loops[iLoop].type = drwav__bytes_to_u32(smplLoopData+4); + pWav->smpl.loops[iLoop].start = drwav__bytes_to_u32(smplLoopData+8); + pWav->smpl.loops[iLoop].end = drwav__bytes_to_u32(smplLoopData+12); + pWav->smpl.loops[iLoop].fraction = drwav__bytes_to_u32(smplLoopData+16); + pWav->smpl.loops[iLoop].playCount = drwav__bytes_to_u32(smplLoopData+20); + } else { + break; // Break from the smpl loop for loop. + } + } + } + } else { + // Looks like invalid data. Ignore the chunk. + } + } + } else { + if (drwav__guid_equal(header.id.guid, drwavGUID_W64_SMPL)) { + // This path will be hit when a W64 WAV file contains a smpl chunk. I don't have a sample file to test this path, so a contribution + // is welcome to add support for this. + } + } // Make sure we seek past the padding. - dataSize += header.paddingSize; - drwav__seek_forward(onSeek, dataSize, pUserData); - pWav->dataChunkDataPos += dataSize; + chunkSize += header.paddingSize; + if (!drwav__seek_forward(onSeek, chunkSize, pReadSeekUserData)) { + break; + } + cursor += chunkSize; + + if (!foundDataChunk) { + pWav->dataChunkDataPos = cursor; + } } + // If we haven't found a data chunk, return an error. + if (!foundDataChunk) { + return DRWAV_FALSE; + } + + // We may have moved passed the data chunk. If so we need to move back. If running in sequential mode we can assume we are already sitting on the data chunk. + if (!sequential) { + if (!drwav__seek_from_start(onSeek, pWav->dataChunkDataPos, pReadSeekUserData)) { + return DRWAV_FALSE; + } + cursor = pWav->dataChunkDataPos; + } + + // At this point we should be sitting on the first byte of the raw audio data. - pWav->onRead = onRead; - pWav->onSeek = onSeek; - pWav->pUserData = pUserData; pWav->fmt = fmt; pWav->sampleRate = fmt.sampleRate; pWav->channels = fmt.channels; pWav->bitsPerSample = fmt.bitsPerSample; - pWav->bytesPerSample = fmt.blockAlign / fmt.channels; - pWav->bytesRemaining = dataSize; + pWav->bytesRemaining = dataChunkSize; pWav->translatedFormatTag = translatedFormatTag; - pWav->dataChunkDataSize = dataSize; + pWav->dataChunkDataSize = dataChunkSize; - // The bytes per sample should never be 0 at this point. This would indicate an invalid WAV file. - if (pWav->bytesPerSample == 0) { - return DRWAV_FALSE; + // The number of bytes per sample is based on the bits per sample or the block align. We prioritize floor(bitsPerSample/8), but if + // this is zero of the bits per sample is not a multiple of 8 we need to fall back to the block align. + pWav->bytesPerSample = pWav->bitsPerSample/8; + if (pWav->bytesPerSample == 0 || (pWav->bitsPerSample & 0x7) != 0 /*|| pWav->bytesPerSample < fmt.blockAlign/fmt.channels*/) { + pWav->bytesPerSample = fmt.blockAlign/fmt.channels; } if (sampleCountFromFactChunk != 0) { pWav->totalSampleCount = sampleCountFromFactChunk * fmt.channels; } else { - pWav->totalSampleCount = dataSize / pWav->bytesPerSample; + pWav->totalSampleCount = dataChunkSize / pWav->bytesPerSample; if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { - drwav_uint64 blockCount = dataSize / fmt.blockAlign; + drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign; pWav->totalSampleCount = (blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2; // x2 because two samples per byte. } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { - drwav_uint64 blockCount = dataSize / fmt.blockAlign; + drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign; pWav->totalSampleCount = ((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels); } } @@ -1566,11 +1861,11 @@ drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onS // always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my // correctness tests against libsndfile, and is disabled by default. if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) { - drwav_uint64 blockCount = dataSize / fmt.blockAlign; + drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign; pWav->totalSampleCount = (blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2; // x2 because two samples per byte. } if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) { - drwav_uint64 blockCount = dataSize / fmt.blockAlign; + drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign; pWav->totalSampleCount = ((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels); } #endif @@ -1819,13 +2114,18 @@ void drwav_uninit(drwav* pWav) drwav* drwav_open(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData) +{ + return drwav_open_ex(onRead, onSeek, NULL, pUserData, NULL, 0); +} + +drwav* drwav_open_ex(drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags) { drwav* pWav = (drwav*)DRWAV_MALLOC(sizeof(*pWav)); if (pWav == NULL) { return NULL; } - if (!drwav_init(pWav, onRead, onSeek, pUserData)) { + if (!drwav_init_ex(pWav, onRead, onSeek, onChunk, pReadSeekUserData, pChunkUserData, flags)) { DRWAV_FREE(pWav); return NULL; } @@ -1884,7 +2184,7 @@ size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut) drwav_uint64 drwav_read(drwav* pWav, drwav_uint64 samplesToRead, void* pBufferOut) { - if (pWav == NULL || samplesToRead == 0 || pBufferOut == NULL) { + if (pWav == NULL || samplesToRead == 0 || pBufferOut == NULL || pWav->bytesPerSample == 0) { return 0; } @@ -1902,6 +2202,11 @@ drwav_uint64 drwav_read(drwav* pWav, drwav_uint64 samplesToRead, void* pBufferOu return bytesRead / pWav->bytesPerSample; } +drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut) +{ + return drwav_read(pWav, framesToRead * pWav->channels, pBufferOut) / pWav->channels; +} + drwav_bool32 drwav_seek_to_first_sample(drwav* pWav) { if (pWav->onWrite != NULL) { @@ -2015,6 +2320,11 @@ drwav_bool32 drwav_seek_to_sample(drwav* pWav, drwav_uint64 sample) return DRWAV_TRUE; } +drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex) +{ + return drwav_seek_to_sample(pWav, targetFrameIndex * pWav->channels); +} + size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData) { @@ -2060,6 +2370,11 @@ drwav_uint64 drwav_write(drwav* pWav, drwav_uint64 samplesToWrite, const void* p return (bytesWritten * 8) / pWav->bitsPerSample; } +drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData) +{ + return drwav_write(pWav, framesToWrite * pWav->channels, pData) / pWav->channels; +} + drwav_uint64 drwav_read_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) @@ -2482,9 +2797,13 @@ static void drwav__ieee_to_s16(drwav_int16* pOut, const unsigned char* pIn, size drwav_uint64 drwav_read_s16__pcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) { // Fast path. - if (pWav->bytesPerSample == 2) { + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 16) { return drwav_read(pWav, samplesToRead, pBufferOut); } + + if (pWav->bytesPerSample == 0) { + return 0; + } drwav_uint64 totalSamplesRead = 0; unsigned char sampleData[4096]; @@ -2506,6 +2825,10 @@ drwav_uint64 drwav_read_s16__pcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_ drwav_uint64 drwav_read_s16__ieee(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) { + if (pWav->bytesPerSample == 0) { + return 0; + } + drwav_uint64 totalSamplesRead = 0; unsigned char sampleData[4096]; while (samplesToRead > 0) { @@ -2526,6 +2849,10 @@ drwav_uint64 drwav_read_s16__ieee(drwav* pWav, drwav_uint64 samplesToRead, drwav drwav_uint64 drwav_read_s16__alaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) { + if (pWav->bytesPerSample == 0) { + return 0; + } + drwav_uint64 totalSamplesRead = 0; unsigned char sampleData[4096]; while (samplesToRead > 0) { @@ -2546,6 +2873,10 @@ drwav_uint64 drwav_read_s16__alaw(drwav* pWav, drwav_uint64 samplesToRead, drwav drwav_uint64 drwav_read_s16__mulaw(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut) { + if (pWav->bytesPerSample == 0) { + return 0; + } + drwav_uint64 totalSamplesRead = 0; unsigned char sampleData[4096]; while (samplesToRead > 0) { @@ -2602,6 +2933,11 @@ drwav_uint64 drwav_read_s16(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16 return 0; } +drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut) +{ + return drwav_read_s16(pWav, framesToRead * pWav->channels, pBufferOut) / pWav->channels; +} + void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount) { int r; @@ -2812,10 +3148,10 @@ drwav_uint64 drwav_read_f32__ima(drwav* pWav, drwav_uint64 samplesToRead, float* drwav_uint64 drwav_read_f32__ieee(drwav* pWav, drwav_uint64 samplesToRead, float* pBufferOut) { // Fast path. - if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bytesPerSample == 4) { + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bitsPerSample == 32) { return drwav_read(pWav, samplesToRead, pBufferOut); } - + if (pWav->bytesPerSample == 0) { return 0; } @@ -2924,6 +3260,11 @@ drwav_uint64 drwav_read_f32(drwav* pWav, drwav_uint64 samplesToRead, float* pBuf return 0; } +drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut) +{ + return drwav_read_f32(pWav, framesToRead * pWav->channels, pBufferOut) / pWav->channels; +} + void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount) { if (pOut == NULL || pIn == NULL) { @@ -3085,10 +3426,10 @@ static void drwav__ieee_to_s32(drwav_int32* pOut, const unsigned char* pIn, size drwav_uint64 drwav_read_s32__pcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32* pBufferOut) { // Fast path. - if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bytesPerSample == 4) { + if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 32) { return drwav_read(pWav, samplesToRead, pBufferOut); } - + if (pWav->bytesPerSample == 0) { return 0; } @@ -3266,6 +3607,11 @@ drwav_uint64 drwav_read_s32(drwav* pWav, drwav_uint64 samplesToRead, drwav_int32 return 0; } +drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut) +{ + return drwav_read_s32(pWav, framesToRead * pWav->channels, pBufferOut) / pWav->channels; +} + void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount) { if (pOut == NULL || pIn == NULL) { @@ -3446,8 +3792,8 @@ drwav_int32* drwav__read_and_close_s32(drwav* pWav, unsigned int* channels, unsi drwav_int16* drwav_open_and_read_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) { - if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; + if (sampleRate) *sampleRate = 0; if (totalSampleCount) *totalSampleCount = 0; drwav wav; @@ -3458,6 +3804,27 @@ drwav_int16* drwav_open_and_read_s16(drwav_read_proc onRead, drwav_seek_proc onS return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount); } +drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut) +{ + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalFrameCountOut) *totalFrameCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drwav_uint64 totalSampleCount; + drwav_int16* result = drwav_open_and_read_s16(onRead, onSeek, pUserData, &channels, &sampleRate, &totalSampleCount); + if (result == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalFrameCountOut) *totalFrameCountOut = totalSampleCount / channels; + + return result; +} + float* drwav_open_and_read_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) { if (sampleRate) *sampleRate = 0; @@ -3472,6 +3839,27 @@ float* drwav_open_and_read_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, v return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount); } +float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut) +{ + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalFrameCountOut) *totalFrameCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drwav_uint64 totalSampleCount; + float* result = drwav_open_and_read_f32(onRead, onSeek, pUserData, &channels, &sampleRate, &totalSampleCount); + if (result == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalFrameCountOut) *totalFrameCountOut = totalSampleCount / channels; + + return result; +} + drwav_int32* drwav_open_and_read_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) { if (sampleRate) *sampleRate = 0; @@ -3486,8 +3874,29 @@ drwav_int32* drwav_open_and_read_s32(drwav_read_proc onRead, drwav_seek_proc onS return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount); } +drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut) +{ + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalFrameCountOut) *totalFrameCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drwav_uint64 totalSampleCount; + drwav_int32* result = drwav_open_and_read_s32(onRead, onSeek, pUserData, &channels, &sampleRate, &totalSampleCount); + if (result == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalFrameCountOut) *totalFrameCountOut = totalSampleCount / channels; + + return result; +} + #ifndef DR_WAV_NO_STDIO -drwav_int16* drwav_open_and_read_file_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +drwav_int16* drwav_open_file_and_read_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) { if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; @@ -3501,7 +3910,28 @@ drwav_int16* drwav_open_and_read_file_s16(const char* filename, unsigned int* ch return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount); } -float* drwav_open_and_read_file_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut) +{ + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalFrameCountOut) *totalFrameCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drwav_uint64 totalSampleCount; + drwav_int16* result = drwav_open_file_and_read_s16(filename, &channels, &sampleRate, &totalSampleCount); + if (result == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalFrameCountOut) *totalFrameCountOut = totalSampleCount / channels; + + return result; +} + +float* drwav_open_file_and_read_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) { if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; @@ -3515,7 +3945,28 @@ float* drwav_open_and_read_file_f32(const char* filename, unsigned int* channels return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount); } -drwav_int32* drwav_open_and_read_file_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut) +{ + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalFrameCountOut) *totalFrameCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drwav_uint64 totalSampleCount; + float* result = drwav_open_file_and_read_f32(filename, &channels, &sampleRate, &totalSampleCount); + if (result == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalFrameCountOut) *totalFrameCountOut = totalSampleCount / channels; + + return result; +} + +drwav_int32* drwav_open_file_and_read_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) { if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; @@ -3528,9 +3979,30 @@ drwav_int32* drwav_open_and_read_file_s32(const char* filename, unsigned int* ch return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount); } + +drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut) +{ + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalFrameCountOut) *totalFrameCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drwav_uint64 totalSampleCount; + drwav_int32* result = drwav_open_file_and_read_s32(filename, &channels, &sampleRate, &totalSampleCount); + if (result == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalFrameCountOut) *totalFrameCountOut = totalSampleCount / channels; + + return result; +} #endif -drwav_int16* drwav_open_and_read_memory_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +drwav_int16* drwav_open_memory_and_read_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) { if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; @@ -3544,7 +4016,28 @@ drwav_int16* drwav_open_and_read_memory_s16(const void* data, size_t dataSize, u return drwav__read_and_close_s16(&wav, channels, sampleRate, totalSampleCount); } -float* drwav_open_and_read_memory_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut) +{ + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalFrameCountOut) *totalFrameCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drwav_uint64 totalSampleCount; + drwav_int16* result = drwav_open_memory_and_read_s16(data, dataSize, &channels, &sampleRate, &totalSampleCount); + if (result == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalFrameCountOut) *totalFrameCountOut = totalSampleCount / channels; + + return result; +} + +float* drwav_open_memory_and_read_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) { if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; @@ -3558,7 +4051,28 @@ float* drwav_open_and_read_memory_f32(const void* data, size_t dataSize, unsigne return drwav__read_and_close_f32(&wav, channels, sampleRate, totalSampleCount); } -drwav_int32* drwav_open_and_read_memory_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) +float* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut) +{ + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalFrameCountOut) *totalFrameCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drwav_uint64 totalSampleCount; + float* result = drwav_open_memory_and_read_f32(data, dataSize, &channels, &sampleRate, &totalSampleCount); + if (result == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalFrameCountOut) *totalFrameCountOut = totalSampleCount / channels; + + return result; +} + +drwav_int32* drwav_open_memory_and_read_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalSampleCount) { if (sampleRate) *sampleRate = 0; if (channels) *channels = 0; @@ -3571,6 +4085,27 @@ drwav_int32* drwav_open_and_read_memory_s32(const void* data, size_t dataSize, u return drwav__read_and_close_s32(&wav, channels, sampleRate, totalSampleCount); } + +drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut) +{ + if (channelsOut) *channelsOut = 0; + if (sampleRateOut) *sampleRateOut = 0; + if (totalFrameCountOut) *totalFrameCountOut = 0; + + unsigned int channels; + unsigned int sampleRate; + drwav_uint64 totalSampleCount; + drwav_int32* result = drwav_open_memory_and_read_s32(data, dataSize, &channels, &sampleRate, &totalSampleCount); + if (result == NULL) { + return NULL; + } + + if (channelsOut) *channelsOut = channels; + if (sampleRateOut) *sampleRateOut = sampleRate; + if (totalFrameCountOut) *totalFrameCountOut = totalSampleCount / channels; + + return result; +} #endif //DR_WAV_NO_CONVERSION_API @@ -3584,6 +4119,15 @@ void drwav_free(void* pDataReturnedByOpenAndRead) // REVISION HISTORY // +// v0.9.0 - 2018-xx-xx +// - API CHANGE: Rename drwav_open_and_read_file_*() to drwav_open_file_and_read_*(). +// - API CHANGE: Rename drwav_open_and_read_memory_*() to drwav_open_memory_and_read_*(). +// - Add built-in support for smpl chunks. +// - Add support for firing a callback for each chunk in the file at initialization time. +// - This is enabled through the drwav_init_ex(), etc. family of APIs. +// - Add new reading APIs for reading by PCM frames instead of samples. +// - Handle invalid FMT chunks more robustly. +// // v0.8.5 - 2018-09-11 // - Const correctness. // - Fix a potential stack overflow. diff --git a/mini_al.h b/mini_al.h index 4dbc17f8..08210c15 100644 --- a/mini_al.h +++ b/mini_al.h @@ -1,5 +1,5 @@ // Audio playback and capture library. Public domain. See "unlicense" statement at the end of this file. -// mini_al - v0.8.13 - 2018-12-04 +// mini_al - v0.8.14 - 2018-12-16 // // David Reid - davidreidsoftware@gmail.com @@ -18247,10 +18247,16 @@ void mal_device_uninit__opensl(mal_device* pDevice) // Uninit device. if (pDevice->type == mal_device_type_playback) { - if (pDevice->opensl.pAudioPlayerObj) MAL_OPENSL_OBJ(pDevice->opensl.pAudioPlayerObj)->Destroy((SLObjectItf)pDevice->opensl.pAudioPlayerObj); - if (pDevice->opensl.pOutputMixObj) MAL_OPENSL_OBJ(pDevice->opensl.pOutputMixObj)->Destroy((SLObjectItf)pDevice->opensl.pOutputMixObj); + if (pDevice->opensl.pAudioPlayerObj) { + MAL_OPENSL_OBJ(pDevice->opensl.pAudioPlayerObj)->Destroy((SLObjectItf)pDevice->opensl.pAudioPlayerObj); + } + if (pDevice->opensl.pOutputMixObj) { + MAL_OPENSL_OBJ(pDevice->opensl.pOutputMixObj)->Destroy((SLObjectItf)pDevice->opensl.pOutputMixObj); + } } else { - if (pDevice->opensl.pAudioRecorderObj) MAL_OPENSL_OBJ(pDevice->opensl.pAudioRecorderObj)->Destroy((SLObjectItf)pDevice->opensl.pAudioRecorderObj); + if (pDevice->opensl.pAudioRecorderObj) { + MAL_OPENSL_OBJ(pDevice->opensl.pAudioRecorderObj)->Destroy((SLObjectItf)pDevice->opensl.pAudioRecorderObj); + } } mal_free(pDevice->opensl.pBuffer); @@ -27083,7 +27089,7 @@ mal_result mal_decoder_internal_on_seek_to_frame__wav(mal_decoder* pDecoder, mal drwav* pWav = (drwav*)pDecoder->pInternalDecoder; mal_assert(pWav != NULL); - drwav_bool32 result = drwav_seek_to_sample(pWav, frameIndex*pWav->channels); + drwav_bool32 result = drwav_seek_to_pcm_frame(pWav, frameIndex); if (result) { return MAL_SUCCESS; } else { @@ -27108,9 +27114,9 @@ mal_uint32 mal_decoder_internal_on_read_frames__wav(mal_dsp* pDSP, mal_uint32 fr mal_assert(pWav != NULL); switch (pDecoder->internalFormat) { - case mal_format_s16: return (mal_uint32)drwav_read_s16(pWav, frameCount*pDecoder->internalChannels, (drwav_int16*)pSamplesOut) / pDecoder->internalChannels; - case mal_format_s32: return (mal_uint32)drwav_read_s32(pWav, frameCount*pDecoder->internalChannels, (drwav_int32*)pSamplesOut) / pDecoder->internalChannels; - case mal_format_f32: return (mal_uint32)drwav_read_f32(pWav, frameCount*pDecoder->internalChannels, (float*)pSamplesOut) / pDecoder->internalChannels; + case mal_format_s16: return (mal_uint32)drwav_read_pcm_frames_s16(pWav, frameCount, (drwav_int16*)pSamplesOut); + case mal_format_s32: return (mal_uint32)drwav_read_pcm_frames_s32(pWav, frameCount, (drwav_int32*)pSamplesOut); + case mal_format_f32: return (mal_uint32)drwav_read_pcm_frames_f32(pWav, frameCount, (float*)pSamplesOut); default: break; } @@ -27210,7 +27216,7 @@ mal_result mal_decoder_internal_on_seek_to_frame__flac(mal_decoder* pDecoder, ma drflac* pFlac = (drflac*)pDecoder->pInternalDecoder; mal_assert(pFlac != NULL); - drflac_bool32 result = drflac_seek_to_sample(pFlac, frameIndex*pFlac->channels); + drflac_bool32 result = drflac_seek_to_pcm_frame(pFlac, frameIndex); if (result) { return MAL_SUCCESS; } else { @@ -27235,7 +27241,7 @@ mal_uint32 mal_decoder_internal_on_read_frames__flac(mal_dsp* pDSP, mal_uint32 f drflac* pFlac = (drflac*)pDecoder->pInternalDecoder; mal_assert(pFlac != NULL); - return (mal_uint32)drflac_read_s32(pFlac, frameCount*pDecoder->internalChannels, (drflac_int32*)pSamplesOut) / pDecoder->internalChannels; + return (mal_uint32)drflac_read_pcm_frames_s32(pFlac, frameCount, (drflac_int32*)pSamplesOut); } mal_result mal_decoder_init_flac__internal(const mal_decoder_config* pConfig, mal_decoder* pDecoder) @@ -27574,7 +27580,7 @@ mal_result mal_decoder_internal_on_seek_to_frame__mp3(mal_decoder* pDecoder, mal drmp3* pMP3 = (drmp3*)pDecoder->pInternalDecoder; mal_assert(pMP3 != NULL); - drmp3_bool32 result = drmp3_seek_to_frame(pMP3, frameIndex); + drmp3_bool32 result = drmp3_seek_to_pcm_frame(pMP3, frameIndex); if (result) { return MAL_SUCCESS; } else { @@ -27600,7 +27606,7 @@ mal_uint32 mal_decoder_internal_on_read_frames__mp3(mal_dsp* pDSP, mal_uint32 fr drmp3* pMP3 = (drmp3*)pDecoder->pInternalDecoder; mal_assert(pMP3 != NULL); - return (mal_uint32)drmp3_read_f32(pMP3, frameCount, (float*)pSamplesOut); + return (mal_uint32)drmp3_read_pcm_frames_f32(pMP3, frameCount, (float*)pSamplesOut); } mal_result mal_decoder_init_mp3__internal(const mal_decoder_config* pConfig, mal_decoder* pDecoder) @@ -28484,6 +28490,10 @@ mal_uint64 mal_sine_wave_read_ex(mal_sine_wave* pSineWave, mal_uint64 frameCount // REVISION HISTORY // ================ // +// v0.8.14 - 2018-12-16 +// - Core Audio: Fix a bug where the device state is not set correctly after stopping. +// - Update decoders to use updated APIs in dr_flac, dr_mp3 and dr_wav. +// // v0.8.13 - 2018-12-04 // - Core Audio: Fix a bug with channel mapping. // - Fix a bug with channel routing where the back/left and back/right channels have the wrong weight.