From 1f97a8138b1b3627824baddc15e45912bb6c4a51 Mon Sep 17 00:00:00 2001 From: David Reid Date: Fri, 14 Aug 2020 18:50:22 +1000 Subject: [PATCH] Update dr_flac. --- extras/dr_flac.h | 43 ++++++++++++++++++++++++++++++++----------- miniaudio.h | 20 ++++++++++---------- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/extras/dr_flac.h b/extras/dr_flac.h index a78f370e..04fa2b06 100644 --- a/extras/dr_flac.h +++ b/extras/dr_flac.h @@ -1,6 +1,6 @@ /* FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file. -dr_flac - v0.12.17 - 2020-08-02 +dr_flac - v0.12.18 - 2020-08-14 David Reid - mackron@gmail.com @@ -232,7 +232,7 @@ extern "C" { #define DRFLAC_VERSION_MAJOR 0 #define DRFLAC_VERSION_MINOR 12 -#define DRFLAC_VERSION_REVISION 17 +#define DRFLAC_VERSION_REVISION 18 #define DRFLAC_VERSION_STRING DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION) #include /* For size_t. */ @@ -1518,7 +1518,7 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void) } -#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) +#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) && !defined(__clang__) #define DRFLAC_HAS_LZCNT_INTRINSIC #elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) #define DRFLAC_HAS_LZCNT_INTRINSIC @@ -1530,7 +1530,7 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void) #endif #endif -#if defined(_MSC_VER) && _MSC_VER >= 1400 +#if defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(__clang__) #define DRFLAC_HAS_BYTESWAP16_INTRINSIC #define DRFLAC_HAS_BYTESWAP32_INTRINSIC #define DRFLAC_HAS_BYTESWAP64_INTRINSIC @@ -1773,7 +1773,7 @@ static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian(void) static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n) { #ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC - #if defined(_MSC_VER) + #if defined(_MSC_VER) && !defined(__clang__) return _byteswap_ushort(n); #elif defined(__GNUC__) || defined(__clang__) return __builtin_bswap16(n); @@ -1789,7 +1789,7 @@ static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n) static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n) { #ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC - #if defined(_MSC_VER) + #if defined(_MSC_VER) && !defined(__clang__) return _byteswap_ulong(n); #elif defined(__GNUC__) || defined(__clang__) #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT) /* <-- 64-bit inline assembly has not been tested, so disabling for now. */ @@ -1820,7 +1820,7 @@ static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n) static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n) { #ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC - #if defined(_MSC_VER) + #if defined(_MSC_VER) && !defined(__clang__) return _byteswap_uint64(n); #elif defined(__GNUC__) || defined(__clang__) return __builtin_bswap64(n); @@ -2624,7 +2624,7 @@ static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs) #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) #define DRFLAC_IMPLEMENT_CLZ_LZCNT #endif -#if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) +#if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(__clang__) #define DRFLAC_IMPLEMENT_CLZ_MSVC #endif @@ -2679,7 +2679,25 @@ static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void) static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) { -#if defined(_MSC_VER) && !defined(__clang__) + /* + It's critical for competitive decoding performance that this function be highly optimal. With MSVC we can use the __lzcnt64() and __lzcnt() intrinsics + to achieve good performance, however on GCC and Clang it's a little bit more annoying. The __builtin_clzl() and __builtin_clzll() intrinsics leave + it undefined as to the return value when `x` is 0. We need this to be well defined as returning 32 or 64, depending on whether or not it's a 32- or + 64-bit build. To work around this we would need to add a conditional to check for the x = 0 case, but this creates unnecessary inefficiency. To work + around this problem I have written some inline assembly to emit the LZCNT (x86) or CLZ (ARM) instruction directly which removes the need to include + the conditional. This has worked well in the past, but for some reason Clang's MSVC compatible driver, clang-cl, does not seem to be handling this + in the same way as the normal Clang driver. It seems that `clang-cl` is just outputting the wrong results sometimes, maybe due to some register + getting clobbered? + + I'm not sure if this is a bug with dr_flac's inlined assembly (most likely), a bug in `clang-cl` or just a misunderstanding on my part with inline + assembly rules for `clang-cl`. If somebody can identify an error in dr_flac's inlined assembly I'm happy to get that fixed. + + Fortunately there is an easy workaround for this. Clang implements MSVC-specific intrinsics for compatibility. It also defines _MSC_VER for extra + compatibility. We can therefore just check for _MSC_VER and use the MSVC intrinsic which, fortunately for us, Clang supports. It would still be nice + to know how to fix the inlined assembly for correctness sake, however. + */ + +#if defined(_MSC_VER) /*&& !defined(__clang__)*/ /* <-- Intentionally wanting Clang to use the MSVC __lzcnt64/__lzcnt intrinsics due to above ^. */ #ifdef DRFLAC_64BIT return (drflac_uint32)__lzcnt64(x); #else @@ -2691,7 +2709,7 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) { drflac_uint64 r; __asm__ __volatile__ ( - "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x) + "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc" ); return (drflac_uint32)r; @@ -2700,7 +2718,7 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) { drflac_uint32 r; __asm__ __volatile__ ( - "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x) + "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc" ); return r; @@ -11751,6 +11769,9 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat /* REVISION HISTORY ================ +v0.12.18 - 2020-08-14 + - Fix a crash when compiling with clang-cl. + v0.12.17 - 2020-08-02 - Simplify sized types. diff --git a/miniaudio.h b/miniaudio.h index 92539dd7..f0ad52d7 100644 --- a/miniaudio.h +++ b/miniaudio.h @@ -42914,7 +42914,7 @@ extern "C" { #define DRFLAC_XSTRINGIFY(x) DRFLAC_STRINGIFY(x) #define DRFLAC_VERSION_MAJOR 0 #define DRFLAC_VERSION_MINOR 12 -#define DRFLAC_VERSION_REVISION 17 +#define DRFLAC_VERSION_REVISION 18 #define DRFLAC_VERSION_STRING DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION) #include typedef signed char drflac_int8; @@ -50929,7 +50929,7 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void) return DRFLAC_FALSE; #endif } -#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) +#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) && !defined(__clang__) #define DRFLAC_HAS_LZCNT_INTRINSIC #elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) #define DRFLAC_HAS_LZCNT_INTRINSIC @@ -50940,7 +50940,7 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void) #endif #endif #endif -#if defined(_MSC_VER) && _MSC_VER >= 1400 +#if defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(__clang__) #define DRFLAC_HAS_BYTESWAP16_INTRINSIC #define DRFLAC_HAS_BYTESWAP32_INTRINSIC #define DRFLAC_HAS_BYTESWAP64_INTRINSIC @@ -51141,7 +51141,7 @@ static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian(void) static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n) { #ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC - #if defined(_MSC_VER) + #if defined(_MSC_VER) && !defined(__clang__) return _byteswap_ushort(n); #elif defined(__GNUC__) || defined(__clang__) return __builtin_bswap16(n); @@ -51156,7 +51156,7 @@ static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n) static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n) { #ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC - #if defined(_MSC_VER) + #if defined(_MSC_VER) && !defined(__clang__) return _byteswap_ulong(n); #elif defined(__GNUC__) || defined(__clang__) #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT) @@ -51185,7 +51185,7 @@ static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n) static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n) { #ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC - #if defined(_MSC_VER) + #if defined(_MSC_VER) && !defined(__clang__) return _byteswap_uint64(n); #elif defined(__GNUC__) || defined(__clang__) return __builtin_bswap64(n); @@ -51801,7 +51801,7 @@ static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs) #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) #define DRFLAC_IMPLEMENT_CLZ_LZCNT #endif -#if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) +#if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(__clang__) #define DRFLAC_IMPLEMENT_CLZ_MSVC #endif static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x) @@ -51848,7 +51848,7 @@ static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void) } static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) { -#if defined(_MSC_VER) && !defined(__clang__) +#if defined(_MSC_VER) #ifdef DRFLAC_64BIT return (drflac_uint32)__lzcnt64(x); #else @@ -51860,7 +51860,7 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) { drflac_uint64 r; __asm__ __volatile__ ( - "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x) + "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc" ); return (drflac_uint32)r; } @@ -51868,7 +51868,7 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) { drflac_uint32 r; __asm__ __volatile__ ( - "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x) + "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc" ); return r; }