Update minimp3

based on afb604c06b/minimp3.h
This reapplies be4bdb5365.
This commit is contained in:
Robin Gareus
2023-07-05 18:05:33 +02:00
parent 538bb73f99
commit d01dbcba83

View File

@@ -14,7 +14,7 @@
typedef struct
{
int frame_bytes, channels, hz, layer, bitrate_kbps;
int frame_bytes, frame_offset, channels, hz, layer, bitrate_kbps;
} mp3dec_frame_info_t;
typedef struct
@@ -88,7 +88,7 @@ int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_s
#if !defined(MINIMP3_NO_SIMD)
#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(_M_ARM64) || defined(__x86_64__) || defined(__aarch64__))
#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__) || defined(_M_ARM64))
/* x64 always have SSE2, arm64 always have neon, no need for generic code */
#define MINIMP3_ONLY_SIMD
#endif /* SIMD checks... */
@@ -138,7 +138,7 @@ static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[
#endif /* defined(__PIC__)*/
}
#endif /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
static int have_simd()
static int have_simd(void)
{
#ifdef MINIMP3_ONLY_SIMD
return 1;
@@ -163,7 +163,7 @@ end:
return g_have_simd - 1;
#endif /* MINIMP3_ONLY_SIMD */
}
#elif defined(__ARM_NEON) || defined(__aarch64__)
#elif defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)
#include <arm_neon.h>
#define HAVE_SSE 0
#define HAVE_SIMD 1
@@ -193,6 +193,18 @@ static int have_simd()
#define HAVE_SIMD 0
#endif /* !defined(MINIMP3_NO_SIMD) */
#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64)
#define HAVE_ARMV6 1
static __inline__ __attribute__((always_inline)) int32_t minimp3_clip_int16_arm(int32_t a)
{
int32_t x = 0;
__asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
return x;
}
#else
#define HAVE_ARMV6 0
#endif
typedef struct
{
const uint8_t *buf;
@@ -871,12 +883,22 @@ static void L3_midside_stereo(float *left, int n)
int i = 0;
float *right = left + 576;
#if HAVE_SIMD
if (have_simd()) for (; i < n - 3; i += 4)
if (have_simd())
{
f4 vl = VLD(left + i);
f4 vr = VLD(right + i);
VSTORE(left + i, VADD(vl, vr));
VSTORE(right + i, VSUB(vl, vr));
for (; i < n - 3; i += 4)
{
f4 vl = VLD(left + i);
f4 vr = VLD(right + i);
VSTORE(left + i, VADD(vl, vr));
VSTORE(right + i, VSUB(vl, vr));
}
#ifdef __GNUC__
/* Workaround for spurious -Waggressive-loop-optimizations warning from gcc.
* For more info see: https://github.com/lieff/minimp3/issues/88
*/
if (__builtin_constant_p(n % 4 == 0) && n % 4 == 0)
return;
#endif
}
#endif /* HAVE_SIMD */
for (; i < n; i++)
@@ -1343,7 +1365,7 @@ static void mp3d_DCT_II(float *grbuf, int n)
} else
#endif /* HAVE_SIMD */
#ifdef MINIMP3_ONLY_SIMD
{}
{} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */
#else /* MINIMP3_ONLY_SIMD */
for (; k < n; k++)
{
@@ -1409,10 +1431,16 @@ static void mp3d_DCT_II(float *grbuf, int n)
#ifndef MINIMP3_FLOAT_OUTPUT
static int16_t mp3d_scale_pcm(float sample)
{
#if HAVE_ARMV6
int32_t s32 = (int32_t)(sample + .5f);
s32 -= (s32 < 0);
int16_t s = (int16_t)minimp3_clip_int16_arm(s32);
#else
if (sample >= 32766.5) return (int16_t) 32767;
if (sample <= -32767.5) return (int16_t)-32768;
int16_t s = (int16_t)(sample + .5f);
s -= (s < 0); /* away from zero, to be compliant */
#endif
return s;
}
#else /* MINIMP3_FLOAT_OUTPUT */
@@ -1567,7 +1595,7 @@ static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins)
} else
#endif /* HAVE_SIMD */
#ifdef MINIMP3_ONLY_SIMD
{}
{} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */
#else /* MINIMP3_ONLY_SIMD */
for (i = 14; i >= 0; i--)
{
@@ -1676,7 +1704,7 @@ static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_b
}
}
*ptr_frame_bytes = 0;
return i;
return mp3_bytes;
}
void mp3dec_init(mp3dec_t *dec)
@@ -1713,6 +1741,7 @@ int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_s
hdr = mp3 + i;
memcpy(dec->header, hdr, HDR_SIZE);
info->frame_bytes = i + frame_size;
info->frame_offset = i;
info->channels = HDR_IS_MONO(hdr) ? 1 : 2;
info->hz = hdr_sample_rate_hz(hdr);
info->layer = 4 - HDR_GET_LAYER(hdr);
@@ -1781,60 +1810,56 @@ int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_s
#ifdef MINIMP3_FLOAT_OUTPUT
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples)
{
if(num_samples > 0)
{
int i = 0;
int i = 0;
#if HAVE_SIMD
int aligned_count = num_samples & ~7;
for(;i < aligned_count;i+=8)
{
static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
f4 a = VMUL(VLD(&in[i ]), g_scale);
f4 b = VMUL(VLD(&in[i+4]), g_scale);
int aligned_count = num_samples & ~7;
for(; i < aligned_count; i += 8)
{
static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
f4 a = VMUL(VLD(&in[i ]), g_scale);
f4 b = VMUL(VLD(&in[i+4]), g_scale);
#if HAVE_SSE
static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
__m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
out[i ] = _mm_extract_epi16(pcm8, 0);
out[i+1] = _mm_extract_epi16(pcm8, 1);
out[i+2] = _mm_extract_epi16(pcm8, 2);
out[i+3] = _mm_extract_epi16(pcm8, 3);
out[i+4] = _mm_extract_epi16(pcm8, 4);
out[i+5] = _mm_extract_epi16(pcm8, 5);
out[i+6] = _mm_extract_epi16(pcm8, 6);
out[i+7] = _mm_extract_epi16(pcm8, 7);
static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
__m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
out[i ] = _mm_extract_epi16(pcm8, 0);
out[i+1] = _mm_extract_epi16(pcm8, 1);
out[i+2] = _mm_extract_epi16(pcm8, 2);
out[i+3] = _mm_extract_epi16(pcm8, 3);
out[i+4] = _mm_extract_epi16(pcm8, 4);
out[i+5] = _mm_extract_epi16(pcm8, 5);
out[i+6] = _mm_extract_epi16(pcm8, 6);
out[i+7] = _mm_extract_epi16(pcm8, 7);
#else /* HAVE_SSE */
int16x4_t pcma, pcmb;
a = VADD(a, VSET(0.5f));
b = VADD(b, VSET(0.5f));
pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
vst1_lane_s16(out+i , pcma, 0);
vst1_lane_s16(out+i+1, pcma, 1);
vst1_lane_s16(out+i+2, pcma, 2);
vst1_lane_s16(out+i+3, pcma, 3);
vst1_lane_s16(out+i+4, pcmb, 0);
vst1_lane_s16(out+i+5, pcmb, 1);
vst1_lane_s16(out+i+6, pcmb, 2);
vst1_lane_s16(out+i+7, pcmb, 3);
int16x4_t pcma, pcmb;
a = VADD(a, VSET(0.5f));
b = VADD(b, VSET(0.5f));
pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
vst1_lane_s16(out+i , pcma, 0);
vst1_lane_s16(out+i+1, pcma, 1);
vst1_lane_s16(out+i+2, pcma, 2);
vst1_lane_s16(out+i+3, pcma, 3);
vst1_lane_s16(out+i+4, pcmb, 0);
vst1_lane_s16(out+i+5, pcmb, 1);
vst1_lane_s16(out+i+6, pcmb, 2);
vst1_lane_s16(out+i+7, pcmb, 3);
#endif /* HAVE_SSE */
}
}
#endif /* HAVE_SIMD */
for(; i < num_samples; i++)
for(; i < num_samples; i++)
{
float sample = in[i] * 32768.0f;
if (sample >= 32766.5)
out[i] = (int16_t) 32767;
else if (sample <= -32767.5)
out[i] = (int16_t)-32768;
else
{
float sample = in[i] * 32768.0f;
if (sample >= 32766.5)
out[i] = (int16_t) 32767;
else if (sample <= -32767.5)
out[i] = (int16_t)-32768;
else
{
int16_t s = (int16_t)(sample + .5f);
s -= (s < 0); /* away from zero, to be compliant */
out[i] = s;
}
int16_t s = (int16_t)(sample + .5f);
s -= (s < 0); /* away from zero, to be compliant */
out[i] = s;
}
}
}