From 71101011723e63459db24d06c568d898ef7a6a06 Mon Sep 17 00:00:00 2001 From: Rob Sykes Date: Mon, 30 May 2016 22:07:46 +0100 Subject: [PATCH] clean-up --- CMakeLists.txt | 5 +- multi-arch | 6 +- src/data-io.c | 47 +++------------ src/rint-clip.h | 51 ++++++++-------- src/rint.h | 133 ++++++++++++++++++++++++++---------------- src/soxr.c | 11 ++-- tests/throughput-test | 6 +- tests/throughput.c | 8 ++- 8 files changed, 145 insertions(+), 122 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4fbe632..c8d6bae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,7 +150,10 @@ test_big_endian (HAVE_BIGENDIAN) if (CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang") set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -Wconversion -Wall -Wextra \ - -pedantic -Wundef -Wpointer-arith -Wno-long-long -Wno-keyword-macro") + -pedantic -Wundef -Wpointer-arith -Wno-long-long") + if (CMAKE_C_COMPILER_ID STREQUAL "Clang") + set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -Wno-keyword-macro") + endif () if (WITH_DEV_GPROF) set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -pg") endif () diff --git a/multi-arch b/multi-arch index 174cc4b..63bb223 100755 --- a/multi-arch +++ b/multi-arch @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash set -e # SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net @@ -23,7 +23,9 @@ for n in \ echo "***" $dir mkdir -p $dir cd $dir - cmake -DCMAKE_BUILD_TYPE=$build -DCMAKE_C_COMPILER=$compiler -DCMAKE_SYSTEM_NAME="$system" .. + cmake -DCMAKE_BUILD_TYPE=$build -DCMAKE_C_COMPILER=$compiler -DCMAKE_SYSTEM_NAME="$system" -DBUILD_SHARED_LIBS=OFF -DWITH_OPENMP=OFF .. make $j && [ /$system = / ] && ctest -j || true + cd tests + ../../tests/throughput-test && SOXR_THROUGHPUT_GAIN=.6 ../../tests/throughput-test 2 3 || true ) done diff --git a/src/data-io.c b/src/data-io.c index 52144c2..fb61675 100644 --- a/src/data-io.c +++ b/src/data-io.c @@ -1,4 +1,4 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ #include @@ -14,8 +14,8 @@ unsigned i; \ size_t j; \ T const * src = *src0; \ - if (ch > 1) \ - for (j = 0; j < n; ++j) for (i = 0; i < ch; ++i) dest[i][j] = (DEINTERLEAVE_TO)*src++; \ + if (ch > 1) for (j = 0; j < n; ++j) \ + for (i = 0; i < ch; ++i) dest[i][j] = (DEINTERLEAVE_TO)*src++; \ else if (flag) memcpy(dest[0], src, n * sizeof(T)), src = &src[n]; \ else for (j = 0; j < n; dest[0][j++] = (DEINTERLEAVE_TO)*src++); \ *src0 = src; \ @@ -60,35 +60,6 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ #include "rint.h" -#if HAVE_FENV_H - #include - #define fe_test_invalid() fetestexcept(FE_INVALID) - #define fe_clear_invalid() feclearexcept(FE_INVALID) -#elif defined _MSC_VER - #define FE_INVALID 1 - #if defined _WIN64 - #include - #define fe_test_invalid() (_statusfp() & _SW_INVALID) - #define fe_clear_invalid _clearfp /* FIXME clears all */ - #else - static __inline int fe_test_invalid() - { - short status_word; - __asm fnstsw status_word - return status_word & FE_INVALID; - } - - static __inline int fe_clear_invalid() - { - int16_t status[14]; - __asm fnstenv status - status[2] &= ~FE_INVALID; - __asm fldenv status - return 0; - } - #endif -#endif - #if defined FE_INVALID && defined FPU_RINT32 && defined __STDC_VERSION__ @@ -103,7 +74,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ #define LSX_RINT_CLIP_2 lsx_rint32_clip_2 #define LSX_RINT_CLIP lsx_rint32_clip #define RINT_CLIP rint32_clip -#define RINT rint32 +#define RINT rint32D #if defined FPU_RINT32 #define FPU_RINT #endif @@ -114,7 +85,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ #define LSX_RINT_CLIP_2 lsx_rint16_clip_2 #define LSX_RINT_CLIP lsx_rint16_clip #define RINT_CLIP rint16_clip -#define RINT rint16 +#define RINT rint16D #if defined FPU_RINT16 #define FPU_RINT #endif @@ -125,7 +96,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ #define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither #define LSX_RINT_CLIP lsx_rint16_clip_dither #define RINT_CLIP rint16_clip_dither -#define RINT rint16 +#define RINT rint16D #if defined FPU_RINT16 #define FPU_RINT #endif @@ -145,7 +116,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ #define LSX_RINT_CLIP_2 lsx_rint32_clip_2_f #define LSX_RINT_CLIP lsx_rint32_clip_f #define RINT_CLIP rint32_clip_f -#define RINT rint32 +#define RINT rint32F #if defined FPU_RINT32 #define FPU_RINT #endif @@ -156,7 +127,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ #define LSX_RINT_CLIP_2 lsx_rint16_clip_2_f #define LSX_RINT_CLIP lsx_rint16_clip_f #define RINT_CLIP rint16_clip_f -#define RINT rint16 +#define RINT rint16F #if defined FPU_RINT16 #define FPU_RINT #endif @@ -167,7 +138,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ #define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither_f #define LSX_RINT_CLIP lsx_rint16_clip_dither_f #define RINT_CLIP rint16_clip_dither_f -#define RINT rint16 +#define RINT rint16D #if defined FPU_RINT16 #define FPU_RINT #endif diff --git a/src/rint-clip.h b/src/rint-clip.h index 06764a8..bfb6458 100644 --- a/src/rint-clip.h +++ b/src/rint-clip.h @@ -1,9 +1,9 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ #if defined DITHER -#define DITHERING (1./32)*(int)(((ran1>>=3)&31)-((ran2>>=3)&31)) +#define DITHERING + (1./32)*(int)(((ran1>>=3)&31)-((ran2>>=3)&31)) #define DITHER_RAND (seed = 1664525UL * seed + 1013904223UL) >> 3 #define DITHER_VARS unsigned long ran1 = DITHER_RAND, ran2 = DITHER_RAND #define SEED_ARG , unsigned long * seed0 @@ -12,10 +12,11 @@ #define COPY_SEED1 unsigned long seed1 = seed #define PASS_SEED1 , &seed1 #define PASS_SEED , &seed +#define FLOATD double #else -#define DITHERING 0 +#define DITHERING #define DITHER_VARS #define SEED_ARG #define SAVE_SEED @@ -23,9 +24,12 @@ #define COPY_SEED1 #define PASS_SEED1 #define PASS_SEED +#define FLOATD FLOATX #endif +#define DO_16 _;_;_;_;_;_;_;_;_;_;_;_;_;_;_;_ + #if defined FE_INVALID && defined FPU_RINT @@ -35,8 +39,8 @@ static void RINT_CLIP(RINT_T * const dest, FLOATX const * const src, COPY_SEED DITHER_VARS; for (; i < n; ++i) { - double d = src[i] + DITHERING; - dest[stride * i] = RINT(d); + FLOATD const d = src[i] DITHERING; + RINT(dest[stride * i], d); if (fe_test_invalid()) { fe_clear_invalid(); dest[stride * i] = d > 0? RINT_MAX : -RINT_MAX - 1; @@ -56,29 +60,29 @@ static size_t LSX_RINT_CLIP(void * * const dest0, FLOATX const * const src, RINT_T * dest = *dest0; COPY_SEED #if defined FE_INVALID && defined FPU_RINT -#define _ dest[i] = RINT(src[i] + DITHERING), ++i, - fe_clear_invalid(); - for (i = 0; i < (n & ~7u);) { +#define _ RINT(dest[i], src[i] DITHERING); ++i + for (i = 0; i < (n & ~15u);) { COPY_SEED1; DITHER_VARS; - _ _ _ _ _ _ _ _ (void)0; + DO_16; if (fe_test_invalid()) { fe_clear_invalid(); - RINT_CLIP(dest, src, 1, i - 8, i, &clips PASS_SEED1); + RINT_CLIP(dest, src, 1, i - 16, i, &clips PASS_SEED1); } } RINT_CLIP(dest, src, 1, i, n, &clips PASS_SEED); #else -#define _ d = src[i] + DITHERING, dest[i++] = (RINT_T)(d > 0? d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5), +#define _ d = src[i] DITHERING, dest[i++] = (RINT_T)(d > 0? \ + d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5) const double N = 1. + RINT_MAX; double d; - for (i = 0; i < (n & ~7u);) { + for (i = 0; i < (n & ~15u);) { DITHER_VARS; - _ _ _ _ _ _ _ _ (void)0; + DO_16; } { DITHER_VARS; - for (; i < n; _ (void)0); + for (; i < n; _); } #endif SAVE_SEED; @@ -97,34 +101,34 @@ static size_t LSX_RINT_CLIP_2(void * * dest0, FLOATX const * const * srcs, RINT_T * dest = *dest0; COPY_SEED #if defined FE_INVALID && defined FPU_RINT -#define _ dest[stride * i] = RINT(src[i] + DITHERING), ++i, - fe_clear_invalid(); +#define _ RINT(dest[stride * i], src[i] DITHERING); ++i for (j = 0; j < stride; ++j, ++dest) { FLOATX const * const src = srcs[j]; - for (i = 0; i < (n & ~7u);) { + for (i = 0; i < (n & ~15u);) { COPY_SEED1; DITHER_VARS; - _ _ _ _ _ _ _ _ (void)0; + DO_16; if (fe_test_invalid()) { fe_clear_invalid(); - RINT_CLIP(dest, src, stride, i - 8, i, &clips PASS_SEED1); + RINT_CLIP(dest, src, stride, i - 16, i, &clips PASS_SEED1); } } RINT_CLIP(dest, src, stride, i, n, &clips PASS_SEED); } #else -#define _ d = src[i] + DITHERING, dest[stride * i++] = (RINT_T)(d > 0? d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5), +#define _ d = src[i] DITHERING, dest[stride * i++] = (RINT_T)(d > 0? \ + d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5) const double N = 1. + RINT_MAX; double d; for (j = 0; j < stride; ++j, ++dest) { FLOATX const * const src = srcs[j]; - for (i = 0; i < (n & ~7u);) { + for (i = 0; i < (n & ~15u);) { DITHER_VARS; - _ _ _ _ _ _ _ _ (void)0; + DO_16; } { DITHER_VARS; - for (; i < n; _ (void)0); + for (; i < n; _); } } #endif @@ -134,6 +138,7 @@ static size_t LSX_RINT_CLIP_2(void * * dest0, FLOATX const * const * srcs, } #undef _ +#undef FLOATD #undef PASS_SEED #undef PASS_SEED1 #undef COPY_SEED1 diff --git a/src/rint.h b/src/rint.h index b22c07c..d3629ae 100644 --- a/src/rint.h +++ b/src/rint.h @@ -1,4 +1,4 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ #if !defined soxr_rint_included @@ -6,63 +6,94 @@ #include "std-types.h" - - -#if HAVE_LRINT && LONG_MAX == 2147483647L - #include - #define FPU_RINT32 - #define rint32 lrint -#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) - #define FPU_RINT32 - static __inline int32_t rint32(double input) { - int32_t result; - __asm__ __volatile__("fistpl %0": "=m"(result): "t"(input): "st"); - return result; - } -#elif defined __GNUC__ && defined __arm__ - #define FPU_RINT32 - static __inline int32_t rint32(double input) { - register int32_t result; - __asm__ __volatile__ ("ftosid %0, %P1": "=w"(result): "w"(input)); - return result; - } -#elif defined _MSC_VER && defined _M_IX86 /* FIXME need solution for MSVC x64 */ - #define FPU_RINT32 - static __inline int32_t rint32(double input) { - int32_t result; - _asm { - fld input - fistp result - } - return result; - } -#else - #define rint32(x) (int32_t)((x) < 0? x - .5 : x + .5) -#endif - - - #if defined __GNUC__ && (defined __i386__ || defined __x86_64__) + #define FPU_RINT32 #define FPU_RINT16 - static __inline int16_t rint16(double input) { - int16_t result; - __asm__ __volatile__("fistps %0": "=m"(result): "t"(input): "st"); - return result; + #define rint32D(a,b) __asm__ __volatile__("fistpl %0": "=m"(a): "t"(b): "st") + #define rint16D(a,b) __asm__ __volatile__("fistps %0": "=m"(a): "t"(b): "st") + #define rint32F rint32D + #define rint16F rint16D + #define FE_INVALID 1 + static __inline int fe_test_invalid(void) { + int status_word; + __asm__ __volatile__("fnstsw %%ax": "=a"(status_word)); + return status_word & FE_INVALID; } -#elif defined _MSC_VER && defined _M_IX86 /* FIXME need solution for MSVC x64 */ + static __inline int fe_clear_invalid(void) { + int32_t status[7]; + __asm__ __volatile__("fnstenv %0": "=m"(status)); + status[1] &= ~FE_INVALID; + __asm__ __volatile__("fldenv %0": : "m"(status)); + return 0; + } +#elif defined _MSC_VER && defined _M_IX86 + #define FPU_RINT32 #define FPU_RINT16 - static __inline int16_t rint16(double input) { - int16_t result; - _asm { - fld input - fistp result - } - return result; + #define rint_fn(N,Y,X) \ + static __inline void N(Y *y, X x) {Y t; {__asm fld x __asm fistp t} *y=t;} + rint_fn(rint32d, int32_t, double) + rint_fn(rint32f, int32_t, float ) + rint_fn(rint16d, int16_t, double) + rint_fn(rint16f, int16_t, float ) + #define rint32D(y,x) rint32d(&(y),x) + #define rint32F(y,x) rint32f(&(y),x) + #define rint16D(y,x) rint16d(&(y),x) + #define rint16F(y,x) rint16f(&(y),x) + #define FE_INVALID 1 + static __inline int fe_test_invalid(void) { + short status_word; + __asm fnstsw status_word + return status_word & FE_INVALID; } -#else - #define rint16(x) (int16_t)((x) < 0? x - .5 : x + .5) + static __inline int fe_clear_invalid(void) { + int32_t status[7]; + __asm fnstenv status + status[1] &= ~FE_INVALID; + __asm fldenv status + return 0; + } +#elif defined _MSC_VER && defined _M_X64 + #include + #include + #define FPU_RINT32 + #define FPU_RINT16 + static __inline void rint32d(int32_t *y, double x) { + *y = _mm_cvtsd_si32(_mm_load_sd(&x));} + static __inline void rint32f(int32_t *y, float x) { + *y = _mm_cvtss_si32(_mm_load_ss(&x));} + static __inline void rint16d(int16_t *y, double x) { + x = x*65536+32738; *y = (int16_t)(_mm_cvtsd_si32(_mm_load_sd(&x)) >> 16);} + #define rint32D(y,x) rint32d(&(y),x) + #define rint32F(y,x) rint32f(&(y),x) + #define rint16D(y,x) rint16d(&(y),x) + #define rint16F(y,x) rint16d(&(y),(double)(x)) + #define FE_INVALID 1 + #define fe_test_invalid() (_statusfp() & _SW_INVALID) + #define fe_clear_invalid _clearfp /* Note: clears all */ +#elif HAVE_LRINT && LONG_MAX == 2147483647L && HAVE_FENV_H + #include + #include + #define FPU_RINT32 + #define rint32D(y,x) ((y)=lrint(x)) + #define rint32F(y,x) ((y)=lrintf(x)) + #define fe_test_invalid() fetestexcept(FE_INVALID) + #define fe_clear_invalid() feclearexcept(FE_INVALID) #endif +#if !defined FPU_RINT32 + #define rint32D(y,x) ((y)=(int32_t)((x) < 0? x - .5 : x + .5)) + #define rint32F(y,x) rint32D(y,(double)(x)) +#endif +#if !defined FPU_RINT16 + #define rint16D(y,x) ((y)=(int16_t)((x) < 0? x - .5 : x + .5)) + #define rint16F(y,x) rint16D(y,(double)(x)) +#endif + +static __inline int32_t rint32(double input) { + int32_t result; rint32D(result, input); return result;} + +static __inline int16_t rint16(double input) { + int16_t result; rint16D(result, input); return result;} #endif diff --git a/src/soxr.c b/src/soxr.c index 3d36ae9..4a7b2da 100644 --- a/src/soxr.c +++ b/src/soxr.c @@ -372,6 +372,8 @@ soxr_t soxr_create( if (!error && !(p = calloc(sizeof(*p), 1))) error = "malloc failed"; if (p) { + control_block_t * control_block; + p->q_spec = q_spec? *q_spec : soxr_quality_spec(SOXR_HQ, 0); if (q_spec) { /* Backwards compatibility with original API: */ @@ -415,14 +417,14 @@ soxr_t soxr_create( ) { p->deinterleave = (deinterleave_t)_soxr_deinterleave_f; p->interleave = (interleave_t)_soxr_interleave_f; - memcpy(&p->control_block, + control_block = #if WITH_VR32 ((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR))? &_soxr_vr32_cb : #endif #if WITH_CR32S !WITH_CR32 || should_use_simd32()? &_soxr_rate32s_cb : #endif - &_soxr_rate32_cb, sizeof(p->control_block)); + &_soxr_rate32_cb; } #if WITH_CR64 || WITH_CR64S else @@ -432,13 +434,14 @@ soxr_t soxr_create( { p->deinterleave = (deinterleave_t)_soxr_deinterleave; p->interleave = (interleave_t)_soxr_interleave; - memcpy(&p->control_block, + control_block = #if WITH_CR64S !WITH_CR64 || should_use_simd64()? &_soxr_rate64s_cb : #endif - &_soxr_rate64_cb, sizeof(p->control_block)); + &_soxr_rate64_cb; } #endif + memcpy(&p->control_block, control_block, sizeof(p->control_block)); if (p->num_channels && io_ratio!=0) error = soxr_set_io_ratio(p, io_ratio, 0); diff --git a/tests/throughput-test b/tests/throughput-test index 544c620..b03a2a4 100755 --- a/tests/throughput-test +++ b/tests/throughput-test @@ -1,4 +1,8 @@ #!/bin/sh set -e -for n in `seq 0 3`; do ./throughput 44.1 48 1 0 $n; done +test -r throughput.exe && wine=wine + +test /$1 = / && list="`seq 0 3`" || list="$*" + +for n in $list; do $wine ./throughput 44.1 48 1 0 $n 4; done diff --git a/tests/throughput.c b/tests/throughput.c index 1ca37a1..80256ed 100644 --- a/tests/throughput.c +++ b/tests/throughput.c @@ -85,7 +85,9 @@ int main(int n, char const * arg[]) size_t odone = 0, clips = 0, omax = 0, i; soxr_error_t error; soxr_t soxr; - + int32_t seed = 0; + char const * e = getenv("SOXR_THROUGHPUT_GAIN"); + double gain = e? atof(e) : .5; /* Overrides (if given): */ if (passband_end > 0) q_spec.passband_end = passband_end / 100; @@ -99,7 +101,9 @@ int main(int n, char const * arg[]) &error, /* To report any error during creation. */ &io_spec, &q_spec, &runtime_spec); -#define RAND ((rand()*(1./RAND_MAX)-.5)*1) +#define ranqd1(x) ((x) = 1664525 * (x) + 1013904223) /* int32_t x */ +#define dranqd1(x) (ranqd1(x) * (1. / (65536. * 32768.))) /* [-1,1) */ +#define RAND (dranqd1(seed) * gain) #define DURATION_MSECS 125 #define NUM_ATTEMPTS 8