clean-up

2016-05-30 22:07:46 +01:00 · 2016-05-30 22:07:46 +01:00 · 7110101172
parent e47d6560af
commit 7110101172
8 changed files with 145 additions and 122 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -150,7 +150,10 @@ test_big_endian (HAVE_BIGENDIAN)

 if (CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang")
  set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -Wconversion -Wall -Wextra \
-      -pedantic -Wundef -Wpointer-arith -Wno-long-long -Wno-keyword-macro")
+      -pedantic -Wundef -Wpointer-arith -Wno-long-long")
+  if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
+    set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -Wno-keyword-macro")
+  endif ()
  if (WITH_DEV_GPROF)
    set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -pg")
  endif ()
--- a/6
+++ b/6
@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash
 set -e

 # SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
@ -23,7 +23,9 @@ for n in \
  echo "***" $dir
  mkdir -p $dir
    cd $dir
-    cmake -DCMAKE_BUILD_TYPE=$build -DCMAKE_C_COMPILER=$compiler -DCMAKE_SYSTEM_NAME="$system" ..
+    cmake -DCMAKE_BUILD_TYPE=$build -DCMAKE_C_COMPILER=$compiler -DCMAKE_SYSTEM_NAME="$system" -DBUILD_SHARED_LIBS=OFF -DWITH_OPENMP=OFF ..
    make $j && [ /$system = / ] && ctest -j || true
+    cd tests
+    ../../tests/throughput-test && SOXR_THROUGHPUT_GAIN=.6 ../../tests/throughput-test 2 3 || true
  )
 done
--- a/src/data-io.c
+++ b/src/data-io.c
@ -1,4 +1,4 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
 * Licence for this file: LGPL v2.1                  See LICENCE for details. */

 #include <limits.h>
@ -14,8 +14,8 @@
  unsigned i; \
  size_t j; \
  T const * src = *src0; \
-  if (ch > 1) \
-    for (j = 0; j < n; ++j) for (i = 0; i < ch; ++i) dest[i][j] = (DEINTERLEAVE_TO)*src++; \
+  if (ch > 1) for (j = 0; j < n; ++j) \
+    for (i = 0; i < ch; ++i) dest[i][j] = (DEINTERLEAVE_TO)*src++; \
  else if (flag) memcpy(dest[0], src, n * sizeof(T)), src = &src[n]; \
  else for (j = 0; j < n; dest[0][j++] = (DEINTERLEAVE_TO)*src++); \
  *src0 = src; \
@ -60,35 +60,6 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */

 #include "rint.h"

-#if HAVE_FENV_H
-  #include <fenv.h>
-  #define fe_test_invalid() fetestexcept(FE_INVALID)
-  #define fe_clear_invalid() feclearexcept(FE_INVALID)
-#elif defined _MSC_VER
-  #define FE_INVALID 1
-  #if defined _WIN64
-    #include <float.h>
-    #define fe_test_invalid() (_statusfp() & _SW_INVALID)
-    #define fe_clear_invalid _clearfp /* FIXME clears all */
-  #else
-  static __inline int fe_test_invalid()
-  {
-    short status_word;
-    __asm fnstsw status_word
-    return status_word & FE_INVALID;
-  }
-
-  static __inline int fe_clear_invalid()
-  {
-    int16_t status[14];
-    __asm fnstenv status
-    status[2] &= ~FE_INVALID;
-    __asm fldenv status
-    return 0;
-  }
-  #endif
-#endif
-


 #if defined FE_INVALID && defined FPU_RINT32 && defined __STDC_VERSION__
@ -103,7 +74,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
 #define LSX_RINT_CLIP_2 lsx_rint32_clip_2
 #define LSX_RINT_CLIP lsx_rint32_clip
 #define RINT_CLIP rint32_clip
-#define RINT rint32
+#define RINT rint32D
 #if defined FPU_RINT32
  #define FPU_RINT
 #endif
@ -114,7 +85,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
 #define LSX_RINT_CLIP_2 lsx_rint16_clip_2
 #define LSX_RINT_CLIP lsx_rint16_clip
 #define RINT_CLIP rint16_clip
-#define RINT rint16
+#define RINT rint16D
 #if defined FPU_RINT16
  #define FPU_RINT
 #endif
@ -125,7 +96,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
 #define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither
 #define LSX_RINT_CLIP lsx_rint16_clip_dither
 #define RINT_CLIP rint16_clip_dither
-#define RINT rint16
+#define RINT rint16D
 #if defined FPU_RINT16
  #define FPU_RINT
 #endif
@ -145,7 +116,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
 #define LSX_RINT_CLIP_2 lsx_rint32_clip_2_f
 #define LSX_RINT_CLIP lsx_rint32_clip_f
 #define RINT_CLIP rint32_clip_f
-#define RINT rint32
+#define RINT rint32F
 #if defined FPU_RINT32
  #define FPU_RINT
 #endif
@ -156,7 +127,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
 #define LSX_RINT_CLIP_2 lsx_rint16_clip_2_f
 #define LSX_RINT_CLIP lsx_rint16_clip_f
 #define RINT_CLIP rint16_clip_f
-#define RINT rint16
+#define RINT rint16F
 #if defined FPU_RINT16
  #define FPU_RINT
 #endif
@ -167,7 +138,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
 #define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither_f
 #define LSX_RINT_CLIP lsx_rint16_clip_dither_f
 #define RINT_CLIP rint16_clip_dither_f
-#define RINT rint16
+#define RINT rint16D
 #if defined FPU_RINT16
  #define FPU_RINT
 #endif
--- a/src/rint-clip.h
+++ b/src/rint-clip.h
@ -1,9 +1,9 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
 * Licence for this file: LGPL v2.1                  See LICENCE for details. */

 #if defined DITHER

-#define DITHERING (1./32)*(int)(((ran1>>=3)&31)-((ran2>>=3)&31))
+#define DITHERING + (1./32)*(int)(((ran1>>=3)&31)-((ran2>>=3)&31))
 #define DITHER_RAND (seed = 1664525UL * seed + 1013904223UL) >> 3
 #define DITHER_VARS unsigned long ran1 = DITHER_RAND, ran2 = DITHER_RAND
 #define SEED_ARG , unsigned long * seed0
@ -12,10 +12,11 @@
 #define COPY_SEED1 unsigned long seed1 = seed
 #define PASS_SEED1 , &seed1
 #define PASS_SEED  , &seed
+#define FLOATD double

 #else

-#define DITHERING 0
+#define DITHERING
 #define DITHER_VARS
 #define SEED_ARG
 #define SAVE_SEED
@ -23,9 +24,12 @@
 #define COPY_SEED1
 #define PASS_SEED1
 #define PASS_SEED
+#define FLOATD FLOATX

 #endif

+#define DO_16 _;_;_;_;_;_;_;_;_;_;_;_;_;_;_;_
+


 #if defined FE_INVALID && defined FPU_RINT
@ -35,8 +39,8 @@ static void RINT_CLIP(RINT_T * const dest, FLOATX const * const src,
  COPY_SEED
  DITHER_VARS;
  for (; i < n; ++i) {
-    double d = src[i] + DITHERING;
-    dest[stride * i] = RINT(d);
+    FLOATD const d = src[i] DITHERING;
+    RINT(dest[stride * i], d);
    if (fe_test_invalid()) {
      fe_clear_invalid();
      dest[stride * i] = d > 0? RINT_MAX : -RINT_MAX - 1;
@ -56,29 +60,29 @@ static size_t LSX_RINT_CLIP(void * * const dest0, FLOATX const * const src,
  RINT_T * dest = *dest0;
  COPY_SEED
 #if defined FE_INVALID && defined FPU_RINT
-#define _ dest[i] = RINT(src[i] + DITHERING), ++i,
-  fe_clear_invalid();
-  for (i = 0; i < (n & ~7u);) {
+#define _ RINT(dest[i], src[i] DITHERING); ++i
+  for (i = 0; i < (n & ~15u);) {
    COPY_SEED1;
    DITHER_VARS;
-    _ _ _ _ _ _ _ _ (void)0;
+    DO_16;
    if (fe_test_invalid()) {
      fe_clear_invalid();
-      RINT_CLIP(dest, src, 1, i - 8, i, &clips PASS_SEED1);
+      RINT_CLIP(dest, src, 1, i - 16, i, &clips PASS_SEED1);
    }
  }
  RINT_CLIP(dest, src, 1, i, n, &clips PASS_SEED);
 #else
-#define _ d = src[i] + DITHERING, dest[i++] = (RINT_T)(d > 0? d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5),
+#define _ d = src[i] DITHERING, dest[i++] = (RINT_T)(d > 0? \
+    d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5)
  const double N = 1. + RINT_MAX;
  double d;
-  for (i = 0; i < (n & ~7u);) {
+  for (i = 0; i < (n & ~15u);) {
    DITHER_VARS;
-    _ _ _ _ _ _ _ _ (void)0;
+    DO_16;
  }
  {
    DITHER_VARS;
-    for (; i < n; _ (void)0);
+    for (; i < n; _);
  }
 #endif
  SAVE_SEED;
@ -97,34 +101,34 @@ static size_t LSX_RINT_CLIP_2(void * * dest0, FLOATX const * const * srcs,
  RINT_T * dest = *dest0;
  COPY_SEED
 #if defined FE_INVALID && defined FPU_RINT
-#define _ dest[stride * i] = RINT(src[i] + DITHERING), ++i,
-  fe_clear_invalid();
+#define _ RINT(dest[stride * i], src[i] DITHERING); ++i
  for (j = 0; j < stride; ++j, ++dest) {
    FLOATX const * const src = srcs[j];
-    for (i = 0; i < (n & ~7u);) {
+    for (i = 0; i < (n & ~15u);) {
      COPY_SEED1;
      DITHER_VARS;
-      _ _ _ _ _ _ _ _ (void)0;
+      DO_16;
      if (fe_test_invalid()) {
        fe_clear_invalid();
-        RINT_CLIP(dest, src, stride, i - 8, i, &clips PASS_SEED1);
+        RINT_CLIP(dest, src, stride, i - 16, i, &clips PASS_SEED1);
      }
    }
    RINT_CLIP(dest, src, stride, i, n, &clips PASS_SEED);
  }
 #else
-#define _ d = src[i] + DITHERING, dest[stride * i++] = (RINT_T)(d > 0? d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5),
+#define _ d = src[i] DITHERING, dest[stride * i++] = (RINT_T)(d > 0? \
+    d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5)
  const double N = 1. + RINT_MAX;
  double d;
  for (j = 0; j < stride; ++j, ++dest) {
    FLOATX const * const src = srcs[j];
-    for (i = 0; i < (n & ~7u);) {
+    for (i = 0; i < (n & ~15u);) {
      DITHER_VARS;
-      _ _ _ _ _ _ _ _ (void)0;
+      DO_16;
    }
    {
      DITHER_VARS;
-      for (; i < n; _ (void)0);
+      for (; i < n; _);
    }
  }
 #endif
@ -134,6 +138,7 @@ static size_t LSX_RINT_CLIP_2(void * * dest0, FLOATX const * const * srcs,
 }
 #undef _

+#undef FLOATD
 #undef PASS_SEED
 #undef PASS_SEED1
 #undef COPY_SEED1
--- a/src/rint.h
+++ b/src/rint.h
@ -1,4 +1,4 @@
-/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library      Copyright (c) 2007-16 robs@users.sourceforge.net
 * Licence for this file: LGPL v2.1                  See LICENCE for details. */

 #if !defined soxr_rint_included
@ -6,63 +6,94 @@

 #include "std-types.h"

-
-
-#if HAVE_LRINT && LONG_MAX == 2147483647L
-  #include <math.h>
-  #define FPU_RINT32
-  #define rint32 lrint
-#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
-  #define FPU_RINT32
-  static __inline int32_t rint32(double input) {
-    int32_t result;
-    __asm__ __volatile__("fistpl %0": "=m"(result): "t"(input): "st");
-    return result;
-  }
-#elif defined __GNUC__ && defined __arm__
-  #define FPU_RINT32
-  static __inline int32_t rint32(double input) {
-    register int32_t result;
-    __asm__ __volatile__ ("ftosid %0, %P1": "=w"(result): "w"(input));
-    return result;
-  }
-#elif defined _MSC_VER && defined _M_IX86 /* FIXME need solution for MSVC x64 */
-  #define FPU_RINT32
-  static __inline int32_t rint32(double input) {
-    int32_t result;
-    _asm {
-      fld input
-      fistp result
-    }
-    return result;
-  }
-#else
-  #define rint32(x) (int32_t)((x) < 0? x - .5 : x + .5)
-#endif
-
-
-
 #if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+  #define FPU_RINT32
  #define FPU_RINT16
-  static __inline int16_t rint16(double input) {
-    int16_t result;
-    __asm__ __volatile__("fistps %0": "=m"(result): "t"(input): "st");
-    return result;
+  #define rint32D(a,b) __asm__ __volatile__("fistpl %0": "=m"(a): "t"(b): "st")
+  #define rint16D(a,b) __asm__ __volatile__("fistps %0": "=m"(a): "t"(b): "st")
+  #define rint32F rint32D
+  #define rint16F rint16D
+  #define FE_INVALID 1
+  static __inline int fe_test_invalid(void) { /* Non-zero iff the FE_INVALID bit is set in the x87 status word. */
+    int status_word;
+    __asm__ __volatile__("fnstsw %%ax": "=a"(status_word)); /* Status word is delivered in AX. */
+    return status_word & FE_INVALID; /* The mask discards every bit except the one tested. */
+  }
-#elif defined _MSC_VER && defined _M_IX86 /* FIXME need solution for MSVC x64 */
+  static __inline int fe_clear_invalid(void) { /* Clears only the FE_INVALID bit; always returns 0. */
+    int32_t status[7]; /* Buffer receiving the saved x87 environment. */
+    __asm__ __volatile__("fnstenv %0": "=m"(status)); /* Save the current environment. */
+    status[1] &= ~FE_INVALID; /* NOTE(review): assumes status[1] is the status word in this layout - confirm. */
+    __asm__ __volatile__("fldenv %0": : "m"(status)); /* Reload the edited copy. */
+    return 0;
+  }
+#elif defined _MSC_VER && defined _M_IX86
+  #define FPU_RINT32
  #define FPU_RINT16
-  static __inline int16_t rint16(double input) {
-    int16_t result;
-    _asm {
-      fld input
-      fistp result
-    }
-    return result;
+  #define rint_fn(N,Y,X) \
+    static __inline void N(Y *y, X x) {Y t; {__asm fld x __asm fistp t} *y=t;}
+  rint_fn(rint32d, int32_t, double)
+  rint_fn(rint32f, int32_t, float )
+  rint_fn(rint16d, int16_t, double)
+  rint_fn(rint16f, int16_t, float )
+  #define rint32D(y,x) rint32d(&(y),x)
+  #define rint32F(y,x) rint32f(&(y),x)
+  #define rint16D(y,x) rint16d(&(y),x)
+  #define rint16F(y,x) rint16f(&(y),x)
+  #define FE_INVALID 1
+  static __inline int fe_test_invalid(void) {
+    short status_word;
+    __asm fnstsw status_word
+    return status_word & FE_INVALID;
  }
-#else
-  #define rint16(x) (int16_t)((x) < 0? x - .5 : x + .5)
+  static __inline int fe_clear_invalid(void) {
+    int32_t status[7];
+    __asm fnstenv status
+    status[1] &= ~FE_INVALID;
+    __asm fldenv status
+    return 0;
+  }
+#elif defined _MSC_VER && defined _M_X64
+  #include <emmintrin.h>
+  #include <float.h>
+  #define FPU_RINT32
+  #define FPU_RINT16
+  static __inline void rint32d(int32_t *y, double x) { /* Stores x converted to int32 in *y. */
+    *y = _mm_cvtsd_si32(_mm_load_sd(&x));} /* Scalar double -> int32 conversion intrinsic. */
+  static __inline void rint32f(int32_t *y, float  x) { /* Single-precision counterpart of rint32d. */
+    *y = _mm_cvtss_si32(_mm_load_ss(&x));} /* Scalar float -> int32 conversion intrinsic. */
+  static __inline void rint16d(int16_t *y, double x) { /* Stores x rounded to int16 in *y, via a 32-bit conversion. */
+    x = x*65536+32768; *y = (int16_t)(_mm_cvtsd_si32(_mm_load_sd(&x)) >> 16);} /* 2^16 scale + 2^15 (half) bias; >>16 keeps the rounded high word. */
+  #define rint32D(y,x) rint32d(&(y),x)
+  #define rint32F(y,x) rint32f(&(y),x)
+  #define rint16D(y,x) rint16d(&(y),x)
+  #define rint16F(y,x) rint16d(&(y),(double)(x))
+  #define FE_INVALID 1
+  #define fe_test_invalid() (_statusfp() & _SW_INVALID)
+  #define fe_clear_invalid _clearfp /* Note: clears all */
+#elif HAVE_LRINT && LONG_MAX == 2147483647L && HAVE_FENV_H
+  #include <math.h>
+  #include <fenv.h>
+  #define FPU_RINT32
+  #define rint32D(y,x) ((y)=lrint(x))
+  #define rint32F(y,x) ((y)=lrintf(x))
+  #define fe_test_invalid() fetestexcept(FE_INVALID)
+  #define fe_clear_invalid() feclearexcept(FE_INVALID)
 #endif

+#if !defined FPU_RINT32 /* Portable fallback: add/subtract .5, then let the cast truncate. */
+  #define rint32D(y,x) ((y)=(int32_t)((x) < 0? (x) - .5 : (x) + .5)) /* NB: x is evaluated twice. */
+  #define rint32F(y,x) rint32D(y,(double)(x))
+#endif

+#if !defined FPU_RINT16 /* Portable fallback: add/subtract .5, then let the cast truncate. */
+  #define rint16D(y,x) ((y)=(int16_t)((x) < 0? (x) - .5 : (x) + .5)) /* NB: x is evaluated twice. */
+  #define rint16F(y,x) rint16D(y,(double)(x))
+#endif
+
+static __inline int32_t rint32(double input) { /* Value-returning wrapper around the rint32D macro. */
+  int32_t result; rint32D(result, input); return result;}
+
+static __inline int16_t rint16(double input) { /* Value-returning wrapper around the rint16D macro. */
+  int16_t result; rint16D(result, input); return result;}

 #endif
--- a/src/soxr.c
+++ b/src/soxr.c
@ -372,6 +372,8 @@ soxr_t soxr_create(
  if (!error && !(p = calloc(sizeof(*p), 1))) error = "malloc failed";

  if (p) {
+    control_block_t * control_block;
+
    p->q_spec = q_spec? *q_spec : soxr_quality_spec(SOXR_HQ, 0);

    if (q_spec) { /* Backwards compatibility with original API: */
@ -415,14 +417,14 @@ soxr_t soxr_create(
        ) {
      p->deinterleave = (deinterleave_t)_soxr_deinterleave_f;
      p->interleave = (interleave_t)_soxr_interleave_f;
-      memcpy(&p->control_block,
+      control_block =
 #if WITH_VR32
          ((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR))? &_soxr_vr32_cb :
 #endif
 #if WITH_CR32S
          !WITH_CR32 || should_use_simd32()? &_soxr_rate32s_cb :
 #endif
-          &_soxr_rate32_cb, sizeof(p->control_block));
+          &_soxr_rate32_cb;
    }
 #if WITH_CR64 || WITH_CR64S
    else
@ -432,13 +434,14 @@ soxr_t soxr_create(
    {
      p->deinterleave = (deinterleave_t)_soxr_deinterleave;
      p->interleave = (interleave_t)_soxr_interleave;
-      memcpy(&p->control_block,
+      control_block =
 #if WITH_CR64S
          !WITH_CR64 || should_use_simd64()? &_soxr_rate64s_cb :
 #endif
-          &_soxr_rate64_cb, sizeof(p->control_block));
+          &_soxr_rate64_cb;
    }
 #endif
+    memcpy(&p->control_block, control_block, sizeof(p->control_block));

    if (p->num_channels && io_ratio!=0)
      error = soxr_set_io_ratio(p, io_ratio, 0);
--- a/tests/throughput-test
+++ b/tests/throughput-test
@ -1,4 +1,8 @@
 #!/bin/sh
 set -e

-for n in `seq 0 3`; do ./throughput 44.1 48 1 0 $n; done
+test -r throughput.exe && wine=wine
+# With no arguments use the values 0..3; otherwise use the arguments as given.
+test "/$1" = / && list="`seq 0 3`" || list="$*"
+# $wine stays empty unless a readable throughput.exe was found above.
+for n in $list; do $wine ./throughput 44.1 48 1 0 $n 4; done
--- a/tests/throughput.c
+++ b/tests/throughput.c
@ -85,7 +85,9 @@ int main(int n, char const * arg[])
  size_t odone = 0, clips = 0, omax = 0, i;
  soxr_error_t error;
  soxr_t soxr;
-
+  int32_t seed = 0;
+  char const * e = getenv("SOXR_THROUGHPUT_GAIN");
+  double gain = e? atof(e) : .5;

  /* Overrides (if given): */
  if (passband_end   > 0) q_spec.passband_end   = passband_end / 100;
@ -99,7 +101,9 @@ int main(int n, char const * arg[])
      &error,                         /* To report any error during creation. */
      &io_spec, &q_spec, &runtime_spec);

-#define RAND ((rand()*(1./RAND_MAX)-.5)*1)
+#define ranqd1(x) ((x) = (int32_t)(1664525u * (uint32_t)(x) + 1013904223u)) /* int32_t x; unsigned math wraps without UB */
+#define dranqd1(x) (ranqd1(x) * (1. / (65536. * 32768.))) /* [-1,1) */
+#define RAND (dranqd1(seed) * gain)
 #define DURATION_MSECS 125
 #define NUM_ATTEMPTS 8