diff --git a/CMakeLists.txt b/CMakeLists.txt index c8d6bae..b9ee5d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ set (DESCRIPTION_SUMMARY set (PROJECT_VERSION_MAJOR 0) set (PROJECT_VERSION_MINOR 1) -set (PROJECT_VERSION_PATCH 2) +set (PROJECT_VERSION_PATCH "3b1") # For shared-object; if, since the last public release: # 1) library code changed at all: ++revision @@ -22,7 +22,7 @@ set (PROJECT_VERSION_PATCH 2) # 4) interfaces removed: age = 0 set (SO_VERSION_CURRENT 1) -set (SO_VERSION_REVISION 1) +set (SO_VERSION_REVISION 2) set (SO_VERSION_AGE 1) math (EXPR SO_VERSION_MAJOR "${SO_VERSION_CURRENT} - ${SO_VERSION_AGE}") @@ -63,18 +63,22 @@ cmake_dependent_option (WITH_CR64S cmake_dependent_option (WITH_CR32S "Include HQ SIMD constant-rate resampling engine." ON "WITH_VR32 OR WITH_CR64 OR WITH_CR32 OR WITH_CR64S" ON) -cmake_dependent_option (WITH_AVFFT - "Use libavcodec (LGPL) for HQ SIMD DFT." OFF - "WITH_CR32S;NOT WITH_PFFFT" OFF) cmake_dependent_option (WITH_PFFFT "Use PFFFT (BSD-like licence) for HQ SIMD DFT." ON "WITH_CR32S;NOT WITH_AVFFT" OFF) +cmake_dependent_option (WITH_AVFFT + "Use libavcodec (LGPL) for HQ SIMD DFT." OFF + "WITH_CR32S;NOT WITH_PFFFT" OFF) cmake_dependent_option (BUILD_LSR_TESTS "Build LSR tests." OFF "UNIX;NOT CMAKE_CROSSCOMPILING;EXISTS ${PROJECT_SOURCE_DIR}/lsr-tests;WITH_LSR_BINDINGS" OFF) +option (WITH_HI_PREC_CLOCK "Enable high-precision time-base." ON) +option (WITH_FLOAT_STD_PREC_CLOCK + "Use floating-point for standard-precision time-base." OFF) option (WITH_DEV_TRACE "Enable developer trace capability." ON) option (WITH_DEV_GPROF "Enable developer grpof output." OFF) -mark_as_advanced (WITH_DEV_TRACE WITH_DEV_GPROF) +mark_as_advanced (WITH_HI_PREC_CLOCK WITH_FLOAT_STD_PREC_CLOCK + WITH_DEV_TRACE WITH_DEV_GPROF) @@ -130,7 +134,7 @@ if (WITH_AVFFT) endif () endif () -if (WITH_AVFFT OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm" AND SIMD32_FOUND)) +if (WITH_AVFFT OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm" AND SIMD32_FOUND AND WITH_CR32)) find_package (LibAVUtil) if (AVUTIL_FOUND) include_directories (${AVUTIL_INCLUDE_DIRS}) diff --git a/TODO b/TODO index c699c0c..2d1bc19 100644 --- a/TODO +++ b/TODO @@ -1,2 +1,3 @@ -* SOXR_ALLOW_ALIASING -* Explicit flush API fn, perhaps. +* vr32s +* vr32 with 1-delay-clear +* fir_to_phase with RDFT32 diff --git a/go.bat b/go.bat index c73d4c2..aabff75 100644 --- a/go.bat +++ b/go.bat @@ -1,5 +1,5 @@ @echo off -rem SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +rem SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net rem Licence for this file: LGPL v2.1 See LICENCE for details. set build=%1 diff --git a/msvc/soxr-config.h b/msvc/soxr-config.h index d17ae6b..89f7a91 100644 --- a/msvc/soxr-config.h +++ b/msvc/soxr-config.h @@ -9,6 +9,7 @@ #define AVCODEC_FOUND 0 #define AVUTIL_FOUND 0 +#define WITH_PFFFT 1 #define HAVE_FENV_H 1 #define HAVE_STDBOOL_H 1 @@ -22,6 +23,8 @@ #define WITH_CR64S 1 #define WITH_VR32 1 +#define WITH_HI_PREC_CLOCK 1 +#define WITH_FLOAT_STD_PREC_CLOCK 0 #define WITH_DEV_TRACE 1 #endif diff --git a/multi-arch b/multi-arch index 63bb223..288b578 100755 --- a/multi-arch +++ b/multi-arch @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -e -# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net # Licence for this file: LGPL v2.1 See LICENCE for details. rm -f CMakeCache.txt # Prevent interference from any in-tree build diff --git a/soxr-config.h.in b/soxr-config.h.in index 8d654d8..00b3b45 100644 --- a/soxr-config.h.in +++ b/soxr-config.h.in @@ -6,6 +6,7 @@ #cmakedefine01 AVCODEC_FOUND #cmakedefine01 AVUTIL_FOUND +#cmakedefine01 WITH_PFFFT #cmakedefine01 HAVE_FENV_H #cmakedefine01 HAVE_STDBOOL_H @@ -19,6 +20,8 @@ #cmakedefine01 WITH_CR64S #cmakedefine01 WITH_VR32 +#cmakedefine01 WITH_HI_PREC_CLOCK +#cmakedefine01 WITH_FLOAT_STD_PREC_CLOCK #cmakedefine01 WITH_DEV_TRACE #endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3c45b13..bb01a0d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -32,13 +32,16 @@ elseif (WITH_PFFFT) #set (RDFT32 pffft32) set (RDFT32S pffft32s) elseif (WITH_CR32S) - set (RDFT32S fft4g32s fft4g32) + set (RDFT32S fft4g32s) + if (NOT WITH_CR32) + list (APPEND RDFT32S fft4g32) + endif () endif () set (SOURCES ${PROJECT_NAME}.c data-io) if (WITH_CR32 OR WITH_CR32S OR WITH_CR64 OR WITH_CR64S) - list (APPEND SOURCES dbesi0 filter fft4g64 cr.c) + list (APPEND SOURCES dbesi0 filter fft4g64 cr) endif () if (WITH_CR32) diff --git a/src/cr-core.c b/src/cr-core.c index 3f35ff0..d45a3fd 100644 --- a/src/cr-core.c +++ b/src/cr-core.c @@ -139,7 +139,8 @@ static half_fir_info_t const half_firs[] = { #define SIMD_AVX ((CORE_TYPE & CORE_SIMD_POLY) && defined __AVX__) #define SIMD_SSE 0 #else - #define SIMD_SSE ((CORE_TYPE & CORE_SIMD_POLY) && (defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86)) + #define SIMD_SSE ((CORE_TYPE & CORE_SIMD_POLY) && \ + (defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86)) #define SIMD_AVX 0 #endif @@ -147,8 +148,6 @@ static half_fir_info_t const half_firs[] = { - -#define HI_PREC_CLOCK #define COEFS (sample_t * __restrict)p->shared->poly_fir_coefs #define VAR_LENGTH p->n #define VAR_CONVOLVE(n) while (j < (n)) _ diff --git a/src/cr.c b/src/cr.c index eb65a04..5f09604 100644 --- a/src/cr.c +++ b/src/cr.c @@ -343,7 +343,7 @@ STATIC char const * _soxr_init( for (i = 0; i < p->num_stages; ++i) { p->stages[i].num = i; p->stages[i].shared = shared; - p->stages[i].input_size = 4096; + p->stages[i].input_size = 8192; } p->stages[0].is_input = true; @@ -443,8 +443,8 @@ STATIC char const * _soxr_init( s->L = arbL; s->use_hi_prec_clock = mode>1 && (q_spec->flags & SOXR_HI_PREC_CLOCK) && !rational; -#if FLOAT_HI_PREC_CLOCK - if (s->use_hi_prec_clock) { +#if WITH_FLOAT_STD_PREC_CLOCK + if (order && !s->use_hi_prec_clock) { s->at.flt = at; s->step.flt = arbM; s->out_in_ratio = (double)(arbL / s->step.flt); @@ -452,7 +452,7 @@ STATIC char const * _soxr_init( #endif { s->at.whole = (int64_t)(at * MULT32 + .5); -#if !FLOAT_HI_PREC_CLOCK +#if WITH_HI_PREC_CLOCK if (s->use_hi_prec_clock) { double M = arbM * MULT32; s->at.fix.ls.parts.ms = 0x80000000ul; @@ -474,7 +474,7 @@ STATIC char const * _soxr_init( s++, postL, postM, &multiplier, r_spec->log2_min_dft_size, r_spec->log2_large_dft_size, core_flags, core->rdft_cb); - lsx_debug("%g: »%i⋅%i/%i⋅%i/%g⋅%i/%i %x", 1/io_ratio, + lsx_debug("%g: >>%i %i/%i %i/%g %i/%i (%x)", 1/io_ratio, shr, preL, preM, arbL, arbM, postL, postM, core_flags); for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) { diff --git a/src/cr.h b/src/cr.h index 1b707e6..d6e8637 100644 --- a/src/cr.h +++ b/src/cr.h @@ -10,7 +10,12 @@ typedef void real; /* float or double */ struct stage; typedef void (* stage_fn_t)(struct stage * input, fifo_t * output); -typedef struct half_fir_info {int num_coefs; real const * coefs; stage_fn_t fn, dfn; float att;} half_fir_info_t; +typedef struct half_fir_info { + int num_coefs; + real const * coefs; + stage_fn_t fn, dfn; + float att; +} half_fir_info_t; typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t; typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t; @@ -48,9 +53,6 @@ typedef union { /* Uint64 in parts */ uint64_t all; } uint64p_t; -#define FLOAT_HI_PREC_CLOCK 0 /* Non-float hi-prec has ~96 bits. */ -#define float_step_t long double /* __float128 is also a (slow) option */ - typedef struct { int dft_length, num_taps, post_peak; void * dft_forward_setup, * dft_backward_setup; @@ -62,11 +64,17 @@ typedef struct { /* So generated filter coefs may be shared between channels */ dft_filter_t dft_filter[2]; } rate_shared_t; +typedef double float_step_t; /* Or long double or __float128. */ + typedef union { /* Fixed point arithmetic */ - struct {uint64p_t ls; int64p_t ms;} fix; + struct {uint64p_t ls; int64p_t ms;} fix; /* Hi-prec has ~96 bits. */ float_step_t flt; } step_t; +#define integer fix.ms.parts.ms +#define fraction fix.ms.parts.ls +#define whole fix.ms.all + #define CORE_DBL 1 #define CORE_SIMD_POLY 2 #define CORE_SIMD_HALF 4 @@ -113,16 +121,11 @@ typedef struct stage { #define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post) #define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre) -#define integer fix.ms.parts.ms -#define fraction fix.ms.parts.ls -#define whole fix.ms.all - #define lq_bw0 (1385/2048.) /* ~.67625, FP exact. */ typedef enum {rolloff_small, rolloff_medium, rolloff_none} rolloff_t; - typedef struct { void * (* alloc)(size_t); void * (* calloc)(size_t, size_t); diff --git a/src/fft4g32.c b/src/fft4g32.c index 5dcf34d..7a31ba4 100644 --- a/src/fft4g32.c +++ b/src/fft4g32.c @@ -5,8 +5,10 @@ #include "filter.h" #define FFT4G_FLOAT #include "fft4g.c" -#include "rdft_t.h" +#include "soxr-config.h" +#if WITH_CR32 +#include "rdft_t.h" static void * null(void) {return 0;} static void forward (int length, void * setup, double * H) {lsx_safe_rdft_f(length, 1, H); (void)setup;} static void backward(int length, void * setup, double * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;} @@ -31,3 +33,4 @@ fn_t _soxr_rdft32_cb[] = { (fn_t)free, (fn_t)flags, }; +#endif diff --git a/src/filter.c b/src/filter.c index aec0b6e..019d24d 100644 --- a/src/filter.c +++ b/src/filter.c @@ -28,7 +28,7 @@ #include "fft4g_cache.h" #endif -#if WITH_CR32 && !AVCODEC_FOUND +#if (WITH_CR32 && !AVCODEC_FOUND) || (WITH_CR32S && !AVCODEC_FOUND && !WITH_PFFFT) #define DFT_FLOAT float #define DONE_WITH_FFT_CACHE done_with_fft_cache_f #define FFT_CACHE_CCRW fft_cache_ccrw_f @@ -93,7 +93,7 @@ double * lsx_make_lpf( double * h = malloc((size_t)num_taps * sizeof(*h)); double mult = scale / lsx_bessel_I_0(beta), mult1 = 1 / (.5 * m + rho); assert(Fc >= 0 && Fc <= 1); - lsx_debug("make_lpf(n=%i Fc=%.7g β=%g ρ=%g scale=%g)", + lsx_debug("make_lpf(n=%i Fc=%.7g beta=%g rho=%g scale=%g)", num_taps, Fc, beta, rho, scale); if (h) for (i = 0; i <= m / 2; ++i) { @@ -120,7 +120,7 @@ double * lsx_design_lpf( double Fn, /* Nyquist freq; e.g. 0.5, 1, PI */ double att, /* Stop-band attenuation in dB */ int * num_taps, /* 0: value will be estimated */ - int k, /* >0: number of phases; <0: num_taps ≡ 1 (mod -k) */ + int k, /* >0: number of phases; <0: num_taps = 1 (mod -k) */ double beta) /* <0: value will be estimated */ { int n = *num_taps, phases = max(k, 1), modulo = max(-k, 1); diff --git a/src/filter.h b/src/filter.h index 56333ff..ccb3ba8 100644 --- a/src/filter.h +++ b/src/filter.h @@ -31,7 +31,7 @@ double * lsx_design_lpf( double Fn, /* Nyquist freq; e.g. 0.5, 1, PI; < 0: dummy run */ double att, /* Stop-band attenuation in dB */ int * num_taps, /* 0: value will be estimated */ - int k, /* >0: number of phases; <0: num_taps ≡ 1 (mod -k) */ + int k, /* >0: number of phases; <0: num_taps = 1 (mod -k) */ double beta); /* <0: value will be estimated */ void lsx_fir_to_phase(double * * h, int * len, diff --git a/src/internal.h b/src/internal.h index ee691a0..08924d5 100644 --- a/src/internal.h +++ b/src/internal.h @@ -55,6 +55,7 @@ #define SOXR_ROLLOFF_LSR2Q 3u /* Reserved for internal use. */ #define SOXR_ROLLOFF_MASK 3u /* For masking these bits. */ +#define SOXR_MAINTAIN_3DB_PT 4u /* Reserved for internal use. */ #define SOXR_PROMOTE_TO_LQ 64u /* Reserved for internal use. */ diff --git a/src/poly-fir.h b/src/poly-fir.h index 94db90e..d138e03 100644 --- a/src/poly-fir.h +++ b/src/poly-fir.h @@ -26,8 +26,6 @@ #define BEGINNING v4_t X = vLds(x), sum = vZero(); \ v4_t const * const __restrict coefs = (v4_t *)COEFS - #define MIDDLE switch (N) {case 3: CONVOLVE(3); break; case 4: CONVOLVE(4); \ - break; case 5: CONVOLVE(5); break; default: CONVOLVE(N); } #define END vStorSum(output+i, sum) #define cc(n) case n: core(n); break #define CORE(n) switch (n) {cc(2); cc(3); cc(4); cc(5); cc(6); default: core(n);} @@ -48,60 +46,74 @@ #define d (coef(COEFS, COEF_INTERP, N, phase, 3,j)) #define BEGINNING sample_t sum = 0 - #define MIDDLE CONVOLVE(N) #define END output[i] = sum #define CORE(n) core(n) #endif -#define fphpCore(n) \ - if (p->use_hi_prec_clock) { \ - float_step_t at = p->at.flt; \ - for (i = 0; (int)at < num_in; ++i, at += p->step.flt) { \ - sample_t const * const __restrict in = input + (int)at; \ - float_step_t frac = at - (int)at; \ - int phase = (int)(frac * (1 << PHASE_BITS)); \ - sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase); \ - int j = 0; \ - BEGINNING; CONVOLVE(n); END; \ - } \ - fifo_read(&p->fifo, (int)at, NULL); \ - p->at.flt = at - (int)at; \ - } else -#define hpCore(n) \ - if (p->use_hi_prec_clock) { \ - for (i = 0; p->at.integer < num_in; ++i, \ - p->at.fix.ls.all += p->step.fix.ls.all, \ - p->at.whole += p->step.whole + (p->at.fix.ls.all < p->step.fix.ls.all)) { \ - sample_t const * const __restrict in = input + p->at.integer; \ - uint32_t frac = p->at.fraction; \ - int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ \ - sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); /* low-order bits, scaled to [0,1) */ \ - int j = 0; \ - BEGINNING; CONVOLVE(n); END; \ - } \ - fifo_read(&p->fifo, p->at.integer, NULL); \ - p->at.integer = 0; \ - } else -#define spCore(n) { \ - for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) { \ - sample_t const * const __restrict in = input + p->at.integer; \ - uint32_t frac = p->at.fraction; \ - int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ \ - sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); /* low-order bits, scaled to [0,1) */ \ - int j = 0; \ - BEGINNING; CONVOLVE(n); END; \ - } \ - fifo_read(&p->fifo, p->at.integer, NULL); \ - p->at.integer = 0; } +#define floatPrecCore(n) { \ + float_step_t at = p->at.flt; \ + for (i = 0; (int)at < num_in; ++i, at += p->step.flt) { \ + sample_t const * const __restrict in = input + (int)at; \ + float_step_t frac = at - (int)at; \ + int phase = (int)(frac * (1 << PHASE_BITS)); \ + sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase); \ + int j = 0; \ + BEGINNING; CONVOLVE(n); END; \ + } \ + fifo_read(&p->fifo, (int)at, NULL); \ + p->at.flt = at - (int)at; } /* Could round to 1 in some cirmcumstances. */ -#if defined HI_PREC_CLOCK && FLOAT_HI_PREC_CLOCK - #define core(n) fphpCore(n) spCore(n) -#elif defined HI_PREC_CLOCK - #define core(n) hpCore(n) spCore(n) + + +#define highPrecCore(n) { \ + step_t at; at.fix = p->at.fix; \ + for (i = 0; at.integer < num_in; ++i, \ + at.fix.ls.all += p->step.fix.ls.all, \ + at.whole += p->step.whole + (at.fix.ls.all < p->step.fix.ls.all)) { \ + sample_t const * const __restrict in = input + at.integer; \ + uint32_t frac = at.fraction; \ + int phase = (int)(frac >> (32 - PHASE_BITS)); /* High-order bits */ \ + /* Low-order bits, scaled to [0,1): */ \ + sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); \ + int j = 0; \ + BEGINNING; CONVOLVE(n); END; \ + } \ + fifo_read(&p->fifo, at.integer, NULL); \ + p->at.whole = at.fraction; \ + p->at.fix.ls = at.fix.ls; } + + + +#define stdPrecCore(n) { \ + int64p_t at; at.all = p->at.whole; \ + for (i = 0; at.parts.ms < num_in; ++i, at.all += p->step.whole) { \ + sample_t const * const __restrict in = input + at.parts.ms; \ + uint32_t const frac = at.parts.ls; \ + int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ \ + /* Low-order bits, scaled to [0,1): */ \ + sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); \ + int j = 0; \ + BEGINNING; CONVOLVE(n); END; \ + } \ + fifo_read(&p->fifo, at.parts.ms, NULL); \ + p->at.whole = at.parts.ls; } + + + +#if WITH_FLOAT_STD_PREC_CLOCK + #define SPCORE floatPrecCore #else - #define core(n) spCore(n) + #define SPCORE stdPrecCore +#endif + + + +#if WITH_HI_PREC_CLOCK + #define core(n) if (p->use_hi_prec_clock) highPrecCore(n) else SPCORE(n) +#else + #define core(n) SPCORE(n) #endif @@ -131,7 +143,6 @@ static void FUNCTION(stage_t * p, fifo_t * output_fifo) #undef COEF_INTERP #undef N #undef BEGINNING -#undef MIDDLE #undef END #undef CONVOLVE #undef FIR_LENGTH diff --git a/src/poly-fir0.h b/src/poly-fir0.h index 0f28c69..76fca2d 100644 --- a/src/poly-fir0.h +++ b/src/poly-fir0.h @@ -22,9 +22,8 @@ #endif #define core(n) \ - for (i = 0; p->at.integer < num_in * p->L; ++i, \ - p->at.integer += p->step.integer) { \ - int const div = p->at.integer / p->L, rem = p->at.integer % p->L; \ + for (i = 0; at < num_in * p->L; ++i, at += step) { \ + int const div = at / p->L, rem = at % p->L; \ sample_t const * const __restrict at = input + div; \ int j = 0; BEGINNING; CONVOLVE(n); END;} @@ -33,13 +32,14 @@ static void FUNCTION(stage_t * p, fifo_t * output_fifo) int num_in = min(stage_occupancy(p), p->input_size); if (num_in) { sample_t const * input = stage_read_p(p); - int i, num_out = (num_in * p->L - p->at.integer + p->step.integer - 1) / p->step.integer; + int at = p->at.integer, step = p->step.integer; + int i, num_out = (num_in * p->L - at + step - 1) / step; sample_t * __restrict output = fifo_reserve(output_fifo, num_out); CORE(N); assert(i == num_out); - fifo_read(&p->fifo, p->at.integer / p->L, NULL); - p->at.integer = p->at.integer % p->L; + fifo_read(&p->fifo, at / p->L, NULL); + p->at.integer = at % p->L; } } diff --git a/src/rint.h b/src/rint.h index d3629ae..2f1dfbe 100644 --- a/src/rint.h +++ b/src/rint.h @@ -6,6 +6,9 @@ #include "std-types.h" +/* For x86, compiler-supplied versions of these functions (where available) + * can have poor performance (e.g. mingw32), so prefer these asm versions: */ + #if defined __GNUC__ && (defined __i386__ || defined __x86_64__) #define FPU_RINT32 #define FPU_RINT16 @@ -23,7 +26,7 @@ int32_t status[7]; __asm__ __volatile__("fnstenv %0": "=m"(status)); status[1] &= ~FE_INVALID; - __asm__ __volatile__("fldenv %0": : "m"(status)); + __asm__ __volatile__("fldenv %0": : "m"(*status)); return 0; } #elif defined _MSC_VER && defined _M_IX86 @@ -69,7 +72,7 @@ #define rint16F(y,x) rint16d(&(y),(double)(x)) #define FE_INVALID 1 #define fe_test_invalid() (_statusfp() & _SW_INVALID) - #define fe_clear_invalid _clearfp /* Note: clears all */ + #define fe_clear_invalid _clearfp /* Note: clears all. */ #elif HAVE_LRINT && LONG_MAX == 2147483647L && HAVE_FENV_H #include #include diff --git a/src/soxr.c b/src/soxr.c index 4a7b2da..9bd5fcb 100644 --- a/src/soxr.c +++ b/src/soxr.c @@ -92,10 +92,10 @@ struct soxr { -#if !WITH_CR32 && !WITH_CR32S && !WITH_CR64 && !WITH_CR64S - #define lsx_to_3dB(x) ((x)/(x)) -#else +#if WITH_CR32 || WITH_CR32S || WITH_CR64 || WITH_CR64S #include "filter.h" +#else + #define lsx_to_3dB(x) ((x)/(x)) #endif @@ -193,7 +193,7 @@ soxr_io_spec_t soxr_io_spec( -#if WITH_CR32S || WITH_CR64S +#if (WITH_CR32S && WITH_CR32) || (WITH_CR64S && WITH_CR64) #if defined __GNUC__ && defined __x86_64__ #define CPUID(type, eax_, ebx_, ecx_, edx_) \ __asm__ __volatile__ ( \ @@ -240,7 +240,7 @@ soxr_io_spec_t soxr_io_spec( -#if WITH_CR32S +#if WITH_CR32S && WITH_CR32 static bool cpu_has_simd32(void) { #if defined __x86_64__ || defined _M_X64 @@ -259,14 +259,17 @@ soxr_io_spec_t soxr_io_spec( static bool should_use_simd32(void) { - char const * e = getenv("SOXR_USE_SIMD32"); - return e? !!atoi(e) : cpu_has_simd32(); + char const * e; + return ((e = getenv("SOXR_USE_SIMD" )))? !!atoi(e) : + ((e = getenv("SOXR_USE_SIMD32")))? !!atoi(e) : cpu_has_simd32(); } +#else + #define should_use_simd32() true #endif -#if WITH_CR64S +#if WITH_CR64S && WITH_CR64 #if defined __GNUC__ #define XGETBV(type, eax_, edx_) \ __asm__ __volatile__ ( \ @@ -306,9 +309,12 @@ soxr_io_spec_t soxr_io_spec( static bool should_use_simd64(void) { - char const * e = getenv("SOXR_USE_SIMD64"); - return e? !!atoi(e) : cpu_has_simd64(); + char const * e; + return ((e = getenv("SOXR_USE_SIMD" )))? !!atoi(e) : + ((e = getenv("SOXR_USE_SIMD64")))? !!atoi(e) : cpu_has_simd64(); } +#else + #define should_use_simd64() true #endif @@ -322,7 +328,8 @@ extern control_block_t -static void runtime_num(char const * env_name, int min, int max, unsigned * field) +static void runtime_num(char const * env_name, + int min, int max, unsigned * field) { char const * e = getenv(env_name); if (e) { @@ -334,7 +341,8 @@ static void runtime_num(char const * env_name, int min, int max, unsigned * fiel -static void runtime_flag(char const * env_name, unsigned n_bits, unsigned n_shift, unsigned long * flags) +static void runtime_flag(char const * env_name, + unsigned n_bits, unsigned n_shift, unsigned long * flags) { char const * e = getenv(env_name); if (e) { @@ -355,14 +363,28 @@ soxr_t soxr_create( soxr_quality_spec_t const * q_spec, soxr_runtime_spec_t const * runtime_spec) { - double io_ratio = output_rate!=0? input_rate!=0? input_rate / output_rate : -1 : input_rate!=0? -1 : 0; + double io_ratio = output_rate!=0? input_rate!=0? + input_rate / output_rate : -1 : input_rate!=0? -1 : 0; static const float datatype_full_scale[] = {1, 1, 65536.*32768, 32768}; soxr_t p = 0; soxr_error_t error = 0; #if WITH_DEV_TRACE +#define _(x) (char)(sizeof(x)>=10? 'a'+(char)(sizeof(x)-10):'0'+(char)sizeof(x)) char const * e = getenv("SOXR_TRACE"); _soxr_trace_level = e? atoi(e) : 0; + { + char const arch[] = {_(char), _(short), _(int), _(long), _(long long) + , ' ', _(float), _(double), _(long double) + , ' ', _(int *), _(int (*)(int)) + , ' ', HAVE_BIGENDIAN ? 'B' : 'L' +#if defined _OPENMP + , ' ', 'O', 'M', 'P' +#endif + , 0}; +#undef _ + lsx_debug("arch: %s", arch); + } #endif if (q_spec && q_spec->e) error = q_spec->e; diff --git a/src/soxr.h b/src/soxr.h index 640b698..022ba26 100644 --- a/src/soxr.h +++ b/src/soxr.h @@ -65,8 +65,8 @@ input or output (e.g. ilen, olen). */ /* E.g. #if SOXR_THIS_VERSION >= SOXR_VERSION(0,1,1) ... */ #define SOXR_VERSION(x,y,z) (((x)<<16)|((y)<<8)|(z)) -#define SOXR_THIS_VERSION SOXR_VERSION(0,1,2) -#define SOXR_THIS_VERSION_STR "0.1.2" +#define SOXR_THIS_VERSION SOXR_VERSION(0,1,3) +#define SOXR_THIS_VERSION_STR "0.1.3b1" @@ -249,7 +249,6 @@ struct soxr_quality_spec { /* Typically */ #define SOXR_ROLLOFF_MEDIUM 1u /* <= 0.35 dB */ #define SOXR_ROLLOFF_NONE 2u /* For Chebyshev bandwidth. */ -#define SOXR_MAINTAIN_3DB_PT 4u /* Reserved for internal use. */ #define SOXR_HI_PREC_CLOCK 8u /* Increase `irrational' ratio accuracy. */ #define SOXR_DOUBLE_PRECISION 16u /* Use D.P. calcs even if precision <= 20. */ #define SOXR_VR 32u /* Variable-rate resampling. */ @@ -257,12 +256,12 @@ struct soxr_quality_spec { /* Typically */ struct soxr_runtime_spec { /* Typically */ - unsigned log2_min_dft_size;/* For DFT efficiency. [8,15] 10 */ - unsigned log2_large_dft_size;/* For DFT efficiency. [8,20] 17 */ - unsigned coef_size_kbytes; /* For SOXR_COEF_INTERP_AUTO (below). 400 */ - unsigned num_threads; /* If built so. 0 means `automatic'. 1 */ - void * e; /* Reserved for internal use. 0 */ - unsigned long flags; /* Per the following #defines. 0 */ + unsigned log2_min_dft_size; /* For DFT efficiency. [8,15] 11 */ + unsigned log2_large_dft_size; /* For DFT efficiency. [8,20] 17 */ + unsigned coef_size_kbytes; /* For SOXR_COEF_INTERP_AUTO (below). 400 */ + unsigned num_threads; /* If built so. 0 means `automatic'. 1 */ + void * e; /* Reserved for internal use. 0 */ + unsigned long flags; /* Per the following #defines. 0 */ }; /* For `irrational' ratios only: */ #define SOXR_COEF_INTERP_AUTO 0u /* Auto select coef. interpolation. */ @@ -293,7 +292,7 @@ SOXR soxr_quality_spec_t soxr_quality_spec( #define SOXR_24_BITQ 5 #define SOXR_28_BITQ 6 #define SOXR_32_BITQ 7 - /* For internal use only; to be removed: */ + /* Reserved for internal use (to be removed): */ #define SOXR_LSR0Q 8 /* 'Best sinc'. */ #define SOXR_LSR1Q 9 /* 'Medium sinc'. */ #define SOXR_LSR2Q 10 /* 'Fast sinc'. */ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 55cb55d..ee8dd0b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -44,8 +44,12 @@ if (WITH_CR64 OR WITH_CR64S) set (test_bits ${test_bits} 28) endif () +set (rates 192000) +if (WITH_HI_PREC_CLOCK) + set (rates ${rates} 65537) +endif () foreach (b ${test_bits}) - foreach (r 192000 65537) + foreach (r ${rates}) add_cmp_test (${base_rate} ${r} ${b}) add_cmp_test (${r} ${base_rate} ${b}) endforeach () diff --git a/tests/throughput-test b/tests/throughput-test index b03a2a4..aef36f6 100755 --- a/tests/throughput-test +++ b/tests/throughput-test @@ -1,6 +1,9 @@ #!/bin/sh set -e +# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net +# Licence for this file: LGPL v2.1 See LICENCE for details. + test -r throughput.exe && wine=wine test /$1 = / && list="`seq 0 3`" || list="$*" diff --git a/tests/throughput-test.bat b/tests/throughput-test.bat index 482c93b..46b8f7d 100644 --- a/tests/throughput-test.bat +++ b/tests/throughput-test.bat @@ -1,2 +1,5 @@ @echo off +rem SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net +rem Licence for this file: LGPL v2.1 See LICENCE for details. + for /L %%i in (0,1,3) DO throughput 44.1 48 1 0 %%i diff --git a/tests/throughput.c b/tests/throughput.c index 80256ed..e81d530 100644 --- a/tests/throughput.c +++ b/tests/throughput.c @@ -16,23 +16,23 @@ #define timerRunning() (QueryPerformanceCounter(&tmp), \ (tmp.QuadPart-start.QuadPart < stop.QuadPart)) #else - #include - #include - #if defined _POSIX_TIMERS && _POSIX_TIMERS > 0 - #define K (k*k) - #define tv_frac tv_nsec - #if defined _POSIX_MONOTONIC_CLOCK - #define get_time(x) clock_gettime(CLOCK_MONOTONIC, x) - #else - #define get_time(x) clock_gettime(CLOCK_REALTIME, x) - #endif + #include + #if defined timeradd + #define K k + #define tv_frac tv_usec + #define timespec timeval + #define get_time(x) gettimeofday(x, NULL) #else - #include - #if defined timeradd - #define K k - #define tv_frac tv_usec - #define timespec timeval - #define get_time(x) gettimeofday(x, NULL) + #include + #include + #if defined _POSIX_TIMERS && _POSIX_TIMERS > 0 + #define K (k*k) + #define tv_frac tv_nsec + #if defined _POSIX_MONOTONIC_CLOCK + #define get_time(x) clock_gettime(CLOCK_MONOTONIC, x) + #else + #define get_time(x) clock_gettime(CLOCK_REALTIME, x) + #endif #else #include #define K 1