diff --git a/CMakeLists.txt b/CMakeLists.txt
index 69e6425..60a5c33 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,8 @@
cmake_minimum_required (VERSION 3.0 FATAL_ERROR)
project (soxr C)
-set (DESCRIPTION_SUMMARY "High quality, one-dimensional sample-rate conversion library")
+set (DESCRIPTION_SUMMARY
+ "High quality, one-dimensional sample-rate conversion library")
@@ -15,15 +16,20 @@ set (PROJECT_VERSION_MINOR 1)
set (PROJECT_VERSION_PATCH 2)
# For shared-object; if, since the last public release:
-# * library code changed at all: ++revision
-# * interfaces changed at all: ++current, revision = 0
-# * interfaces added: ++age
-# * interfaces removed: age = 0
+#
+# 1) library code changed at all: ++revision
+# 2) interfaces changed at all: ++current, revision = 0
+# 3) interfaces added: ++age
+# 4) interfaces removed: age = 0
set (SO_VERSION_CURRENT 1)
set (SO_VERSION_REVISION 1)
set (SO_VERSION_AGE 1)
+math (EXPR SO_VERSION_MAJOR "${SO_VERSION_CURRENT} - ${SO_VERSION_AGE}")
+math (EXPR SO_VERSION_MINOR "${SO_VERSION_AGE}")
+math (EXPR SO_VERSION_PATCH "${SO_VERSION_REVISION}")
+
# Main options:
@@ -31,31 +37,45 @@ set (SO_VERSION_AGE 1)
include (CMakeDependentOption)
if (NOT CMAKE_BUILD_TYPE)
- set (CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
+ set (CMAKE_BUILD_TYPE Release CACHE STRING
+ "Build type, one of: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif ()
-option (BUILD_SHARED_LIBS "Build shared libraries." ON)
+option (BUILD_TESTS "Build sanity-tests." ON)
option (BUILD_EXAMPLES "Build examples." OFF)
option (WITH_OPENMP "Include OpenMP threading." ON)
option (WITH_LSR_BINDINGS "Include a `libsamplerate'-like interface." ON)
-cmake_dependent_option (BUILD_TESTS "Build sanity-tests." ON
- "NOT CMAKE_CROSSCOMPILING" OFF)
-cmake_dependent_option (WITH_SINGLE_PRECISION "Build with single precision (for up to 20-bit accuracy)." ON
- "WITH_DOUBLE_PRECISION" ON)
-cmake_dependent_option (WITH_DOUBLE_PRECISION "Build with double precision (for up to 32-bit accuracy)." ON
- "WITH_SINGLE_PRECISION" ON)
-cmake_dependent_option (WITH_SIMD "Use SIMD (for faster single precision)." ON
- "WITH_SINGLE_PRECISION" OFF)
-cmake_dependent_option (WITH_AVFFT "Use libavcodec (LGPL) for SIMD DFT." OFF
- "WITH_SIMD;NOT WITH_PFFFT" OFF)
-cmake_dependent_option (WITH_PFFFT "Use PFFFT (BSD-like licence) for SIMD DFT." ON
- "WITH_SIMD;NOT WITH_AVFFT" OFF)
+cmake_dependent_option (BUILD_SHARED_LIBS
+ "Build shared (dynamic) soxr libraries." ON
+ "NOT WITH_DEV_GPROF" OFF)
+cmake_dependent_option (WITH_VR32
+ "Include HQ variable-rate resampling engine." ON
+ "WITH_CR32 OR WITH_CR64 OR WITH_CR32S OR WITH_CR64S OR NOT DEFINED WITH_VR32" ON)
+cmake_dependent_option (WITH_CR32
+ "Include HQ constant-rate resampling engine." ON
+ "WITH_VR32 OR WITH_CR64 OR WITH_CR32S OR WITH_CR64S" ON)
+cmake_dependent_option (WITH_CR64
+ "Include VHQ constant-rate resampling engine." ON
+ "WITH_VR32 OR WITH_CR32 OR WITH_CR32S OR WITH_CR64S" ON)
+cmake_dependent_option (WITH_CR64S
+ "Include VHQ SIMD constant-rate resampling engine." ON
+ "WITH_VR32 OR WITH_CR32 OR WITH_CR32S OR WITH_CR64" ON)
+cmake_dependent_option (WITH_CR32S
+ "Include HQ SIMD constant-rate resampling engine." ON
+ "WITH_VR32 OR WITH_CR64 OR WITH_CR32 OR WITH_CR64S" ON)
+cmake_dependent_option (WITH_AVFFT
+ "Use libavcodec (LGPL) for HQ SIMD DFT." OFF
+ "WITH_CR32S;NOT WITH_PFFFT" OFF)
+cmake_dependent_option (WITH_PFFFT
+ "Use PFFFT (BSD-like licence) for HQ SIMD DFT." ON
+ "WITH_CR32S;NOT WITH_AVFFT" OFF)
cmake_dependent_option (BUILD_LSR_TESTS "Build LSR tests." OFF
"UNIX;NOT CMAKE_CROSSCOMPILING;EXISTS ${PROJECT_SOURCE_DIR}/lsr-tests;WITH_LSR_BINDINGS" OFF)
-option (WITH_DEV_TRACE "Enable developer trace output." OFF)
-mark_as_advanced (WITH_DEV_TRACE)
+option (WITH_DEV_TRACE "Enable developer trace capability." ON)
+option (WITH_DEV_GPROF "Enable developer grpof output." OFF)
+mark_as_advanced (WITH_DEV_TRACE WITH_DEV_GPROF)
@@ -79,15 +99,23 @@ endif ()
if (WITH_OPENMP)
find_package (OpenMP)
- if (OPENMP_FOUND)
+ if (OpenMP_FOUND)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
- set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
+ set (CMAKE_EXE_LINKER_FLAGS
+ "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+ set (CMAKE_SHARED_LINKER_FLAGS
+ "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
endif ()
endif ()
-if (WITH_SIMD)
- find_package (SIMD)
+if (WITH_CR32S)
+ find_package (SIMD32)
+ set (WITH_CR32S ${SIMD32_FOUND})
+endif ()
+
+if (WITH_CR64S)
+ find_package (SIMD64)
+ set (WITH_CR64S ${SIMD64_FOUND})
endif ()
if (WITH_AVFFT)
@@ -98,7 +126,7 @@ if (WITH_AVFFT)
endif ()
endif ()
-if (WITH_AVFFT OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm" AND SIMD_FOUND))
+if (WITH_AVFFT OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm" AND SIMD32_FOUND))
find_package (LibAVUtil)
if (AVUTIL_FOUND)
include_directories (${AVUTIL_INCLUDE_DIRS})
@@ -117,13 +145,22 @@ test_big_endian (HAVE_BIGENDIAN)
# Compiler configuration:
if (CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang")
- set (PROJECT_CXX_FLAGS "-Wconversion -Wall -W -pedantic -Wundef -Wcast-align -Wpointer-arith -Wno-long-long")
- set (PROJECT_C_FLAGS "${PROJECT_CXX_FLAGS} -std=gnu89 -Wnested-externs -Wmissing-prototypes -Wstrict-prototypes")
+ set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -Wconversion -Wall -Wextra")
+ set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -pedantic -Wundef -Wpointer-arith")
+ set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -Wno-long-long -Wno-keyword-macro")
+ if (WITH_DEV_GPROF)
+ set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -pg")
+ endif ()
+ # Can use std=c89, but gnu89 should give faster sinf, cosf, etc.:
+ set (PROJECT_C_FLAGS "${PROJECT_CXX_FLAGS} -std=gnu89 -Wnested-externs")
+ set (PROJECT_C_FLAGS "${PROJECT_C_FLAGS} -Wmissing-prototypes -Wstrict-prototypes")
if (CMAKE_BUILD_TYPE STREQUAL "Release")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s") # strip
endif ()
- cmake_dependent_option (VISIBILITY_HIDDEN "Build with -fvisibility=hidden." ON
+ cmake_dependent_option (VISIBILITY_HIDDEN
+ "Build shared libraries with -fvisibility=hidden." ON
"BUILD_SHARED_LIBS" OFF)
+ mark_as_advanced (VISIBILITY_HIDDEN)
if (VISIBILITY_HIDDEN)
add_definitions (-fvisibility=hidden -DSOXR_VISIBILITY)
endif ()
@@ -131,9 +168,14 @@ endif ()
if (MSVC)
add_definitions (-D_USE_MATH_DEFINES -D_CRT_SECURE_NO_WARNINGS)
- option (ENABLE_STATIC_RUNTIME "Visual Studio, link with runtime statically." OFF)
- if (ENABLE_STATIC_RUNTIME)
- foreach (flag_var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+ option (BUILD_SHARED_RUNTIME "MSVC, link with runtime dynamically." ON)
+ if (NOT BUILD_SHARED_RUNTIME)
+ foreach (flag_var
+ CMAKE_C_FLAGS CMAKE_CXX_FLAGS
+ CMAKE_C_FLAGS_DEBUG CMAKE_CXX_FLAGS_DEBUG
+ CMAKE_C_FLAGS_RELEASE CMAKE_CXX_FLAGS_RELEASE
+ CMAKE_C_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_MINSIZEREL
+ CMAKE_C_FLAGS_RELWITHDEBINFO CMAKE_CXX_FLAGS_RELWITHDEBINFO)
string (REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endforeach ()
endif ()
@@ -147,7 +189,8 @@ endif ()
# Build configuration:
-if (${BUILD_SHARED_LIBS} AND ${CMAKE_SYSTEM_NAME} STREQUAL Windows) # Allow exes to find dlls:
+if (${BUILD_SHARED_LIBS} AND ${CMAKE_SYSTEM_NAME} STREQUAL Windows)
+ # Allow exes to find dlls:
set (BIN ${PROJECT_BINARY_DIR}/bin/)
set (EXAMPLES_BIN ${BIN})
set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${BIN})
@@ -188,17 +231,16 @@ endif ()
if (APPLE)
option (BUILD_FRAMEWORK "Build an OS X framework." OFF)
- set (FRAMEWORK_INSTALL_DIR "/Library/Frameworks" CACHE STRING "Directory to install frameworks to.")
+ set (FRAMEWORK_INSTALL_DIR
+ "/Library/Frameworks" CACHE STRING "Directory to install frameworks to.")
endif ()
# Top-level:
-set (PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH})
-math (EXPR SO_VERSION_MAJOR "${SO_VERSION_CURRENT} - ${SO_VERSION_AGE}")
-math (EXPR SO_VERSION_MINOR "${SO_VERSION_AGE}")
-math (EXPR SO_VERSION_PATCH "${SO_VERSION_REVISION}")
+set (PROJECT_VERSION
+ ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH})
set (SO_VERSION ${SO_VERSION_MAJOR}.${SO_VERSION_MINOR}.${SO_VERSION_PATCH})
configure_file (
@@ -206,7 +248,7 @@ configure_file (
${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.h)
include_directories (${PROJECT_BINARY_DIR})
-if (BUILD_TESTS OR BUILD_LSR_TESTS)
+if (NOT CMAKE_CROSSCOMPILING AND (BUILD_TESTS OR BUILD_LSR_TESTS))
enable_testing ()
endif ()
diff --git a/INSTALL b/INSTALL
index 3f30772..c429e61 100644
--- a/INSTALL
+++ b/INSTALL
@@ -78,32 +78,27 @@ To list help on the available options, enter:
Options, if given, should be preceded with '-D', e.g.
- cmake -DWITH_SIMD:BOOL=OFF ..
+ cmake -DBUILD_SHARED_LIBS:BOOL=OFF ..
Resampling engines
-As available on a given system, the library may include up to four resampling
-‘engines’, as follows:
+As available on a given system, options for including up to five resampling
+‘engines’ are available (per above) as follows:
- cr32: for constant-rate resampling with precision up to 20 bits,
- cr32s: SIMD variant of cr32,
- cr64: for constant-rate resampling with precision greater than 20 bits,
- vr32: for variable-rate resampling.
+ WITH_CR32: for constant-rate HQ resampling,
+ WITH_CR32S: SIMD variant of previous,
+ WITH_CR64: for constant-rate VHQ resampling,
+ WITH_CR64S: SIMD variant of previous,
+ WITH_VR32: for variable-rate HQ resampling.
-Engine inclusion is controlled (as above) by the following cmake option
-variables:
+By default, these options are all set to ON.
- cr32: WITH_SINGLE_PRECISION
- cr32s: WITH_SINGLE_PRECISION, WITH_SIMD
- cr32: WITH_DOUBLE_PRECISION
-
-By default, these variables are all set to ON.
-
-When both cr32 and cr32s engines are included, run-time selection is automatic
-(based on CPU capability) for x86 CPUs, and can be automatic for ARM CPUs if
-the 3rd-party library `libavutil' is available at libsoxr build-time.
+When both SIMD and non-SIMD engine variants are included, run-time selection
+is automatic (based on CPU capability) for x86 CPUs, and can be automatic for
+ARM CPUs if the 3rd-party library `libavutil' is available at libsoxr
+build-time.
@@ -114,13 +109,13 @@ E.g. targeting a Linux ARM system:
mkdir build
cd build
cmake -DCMAKE_SYSTEM_NAME=Linux \
- -DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc-5 \
+ -DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc \
..
or, also building the examples (one of which uses C++):
cmake -DCMAKE_SYSTEM_NAME=Linux \
- -DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc-5 \
- -DCMAKE_CXX_COMPILER=arm-linux-gnueabi-g++-5 \
+ -DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc \
+ -DCMAKE_CXX_COMPILER=arm-linux-gnueabi-g++ \
-DBUILD_EXAMPLES=1 \
..
@@ -158,7 +153,7 @@ Autotools-based systems might find it useful to create a file called
cmake -DBUILD_SHARED_LIBS=OFF .
(or with other build options as required).
-For MS visual studio, see msvc/README.
+For MS Visual C++, see msvc/README.
@@ -168,7 +163,7 @@ The libsoxr API structure ‘soxr_runtime_spec_t’ allows application developer
to optimise some aspects of libsoxr’s operation for a particular application.
However, since optimal performance might depend on an individual end-user’s
run-time system and the end-user’s preferences, environment variables are
-available to set (override) the run-time parameters:
+available to set (override) run-time parameters as follows:
Env. variable Equivalent soxr_runtime_spec_t item
------------------ -----------------------------------
@@ -178,8 +173,8 @@ available to set (override) the run-time parameters:
SOXR_MIN_DFT_SIZE log2_min_dft_size
SOXR_NUM_THREADS num_threads
-Additionally, the SOXR_USE_SIMD environment variable may be used to override
-automatic selection (or to provide manual selection where automatic selection
-is not available) between the cr32 and cr32s resampling engines. (Which engine
-is selected for a specific configuration of libsoxr can be checked using
-example #3, which reports it.)
+Additionally, the SOXR_USE_SIMD32 and SOXR_USE_SIMD64 environment variables
+may be used to override automatic selection (or to provide manual selection
+where automatic selection is not available) between SIMD and non-SIMD engine
+variants. (Which engine is selected for a specific configuration of libsoxr
+can be checked using example #3, which reports it.)
diff --git a/LICENCE b/LICENCE
index 1c61878..52f84ee 100644
--- a/LICENCE
+++ b/LICENCE
@@ -1,4 +1,4 @@
-SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
This library is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
@@ -11,8 +11,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
-along with this library; if not, write to the Free Software Foundation,
-Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+along with this library; if not, see .
Notes
diff --git a/NEWS b/NEWS
index a2eca85..d368fa2 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,8 @@
Version 0.1.3 (2016-mm-dd)
- * Better support for clang, ARM+SIMD, and cross-compilation.
+ * SIMD enhancements: SSE, AVX, Neon.
+ * Improved support for clang, ARM, and cross-compilation.
* Other minor fixes/improvements to build/tests/documentation.
+ * N.B. some cmake configuration variable name changes.
Version 0.1.2 (2015-09-05)
* Fix conversion failure when I/O types differ but I/O rates don't.
diff --git a/README b/README
index 06f11e6..0070896 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
The SoX Resampler library `libsoxr' performs one-dimensional sample-rate
conversion -- it may be used, for example, to resample PCM-encoded audio.
diff --git a/TODO b/TODO
index 1c4a31b..c699c0c 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,2 @@
* SOXR_ALLOW_ALIASING
* Explicit flush API fn, perhaps.
-* More SIMD.
diff --git a/cmake/Modules/FindCFlags.cmake b/cmake/Modules/FindCFlags.cmake
new file mode 100644
index 0000000..f118727
--- /dev/null
+++ b/cmake/Modules/FindCFlags.cmake
@@ -0,0 +1,35 @@
+# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1 See LICENCE for details.
+
+# - Function to find C compiler feature flags
+
+include (CheckCSourceCompiles)
+include (FindPackageHandleStandardArgs)
+
+function (FindCFlags PKG_NAME PKG_DESC TRIAL_C_FLAGS TEST_C_SOURCE)
+
+foreach (TRIAL_C_FLAG ${TRIAL_C_FLAGS})
+ message (STATUS "Trying ${PKG_NAME} C flags: ${TRIAL_C_FLAG}")
+ unset (DETECT_${PKG_NAME}_C_FLAGS CACHE) #displayed by check_c_source_compiles
+
+ set (TMP "${CMAKE_REQUIRED_FLAGS}")
+ set (CMAKE_REQUIRED_FLAGS "${TRIAL_C_FLAG}")
+ check_c_source_compiles ("${TEST_C_SOURCE}" DETECT_${PKG_NAME}_C_FLAGS)
+ set (CMAKE_REQUIRED_FLAGS "${TMP}")
+
+ if (DETECT_${PKG_NAME}_C_FLAGS)
+ set (DETECTED_C_FLAGS "${TRIAL_C_FLAG}")
+ break ()
+ endif ()
+endforeach ()
+
+# N.B. Will not overwrite existing cache variable:
+set (${PKG_NAME}_C_FLAGS "${DETECTED_C_FLAGS}"
+ CACHE STRING "C compiler flags for ${PKG_DESC}")
+
+find_package_handle_standard_args (
+ ${PKG_NAME} DEFAULT_MSG ${PKG_NAME}_C_FLAGS ${PKG_NAME}_C_FLAGS)
+mark_as_advanced (${PKG_NAME}_C_FLAGS)
+set (${PKG_NAME}_FOUND ${${PKG_NAME}_FOUND} PARENT_SCOPE)
+
+endfunction ()
diff --git a/cmake/Modules/FindOpenMP.cmake b/cmake/Modules/FindOpenMP.cmake
index 74e5bc5..3664eed 100644
--- a/cmake/Modules/FindOpenMP.cmake
+++ b/cmake/Modules/FindOpenMP.cmake
@@ -1,117 +1,38 @@
+# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1 See LICENCE for details.
+
# - Finds OpenMP support
-# This module can be used to detect OpenMP support in a compiler.
-# If the compiler supports OpenMP, the flags required to compile with
-# openmp support are set.
#
# The following variables are set:
-# OpenMP_C_FLAGS - flags to add to the C compiler for OpenMP support
-# OPENMP_FOUND - true if openmp is detected
-#
-# Supported compilers can be found at http://openmp.org/wp/openmp-compilers/
-#
-# Modifications for soxr:
-# * don't rely on presence of C++ compiler
-# * support MINGW
-#
-#=============================================================================
-# Copyright 2009 Kitware, Inc.
-# Copyright 2008-2009 André Rigland Brodtkorb
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# * The names of Kitware, Inc., the Insight Consortium, or the names of
-# any consortium members, or of any contributors, may not be used to
-# endorse or promote products derived from this software without
-# specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
-# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# OpenMP_C_FLAGS - flags to add to the C compiler for this package.
+# OpenMP_FOUND - true if support for this package is found.
-include (CheckCSourceCompiles)
-include (FindPackageHandleStandardArgs)
-
-set (OpenMP_C_FLAG_CANDIDATES
- #Gnu
- "-fopenmp"
- #Clang
- "-fopenmp=libiomp5"
- #Microsoft Visual Studio
- "/openmp"
- #Intel windows
- "-Qopenmp"
- #Intel
- "-openmp"
- #Empty, if compiler automatically accepts openmp
- " "
- #Sun
- "-xopenmp"
- #HP
- "+Oopenmp"
- #IBM XL C/c++
- "-qsmp"
- #Portland Group
- "-mp"
-)
-
-# sample openmp source code to test
-set (OpenMP_C_TEST_SOURCE
-"
-#include
-int main() {
-#ifdef _OPENMP
- return 0;
-#else
- breaks_on_purpose
-#endif
-}
-")
-# if these are set then do not try to find them again,
-# by avoiding any try_compiles for the flags
if (DEFINED OpenMP_C_FLAGS)
- set (OpenMP_C_FLAG_CANDIDATES)
-endif (DEFINED OpenMP_C_FLAGS)
+ set (TRIAL_C_FLAGS)
+else ()
+ set (TRIAL_C_FLAGS
+ "-fopenmp" # Gnu
+ "-fopenmp=libiomp5" # Clang
+ "/openmp" # MSVC
+ " "
+ )
-# check c compiler
-foreach (FLAG ${OpenMP_C_FLAG_CANDIDATES})
- set (SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
- set (CMAKE_REQUIRED_FLAGS "${FLAG}")
- unset (OpenMP_FLAG_DETECTED CACHE)
- message (STATUS "Try OpenMP C flag = [${FLAG}]")
- check_c_source_compiles ("${OpenMP_C_TEST_SOURCE}" OpenMP_FLAG_DETECTED)
- set (CMAKE_REQUIRED_FLAGS "${SAFE_CMAKE_REQUIRED_FLAGS}")
- if (OpenMP_FLAG_DETECTED)
- set (OpenMP_C_FLAGS_INTERNAL "${FLAG}")
- break ()
- endif (OpenMP_FLAG_DETECTED)
-endforeach (FLAG ${OpenMP_C_FLAG_CANDIDATES})
+ set (TEST_C_SOURCE "
+ #ifndef _OPENMP
+ #error
+ #endif
+ #include
+ int main() {return 0;}
+ ")
+endif ()
-set (OpenMP_C_FLAGS "${OpenMP_C_FLAGS_INTERNAL}"
- CACHE STRING "C compiler flags for OpenMP parallization")
+include (FindCFlags)
-# handle the standard arguments for find_package
-find_package_handle_standard_args (OpenMP DEFAULT_MSG
- OpenMP_C_FLAGS)
+FindCFlags ("OpenMP" "OpenMP threading"
+ "${TRIAL_C_FLAGS}" "${TEST_C_SOURCE}")
if (MINGW)
set (OpenMP_SHARED_LINKER_FLAGS "${OpenMP_SHARED_LINKER_FLAGS} ${OpenMP_C_FLAGS}")
set (OpenMP_EXE_LINKER_FLAGS "${OpenMP_EXE_LINKER_FLAGS} ${OpenMP_C_FLAGS}")
+ mark_as_advanced (OpenMP_SHARED_LINKER_FLAGS OpenMP_EXE_LINKER_FLAGS)
endif ()
-
-mark_as_advanced (OpenMP_C_FLAGS OpenMP_SHARED_LINKER_FLAGS OpenMP_EXE_LINKER_FLAGS)
diff --git a/cmake/Modules/FindSIMD.cmake b/cmake/Modules/FindSIMD.cmake
deleted file mode 100644
index 30dca70..0000000
--- a/cmake/Modules/FindSIMD.cmake
+++ /dev/null
@@ -1,104 +0,0 @@
-# - Finds SIMD support
-#
-# The following variables are set:
-# SIMD_C_FLAGS - flags to add to the C compiler for this package.
-# SIMD_FOUND - true if support for this package is found.
-#
-#=============================================================================
-# Based on FindOpenMP.cmake, which is:
-#
-# Copyright 2009 Kitware, Inc.
-# Copyright 2008-2009 André Rigland Brodtkorb
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# * The names of Kitware, Inc., the Insight Consortium, or the names of
-# any consortium members, or of any contributors, may not be used to
-# endorse or promote products derived from this software without
-# specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
-# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-include (CheckCSourceCompiles)
-include (FindPackageHandleStandardArgs)
-
-if (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
- set (SIMD_C_FLAG_CANDIDATES
- # Gcc
- "-Wno-cast-align -mfpu=neon-vfpv4 -mcpu=cortex-a7"
- "-Wno-cast-align -mfpu=neon -mfloat-abi=hard"
- "-Wno-cast-align -mfpu=neon -mfloat-abi=softfp"
- "-Wno-cast-align -mfpu=neon -mfloat-abi=soft"
- )
- set (SIMD_C_TEST_SOURCE "
- #include
- int main() {
- float32x4_t a = vdupq_n_f32(0), b = a, c = vaddq_f32(a,b);
- return 0;
- }
- ")
-else ()
- if (WIN32) # Safety for when mixed lib/app compilers (but performance hit)
- set (GCC_WIN32_SIMD_OPTS "-mincoming-stack-boundary=2")
- endif ()
-
- set (SIMD_C_FLAG_CANDIDATES
- # x64
- "-Wno-cast-align"
- " "
- # Microsoft Visual Studio x86
- "/arch:SSE /fp:fast -D__SSE__"
- # Gcc x86
- "-Wno-cast-align -msse -mfpmath=sse ${GCC_WIN32_SIMD_OPTS}"
- # Gcc x86 (old versions)
- "-msse -mfpmath=sse"
- )
- set (SIMD_C_TEST_SOURCE "
- #include
- int main() {
- __m128 a = _mm_setzero_ps(), b = a, c = _mm_add_ps(a,b);
- return 0;
- }
- ")
-endif ()
-
-if (DEFINED SIMD_C_FLAGS)
- set (SIMD_C_FLAG_CANDIDATES)
-endif ()
-
-foreach (FLAG ${SIMD_C_FLAG_CANDIDATES})
- set (SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
- set (CMAKE_REQUIRED_FLAGS "${FLAG}")
- unset (SIMD_FLAG_DETECTED CACHE)
- message (STATUS "Try SIMD C flag = [${FLAG}]")
- check_c_source_compiles ("${SIMD_C_TEST_SOURCE}" SIMD_FLAG_DETECTED)
- set (CMAKE_REQUIRED_FLAGS "${SAFE_CMAKE_REQUIRED_FLAGS}")
- if (SIMD_FLAG_DETECTED)
- set (SIMD_C_FLAGS_INTERNAL "${FLAG}")
- break ()
- endif ()
-endforeach ()
-
-set (SIMD_C_FLAGS "${SIMD_C_FLAGS_INTERNAL}"
- CACHE STRING "C compiler flags for SIMD vectorization")
-
-find_package_handle_standard_args (SIMD DEFAULT_MSG SIMD_C_FLAGS SIMD_C_FLAGS)
-mark_as_advanced (SIMD_C_FLAGS)
diff --git a/cmake/Modules/FindSIMD32.cmake b/cmake/Modules/FindSIMD32.cmake
new file mode 100644
index 0000000..c215455
--- /dev/null
+++ b/cmake/Modules/FindSIMD32.cmake
@@ -0,0 +1,54 @@
+# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1 See LICENCE for details.
+
+# - Finds SIMD32 support
+#
+# The following variables are set:
+# SIMD32_C_FLAGS - flags to add to the C compiler for this package.
+# SIMD32_FOUND - true if support for this package is found.
+
+if (DEFINED SIMD32_C_FLAGS)
+ set (TRIAL_C_FLAGS)
+elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
+ set (TRIAL_C_FLAGS
+ # Gcc
+ "-mfpu=neon-vfpv4 -mcpu=cortex-a7"
+ "-mfpu=neon -mfloat-abi=hard"
+ "-mfpu=neon -mfloat-abi=softfp"
+ "-mfpu=neon -mfloat-abi=soft"
+ )
+ set (TEST_C_SOURCE "
+ #include
+ int main() {
+ float32x4_t a = vdupq_n_f32(0), b = a, c = vaddq_f32(a,b);
+ return 0;
+ }
+ ")
+else ()
+ if (WIN32) # Safety for when mixed lib/app compilers (but performance hit)
+ set (GCC_WIN32_SIMD32_OPTS "-mincoming-stack-boundary=2")
+ endif ()
+
+ set (TRIAL_C_FLAGS
+ # x64
+ " "
+ # MSVC x86
+ "/arch:SSE /fp:fast -D__SSE__"
+ # Gcc x86
+ "-Wno-cast-align -msse -mfpmath=sse ${GCC_WIN32_SIMD32_OPTS}"
+ # Gcc x86 (old versions)
+ "-msse -mfpmath=sse"
+ )
+ set (TEST_C_SOURCE "
+ #include
+ int main() {
+ __m128 a = _mm_setzero_ps(), b = a, c = _mm_add_ps(a,b);
+ return 0;
+ }
+ ")
+endif ()
+
+include (FindCFlags)
+
+FindCFlags ("SIMD32" "FLOAT-32 (single-precision) SIMD vectorization"
+ "${TRIAL_C_FLAGS}" "${TEST_C_SOURCE}")
diff --git a/cmake/Modules/FindSIMD64.cmake b/cmake/Modules/FindSIMD64.cmake
new file mode 100644
index 0000000..5bf82c9
--- /dev/null
+++ b/cmake/Modules/FindSIMD64.cmake
@@ -0,0 +1,29 @@
+# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1 See LICENCE for details.
+
+# - Finds SIMD64 support
+#
+# The following variables are set:
+# SIMD64_C_FLAGS - flags to add to the C compiler for this package.
+# SIMD64_FOUND - true if support for this package is found.
+
+if (DEFINED SIMD64_C_FLAGS OR CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
+ set (TRIAL_C_FLAGS)
+else ()
+ set (TRIAL_C_FLAGS
+ "-mavx" # Gcc
+ "/arch:AVX" # MSVC
+ )
+ set (TEST_C_SOURCE "
+ #include
+ int main() {
+ __m256d a = _mm256_setzero_pd(), b = a, c = _mm256_add_pd(a,b);
+ return 0;
+ }
+ ")
+endif ()
+
+include (FindCFlags)
+
+FindCFlags ("SIMD64" "FLOAT-64 (double-precision) SIMD vectorization"
+ "${TRIAL_C_FLAGS}" "${TEST_C_SOURCE}")
diff --git a/cmake/Modules/SetSystemProcessor.cmake b/cmake/Modules/SetSystemProcessor.cmake
index d1973f8..9bafe05 100644
--- a/cmake/Modules/SetSystemProcessor.cmake
+++ b/cmake/Modules/SetSystemProcessor.cmake
@@ -5,13 +5,16 @@
macro (set_system_processor)
if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "")
+ unset(CMAKE_SYSTEM_PROCESSOR)
+ endif ()
+ if (NOT DEFINED CMAKE_SYSTEM_PROCESSOR)
include (CheckCSourceCompiles)
- set (CPU_CANDIDATES
+ set (CPU_LINES
"#if defined __x86_64__ || defined _M_X64 /*\;x86_64\;*/"
"#if defined __i386__ || defined _M_IX86 /*\;x86_32\;*/"
"#if defined __arm__ || defined _M_ARM /*\;arm\;*/"
)
- foreach (CPU_LINE ${CPU_CANDIDATES})
+ foreach (CPU_LINE ${CPU_LINES})
string (CONCAT CPU_SOURCE "${CPU_LINE}" "
int main() {return 0;}
#endif
@@ -19,10 +22,14 @@ macro (set_system_processor)
unset (SYSTEM_PROCESSOR_DETECTED CACHE)
check_c_source_compiles ("${CPU_SOURCE}" SYSTEM_PROCESSOR_DETECTED)
if (SYSTEM_PROCESSOR_DETECTED)
- list (GET CPU_LINE 1 CMAKE_SYSTEM_PROCESSOR)
- message (STATUS "CMAKE_SYSTEM_PROCESSOR set to ${CMAKE_SYSTEM_PROCESSOR}")
+ list (GET CPU_LINE 1 DETECTED_SYSTEM_PROCESSOR)
+ message (STATUS "CMAKE_SYSTEM_PROCESSOR is ${DETECTED_SYSTEM_PROCESSOR}")
break ()
endif ()
endforeach ()
endif ()
+
+ # N.B. Will not overwrite existing cache variable:
+ set (CMAKE_SYSTEM_PROCESSOR "${DETECTED_SYSTEM_PROCESSOR}"
+ CACHE STRING "Target system processor")
endmacro ()
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index b7b50f8..7a95823 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,16 +1,18 @@
-# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
+if (${BUILD_EXAMPLES} OR ${BUILD_TESTS})
+ set (SOURCES 3-options-input-fn)
+ if (${WITH_LSR_BINDINGS})
+ set (LSR_SOURCES 1a-lsr)
+ endif ()
+endif ()
+
if (${BUILD_EXAMPLES})
project (soxr) # Adds c++ compiler
- file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/[1-9]-*.[cC])
-elseif (${BUILD_TESTS})
- file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/3*.c)
-endif ()
-
-if (${BUILD_EXAMPLES} OR ${BUILD_TESTS})
- if (${WITH_LSR_BINDINGS})
- set (LSR_SOURCES 1a-lsr.c)
+ list (APPEND SOURCES 1-single-block 2-stream 4-split-channels)
+ if (${WITH_VR32})
+ list (APPEND SOURCES 5-variable-rate)
endif ()
endif ()
@@ -34,4 +36,5 @@ if (${BUILD_TESTS} AND ${WITH_LSR_BINDINGS})
endif ()
file (GLOB INSTALL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.[cCh])
-install (FILES ${INSTALL_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/README DESTINATION ${DOC_INSTALL_DIR}/examples)
+install (FILES ${INSTALL_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/README
+ DESTINATION ${DOC_INSTALL_DIR}/examples)
diff --git a/examples/examples-common.h b/examples/examples-common.h
index 25bd48c..7ebde73 100644
--- a/examples/examples-common.h
+++ b/examples/examples-common.h
@@ -1,4 +1,4 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
/* Common includes etc. for the examples. */
@@ -6,7 +6,7 @@
#include
#include
#include
-#include
+#include "math-wrap.h"
#include
#include
#include
@@ -17,10 +17,7 @@
#include
#include
#define USE_STD_STDIO _setmode(_fileno(stdout), _O_BINARY), \
- _setmode(_fileno(stdin ), _O_BINARY);
- /* Sometimes missing, so ensure that it is defined: */
- #undef M_PI
- #define M_PI 3.14159265358979323846
+ _setmode(_fileno(stdin ), _O_BINARY)
#else
#define USE_STD_STDIO
#endif
diff --git a/lsr-tests/CMakeLists.txt b/lsr-tests/CMakeLists.txt
index 1ac041e..4f718f7 100644
--- a/lsr-tests/CMakeLists.txt
+++ b/lsr-tests/CMakeLists.txt
@@ -35,7 +35,7 @@ set (tests
callback_hang_test callback_test downsample_test
float_short_test misc_test multi_channel_test
reset_test simple_test termination_test varispeed_test)
-if (WITH_DOUBLE_PRECISION)
+if (WITH_CR64 OR WITH_CR64S)
set (tests ${tests} snr_bw_test)
endif ()
diff --git a/lsr-tests/config.h.in b/lsr-tests/config.h.in
index 3de8799..1095e00 100644
--- a/lsr-tests/config.h.in
+++ b/lsr-tests/config.h.in
@@ -1,4 +1,4 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#if !defined soxsrc_lsr_tests_config_included
diff --git a/lsr-tests/snr_bw_test.c b/lsr-tests/snr_bw_test.c
index 26fb279..55130b4 100644
--- a/lsr-tests/snr_bw_test.c
+++ b/lsr-tests/snr_bw_test.c
@@ -118,9 +118,9 @@ main (int argc, char *argv [])
BOOLEAN_TRUE,
{ { 1, { 0.01111111111 }, 3.0, 1, 130.0, 1.0 },
{ 1, { 0.01111111111 }, 0.6, 1, 132.0, 1.0 },
- { 1, { 0.01111111111 }, 0.3, 1, 138.0, 1.0 },
+ { 1, { 0.01111111111 }, 0.3, 1, 135.0, 1.0 },
{ 1, { 0.01111111111 }, 1.0, 1, 155.0, 1.0 },
- { 1, { 0.01111111111 }, 1.001, 1, 134.0, 1.0 },
+ { 1, { 0.01111111111 }, 1.001, 1, 133.0, 1.0 },
{ 2, { 0.011111, 0.324 }, 1.9999, 2, 127.0, 1.0 },
{ 2, { 0.012345, 0.457 }, 0.456789, 1, 124.0, 0.5 },
{ 2, { 0.011111, 0.45 }, 0.6, 1, 126.0, 0.5 },
@@ -135,10 +135,10 @@ main (int argc, char *argv [])
{ 1, { 0.01111111111 }, 0.6, 1, 147.0, 1.0 },
{ 1, { 0.01111111111 }, 0.3, 1, 147.0, 1.0 },
{ 1, { 0.01111111111 }, 1.0, 1, 155.0, 1.0 },
- { 1, { 0.01111111111 }, 1.001, 1, 147.0, 1.0 },
+ { 1, { 0.01111111111 }, 1.001, 1, 146.0, 1.0 },
{ 2, { 0.011111, 0.324 }, 1.9999, 2, 147.0, 1.0 },
{ 2, { 0.012345, 0.457 }, 0.456789, 1, 148.0, 0.5 },
- { 2, { 0.011111, 0.45 }, 0.6, 1, 149.0, 0.5 },
+ { 2, { 0.011111, 0.45 }, 0.6, 1, 145.0, 0.5 },
{ 1, { 0.43111111111 }, 1.33, 1, 145.0, 1.0 }
}
},
diff --git a/msvc/libsoxr.vcproj b/msvc/libsoxr.vcproj
index b1e1714..2dd7be0 100644
--- a/msvc/libsoxr.vcproj
+++ b/msvc/libsoxr.vcproj
@@ -60,6 +60,12 @@
+
+
+
+
+
+
@@ -67,10 +73,9 @@
-
-
-
-
+
+
+
diff --git a/msvc/soxr-config.h b/msvc/soxr-config.h
index d6b07ac..a1fcfb4 100644
--- a/msvc/soxr-config.h
+++ b/msvc/soxr-config.h
@@ -7,12 +7,14 @@
#if !defined soxr_config_included
#define soxr_config_included
-#define WITH_SINGLE_PRECISION 1
-#define WITH_DOUBLE_PRECISION 1
+#define WITH_CR32 1
+#define WITH_CR32S 1
+#define WITH_CR64 1
+#define WITH_CR64S 1
+#define WITH_VR32 1
#define AVCODEC_FOUND 0
#define AVUTIL_FOUND 0
-#define SIMD_FOUND 1
#define HAVE_FENV_H 0
#define HAVE_STDBOOL_H 0
diff --git a/multi-arch b/multi-arch
new file mode 100755
index 0000000..174cc4b
--- /dev/null
+++ b/multi-arch
@@ -0,0 +1,29 @@
+#!/bin/sh
+set -e
+
+# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1 See LICENCE for details.
+
+rm -f CMakeCache.txt # Prevent interference from any in-tree build
+
+j=-j4
+build=Release
+
+for n in \
+ cc: \
+ clang: \
+ arm-linux-gnueabi-gcc:Linux \
+ x86_64-w64-mingw32-gcc:Windows \
+ i686-w64-mingw32-gcc:Windows \
+ ; do
+ compiler=$(echo $n | sed 's/:.*//')
+ system=$(echo $n | sed 's/.*://')
+ dir=$build-$compiler
+ which $compiler > /dev/null || echo $compiler not found && (
+ echo "***" $dir
+ mkdir -p $dir
+ cd $dir
+ cmake -DCMAKE_BUILD_TYPE=$build -DCMAKE_C_COMPILER=$compiler -DCMAKE_SYSTEM_NAME="$system" ..
+ make $j && [ /$system = / ] && ctest -j || true
+ )
+done
diff --git a/soxr-config.h.in b/soxr-config.h.in
index 3c7dbe9..37ccdb2 100644
--- a/soxr-config.h.in
+++ b/soxr-config.h.in
@@ -4,12 +4,14 @@
#if !defined soxr_config_included
#define soxr_config_included
-#cmakedefine01 WITH_SINGLE_PRECISION
-#cmakedefine01 WITH_DOUBLE_PRECISION
+#cmakedefine01 WITH_CR32
+#cmakedefine01 WITH_CR32S
+#cmakedefine01 WITH_CR64
+#cmakedefine01 WITH_CR64S
+#cmakedefine01 WITH_VR32
#cmakedefine01 AVCODEC_FOUND
#cmakedefine01 AVUTIL_FOUND
-#cmakedefine01 SIMD_FOUND
#cmakedefine01 HAVE_FENV_H
#cmakedefine01 HAVE_STDBOOL_H
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 19c7522..67d34d7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,4 +1,4 @@
-# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
@@ -7,8 +7,10 @@
if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/vr-coefs.h)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
- set_property(SOURCE vr32.c APPEND PROPERTY OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h)
+ set_property(SOURCE vr32.c
+ APPEND PROPERTY OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h)
add_executable (vr-coefs vr-coefs.c)
+ target_link_libraries (vr-coefs ${LIBM_LIBRARIES})
ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h
COMMAND vr-coefs > ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h
DEPENDS vr-coefs)
@@ -29,22 +31,41 @@ if (AVCODEC_FOUND)
elseif (WITH_PFFFT)
#set (RDFT32 pffft32)
set (RDFT32S pffft32s)
-elseif (SIMD_FOUND)
- set (RDFT32S fft4g32s)
+elseif (WITH_CR32S)
+ set (RDFT32S fft4g32s fft4g32)
endif ()
-if (WITH_DOUBLE_PRECISION)
- set (DP_SOURCES rate64)
+set (SOURCES ${PROJECT_NAME}.c constructors data-io)
+
+if (WITH_CR32 OR WITH_CR32S OR WITH_CR64 OR WITH_CR64S)
+ list (APPEND SOURCES dbesi0 filter fft4g64 cr.c)
endif ()
-if (WITH_SINGLE_PRECISION)
- set (SP_SOURCES rate32 ${RDFT32})
+if (WITH_CR32)
+ list (APPEND SOURCES cr32 ${RDFT32})
endif ()
-if (SIMD_FOUND)
- set (SIMD_SOURCES rate32s ${RDFT32S} simd)
- foreach (source ${SIMD_SOURCES})
- set_property (SOURCE ${source} PROPERTY COMPILE_FLAGS ${SIMD_C_FLAGS})
+if (WITH_CR64)
+ list (APPEND SOURCES cr64)
+endif ()
+
+if (WITH_VR32)
+ list (APPEND SOURCES vr32)
+endif ()
+
+if (WITH_CR32S)
+ foreach (source cr32s ${RDFT32S} simd32)
+ list (APPEND SOURCES ${source})
+ set_property (SOURCE ${source}
+ APPEND_STRING PROPERTY COMPILE_FLAGS ${SIMD32_C_FLAGS})
+ endforeach ()
+endif ()
+
+if (WITH_CR64S)
+ foreach (source cr64s pffft64s simd64)
+ list (APPEND SOURCES ${source})
+ set_property (SOURCE ${source}
+ APPEND_STRING PROPERTY COMPILE_FLAGS ${SIMD64_C_FLAGS})
endforeach ()
endif ()
@@ -52,8 +73,7 @@ endif ()
# Libsoxr:
-add_library (${PROJECT_NAME} ${LIB_TYPE} ${PROJECT_NAME}.c data-io dbesi0 filter fft4g64
- ${SP_SOURCES} vr32 ${DP_SOURCES} ${SIMD_SOURCES})
+add_library (${PROJECT_NAME} ${LIB_TYPE} ${SOURCES})
target_link_libraries (${PROJECT_NAME} PRIVATE ${LIBS} ${LIBM_LIBRARIES})
set_target_properties (${PROJECT_NAME} PROPERTIES
VERSION "${SO_VERSION}"
diff --git a/src/aliases.h b/src/aliases.h
index eb42bdc..ebcce41 100644
--- a/src/aliases.h
+++ b/src/aliases.h
@@ -18,8 +18,10 @@
#define lsx_dfst_f _soxr_dfst_f
#define lsx_dfst _soxr_dfst
#define lsx_fir_to_phase _soxr_fir_to_phase
+#define lsx_f_resp _soxr_f_resp
#define lsx_init_fft_cache_f _soxr_init_fft_cache_f
#define lsx_init_fft_cache _soxr_init_fft_cache
+#define lsx_inv_f_resp _soxr_inv_f_resp
#define lsx_kaiser_beta _soxr_kaiser_beta
#define lsx_kaiser_params _soxr_kaiser_params
#define lsx_make_lpf _soxr_make_lpf
diff --git a/src/avfft32.c b/src/avfft32.c
index 5be13d2..fe651f5 100644
--- a/src/avfft32.c
+++ b/src/avfft32.c
@@ -1,17 +1,19 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
+#include
#include
#include
#include "filter.h"
+#include "rdft_t.h"
static void * forward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),DFT_R2C);}
static void * backward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),IDFT_C2R);}
static void rdft(int length, void * setup, float * h) {av_rdft_calc(setup, h); (void)length;}
static int multiplier(void) {return 2;}
static void nothing(void) {}
+static int flags(void) {return 0;}
-typedef void (* fn_t)(void);
fn_t _soxr_rdft32_cb[] = {
(fn_t)forward_setup,
(fn_t)backward_setup,
@@ -24,4 +26,8 @@ fn_t _soxr_rdft32_cb[] = {
(fn_t)_soxr_ordered_partial_convolve_f,
(fn_t)multiplier,
(fn_t)nothing,
+ (fn_t)malloc,
+ (fn_t)calloc,
+ (fn_t)free,
+ (fn_t)flags,
};
diff --git a/src/avfft32s.c b/src/avfft32s.c
index 75e485e..8b1da80 100644
--- a/src/avfft32s.c
+++ b/src/avfft32s.c
@@ -3,15 +3,16 @@
#include
#include
-#include "simd.h"
+#include "simd32.h"
+#include "rdft_t.h"
static void * forward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),DFT_R2C);}
static void * backward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),IDFT_C2R);}
static void rdft(int length, void * setup, float * h) {av_rdft_calc(setup, h); (void)length;}
static int multiplier(void) {return 2;}
static void nothing(void) {}
+static int flags(void) {return RDFT_IS_SIMD;}
-typedef void (* fn_t)(void);
fn_t _soxr_rdft32s_cb[] = {
(fn_t)forward_setup,
(fn_t)backward_setup,
@@ -20,8 +21,12 @@ fn_t _soxr_rdft32s_cb[] = {
(fn_t)rdft,
(fn_t)rdft,
(fn_t)rdft,
- (fn_t)_soxr_ordered_convolve_simd,
- (fn_t)_soxr_ordered_partial_convolve_simd,
+ (fn_t)ORDERED_CONVOLVE_SIMD,
+ (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
(fn_t)multiplier,
(fn_t)nothing,
+ (fn_t)SIMD_ALIGNED_MALLOC,
+ (fn_t)SIMD_ALIGNED_CALLOC,
+ (fn_t)SIMD_ALIGNED_FREE,
+ (fn_t)flags,
};
diff --git a/src/avx.h b/src/avx.h
new file mode 100644
index 0000000..ace19b5
--- /dev/null
+++ b/src/avx.h
@@ -0,0 +1,40 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+/* AVX support macros */
+
+#if !defined soxr_avx_included
+#define soxr_avx_included
+
+#include
+
+typedef __m256d v4sf;
+#define VZERO() _mm256_setzero_pd()
+#define VMUL(a,b) _mm256_mul_pd(a,b)
+#define VADD(a,b) _mm256_add_pd(a,b)
+#define VMADD(a,b,c) VADD(VMUL(a,b),c) /* Note: gcc -mfma will `fuse' these */
+#define VSUB(a,b) _mm256_sub_pd(a,b)
+#define LD_PS1(p) _mm256_set1_pd(p)
+#define INTERLEAVE2(in1, in2, out1, out2) {v4sf \
+ t1 = _mm256_unpacklo_pd(in1, in2), \
+ t2 = _mm256_unpackhi_pd(in1, in2); \
+ out1 = _mm256_permute2f128_pd(t1,t2,0x20); \
+ out2 = _mm256_permute2f128_pd(t1,t2,0x31); }
+#define UNINTERLEAVE2(in1, in2, out1, out2) {v4sf \
+ t1 = _mm256_permute2f128_pd(in1,in2,0x20), \
+ t2 = _mm256_permute2f128_pd(in1,in2,0x31); \
+ out1 = _mm256_unpacklo_pd(t1, t2); \
+ out2 = _mm256_unpackhi_pd(t1, t2);}
+#define VTRANSPOSE4(x0,x1,x2,x3) {v4sf \
+ t0 = _mm256_shuffle_pd(x0,x1, 0x0), \
+ t2 = _mm256_shuffle_pd(x0,x1, 0xf), \
+ t1 = _mm256_shuffle_pd(x2,x3, 0x0), \
+ t3 = _mm256_shuffle_pd(x2,x3, 0xf); \
+ x0 = _mm256_permute2f128_pd(t0,t1, 0x20); \
+ x1 = _mm256_permute2f128_pd(t2,t3, 0x20); \
+ x2 = _mm256_permute2f128_pd(t0,t1, 0x31); \
+ x3 = _mm256_permute2f128_pd(t2,t3, 0x31);}
+#define VSWAPHL(a,b) _mm256_permute2f128_pd(b, a, 0x30)
+#define VALIGNED(ptr) ((((long)(ptr)) & 0x1F) == 0)
+
+#endif
diff --git a/src/constructors.c b/src/constructors.c
new file mode 100644
index 0000000..0128990
--- /dev/null
+++ b/src/constructors.c
@@ -0,0 +1,85 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#include "soxr.h"
+#include "filter.h"
+#include "internal.h"
+#include
+#include
+#include
+
+#if !WITH_CR32 && !WITH_CR32S && !WITH_CR64 && !WITH_CR64S
+#undef lsx_to_3dB
+#define lsx_to_3dB(x) ((x)/(x))
+#endif
+
+
+
+soxr_quality_spec_t soxr_quality_spec(unsigned long recipe, unsigned long flags)
+{
+ soxr_quality_spec_t spec, * p = &spec;
+ unsigned quality = recipe & 0xf;
+ double rej;
+ memset(p, 0, sizeof(*p));
+ if (quality > SOXR_PRECISIONQ) {
+ p->e = "invalid quality type";
+ return spec;
+ }
+ flags |= quality < SOXR_LSR0Q ? RESET_ON_CLEAR : 0;
+ p->phase_response = "\62\31\144"[(recipe & 0x30)>>4];
+ p->stopband_begin = 1;
+ p->precision =
+ quality == SOXR_QQ ? 0 :
+ quality <= SOXR_16_BITQ ? 16 :
+ quality <= SOXR_32_BITQ ? 4 + quality * 4 :
+ quality <= SOXR_LSR2Q ? 55 - quality * 4 : /* TODO: move to lsr.c */
+ 0;
+ rej = p->precision * linear_to_dB(2.);
+ p->flags = flags;
+ if (quality <= SOXR_32_BITQ || quality == SOXR_PRECISIONQ) {
+ #define LOW_Q_BW0 (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */
+ p->passband_end = quality == 1? LOW_Q_BW0 : 1 - .05 / lsx_to_3dB(rej);
+ if (quality <= 2)
+ p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM;
+ }
+ else { /* TODO: move to lsr.c */
+ static float const bw[] = {.931f, .832f, .663f};
+ p->passband_end = bw[quality - SOXR_LSR0Q];
+ if (quality == SOXR_LSR2Q)
+ p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_LSR2Q | SOXR_PROMOTE_TO_LQ;
+ }
+ if (recipe & SOXR_STEEP_FILTER)
+ p->passband_end = 1 - .01 / lsx_to_3dB(rej);
+ return spec;
+}
+
+
+
+soxr_runtime_spec_t soxr_runtime_spec(unsigned num_threads)
+{
+ soxr_runtime_spec_t spec, * p = &spec;
+ memset(p, 0, sizeof(*p));
+ p->log2_min_dft_size = 10;
+ p->log2_large_dft_size = 17;
+ p->coef_size_kbytes = 400;
+ p->num_threads = num_threads;
+ return spec;
+}
+
+
+
+soxr_io_spec_t soxr_io_spec(
+ soxr_datatype_t itype,
+ soxr_datatype_t otype)
+{
+ soxr_io_spec_t spec, * p = &spec;
+ memset(p, 0, sizeof(*p));
+ if ((itype | otype) >= SOXR_SPLIT * 2)
+ p->e = "invalid io datatype(s)";
+ else {
+ p->itype = itype;
+ p->otype = otype;
+ p->scale = 1;
+ }
+ return spec;
+}
diff --git a/src/cr-core.c b/src/cr-core.c
new file mode 100644
index 0000000..bc6282c
--- /dev/null
+++ b/src/cr-core.c
@@ -0,0 +1,297 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details.
+ *
+ * Constant-rate resampling engine-specific code. */
+
+#include
+#include
+#include
+#include
+
+#include "filter.h"
+
+#if defined SOXR_LIB
+ #include "internal.h"
+ #include "cr.h"
+ #if CORE_TYPE & CORE_DBL
+ typedef double sample_t;
+ #if CORE_TYPE & CORE_SIMD_DFT
+ #define RDFT_CB _soxr_rdft64s_cb
+ #else
+ #define RDFT_CB _soxr_rdft64_cb
+ #endif
+ #else
+ typedef float sample_t;
+ #if CORE_TYPE & CORE_SIMD_DFT
+ #define RDFT_CB _soxr_rdft32s_cb
+ #else
+ #define RDFT_CB _soxr_rdft32_cb
+ #endif
+ #endif
+
+ #if CORE_TYPE & (CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT)
+ #if CORE_TYPE & CORE_DBL
+ #include "simd64.h"
+ #include "simd64-dev.h"
+ #else
+ #include "simd32.h"
+ #include "simd32-dev.h"
+ #endif
+ #endif
+
+ extern fn_t RDFT_CB[];
+#else
+ #define RDFT_CB 0
+#endif
+
+
+
+static void cubic_stage_fn(stage_t * p, fifo_t * output_fifo)
+{
+ sample_t const * input = stage_read_p(p);
+ int num_in = min(stage_occupancy(p), p->input_size);
+ int i, max_num_out = 1 + (int)(num_in * p->out_in_ratio);
+ sample_t * output = fifo_reserve(output_fifo, max_num_out);
+
+ for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) {
+ sample_t const * s = input + p->at.integer;
+ double x = p->at.fraction * (1 / MULT32);
+ double b = .5*(s[1]+s[-1])-*s, a = (1/6.)*(s[2]-s[1]+s[-1]-*s-4*b);
+ double c = s[1]-*s-a-b;
+ output[i] = (sample_t)(p->mult * (((a*x + b)*x + c)*x + *s));
+ }
+ assert(max_num_out - i >= 0);
+ fifo_trim_by(output_fifo, max_num_out - i);
+ fifo_read(&p->fifo, p->at.integer, NULL);
+ p->at.integer = 0;
+}
+
+
+
+#if CORE_TYPE & CORE_DBL
+ #define SIMD_AVX ((CORE_TYPE & CORE_SIMD_HALF) && defined __AVX__)
+ #define SIMD_SSE 0
+#else
+ #define SIMD_SSE ((CORE_TYPE & CORE_SIMD_HALF) && (defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86))
+ #define SIMD_AVX 0
+#endif
+
+#define SIMD_NEON ((CORE_TYPE & CORE_SIMD_HALF) && defined __arm__)
+
+
+
+
+#include "half-coefs.h"
+
+#if !(CORE_TYPE & CORE_SIMD_HALF)
+#define FUNCTION_H h7
+#define CONVOLVE ____ __ _
+#include "half-fir.h"
+#endif
+
+#define FUNCTION_H h8
+#define CONVOLVE ____ ____
+#include "half-fir.h"
+
+#define FUNCTION_H h9
+#define CONVOLVE ____ ____ _
+#include "half-fir.h"
+
+#if CORE_TYPE & CORE_DBL
+ #define FUNCTION_H h10
+ #define CONVOLVE ____ ____ __
+ #include "half-fir.h"
+
+ #define FUNCTION_H h11
+ #define CONVOLVE ____ ____ __ _
+ #include "half-fir.h"
+
+ #define FUNCTION_H h12
+ #define CONVOLVE ____ ____ ____
+ #include "half-fir.h"
+
+ #define FUNCTION_H h13
+ #define CONVOLVE ____ ____ ____ _
+ #include "half-fir.h"
+#endif
+
+static half_fir_info_t const half_firs[] = {
+#if !(CORE_TYPE & CORE_SIMD_HALF)
+ { 7, half_fir_coefs_7 , h7 , 0 , 120.65f},
+#endif
+ { 8, half_fir_coefs_8 , h8 , 0 , 136.51f},
+ { 9, half_fir_coefs_9 , h9 , 0 , 152.32f},
+#if CORE_TYPE & CORE_DBL
+ {10, half_fir_coefs_10, h10, 0 , 168.08f},
+ {11, half_fir_coefs_11, h11, 0 , 183.79f},
+ {12, half_fir_coefs_12, h12, 0 , 199.46f},
+ {13, half_fir_coefs_13, h13, 0 , 215.12f},
+#endif
+};
+
+#undef SIMD_AVX
+#undef SIMD_NEON
+#undef SIMD_SSE
+
+
+
+#if CORE_TYPE & CORE_DBL
+ #define SIMD_AVX ((CORE_TYPE & CORE_SIMD_POLY) && defined __AVX__)
+ #define SIMD_SSE 0
+#else
+ #define SIMD_SSE ((CORE_TYPE & CORE_SIMD_POLY) && (defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86))
+ #define SIMD_AVX 0
+#endif
+
+#define SIMD_NEON ((CORE_TYPE & CORE_SIMD_POLY) && defined __arm__)
+
+
+
+
+#define HI_PREC_CLOCK
+#define COEFS (sample_t * __restrict)p->shared->poly_fir_coefs
+#define VAR_LENGTH p->n
+#define VAR_CONVOLVE(n) while (j < (n)) _
+#define VAR_POLY_PHASE_BITS p->phase_bits
+
+
+
+#define FUNCTION vpoly0
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE(n) VAR_CONVOLVE(n)
+#include "poly-fir0.h"
+
+#define FUNCTION vpoly1
+#define COEF_INTERP 1
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE(n) VAR_CONVOLVE(n)
+#include "poly-fir.h"
+
+#define FUNCTION vpoly2
+#define COEF_INTERP 2
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE(n) VAR_CONVOLVE(n)
+#include "poly-fir.h"
+
+#define FUNCTION vpoly3
+#define COEF_INTERP 3
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE(n) VAR_CONVOLVE(n)
+#include "poly-fir.h"
+
+
+
+#if !(CORE_TYPE & CORE_SIMD_POLY)
+
+#define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+#define FUNCTION U100_0
+#define FIR_LENGTH U100_l
+#define CONVOLVE(n) poly_fir_convolve_U100
+#include "poly-fir0.h"
+
+#define u100_l 11
+#define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _ _
+#define FUNCTION u100_0
+#define FIR_LENGTH u100_l
+#define CONVOLVE(n) poly_fir_convolve_u100
+#include "poly-fir0.h"
+
+#define FUNCTION u100_1
+#define COEF_INTERP 1
+#define PHASE_BITS 8
+#define FIR_LENGTH u100_l
+#define CONVOLVE(n) poly_fir_convolve_u100
+#include "poly-fir.h"
+
+#define FUNCTION u100_2
+#define COEF_INTERP 2
+#define PHASE_BITS 6
+#define FIR_LENGTH u100_l
+#define CONVOLVE(n) poly_fir_convolve_u100
+#include "poly-fir.h"
+
+#endif
+
+#define u100_1_b 8
+#define u100_2_b 6
+
+
+
+static poly_fir_t const poly_firs[] = {
+ {-1, {{0, vpoly0}, { 7.2f, vpoly1}, {5.0f, vpoly2}}},
+ {-1, {{0, vpoly0}, { 9.4f, vpoly1}, {6.7f, vpoly2}}},
+ {-1, {{0, vpoly0}, {12.4f, vpoly1}, {7.8f, vpoly2}}},
+ {-1, {{0, vpoly0}, {13.6f, vpoly1}, {9.3f, vpoly2}}},
+ {-1, {{0, vpoly0}, {10.5f, vpoly2}, {8.4f, vpoly3}}},
+ {-1, {{0, vpoly0}, {11.85f,vpoly2}, {9.0f, vpoly3}}},
+
+ {-1, {{0, vpoly0}, { 8.0f, vpoly1}, {5.3f, vpoly2}}},
+ {-1, {{0, vpoly0}, { 8.6f, vpoly1}, {5.7f, vpoly2}}},
+ {-1, {{0, vpoly0}, {10.6f, vpoly1}, {6.75f,vpoly2}}},
+ {-1, {{0, vpoly0}, {12.6f, vpoly1}, {8.6f, vpoly2}}},
+ {-1, {{0, vpoly0}, { 9.6f, vpoly2}, {7.6f, vpoly3}}},
+ {-1, {{0, vpoly0}, {11.4f, vpoly2}, {8.65f,vpoly3}}},
+
+#if CORE_TYPE & CORE_SIMD_POLY
+ {10.62f, {{0, vpoly0}, {0, 0}, {0, 0}}},
+ {-1, {{0, vpoly0}, {u100_1_b, vpoly1}, {u100_2_b, vpoly2}}},
+#else
+ {10.62f, {{U100_l, U100_0}, {0, 0}, {0, 0}}},
+ {11.28f, {{u100_l, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}},
+#endif
+ {-1, {{0, vpoly0}, { 9, vpoly1}, { 6, vpoly2}}},
+ {-1, {{0, vpoly0}, { 11, vpoly1}, { 7, vpoly2}}},
+ {-1, {{0, vpoly0}, { 13, vpoly1}, { 8, vpoly2}}},
+ {-1, {{0, vpoly0}, { 10, vpoly2}, { 8, vpoly3}}},
+ {-1, {{0, vpoly0}, { 12, vpoly2}, { 9, vpoly3}}},
+};
+
+
+
+static cr_core_t const cr_core = {
+
+#if CORE_TYPE & CORE_SIMD_POLY
+ {SIMD_ALIGNED_MALLOC, SIMD_ALIGNED_CALLOC, SIMD_ALIGNED_FREE},
+#else
+ {malloc, calloc, free},
+#endif
+ half_firs, array_length(half_firs),
+ 0, 0,
+ cubic_stage_fn,
+ poly_firs, RDFT_CB
+};
+
+
+
+#if defined SOXR_LIB
+
+#include "soxr.h"
+
+static char const * rate_create(void * channel, void * shared, double io_ratio,
+ soxr_quality_spec_t * q_spec, soxr_runtime_spec_t * r_spec, double scale)
+{
+ return _soxr_init(channel, shared, io_ratio, q_spec, r_spec, scale,
+ &cr_core, CORE_TYPE);
+}
+
+
+
+static char const * id(void) {return CORE_STR;}
+
+fn_t RATE_CB[] = {
+ (fn_t)_soxr_input,
+ (fn_t)_soxr_process,
+ (fn_t)_soxr_output,
+ (fn_t)_soxr_flush,
+ (fn_t)_soxr_close,
+ (fn_t)_soxr_delay,
+ (fn_t)_soxr_sizes,
+ (fn_t)rate_create,
+ (fn_t)0,
+ (fn_t)id,
+};
+
+#endif
diff --git a/src/cr.c b/src/cr.c
new file mode 100644
index 0000000..eb65a04
--- /dev/null
+++ b/src/cr.c
@@ -0,0 +1,581 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details.
+ *
+ * Constant-rate resampling common code. */
+
+#include
+#include
+#include
+#include
+
+#include "filter.h"
+
+#if defined SOXR_LIB
+ #include "internal.h"
+ #define STATIC
+#endif
+
+#include "cr.h"
+
+#define num_coefs4 ((core_flags&CORE_SIMD_POLY)? ((num_coefs+3)&~3) : num_coefs)
+
+#define coef_coef(C,T,x) \
+ C((T*)result, interp_order, num_coefs4, j, x, num_coefs4 - 1 - i)
+
+#define STORE(C,T) { \
+ if (interp_order > 2) coef_coef(C,T,3) = (T)d; \
+ if (interp_order > 1) coef_coef(C,T,2) = (T)c; \
+ if (interp_order > 0) coef_coef(C,T,1) = (T)b; \
+ coef_coef(C,T,0) = (T)f0;}
+
+static real * prepare_poly_fir_coefs(double const * coefs, int num_coefs,
+ int num_phases, int interp_order, double multiplier,
+ core_flags_t core_flags, alloc_t const * mem)
+{
+ int i, j, length = num_coefs4 * num_phases * (interp_order + 1);
+ real * result = mem->calloc(1,(size_t)length << LOG2_SIZEOF_REAL(core_flags));
+ double fm1 = coefs[0], f1 = 0, f2 = 0;
+
+ for (i = num_coefs - 1; i >= 0; --i)
+ for (j = num_phases - 1; j >= 0; --j) {
+ double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */
+ int pos = i * num_phases + j - 1;
+ fm1 = pos > 0 ? coefs[pos - 1] * multiplier : 0;
+ switch (interp_order) {
+ case 1: b = f1 - f0; break;
+ case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break;
+ case 3: c=.5*(f1+fm1)-f0;d=(1/6.)*(f2-f1+fm1-f0-4*c);b=f1-f0-d-c; break;
+ default: assert(!interp_order);
+ }
+ switch (core_flags & 3) {
+ case 0: if (WITH_CR32 ) STORE(coef , float ); break;
+ case 1: if (WITH_CR64 ) STORE(coef , double); break;
+ case 2: if (WITH_CR32S) STORE(coef4, float ); break;
+ default:if (WITH_CR64S) STORE(coef4, double); break;
+ }
+ f2 = f1, f1 = f0;
+ }
+ return result;
+}
+
+#undef STORE
+#undef coef_coef
+
+#define IS_FLOAT32 (WITH_CR32 || WITH_CR32S) && \
+ (!(WITH_CR64 || WITH_CR64S) || sizeof_real == sizeof(float))
+#define WITH_FLOAT64 WITH_CR64 || WITH_CR64S
+
+static void dft_stage_fn(stage_t * p, fifo_t * output_fifo)
+{
+ real * output, * dft_out;
+ int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
+ rate_shared_t const * s = p->shared;
+ dft_filter_t const * f = &s->dft_filter[p->dft_filter_num];
+ int const overlap = f->num_taps - 1;
+
+ if (p->at.integer + p->L * num_in >= f->dft_length) {
+ fn_t const * const RDFT_CB = p->rdft_cb;
+ size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(p->core_flags);
+ div_t divd = div(f->dft_length - overlap - p->at.integer + p->L - 1, p->L);
+ real const * input = fifo_read_ptr(&p->fifo);
+ fifo_read(&p->fifo, divd.quot, NULL);
+ num_in -= divd.quot;
+
+ output = fifo_reserve(output_fifo, f->dft_length);
+ dft_out = (p->core_flags & CORE_SIMD_DFT)? p->dft_out : output;
+
+ if (lsx_is_power_of_2(p->L)) { /* F-domain */
+ int portion = f->dft_length / p->L;
+ memcpy(dft_out, input, (unsigned)portion * sizeof_real);
+ rdft_oforward(portion, f->dft_forward_setup, dft_out, p->dft_scratch);
+ if (IS_FLOAT32) {
+#define dft_out ((float *)dft_out)
+ for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */
+ dft_out[i] = dft_out[(portion << 1) - i],
+ dft_out[i+1] = -dft_out[(portion << 1) - i + 1];
+ dft_out[portion] = dft_out[1];
+ dft_out[portion + 1] = 0;
+ dft_out[1] = dft_out[0];
+#undef dft_out
+ }
+ else if (WITH_FLOAT64) {
+#define dft_out ((double *)dft_out)
+ for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */
+ dft_out[i] = dft_out[(portion << 1) - i],
+ dft_out[i+1] = -dft_out[(portion << 1) - i + 1];
+ dft_out[portion] = dft_out[1];
+ dft_out[portion + 1] = 0;
+ dft_out[1] = dft_out[0];
+#undef dft_out
+ }
+
+ for (portion <<= 1; i < f->dft_length; i += portion, portion <<= 1) {
+ memcpy((char *)dft_out + (size_t)i * sizeof_real, dft_out, (size_t)portion * sizeof_real);
+ ((char *)dft_out)[((size_t)i + 1) * sizeof_real] = 0;
+ }
+ if (p->step.integer > 0)
+ rdft_reorder_back(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
+ } else {
+ if (p->L == 1)
+ memcpy(dft_out, input, (size_t)f->dft_length * sizeof_real);
+ else {
+ memset(dft_out, 0, (size_t)f->dft_length * sizeof_real);
+ if (IS_FLOAT32)
+ for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L)
+ ((float *)dft_out)[i] = ((float *)input)[j];
+ else if (WITH_FLOAT64)
+ for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L)
+ ((double *)dft_out)[i] = ((double *)input)[j];
+ p->at.integer = p->L - 1 - divd.rem;
+ }
+ if (p->step.integer > 0)
+ rdft_forward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
+ else
+ rdft_oforward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
+ }
+
+ if (p->step.integer > 0) {
+ rdft_convolve(f->dft_length, f->dft_backward_setup, dft_out, f->coefs);
+ rdft_backward(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
+ if ((p->core_flags & CORE_SIMD_DFT) && p->step.integer == 1)
+ memcpy(output, dft_out, (size_t)f->dft_length * sizeof_real);
+ if (p->step.integer != 1) {
+ if (IS_FLOAT32)
+ for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j,
+ i += p->step.integer)
+ ((float *)output)[j] = ((float *)dft_out)[i];
+ else if (WITH_FLOAT64)
+ for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j,
+ i += p->step.integer)
+ ((double *)output)[j] = ((double *)dft_out)[i];
+ p->remM = i - (f->dft_length - overlap);
+ fifo_trim_by(output_fifo, f->dft_length - j);
+ }
+ else fifo_trim_by(output_fifo, overlap);
+ }
+ else { /* F-domain */
+ int m = -p->step.integer;
+ rdft_convolve_portion(f->dft_length >> m, dft_out, f->coefs);
+ rdft_obackward(f->dft_length >> m, f->dft_backward_setup, dft_out, p->dft_scratch);
+ if (p->core_flags & CORE_SIMD_DFT)
+ memcpy(output, dft_out, (size_t)(f->dft_length >> m) * sizeof_real);
+ fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >>m);
+ }
+ (void)RDFT_CB;
+ }
+ p->input_size = (f->dft_length - p->at.integer + p->L - 1) / p->L;
+}
+
+/* Set to 4 x nearest power of 2 or half of that */
+/* if danger of causing too many cache misses. */
+static int set_dft_length(int num_taps, int min, int large)
+{
+ double d = log((double)num_taps) / log(2.);
+ return 1 << range_limit((int)(d + 2.77), min, max((int)(d + 1.77), large));
+}
+
+static void dft_stage_init(
+ unsigned instance, double Fp, double Fs, double Fn, double att,
+ double phase_response, stage_t * p, int L, int M, double * multiplier,
+ unsigned min_dft_size, unsigned large_dft_size, core_flags_t core_flags,
+ fn_t const * RDFT_CB)
+{
+ dft_filter_t * f = &p->shared->dft_filter[instance];
+ int num_taps = 0, dft_length = f->dft_length, i, offset;
+ bool f_domain_m = abs(3-M) == 1 && Fs <= 1;
+ size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(core_flags);
+
+ if (!dft_length) {
+ int k = phase_response == 50 && lsx_is_power_of_2(L) && Fn == L? L << 1 : 4;
+ double m, * h = lsx_design_lpf(Fp, Fs, Fn, att, &num_taps, -k, -1.);
+
+ if (phase_response != 50)
+ lsx_fir_to_phase(&h, &num_taps, &f->post_peak, phase_response);
+ else f->post_peak = num_taps / 2;
+
+ dft_length = set_dft_length(num_taps, (int)min_dft_size, (int)large_dft_size);
+ f->coefs = rdft_calloc((size_t)dft_length, sizeof_real);
+ offset = dft_length - num_taps + 1;
+ m = (1. / dft_length) * rdft_multiplier() * L * *multiplier;
+ if (IS_FLOAT32) for (i = 0; i < num_taps; ++i)
+ ((float *)f->coefs)[(i + offset) & (dft_length - 1)] =(float)(h[i] * m);
+ else if (WITH_FLOAT64) for (i = 0; i < num_taps; ++i)
+ ((double *)f->coefs)[(i + offset) & (dft_length - 1)] = h[i] * m;
+ free(h);
+ }
+
+ if (rdft_flags() & RDFT_IS_SIMD)
+ p->dft_out = rdft_malloc(sizeof_real * (size_t)dft_length);
+ if (rdft_flags() & RDFT_NEEDS_SCRATCH)
+ p->dft_scratch = rdft_malloc(2 * sizeof_real * (size_t)dft_length);
+
+ if (!f->dft_length) {
+ void * coef_setup = rdft_forward_setup(dft_length);
+ int Lp = lsx_is_power_of_2(L)? L : 1;
+ int Mp = f_domain_m? M : 1;
+ f->dft_forward_setup = rdft_forward_setup(dft_length / Lp);
+ f->dft_backward_setup = rdft_backward_setup(dft_length / Mp);
+ if (Mp == 1)
+ rdft_forward(dft_length, coef_setup, f->coefs, p->dft_scratch);
+ else
+ rdft_oforward(dft_length, coef_setup, f->coefs, p->dft_scratch);
+ rdft_delete_setup(coef_setup);
+ f->num_taps = num_taps;
+ f->dft_length = dft_length;
+ lsx_debug("fir_len=%i dft_length=%i Fp=%g Fs=%g Fn=%g att=%g %i/%i",
+ num_taps, dft_length, Fp, Fs, Fn, att, L, M);
+ }
+ *multiplier = 1;
+ p->out_in_ratio = (double)L / M;
+ p->core_flags = core_flags;
+ p->rdft_cb = RDFT_CB;
+ p->fn = dft_stage_fn;
+ p->preload = f->post_peak / L;
+ p->at.integer = f->post_peak % L;
+ p->L = L;
+ p->step.integer = f_domain_m? -M/2 : M;
+ p->dft_filter_num = instance;
+ p->block_len = f->dft_length - (f->num_taps - 1);
+ p->phase0 = p->at.integer / p->L;
+ p->input_size = (f->dft_length - p->at.integer + p->L - 1) / p->L;
+}
+
+static struct half_fir_info const * find_half_fir(
+ struct half_fir_info const * firs, size_t len, double att)
+{
+ size_t i;
+ for (i = 0; i + 1 < len && att > firs[i].att; ++i);
+ return &firs[i];
+}
+
+#define have_pre_stage (preM * preL != 1)
+#define have_arb_stage (arbM * arbL != 1)
+#define have_post_stage (postM * postL != 1)
+
+#include "soxr.h"
+
+STATIC char const * _soxr_init(
+ rate_t * const p, /* Per audio channel. */
+ rate_shared_t * const shared, /* By channels undergoing same rate change. */
+ double const io_ratio, /* Input rate divided by output rate. */
+ soxr_quality_spec_t const * const q_spec,
+ soxr_runtime_spec_t const * const r_spec,
+ double multiplier, /* Linear gain to apply during conversion. */
+ cr_core_t const * const core,
+ core_flags_t const core_flags)
+{
+ size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(core_flags);
+ double const tolerance = 1 + 1e-5;
+
+ double bits = q_spec->precision;
+ rolloff_t const rolloff = (rolloff_t)(q_spec->flags & 3);
+ int interpolator = (int)(r_spec->flags & 3) - 1;
+ double const Fp0 = q_spec->passband_end, Fs0 = q_spec->stopband_begin;
+ double const phase_response = q_spec->phase_response, tbw0 = Fs0-Fp0;
+
+ bool const maintain_3dB_pt = !!(q_spec->flags & SOXR_MAINTAIN_3DB_PT);
+ double tbw_tighten = 1, alpha;
+ #define tighten(x) (Fs0-(Fs0-(x))*tbw_tighten)
+
+ double arbM = io_ratio, Fn1, Fp1 = Fp0, Fs1 = Fs0, bits1 = min(bits,33);
+ double att = (bits1 + 1) * linear_to_dB(2.), attArb = att; /* +1: pass+stop */
+ int preL = 1, preM = 1, shr = 0, arbL = 1, postL = 1, postM = 1;
+ bool upsample=false, rational=false, iOpt=!(r_spec->flags&SOXR_NOSMALLINTOPT);
+ bool lq_bits= (q_spec->flags & SOXR_PROMOTE_TO_LQ)? bits <= 16 : bits == 16;
+ bool lq_Fp0 = (q_spec->flags & SOXR_PROMOTE_TO_LQ)? Fp0<=lq_bw0 : Fp0==lq_bw0;
+ int n = 0, i, mode = lq_bits && rolloff == rolloff_medium? io_ratio > 1 ||
+ phase_response != 50 || !lq_Fp0 || Fs0 != 1 : ((int)ceil(bits1) - 6) / 4;
+ struct half_fir_info const * half_fir_info;
+ stage_t * s;
+
+ if (io_ratio < 1 && Fs0 - 1 > 1 - Fp0 / tolerance)
+ return "imaging greater than rolloff";
+ if (.002 / tolerance > tbw0 || tbw0 > .5 * tolerance)
+ return "transition bandwidth not in [0.2,50] % of nyquist";
+ if (.5 / tolerance > Fp0 || Fs0 > 1.5 * tolerance)
+ return "transition band not within [50,150] % of nyquist";
+ if (bits!=0 && (15 > bits || bits > 33))
+ return "precision not in [15,33] bits";
+ if (io_ratio <= 0)
+ return "resampling factor not positive";
+ if (0 > phase_response || phase_response > 100)
+ return "phase response not in [0=min-phase,100=max-phase] %";
+
+ p->core = core;
+ p->io_ratio = io_ratio;
+ if (bits!=0) while (!n++) { /* Determine stages: */
+ int try, L, M, x, maxL = interpolator > 0? 1 : mode? 2048 :
+ (int)ceil(r_spec->coef_size_kbytes * 1000. / (U100_l * (int)sizeof_real));
+ double d, epsilon = 0, frac;
+ upsample = arbM < 1;
+ for (i = (int)(.5 * arbM), shr = 0; i >>= 1; arbM *= .5, ++shr);
+ preM = upsample || (arbM > 1.5 && arbM < 2);
+ postM = 1 + (arbM > 1 && preM), arbM /= postM;
+ preL = 1 + (!preM && arbM < 2) + (upsample && mode), arbM *= preL;
+ if ((frac = arbM - (int)arbM)!=0)
+ epsilon = fabs(floor(frac * MULT32 + .5) / (frac * MULT32) - 1);
+ for (i = 1, rational = frac==0; i <= maxL && !rational; ++i) {
+ d = frac * i, try = (int)(d + .5);
+ if ((rational = fabs(try / d - 1) <= epsilon)) { /* No long doubles! */
+ if (try == i)
+ arbM = ceil(arbM), shr += x = arbM > 3, arbM /= 1 + x;
+ else arbM = i * (int)arbM + try, arbL = i;
+ }
+ }
+ L = preL * arbL, M = (int)(arbM * postM), x = (L|M)&1, L >>= !x, M >>= !x;
+ if (iOpt && postL == 1 && (d = preL * arbL / arbM) > 4 && d != 5) {
+ for (postL = 4, i = (int)(d / 16); (i >>= 1) && postL < 256; postL <<= 1);
+ arbM = arbM * postL / arbL / preL, arbL = 1, n = 0;
+ } else if (rational && (max(L, M) < 3 + 2 * iOpt || L * M < 6 * iOpt))
+ preL = L, preM = M, arbM = arbL = postM = 1;
+ if (!mode && (!rational || !n))
+ ++mode, n = 0;
+ }
+
+ p->num_stages = shr + have_pre_stage + have_arb_stage + have_post_stage;
+ if (!p->num_stages && multiplier != 1) {
+ bits = arbL = 0; /* Use cubic_stage in this case. */
+ ++p->num_stages;
+ }
+ p->stages = calloc((size_t)p->num_stages + 1, sizeof(*p->stages));
+ if (!p->stages)
+ return "out of memory";
+ for (i = 0; i < p->num_stages; ++i) {
+ p->stages[i].num = i;
+ p->stages[i].shared = shared;
+ p->stages[i].input_size = 4096;
+ }
+ p->stages[0].is_input = true;
+
+ alpha = postM / (io_ratio * (postL << 0));
+
+ if ((n = p->num_stages) > 1) { /* Att. budget: */
+ if (have_arb_stage)
+ att += linear_to_dB(2.), attArb = att, --n;
+ att += linear_to_dB((double)n);
+ }
+
+ half_fir_info = find_half_fir(core->half_firs, core->half_firs_len, att);
+ for (i = 0, s = p->stages; i < shr; ++i, ++s) {
+ s->fn = half_fir_info->fn;
+ s->coefs = half_fir_info->coefs;
+ s->n = half_fir_info->num_coefs;
+ s->pre_post = 4 * s->n;
+ s->preload = s->pre = s->pre_post >> 1;
+ }
+
+ if (have_pre_stage) {
+ if (maintain_3dB_pt && have_post_stage) { /* Trans. bands overlapping. */
+ double x = tbw0 * lsx_inv_f_resp(-3., att);
+ x = -lsx_f_resp(x / (max(2 * alpha - Fs0, alpha) - Fp0), att);
+ if (x > .035) {
+ tbw_tighten = ((4.3074e-3 - 3.9121e-4 * x) * x - .040009) * x + 1.0014;
+ lsx_debug("tbw_tighten=%g (%gdB)", tbw_tighten, x);
+ }
+ }
+ Fn1 = preM? max(preL, preM) : arbM / arbL;
+ dft_stage_init(0, tighten(Fp1), Fs1, Fn1, att, phase_response, s++, preL,
+ max(preM, 1), &multiplier, r_spec->log2_min_dft_size,
+ r_spec->log2_large_dft_size, core_flags, core->rdft_cb);
+ Fp1 /= Fn1, Fs1 /= Fn1;
+ }
+
+ if (bits==0 && have_arb_stage) { /* `Quick' cubic arb stage: */
+ s->fn = core->cubic_stage_fn;
+ s->mult = multiplier, multiplier = 1;
+ s->step.whole = (int64_t)(arbM * MULT32 + .5);
+ s->pre_post = max(3, s->step.integer);
+ s->preload = s->pre = 1;
+ s->out_in_ratio = MULT32 / (double)s->step.whole;
+ }
+ else if (have_arb_stage) { /* Higher quality arb stage: */
+ static const float rolloffs[] = {-.01f, -.3f, 0, -.103f};
+ poly_fir_t const * f = &core->poly_firs[6*(upsample+!!preM)+mode-!upsample];
+ int order, num_coefs = (int)f->interp[0].scalar, phase_bits, phases;
+ size_t coefs_size;
+ double at, Fp = Fp1, Fs, Fn, mult = upsample? 1 : arbM / arbL;
+ poly_fir1_t const * f1;
+
+ if (!upsample && preM)
+ Fn = 2 * mult, Fs = 3 + fabs(Fs1 - 1);
+ else Fn = 1, Fs = 2 - (mode? Fp1 + (Fs1 - Fp1) * .7 : Fs1);
+
+ if (mode)
+ Fp = Fs - (Fs - Fp) / (1 - lsx_inv_f_resp(rolloffs[rolloff], attArb));
+
+ i = (interpolator < 0? !rational : max(interpolator, !rational)) - 1;
+ do {
+ f1 = &f->interp[++i];
+ assert(f1->fn);
+ if (i)
+ arbM /= arbL, arbL = 1, rational = false;
+ phase_bits = (int)ceil(f1->scalar - log(mult)/log(2.));
+ phases = !rational? (1 << phase_bits) : arbL;
+ if (f->interp[0].scalar==0) {
+ int phases0 = max(phases, 19), n0 = 0;
+ lsx_design_lpf(Fp, Fs, -Fn, attArb, &n0, phases0, f->beta);
+ num_coefs = n0 / phases0 + 1, num_coefs += num_coefs & !preM;
+ }
+ if ((num_coefs & 1) && rational && (arbL & 1))
+ phases <<= 1, arbL <<= 1, arbM *= 2;
+ at = arbL * (s->phase0 = .5 * (num_coefs & 1));
+ order = i + (i && mode > 4);
+ coefs_size = (size_t)(num_coefs4 * phases * (order+1)) * sizeof_real;
+ } while (interpolator < 0 && i < 2 && f->interp[i+1].fn &&
+ coefs_size / 1000 > r_spec->coef_size_kbytes);
+
+ if (!s->shared->poly_fir_coefs) {
+ int num_taps = num_coefs * phases - 1;
+ double * coefs = lsx_design_lpf(
+ Fp, Fs, Fn, attArb, &num_taps, phases, f->beta);
+ s->shared->poly_fir_coefs = prepare_poly_fir_coefs(
+ coefs, num_coefs, phases, order, multiplier, core_flags, &core->mem);
+ lsx_debug("fir_len=%i phases=%i coef_interp=%i size=%.3gk",
+ num_coefs, phases, order, (double)coefs_size / 1000.);
+ free(coefs);
+ }
+ multiplier = 1;
+ s->fn = f1->fn;
+ s->pre_post = num_coefs4 - 1;
+ s->preload = ((num_coefs - 1) >> 1) + (num_coefs4 - num_coefs);
+ s->n = num_coefs4;
+ s->phase_bits = phase_bits;
+ s->L = arbL;
+ s->use_hi_prec_clock =
+ mode>1 && (q_spec->flags & SOXR_HI_PREC_CLOCK) && !rational;
+#if FLOAT_HI_PREC_CLOCK
+ if (s->use_hi_prec_clock) {
+ s->at.flt = at;
+ s->step.flt = arbM;
+ s->out_in_ratio = (double)(arbL / s->step.flt);
+ } else
+#endif
+ {
+ s->at.whole = (int64_t)(at * MULT32 + .5);
+#if !FLOAT_HI_PREC_CLOCK
+ if (s->use_hi_prec_clock) {
+ double M = arbM * MULT32;
+ s->at.fix.ls.parts.ms = 0x80000000ul;
+ s->step.whole = (int64_t)M;
+ M -= (double)s->step.whole;
+ M *= MULT32 * MULT32;
+ s->step.fix.ls.all = (uint64_t)M;
+ } else
+#endif
+ s->step.whole = (int64_t)(arbM * MULT32 + .5);
+ s->out_in_ratio = MULT32 * arbL / (double)s->step.whole;
+ }
+ ++s;
+ }
+
+ if (have_post_stage)
+ dft_stage_init(1, tighten(Fp0 / (upsample? alpha : 1)), upsample? max(2 -
+ Fs0 / alpha, 1) : Fs0, (double)max(postL, postM), att, phase_response,
+ s++, postL, postM, &multiplier, r_spec->log2_min_dft_size,
+ r_spec->log2_large_dft_size, core_flags, core->rdft_cb);
+
+ lsx_debug("%g: »%i⋅%i/%i⋅%i/%g⋅%i/%i %x", 1/io_ratio,
+ shr, preL, preM, arbL, arbM, postL, postM, core_flags);
+
+ for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) {
+ fifo_create(&s->fifo, (int)sizeof_real);
+ memset(fifo_reserve(&s->fifo, s->preload), 0,
+ sizeof_real * (size_t)s->preload);
+ lsx_debug_more("%5i|%-5i preload=%i remL=%i",
+ s->pre, s->pre_post-s->pre, s->preload, s->at.integer);
+ }
+ fifo_create(&s->fifo, (int)sizeof_real);
+ return 0;
+}
+
+static bool stage_process(stage_t * stage, bool flushing)
+{
+ fifo_t * fifo = &stage->fifo;
+ bool done = false;
+ int want;
+ while (!done && (want = stage->input_size - fifo_occupancy(fifo)) > 0) {
+ if (stage->is_input) {
+ if (flushing)
+ memset(fifo_reserve(fifo, want), 0, fifo->item_size * (size_t)want);
+ else done = true;
+ }
+ else done = stage_process(stage - 1, flushing);
+ }
+ stage->fn(stage, &stage[1].fifo);
+ return done && fifo_occupancy(fifo) < stage->input_size;
+}
+
+STATIC void _soxr_process(rate_t * p, size_t olen)
+{
+ int const n = p->flushing? min(-(int)p->samples_out, (int)olen) : (int)olen;
+ stage_t * stage = &p->stages[p->num_stages];
+ fifo_t * fifo = &stage->fifo;
+ bool done = false;
+ while (!done && fifo_occupancy(fifo) < (int)n)
+ done = stage->is_input || stage_process(stage - 1, p->flushing);
+}
+
+STATIC real * _soxr_input(rate_t * p, real const * samples, size_t n)
+{
+ if (p->flushing)
+ return 0;
+ p->samples_in += (int64_t)n;
+ return fifo_write(&p->stages[0].fifo, (int)n, samples);
+}
+
+STATIC real const * _soxr_output(rate_t * p, real * samples, size_t * n0)
+{
+ fifo_t * fifo = &p->stages[p->num_stages].fifo;
+ int n = p->flushing? min(-(int)p->samples_out, (int)*n0) : (int)*n0;
+ p->samples_out += n = min(n, fifo_occupancy(fifo));
+ return fifo_read(fifo, (int)(*n0 = (size_t)n), samples);
+}
+
+STATIC void _soxr_flush(rate_t * p)
+{
+ if (p->flushing) return;
+ p->samples_out -= (int64_t)((double)p->samples_in / p->io_ratio + .5);
+ p->samples_in = 0;
+ p->flushing = true;
+}
+
+STATIC void _soxr_close(rate_t * p)
+{
+ if (p->stages) {
+ fn_t const * const RDFT_CB = p->core->rdft_cb;
+ rate_shared_t * shared = p->stages[0].shared;
+ int i;
+
+ for (i = 0; i <= p->num_stages; ++i) {
+ stage_t * s = &p->stages[i];
+ rdft_free(s->dft_scratch);
+ rdft_free(s->dft_out);
+ fifo_delete(&s->fifo);
+ }
+ if (shared) {
+ for (i = 0; i < 2; ++i) {
+ dft_filter_t * f= &shared->dft_filter[i];
+ rdft_free(f->coefs);
+ rdft_delete_setup(f->dft_forward_setup);
+ rdft_delete_setup(f->dft_backward_setup);
+ }
+ p->core->mem.free(shared->poly_fir_coefs);
+ memset(shared, 0, sizeof(*shared));
+ }
+ free(p->stages);
+ (void)RDFT_CB;
+ }
+}
+
+#if defined SOXR_LIB
+STATIC double _soxr_delay(rate_t * p)
+{
+ return (double)p->samples_in / p->io_ratio - (double)p->samples_out;
+}
+
+STATIC void _soxr_sizes(size_t * shared, size_t * channel)
+{
+ *shared = sizeof(rate_shared_t);
+ *channel = sizeof(rate_t);
+}
+#endif
diff --git a/src/cr.h b/src/cr.h
new file mode 100644
index 0000000..7e20327
--- /dev/null
+++ b/src/cr.h
@@ -0,0 +1,175 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#if !defined soxr_rate1_included
+#define soxr_rate1_included
+
+#define FIFO_SIZE_T int
+#include "fifo.h"
+
+typedef void real; /* float or double */
+struct stage;
+typedef void (* stage_fn_t)(struct stage * input, fifo_t * output);
+typedef struct half_fir_info {int num_coefs; real const * coefs; stage_fn_t fn, dfn; float att;} half_fir_info_t;
+typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t;
+typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t;
+
+#define U100_l 42
+#define MULT32 (65536. * 65536.)
+
+/* Conceptually: coef_p is &coefs[num_phases][fir_len][interp_order+1]: */
+#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[\
+ (fir_len) * ((interp_order) + 1) * (phase_num) + \
+ ((interp_order) + 1) * (fir_coef_num) + \
+ ((interp_order) - (coef_interp_num))]
+
+/* Conceptually: coef_p is &coefs[num_phases][fir_len/4][interp_order+1][4]: */
+#define coef4(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[\
+ (fir_len) * ((interp_order) + 1) * (phase_num) + \
+ ((interp_order) + 1) * ((fir_coef_num) & ~3) + \
+ 4 * ((interp_order) - (coef_interp_num)) + \
+ ((fir_coef_num) & 3)]
+
+typedef union { /* Int64 in parts */
+ #if HAVE_BIGENDIAN
+ struct {int32_t ms; uint32_t ls;} parts;
+ #else
+ struct {uint32_t ls; int32_t ms;} parts;
+ #endif
+ int64_t all;
+} int64p_t;
+
+typedef union { /* Uint64 in parts */
+ #if HAVE_BIGENDIAN
+ struct {uint32_t ms, ls;} parts;
+ #else
+ struct {uint32_t ls, ms;} parts;
+ #endif
+ uint64_t all;
+} uint64p_t;
+
+#define FLOAT_HI_PREC_CLOCK 0 /* Non-float hi-prec has ~96 bits. */
+#define float_step_t long double /* __float128 is also a (slow) option */
+
+typedef struct {
+ int dft_length, num_taps, post_peak;
+ void * dft_forward_setup, * dft_backward_setup;
+ real * coefs;
+} dft_filter_t;
+
+typedef struct { /* So generated filter coefs may be shared between channels */
+ real * poly_fir_coefs;
+ dft_filter_t dft_filter[2];
+} rate_shared_t;
+
+typedef union { /* Fixed point arithmetic */
+ struct {uint64p_t ls; int64p_t ms;} fix;
+ float_step_t flt;
+} step_t;
+
+#define CORE_DBL 1
+#define CORE_SIMD_POLY 2
+#define CORE_SIMD_HALF 4
+#define CORE_SIMD_DFT 8
+#define LOG2_SIZEOF_REAL(core_flags) (2 + ((core_flags) & 1))
+
+typedef int core_flags_t;
+
+#if defined SOXR_LIB
+#include "rdft_t.h"
+#else
+typedef void fn_t;
+#endif
+
+typedef struct stage {
+ int num;
+
+ /* Common to all stage types: */
+ core_flags_t core_flags;
+ stage_fn_t fn;
+ fifo_t fifo;
+ int pre; /* Number of past samples to store */
+ int pre_post; /* pre + number of future samples to store */
+ int preload; /* Number of zero samples to pre-load the fifo */
+ double out_in_ratio; /* For buffer management. */
+ int input_size;
+ bool is_input;
+
+ /* For a stage with variable (run-time generated) filter coefs: */
+ fn_t const * rdft_cb;
+ rate_shared_t * shared;
+ unsigned dft_filter_num; /* Which, if any, of the 2 DFT filters to use */
+ real * dft_scratch;
+ float * dft_out;
+ real const * coefs;
+
+ /* For a stage with variable L/M: */
+ step_t at, step;
+ bool use_hi_prec_clock;
+ int L, remM;
+ int n, phase_bits, block_len;
+ double mult, phase0;
+} stage_t;
+
+#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post)
+#define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre)
+#define integer fix.ms.parts.ms
+#define fraction fix.ms.parts.ls
+#define whole fix.ms.all
+
+
+#define lq_bw0 (1385/2048.) /* ~.67625, FP exact. */
+
+typedef enum {rolloff_small, rolloff_medium, rolloff_none} rolloff_t;
+
+
+typedef struct {
+ void * (* alloc)(size_t);
+ void * (* calloc)(size_t, size_t);
+ void (* free)(void *);
+} alloc_t;
+
+typedef struct {
+ alloc_t mem;
+ half_fir_info_t const * half_firs;
+ size_t half_firs_len;
+ half_fir_info_t const * doub_firs;
+ size_t doub_firs_len;
+ stage_fn_t cubic_stage_fn;
+ poly_fir_t const * poly_firs;
+ fn_t * rdft_cb;
+} cr_core_t;
+
+typedef struct rate rate_t;
+struct rate {
+ cr_core_t const * core;
+ double io_ratio;
+ int64_t samples_in, samples_out;
+ int num_stages, flushing;
+ stage_t * stages;
+};
+
+#if defined SOXR_LIB
+
+#include "soxr.h"
+
+char const * _soxr_init(
+ rate_t * const p, /* Per audio channel. */
+ rate_shared_t * const shared, /* Between channels (undergoing same rate change)*/
+ double const io_ratio, /* Input rate divided by output rate. */
+ soxr_quality_spec_t const * const q_spec,
+ soxr_runtime_spec_t const * const r_spec,
+ double multiplier, /* Linear gain to apply during conversion. 1 */
+ cr_core_t const * const core,
+ core_flags_t const);
+
+void _soxr_process(struct rate * p, size_t olen);
+real * _soxr_input(struct rate * p, real const * samples, size_t n);
+real const * _soxr_output(struct rate * p, real * samples, size_t * n0);
+void _soxr_flush(struct rate * p);
+void _soxr_close(struct rate * p);
+double _soxr_delay(struct rate * p);
+void _soxr_sizes(size_t * shared, size_t * channel);
+#endif
+
+#endif
diff --git a/src/cr32.c b/src/cr32.c
new file mode 100644
index 0000000..b9eb264
--- /dev/null
+++ b/src/cr32.c
@@ -0,0 +1,8 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#define RATE_CB _soxr_rate32_cb
+#define CORE_STR "cr32"
+
+#define CORE_TYPE 0
+#include "cr-core.c"
diff --git a/src/cr32s.c b/src/cr32s.c
new file mode 100644
index 0000000..5de2a43
--- /dev/null
+++ b/src/cr32s.c
@@ -0,0 +1,8 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#define RATE_CB _soxr_rate32s_cb
+#define CORE_STR "cr32s"
+
+#define CORE_TYPE (CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT)
+#include "cr-core.c"
diff --git a/src/cr64.c b/src/cr64.c
new file mode 100644
index 0000000..518cdd7
--- /dev/null
+++ b/src/cr64.c
@@ -0,0 +1,8 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#define RATE_CB _soxr_rate64_cb
+#define CORE_STR "cr64"
+
+#define CORE_TYPE CORE_DBL
+#include "cr-core.c"
diff --git a/src/cr64s.c b/src/cr64s.c
new file mode 100644
index 0000000..5dcd6f1
--- /dev/null
+++ b/src/cr64s.c
@@ -0,0 +1,8 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#define RATE_CB _soxr_rate64s_cb
+#define CORE_STR "cr64s"
+
+#define CORE_TYPE (CORE_DBL|CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT)
+#include "cr-core.c"
diff --git a/src/data-io.c b/src/data-io.c
index 1081000..52144c2 100644
--- a/src/data-io.c
+++ b/src/data-io.c
@@ -23,7 +23,7 @@
-#if WITH_DOUBLE_PRECISION
+#if WITH_CR64 || WITH_CR64S
void _soxr_deinterleave(double * * dest, /* Round/clipping not needed here */
soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch)
{
@@ -40,7 +40,7 @@ void _soxr_deinterleave(double * * dest, /* Round/clipping not needed here */
-#if WITH_SINGLE_PRECISION
+#if WITH_CR32 || WITH_CR32S || WITH_VR32
void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch)
{
@@ -97,7 +97,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
#endif
#endif
-#if WITH_DOUBLE_PRECISION
+#if WITH_CR64 || WITH_CR64S
#define FLOATX double
#define LSX_RINT_CLIP_2 lsx_rint32_clip_2
@@ -139,7 +139,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
-#if WITH_SINGLE_PRECISION
+#if WITH_CR32 || WITH_CR32S || WITH_VR32
#define FLOATX float
#define LSX_RINT_CLIP_2 lsx_rint32_clip_2_f
@@ -199,7 +199,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
return 0; \
} while (0)
-#if WITH_DOUBLE_PRECISION
+#if WITH_CR64 || WITH_CR64S
size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0,
double const * const * src, size_t n, unsigned ch, unsigned long * seed)
{
@@ -225,7 +225,7 @@ size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0,
}
#endif
-#if WITH_SINGLE_PRECISION
+#if WITH_CR32 || WITH_CR32S || WITH_VR32
size_t /* clips */ _soxr_interleave_f(soxr_datatype_t data_type, void * * dest0,
float const * const * src, size_t n, unsigned ch, unsigned long * seed)
{
diff --git a/src/fft4g.c b/src/fft4g.c
index 5fae8a6..cf6293a 100644
--- a/src/fft4g.c
+++ b/src/fft4g.c
@@ -282,22 +282,16 @@ Appendix :
*/
-#include
+#include "math-wrap.h"
#include "fft4g.h"
#ifdef FFT4G_FLOAT
#define double float
#define one_half 0.5f
-#if defined _MSC_VER
- #define sin (float)sin
- #define cos (float)cos
- #define atan (float)atan
-#else
- #define sin sinf
- #define cos cosf
- #define atan atanf
-#endif
+ #define sin(x) sinf(x)
+ #define cos(x) cosf(x)
+ #define atan(x) atanf(x)
#define cdft lsx_cdft_f
#define rdft lsx_rdft_f
@@ -818,7 +812,7 @@ static void bitrv2(int n, int *ip0, double *a)
static void bitrv2conj(int n, int *ip0, double *a)
{
- int j, j1, k, k1, l, m, m2, ip[256];
+ int j, j1, k, k1, l, m, m2, ip[512];
double xr, xi, yr, yi;
(void)ip0;
diff --git a/src/fft4g32.c b/src/fft4g32.c
index 8741394..5dcf34d 100644
--- a/src/fft4g32.c
+++ b/src/fft4g32.c
@@ -1,17 +1,19 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
+#include
#include "filter.h"
#define FFT4G_FLOAT
#include "fft4g.c"
+#include "rdft_t.h"
static void * null(void) {return 0;}
static void forward (int length, void * setup, double * H) {lsx_safe_rdft_f(length, 1, H); (void)setup;}
static void backward(int length, void * setup, double * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;}
static int multiplier(void) {return 2;}
static void nothing(void) {}
+static int flags(void) {return 0;}
-typedef void (* fn_t)(void);
fn_t _soxr_rdft32_cb[] = {
(fn_t)null,
(fn_t)null,
@@ -24,4 +26,8 @@ fn_t _soxr_rdft32_cb[] = {
(fn_t)_soxr_ordered_partial_convolve_f,
(fn_t)multiplier,
(fn_t)nothing,
+ (fn_t)malloc,
+ (fn_t)calloc,
+ (fn_t)free,
+ (fn_t)flags,
};
diff --git a/src/fft4g32s.c b/src/fft4g32s.c
index 4a95a7d..34dae4b 100644
--- a/src/fft4g32s.c
+++ b/src/fft4g32s.c
@@ -3,14 +3,15 @@
#include "filter.h"
#include "simd.h"
+#include "rdft_t.h"
static void * null(void) {return 0;}
static void nothing(void) {}
static void forward (int length, void * setup, float * H) {lsx_safe_rdft_f(length, 1, H); (void)setup;}
static void backward(int length, void * setup, float * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;}
static int multiplier(void) {return 2;}
+static int flags(void) {return RDFT_IS_SIMD;}
-typedef void (* fn_t)(void);
fn_t _soxr_rdft32s_cb[] = {
(fn_t)null,
(fn_t)null,
@@ -19,8 +20,12 @@ fn_t _soxr_rdft32s_cb[] = {
(fn_t)forward,
(fn_t)backward,
(fn_t)backward,
- (fn_t)_soxr_ordered_convolve_simd,
- (fn_t)_soxr_ordered_partial_convolve_simd,
+ (fn_t)ORDERED_CONVOLVE_SIMD,
+ (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
(fn_t)multiplier,
(fn_t)nothing,
+ (fn_t)SIMD_ALIGNED_MALLOC,
+ (fn_t)SIMD_ALIGNED_CALLOC,
+ (fn_t)SIMD_ALIGNED_FREE,
+ (fn_t)flags,
};
diff --git a/src/fft4g64.c b/src/fft4g64.c
index 4acb33b..0018516 100644
--- a/src/fft4g64.c
+++ b/src/fft4g64.c
@@ -1,16 +1,18 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
+#include
#include "filter.h"
#include "fft4g.c"
#include "soxr-config.h"
-#if WITH_DOUBLE_PRECISION
+#if WITH_CR64
static void * null(void) {return 0;}
static void nothing(void) {}
static void forward (int length, void * setup, double * H) {lsx_safe_rdft(length, 1, H); (void)setup;}
static void backward(int length, void * setup, double * H) {lsx_safe_rdft(length, -1, H); (void)setup;}
static int multiplier(void) {return 2;}
+static int flags(void) {return 0;}
typedef void (* fn_t)(void);
fn_t _soxr_rdft64_cb[] = {
@@ -25,5 +27,9 @@ fn_t _soxr_rdft64_cb[] = {
(fn_t)_soxr_ordered_partial_convolve,
(fn_t)multiplier,
(fn_t)nothing,
+ (fn_t)malloc,
+ (fn_t)calloc,
+ (fn_t)free,
+ (fn_t)flags,
};
#endif
diff --git a/src/fifo.h b/src/fifo.h
index b2bda43..19f6c1d 100644
--- a/src/fifo.h
+++ b/src/fifo.h
@@ -9,6 +9,7 @@
#endif
#if !defined FIFO_REALLOC
+#include
#define FIFO_REALLOC(a,b,c) realloc(a,b)
#undef FIFO_FREE
#define FIFO_FREE free
diff --git a/src/filter.c b/src/filter.c
index 482302e..aec0b6e 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -1,12 +1,9 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include "filter.h"
-#include
-#if !defined M_PI
-#define M_PI 3.14159265358979323846
-#endif
+#include "math-wrap.h"
#include
#include
#include
@@ -14,7 +11,7 @@
#include "fft4g.h"
#include "ccrw2.h"
-#if 1 || WITH_DOUBLE_PRECISION /* Always need this, for lsx_fir_to_phase. */
+#if 1 || WITH_CR64 || WITH_CR64S /* Always need this, for lsx_fir_to_phase. */
#define DFT_FLOAT double
#define DONE_WITH_FFT_CACHE done_with_fft_cache
#define FFT_CACHE_CCRW fft_cache_ccrw
@@ -31,7 +28,7 @@
#include "fft4g_cache.h"
#endif
-#if WITH_SINGLE_PRECISION && !AVCODEC_FOUND
+#if WITH_CR32 && !AVCODEC_FOUND
#define DFT_FLOAT float
#define DONE_WITH_FFT_CACHE done_with_fft_cache_f
#define FFT_CACHE_CCRW fft_cache_ccrw_f
@@ -48,14 +45,14 @@
#include "fft4g_cache.h"
#endif
-#if WITH_DOUBLE_PRECISION || !SOXR_LIB
+#if WITH_CR64 || WITH_CR64S || !SOXR_LIB
#define DFT_FLOAT double
#define ORDERED_CONVOLVE lsx_ordered_convolve
#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve
#include "rdft.h"
#endif
-#if WITH_SINGLE_PRECISION
+#if WITH_CR32
#define DFT_FLOAT float
#define ORDERED_CONVOLVE lsx_ordered_convolve_f
#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve_f
@@ -129,6 +126,9 @@ double * lsx_design_lpf(
int n = *num_taps, phases = max(k, 1), modulo = max(-k, 1);
double tr_bw, Fc, rho = phases == 1? .5 : att < 120? .63 : .75;
+ lsx_debug_more("./sinctest %-12.7g %-12.7g %g 0 %-5g %i %i 50 %g %g -4 >1",
+ Fp, Fs, Fn, att, *num_taps, k, beta, rho);
+
Fp /= fabs(Fn), Fs /= fabs(Fn); /* Normalise to Fn = 1 */
tr_bw = .5 * (Fs - Fp); /* Transition band-width: 6dB to stop points */
tr_bw /= phases, Fs /= phases;
@@ -243,3 +243,35 @@ void lsx_fir_to_phase(double * * h, int * len, int * post_len, double phase)
work[imp_peak], *len, *post_len, 100 - 100. * *post_len / (*len - 1));
free(pi_wraps), free(work);
}
+
+#define F_x(F,expr) static double F(double x) {return expr;}
+F_x(sinePhi, ((2.0517e-07*x-1.1303e-04)*x+.023154)*x+.55924 )
+F_x(sinePsi, ((9.0667e-08*x-5.6114e-05)*x+.013658)*x+1.0977 )
+F_x(sinePow, log(.5)/log(sin(x*.5)) )
+#define dB_to_linear(x) exp((x) * (M_LN10 * 0.05))
+
+double lsx_f_resp(double t, double a)
+{
+ double x;
+ if (t > (a <= 160? .8 : .82)) {
+ double a1 = a+15;
+ double p = .00035*a+.375;
+ double w = 1/(1-.597)*asin(pow((a1-10.6)/a1,1/p));
+ double c = 1+asin(pow(1-a/a1,1/p))/w;
+ return a1*(pow(sin((c-t)*w),p)-1);
+ }
+ if (t > .5)
+ x = sinePsi(a), x = pow(sin((1-t) * x), sinePow(x));
+ else
+ x = sinePhi(a), x = 1 - pow(sin(t * x), sinePow(x));
+ return linear_to_dB(x);
+}
+
+double lsx_inv_f_resp(double drop, double a)
+{
+ double x = sinePhi(a), s;
+ drop = dB_to_linear(drop);
+ s = drop > .5 ? 1 - drop : drop;
+ x = asin(pow(s, 1/sinePow(x))) / x;
+ return drop > .5? x : 1 -x;
+}
diff --git a/src/filter.h b/src/filter.h
index 435303b..56333ff 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -33,7 +33,12 @@ double * lsx_design_lpf(
int * num_taps, /* 0: value will be estimated */
int k, /* >0: number of phases; <0: num_taps ≡ 1 (mod -k) */
double beta); /* <0: value will be estimated */
+
void lsx_fir_to_phase(double * * h, int * len,
int * post_len, double phase0);
+double lsx_f_resp(double t, double a);
+double lsx_inv_f_resp(double drop, double a);
+#define lsx_to_3dB(a) (1 - lsx_inv_f_resp(-3., a))
+
#endif
diff --git a/src/filters.h b/src/filters.h
deleted file mode 100644
index e9a8011..0000000
--- a/src/filters.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1 See LICENCE for details. */
-
-#include "half_coefs.h"
-
-#define FUNCTION h8
-#define CONVOLVE _ _ _ _ _ _ _ _
-#define h8_l 8
-#define COEFS half_fir_coefs_8
-#include "half-fir.h"
-
-#define FUNCTION h9
-#define CONVOLVE _ _ _ _ _ _ _ _ _
-#define h9_l 9
-#define COEFS half_fir_coefs_9
-#include "half-fir.h"
-
-#define FUNCTION h10
-#define CONVOLVE _ _ _ _ _ _ _ _ _ _
-#define h10_l 10
-#define COEFS half_fir_coefs_10
-#include "half-fir.h"
-
-#define FUNCTION h11
-#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _
-#define h11_l 11
-#define COEFS half_fir_coefs_11
-#include "half-fir.h"
-
-#define FUNCTION h12
-#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _
-#define h12_l 12
-#define COEFS half_fir_coefs_12
-#include "half-fir.h"
-
-#define FUNCTION h13
-#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _ _
-#define h13_l 13
-#define COEFS half_fir_coefs_13
-#include "half-fir.h"
-
-static struct {int num_coefs; stage_fn_t fn; float att;} const half_firs[] = {
- { 8, h8 , 136.51f},
- { 9, h9 , 152.32f},
- {10, h10, 168.07f},
- {11, h11, 183.78f},
- {12, h12, 199.44f},
- {13, h13, 212.75f},
-};
-
-#define HI_PREC_CLOCK
-
-#define VAR_LENGTH p->n
-#define VAR_CONVOLVE while (j < FIR_LENGTH) _
-#define VAR_POLY_PHASE_BITS p->phase_bits
-
-#define FUNCTION vpoly0
-#define FIR_LENGTH VAR_LENGTH
-#define CONVOLVE VAR_CONVOLVE
-#include "poly-fir0.h"
-
-#define FUNCTION vpoly1
-#define COEF_INTERP 1
-#define PHASE_BITS VAR_POLY_PHASE_BITS
-#define FIR_LENGTH VAR_LENGTH
-#define CONVOLVE VAR_CONVOLVE
-#include "poly-fir.h"
-
-#define FUNCTION vpoly2
-#define COEF_INTERP 2
-#define PHASE_BITS VAR_POLY_PHASE_BITS
-#define FIR_LENGTH VAR_LENGTH
-#define CONVOLVE VAR_CONVOLVE
-#include "poly-fir.h"
-
-#define FUNCTION vpoly3
-#define COEF_INTERP 3
-#define PHASE_BITS VAR_POLY_PHASE_BITS
-#define FIR_LENGTH VAR_LENGTH
-#define CONVOLVE VAR_CONVOLVE
-#include "poly-fir.h"
-
-#undef HI_PREC_CLOCK
-
-#define U100_l 42
-#if RATE_SIMD_POLY
- #define U100_l_EXTRA _ _
- #define u100_l_EXTRA _
- #define U100_l_EXTRA_LENGTH 2
- #define u100_l_EXTRA_LENGTH 1
-#else
- #define U100_l_EXTRA
- #define u100_l_EXTRA
- #define U100_l_EXTRA_LENGTH 0
- #define u100_l_EXTRA_LENGTH 0
-#endif
-#define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ U100_l_EXTRA
-#define FUNCTION U100_0
-#define FIR_LENGTH (U100_l + U100_l_EXTRA_LENGTH)
-#define CONVOLVE poly_fir_convolve_U100
-#include "poly-fir0.h"
-
-#define u100_l 11
-#define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _ _ u100_l_EXTRA
-#define FUNCTION u100_0
-#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
-#define CONVOLVE poly_fir_convolve_u100
-#include "poly-fir0.h"
-
-#define FUNCTION u100_1
-#define COEF_INTERP 1
-#define PHASE_BITS 8
-#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
-#define CONVOLVE poly_fir_convolve_u100
-#include "poly-fir.h"
-#define u100_1_b 8
-
-#define FUNCTION u100_2
-#define COEF_INTERP 2
-#define PHASE_BITS 6
-#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
-#define CONVOLVE poly_fir_convolve_u100
-#include "poly-fir.h"
-#define u100_2_b 6
-
-typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t;
-typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t;
-
-static poly_fir_t const poly_firs[] = {
- {-1, {{0, vpoly0}, { 7.2f, vpoly1}, {5.0f, vpoly2}}},
- {-1, {{0, vpoly0}, { 9.4f, vpoly1}, {6.7f, vpoly2}}},
- {-1, {{0, vpoly0}, {12.4f, vpoly1}, {7.8f, vpoly2}}},
- {-1, {{0, vpoly0}, {13.6f, vpoly1}, {9.3f, vpoly2}}},
- {-1, {{0, vpoly0}, {10.5f, vpoly2}, {8.4f, vpoly3}}},
- {-1, {{0, vpoly0}, {11.85f,vpoly2}, {9.0f, vpoly3}}},
-
- {-1, {{0, vpoly0}, { 8.0f, vpoly1}, {5.3f, vpoly2}}},
- {-1, {{0, vpoly0}, { 8.6f, vpoly1}, {5.7f, vpoly2}}},
- {-1, {{0, vpoly0}, {10.6f, vpoly1}, {6.75f,vpoly2}}},
- {-1, {{0, vpoly0}, {12.6f, vpoly1}, {8.6f, vpoly2}}},
- {-1, {{0, vpoly0}, { 9.6f, vpoly2}, {7.6f, vpoly3}}},
- {-1, {{0, vpoly0}, {11.4f, vpoly2}, {8.65f,vpoly3}}},
-
- {10.62f, {{U100_l, U100_0}, {0, 0}, {0, 0}}},
- {11.28f, {{u100_l, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}},
- {-1, {{0, vpoly0}, { 9, vpoly1}, { 6, vpoly2}}},
- {-1, {{0, vpoly0}, { 11, vpoly1}, { 7, vpoly2}}},
- {-1, {{0, vpoly0}, { 13, vpoly1}, { 8, vpoly2}}},
- {-1, {{0, vpoly0}, { 10, vpoly2}, { 8, vpoly3}}},
- {-1, {{0, vpoly0}, { 12, vpoly2}, { 9, vpoly3}}},
-};
diff --git a/src/half-coefs.h b/src/half-coefs.h
new file mode 100644
index 0000000..a5a0882
--- /dev/null
+++ b/src/half-coefs.h
@@ -0,0 +1,75 @@
+/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#if defined __GNUC__
+ #pragma GCC system_header
+#elif defined __SUNPRO_C
+ #pragma disable_warn
+#elif defined _MSC_VER
+ #pragma warning(push, 1)
+#endif
+
+#if CORE_TYPE & CORE_SIMD_HALF
+ #define VALIGN vAlign
+#else
+ #define VALIGN
+#endif
+
+#if !(CORE_TYPE & CORE_SIMD_HALF)
+static VALIGN const sample_t half_fir_coefs_7[] = {
+ 3.1062656496657370e-01, -8.4998810699955796e-02, 3.4007044621123500e-02,
+-1.2839903789829387e-02, 3.9899380181723145e-03, -8.9355202017945374e-04,
+ 1.0918292424806546e-04,
+};
+#endif
+
+static VALIGN const sample_t half_fir_coefs_8[] = {
+ 3.1154652365332069e-01, -8.7344917685739543e-02, 3.6814458353637280e-02,
+-1.5189204581464479e-02, 5.4540855610738801e-03, -1.5643862626630416e-03,
+ 3.1816575906323303e-04, -3.4799449225005688e-05,
+};
+
+static VALIGN const sample_t half_fir_coefs_9[] = {
+ 3.1227034755311189e-01, -8.9221517147969526e-02, 3.9139704015071934e-02,
+-1.7250558515852023e-02, 6.8589440230476112e-03, -2.3045049636430419e-03,
+ 6.0963740543348963e-04, -1.1323803957431231e-04, 1.1197769991000046e-05,
+};
+
+#if CORE_TYPE & CORE_DBL
+static VALIGN const sample_t half_fir_coefs_10[] = {
+ 3.1285456012000523e-01, -9.0756740799292787e-02, 4.1096398104193160e-02,
+-1.9066319572525220e-02, 8.1840569787684902e-03, -3.0766876176359834e-03,
+ 9.6396524429277980e-04, -2.3585679989922018e-04, 4.0252189026627833e-05,
+-3.6298196342497932e-06,
+};
+
+static VALIGN const sample_t half_fir_coefs_11[] = {
+ 3.1333588822574199e-01, -9.2035898673019811e-02, 4.2765169698406408e-02,
+-2.0673580894964429e-02, 9.4225426824512421e-03, -3.8563379950013192e-03,
+ 1.3634742159642453e-03, -3.9874150714431009e-04, 9.0586723632664806e-05,
+-1.4285617244076783e-05, 1.1834642946400529e-06,
+};
+
+static VALIGN const sample_t half_fir_coefs_12[] = {
+ 3.1373928463345568e-01, -9.3118180335301962e-02, 4.4205005881659098e-02,
+-2.2103860986973051e-02, 1.0574689371162864e-02, -4.6276428065385065e-03,
+ 1.7936153397572132e-03, -5.9617527051353237e-04, 1.6314517495669067e-04,
+-3.4555126770115446e-05, 5.0617615610782593e-06, -3.8768958592971409e-07,
+};
+
+static VALIGN const sample_t half_fir_coefs_13[] = {
+ 3.1408224847888910e-01, -9.4045836332667387e-02, 4.5459878763259978e-02,
+-2.3383369012219993e-02, 1.1644273044890753e-02, -5.3806714579057013e-03,
+ 2.2429072878264022e-03, -8.2204347506606424e-04, 2.5724946477840893e-04,
+-6.6072709864248668e-05, 1.3099163296288644e-05, -1.7907147069136000e-06,
+ 1.2750825595240592e-07,
+};
+#endif
+
+#undef VALIGN
+
+#if defined __SUNPRO_C
+ #pragma enable_warn
+#elif defined _MSC_VER
+ #pragma warning(pop)
+#endif
diff --git a/src/half-fir.h b/src/half-fir.h
index 0a8ee97..782be1b 100644
--- a/src/half-fir.h
+++ b/src/half-fir.h
@@ -1,25 +1,61 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
-/* Down-sample by a factor of 2 using a FIR with odd length (LEN).*/
+/* Decimate by 2 using a FIR with odd length (LEN). */
/* Input must be preceded and followed by LEN >> 1 samples. */
-#define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j;
-static void FUNCTION(stage_t * p, fifo_t * output_fifo)
+#define COEFS ((sample_t const *)p->coefs)
+
+#if SIMD_SSE
+ #define BEGINNING v4_t sum, q1, q2, t
+ #define ____ \
+ q1 = _mm_shuffle_ps(t=vLdu(input+2*j),vLdu(input+2*j+4),_MM_SHUFFLE(3,1,3,1)); \
+ q2 = _mm_shuffle_ps(vLdu(input-2*j-4),vLdu(input-2*j-8),_MM_SHUFFLE(1,3,1,3)); \
+ sum = vAdd(j? sum : vMul(vSet1(.5), t), vMul(vAdd(q1, q2), vLd(COEFS+j))); \
+ j += 4;
+ #define __ \
+ q1 = _mm_shuffle_ps(vLdu(input+2*j), vLdu(input-2*j-4), _MM_SHUFFLE(1,3,3,1)); \
+ q2 = _mm_loadl_pi(q2, (__m64*)(COEFS+j)), q2 = _mm_movelh_ps(q2, q2); \
+ sum = vAdd(sum, vMul(q1, q2)); \
+ j += 2;
+ #define _ \
+ q1 = _mm_add_ss(_mm_load_ss(input+2*j+1), _mm_load_ss(input-2*j-1)); \
+ sum = _mm_add_ss(sum, _mm_mul_ss(q1, _mm_load_ss(COEFS+j))); \
+ ++j;
+ #define END vStorSum(output+i, sum)
+/* #elif SIMD_AVX; No good solution found. */
+/* #elif SIMD_NEON; No need: gcc -O3 does a good job by itself. */
+#else
+ #define BEGINNING sample_t sum = input[0] * .5f
+ #define ____ __ __
+ #define __ _ _
+ #define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j;
+ #define END output[i] = sum
+#endif
+
+
+
+static void FUNCTION_H(stage_t * p, fifo_t * output_fifo)
{
- sample_t const * input = stage_read_p(p);
- int i, num_out = (stage_occupancy(p) + 1) / 2;
- sample_t * output = fifo_reserve(output_fifo, num_out);
+ sample_t const * __restrict input = stage_read_p(p);
+ int num_in = min(stage_occupancy(p), p->input_size);
+ int i, num_out = (num_in + 1) >> 1;
+ sample_t * __restrict output = fifo_reserve(output_fifo, num_out);
for (i = 0; i < num_out; ++i, input += 2) {
int j = 0;
- sample_t sum = input[0] * .5f;
- CONVOLVE
- output[i] = sum;
+ BEGINNING; CONVOLVE; END;
}
fifo_read(&p->fifo, 2 * num_out, NULL);
}
+
+
+
#undef _
+#undef __
+#undef ____
+#undef BEGINNING
+#undef END
#undef COEFS
#undef CONVOLVE
-#undef FUNCTION
+#undef FUNCTION_H
diff --git a/src/half_coefs.h b/src/half_coefs.h
deleted file mode 100644
index aac7769..0000000
--- a/src/half_coefs.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1 See LICENCE for details. */
-
-#if defined __GNUC__
- #pragma GCC system_header
-#elif defined __SUNPRO_C
- #pragma disable_warn
-#elif defined _MSC_VER
- #pragma warning(push, 1)
-#endif
-
-static const sample_t half_fir_coefs_8[] = {
- 0.3115465451887802, -0.08734497241282892, 0.03681452335604365,
- -0.01518925831569441, 0.005454118437408876, -0.001564400922162005,
- 0.0003181701445034203, -3.48001341225749e-5,
-};
-
-static const sample_t half_fir_coefs_9[] = {
- 0.3122703613711853, -0.08922155288172305, 0.03913974805854332,
- -0.01725059723447163, 0.006858970092378141, -0.002304518467568703,
- 0.0006096426006051062, -0.0001132393923815236, 1.119795386287666e-5,
-};
-
-static const sample_t half_fir_coefs_10[] = {
- 0.3128545521327376, -0.09075671986104322, 0.04109637155154835,
- -0.01906629512749895, 0.008184039342054333, -0.0030766775017262,
- 0.0009639607022414314, -0.0002358552746579827, 4.025184282444155e-5,
- -3.629779111541012e-6,
-};
-
-static const sample_t half_fir_coefs_11[] = {
- 0.3133358837508807, -0.09203588680609488, 0.04276515428384758,
- -0.02067356614745591, 0.00942253142371517, -0.003856330993895144,
- 0.001363470684892284, -0.0003987400965541919, 9.058629923971627e-5,
- -1.428553070915318e-5, 1.183455238783835e-6,
-};
-
-static const sample_t half_fir_coefs_12[] = {
- 0.3137392991811407, -0.0931182192961332, 0.0442050575271454,
- -0.02210391200618091, 0.01057473015666001, -0.00462766983973885,
- 0.001793630226239453, -0.0005961819959665878, 0.0001631475979359577,
- -3.45557865639653e-5, 5.06188341942088e-6, -3.877010943315563e-7,
-};
-
-static const sample_t half_fir_coefs_13[] = {
- 0.3140822554324578, -0.0940458550886253, 0.04545990399121566,
- -0.02338339450796002, 0.01164429409071052, -0.005380686021429845,
- 0.002242915773871009, -0.000822047600000082, 0.0002572510962395222,
- -6.607320708956279e-5, 1.309926399120154e-5, -1.790719575255006e-6,
- 1.27504961098836e-7,
-};
-
-#if defined __SUNPRO_C
- #pragma enable_warn
-#elif defined _MSC_VER
- #pragma warning(pop)
-#endif
diff --git a/src/internal.h b/src/internal.h
index 6b4fb24..ee691a0 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -6,11 +6,15 @@
#include "std-types.h"
+
+
#undef min
#undef max
#define min(a, b) ((a) <= (b) ? (a) : (b))
#define max(a, b) ((a) >= (b) ? (a) : (b))
+
+
#define range_limit(x, lower, upper) (min(max(x, lower), upper))
#define linear_to_dB(x) (log10(x) * 20)
#define array_length(a) (sizeof(a)/sizeof(a[0]))
@@ -20,12 +24,16 @@
#define iAL(a) (int)AL(a)
#define sqr(a) ((a) * (a))
+
+
#if defined __GNUC__
#define UNUSED __attribute__ ((unused))
#else
#define UNUSED
#endif
+
+
#if !WITH_DEV_TRACE
#ifdef __GNUC__
void lsx_dummy(char const *, ...);
@@ -35,10 +43,41 @@
#define lsx_debug if(0) lsx_dummy
#define lsx_debug_more lsx_debug
#else
- int _soxr_trace_level(void);
+ extern int _soxr_trace_level;
void _soxr_trace(char const * fmt, ...);
- #define lsx_debug if (_soxr_trace_level() >= 4) _soxr_trace
- #define lsx_debug_more if (_soxr_trace_level() >= 5) _soxr_trace
+ #define lsx_debug if (_soxr_trace_level > 0) _soxr_trace
+ #define lsx_debug_more if (_soxr_trace_level > 1) _soxr_trace
#endif
+
+
+/* soxr_quality_spec_t.flags: */
+
+#define SOXR_ROLLOFF_LSR2Q 3u /* Reserved for internal use. */
+#define SOXR_ROLLOFF_MASK 3u /* For masking these bits. */
+#define SOXR_PROMOTE_TO_LQ 64u /* Reserved for internal use. */
+
+
+
+/* soxr_runtime_spec_t.flags: */
+
+#define SOXR_STRICT_BUFFERING 4u /* Reserved for future use. */
+#define SOXR_NOSMALLINTOPT 8u /* For test purposes only. */
+
+
+
+/* soxr_quality_spec recipe: */
+
+#define SOXR_PRECISIONQ 11 /* Quality specified by the precision parameter. */
+
+#define SOXR_PHASE_MASK 0x30 /* For masking these bits. */
+
+
+
+/* soxr_quality_spec flags: */
+
+#define RESET_ON_CLEAR (1u<<31)
+
+
+
#endif
diff --git a/src/math-wrap.h b/src/math-wrap.h
new file mode 100644
index 0000000..8a526f1
--- /dev/null
+++ b/src/math-wrap.h
@@ -0,0 +1,31 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#if !defined soxr_math_wrap_included
+#define soxr_math_wrap_included
+
+#include
+
+#if defined __STRICT_ANSI__
+ #define sinf(x) (float)sin ((double)(x))
+ #define cosf(x) (float)cos ((double)(x))
+ #define atanf(x) (float)atan((double)(x))
+#endif
+
+#if !defined M_PI
+ #define M_PI 3.141592653589793238462643383279502884
+#endif
+
+#if !defined M_LN10
+ #define M_LN10 2.302585092994045684017991454684364208
+#endif
+
+#if !defined M_SQRT2
+ #define M_SQRT2 1.414213562373095048801688724209698079
+#endif
+
+#if !defined M_LN2
+ #define M_LN2 0.693147180559945309417232121458176568
+#endif
+
+#endif
diff --git a/src/pffft-wrap.c b/src/pffft-wrap.c
index f410892..806547c 100644
--- a/src/pffft-wrap.c
+++ b/src/pffft-wrap.c
@@ -3,24 +3,23 @@
#if !defined PFFT_MACROS_ONLY
-#include "simd.h"
-#include
+#include "math-wrap.h"
-#define pffft_aligned_free _soxr_simd_aligned_free
-#define pffft_aligned_malloc _soxr_simd_aligned_malloc
-#define pffft_aligned_calloc _soxr_simd_aligned_calloc
+#if PFFFT_DOUBLE
+ #include "simd64.h"
+#else
+ #include "simd32.h"
+ #define sin(x) sinf(x)
+ #define cos(x) cosf(x)
+#endif
+
+#define pffft_aligned_free SIMD_ALIGNED_FREE
+#define pffft_aligned_malloc SIMD_ALIGNED_MALLOC
+#define pffft_aligned_calloc SIMD_ALIGNED_CALLOC
#undef inline
#define inline __inline
-#if defined _MSC_VER
- #define sin (float)sin
- #define cos (float)cos
-#else
- #define sin(x) sinf((float)(x))
- #define cos(x) cosf((float)(x))
-#endif
-
#endif
diff --git a/src/pffft.c b/src/pffft.c
index b68f86c..5729274 100644
--- a/src/pffft.c
+++ b/src/pffft.c
@@ -133,9 +133,11 @@ inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_p
*/
#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86))
+# define SIMD_SZ 4 /* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */
+
+#if !PFFFT_DOUBLE
#include
typedef __m128 v4sf;
-# define SIMD_SZ 4 /* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */
# define VZERO() _mm_setzero_ps()
# define VMUL(a,b) _mm_mul_ps(a,b)
# define VADD(a,b) _mm_add_ps(a,b)
@@ -148,6 +150,10 @@ typedef __m128 v4sf;
# define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0))
# define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0)
+#else
+#include "avx.h"
+#endif
+
/*
ARM NEON support macros
*/
@@ -181,6 +187,10 @@ typedef float32x4_t v4sf;
# endif
#endif
+#if PFFFT_DOUBLE
+#define float double
+#endif
+
/* fallback mode for situations where SSE/Altivec are not available, use scalar mode instead */
#ifdef PFFFT_SIMD_DISABLE
typedef float v4sf;
@@ -202,6 +212,8 @@ typedef float v4sf;
#define SVMUL(f,v) VMUL(LD_PS1(f),v)
#endif
+#if !defined PFFT_MACROS_ONLY
+
#if !defined(PFFFT_SIMD_DISABLE)
typedef union v4sf_union {
v4sf v;
@@ -214,7 +226,8 @@ typedef union v4sf_union {
#define assertv4(v,f0,f1,f2,f3) assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3))
/* detect bugs with the vector support macros */
-void validate_pffft_simd() {
+void validate_pffft_simd(void);
+void validate_pffft_simd(void) {
float f[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 };
v4sf_union a0, a1, a2, a3, t, u;
memcpy(a0.f, f, 4*sizeof(float));
@@ -230,7 +243,6 @@ void validate_pffft_simd() {
printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77);
t.v = VMADD(a1.v, a2.v,a0.v);
printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80);
-
INTERLEAVE2(a1.v,a2.v,t.v,u.v);
printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
@@ -271,8 +283,6 @@ void pffft_aligned_free(void *p) {
int pffft_simd_size() { return SIMD_SZ; }
#endif
-#if !defined PFFT_MACROS_ONLY
-
/*
passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2
*/
diff --git a/src/pffft.h b/src/pffft.h
index 5ff8466..63522ca 100644
--- a/src/pffft.h
+++ b/src/pffft.h
@@ -86,6 +86,10 @@
#ifdef __cplusplus
extern "C" {
+#endif
+
+#if PFFFT_DOUBLE
+#define float double
#endif
/* opaque struct holding internal stuff (precomputed twiddle factors)
@@ -182,6 +186,8 @@ extern "C" {
int pffft_simd_size();
#endif
+#undef float
+
#ifdef __cplusplus
}
#endif
diff --git a/src/pffft32.c b/src/pffft32.c
index fb7400f..f480809 100644
--- a/src/pffft32.c
+++ b/src/pffft32.c
@@ -1,11 +1,14 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
-#define _soxr_simd_aligned_free free
-#define _soxr_simd_aligned_malloc malloc
+#define SIMD_ALIGNED_FREE free
+#define SIMD_ALIGNED_MALLOC malloc
#define PFFFT_SIMD_DISABLE
+#define PFFFT_DOUBLE 0
#include "pffft-wrap.c"
+
#include "filter.h"
+#include "rdft_t.h"
static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);}
static void delete_setup(void * setup) {pffft_destroy_setup(setup);}
@@ -15,8 +18,8 @@ static void backward (int length, void * setup, float * H, float * scratch) {pff
static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H); (void)length;}
static int multiplier(void) {return 1;}
+static int flags(void) {return RDFT_NEEDS_SCRATCH;}
-typedef void (* fn_t)(void);
fn_t _soxr_rdft32_cb[] = {
(fn_t)setup,
(fn_t)setup,
@@ -29,4 +32,8 @@ fn_t _soxr_rdft32_cb[] = {
(fn_t)_soxr_ordered_partial_convolve_f,
(fn_t)multiplier,
(fn_t)pffft_reorder_back,
+ (fn_t)malloc,
+ (fn_t)calloc,
+ (fn_t)free,
+ (fn_t)flags,
};
diff --git a/src/pffft32s.c b/src/pffft32s.c
index b067ad2..7798a45 100644
--- a/src/pffft32s.c
+++ b/src/pffft32s.c
@@ -1,17 +1,20 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
+#define PFFFT_DOUBLE 0
#include "pffft-wrap.c"
+#include "rdft_t.h"
+
static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);}
static void forward (int length, void * setup, float * h, float * scratch) {pffft_transform (setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
static void oforward (int length, void * setup, float * h, float * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
static void backward (int length, void * setup, float * H, float * scratch) {pffft_transform (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
-static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H); (void)length;}
+static void convolve(int length, void * setup, float * H, float const * with) {pffft_zconvolve(setup, H, with, H); (void)length;}
static int multiplier(void) {return 1;}
+static int flags(void) {return RDFT_IS_SIMD | RDFT_NEEDS_SCRATCH;}
-typedef void (* fn_t)(void);
fn_t _soxr_rdft32s_cb[] = {
(fn_t)setup,
(fn_t)setup,
@@ -21,7 +24,11 @@ fn_t _soxr_rdft32s_cb[] = {
(fn_t)backward,
(fn_t)obackward,
(fn_t)convolve,
- (fn_t)_soxr_ordered_partial_convolve_simd,
+ (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
(fn_t)multiplier,
(fn_t)pffft_reorder_back,
+ (fn_t)SIMD_ALIGNED_MALLOC,
+ (fn_t)SIMD_ALIGNED_CALLOC,
+ (fn_t)SIMD_ALIGNED_FREE,
+ (fn_t)flags,
};
diff --git a/src/pffft64s.c b/src/pffft64s.c
new file mode 100644
index 0000000..7c37c9d
--- /dev/null
+++ b/src/pffft64s.c
@@ -0,0 +1,34 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#define PFFFT_DOUBLE 1
+#include "pffft-wrap.c"
+
+#include "rdft_t.h"
+
+static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);}
+static void forward (int length, void * setup, double * h, double * scratch) {pffft_transform (setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
+static void oforward (int length, void * setup, double * h, double * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
+static void backward (int length, void * setup, double * H, double * scratch) {pffft_transform (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
+static void obackward(int length, void * setup, double * H, double * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
+static void convolve(int length, void * setup, double * H, double const * with) {pffft_zconvolve(setup, H, with, H); (void)length;}
+static int multiplier(void) {return 1;}
+static int flags(void) {return RDFT_IS_SIMD | RDFT_NEEDS_SCRATCH;}
+
+fn_t _soxr_rdft64s_cb[] = {
+ (fn_t)setup,
+ (fn_t)setup,
+ (fn_t)pffft_destroy_setup,
+ (fn_t)forward,
+ (fn_t)oforward,
+ (fn_t)backward,
+ (fn_t)obackward,
+ (fn_t)convolve,
+ (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
+ (fn_t)multiplier,
+ (fn_t)pffft_reorder_back,
+ (fn_t)SIMD_ALIGNED_MALLOC,
+ (fn_t)SIMD_ALIGNED_CALLOC,
+ (fn_t)SIMD_ALIGNED_FREE,
+ (fn_t)flags,
+};
diff --git a/src/poly-fir.h b/src/poly-fir.h
index f7b4261..94db90e 100644
--- a/src/poly-fir.h
+++ b/src/poly-fir.h
@@ -1,97 +1,138 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
-/* Resample using an interpolated poly-phase FIR with length LEN.*/
-/* Input must be followed by LEN-1 samples. */
+/* Resample using an interpolated poly-phase FIR with length LEN. */
+/* Input must be followed by FIR_LENGTH-1 samples. */
-#define a (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 0,j))
-#define b (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 1,j))
-#define c (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 2,j))
-#define d (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 3,j))
-#if COEF_INTERP == 0
- #define _ sum += a *in[j], ++j;
-#elif COEF_INTERP == 1
- #define _ sum += (b *x + a)*in[j], ++j;
-#elif COEF_INTERP == 2
- #define _ sum += ((c *x + b)*x + a)*in[j], ++j;
-#elif COEF_INTERP == 3
- #define _ sum += (((d*x + c)*x + b)*x + a)*in[j], ++j;
-#else
+#if COEF_INTERP != 1 && COEF_INTERP != 2 && COEF_INTERP != 3
#error COEF_INTERP
#endif
+#if SIMD_AVX || SIMD_SSE || SIMD_NEON
+ #define N (FIR_LENGTH>>2)
+
+ #if COEF_INTERP == 1
+ #define _ sum=vMac(vMac(b,X,a),vLdu(in+j*4),sum), ++j;
+ #elif COEF_INTERP == 2
+ #define _ sum=vMac(vMac(vMac(c,X,b),X,a),vLdu(in+j*4),sum), ++j;
+ #else
+ #define _ sum=vMac(vMac(vMac(vMac(d,X,c),X,b),X,a),vLdu(in+j*4),sum), ++j;
+ #endif
+
+ #define a coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-0)]
+ #define b coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-1)]
+ #define c coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-2)]
+ #define d coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-3)]
+
+ #define BEGINNING v4_t X = vLds(x), sum = vZero(); \
+ v4_t const * const __restrict coefs = (v4_t *)COEFS
+ #define MIDDLE switch (N) {case 3: CONVOLVE(3); break; case 4: CONVOLVE(4); \
+ break; case 5: CONVOLVE(5); break; default: CONVOLVE(N); }
+ #define END vStorSum(output+i, sum)
+ #define cc(n) case n: core(n); break
+ #define CORE(n) switch (n) {cc(2); cc(3); cc(4); cc(5); cc(6); default: core(n);}
+#else
+ #define N FIR_LENGTH
+
+ #if COEF_INTERP == 1
+ #define _ sum += (b*x + a)*in[j], ++j;
+ #elif COEF_INTERP == 2
+ #define _ sum += ((c*x + b)*x + a)*in[j], ++j;
+ #else
+ #define _ sum += (((d*x + c)*x + b)*x + a)*in[j], ++j;
+ #endif
+
+ #define a (coef(COEFS, COEF_INTERP, N, phase, 0,j))
+ #define b (coef(COEFS, COEF_INTERP, N, phase, 1,j))
+ #define c (coef(COEFS, COEF_INTERP, N, phase, 2,j))
+ #define d (coef(COEFS, COEF_INTERP, N, phase, 3,j))
+
+ #define BEGINNING sample_t sum = 0
+ #define MIDDLE CONVOLVE(N)
+ #define END output[i] = sum
+ #define CORE(n) core(n)
+#endif
+
+#define fphpCore(n) \
+ if (p->use_hi_prec_clock) { \
+ float_step_t at = p->at.flt; \
+ for (i = 0; (int)at < num_in; ++i, at += p->step.flt) { \
+ sample_t const * const __restrict in = input + (int)at; \
+ float_step_t frac = at - (int)at; \
+ int phase = (int)(frac * (1 << PHASE_BITS)); \
+ sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase); \
+ int j = 0; \
+ BEGINNING; CONVOLVE(n); END; \
+ } \
+ fifo_read(&p->fifo, (int)at, NULL); \
+ p->at.flt = at - (int)at; \
+ } else
+
+#define hpCore(n) \
+ if (p->use_hi_prec_clock) { \
+ for (i = 0; p->at.integer < num_in; ++i, \
+ p->at.fix.ls.all += p->step.fix.ls.all, \
+ p->at.whole += p->step.whole + (p->at.fix.ls.all < p->step.fix.ls.all)) { \
+ sample_t const * const __restrict in = input + p->at.integer; \
+ uint32_t frac = p->at.fraction; \
+ int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ \
+ sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); /* low-order bits, scaled to [0,1) */ \
+ int j = 0; \
+ BEGINNING; CONVOLVE(n); END; \
+ } \
+ fifo_read(&p->fifo, p->at.integer, NULL); \
+ p->at.integer = 0; \
+ } else
+
+#define spCore(n) { \
+ for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) { \
+ sample_t const * const __restrict in = input + p->at.integer; \
+ uint32_t frac = p->at.fraction; \
+ int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ \
+ sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); /* low-order bits, scaled to [0,1) */ \
+ int j = 0; \
+ BEGINNING; CONVOLVE(n); END; \
+ } \
+ fifo_read(&p->fifo, p->at.integer, NULL); \
+ p->at.integer = 0; }
+
+#if defined HI_PREC_CLOCK && FLOAT_HI_PREC_CLOCK
+ #define core(n) fphpCore(n) spCore(n)
+#elif defined HI_PREC_CLOCK
+ #define core(n) hpCore(n) spCore(n)
+#else
+ #define core(n) spCore(n)
+#endif
+
+
+
static void FUNCTION(stage_t * p, fifo_t * output_fifo)
{
sample_t const * input = stage_read_p(p);
- int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio);
- sample_t * output = fifo_reserve(output_fifo, max_num_out);
+ int num_in = min(stage_occupancy(p), p->input_size);
+ int i, max_num_out = 1 + (int)(num_in * p->out_in_ratio);
+ sample_t * const __restrict output = fifo_reserve(output_fifo, max_num_out);
-#if defined HI_PREC_CLOCK
-#if FLOAT_HI_PREC_CLOCK
- if (p->use_hi_prec_clock) {
- float_step_t at = p->at.flt;
- for (i = 0; (int)at < num_in; ++i, at += p->step.flt) {
- sample_t const * in = input + (int)at;
- float_step_t frac = at - (int)at;
- int phase = (int)(frac * (1 << PHASE_BITS));
-#if COEF_INTERP > 0
- sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase);
-#endif
- sample_t sum = 0;
- int j = 0;
- CONVOLVE
- output[i] = sum;
- }
- fifo_read(&p->fifo, (int)at, NULL);
- p->at.flt = at - (int)at;
- } else
-#else
- if (p->use_hi_prec_clock) {
- for (i = 0; p->at.integer < num_in; ++i,
- p->at.fix.ls.all += p->step.fix.ls.all,
- p->at.whole += p->step.whole + (p->at.fix.ls.all < p->step.fix.ls.all)) {
- sample_t const * in = input + p->at.integer;
- uint32_t frac = p->at.fraction;
- int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */
-#if COEF_INTERP > 0 /* low-order bits, scaled to [0,1) */
- sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32));
-#endif
- sample_t sum = 0;
- int j = 0;
- CONVOLVE
- output[i] = sum;
- }
- fifo_read(&p->fifo, p->at.integer, NULL);
- p->at.integer = 0;
- } else
-#endif
-#endif
- {
- for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) {
- sample_t const * in = input + p->at.integer;
- uint32_t frac = p->at.fraction;
- int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */
-#if COEF_INTERP > 0 /* low-order bits, scaled to [0,1) */
- sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32));
-#endif
- sample_t sum = 0;
- int j = 0;
- CONVOLVE
- output[i] = sum;
- }
- fifo_read(&p->fifo, p->at.integer, NULL);
- p->at.integer = 0;
- }
+ CORE(N);
assert(max_num_out - i >= 0);
fifo_trim_by(output_fifo, max_num_out - i);
}
+
+
#undef _
#undef a
#undef b
#undef c
#undef d
+#undef CORE
+#undef cc
+#undef core
#undef COEF_INTERP
+#undef N
+#undef BEGINNING
+#undef MIDDLE
+#undef END
#undef CONVOLVE
#undef FIR_LENGTH
#undef FUNCTION
diff --git a/src/poly-fir0.h b/src/poly-fir0.h
index 52d85b3..0f28c69 100644
--- a/src/poly-fir0.h
+++ b/src/poly-fir0.h
@@ -1,32 +1,56 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
-/* Resample using a non-interpolated poly-phase FIR with length LEN.*/
-/* Input must be followed by LEN-1 samples. */
+/* Resample using a non-interpolated poly-phase FIR with length LEN. */
+/* Input must be followed by FIR_LENGTH-1 samples. */
-#define _ sum += (coef(p->shared->poly_fir_coefs, 0, FIR_LENGTH, rem, 0, j)) *at[j], ++j;
+#if SIMD_AVX || SIMD_SSE || SIMD_NEON
+ #define N (FIR_LENGTH>>2)
+ #define BEGINNING v4_t sum = vZero(); \
+ v4_t const * const __restrict coefs = (v4_t *)COEFS + N * rem;
+ #define _ sum = vMac(vLdu(at+j*4), coefs[j], sum), ++j;
+ #define END vStorSum(output+i, sum)
+ #define cc(n) case n: core(n); break
+ #define CORE(n) switch (n) {cc(2); cc(3); cc(4); cc(5); cc(6); default: core(n);}
+#else
+ #define N FIR_LENGTH
+ #define BEGINNING sample_t sum = 0; \
+ sample_t const * const __restrict coefs = (sample_t *)COEFS + N * rem;
+ #define _ sum += coefs[j]*at[j], ++j;
+ #define END output[i] = sum
+ #define CORE(n) core(n)
+#endif
+
+#define core(n) \
+ for (i = 0; p->at.integer < num_in * p->L; ++i, \
+ p->at.integer += p->step.integer) { \
+ int const div = p->at.integer / p->L, rem = p->at.integer % p->L; \
+ sample_t const * const __restrict at = input + div; \
+ int j = 0; BEGINNING; CONVOLVE(n); END;}
static void FUNCTION(stage_t * p, fifo_t * output_fifo)
{
- sample_t const * input = stage_read_p(p);
- int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio);
- sample_t * output = fifo_reserve(output_fifo, max_num_out);
+ int num_in = min(stage_occupancy(p), p->input_size);
+ if (num_in) {
+ sample_t const * input = stage_read_p(p);
+ int i, num_out = (num_in * p->L - p->at.integer + p->step.integer - 1) / p->step.integer;
+ sample_t * __restrict output = fifo_reserve(output_fifo, num_out);
- for (i = 0; p->at.integer < num_in * p->L; ++i, p->at.integer += p->step.integer) {
- int div = p->at.integer / p->L, rem = p->at.integer % p->L;
- sample_t const * at = input + div;
- sample_t sum = 0;
- int j = 0;
- CONVOLVE
- output[i] = sum;
+ CORE(N);
+ assert(i == num_out);
+ fifo_read(&p->fifo, p->at.integer / p->L, NULL);
+ p->at.integer = p->at.integer % p->L;
}
- assert(max_num_out - i >= 0);
- fifo_trim_by(output_fifo, max_num_out - i);
- fifo_read(&p->fifo, p->at.integer / p->L, NULL);
- p->at.integer = p->at.integer % p->L;
}
#undef _
+#undef CORE
+#undef cc
+#undef core
+#undef N
+#undef BEGINNING
+#undef MIDDLE
+#undef END
#undef CONVOLVE
#undef FIR_LENGTH
#undef FUNCTION
diff --git a/src/rate.h b/src/rate.h
deleted file mode 100644
index b0598f7..0000000
--- a/src/rate.h
+++ /dev/null
@@ -1,726 +0,0 @@
-/* SoX Resampler Library Copyright (c) 2007-14 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1 See LICENCE for details. */
-
-#include
-#include
-#include
-#include
-
-#include "filter.h"
-
-#if defined SOXR_LIB
-#include "internal.h"
-
-typedef void (* fn_t)(void);
-extern fn_t RDFT_CB[11];
-
-#define rdft_forward_setup (*(void * (*)(int))RDFT_CB[0])
-#define rdft_backward_setup (*(void * (*)(int))RDFT_CB[1])
-#define rdft_delete_setup (*(void (*)(void *))RDFT_CB[2])
-#define rdft_forward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[3])
-#define rdft_oforward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[4])
-#define rdft_backward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[5])
-#define rdft_obackward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[6])
-#define rdft_convolve (*(void (*)(int, void *, sample_t *, sample_t const *))RDFT_CB[7])
-#define rdft_convolve_portion (*(void (*)(int, sample_t *, sample_t const *))RDFT_CB[8])
-#define rdft_multiplier (*(int (*)(void))RDFT_CB[9])
-#define rdft_reorder_back (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[10])
-
-#endif
-
-#if RATE_SIMD /* Align for SIMD: */
- #include "simd.h"
-#if 0 /* Not using this yet. */
- #define RATE_SIMD_POLY 1
- #define num_coefs4 ((num_coefs + 3) & ~3)
- #define coefs4_check(i) ((i) < num_coefs)
-#else
- #define RATE_SIMD_POLY 0
- #define num_coefs4 num_coefs
- #define coefs4_check(i) 1
-#endif
-
- #define aligned_free _soxr_simd_aligned_free
- #define aligned_malloc _soxr_simd_aligned_malloc
- #define aligned_calloc _soxr_simd_aligned_calloc
-#if 0
- #define FIFO_REALLOC aligned_realloc
- #define FIFO_MALLOC aligned_malloc
- #define FIFO_FREE aligned_free
-
- static void * aligned_realloc(void * q, size_t nb_bytes, size_t copy_bytes) {
- void * p = aligned_malloc(nb_bytes);
- if (p) memcpy(p, q, copy_bytes);
- aligned_free(q);
- return p;
- }
-#endif
-#else
- #define RATE_SIMD_POLY 0
- #define num_coefs4 num_coefs
- #define coefs4_check(i) 1
-
- #define aligned_free free
- #define aligned_malloc malloc
- #define aligned_calloc calloc
-#endif
-
-#define FIFO_SIZE_T int
-#include "fifo.h"
-
-typedef union { /* Int64 in parts */
- #if HAVE_BIGENDIAN
- struct {int32_t ms; uint32_t ls;} parts;
- #else
- struct {uint32_t ls; int32_t ms;} parts;
- #endif
- int64_t all;
-} int64p_t;
-
-typedef union { /* Uint64 in parts */
- #if HAVE_BIGENDIAN
- struct {uint32_t ms, ls;} parts;
- #else
- struct {uint32_t ls, ms;} parts;
- #endif
- uint64_t all;
-} uint64p_t;
-
-#define FLOAT_HI_PREC_CLOCK 0 /* Non-float hi-prec has ~96 bits. */
-#define float_step_t long double /* __float128 is also a (slow) option */
-
-#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) coef_p[(fir_len) * ((interp_order) + 1) * (phase_num) + ((interp_order) + 1) * (fir_coef_num) + (interp_order - coef_interp_num)]
-
-#define raw_coef_t double
-
-static sample_t * prepare_coefs(raw_coef_t const * coefs, int num_coefs,
- int num_phases, int interp_order, double multiplier)
-{
- int i, j, length = num_coefs4 * num_phases;
- sample_t * result = malloc((size_t)(length * (interp_order + 1)) * sizeof(*result));
- double fm1 = coefs[0], f1 = 0, f2 = 0;
-
- for (i = num_coefs4 - 1; i >= 0; --i)
- for (j = num_phases - 1; j >= 0; --j) {
- double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */
- int pos = i * num_phases + j - 1;
- fm1 = coefs4_check(i) && pos > 0 ? coefs[pos - 1] * multiplier : 0;
- switch (interp_order) {
- case 1: b = f1 - f0; break;
- case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break;
- case 3: c=.5*(f1+fm1)-f0;d=(1/6.)*(f2-f1+fm1-f0-4*c);b=f1-f0-d-c; break;
- default: if (interp_order) assert(0);
- }
- #define coef_coef(x) \
- coef(result, interp_order, num_coefs4, j, x, num_coefs4 - 1 - i)
- coef_coef(0) = (sample_t)f0;
- if (interp_order > 0) coef_coef(1) = (sample_t)b;
- if (interp_order > 1) coef_coef(2) = (sample_t)c;
- if (interp_order > 2) coef_coef(3) = (sample_t)d;
- #undef coef_coef
- f2 = f1, f1 = f0;
- }
- return result;
-}
-
-typedef struct {
- int dft_length, num_taps, post_peak;
- void * dft_forward_setup, * dft_backward_setup;
- sample_t * coefs;
-} dft_filter_t;
-
-typedef struct { /* So generated filter coefs may be shared between channels */
- sample_t * poly_fir_coefs;
- dft_filter_t dft_filter[2];
-} rate_shared_t;
-
-typedef enum {
- irrational_stage = 1,
- cubic_stage,
- dft_stage,
- half_stage,
- rational_stage
-} stage_type_t;
-
-struct stage;
-typedef void (* stage_fn_t)(struct stage * input, fifo_t * output);
-#define MULT32 (65536. * 65536.)
-
-typedef union { /* Fixed point arithmetic */
- struct {uint64p_t ls; int64p_t ms;} fix;
- float_step_t flt;
-} step_t;
-
-typedef struct stage {
- /* Common to all stage types: */
- stage_type_t type;
- stage_fn_t fn;
- fifo_t fifo;
- int pre; /* Number of past samples to store */
- int pre_post; /* pre + number of future samples to store */
- int preload; /* Number of zero samples to pre-load the fifo */
- double out_in_ratio; /* For buffer management. */
-
- /* For a stage with variable (run-time generated) filter coefs: */
- rate_shared_t * shared;
- unsigned dft_filter_num; /* Which, if any, of the 2 DFT filters to use */
- sample_t * dft_scratch, * dft_out;
-
- /* For a stage with variable L/M: */
- step_t at, step;
- bool use_hi_prec_clock;
- int L, remM;
- int n, phase_bits, block_len;
- double mult, phase0;
-} stage_t;
-
-#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post)
-#define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre)
-
-static void cubic_stage_fn(stage_t * p, fifo_t * output_fifo)
-{
- int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio);
- sample_t const * input = stage_read_p(p);
- sample_t * output = fifo_reserve(output_fifo, max_num_out);
-
-#define integer fix.ms.parts.ms
-#define fraction fix.ms.parts.ls
-#define whole fix.ms.all
- for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) {
- sample_t const * s = input + p->at.integer;
- double x = p->at.fraction * (1 / MULT32);
- double b = .5*(s[1]+s[-1])-*s, a = (1/6.)*(s[2]-s[1]+s[-1]-*s-4*b);
- double c = s[1]-*s-a-b;
- output[i] = (sample_t)(p->mult * (((a*x + b)*x + c)*x + *s));
- }
- assert(max_num_out - i >= 0);
- fifo_trim_by(output_fifo, max_num_out - i);
- fifo_read(&p->fifo, p->at.integer, NULL);
- p->at.integer = 0;
-}
-
-#if RATE_SIMD
- #define dft_out p->dft_out
-#else
- #define dft_out output
-#endif
-
-static void dft_stage_fn(stage_t * p, fifo_t * output_fifo)
-{
- sample_t * output;
- int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
- rate_shared_t const * s = p->shared;
- dft_filter_t const * f = &s->dft_filter[p->dft_filter_num];
- int const overlap = f->num_taps - 1;
-
- while (p->at.integer + p->L * num_in >= f->dft_length) {
- div_t divd = div(f->dft_length - overlap - p->at.integer + p->L - 1, p->L);
- sample_t const * input = fifo_read_ptr(&p->fifo);
- fifo_read(&p->fifo, divd.quot, NULL);
- num_in -= divd.quot;
-
- output = fifo_reserve(output_fifo, f->dft_length);
-
- if (lsx_is_power_of_2(p->L)) { /* F-domain */
- int portion = f->dft_length / p->L;
- memcpy(dft_out, input, (unsigned)portion * sizeof(*dft_out));
- rdft_oforward(portion, f->dft_forward_setup, dft_out, p->dft_scratch);
- for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */
- dft_out[i] = dft_out[(portion << 1) - i],
- dft_out[i+1] = -dft_out[(portion << 1) - i + 1];
- dft_out[portion] = dft_out[1];
- dft_out[portion + 1] = 0;
- dft_out[1] = dft_out[0];
-
- for (portion <<= 1; i < f->dft_length; i += portion, portion <<= 1) {
- memcpy(dft_out + i, dft_out, (size_t)portion * sizeof(*dft_out));
- dft_out[i + 1] = 0;
- }
- if (p->step.integer > 0)
- rdft_reorder_back(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
- } else {
- if (p->L == 1)
- memcpy(dft_out, input, (size_t)f->dft_length * sizeof(*dft_out));
- else {
- memset(dft_out, 0, (size_t)f->dft_length * sizeof(*dft_out));
- for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L)
- dft_out[i] = input[j];
- p->at.integer = p->L - 1 - divd.rem;
- }
- if (p->step.integer > 0)
- rdft_forward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
- else
- rdft_oforward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
- }
-
- if (p->step.integer > 0) {
- rdft_convolve(f->dft_length, f->dft_backward_setup, dft_out, f->coefs);
- rdft_backward(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
-#if RATE_SIMD
- if (p->step.integer == 1)
- memcpy(output, dft_out, (size_t)f->dft_length * sizeof(sample_t));
-#endif
- if (p->step.integer != 1) {
- for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j,
- i += p->step.integer)
- output[j] = dft_out[i];
- p->remM = i - (f->dft_length - overlap);
- fifo_trim_by(output_fifo, f->dft_length - j);
- }
- else fifo_trim_by(output_fifo, overlap);
- }
- else { /* F-domain */
- int m = -p->step.integer;
- rdft_convolve_portion(f->dft_length >> m, dft_out, f->coefs);
- rdft_obackward(f->dft_length >> m, f->dft_backward_setup, dft_out, p->dft_scratch);
-#if RATE_SIMD
- memcpy(output, dft_out, (size_t)(f->dft_length >> m) * sizeof(sample_t));
-#endif
- fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >>m);
- }
- }
-}
-
-#undef dft_out
-
-/* Set to 4 x nearest power of 2 */
-/* or half of that if danger of causing too many cache misses. */
-static int set_dft_length(int num_taps, int min, int large)
-{
- double d = log((double)num_taps) / log(2.);
- return 1 << range_limit((int)(d + 2.77), min, max((int)(d + 1.77), large));
-}
-
-static void dft_stage_init(
- unsigned instance, double Fp, double Fs, double Fn, double att,
- double phase, stage_t * p, int L, int M, double * multiplier,
- int min_dft_size, int large_dft_size)
-{
- dft_filter_t * f = &p->shared->dft_filter[instance];
- int num_taps = 0, dft_length = f->dft_length, i;
- bool f_domain_m = abs(3-M) == 1 && Fs <= 1;
-
- if (!dft_length) {
- int k = phase == 50 && lsx_is_power_of_2(L) && Fn == L? L << 1 : 4;
- double * h = lsx_design_lpf(Fp, Fs, Fn, att, &num_taps, -k, -1.);
-
- if (phase != 50)
- lsx_fir_to_phase(&h, &num_taps, &f->post_peak, phase);
- else f->post_peak = num_taps / 2;
-
- dft_length = set_dft_length(num_taps, min_dft_size, large_dft_size);
- f->coefs = aligned_calloc((size_t)dft_length, sizeof(*f->coefs));
- for (i = 0; i < num_taps; ++i)
- f->coefs[(i + dft_length - num_taps + 1) & (dft_length - 1)]
- = (sample_t)(h[i] * ((1. / dft_length) * rdft_multiplier() * L * *multiplier));
- free(h);
- }
-
-#if RATE_SIMD
- p->dft_out = aligned_malloc(sizeof(sample_t) * (size_t)dft_length);
-#endif
-#if 1 /* In fact, currently, only pffft needs this. */
- p->dft_scratch = aligned_malloc(2 * sizeof(sample_t) * (size_t)dft_length);
-#endif
-
- if (!f->dft_length) {
- void * coef_setup = rdft_forward_setup(dft_length);
- int Lp = lsx_is_power_of_2(L)? L : 1;
- int Mp = f_domain_m? M : 1;
- f->dft_forward_setup = rdft_forward_setup(dft_length / Lp);
- f->dft_backward_setup = rdft_backward_setup(dft_length / Mp);
- if (Mp == 1)
- rdft_forward(dft_length, coef_setup, f->coefs, p->dft_scratch);
- else
- rdft_oforward(dft_length, coef_setup, f->coefs, p->dft_scratch);
- rdft_delete_setup(coef_setup);
- f->num_taps = num_taps;
- f->dft_length = dft_length;
- lsx_debug("fir_len=%i dft_length=%i Fp=%g Fs=%g Fn=%g att=%g %i/%i",
- num_taps, dft_length, Fp, Fs, Fn, att, L, M);
- }
- *multiplier = 1;
- p->out_in_ratio = (double)L / M;
- p->type = dft_stage;
- p->fn = dft_stage_fn;
- p->preload = f->post_peak / L;
- p->at.integer = f->post_peak % L;
- p->L = L;
- p->step.integer = f_domain_m? -M/2 : M;
- p->dft_filter_num = instance;
- p->block_len = f->dft_length - (f->num_taps - 1);
- p->phase0 = p->at.integer / p->L;
-}
-
-#include "filters.h"
-
-typedef struct {
- double factor;
- uint64_t samples_in, samples_out;
- int num_stages;
- stage_t * stages;
-} rate_t;
-
-#define pre_stage p->stages[shift]
-#define arb_stage p->stages[shift + have_pre_stage]
-#define post_stage p->stages[shift + have_pre_stage + have_arb_stage]
-#define have_pre_stage (preM * preL != 1)
-#define have_arb_stage (arbM * arbL != 1)
-#define have_post_stage (postM * postL != 1)
-
-#define TO_3dB(a) ((1.6e-6*a-7.5e-4)*a+.646)
-#define LOW_Q_BW0 (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */
-
-typedef enum {
- rolloff_none, rolloff_small /* <= 0.01 dB */, rolloff_medium /* <= 0.35 dB */
-} rolloff_t;
-
-
-static char const * rate_init(
- /* Private work areas (to be supplied by the client): */
- rate_t * p, /* Per audio channel. */
- rate_shared_t * shared, /* Between channels (undergoing same rate change)*/
-
- /* Public parameters: Typically */
- double factor, /* Input rate divided by output rate. */
- double bits, /* Required bit-accuracy (pass + stop) 16|20|28 */
- double phase, /* Linear/minimum etc. filter phase. 50 */
- double passband_end, /* 0dB pt. bandwidth to preserve; nyquist=1 0.913*/
- double stopband_begin, /* Aliasing/imaging control; > passband_end 1 */
- rolloff_t rolloff, /* Pass-band roll-off small */
- bool maintain_3dB_pt, /* true */
- double multiplier, /* Linear gain to apply during conversion. 1 */
-
- /* Primarily for test/development purposes: */
- bool use_hi_prec_clock, /* Increase irrational ratio accuracy. false */
- int interpolator, /* Force a particular coef interpolator. -1 */
- size_t max_coefs_size, /* k bytes of coefs to try to keep below. 400 */
- bool noSmallIntOpt, /* Disable small integer optimisations. false */
- int log2_min_dft_size,
- int log2_large_dft_size)
-{
- double att = (bits + 1) * linear_to_dB(2.), attArb = att; /* pass + stop */
- double tbw0 = 1 - passband_end, Fs_a = stopband_begin;
- double arbM = factor, tbw_tighten = 1;
- int n = 0, i, preL = 1, preM = 1, shift = 0, arbL = 1, postL = 1, postM = 1;
- bool upsample = false, rational = false, iOpt = !noSmallIntOpt;
- int mode = rolloff > rolloff_small? factor > 1 || passband_end > LOW_Q_BW0:
- (int)ceil(2 + (bits - 17) / 4);
- stage_t * s;
-
- assert(factor > 0);
- assert(!bits || (15 <= bits && bits <= 33));
- assert(0 <= phase && phase <= 100);
- assert(.53 <= passband_end);
- assert(stopband_begin <= 1.2);
- assert(passband_end + .005 < stopband_begin);
-
- p->factor = factor;
- if (bits!=0) while (!n++) { /* Determine stages: */
- int try, L, M, x, maxL = interpolator > 0? 1 : mode? 2048 :
- (int)ceil((double)max_coefs_size * 1000. / (U100_l * sizeof(sample_t)));
- double d, epsilon = 0, frac;
- upsample = arbM < 1;
- for (i = (int)(arbM * .5), shift = 0; i >>= 1; arbM *= .5, ++shift);
- preM = upsample || (arbM > 1.5 && arbM < 2);
- postM = 1 + (arbM > 1 && preM), arbM /= postM;
- preL = 1 + (!preM && arbM < 2) + (upsample && mode), arbM *= preL;
- if ((frac = arbM - (int)arbM)!=0)
- epsilon = fabs(floor(frac * MULT32 + .5) / (frac * MULT32) - 1);
- for (i = 1, rational = frac==0; i <= maxL && !rational; ++i) {
- d = frac * i, try = (int)(d + .5);
- if ((rational = fabs(try / d - 1) <= epsilon)) { /* No long doubles! */
- if (try == i)
- arbM = ceil(arbM), shift += x = arbM > 3, arbM /= 1 + x;
- else arbM = i * (int)arbM + try, arbL = i;
- }
- }
- L = preL * arbL, M = (int)(arbM * postM), x = (L|M)&1, L >>= !x, M >>= !x;
- if (iOpt && postL == 1 && (d = preL * arbL / arbM) > 4 && d != 5) {
- for (postL = 4, i = (int)(d / 16); (i >>= 1) && postL < 256; postL <<= 1);
- arbM = arbM * postL / arbL / preL, arbL = 1, n = 0;
- } else if (rational && (max(L, M) < 3 + 2 * iOpt || L * M < 6 * iOpt))
- preL = L, preM = M, arbM = arbL = postM = 1;
- if (!mode && (!rational || !n))
- ++mode, n = 0;
- }
-
- p->num_stages = shift + have_pre_stage + have_arb_stage + have_post_stage;
- if (!p->num_stages && multiplier != 1) {
- bits = arbL = 0; /* Use cubic_stage in this case. */
- ++p->num_stages;
- }
- p->stages = calloc((size_t)p->num_stages + 1, sizeof(*p->stages));
- for (i = 0; i < p->num_stages; ++i)
- p->stages[i].shared = shared;
-
- if ((n = p->num_stages) > 1) { /* Att. budget: */
- if (have_arb_stage)
- att += linear_to_dB(2.), attArb = att, --n;
- att += linear_to_dB((double)n);
- }
-
- for (n = 0; (size_t)n + 1 < array_length(half_firs) && att > half_firs[n].att; ++n);
- for (i = 0, s = p->stages; i < shift; ++i, ++s) {
- s->type = half_stage;
- s->fn = half_firs[n].fn;
- s->pre_post = 4 * half_firs[n].num_coefs;
- s->preload = s->pre = s->pre_post >> 1;
- }
-
- if (have_pre_stage) {
- if (maintain_3dB_pt && have_post_stage) { /* Trans. bands overlapping. */
- double tbw3 = tbw0 * TO_3dB(att); /* FFS: consider Fs_a. */
- double x = ((2.1429e-4 - 5.2083e-7 * att) * att - .015863) * att + 3.95;
- x = att * pow((tbw0 - tbw3) / (postM / (factor * postL) - 1 + tbw0), x);
- if (x > .035) {
- tbw_tighten = ((4.3074e-3 - 3.9121e-4 * x) * x - .040009) * x + 1.0014;
- lsx_debug("x=%g tbw_tighten=%g", x, tbw_tighten);
- }
- }
- dft_stage_init(0, 1 - tbw0 * tbw_tighten, Fs_a, preM? max(preL, preM) :
- arbM / arbL, att, phase, &pre_stage, preL, max(preM, 1), &multiplier,
- log2_min_dft_size, log2_large_dft_size);
- }
-
- if (bits==0 && have_arb_stage) { /* `Quick' cubic arb stage: */
- arb_stage.type = cubic_stage;
- arb_stage.fn = cubic_stage_fn;
- arb_stage.mult = multiplier, multiplier = 1;
- arb_stage.step.whole = (int64_t)(arbM * MULT32 + .5);
- arb_stage.pre_post = max(3, arb_stage.step.integer);
- arb_stage.preload = arb_stage.pre = 1;
- arb_stage.out_in_ratio = MULT32 / (double)arb_stage.step.whole;
- }
- else if (have_arb_stage) { /* Higher quality arb stage: */
- poly_fir_t const * f = &poly_firs[6*(upsample + !!preM) + mode - !upsample];
- int order, num_coefs = (int)f->interp[0].scalar, phase_bits, phases;
- size_t coefs_size;
- double x = .5, at, Fp, Fs, Fn, mult = upsample? 1 : arbL / arbM;
- poly_fir1_t const * f1;
-
- Fn = !upsample && preM? x = arbM / arbL : 1;
- Fp = !preM? mult : mode? .5 : 1;
- Fs = 2 - Fp; /* Ignore Fs_a; it would have little benefit here. */
- Fp *= 1 - tbw0;
- if (rolloff > rolloff_small && mode)
- Fp = !preM? mult * .5 - .125 : mult * .05 + .1;
- else if (rolloff == rolloff_small)
- Fp = Fs - (Fs - .148 * x - Fp * .852) * (.00813 * bits + .973);
-
- i = (interpolator < 0? !rational : max(interpolator, !rational)) - 1;
- do {
- f1 = &f->interp[++i];
- assert(f1->fn);
- if (i)
- arbM /= arbL, arbL = 1, rational = false;
- phase_bits = (int)ceil(f1->scalar + log(mult)/log(2.));
- phases = !rational? (1 << phase_bits) : arbL;
- if (f->interp[0].scalar==0) {
- int phases0 = max(phases, 19), n0 = 0;
- lsx_design_lpf(Fp, Fs, -Fn, attArb, &n0, phases0, f->beta);
- num_coefs = n0 / phases0 + 1, num_coefs += num_coefs & !preM;
- }
- if ((num_coefs & 1) && rational && (arbL & 1))
- phases <<= 1, arbL <<= 1, arbM *= 2;
- at = arbL * (arb_stage.phase0 = .5 * (num_coefs & 1));
- order = i + (i && mode > 4);
- coefs_size = (size_t)(num_coefs4 * phases * (order + 1)) * sizeof(sample_t);
- } while (interpolator < 0 && i < 2 && f->interp[i+1].fn &&
- coefs_size / 1000 > max_coefs_size);
-
- if (!arb_stage.shared->poly_fir_coefs) {
- int num_taps = num_coefs * phases - 1;
- raw_coef_t * coefs = lsx_design_lpf(
- Fp, Fs, Fn, attArb, &num_taps, phases, f->beta);
- arb_stage.shared->poly_fir_coefs = prepare_coefs(
- coefs, num_coefs, phases, order, multiplier);
- lsx_debug("fir_len=%i phases=%i coef_interp=%i size=%.3gk",
- num_coefs, phases, order, (double)coefs_size / 1000.);
- free(coefs);
- }
- multiplier = 1;
- arb_stage.type = rational? rational_stage : irrational_stage;
- arb_stage.fn = f1->fn;
- arb_stage.pre_post = num_coefs4 - 1;
- arb_stage.preload = ((num_coefs - 1) >> 1) + (num_coefs4 - num_coefs);
- arb_stage.n = num_coefs4;
- arb_stage.phase_bits = phase_bits;
- arb_stage.L = arbL;
- arb_stage.use_hi_prec_clock = mode > 1 && use_hi_prec_clock && !rational;
-#if FLOAT_HI_PREC_CLOCK
- if (arb_stage.use_hi_prec_clock) {
- arb_stage.at.flt = at;
- arb_stage.step.flt = arbM;
- arb_stage.out_in_ratio = (double)(arbL / arb_stage.step.flt);
- } else
-#endif
- {
- arb_stage.at.whole = (int64_t)(at * MULT32 + .5);
-#if !FLOAT_HI_PREC_CLOCK
- if (arb_stage.use_hi_prec_clock) {
- arb_stage.at.fix.ls.parts.ms = 0x80000000ul;
- arbM *= MULT32;
- arb_stage.step.whole = (int64_t)arbM;
- arbM -= (double)arb_stage.step.whole;
- arbM *= MULT32 * MULT32;
- arb_stage.step.fix.ls.all = (uint64_t)arbM;
- } else
-#endif
- arb_stage.step.whole = (int64_t)(arbM * MULT32 + .5);
- arb_stage.out_in_ratio = MULT32 * arbL / (double)arb_stage.step.whole;
- }
- }
-
- if (have_post_stage)
- dft_stage_init(1, 1 - (1 - (1 - tbw0) *
- (upsample? factor * postL / postM : 1)) * tbw_tighten, Fs_a,
- (double)max(postL, postM), att, phase, &post_stage, postL, postM,
- &multiplier, log2_min_dft_size, log2_large_dft_size);
-
-
- lsx_debug("%g: »%i⋅%i/%i⋅%i/%g⋅%i/%i",
- 1/factor, shift, preL, preM, arbL, arbM, postL, postM);
- for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) {
- fifo_create(&s->fifo, (int)sizeof(sample_t));
- memset(fifo_reserve(&s->fifo, s->preload), 0, sizeof(sample_t) * (size_t)s->preload);
- lsx_debug("%5i|%-5i preload=%i remL=%i o/i=%g",
- s->pre, s->pre_post - s->pre, s->preload, s->at.integer, s->out_in_ratio);
- }
- fifo_create(&s->fifo, (int)sizeof(sample_t));
- return 0;
-}
-
-static void rate_process(rate_t * p)
-{
- stage_t * stage = p->stages;
- int i;
- for (i = 0; i < p->num_stages; ++i, ++stage)
- stage->fn(stage, &(stage+1)->fifo);
-}
-
-static sample_t * rate_input(rate_t * p, sample_t const * samples, size_t n)
-{
- p->samples_in += n;
- return fifo_write(&p->stages[0].fifo, (int)n, samples);
-}
-
-static sample_t const * rate_output(rate_t * p, sample_t * samples, size_t * n)
-{
- fifo_t * fifo = &p->stages[p->num_stages].fifo;
- p->samples_out += *n = min(*n, (size_t)fifo_occupancy(fifo));
- return fifo_read(fifo, (int)*n, samples);
-}
-
-static void rate_flush(rate_t * p)
-{
- fifo_t * fifo = &p->stages[p->num_stages].fifo;
-#if defined _MSC_VER && _MSC_VER == 1200
- uint64_t samples_out = (uint64_t)(int64_t)((double)(int64_t)p->samples_in / p->factor + .5);
-#else
- uint64_t samples_out = (uint64_t)((double)p->samples_in / p->factor + .5);
-#endif
- size_t remaining = (size_t)(samples_out - p->samples_out);
-
- if ((size_t)fifo_occupancy(fifo) < remaining) {
- uint64_t samples_in = p->samples_in;
- sample_t * buff = calloc(1024, sizeof(*buff));
-
- while ((size_t)fifo_occupancy(fifo) < remaining) {
- rate_input(p, buff, 1024);
- rate_process(p);
- }
- fifo_trim_to(fifo, (int)remaining);
- p->samples_in = samples_in;
- free(buff);
- }
-}
-
-static void rate_close(rate_t * p)
-{
- rate_shared_t * shared = p->stages[0].shared;
- int i;
-
- for (i = 0; i <= p->num_stages; ++i) {
- stage_t * s = &p->stages[i];
- aligned_free(s->dft_scratch);
- aligned_free(s->dft_out);
- fifo_delete(&s->fifo);
- }
- if (shared) {
- for (i = 0; i < 2; ++i) {
- dft_filter_t * f= &shared->dft_filter[i];
- aligned_free(f->coefs);
- rdft_delete_setup(f->dft_forward_setup);
- rdft_delete_setup(f->dft_backward_setup);
- }
- free(shared->poly_fir_coefs);
- memset(shared, 0, sizeof(*shared));
- }
- free(p->stages);
-}
-
-#if defined SOXR_LIB
-static double rate_delay(rate_t * p)
-{
-#if defined _MSC_VER && _MSC_VER == 1200
- double samples_out = (double)(int64_t)p->samples_in / p->factor;
- return max(0, samples_out - (double)(int64_t)p->samples_out);
-#else
- double samples_out = (double)p->samples_in / p->factor;
- return max(0, samples_out - (double)p->samples_out);
-#endif
-}
-
-static void rate_sizes(size_t * shared, size_t * channel)
-{
- *shared = sizeof(rate_shared_t);
- *channel = sizeof(rate_t);
-}
-
-#include "soxr.h"
-
-static char const * rate_create(
- void * channel,
- void * shared,
- double io_ratio,
- soxr_quality_spec_t * q_spec,
- soxr_runtime_spec_t * r_spec,
- double scale)
-{
- return rate_init(
- channel, shared,
- io_ratio,
- q_spec->precision,
- q_spec->phase_response,
- q_spec->passband_end,
- q_spec->stopband_begin,
- (rolloff_t)"\1\2\0"[q_spec->flags & 3],
- !!(q_spec->flags & SOXR_MAINTAIN_3DB_PT),
- scale,
- !!(q_spec->flags & SOXR_HI_PREC_CLOCK),
- (int)(r_spec->flags & 3) - 1,
- r_spec->coef_size_kbytes,
- !!(r_spec->flags & SOXR_NOSMALLINTOPT),
- (int)r_spec->log2_min_dft_size,
- (int)r_spec->log2_large_dft_size);
-}
-
-static char const * id(void)
-{
- return RATE_ID;
-}
-
-fn_t RATE_CB[] = {
- (fn_t)rate_input,
- (fn_t)rate_process,
- (fn_t)rate_output,
- (fn_t)rate_flush,
- (fn_t)rate_close,
- (fn_t)rate_delay,
- (fn_t)rate_sizes,
- (fn_t)rate_create,
- (fn_t)0,
- (fn_t)id,
-};
-#endif
diff --git a/src/rate32.c b/src/rate32.c
deleted file mode 100644
index fe85bae..0000000
--- a/src/rate32.c
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1 See LICENCE for details. */
-
-#define sample_t float
-#define RATE_SIMD 0
-#define RDFT_CB _soxr_rdft32_cb
-#define RATE_CB _soxr_rate32_cb
-#define RATE_ID "cr32"
-#include "rate.h"
diff --git a/src/rate32s.c b/src/rate32s.c
deleted file mode 100644
index 3acfcb4..0000000
--- a/src/rate32s.c
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1 See LICENCE for details. */
-
-#define sample_t float
-#define RATE_SIMD 1
-#define RDFT_CB _soxr_rdft32s_cb
-#define RATE_CB _soxr_rate32s_cb
-#define RATE_ID "cr32s"
-#include "rate.h"
diff --git a/src/rate64.c b/src/rate64.c
deleted file mode 100644
index 6f25143..0000000
--- a/src/rate64.c
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1 See LICENCE for details. */
-
-#define sample_t double
-#define RATE_SIMD 0
-#define RDFT_CB _soxr_rdft64_cb
-#define RATE_CB _soxr_rate64_cb
-#define RATE_ID "cr64"
-#include "rate.h"
diff --git a/src/rdft_t.h b/src/rdft_t.h
new file mode 100644
index 0000000..293d9c3
--- /dev/null
+++ b/src/rdft_t.h
@@ -0,0 +1,24 @@
+/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+typedef void (* fn_t)(void);
+
+#define rdft_forward_setup (*(void * (*)(int))RDFT_CB[0])
+#define rdft_backward_setup (*(void * (*)(int))RDFT_CB[1])
+#define rdft_delete_setup (*(void (*)(void *))RDFT_CB[2])
+#define rdft_forward (*(void (*)(int, void *, void *, void *))RDFT_CB[3])
+#define rdft_oforward (*(void (*)(int, void *, void *, void *))RDFT_CB[4])
+#define rdft_backward (*(void (*)(int, void *, void *, void *))RDFT_CB[5])
+#define rdft_obackward (*(void (*)(int, void *, void *, void *))RDFT_CB[6])
+#define rdft_convolve (*(void (*)(int, void *, void *, void const *))RDFT_CB[7])
+#define rdft_convolve_portion (*(void (*)(int, void *, void const *))RDFT_CB[8])
+#define rdft_multiplier (*(int (*)(void))RDFT_CB[9])
+#define rdft_reorder_back (*(void (*)(int, void *, void *, void *))RDFT_CB[10])
+#define rdft_malloc (*(void * (*)(size_t))RDFT_CB[11])
+#define rdft_calloc (*(void * (*)(size_t, size_t))RDFT_CB[12])
+#define rdft_free (*(void (*)(void *))RDFT_CB[13])
+#define rdft_flags (*(int (*)(void))RDFT_CB[14])
+
+/* Flag templates: */
+#define RDFT_IS_SIMD 1
+#define RDFT_NEEDS_SCRATCH 2
diff --git a/src/simd-dev.h b/src/simd-dev.h
deleted file mode 100644
index 019325c..0000000
--- a/src/simd-dev.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1 See LICENCE for details. */
-
-#define PFFT_MACROS_ONLY
-#include "pffft.c"
diff --git a/src/simd.c b/src/simd.c
index 48d440f..ec548fd 100644
--- a/src/simd.c
+++ b/src/simd.c
@@ -1,21 +1,15 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include
#include
#include
-#include "simd.h"
-#include "simd-dev.h"
+
#include "soxr-config.h"
-#if AVCODEC_FOUND
- #define SIMD_ALIGNMENT (sizeof(double) * 4)
-#else
- #define SIMD_ALIGNMENT (sizeof(float) * 4)
-#endif
+#define SIMD_ALIGNMENT (sizeof(float) * (1 + (PFFFT_DOUBLE|AVCODEC_FOUND)) * 4)
-
-void * _soxr_simd_aligned_malloc(size_t size)
+void * SIMD_ALIGNED_MALLOC(size_t size)
{
char * p1 = 0, * p = malloc(size + SIMD_ALIGNMENT);
if (p) {
@@ -27,9 +21,9 @@ void * _soxr_simd_aligned_malloc(size_t size)
-void * _soxr_simd_aligned_calloc(size_t nmemb, size_t size)
+void * SIMD_ALIGNED_CALLOC(size_t nmemb, size_t size)
{
- void * p = _soxr_simd_aligned_malloc(nmemb * size);
+ void * p = SIMD_ALIGNED_MALLOC(nmemb * size);
if (p)
memset(p, 0, nmemb * size);
return p;
@@ -37,7 +31,7 @@ void * _soxr_simd_aligned_calloc(size_t nmemb, size_t size)
-void _soxr_simd_aligned_free(void * p1)
+void SIMD_ALIGNED_FREE(void * p1)
{
if (p1)
free(*((void * *)p1 - 1));
@@ -45,11 +39,16 @@ void _soxr_simd_aligned_free(void * p1)
-void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float * b)
+#define PFFT_MACROS_ONLY
+#include "pffft.c"
+
+
+
+void ORDERED_CONVOLVE_SIMD(int n, void * not_used, float * a, float const * b)
{
int i;
float ab0, ab1;
- v4sf * /*RESTRICT*/ va = (v4sf *)a;
+ v4sf * RESTRICT va = (v4sf *)a;
v4sf const * RESTRICT vb = (v4sf const *)b;
assert(VALIGNED(a) && VALIGNED(b));
ab0 = a[0] * b[0], ab1 = a[1] * b[1];
@@ -68,11 +67,11 @@ void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float
-void _soxr_ordered_partial_convolve_simd(int n, float * a, const float * b)
+void ORDERED_PARTIAL_CONVOLVE_SIMD(int n, float * a, float const * b)
{
int i;
float ab0;
- v4sf * /*RESTRICT*/ va = (v4sf *)a;
+ v4sf * RESTRICT va = (v4sf *)a;
v4sf const * RESTRICT vb = (v4sf const *)b;
assert(VALIGNED(a) && VALIGNED(b));
ab0 = a[0] * b[0];
diff --git a/src/simd.h b/src/simd.h
deleted file mode 100644
index 71eefc6..0000000
--- a/src/simd.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
- * Licence for this file: LGPL v2.1 See LICENCE for details. */
-
-#if !defined simd_included
-#define simd_included
-
-#include
-
-void * _soxr_simd_aligned_malloc(size_t);
-void * _soxr_simd_aligned_calloc(size_t, size_t);
-void _soxr_simd_aligned_free(void *);
-
-void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float * b);
-void _soxr_ordered_partial_convolve_simd(int n, float * a, const float * b);
-
-#endif
diff --git a/src/simd32-dev.h b/src/simd32-dev.h
new file mode 100644
index 0000000..0408758
--- /dev/null
+++ b/src/simd32-dev.h
@@ -0,0 +1,54 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#if !defined soxr_simd_dev_included
+#define soxr_simd_dev_included
+
+#if defined __GNUC__
+ #define SIMD_INLINE(T) static __inline T __attribute__((always_inline))
+ #define vAlign __attribute__((aligned (16)))
+#elif defined _MSC_VER
+ #define SIMD_INLINE(T) static __forceinline T
+ #define vAlign __declspec(align(16))
+#endif
+
+#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86
+
+#include
+
+#define vZero() _mm_setzero_ps()
+#define vSet1(a) _mm_set_ss(a)
+#define vMul(a,b) _mm_mul_ps(a,b)
+#define vAdd(a,b) _mm_add_ps(a,b)
+#define vMac(a,b,c) vAdd(vMul(a,b),c)
+#define vLds(a) _mm_set1_ps(a)
+#define vLd(a) _mm_load_ps(a)
+#define vLdu(a) _mm_loadu_ps(a)
+
+typedef __m128 v4_t;
+
+SIMD_INLINE(void) vStorSum(float * a, v4_t b) {
+ v4_t t = vAdd(_mm_movehl_ps(b, b), b);
+ _mm_store_ss(a, vAdd(t, _mm_shuffle_ps(t,t,1)));}
+
+#elif defined __arm__
+
+#include
+
+#define vZero() vdupq_n_f32(0)
+#define vMul(a,b) vmulq_f32(a,b)
+#define vAdd(a,b) vaddq_f32(a,b)
+#define vMac(a,b,c) vmlaq_f32(c,a,b)
+#define vLds(a) vld1q_dup_f32(&(a))
+#define vLd(a) vld1q_f32(a)
+#define vLdu(a) vld1q_f32(a)
+
+typedef float32x4_t v4_t;
+
+SIMD_INLINE(void) vStorSum(float * a, v4_t b) {
+ float32x2_t t = vadd_f32(vget_high_f32(b), vget_low_f32(b));
+ *a = vget_lane_f32(vpadd_f32(t, t), 0);}
+
+#endif
+
+#endif
diff --git a/src/simd32.c b/src/simd32.c
new file mode 100644
index 0000000..3f6cb81
--- /dev/null
+++ b/src/simd32.c
@@ -0,0 +1,8 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#define PFFFT_DOUBLE 0
+
+#include "simd32.h"
+
+#include "simd.c"
diff --git a/src/simd32.h b/src/simd32.h
new file mode 100644
index 0000000..bc185da
--- /dev/null
+++ b/src/simd32.h
@@ -0,0 +1,23 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#if !defined simd32_included
+#define simd32_included
+
+#include
+
+void * _soxr_simd32_aligned_malloc(size_t);
+void * _soxr_simd32_aligned_calloc(size_t, size_t);
+void _soxr_simd32_aligned_free(void *);
+
+#define SIMD_ALIGNED_MALLOC _soxr_simd32_aligned_malloc
+#define SIMD_ALIGNED_CALLOC _soxr_simd32_aligned_calloc
+#define SIMD_ALIGNED_FREE _soxr_simd32_aligned_free
+
+void _soxr_ordered_convolve_simd32(int n, void * not_used, float * a, float const * b);
+void _soxr_ordered_partial_convolve_simd32(int n, float * a, float const * b);
+
+#define ORDERED_CONVOLVE_SIMD _soxr_ordered_convolve_simd32
+#define ORDERED_PARTIAL_CONVOLVE_SIMD _soxr_ordered_partial_convolve_simd32
+
+#endif
diff --git a/src/simd64-dev.h b/src/simd64-dev.h
new file mode 100644
index 0000000..37484e4
--- /dev/null
+++ b/src/simd64-dev.h
@@ -0,0 +1,42 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#if !defined soxr_simd64_dev_included
+#define soxr_simd64_dev_included
+
+#if defined __GNUC__
+ #define SIMD_INLINE(T) static __inline T __attribute__((always_inline))
+ #define vAlign __attribute__((aligned (32)))
+#elif defined _MSC_VER
+ #define SIMD_INLINE(T) static __forceinline T
+ #define vAlign __declspec(align(32))
+#else
+ #define SIMD_INLINE(T) static __inline T
+#endif
+
+#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86
+
+#include
+
+#if defined __AVX__
+
+#define vZero() _mm256_setzero_pd()
+#define vSet1(a) _mm256_set_pd(0,0,0,a)
+#define vMul(a,b) _mm256_mul_pd(a,b)
+#define vAdd(a,b) _mm256_add_pd(a,b)
+#define vMac(a,b,c) vAdd(vMul(a,b),c) /* Note: gcc -mfma will `fuse' these */
+#define vLds(a) _mm256_set1_pd(a)
+#define vLd(a) _mm256_load_pd(a)
+#define vLdu(a) _mm256_loadu_pd(a)
+
+typedef __m256d v4_t;
+
+SIMD_INLINE(void) vStorSum(double * a, v4_t b) {
+ b = _mm256_hadd_pd(b, _mm256_permute2f128_pd(b,b,1));
+ _mm_store_sd(a, _mm256_castpd256_pd128(_mm256_hadd_pd(b,b)));}
+
+#endif
+
+#endif
+
+#endif
diff --git a/src/simd64.c b/src/simd64.c
new file mode 100644
index 0000000..b601750
--- /dev/null
+++ b/src/simd64.c
@@ -0,0 +1,8 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#define PFFFT_DOUBLE 1
+
+#include "simd64.h"
+
+#include "simd.c"
diff --git a/src/simd64.h b/src/simd64.h
new file mode 100644
index 0000000..0ebc439
--- /dev/null
+++ b/src/simd64.h
@@ -0,0 +1,23 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#if !defined simd64_included
+#define simd64_included
+
+#include
+
+void * _soxr_simd64_aligned_malloc(size_t);
+void * _soxr_simd64_aligned_calloc(size_t, size_t);
+void _soxr_simd64_aligned_free(void *);
+
+#define SIMD_ALIGNED_MALLOC _soxr_simd64_aligned_malloc
+#define SIMD_ALIGNED_CALLOC _soxr_simd64_aligned_calloc
+#define SIMD_ALIGNED_FREE _soxr_simd64_aligned_free
+
+void _soxr_ordered_convolve_simd64(int n, void * not_used, double * a, double const * b);
+void _soxr_ordered_partial_convolve_simd64(int n, double * a, double const * b);
+
+#define ORDERED_CONVOLVE_SIMD _soxr_ordered_convolve_simd64
+#define ORDERED_PARTIAL_CONVOLVE_SIMD _soxr_ordered_partial_convolve_simd64
+
+#endif
diff --git a/src/soxr-lsr.h b/src/soxr-lsr.h
index c0923aa..2acd138 100644
--- a/src/soxr-lsr.h
+++ b/src/soxr-lsr.h
@@ -37,9 +37,9 @@
#endif
typedef float SRC_SAMPLE;
-#if !defined SOXR_LIB
enum SRC_SRCTYPE_e {SRC_SINC_BEST_QUALITY, SRC_SINC_MEDIUM_QUALITY,
SRC_SINC_FASTEST, SRC_ZERO_ORDER_HOLD, SRC_LINEAR};
+#if !defined SOXR_LIB
typedef int SRC_SRCTYPE;
typedef int SRC_ERROR;
typedef long (* src_callback_t)(void *, SRC_SAMPLE * *);
diff --git a/src/soxr.c b/src/soxr.c
index d36891a..d963bec 100644
--- a/src/soxr.c
+++ b/src/soxr.c
@@ -1,10 +1,8 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include
-#include
#include
-#include
#include
#include
@@ -13,20 +11,17 @@
#include "internal.h"
#if AVUTIL_FOUND
-#include
+ #include
#endif
#if WITH_DEV_TRACE
-int _soxr_trace_level(void)
-{
- char const * e = getenv("SOXR_TRACE");
- return e? atoi(e) : 4;
-}
-
+#include
+#include
+int _soxr_trace_level;
void _soxr_trace(char const * fmt, ...)
{
@@ -96,52 +91,6 @@ struct soxr {
-#define RESET_ON_CLEAR (1u<<31)
-
-/* TODO: these should not be here. */
-#define TO_3dB(a) ((1.6e-6*a-7.5e-4)*a+.646)
-#define LOW_Q_BW0 (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */
-
-
-
-soxr_quality_spec_t soxr_quality_spec(unsigned long recipe, unsigned long flags)
-{
- soxr_quality_spec_t spec, * p = &spec;
- unsigned quality = recipe & 0xf;
- double rej;
- memset(p, 0, sizeof(*p));
- if (quality > 13) {
- p->e = "invalid quality type";
- return spec;
- }
- flags |= quality < SOXR_LSR0Q? RESET_ON_CLEAR : 0;
- if (quality == 13)
- quality = 6;
- else if (quality > 10)
- quality = 0;
- p->phase_response = "\62\31\144"[(recipe & 0x30) >> 4];
- p->stopband_begin = 1;
- p->precision = !quality? 0: quality < 3? 16 : quality < 8? 4 + quality * 4 : 55 - quality * 4;
- rej = p->precision * linear_to_dB(2.);
- p->flags = flags;
- if (quality < 8) {
- p->passband_end = quality == 1? LOW_Q_BW0 : 1 - .05 / TO_3dB(rej);
- if (quality <= 2)
- p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM;
- }
- else {
- static float const bw[] = {.931f, .832f, .663f};
- p->passband_end = bw[quality - 8];
- if (quality - 8 == 2)
- p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM;
- }
- if (recipe & SOXR_STEEP_FILTER)
- p->passband_end = 1 - .01 / TO_3dB(rej);
- return spec;
-}
-
-
-
char const * soxr_engine(soxr_t p)
{
return resampler_id();
@@ -163,82 +112,132 @@ soxr_error_t soxr_error(soxr_t p)
-soxr_runtime_spec_t soxr_runtime_spec(unsigned num_threads)
-{
- soxr_runtime_spec_t spec, * p = &spec;
- memset(p, 0, sizeof(*p));
- p->log2_min_dft_size = 10;
- p->log2_large_dft_size = 17;
- p->coef_size_kbytes = 400;
- p->num_threads = num_threads;
- return spec;
-}
-
-
-
-soxr_io_spec_t soxr_io_spec(
- soxr_datatype_t itype,
- soxr_datatype_t otype)
-{
- soxr_io_spec_t spec, * p = &spec;
- memset(p, 0, sizeof(*p));
- if ((itype | otype) >= SOXR_SPLIT * 2)
- p->e = "invalid io datatype(s)";
- else {
- p->itype = itype;
- p->otype = otype;
- p->scale = 1;
- }
- return spec;
-}
-
-
-
-#if SIMD_FOUND
-static bool cpu_has_simd(void)
-{
-#if defined __x86_64__ || defined _M_X64
- return true;
-#elif defined __GNUC__ && defined i386
- uint32_t eax, ebx, ecx, edx;
- __asm__ __volatile__ (
- "pushl %%ebx \n\t"
- "cpuid \n\t"
- "movl %%ebx, %1\n\t"
- "popl %%ebx \n\t"
- : "=a"(eax), "=r"(ebx), "=c"(ecx), "=d"(edx)
- : "a"(1)
- : "cc" );
- return !!(edx & 0x06000000);
-#elif defined _MSC_VER && defined _M_IX86
- uint32_t d;
- __asm {
- xor eax, eax
- inc eax
- push ebx
- cpuid
- pop ebx
- mov d, edx
- }
- return !!(d & 0x06000000);
-#elif defined AV_CPU_FLAG_NEON
- return !!(av_get_cpu_flags() & AV_CPU_FLAG_NEON);
-#endif
- return false;
-}
-
-
-
-static bool should_use_simd(void)
-{
- char const * e = getenv("SOXR_USE_SIMD");
- return e? !!atoi(e) : cpu_has_simd();
-}
+#if WITH_CR32S || WITH_CR64S
+ #if defined __GNUC__ && defined __x86_64__
+ #define CPUID(type, eax_, ebx_, ecx_, edx_) \
+ __asm__ __volatile__ ( \
+ "cpuid \n\t" \
+ : "=a" (eax_), "=b" (ebx_), "=c" (ecx_), "=d" (edx_) \
+ : "a" (type), "c" (0));
+ #elif defined __GNUC__ && defined __i386__
+ #define CPUID(type, eax_, ebx_, ecx_, edx_) \
+ __asm__ __volatile__ ( \
+ "mov %%ebx, %%edi \n\t" \
+ "cpuid \n\t" \
+ "xchg %%edi, %%ebx \n\t" \
+ : "=a" (eax_), "=D" (ebx_), "=c" (ecx_), "=d" (edx_) \
+ : "a" (type), "c" (0));
+ #elif defined _M_X64 && defined _MSC_VER && _MSC_VER > 1500
+ void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue);
+ #pragma intrinsic(__cpuidex)
+ #define CPUID(type, eax_, ebx_, ecx_, edx_) do { \
+ int regs[4]; \
+ __cpuidex(regs, type, 0); \
+ eax_ = regs[0], ebx_ = regs[1], ecx_ = regs[2], edx_ = regs[3]; \
+ } while(0)
+ #elif defined _M_X64 && defined _MSC_VER
+ void __cpuidex(int CPUInfo[4], int info_type);
+ #pragma intrinsic(__cpuidex)
+ #define CPUID(type, eax_, ebx_, ecx_, edx_) do { \
+ int regs[4]; \
+ __cpuidex(regs, type); \
+ eax_ = regs[0], ebx_ = regs[1], ecx_ = regs[2], edx_ = regs[3]; \
+ } while(0)
+ #elif defined _M_IX86 && defined _MSC_VER
+ #define CPUID(type, eax_, ebx_, ecx_, edx_) \
+ __asm pushad \
+ __asm mov eax, type \
+ __asm xor ecx, ecx \
+ __asm cpuid \
+ __asm mov eax_, eax \
+ __asm mov ebx_, ebx \
+ __asm mov ecx_, ecx \
+ __asm mov edx_, edx \
+ __asm popad
+ #endif
#endif
-extern control_block_t _soxr_rate32s_cb, _soxr_rate32_cb, _soxr_rate64_cb, _soxr_vr32_cb;
+#if WITH_CR32S
+ static bool cpu_has_simd32(void)
+ {
+ #if defined __x86_64__ || defined _M_X64
+ return true;
+ #elif defined __i386__ || defined _M_IX86
+ enum {SSE = 1 << 25, SSE2 = 1 << 26};
+ unsigned eax_, ebx_, ecx_, edx_;
+ CPUID(1, eax_, ebx_, ecx_, edx_);
+ return (edx_ & (SSE|SSE2)) != 0;
+ #elif defined AV_CPU_FLAG_NEON
+ return !!(av_get_cpu_flags() & AV_CPU_FLAG_NEON);
+ #else
+ return false;
+ #endif
+ }
+
+ static bool should_use_simd32(void)
+ {
+ char const * e = getenv("SOXR_USE_SIMD32");
+ return e? !!atoi(e) : cpu_has_simd32();
+ }
+#endif
+
+
+
+#if WITH_CR64S
+ #if defined __GNUC__
+ #define XGETBV(type, eax_, edx_) \
+ __asm__ __volatile__ ( \
+ ".byte 0x0f, 0x01, 0xd0\n" \
+ : "=a"(eax_), "=d"(edx_) : "c" (type));
+ #elif defined _M_X64 && defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219
+ #include
+ #define XGETBV(type, eax_, edx_) do { \
+ union {uint64_t x; uint32_t y[2];} a = {_xgetbv(0)}; \
+ eax_ = a.y[0], edx_ = a.y[1]; \
+ } while(0)
+ #elif defined _M_IX86 && defined _MSC_VER
+ #define XGETBV(type, eax_, edx_) \
+ __asm pushad \
+ __asm mov ecx, type \
+ __asm _emit 0x0f \
+ __asm _emit 0x01 \
+ __asm _emit 0xd0 \
+ __asm mov eax_, eax \
+ __asm mov edx_, edx \
+ __asm popad
+ #else
+ #define XGETBV(type, eax_, edx_) eax_ = edx_ = 0
+ #endif
+
+ static bool cpu_has_simd64(void)
+ {
+ enum {OSXSAVE = 1 << 27, AVX = 1 << 28};
+ unsigned eax_, ebx_, ecx_, edx_;
+ CPUID(1, eax_, ebx_, ecx_, edx_);
+ if ((ecx_ & (OSXSAVE|AVX)) == (OSXSAVE|AVX)) {
+ XGETBV(0, eax_, edx_);
+ return (eax_ & 6) == 6;
+ }
+ return false;
+ }
+
+ static bool should_use_simd64(void)
+ {
+ char const * e = getenv("SOXR_USE_SIMD64");
+ return e? !!atoi(e) : cpu_has_simd64();
+ }
+#endif
+
+
+
+extern control_block_t
+ _soxr_rate32_cb,
+ _soxr_rate32s_cb,
+ _soxr_rate64_cb,
+ _soxr_rate64s_cb,
+ _soxr_vr32_cb;
@@ -280,6 +279,11 @@ soxr_t soxr_create(
soxr_t p = 0;
soxr_error_t error = 0;
+#if WITH_DEV_TRACE
+ char const * e = getenv("SOXR_TRACE");
+ _soxr_trace_level = e? atoi(e) : 0;
+#endif
+
if (q_spec && q_spec->e) error = q_spec->e;
else if (io_spec && (io_spec->itype | io_spec->otype) >= SOXR_SPLIT * 2)
error = "invalid io datatype(s)";
@@ -319,27 +323,39 @@ soxr_t soxr_create(
p->seed = (unsigned long)time(0) ^ (unsigned long)(size_t)p;
-#if WITH_SINGLE_PRECISION
- if (!WITH_DOUBLE_PRECISION || (p->q_spec.precision <= 20 && !(p->q_spec.flags & SOXR_DOUBLE_PRECISION))
- || (p->q_spec.flags & SOXR_VR)) {
+#if WITH_CR32 || WITH_CR32S || WITH_VR32
+ if (0
+#if WITH_VR32
+ || ((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR))
+#endif
+#if WITH_CR32 || WITH_CR32S
+ || !(WITH_CR64 || WITH_CR64S) || (p->q_spec.precision <= 20 && !(p->q_spec.flags & SOXR_DOUBLE_PRECISION))
+#endif
+ ) {
p->deinterleave = (deinterleave_t)_soxr_deinterleave_f;
p->interleave = (interleave_t)_soxr_interleave_f;
memcpy(&p->control_block,
- (p->q_spec.flags & SOXR_VR)? &_soxr_vr32_cb :
-#if SIMD_FOUND
- should_use_simd()? &_soxr_rate32s_cb :
+#if WITH_VR32
+ ((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR))? &_soxr_vr32_cb :
+#endif
+#if WITH_CR32S
+ !WITH_CR32 || should_use_simd32()? &_soxr_rate32s_cb :
#endif
&_soxr_rate32_cb, sizeof(p->control_block));
}
-#if WITH_DOUBLE_PRECISION
+#if WITH_CR64 || WITH_CR64S
else
#endif
#endif
-#if WITH_DOUBLE_PRECISION
+#if WITH_CR64 || WITH_CR64S
{
p->deinterleave = (deinterleave_t)_soxr_deinterleave;
p->interleave = (interleave_t)_soxr_interleave;
- memcpy(&p->control_block, &_soxr_rate64_cb, sizeof(p->control_block));
+ memcpy(&p->control_block,
+#if WITH_CR64S
+ !WITH_CR64 || should_use_simd64()? &_soxr_rate64s_cb :
+#endif
+ &_soxr_rate64_cb, sizeof(p->control_block));
}
#endif
diff --git a/src/soxr.h b/src/soxr.h
index 56132cb..640b698 100644
--- a/src/soxr.h
+++ b/src/soxr.h
@@ -1,4 +1,4 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
@@ -269,9 +269,6 @@ struct soxr_runtime_spec { /* Typically */
#define SOXR_COEF_INTERP_LOW 2u /* Man. select: less CPU, more memory. */
#define SOXR_COEF_INTERP_HIGH 3u /* Man. select: more CPU, less memory. */
-#define SOXR_STRICT_BUFFERING 4u /* Reserved for future use. */
-#define SOXR_NOSMALLINTOPT 8u /* For test purposes only. */
-
/* -------------------------- API type constructors ------------------------- */
@@ -296,7 +293,7 @@ SOXR soxr_quality_spec_t soxr_quality_spec(
#define SOXR_24_BITQ 5
#define SOXR_28_BITQ 6
#define SOXR_32_BITQ 7
- /* Libsamplerate equivalent qualities: */
+ /* For internal use only; to be removed: */
#define SOXR_LSR0Q 8 /* 'Best sinc'. */
#define SOXR_LSR1Q 9 /* 'Medium sinc'. */
#define SOXR_LSR2Q 10 /* 'Fast sinc'. */
@@ -304,8 +301,8 @@ SOXR soxr_quality_spec_t soxr_quality_spec(
#define SOXR_LINEAR_PHASE 0x00
#define SOXR_INTERMEDIATE_PHASE 0x10
#define SOXR_MINIMUM_PHASE 0x30
+
#define SOXR_STEEP_FILTER 0x40
-#define SOXR_ALLOW_ALIASING 0x80 /* Reserved for future use. */
diff --git a/src/vr32.c b/src/vr32.c
index 9ad17d1..8b1a259 100644
--- a/src/vr32.c
+++ b/src/vr32.c
@@ -1,16 +1,10 @@
-/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
/* Variable-rate resampling. */
#include
-#include
-#if !defined M_PI
-#define M_PI 3.14159265358979323846
-#endif
-#if !defined M_LN2
-#define M_LN2 0.69314718055994530942
-#endif
+#include "math-wrap.h"
#include
#include
#include "internal.h"
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 0eba7e0..3c28f9c 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -10,7 +10,10 @@ foreach (fe ${SOURCES})
add_executable (${f} ${fe})
endforeach ()
-enable_testing ()
+# Can't use c89 for this file:
+if (CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang")
+ set_property (SOURCE throughput APPEND_STRING PROPERTY COMPILE_FLAGS "-std=gnu89")
+endif ()
set (sweep_to_freq 22050)
set (leader 1)
@@ -20,33 +23,36 @@ math (EXPR base_rate "${sweep_to_freq} + ${sweep_to_freq}")
macro (add_vector r)
set (output ${CMAKE_CURRENT_BINARY_DIR}/ref-${r}.s32)
add_custom_command (OUTPUT ${output} DEPENDS vector-gen ${CMAKE_CURRENT_LIST_FILE}
- COMMAND vector-gen ${r} ${leader} ${len} ${sweep_to_freq} 1 ${output})
+ COMMAND vector-gen ${r} ${leader} ${len} 0 ${sweep_to_freq} 1 ${output})
set (vectors ${output} ${vectors})
endmacro ()
-macro (add_cmp_test from to bits)
- set (name ${bits}-bit-perfect-${from}-${to})
- add_test (NAME ${name} COMMAND ${CMAKE_COMMAND} -Dbits=${bits} -DBIN=${BIN} -DEXAMPLES_BIN=${EXAMPLES_BIN} -Dleader=${leader} -Dto=${to}
- -Dfrom=${from} -Dlen=${len} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmp-test.cmake)
- add_vector (${from})
- add_vector (${to})
+macro (add_cmp_test irate orate bits)
+ set (name ${bits}-bit-perfect-${irate}-${orate})
+ add_test (NAME ${name} COMMAND ${CMAKE_COMMAND} -Dbits=${bits} -DBIN=${BIN}
+ -DEXAMPLES_BIN=${EXAMPLES_BIN} -DlenToSkip=${leader} -Dorate=${orate}
+ -Dirate=${irate} -Dlen=${len} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmp-test.cmake)
+ add_vector (${irate})
+ add_vector (${orate})
endmacro ()
unset (test_bits)
-if (WITH_SINGLE_PRECISION)
+if (WITH_CR32 OR WITH_CR32S OR WITH_CR64 OR WITH_CR64S)
set (test_bits 20)
endif ()
-if (WITH_DOUBLE_PRECISION)
- set (test_bits ${test_bits} 24)
+if (WITH_CR64 OR WITH_CR64S)
+ set (test_bits ${test_bits} 28)
endif ()
foreach (b ${test_bits})
- foreach (r 96000 65537)
+ foreach (r 192000 65537)
add_cmp_test (${base_rate} ${r} ${b})
add_cmp_test (${r} ${base_rate} ${b})
endforeach ()
endforeach ()
-add_custom_target (test-vectors ALL DEPENDS ${vectors})
+if (NOT CMAKE_CROSSCOMPILING)
+ add_custom_target (test-vectors ALL DEPENDS ${vectors})
+endif ()
add_test (1-delay-clear ${BIN}1-delay-clear)
diff --git a/tests/cmp-test.cmake b/tests/cmp-test.cmake
index 8db76c5..a836322 100644
--- a/tests/cmp-test.cmake
+++ b/tests/cmp-test.cmake
@@ -1,17 +1,13 @@
# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
-if (${bits} STREQUAL 24)
- set (quality 45)
-else ()
- set (quality 44)
-endif ()
+math (EXPR quality "43 + (${bits} - 13) / 4")
+set (ofile ${irate}-${orate}-${quality}.s32)
+#message (STATUS "Output file = [${ofile}]")
-set (output ${from}-${to}-${quality}.s32)
-
-execute_process(COMMAND ${EXAMPLES_BIN}3-options-input-fn ${from} ${to} 1 2 2 ${quality} a
- INPUT_FILE ref-${from}.s32
- OUTPUT_FILE ${output}
+execute_process(COMMAND ${EXAMPLES_BIN}3-options-input-fn ${irate} ${orate} 1 2 2 ${quality} a
+ INPUT_FILE ref-${irate}.s32
+ OUTPUT_FILE ${ofile}
ERROR_VARIABLE test_error
RESULT_VARIABLE test_result)
@@ -19,7 +15,11 @@ if (test_result)
message (FATAL_ERROR "Resampling failure: ${test_error}")
endif ()
-execute_process(COMMAND ${BIN}vector-cmp ref-${to}.s32 ${output} ${to} ${leader} ${len} ${bits} 98
+set (percentageToCheck 98)
+math (EXPR lenToCheck "${len} * ${percentageToCheck}")
+string (REGEX REPLACE "(..)$" ".\\1" lenToCheck "${lenToCheck}") # Divide by 100
+
+execute_process(COMMAND ${BIN}vector-cmp ref-${orate}.s32 ${ofile} ${orate} ${lenToSkip} ${lenToCheck} ${bits}
OUTPUT_VARIABLE test_output
RESULT_VARIABLE test_result)
diff --git a/tests/io-test b/tests/io-test
index 4205c71..608bc9a 100755
--- a/tests/io-test
+++ b/tests/io-test
@@ -35,7 +35,7 @@ test z$1 != z && j=$1 || j=1
for c in `seq 1 $j`; do
for n in `seq 0 3`; do
- sox -r $ir -n $c.${types[$n]} synth $len sin $f gain -.1
+ sox -R -r $ir -n $c.${types[$n]} synth $len sin $f gain -.1
done
n=0
diff --git a/tests/large-ratio-test b/tests/large-ratio-test
index 74cd0a4..540c5df 100755
--- a/tests/large-ratio-test
+++ b/tests/large-ratio-test
@@ -1,22 +1,22 @@
#!/usr/bin/env bash
set -e
-# SoX Resampler Library Copyright (c) 2007-15 robs@users.sourceforge.net
+# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
# Tests interpolating then decimating by the same, large ratio.
tool=../examples/3-options-input-fn
w=$(echo -e "`sox --ver |sed 's/.*SoX v//'` d\n14.4.1 k"|sort -Vr|head -1|sed 's/.* //')
-q=6
-ratio=2e4
-srate=8000
-nrate=`expr $srate / 2`
+q=4
+test x$1 = x && ratio=1e5 || ratio=$1
+test x$2 = x && rate=8000 || rate=$2
-../tests/vector-gen $srate 0 8 $nrate .9375 1.s32
+sox -r$rate -n 1.s32 synth 10 sin 0:`expr $rate / 2` vol .9375
+sync
-$tool 1 $ratio 1 2 1 $q < 1.s32 | $tool $ratio 1 1 1 2 $q > 2.s32
+time { $tool 1 $ratio 1 2 1 $q a < 1.s32 | $tool $ratio 1 1 1 2 $q a > 2.s32;}
-sox -M -r $srate -c1 1.s32 -r $srate -c1 2.s32 -n spectrogram -hw$w -Z-10 -z180 -o lr-$w.png -c "large-ratio-test q:$q ratio:$ratio"
+sox -mv-1 -r$rate -c1 1.s32 -r$rate -c1 2.s32 -n spectrogram -hw$w -z150 -o lr-$w.png -c "large-ratio-test q:$q ratio:$ratio"
rm [12].s32
diff --git a/tests/throughput-test b/tests/throughput-test
new file mode 100755
index 0000000..544c620
--- /dev/null
+++ b/tests/throughput-test
@@ -0,0 +1,4 @@
+#!/bin/sh
+set -e
+
+for n in `seq 0 3`; do ./throughput 44.1 48 1 0 $n; done
diff --git a/tests/throughput-test.bat b/tests/throughput-test.bat
new file mode 100644
index 0000000..a183881
--- /dev/null
+++ b/tests/throughput-test.bat
@@ -0,0 +1 @@
+for /L %%i in (0,1,3) DO throughput 44.1 48 1 0 %%i
diff --git a/tests/throughput.c b/tests/throughput.c
new file mode 100644
index 0000000..21a4c32
--- /dev/null
+++ b/tests/throughput.c
@@ -0,0 +1,111 @@
+/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1 See LICENCE for details. */
+
+#include
+#include "rint.h"
+#include "../examples/examples-common.h"
+
+#define k 1000
+#if defined _WIN32
+ #define WIN32_LEAN_AND_MEAN
+ #include
+ #define timerDecl LARGE_INTEGER start, stop, tmp
+ #define timerStart(msecs) QueryPerformanceCounter(&start), \
+ QueryPerformanceFrequency(&tmp), \
+ stop.QuadPart = (msecs * tmp.QuadPart + k/2) / k
+ #define timerRunning() (QueryPerformanceCounter(&tmp), \
+ (tmp.QuadPart-start.QuadPart < stop.QuadPart))
+#else
+ #include
+ #define timerDecl struct timespec stop, tmp
+ #define timerStart(msecs) clock_gettime(CLOCK_MONOTONIC, &stop), \
+ stop.tv_nsec += (msecs%k)*(k*k), \
+ stop.tv_sec += msecs/k + stop.tv_nsec/(k*k*k), \
+ stop.tv_nsec %= k*k*k
+ #define timerRunning() (clock_gettime(CLOCK_MONOTONIC, &tmp), \
+ (tmp.tv_sec < stop.tv_sec || tmp.tv_nsec < stop.tv_nsec))
+#endif
+
+int main(int n, char const * arg[])
+{
+ char const * const arg0 = n? --n, *arg++ : "", * engine = "";
+ double const irate = n? --n, atof(*arg++) : 96000.;
+ double const orate = n? --n, atof(*arg++) : 44100.;
+ unsigned const chans = n? --n, (unsigned)atoi(*arg++) : 1;
+ soxr_datatype_t const itype = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
+ unsigned const ospec = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
+ unsigned long const q_recipe= n? --n, strtoul(*arg++, 0, 16) : SOXR_HQ;
+ unsigned long const q_flags = n? --n, strtoul(*arg++, 0, 16) : 0;
+ double const passband_end = n? --n, atof(*arg++) : 0;
+ double const stopband_begin = n? --n, atof(*arg++) : 0;
+ double const phase_response = n? --n, atof(*arg++) : -1;
+ int const use_threads = n? --n, atoi(*arg++) : 1;
+ soxr_datatype_t const otype = ospec & 3;
+
+ soxr_quality_spec_t q_spec = soxr_quality_spec(q_recipe, q_flags);
+ soxr_io_spec_t io_spec = soxr_io_spec(itype, otype);
+ soxr_runtime_spec_t const runtime_spec = soxr_runtime_spec(!use_threads);
+
+ /* Allocate resampling input and output buffers in proportion to the input
+ * and output rates: */
+ #define buf_total_len 15000 /* In samples per channel. */
+ size_t const osize = soxr_datatype_size(otype) * chans;
+ size_t const isize = soxr_datatype_size(itype) * chans;
+ size_t const olen0= (size_t)(orate * buf_total_len / (irate + orate) + .5);
+ size_t const olen = min(max(olen0, 1), buf_total_len - 1);
+ size_t const ilen = buf_total_len - olen;
+ void * const obuf = malloc(osize * olen);
+ void * const ibuf = malloc(isize * ilen);
+
+ size_t odone = 0, clips = 0, omax = 0, i;
+ soxr_error_t error;
+ soxr_t soxr;
+
+
+ /* Overrides (if given): */
+ if (passband_end > 0) q_spec.passband_end = passband_end / 100;
+ if (stopband_begin > 0) q_spec.stopband_begin = stopband_begin / 100;
+ if (phase_response >=0) q_spec.phase_response = phase_response;
+ io_spec.flags = ospec & ~7u;
+
+ /* Create a stream resampler: */
+ soxr = soxr_create(
+ irate, orate, chans, /* Input rate, output rate, # of channels. */
+ &error, /* To report any error during creation. */
+ &io_spec, &q_spec, &runtime_spec);
+
+ if (!error) { /* If all is well, run the resampler: */
+ engine = soxr_engine(soxr);
+#define RAND ((rand()*(1./RAND_MAX)-.5)*1)
+ switch (itype & 3) {
+ case 0: for (i=0;i
#include
-#include
#include "../src/rint.h"
+#include "../examples/examples-common.h"
-int main(int bit, char const * arg[])
+#define TYPE 0 /* As vector-gen */
+
+#if TYPE
+ #define sample_t double
+ #define N 50
+ #define DIFF(s1,s2) abs(rint32((s1-s2)*ldexp(1,N-1)))
+#else
+ #define sample_t int32_t
+ #define N 32
+ #define DIFF(s1,s2) abs((int)(s1-s2))
+#endif
+
+int main(int argc, char const * arg[])
{
- FILE * f1 = fopen(arg[1], "rb"),
- * f2 = fopen(arg[2], "rb");
- double rate = atof (arg[3]), /* Rate for this vector */
- leader_len = atof (arg[4]), /* Leader length in seconds */
- len = atof (arg[5]), /* Sweep length (excl. leader_len) */
- expect_bits= atof (arg[6]),
- expect_bw = atof (arg[7]);
+ int two = !!arg[2][0];
+ FILE * f1 = fopen(arg[1], "rb"), * f2 = two? fopen(arg[2], "rb") : 0;
+ double rate = atof (arg[3]), /* Sample-rate */
+ skip_len = atof (arg[4]), /* Skip length in seconds */
+ len = atof (arg[5]), /* Compare length in seconds */ r;
+ int i = 0, count = rint32(rate * len), max = 0, diff;
+ sample_t s1, s2;
- int32_t s1, s2;
- long count = 0;
- static long thresh[32];
- double bw, prev = 0;
-
- for (; fread(&s1, sizeof(s1), 1, f1) == 1 &&
- fread(&s2, sizeof(s2), 1, f2) == 1; ++count) {
- long diff = abs((int)(s1 - s2));
- for (bit = 0; diff && bit < 32; bit++, diff >>= 1)
- if ((diff & 1) && !thresh[bit])
- thresh[bit] = count + 1;
- }
-
- if (count != (long)((leader_len + len) * rate + .5)) {
- printf("incorrect file length\n");
- exit(1);
- }
-
- for (bit = 0; bit < 32; ++bit) {
- bw = ((double)thresh[bit] - 1) / rate - leader_len;
- if (bit && bw >= 0 && (bw - prev) * 100 / len < .08) {
- --bit;
- break;
+ fseek(f1, rint32(rate * skip_len) * (int)sizeof(s1), SEEK_CUR);
+ if (two) {
+ fseek(f2, rint32(rate * skip_len) * (int)sizeof(s2), SEEK_CUR);
+ for (; i < count &&
+ fread(&s1, sizeof(s1), 1, f1) &&
+ fread(&s2, sizeof(s2), 1, f2); ++i) {
+ diff = DIFF(s1, s2);
+ max = max(max, diff);
}
- prev = bw;
}
- bit = 32 - bit;
- bw = bw * 100 / len;
- printf("Bit perfect to %i bits, from DC to %.2f%% nyquist.\n", bit, bw);
- return !(bit >= expect_bits && bw >= expect_bw);
+ else for (; i < count && fread(&s1, sizeof(s1), 1, f1); ++i) {
+ diff = DIFF(s1, 0);
+ max = max(max, diff);
+ }
+
+ if (i != count) {
+ fprintf(stderr, "incorrect file length\n");
+ return 1;
+ }
+ printf("%f\n", r = N-log(max)/log(2));
+ return argc>6? r 1
#include
#endif
-#include
+#include "math-wrap.h"
#include
#include
-#if !defined M_PI
- #define M_PI 3.14159265358979323846
-#endif
-#if QUAD
- #define modf modfq
- #define cos cosq
- #define sin sinq
- #undef M_PI
- #define M_PI M_PIq
- #define real __float128
- #define atof(x) strtoflt128(x, 0)
+#if TYPE
+ #if TYPE > 1
+ #define modf modfq
+ #define cos cosq
+ #define sin sinq
+ #define PI M_PIq
+ #define real __float128
+ #define atof(x) strtoflt128(x, 0)
+ #else
+ #define modf modfl
+ #define cos cosl
+ #define sin sinl
+ #define PI M_PIl
+ #define real long double
+ #endif
+ #define MULT 1
+ #define OUT(d) double output = d
#else
+ #define PI M_PI
#define real double
#include "rint.h"
+ #define MULT (32768. * 65536 - 1/scale)
+ #define OUT(d) int32_t output = rint32(d)
#endif
-int main(int i, char const * argv[])
+int main(int argc, char const * argv[])
{
- real rate = atof(argv[1]), /* Rate for this vector */
- lead_in_len = atof(argv[2]), /* Lead-in length in seconds */
- len = atof(argv[3]), /* Sweep length (excl. lead_in_len) */
- sweep_to_freq = atof(argv[4]), /* Sweep from DC to this freq. */
- multiplier = atof(argv[5]), /* For headroom */
- f1 = -sweep_to_freq / len * lead_in_len, f2 = sweep_to_freq,
- n1 = rate * -lead_in_len, n2 = rate * len,
- m = (f2 - f1) / (n2 - n1) / 2, dummy;
- FILE * file = fopen(argv[6], "wb");
- i = (int)n1;
- if (!file || i != n1)
- exit(1);
- for (; i < (int)(n2 + .5); ++i) {
- double d1 = multiplier * sin(2 * M_PI * modf(i * m * i / rate, &dummy));
- double d = i < 0? d1 * (1 - cos(M_PI * (i + n1) / n1)) * .5 : d1;
-#if QUAD
- size_t actual = fwrite(&d, sizeof(d), 1, file);
-#else
- int32_t out = rint32(d * (32768. * 65536 - 1));
- size_t actual = fwrite(&out, sizeof(out), 1, file);
-#endif
- if (actual != 1)
- return 1;
+ real rate = atof(argv[1]), /* Rate for this vector */
+ lead_in_len = atof(argv[2]), /* Lead-in length in seconds */
+ len = atof(argv[3]), /* Sweep length (excl. lead_in_len) */
+ f1 = atof(argv[4]),
+ f2 = atof(argv[5]),
+ scale = atof(argv[6]), /* For headroom */
+ n1 = rate * -lead_in_len,
+ m = (f2 - f1) / (rate * len * 2), dummy;
+ FILE * file = fopen(argv[7], "wb");
+ int i = (int)n1, err = !file || i != n1;
+ for (; !err && i < (int)(rate*(len+lead_in_len)+.5); ++i) {
+ real d = sin(2 * PI * modf((f1 + i * m) * i / rate, &dummy));
+ OUT((double)(scale * MULT * d));
+ err = fwrite(&output, sizeof(output), 1, file) != 1;
}
- return 0;
+ return err |!argc;
}