diff --git a/CMakeLists.txt b/CMakeLists.txt index 69e6425..60a5c33 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,8 @@ cmake_minimum_required (VERSION 3.0 FATAL_ERROR) project (soxr C) -set (DESCRIPTION_SUMMARY "High quality, one-dimensional sample-rate conversion library") +set (DESCRIPTION_SUMMARY + "High quality, one-dimensional sample-rate conversion library") @@ -15,15 +16,20 @@ set (PROJECT_VERSION_MINOR 1) set (PROJECT_VERSION_PATCH 2) # For shared-object; if, since the last public release: -# * library code changed at all: ++revision -# * interfaces changed at all: ++current, revision = 0 -# * interfaces added: ++age -# * interfaces removed: age = 0 +# +# 1) library code changed at all: ++revision +# 2) interfaces changed at all: ++current, revision = 0 +# 3) interfaces added: ++age +# 4) interfaces removed: age = 0 set (SO_VERSION_CURRENT 1) set (SO_VERSION_REVISION 1) set (SO_VERSION_AGE 1) +math (EXPR SO_VERSION_MAJOR "${SO_VERSION_CURRENT} - ${SO_VERSION_AGE}") +math (EXPR SO_VERSION_MINOR "${SO_VERSION_AGE}") +math (EXPR SO_VERSION_PATCH "${SO_VERSION_REVISION}") + # Main options: @@ -31,31 +37,45 @@ set (SO_VERSION_AGE 1) include (CMakeDependentOption) if (NOT CMAKE_BUILD_TYPE) - set (CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE) + set (CMAKE_BUILD_TYPE Release CACHE STRING + "Build type, one of: None Debug Release RelWithDebInfo MinSizeRel." FORCE) endif () -option (BUILD_SHARED_LIBS "Build shared libraries." ON) +option (BUILD_TESTS "Build sanity-tests." ON) option (BUILD_EXAMPLES "Build examples." OFF) option (WITH_OPENMP "Include OpenMP threading." ON) option (WITH_LSR_BINDINGS "Include a `libsamplerate'-like interface." ON) -cmake_dependent_option (BUILD_TESTS "Build sanity-tests." ON - "NOT CMAKE_CROSSCOMPILING" OFF) -cmake_dependent_option (WITH_SINGLE_PRECISION "Build with single precision (for up to 20-bit accuracy)." ON - "WITH_DOUBLE_PRECISION" ON) -cmake_dependent_option (WITH_DOUBLE_PRECISION "Build with double precision (for up to 32-bit accuracy)." ON - "WITH_SINGLE_PRECISION" ON) -cmake_dependent_option (WITH_SIMD "Use SIMD (for faster single precision)." ON - "WITH_SINGLE_PRECISION" OFF) -cmake_dependent_option (WITH_AVFFT "Use libavcodec (LGPL) for SIMD DFT." OFF - "WITH_SIMD;NOT WITH_PFFFT" OFF) -cmake_dependent_option (WITH_PFFFT "Use PFFFT (BSD-like licence) for SIMD DFT." ON - "WITH_SIMD;NOT WITH_AVFFT" OFF) +cmake_dependent_option (BUILD_SHARED_LIBS + "Build shared (dynamic) soxr libraries." ON + "NOT WITH_DEV_GPROF" OFF) +cmake_dependent_option (WITH_VR32 + "Include HQ variable-rate resampling engine." ON + "WITH_CR32 OR WITH_CR64 OR WITH_CR32S OR WITH_CR64S OR NOT DEFINED WITH_VR32" ON) +cmake_dependent_option (WITH_CR32 + "Include HQ constant-rate resampling engine." ON + "WITH_VR32 OR WITH_CR64 OR WITH_CR32S OR WITH_CR64S" ON) +cmake_dependent_option (WITH_CR64 + "Include VHQ constant-rate resampling engine." ON + "WITH_VR32 OR WITH_CR32 OR WITH_CR32S OR WITH_CR64S" ON) +cmake_dependent_option (WITH_CR64S + "Include VHQ SIMD constant-rate resampling engine." ON + "WITH_VR32 OR WITH_CR32 OR WITH_CR32S OR WITH_CR64" ON) +cmake_dependent_option (WITH_CR32S + "Include HQ SIMD constant-rate resampling engine." ON + "WITH_VR32 OR WITH_CR64 OR WITH_CR32 OR WITH_CR64S" ON) +cmake_dependent_option (WITH_AVFFT + "Use libavcodec (LGPL) for HQ SIMD DFT." OFF + "WITH_CR32S;NOT WITH_PFFFT" OFF) +cmake_dependent_option (WITH_PFFFT + "Use PFFFT (BSD-like licence) for HQ SIMD DFT." ON + "WITH_CR32S;NOT WITH_AVFFT" OFF) cmake_dependent_option (BUILD_LSR_TESTS "Build LSR tests." OFF "UNIX;NOT CMAKE_CROSSCOMPILING;EXISTS ${PROJECT_SOURCE_DIR}/lsr-tests;WITH_LSR_BINDINGS" OFF) -option (WITH_DEV_TRACE "Enable developer trace output." OFF) -mark_as_advanced (WITH_DEV_TRACE) +option (WITH_DEV_TRACE "Enable developer trace capability." ON) +option (WITH_DEV_GPROF "Enable developer grpof output." OFF) +mark_as_advanced (WITH_DEV_TRACE WITH_DEV_GPROF) @@ -79,15 +99,23 @@ endif () if (WITH_OPENMP) find_package (OpenMP) - if (OPENMP_FOUND) + if (OpenMP_FOUND) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}") + set (CMAKE_EXE_LINKER_FLAGS + "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + set (CMAKE_SHARED_LINKER_FLAGS + "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}") endif () endif () -if (WITH_SIMD) - find_package (SIMD) +if (WITH_CR32S) + find_package (SIMD32) + set (WITH_CR32S ${SIMD32_FOUND}) +endif () + +if (WITH_CR64S) + find_package (SIMD64) + set (WITH_CR64S ${SIMD64_FOUND}) endif () if (WITH_AVFFT) @@ -98,7 +126,7 @@ if (WITH_AVFFT) endif () endif () -if (WITH_AVFFT OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm" AND SIMD_FOUND)) +if (WITH_AVFFT OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm" AND SIMD32_FOUND)) find_package (LibAVUtil) if (AVUTIL_FOUND) include_directories (${AVUTIL_INCLUDE_DIRS}) @@ -117,13 +145,22 @@ test_big_endian (HAVE_BIGENDIAN) # Compiler configuration: if (CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang") - set (PROJECT_CXX_FLAGS "-Wconversion -Wall -W -pedantic -Wundef -Wcast-align -Wpointer-arith -Wno-long-long") - set (PROJECT_C_FLAGS "${PROJECT_CXX_FLAGS} -std=gnu89 -Wnested-externs -Wmissing-prototypes -Wstrict-prototypes") + set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -Wconversion -Wall -Wextra") + set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -pedantic -Wundef -Wpointer-arith") + set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -Wno-long-long -Wno-keyword-macro") + if (WITH_DEV_GPROF) + set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -pg") + endif () + # Can use std=c89, but gnu89 should give faster sinf, cosf, etc.: + set (PROJECT_C_FLAGS "${PROJECT_CXX_FLAGS} -std=gnu89 -Wnested-externs") + set (PROJECT_C_FLAGS "${PROJECT_C_FLAGS} -Wmissing-prototypes -Wstrict-prototypes") if (CMAKE_BUILD_TYPE STREQUAL "Release") set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s") # strip endif () - cmake_dependent_option (VISIBILITY_HIDDEN "Build with -fvisibility=hidden." ON + cmake_dependent_option (VISIBILITY_HIDDEN + "Build shared libraries with -fvisibility=hidden." ON "BUILD_SHARED_LIBS" OFF) + mark_as_advanced (VISIBILITY_HIDDEN) if (VISIBILITY_HIDDEN) add_definitions (-fvisibility=hidden -DSOXR_VISIBILITY) endif () @@ -131,9 +168,14 @@ endif () if (MSVC) add_definitions (-D_USE_MATH_DEFINES -D_CRT_SECURE_NO_WARNINGS) - option (ENABLE_STATIC_RUNTIME "Visual Studio, link with runtime statically." OFF) - if (ENABLE_STATIC_RUNTIME) - foreach (flag_var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + option (BUILD_SHARED_RUNTIME "MSVC, link with runtime dynamically." ON) + if (NOT BUILD_SHARED_RUNTIME) + foreach (flag_var + CMAKE_C_FLAGS CMAKE_CXX_FLAGS + CMAKE_C_FLAGS_DEBUG CMAKE_CXX_FLAGS_DEBUG + CMAKE_C_FLAGS_RELEASE CMAKE_CXX_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_C_FLAGS_RELWITHDEBINFO CMAKE_CXX_FLAGS_RELWITHDEBINFO) string (REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") endforeach () endif () @@ -147,7 +189,8 @@ endif () # Build configuration: -if (${BUILD_SHARED_LIBS} AND ${CMAKE_SYSTEM_NAME} STREQUAL Windows) # Allow exes to find dlls: +if (${BUILD_SHARED_LIBS} AND ${CMAKE_SYSTEM_NAME} STREQUAL Windows) + # Allow exes to find dlls: set (BIN ${PROJECT_BINARY_DIR}/bin/) set (EXAMPLES_BIN ${BIN}) set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${BIN}) @@ -188,17 +231,16 @@ endif () if (APPLE) option (BUILD_FRAMEWORK "Build an OS X framework." OFF) - set (FRAMEWORK_INSTALL_DIR "/Library/Frameworks" CACHE STRING "Directory to install frameworks to.") + set (FRAMEWORK_INSTALL_DIR + "/Library/Frameworks" CACHE STRING "Directory to install frameworks to.") endif () # Top-level: -set (PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}) -math (EXPR SO_VERSION_MAJOR "${SO_VERSION_CURRENT} - ${SO_VERSION_AGE}") -math (EXPR SO_VERSION_MINOR "${SO_VERSION_AGE}") -math (EXPR SO_VERSION_PATCH "${SO_VERSION_REVISION}") +set (PROJECT_VERSION + ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}) set (SO_VERSION ${SO_VERSION_MAJOR}.${SO_VERSION_MINOR}.${SO_VERSION_PATCH}) configure_file ( @@ -206,7 +248,7 @@ configure_file ( ${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.h) include_directories (${PROJECT_BINARY_DIR}) -if (BUILD_TESTS OR BUILD_LSR_TESTS) +if (NOT CMAKE_CROSSCOMPILING AND (BUILD_TESTS OR BUILD_LSR_TESTS)) enable_testing () endif () diff --git a/INSTALL b/INSTALL index 3f30772..c429e61 100644 --- a/INSTALL +++ b/INSTALL @@ -78,32 +78,27 @@ To list help on the available options, enter: Options, if given, should be preceded with '-D', e.g. - cmake -DWITH_SIMD:BOOL=OFF .. + cmake -DBUILD_SHARED_LIBS:BOOL=OFF .. Resampling engines -As available on a given system, the library may include up to four resampling -‘engines’, as follows: +As available on a given system, options for including up to five resampling +‘engines’ are available (per above) as follows: - cr32: for constant-rate resampling with precision up to 20 bits, - cr32s: SIMD variant of cr32, - cr64: for constant-rate resampling with precision greater than 20 bits, - vr32: for variable-rate resampling. + WITH_CR32: for constant-rate HQ resampling, + WITH_CR32S: SIMD variant of previous, + WITH_CR64: for constant-rate VHQ resampling, + WITH_CR64S: SIMD variant of previous, + WITH_VR32: for variable-rate HQ resampling. -Engine inclusion is controlled (as above) by the following cmake option -variables: +By default, these options are all set to ON. - cr32: WITH_SINGLE_PRECISION - cr32s: WITH_SINGLE_PRECISION, WITH_SIMD - cr32: WITH_DOUBLE_PRECISION - -By default, these variables are all set to ON. - -When both cr32 and cr32s engines are included, run-time selection is automatic -(based on CPU capability) for x86 CPUs, and can be automatic for ARM CPUs if -the 3rd-party library `libavutil' is available at libsoxr build-time. +When both SIMD and non-SIMD engine variants are included, run-time selection +is automatic (based on CPU capability) for x86 CPUs, and can be automatic for +ARM CPUs if the 3rd-party library `libavutil' is available at libsoxr +build-time. @@ -114,13 +109,13 @@ E.g. targeting a Linux ARM system: mkdir build cd build cmake -DCMAKE_SYSTEM_NAME=Linux \ - -DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc-5 \ + -DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc \ .. or, also building the examples (one of which uses C++): cmake -DCMAKE_SYSTEM_NAME=Linux \ - -DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc-5 \ - -DCMAKE_CXX_COMPILER=arm-linux-gnueabi-g++-5 \ + -DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc \ + -DCMAKE_CXX_COMPILER=arm-linux-gnueabi-g++ \ -DBUILD_EXAMPLES=1 \ .. @@ -158,7 +153,7 @@ Autotools-based systems might find it useful to create a file called cmake -DBUILD_SHARED_LIBS=OFF . (or with other build options as required). -For MS visual studio, see msvc/README. +For MS Visual C++, see msvc/README. @@ -168,7 +163,7 @@ The libsoxr API structure ‘soxr_runtime_spec_t’ allows application developer to optimise some aspects of libsoxr’s operation for a particular application. However, since optimal performance might depend on an individual end-user’s run-time system and the end-user’s preferences, environment variables are -available to set (override) the run-time parameters: +available to set (override) run-time parameters as follows: Env. variable Equivalent soxr_runtime_spec_t item ------------------ ----------------------------------- @@ -178,8 +173,8 @@ available to set (override) the run-time parameters: SOXR_MIN_DFT_SIZE log2_min_dft_size SOXR_NUM_THREADS num_threads -Additionally, the SOXR_USE_SIMD environment variable may be used to override -automatic selection (or to provide manual selection where automatic selection -is not available) between the cr32 and cr32s resampling engines. (Which engine -is selected for a specific configuration of libsoxr can be checked using -example #3, which reports it.) +Additionally, the SOXR_USE_SIMD32 and SOXR_USE_SIMD64 environment variables +may be used to override automatic selection (or to provide manual selection +where automatic selection is not available) between SIMD and non-SIMD engine +variants. (Which engine is selected for a specific configuration of libsoxr +can be checked using example #3, which reports it.) diff --git a/LICENCE b/LICENCE index 1c61878..52f84ee 100644 --- a/LICENCE +++ b/LICENCE @@ -1,4 +1,4 @@ -SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -11,8 +11,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License -along with this library; if not, write to the Free Software Foundation, -Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +along with this library; if not, see . Notes diff --git a/NEWS b/NEWS index a2eca85..d368fa2 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,8 @@ Version 0.1.3 (2016-mm-dd) - * Better support for clang, ARM+SIMD, and cross-compilation. + * SIMD enhancements: SSE, AVX, Neon. + * Improved support for clang, ARM, and cross-compilation. * Other minor fixes/improvements to build/tests/documentation. + * N.B. some cmake configuration variable name changes. Version 0.1.2 (2015-09-05) * Fix conversion failure when I/O types differ but I/O rates don't. diff --git a/README b/README index 06f11e6..0070896 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net The SoX Resampler library `libsoxr' performs one-dimensional sample-rate conversion -- it may be used, for example, to resample PCM-encoded audio. diff --git a/TODO b/TODO index 1c4a31b..c699c0c 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,2 @@ * SOXR_ALLOW_ALIASING * Explicit flush API fn, perhaps. -* More SIMD. diff --git a/cmake/Modules/FindCFlags.cmake b/cmake/Modules/FindCFlags.cmake new file mode 100644 index 0000000..f118727 --- /dev/null +++ b/cmake/Modules/FindCFlags.cmake @@ -0,0 +1,35 @@ +# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net +# Licence for this file: LGPL v2.1 See LICENCE for details. + +# - Function to find C compiler feature flags + +include (CheckCSourceCompiles) +include (FindPackageHandleStandardArgs) + +function (FindCFlags PKG_NAME PKG_DESC TRIAL_C_FLAGS TEST_C_SOURCE) + +foreach (TRIAL_C_FLAG ${TRIAL_C_FLAGS}) + message (STATUS "Trying ${PKG_NAME} C flags: ${TRIAL_C_FLAG}") + unset (DETECT_${PKG_NAME}_C_FLAGS CACHE) #displayed by check_c_source_compiles + + set (TMP "${CMAKE_REQUIRED_FLAGS}") + set (CMAKE_REQUIRED_FLAGS "${TRIAL_C_FLAG}") + check_c_source_compiles ("${TEST_C_SOURCE}" DETECT_${PKG_NAME}_C_FLAGS) + set (CMAKE_REQUIRED_FLAGS "${TMP}") + + if (DETECT_${PKG_NAME}_C_FLAGS) + set (DETECTED_C_FLAGS "${TRIAL_C_FLAG}") + break () + endif () +endforeach () + +# N.B. Will not overwrite existing cache variable: +set (${PKG_NAME}_C_FLAGS "${DETECTED_C_FLAGS}" + CACHE STRING "C compiler flags for ${PKG_DESC}") + +find_package_handle_standard_args ( + ${PKG_NAME} DEFAULT_MSG ${PKG_NAME}_C_FLAGS ${PKG_NAME}_C_FLAGS) +mark_as_advanced (${PKG_NAME}_C_FLAGS) +set (${PKG_NAME}_FOUND ${${PKG_NAME}_FOUND} PARENT_SCOPE) + +endfunction () diff --git a/cmake/Modules/FindOpenMP.cmake b/cmake/Modules/FindOpenMP.cmake index 74e5bc5..3664eed 100644 --- a/cmake/Modules/FindOpenMP.cmake +++ b/cmake/Modules/FindOpenMP.cmake @@ -1,117 +1,38 @@ +# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net +# Licence for this file: LGPL v2.1 See LICENCE for details. + # - Finds OpenMP support -# This module can be used to detect OpenMP support in a compiler. -# If the compiler supports OpenMP, the flags required to compile with -# openmp support are set. # # The following variables are set: -# OpenMP_C_FLAGS - flags to add to the C compiler for OpenMP support -# OPENMP_FOUND - true if openmp is detected -# -# Supported compilers can be found at http://openmp.org/wp/openmp-compilers/ -# -# Modifications for soxr: -# * don't rely on presence of C++ compiler -# * support MINGW -# -#============================================================================= -# Copyright 2009 Kitware, Inc. -# Copyright 2008-2009 André Rigland Brodtkorb -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * The names of Kitware, Inc., the Insight Consortium, or the names of -# any consortium members, or of any contributors, may not be used to -# endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# OpenMP_C_FLAGS - flags to add to the C compiler for this package. +# OpenMP_FOUND - true if support for this package is found. -include (CheckCSourceCompiles) -include (FindPackageHandleStandardArgs) - -set (OpenMP_C_FLAG_CANDIDATES - #Gnu - "-fopenmp" - #Clang - "-fopenmp=libiomp5" - #Microsoft Visual Studio - "/openmp" - #Intel windows - "-Qopenmp" - #Intel - "-openmp" - #Empty, if compiler automatically accepts openmp - " " - #Sun - "-xopenmp" - #HP - "+Oopenmp" - #IBM XL C/c++ - "-qsmp" - #Portland Group - "-mp" -) - -# sample openmp source code to test -set (OpenMP_C_TEST_SOURCE -" -#include -int main() { -#ifdef _OPENMP - return 0; -#else - breaks_on_purpose -#endif -} -") -# if these are set then do not try to find them again, -# by avoiding any try_compiles for the flags if (DEFINED OpenMP_C_FLAGS) - set (OpenMP_C_FLAG_CANDIDATES) -endif (DEFINED OpenMP_C_FLAGS) + set (TRIAL_C_FLAGS) +else () + set (TRIAL_C_FLAGS + "-fopenmp" # Gnu + "-fopenmp=libiomp5" # Clang + "/openmp" # MSVC + " " + ) -# check c compiler -foreach (FLAG ${OpenMP_C_FLAG_CANDIDATES}) - set (SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") - set (CMAKE_REQUIRED_FLAGS "${FLAG}") - unset (OpenMP_FLAG_DETECTED CACHE) - message (STATUS "Try OpenMP C flag = [${FLAG}]") - check_c_source_compiles ("${OpenMP_C_TEST_SOURCE}" OpenMP_FLAG_DETECTED) - set (CMAKE_REQUIRED_FLAGS "${SAFE_CMAKE_REQUIRED_FLAGS}") - if (OpenMP_FLAG_DETECTED) - set (OpenMP_C_FLAGS_INTERNAL "${FLAG}") - break () - endif (OpenMP_FLAG_DETECTED) -endforeach (FLAG ${OpenMP_C_FLAG_CANDIDATES}) + set (TEST_C_SOURCE " + #ifndef _OPENMP + #error + #endif + #include + int main() {return 0;} + ") +endif () -set (OpenMP_C_FLAGS "${OpenMP_C_FLAGS_INTERNAL}" - CACHE STRING "C compiler flags for OpenMP parallization") +include (FindCFlags) -# handle the standard arguments for find_package -find_package_handle_standard_args (OpenMP DEFAULT_MSG - OpenMP_C_FLAGS) +FindCFlags ("OpenMP" "OpenMP threading" + "${TRIAL_C_FLAGS}" "${TEST_C_SOURCE}") if (MINGW) set (OpenMP_SHARED_LINKER_FLAGS "${OpenMP_SHARED_LINKER_FLAGS} ${OpenMP_C_FLAGS}") set (OpenMP_EXE_LINKER_FLAGS "${OpenMP_EXE_LINKER_FLAGS} ${OpenMP_C_FLAGS}") + mark_as_advanced (OpenMP_SHARED_LINKER_FLAGS OpenMP_EXE_LINKER_FLAGS) endif () - -mark_as_advanced (OpenMP_C_FLAGS OpenMP_SHARED_LINKER_FLAGS OpenMP_EXE_LINKER_FLAGS) diff --git a/cmake/Modules/FindSIMD.cmake b/cmake/Modules/FindSIMD.cmake deleted file mode 100644 index 30dca70..0000000 --- a/cmake/Modules/FindSIMD.cmake +++ /dev/null @@ -1,104 +0,0 @@ -# - Finds SIMD support -# -# The following variables are set: -# SIMD_C_FLAGS - flags to add to the C compiler for this package. -# SIMD_FOUND - true if support for this package is found. -# -#============================================================================= -# Based on FindOpenMP.cmake, which is: -# -# Copyright 2009 Kitware, Inc. -# Copyright 2008-2009 André Rigland Brodtkorb -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * The names of Kitware, Inc., the Insight Consortium, or the names of -# any consortium members, or of any contributors, may not be used to -# endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -include (CheckCSourceCompiles) -include (FindPackageHandleStandardArgs) - -if (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") - set (SIMD_C_FLAG_CANDIDATES - # Gcc - "-Wno-cast-align -mfpu=neon-vfpv4 -mcpu=cortex-a7" - "-Wno-cast-align -mfpu=neon -mfloat-abi=hard" - "-Wno-cast-align -mfpu=neon -mfloat-abi=softfp" - "-Wno-cast-align -mfpu=neon -mfloat-abi=soft" - ) - set (SIMD_C_TEST_SOURCE " - #include - int main() { - float32x4_t a = vdupq_n_f32(0), b = a, c = vaddq_f32(a,b); - return 0; - } - ") -else () - if (WIN32) # Safety for when mixed lib/app compilers (but performance hit) - set (GCC_WIN32_SIMD_OPTS "-mincoming-stack-boundary=2") - endif () - - set (SIMD_C_FLAG_CANDIDATES - # x64 - "-Wno-cast-align" - " " - # Microsoft Visual Studio x86 - "/arch:SSE /fp:fast -D__SSE__" - # Gcc x86 - "-Wno-cast-align -msse -mfpmath=sse ${GCC_WIN32_SIMD_OPTS}" - # Gcc x86 (old versions) - "-msse -mfpmath=sse" - ) - set (SIMD_C_TEST_SOURCE " - #include - int main() { - __m128 a = _mm_setzero_ps(), b = a, c = _mm_add_ps(a,b); - return 0; - } - ") -endif () - -if (DEFINED SIMD_C_FLAGS) - set (SIMD_C_FLAG_CANDIDATES) -endif () - -foreach (FLAG ${SIMD_C_FLAG_CANDIDATES}) - set (SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") - set (CMAKE_REQUIRED_FLAGS "${FLAG}") - unset (SIMD_FLAG_DETECTED CACHE) - message (STATUS "Try SIMD C flag = [${FLAG}]") - check_c_source_compiles ("${SIMD_C_TEST_SOURCE}" SIMD_FLAG_DETECTED) - set (CMAKE_REQUIRED_FLAGS "${SAFE_CMAKE_REQUIRED_FLAGS}") - if (SIMD_FLAG_DETECTED) - set (SIMD_C_FLAGS_INTERNAL "${FLAG}") - break () - endif () -endforeach () - -set (SIMD_C_FLAGS "${SIMD_C_FLAGS_INTERNAL}" - CACHE STRING "C compiler flags for SIMD vectorization") - -find_package_handle_standard_args (SIMD DEFAULT_MSG SIMD_C_FLAGS SIMD_C_FLAGS) -mark_as_advanced (SIMD_C_FLAGS) diff --git a/cmake/Modules/FindSIMD32.cmake b/cmake/Modules/FindSIMD32.cmake new file mode 100644 index 0000000..c215455 --- /dev/null +++ b/cmake/Modules/FindSIMD32.cmake @@ -0,0 +1,54 @@ +# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net +# Licence for this file: LGPL v2.1 See LICENCE for details. + +# - Finds SIMD32 support +# +# The following variables are set: +# SIMD32_C_FLAGS - flags to add to the C compiler for this package. +# SIMD32_FOUND - true if support for this package is found. + +if (DEFINED SIMD32_C_FLAGS) + set (TRIAL_C_FLAGS) +elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") + set (TRIAL_C_FLAGS + # Gcc + "-mfpu=neon-vfpv4 -mcpu=cortex-a7" + "-mfpu=neon -mfloat-abi=hard" + "-mfpu=neon -mfloat-abi=softfp" + "-mfpu=neon -mfloat-abi=soft" + ) + set (TEST_C_SOURCE " + #include + int main() { + float32x4_t a = vdupq_n_f32(0), b = a, c = vaddq_f32(a,b); + return 0; + } + ") +else () + if (WIN32) # Safety for when mixed lib/app compilers (but performance hit) + set (GCC_WIN32_SIMD32_OPTS "-mincoming-stack-boundary=2") + endif () + + set (TRIAL_C_FLAGS + # x64 + " " + # MSVC x86 + "/arch:SSE /fp:fast -D__SSE__" + # Gcc x86 + "-Wno-cast-align -msse -mfpmath=sse ${GCC_WIN32_SIMD32_OPTS}" + # Gcc x86 (old versions) + "-msse -mfpmath=sse" + ) + set (TEST_C_SOURCE " + #include + int main() { + __m128 a = _mm_setzero_ps(), b = a, c = _mm_add_ps(a,b); + return 0; + } + ") +endif () + +include (FindCFlags) + +FindCFlags ("SIMD32" "FLOAT-32 (single-precision) SIMD vectorization" + "${TRIAL_C_FLAGS}" "${TEST_C_SOURCE}") diff --git a/cmake/Modules/FindSIMD64.cmake b/cmake/Modules/FindSIMD64.cmake new file mode 100644 index 0000000..5bf82c9 --- /dev/null +++ b/cmake/Modules/FindSIMD64.cmake @@ -0,0 +1,29 @@ +# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net +# Licence for this file: LGPL v2.1 See LICENCE for details. + +# - Finds SIMD64 support +# +# The following variables are set: +# SIMD64_C_FLAGS - flags to add to the C compiler for this package. +# SIMD64_FOUND - true if support for this package is found. + +if (DEFINED SIMD64_C_FLAGS OR CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") + set (TRIAL_C_FLAGS) +else () + set (TRIAL_C_FLAGS + "-mavx" # Gcc + "/arch:AVX" # MSVC + ) + set (TEST_C_SOURCE " + #include + int main() { + __m256d a = _mm256_setzero_pd(), b = a, c = _mm256_add_pd(a,b); + return 0; + } + ") +endif () + +include (FindCFlags) + +FindCFlags ("SIMD64" "FLOAT-64 (double-precision) SIMD vectorization" + "${TRIAL_C_FLAGS}" "${TEST_C_SOURCE}") diff --git a/cmake/Modules/SetSystemProcessor.cmake b/cmake/Modules/SetSystemProcessor.cmake index d1973f8..9bafe05 100644 --- a/cmake/Modules/SetSystemProcessor.cmake +++ b/cmake/Modules/SetSystemProcessor.cmake @@ -5,13 +5,16 @@ macro (set_system_processor) if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "") + unset(CMAKE_SYSTEM_PROCESSOR) + endif () + if (NOT DEFINED CMAKE_SYSTEM_PROCESSOR) include (CheckCSourceCompiles) - set (CPU_CANDIDATES + set (CPU_LINES "#if defined __x86_64__ || defined _M_X64 /*\;x86_64\;*/" "#if defined __i386__ || defined _M_IX86 /*\;x86_32\;*/" "#if defined __arm__ || defined _M_ARM /*\;arm\;*/" ) - foreach (CPU_LINE ${CPU_CANDIDATES}) + foreach (CPU_LINE ${CPU_LINES}) string (CONCAT CPU_SOURCE "${CPU_LINE}" " int main() {return 0;} #endif @@ -19,10 +22,14 @@ macro (set_system_processor) unset (SYSTEM_PROCESSOR_DETECTED CACHE) check_c_source_compiles ("${CPU_SOURCE}" SYSTEM_PROCESSOR_DETECTED) if (SYSTEM_PROCESSOR_DETECTED) - list (GET CPU_LINE 1 CMAKE_SYSTEM_PROCESSOR) - message (STATUS "CMAKE_SYSTEM_PROCESSOR set to ${CMAKE_SYSTEM_PROCESSOR}") + list (GET CPU_LINE 1 DETECTED_SYSTEM_PROCESSOR) + message (STATUS "CMAKE_SYSTEM_PROCESSOR is ${DETECTED_SYSTEM_PROCESSOR}") break () endif () endforeach () endif () + + # N.B. Will not overwrite existing cache variable: + set (CMAKE_SYSTEM_PROCESSOR "${DETECTED_SYSTEM_PROCESSOR}" + CACHE STRING "Target system processor") endmacro () diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index b7b50f8..7a95823 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,16 +1,18 @@ -# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net # Licence for this file: LGPL v2.1 See LICENCE for details. +if (${BUILD_EXAMPLES} OR ${BUILD_TESTS}) + set (SOURCES 3-options-input-fn) + if (${WITH_LSR_BINDINGS}) + set (LSR_SOURCES 1a-lsr) + endif () +endif () + if (${BUILD_EXAMPLES}) project (soxr) # Adds c++ compiler - file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/[1-9]-*.[cC]) -elseif (${BUILD_TESTS}) - file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/3*.c) -endif () - -if (${BUILD_EXAMPLES} OR ${BUILD_TESTS}) - if (${WITH_LSR_BINDINGS}) - set (LSR_SOURCES 1a-lsr.c) + list (APPEND SOURCES 1-single-block 2-stream 4-split-channels) + if (${WITH_VR32}) + list (APPEND SOURCES 5-variable-rate) endif () endif () @@ -34,4 +36,5 @@ if (${BUILD_TESTS} AND ${WITH_LSR_BINDINGS}) endif () file (GLOB INSTALL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.[cCh]) -install (FILES ${INSTALL_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/README DESTINATION ${DOC_INSTALL_DIR}/examples) +install (FILES ${INSTALL_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/README + DESTINATION ${DOC_INSTALL_DIR}/examples) diff --git a/examples/examples-common.h b/examples/examples-common.h index 25bd48c..7ebde73 100644 --- a/examples/examples-common.h +++ b/examples/examples-common.h @@ -1,4 +1,4 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ /* Common includes etc. for the examples. */ @@ -6,7 +6,7 @@ #include #include #include -#include +#include "math-wrap.h" #include #include #include @@ -17,10 +17,7 @@ #include #include #define USE_STD_STDIO _setmode(_fileno(stdout), _O_BINARY), \ - _setmode(_fileno(stdin ), _O_BINARY); - /* Sometimes missing, so ensure that it is defined: */ - #undef M_PI - #define M_PI 3.14159265358979323846 + _setmode(_fileno(stdin ), _O_BINARY) #else #define USE_STD_STDIO #endif diff --git a/lsr-tests/CMakeLists.txt b/lsr-tests/CMakeLists.txt index 1ac041e..4f718f7 100644 --- a/lsr-tests/CMakeLists.txt +++ b/lsr-tests/CMakeLists.txt @@ -35,7 +35,7 @@ set (tests callback_hang_test callback_test downsample_test float_short_test misc_test multi_channel_test reset_test simple_test termination_test varispeed_test) -if (WITH_DOUBLE_PRECISION) +if (WITH_CR64 OR WITH_CR64S) set (tests ${tests} snr_bw_test) endif () diff --git a/lsr-tests/config.h.in b/lsr-tests/config.h.in index 3de8799..1095e00 100644 --- a/lsr-tests/config.h.in +++ b/lsr-tests/config.h.in @@ -1,4 +1,4 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ #if !defined soxsrc_lsr_tests_config_included diff --git a/lsr-tests/snr_bw_test.c b/lsr-tests/snr_bw_test.c index 26fb279..55130b4 100644 --- a/lsr-tests/snr_bw_test.c +++ b/lsr-tests/snr_bw_test.c @@ -118,9 +118,9 @@ main (int argc, char *argv []) BOOLEAN_TRUE, { { 1, { 0.01111111111 }, 3.0, 1, 130.0, 1.0 }, { 1, { 0.01111111111 }, 0.6, 1, 132.0, 1.0 }, - { 1, { 0.01111111111 }, 0.3, 1, 138.0, 1.0 }, + { 1, { 0.01111111111 }, 0.3, 1, 135.0, 1.0 }, { 1, { 0.01111111111 }, 1.0, 1, 155.0, 1.0 }, - { 1, { 0.01111111111 }, 1.001, 1, 134.0, 1.0 }, + { 1, { 0.01111111111 }, 1.001, 1, 133.0, 1.0 }, { 2, { 0.011111, 0.324 }, 1.9999, 2, 127.0, 1.0 }, { 2, { 0.012345, 0.457 }, 0.456789, 1, 124.0, 0.5 }, { 2, { 0.011111, 0.45 }, 0.6, 1, 126.0, 0.5 }, @@ -135,10 +135,10 @@ main (int argc, char *argv []) { 1, { 0.01111111111 }, 0.6, 1, 147.0, 1.0 }, { 1, { 0.01111111111 }, 0.3, 1, 147.0, 1.0 }, { 1, { 0.01111111111 }, 1.0, 1, 155.0, 1.0 }, - { 1, { 0.01111111111 }, 1.001, 1, 147.0, 1.0 }, + { 1, { 0.01111111111 }, 1.001, 1, 146.0, 1.0 }, { 2, { 0.011111, 0.324 }, 1.9999, 2, 147.0, 1.0 }, { 2, { 0.012345, 0.457 }, 0.456789, 1, 148.0, 0.5 }, - { 2, { 0.011111, 0.45 }, 0.6, 1, 149.0, 0.5 }, + { 2, { 0.011111, 0.45 }, 0.6, 1, 145.0, 0.5 }, { 1, { 0.43111111111 }, 1.33, 1, 145.0, 1.0 } } }, diff --git a/msvc/libsoxr.vcproj b/msvc/libsoxr.vcproj index b1e1714..2dd7be0 100644 --- a/msvc/libsoxr.vcproj +++ b/msvc/libsoxr.vcproj @@ -60,6 +60,12 @@ + + + + + + @@ -67,10 +73,9 @@ - - - - + + + diff --git a/msvc/soxr-config.h b/msvc/soxr-config.h index d6b07ac..a1fcfb4 100644 --- a/msvc/soxr-config.h +++ b/msvc/soxr-config.h @@ -7,12 +7,14 @@ #if !defined soxr_config_included #define soxr_config_included -#define WITH_SINGLE_PRECISION 1 -#define WITH_DOUBLE_PRECISION 1 +#define WITH_CR32 1 +#define WITH_CR32S 1 +#define WITH_CR64 1 +#define WITH_CR64S 1 +#define WITH_VR32 1 #define AVCODEC_FOUND 0 #define AVUTIL_FOUND 0 -#define SIMD_FOUND 1 #define HAVE_FENV_H 0 #define HAVE_STDBOOL_H 0 diff --git a/multi-arch b/multi-arch new file mode 100755 index 0000000..174cc4b --- /dev/null +++ b/multi-arch @@ -0,0 +1,29 @@ +#!/bin/sh +set -e + +# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +# Licence for this file: LGPL v2.1 See LICENCE for details. + +rm -f CMakeCache.txt # Prevent interference from any in-tree build + +j=-j4 +build=Release + +for n in \ + cc: \ + clang: \ + arm-linux-gnueabi-gcc:Linux \ + x86_64-w64-mingw32-gcc:Windows \ + i686-w64-mingw32-gcc:Windows \ + ; do + compiler=$(echo $n | sed 's/:.*//') + system=$(echo $n | sed 's/.*://') + dir=$build-$compiler + which $compiler > /dev/null || echo $compiler not found && ( + echo "***" $dir + mkdir -p $dir + cd $dir + cmake -DCMAKE_BUILD_TYPE=$build -DCMAKE_C_COMPILER=$compiler -DCMAKE_SYSTEM_NAME="$system" .. + make $j && [ /$system = / ] && ctest -j || true + ) +done diff --git a/soxr-config.h.in b/soxr-config.h.in index 3c7dbe9..37ccdb2 100644 --- a/soxr-config.h.in +++ b/soxr-config.h.in @@ -4,12 +4,14 @@ #if !defined soxr_config_included #define soxr_config_included -#cmakedefine01 WITH_SINGLE_PRECISION -#cmakedefine01 WITH_DOUBLE_PRECISION +#cmakedefine01 WITH_CR32 +#cmakedefine01 WITH_CR32S +#cmakedefine01 WITH_CR64 +#cmakedefine01 WITH_CR64S +#cmakedefine01 WITH_VR32 #cmakedefine01 AVCODEC_FOUND #cmakedefine01 AVUTIL_FOUND -#cmakedefine01 SIMD_FOUND #cmakedefine01 HAVE_FENV_H #cmakedefine01 HAVE_STDBOOL_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 19c7522..67d34d7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,4 @@ -# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net # Licence for this file: LGPL v2.1 See LICENCE for details. @@ -7,8 +7,10 @@ if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/vr-coefs.h) include_directories(${CMAKE_CURRENT_BINARY_DIR}) - set_property(SOURCE vr32.c APPEND PROPERTY OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h) + set_property(SOURCE vr32.c + APPEND PROPERTY OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h) add_executable (vr-coefs vr-coefs.c) + target_link_libraries (vr-coefs ${LIBM_LIBRARIES}) ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h COMMAND vr-coefs > ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h DEPENDS vr-coefs) @@ -29,22 +31,41 @@ if (AVCODEC_FOUND) elseif (WITH_PFFFT) #set (RDFT32 pffft32) set (RDFT32S pffft32s) -elseif (SIMD_FOUND) - set (RDFT32S fft4g32s) +elseif (WITH_CR32S) + set (RDFT32S fft4g32s fft4g32) endif () -if (WITH_DOUBLE_PRECISION) - set (DP_SOURCES rate64) +set (SOURCES ${PROJECT_NAME}.c constructors data-io) + +if (WITH_CR32 OR WITH_CR32S OR WITH_CR64 OR WITH_CR64S) + list (APPEND SOURCES dbesi0 filter fft4g64 cr.c) endif () -if (WITH_SINGLE_PRECISION) - set (SP_SOURCES rate32 ${RDFT32}) +if (WITH_CR32) + list (APPEND SOURCES cr32 ${RDFT32}) endif () -if (SIMD_FOUND) - set (SIMD_SOURCES rate32s ${RDFT32S} simd) - foreach (source ${SIMD_SOURCES}) - set_property (SOURCE ${source} PROPERTY COMPILE_FLAGS ${SIMD_C_FLAGS}) +if (WITH_CR64) + list (APPEND SOURCES cr64) +endif () + +if (WITH_VR32) + list (APPEND SOURCES vr32) +endif () + +if (WITH_CR32S) + foreach (source cr32s ${RDFT32S} simd32) + list (APPEND SOURCES ${source}) + set_property (SOURCE ${source} + APPEND_STRING PROPERTY COMPILE_FLAGS ${SIMD32_C_FLAGS}) + endforeach () +endif () + +if (WITH_CR64S) + foreach (source cr64s pffft64s simd64) + list (APPEND SOURCES ${source}) + set_property (SOURCE ${source} + APPEND_STRING PROPERTY COMPILE_FLAGS ${SIMD64_C_FLAGS}) endforeach () endif () @@ -52,8 +73,7 @@ endif () # Libsoxr: -add_library (${PROJECT_NAME} ${LIB_TYPE} ${PROJECT_NAME}.c data-io dbesi0 filter fft4g64 - ${SP_SOURCES} vr32 ${DP_SOURCES} ${SIMD_SOURCES}) +add_library (${PROJECT_NAME} ${LIB_TYPE} ${SOURCES}) target_link_libraries (${PROJECT_NAME} PRIVATE ${LIBS} ${LIBM_LIBRARIES}) set_target_properties (${PROJECT_NAME} PROPERTIES VERSION "${SO_VERSION}" diff --git a/src/aliases.h b/src/aliases.h index eb42bdc..ebcce41 100644 --- a/src/aliases.h +++ b/src/aliases.h @@ -18,8 +18,10 @@ #define lsx_dfst_f _soxr_dfst_f #define lsx_dfst _soxr_dfst #define lsx_fir_to_phase _soxr_fir_to_phase +#define lsx_f_resp _soxr_f_resp #define lsx_init_fft_cache_f _soxr_init_fft_cache_f #define lsx_init_fft_cache _soxr_init_fft_cache +#define lsx_inv_f_resp _soxr_inv_f_resp #define lsx_kaiser_beta _soxr_kaiser_beta #define lsx_kaiser_params _soxr_kaiser_params #define lsx_make_lpf _soxr_make_lpf diff --git a/src/avfft32.c b/src/avfft32.c index 5be13d2..fe651f5 100644 --- a/src/avfft32.c +++ b/src/avfft32.c @@ -1,17 +1,19 @@ /* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ +#include #include #include #include "filter.h" +#include "rdft_t.h" static void * forward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),DFT_R2C);} static void * backward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),IDFT_C2R);} static void rdft(int length, void * setup, float * h) {av_rdft_calc(setup, h); (void)length;} static int multiplier(void) {return 2;} static void nothing(void) {} +static int flags(void) {return 0;} -typedef void (* fn_t)(void); fn_t _soxr_rdft32_cb[] = { (fn_t)forward_setup, (fn_t)backward_setup, @@ -24,4 +26,8 @@ fn_t _soxr_rdft32_cb[] = { (fn_t)_soxr_ordered_partial_convolve_f, (fn_t)multiplier, (fn_t)nothing, + (fn_t)malloc, + (fn_t)calloc, + (fn_t)free, + (fn_t)flags, }; diff --git a/src/avfft32s.c b/src/avfft32s.c index 75e485e..8b1da80 100644 --- a/src/avfft32s.c +++ b/src/avfft32s.c @@ -3,15 +3,16 @@ #include #include -#include "simd.h" +#include "simd32.h" +#include "rdft_t.h" static void * forward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),DFT_R2C);} static void * backward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),IDFT_C2R);} static void rdft(int length, void * setup, float * h) {av_rdft_calc(setup, h); (void)length;} static int multiplier(void) {return 2;} static void nothing(void) {} +static int flags(void) {return RDFT_IS_SIMD;} -typedef void (* fn_t)(void); fn_t _soxr_rdft32s_cb[] = { (fn_t)forward_setup, (fn_t)backward_setup, @@ -20,8 +21,12 @@ fn_t _soxr_rdft32s_cb[] = { (fn_t)rdft, (fn_t)rdft, (fn_t)rdft, - (fn_t)_soxr_ordered_convolve_simd, - (fn_t)_soxr_ordered_partial_convolve_simd, + (fn_t)ORDERED_CONVOLVE_SIMD, + (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD, (fn_t)multiplier, (fn_t)nothing, + (fn_t)SIMD_ALIGNED_MALLOC, + (fn_t)SIMD_ALIGNED_CALLOC, + (fn_t)SIMD_ALIGNED_FREE, + (fn_t)flags, }; diff --git a/src/avx.h b/src/avx.h new file mode 100644 index 0000000..ace19b5 --- /dev/null +++ b/src/avx.h @@ -0,0 +1,40 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +/* AVX support macros */ + +#if !defined soxr_avx_included +#define soxr_avx_included + +#include + +typedef __m256d v4sf; +#define VZERO() _mm256_setzero_pd() +#define VMUL(a,b) _mm256_mul_pd(a,b) +#define VADD(a,b) _mm256_add_pd(a,b) +#define VMADD(a,b,c) VADD(VMUL(a,b),c) /* Note: gcc -mfma will `fuse' these */ +#define VSUB(a,b) _mm256_sub_pd(a,b) +#define LD_PS1(p) _mm256_set1_pd(p) +#define INTERLEAVE2(in1, in2, out1, out2) {v4sf \ + t1 = _mm256_unpacklo_pd(in1, in2), \ + t2 = _mm256_unpackhi_pd(in1, in2); \ + out1 = _mm256_permute2f128_pd(t1,t2,0x20); \ + out2 = _mm256_permute2f128_pd(t1,t2,0x31); } +#define UNINTERLEAVE2(in1, in2, out1, out2) {v4sf \ + t1 = _mm256_permute2f128_pd(in1,in2,0x20), \ + t2 = _mm256_permute2f128_pd(in1,in2,0x31); \ + out1 = _mm256_unpacklo_pd(t1, t2); \ + out2 = _mm256_unpackhi_pd(t1, t2);} +#define VTRANSPOSE4(x0,x1,x2,x3) {v4sf \ + t0 = _mm256_shuffle_pd(x0,x1, 0x0), \ + t2 = _mm256_shuffle_pd(x0,x1, 0xf), \ + t1 = _mm256_shuffle_pd(x2,x3, 0x0), \ + t3 = _mm256_shuffle_pd(x2,x3, 0xf); \ + x0 = _mm256_permute2f128_pd(t0,t1, 0x20); \ + x1 = _mm256_permute2f128_pd(t2,t3, 0x20); \ + x2 = _mm256_permute2f128_pd(t0,t1, 0x31); \ + x3 = _mm256_permute2f128_pd(t2,t3, 0x31);} +#define VSWAPHL(a,b) _mm256_permute2f128_pd(b, a, 0x30) +#define VALIGNED(ptr) ((((long)(ptr)) & 0x1F) == 0) + +#endif diff --git a/src/constructors.c b/src/constructors.c new file mode 100644 index 0000000..0128990 --- /dev/null +++ b/src/constructors.c @@ -0,0 +1,85 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#include "soxr.h" +#include "filter.h" +#include "internal.h" +#include +#include +#include + +#if !WITH_CR32 && !WITH_CR32S && !WITH_CR64 && !WITH_CR64S +#undef lsx_to_3dB +#define lsx_to_3dB(x) ((x)/(x)) +#endif + + + +soxr_quality_spec_t soxr_quality_spec(unsigned long recipe, unsigned long flags) +{ + soxr_quality_spec_t spec, * p = &spec; + unsigned quality = recipe & 0xf; + double rej; + memset(p, 0, sizeof(*p)); + if (quality > SOXR_PRECISIONQ) { + p->e = "invalid quality type"; + return spec; + } + flags |= quality < SOXR_LSR0Q ? RESET_ON_CLEAR : 0; + p->phase_response = "\62\31\144"[(recipe & 0x30)>>4]; + p->stopband_begin = 1; + p->precision = + quality == SOXR_QQ ? 0 : + quality <= SOXR_16_BITQ ? 16 : + quality <= SOXR_32_BITQ ? 4 + quality * 4 : + quality <= SOXR_LSR2Q ? 55 - quality * 4 : /* TODO: move to lsr.c */ + 0; + rej = p->precision * linear_to_dB(2.); + p->flags = flags; + if (quality <= SOXR_32_BITQ || quality == SOXR_PRECISIONQ) { + #define LOW_Q_BW0 (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */ + p->passband_end = quality == 1? LOW_Q_BW0 : 1 - .05 / lsx_to_3dB(rej); + if (quality <= 2) + p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM; + } + else { /* TODO: move to lsr.c */ + static float const bw[] = {.931f, .832f, .663f}; + p->passband_end = bw[quality - SOXR_LSR0Q]; + if (quality == SOXR_LSR2Q) + p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_LSR2Q | SOXR_PROMOTE_TO_LQ; + } + if (recipe & SOXR_STEEP_FILTER) + p->passband_end = 1 - .01 / lsx_to_3dB(rej); + return spec; +} + + + +soxr_runtime_spec_t soxr_runtime_spec(unsigned num_threads) +{ + soxr_runtime_spec_t spec, * p = &spec; + memset(p, 0, sizeof(*p)); + p->log2_min_dft_size = 10; + p->log2_large_dft_size = 17; + p->coef_size_kbytes = 400; + p->num_threads = num_threads; + return spec; +} + + + +soxr_io_spec_t soxr_io_spec( + soxr_datatype_t itype, + soxr_datatype_t otype) +{ + soxr_io_spec_t spec, * p = &spec; + memset(p, 0, sizeof(*p)); + if ((itype | otype) >= SOXR_SPLIT * 2) + p->e = "invalid io datatype(s)"; + else { + p->itype = itype; + p->otype = otype; + p->scale = 1; + } + return spec; +} diff --git a/src/cr-core.c b/src/cr-core.c new file mode 100644 index 0000000..bc6282c --- /dev/null +++ b/src/cr-core.c @@ -0,0 +1,297 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. + * + * Constant-rate resampling engine-specific code. */ + +#include +#include +#include +#include + +#include "filter.h" + +#if defined SOXR_LIB + #include "internal.h" + #include "cr.h" + #if CORE_TYPE & CORE_DBL + typedef double sample_t; + #if CORE_TYPE & CORE_SIMD_DFT + #define RDFT_CB _soxr_rdft64s_cb + #else + #define RDFT_CB _soxr_rdft64_cb + #endif + #else + typedef float sample_t; + #if CORE_TYPE & CORE_SIMD_DFT + #define RDFT_CB _soxr_rdft32s_cb + #else + #define RDFT_CB _soxr_rdft32_cb + #endif + #endif + + #if CORE_TYPE & (CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT) + #if CORE_TYPE & CORE_DBL + #include "simd64.h" + #include "simd64-dev.h" + #else + #include "simd32.h" + #include "simd32-dev.h" + #endif + #endif + + extern fn_t RDFT_CB[]; +#else + #define RDFT_CB 0 +#endif + + + +static void cubic_stage_fn(stage_t * p, fifo_t * output_fifo) +{ + sample_t const * input = stage_read_p(p); + int num_in = min(stage_occupancy(p), p->input_size); + int i, max_num_out = 1 + (int)(num_in * p->out_in_ratio); + sample_t * output = fifo_reserve(output_fifo, max_num_out); + + for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) { + sample_t const * s = input + p->at.integer; + double x = p->at.fraction * (1 / MULT32); + double b = .5*(s[1]+s[-1])-*s, a = (1/6.)*(s[2]-s[1]+s[-1]-*s-4*b); + double c = s[1]-*s-a-b; + output[i] = (sample_t)(p->mult * (((a*x + b)*x + c)*x + *s)); + } + assert(max_num_out - i >= 0); + fifo_trim_by(output_fifo, max_num_out - i); + fifo_read(&p->fifo, p->at.integer, NULL); + p->at.integer = 0; +} + + + +#if CORE_TYPE & CORE_DBL + #define SIMD_AVX ((CORE_TYPE & CORE_SIMD_HALF) && defined __AVX__) + #define SIMD_SSE 0 +#else + #define SIMD_SSE ((CORE_TYPE & CORE_SIMD_HALF) && (defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86)) + #define SIMD_AVX 0 +#endif + +#define SIMD_NEON ((CORE_TYPE & CORE_SIMD_HALF) && defined __arm__) + + + + +#include "half-coefs.h" + +#if !(CORE_TYPE & CORE_SIMD_HALF) +#define FUNCTION_H h7 +#define CONVOLVE ____ __ _ +#include "half-fir.h" +#endif + +#define FUNCTION_H h8 +#define CONVOLVE ____ ____ +#include "half-fir.h" + +#define FUNCTION_H h9 +#define CONVOLVE ____ ____ _ +#include "half-fir.h" + +#if CORE_TYPE & CORE_DBL + #define FUNCTION_H h10 + #define CONVOLVE ____ ____ __ + #include "half-fir.h" + + #define FUNCTION_H h11 + #define CONVOLVE ____ ____ __ _ + #include "half-fir.h" + + #define FUNCTION_H h12 + #define CONVOLVE ____ ____ ____ + #include "half-fir.h" + + #define FUNCTION_H h13 + #define CONVOLVE ____ ____ ____ _ + #include "half-fir.h" +#endif + +static half_fir_info_t const half_firs[] = { +#if !(CORE_TYPE & CORE_SIMD_HALF) + { 7, half_fir_coefs_7 , h7 , 0 , 120.65f}, +#endif + { 8, half_fir_coefs_8 , h8 , 0 , 136.51f}, + { 9, half_fir_coefs_9 , h9 , 0 , 152.32f}, +#if CORE_TYPE & CORE_DBL + {10, half_fir_coefs_10, h10, 0 , 168.08f}, + {11, half_fir_coefs_11, h11, 0 , 183.79f}, + {12, half_fir_coefs_12, h12, 0 , 199.46f}, + {13, half_fir_coefs_13, h13, 0 , 215.12f}, +#endif +}; + +#undef SIMD_AVX +#undef SIMD_NEON +#undef SIMD_SSE + + + +#if CORE_TYPE & CORE_DBL + #define SIMD_AVX ((CORE_TYPE & CORE_SIMD_POLY) && defined __AVX__) + #define SIMD_SSE 0 +#else + #define SIMD_SSE ((CORE_TYPE & CORE_SIMD_POLY) && (defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86)) + #define SIMD_AVX 0 +#endif + +#define SIMD_NEON ((CORE_TYPE & CORE_SIMD_POLY) && defined __arm__) + + + + +#define HI_PREC_CLOCK +#define COEFS (sample_t * __restrict)p->shared->poly_fir_coefs +#define VAR_LENGTH p->n +#define VAR_CONVOLVE(n) while (j < (n)) _ +#define VAR_POLY_PHASE_BITS p->phase_bits + + + +#define FUNCTION vpoly0 +#define FIR_LENGTH VAR_LENGTH +#define CONVOLVE(n) VAR_CONVOLVE(n) +#include "poly-fir0.h" + +#define FUNCTION vpoly1 +#define COEF_INTERP 1 +#define PHASE_BITS VAR_POLY_PHASE_BITS +#define FIR_LENGTH VAR_LENGTH +#define CONVOLVE(n) VAR_CONVOLVE(n) +#include "poly-fir.h" + +#define FUNCTION vpoly2 +#define COEF_INTERP 2 +#define PHASE_BITS VAR_POLY_PHASE_BITS +#define FIR_LENGTH VAR_LENGTH +#define CONVOLVE(n) VAR_CONVOLVE(n) +#include "poly-fir.h" + +#define FUNCTION vpoly3 +#define COEF_INTERP 3 +#define PHASE_BITS VAR_POLY_PHASE_BITS +#define FIR_LENGTH VAR_LENGTH +#define CONVOLVE(n) VAR_CONVOLVE(n) +#include "poly-fir.h" + + + +#if !(CORE_TYPE & CORE_SIMD_POLY) + +#define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +#define FUNCTION U100_0 +#define FIR_LENGTH U100_l +#define CONVOLVE(n) poly_fir_convolve_U100 +#include "poly-fir0.h" + +#define u100_l 11 +#define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _ _ +#define FUNCTION u100_0 +#define FIR_LENGTH u100_l +#define CONVOLVE(n) poly_fir_convolve_u100 +#include "poly-fir0.h" + +#define FUNCTION u100_1 +#define COEF_INTERP 1 +#define PHASE_BITS 8 +#define FIR_LENGTH u100_l +#define CONVOLVE(n) poly_fir_convolve_u100 +#include "poly-fir.h" + +#define FUNCTION u100_2 +#define COEF_INTERP 2 +#define PHASE_BITS 6 +#define FIR_LENGTH u100_l +#define CONVOLVE(n) poly_fir_convolve_u100 +#include "poly-fir.h" + +#endif + +#define u100_1_b 8 +#define u100_2_b 6 + + + +static poly_fir_t const poly_firs[] = { + {-1, {{0, vpoly0}, { 7.2f, vpoly1}, {5.0f, vpoly2}}}, + {-1, {{0, vpoly0}, { 9.4f, vpoly1}, {6.7f, vpoly2}}}, + {-1, {{0, vpoly0}, {12.4f, vpoly1}, {7.8f, vpoly2}}}, + {-1, {{0, vpoly0}, {13.6f, vpoly1}, {9.3f, vpoly2}}}, + {-1, {{0, vpoly0}, {10.5f, vpoly2}, {8.4f, vpoly3}}}, + {-1, {{0, vpoly0}, {11.85f,vpoly2}, {9.0f, vpoly3}}}, + + {-1, {{0, vpoly0}, { 8.0f, vpoly1}, {5.3f, vpoly2}}}, + {-1, {{0, vpoly0}, { 8.6f, vpoly1}, {5.7f, vpoly2}}}, + {-1, {{0, vpoly0}, {10.6f, vpoly1}, {6.75f,vpoly2}}}, + {-1, {{0, vpoly0}, {12.6f, vpoly1}, {8.6f, vpoly2}}}, + {-1, {{0, vpoly0}, { 9.6f, vpoly2}, {7.6f, vpoly3}}}, + {-1, {{0, vpoly0}, {11.4f, vpoly2}, {8.65f,vpoly3}}}, + +#if CORE_TYPE & CORE_SIMD_POLY + {10.62f, {{0, vpoly0}, {0, 0}, {0, 0}}}, + {-1, {{0, vpoly0}, {u100_1_b, vpoly1}, {u100_2_b, vpoly2}}}, +#else + {10.62f, {{U100_l, U100_0}, {0, 0}, {0, 0}}}, + {11.28f, {{u100_l, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}}, +#endif + {-1, {{0, vpoly0}, { 9, vpoly1}, { 6, vpoly2}}}, + {-1, {{0, vpoly0}, { 11, vpoly1}, { 7, vpoly2}}}, + {-1, {{0, vpoly0}, { 13, vpoly1}, { 8, vpoly2}}}, + {-1, {{0, vpoly0}, { 10, vpoly2}, { 8, vpoly3}}}, + {-1, {{0, vpoly0}, { 12, vpoly2}, { 9, vpoly3}}}, +}; + + + +static cr_core_t const cr_core = { + +#if CORE_TYPE & CORE_SIMD_POLY + {SIMD_ALIGNED_MALLOC, SIMD_ALIGNED_CALLOC, SIMD_ALIGNED_FREE}, +#else + {malloc, calloc, free}, +#endif + half_firs, array_length(half_firs), + 0, 0, + cubic_stage_fn, + poly_firs, RDFT_CB +}; + + + +#if defined SOXR_LIB + +#include "soxr.h" + +static char const * rate_create(void * channel, void * shared, double io_ratio, + soxr_quality_spec_t * q_spec, soxr_runtime_spec_t * r_spec, double scale) +{ + return _soxr_init(channel, shared, io_ratio, q_spec, r_spec, scale, + &cr_core, CORE_TYPE); +} + + + +static char const * id(void) {return CORE_STR;} + +fn_t RATE_CB[] = { + (fn_t)_soxr_input, + (fn_t)_soxr_process, + (fn_t)_soxr_output, + (fn_t)_soxr_flush, + (fn_t)_soxr_close, + (fn_t)_soxr_delay, + (fn_t)_soxr_sizes, + (fn_t)rate_create, + (fn_t)0, + (fn_t)id, +}; + +#endif diff --git a/src/cr.c b/src/cr.c new file mode 100644 index 0000000..eb65a04 --- /dev/null +++ b/src/cr.c @@ -0,0 +1,581 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. + * + * Constant-rate resampling common code. */ + +#include +#include +#include +#include + +#include "filter.h" + +#if defined SOXR_LIB + #include "internal.h" + #define STATIC +#endif + +#include "cr.h" + +#define num_coefs4 ((core_flags&CORE_SIMD_POLY)? ((num_coefs+3)&~3) : num_coefs) + +#define coef_coef(C,T,x) \ + C((T*)result, interp_order, num_coefs4, j, x, num_coefs4 - 1 - i) + +#define STORE(C,T) { \ + if (interp_order > 2) coef_coef(C,T,3) = (T)d; \ + if (interp_order > 1) coef_coef(C,T,2) = (T)c; \ + if (interp_order > 0) coef_coef(C,T,1) = (T)b; \ + coef_coef(C,T,0) = (T)f0;} + +static real * prepare_poly_fir_coefs(double const * coefs, int num_coefs, + int num_phases, int interp_order, double multiplier, + core_flags_t core_flags, alloc_t const * mem) +{ + int i, j, length = num_coefs4 * num_phases * (interp_order + 1); + real * result = mem->calloc(1,(size_t)length << LOG2_SIZEOF_REAL(core_flags)); + double fm1 = coefs[0], f1 = 0, f2 = 0; + + for (i = num_coefs - 1; i >= 0; --i) + for (j = num_phases - 1; j >= 0; --j) { + double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */ + int pos = i * num_phases + j - 1; + fm1 = pos > 0 ? coefs[pos - 1] * multiplier : 0; + switch (interp_order) { + case 1: b = f1 - f0; break; + case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break; + case 3: c=.5*(f1+fm1)-f0;d=(1/6.)*(f2-f1+fm1-f0-4*c);b=f1-f0-d-c; break; + default: assert(!interp_order); + } + switch (core_flags & 3) { + case 0: if (WITH_CR32 ) STORE(coef , float ); break; + case 1: if (WITH_CR64 ) STORE(coef , double); break; + case 2: if (WITH_CR32S) STORE(coef4, float ); break; + default:if (WITH_CR64S) STORE(coef4, double); break; + } + f2 = f1, f1 = f0; + } + return result; +} + +#undef STORE +#undef coef_coef + +#define IS_FLOAT32 (WITH_CR32 || WITH_CR32S) && \ + (!(WITH_CR64 || WITH_CR64S) || sizeof_real == sizeof(float)) +#define WITH_FLOAT64 WITH_CR64 || WITH_CR64S + +static void dft_stage_fn(stage_t * p, fifo_t * output_fifo) +{ + real * output, * dft_out; + int i, j, num_in = max(0, fifo_occupancy(&p->fifo)); + rate_shared_t const * s = p->shared; + dft_filter_t const * f = &s->dft_filter[p->dft_filter_num]; + int const overlap = f->num_taps - 1; + + if (p->at.integer + p->L * num_in >= f->dft_length) { + fn_t const * const RDFT_CB = p->rdft_cb; + size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(p->core_flags); + div_t divd = div(f->dft_length - overlap - p->at.integer + p->L - 1, p->L); + real const * input = fifo_read_ptr(&p->fifo); + fifo_read(&p->fifo, divd.quot, NULL); + num_in -= divd.quot; + + output = fifo_reserve(output_fifo, f->dft_length); + dft_out = (p->core_flags & CORE_SIMD_DFT)? p->dft_out : output; + + if (lsx_is_power_of_2(p->L)) { /* F-domain */ + int portion = f->dft_length / p->L; + memcpy(dft_out, input, (unsigned)portion * sizeof_real); + rdft_oforward(portion, f->dft_forward_setup, dft_out, p->dft_scratch); + if (IS_FLOAT32) { +#define dft_out ((float *)dft_out) + for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */ + dft_out[i] = dft_out[(portion << 1) - i], + dft_out[i+1] = -dft_out[(portion << 1) - i + 1]; + dft_out[portion] = dft_out[1]; + dft_out[portion + 1] = 0; + dft_out[1] = dft_out[0]; +#undef dft_out + } + else if (WITH_FLOAT64) { +#define dft_out ((double *)dft_out) + for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */ + dft_out[i] = dft_out[(portion << 1) - i], + dft_out[i+1] = -dft_out[(portion << 1) - i + 1]; + dft_out[portion] = dft_out[1]; + dft_out[portion + 1] = 0; + dft_out[1] = dft_out[0]; +#undef dft_out + } + + for (portion <<= 1; i < f->dft_length; i += portion, portion <<= 1) { + memcpy((char *)dft_out + (size_t)i * sizeof_real, dft_out, (size_t)portion * sizeof_real); + ((char *)dft_out)[((size_t)i + 1) * sizeof_real] = 0; + } + if (p->step.integer > 0) + rdft_reorder_back(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch); + } else { + if (p->L == 1) + memcpy(dft_out, input, (size_t)f->dft_length * sizeof_real); + else { + memset(dft_out, 0, (size_t)f->dft_length * sizeof_real); + if (IS_FLOAT32) + for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L) + ((float *)dft_out)[i] = ((float *)input)[j]; + else if (WITH_FLOAT64) + for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L) + ((double *)dft_out)[i] = ((double *)input)[j]; + p->at.integer = p->L - 1 - divd.rem; + } + if (p->step.integer > 0) + rdft_forward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch); + else + rdft_oforward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch); + } + + if (p->step.integer > 0) { + rdft_convolve(f->dft_length, f->dft_backward_setup, dft_out, f->coefs); + rdft_backward(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch); + if ((p->core_flags & CORE_SIMD_DFT) && p->step.integer == 1) + memcpy(output, dft_out, (size_t)f->dft_length * sizeof_real); + if (p->step.integer != 1) { + if (IS_FLOAT32) + for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j, + i += p->step.integer) + ((float *)output)[j] = ((float *)dft_out)[i]; + else if (WITH_FLOAT64) + for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j, + i += p->step.integer) + ((double *)output)[j] = ((double *)dft_out)[i]; + p->remM = i - (f->dft_length - overlap); + fifo_trim_by(output_fifo, f->dft_length - j); + } + else fifo_trim_by(output_fifo, overlap); + } + else { /* F-domain */ + int m = -p->step.integer; + rdft_convolve_portion(f->dft_length >> m, dft_out, f->coefs); + rdft_obackward(f->dft_length >> m, f->dft_backward_setup, dft_out, p->dft_scratch); + if (p->core_flags & CORE_SIMD_DFT) + memcpy(output, dft_out, (size_t)(f->dft_length >> m) * sizeof_real); + fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >>m); + } + (void)RDFT_CB; + } + p->input_size = (f->dft_length - p->at.integer + p->L - 1) / p->L; +} + +/* Set to 4 x nearest power of 2 or half of that */ +/* if danger of causing too many cache misses. */ +static int set_dft_length(int num_taps, int min, int large) +{ + double d = log((double)num_taps) / log(2.); + return 1 << range_limit((int)(d + 2.77), min, max((int)(d + 1.77), large)); +} + +static void dft_stage_init( + unsigned instance, double Fp, double Fs, double Fn, double att, + double phase_response, stage_t * p, int L, int M, double * multiplier, + unsigned min_dft_size, unsigned large_dft_size, core_flags_t core_flags, + fn_t const * RDFT_CB) +{ + dft_filter_t * f = &p->shared->dft_filter[instance]; + int num_taps = 0, dft_length = f->dft_length, i, offset; + bool f_domain_m = abs(3-M) == 1 && Fs <= 1; + size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(core_flags); + + if (!dft_length) { + int k = phase_response == 50 && lsx_is_power_of_2(L) && Fn == L? L << 1 : 4; + double m, * h = lsx_design_lpf(Fp, Fs, Fn, att, &num_taps, -k, -1.); + + if (phase_response != 50) + lsx_fir_to_phase(&h, &num_taps, &f->post_peak, phase_response); + else f->post_peak = num_taps / 2; + + dft_length = set_dft_length(num_taps, (int)min_dft_size, (int)large_dft_size); + f->coefs = rdft_calloc((size_t)dft_length, sizeof_real); + offset = dft_length - num_taps + 1; + m = (1. / dft_length) * rdft_multiplier() * L * *multiplier; + if (IS_FLOAT32) for (i = 0; i < num_taps; ++i) + ((float *)f->coefs)[(i + offset) & (dft_length - 1)] =(float)(h[i] * m); + else if (WITH_FLOAT64) for (i = 0; i < num_taps; ++i) + ((double *)f->coefs)[(i + offset) & (dft_length - 1)] = h[i] * m; + free(h); + } + + if (rdft_flags() & RDFT_IS_SIMD) + p->dft_out = rdft_malloc(sizeof_real * (size_t)dft_length); + if (rdft_flags() & RDFT_NEEDS_SCRATCH) + p->dft_scratch = rdft_malloc(2 * sizeof_real * (size_t)dft_length); + + if (!f->dft_length) { + void * coef_setup = rdft_forward_setup(dft_length); + int Lp = lsx_is_power_of_2(L)? L : 1; + int Mp = f_domain_m? M : 1; + f->dft_forward_setup = rdft_forward_setup(dft_length / Lp); + f->dft_backward_setup = rdft_backward_setup(dft_length / Mp); + if (Mp == 1) + rdft_forward(dft_length, coef_setup, f->coefs, p->dft_scratch); + else + rdft_oforward(dft_length, coef_setup, f->coefs, p->dft_scratch); + rdft_delete_setup(coef_setup); + f->num_taps = num_taps; + f->dft_length = dft_length; + lsx_debug("fir_len=%i dft_length=%i Fp=%g Fs=%g Fn=%g att=%g %i/%i", + num_taps, dft_length, Fp, Fs, Fn, att, L, M); + } + *multiplier = 1; + p->out_in_ratio = (double)L / M; + p->core_flags = core_flags; + p->rdft_cb = RDFT_CB; + p->fn = dft_stage_fn; + p->preload = f->post_peak / L; + p->at.integer = f->post_peak % L; + p->L = L; + p->step.integer = f_domain_m? -M/2 : M; + p->dft_filter_num = instance; + p->block_len = f->dft_length - (f->num_taps - 1); + p->phase0 = p->at.integer / p->L; + p->input_size = (f->dft_length - p->at.integer + p->L - 1) / p->L; +} + +static struct half_fir_info const * find_half_fir( + struct half_fir_info const * firs, size_t len, double att) +{ + size_t i; + for (i = 0; i + 1 < len && att > firs[i].att; ++i); + return &firs[i]; +} + +#define have_pre_stage (preM * preL != 1) +#define have_arb_stage (arbM * arbL != 1) +#define have_post_stage (postM * postL != 1) + +#include "soxr.h" + +STATIC char const * _soxr_init( + rate_t * const p, /* Per audio channel. */ + rate_shared_t * const shared, /* By channels undergoing same rate change. */ + double const io_ratio, /* Input rate divided by output rate. */ + soxr_quality_spec_t const * const q_spec, + soxr_runtime_spec_t const * const r_spec, + double multiplier, /* Linear gain to apply during conversion. */ + cr_core_t const * const core, + core_flags_t const core_flags) +{ + size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(core_flags); + double const tolerance = 1 + 1e-5; + + double bits = q_spec->precision; + rolloff_t const rolloff = (rolloff_t)(q_spec->flags & 3); + int interpolator = (int)(r_spec->flags & 3) - 1; + double const Fp0 = q_spec->passband_end, Fs0 = q_spec->stopband_begin; + double const phase_response = q_spec->phase_response, tbw0 = Fs0-Fp0; + + bool const maintain_3dB_pt = !!(q_spec->flags & SOXR_MAINTAIN_3DB_PT); + double tbw_tighten = 1, alpha; + #define tighten(x) (Fs0-(Fs0-(x))*tbw_tighten) + + double arbM = io_ratio, Fn1, Fp1 = Fp0, Fs1 = Fs0, bits1 = min(bits,33); + double att = (bits1 + 1) * linear_to_dB(2.), attArb = att; /* +1: pass+stop */ + int preL = 1, preM = 1, shr = 0, arbL = 1, postL = 1, postM = 1; + bool upsample=false, rational=false, iOpt=!(r_spec->flags&SOXR_NOSMALLINTOPT); + bool lq_bits= (q_spec->flags & SOXR_PROMOTE_TO_LQ)? bits <= 16 : bits == 16; + bool lq_Fp0 = (q_spec->flags & SOXR_PROMOTE_TO_LQ)? Fp0<=lq_bw0 : Fp0==lq_bw0; + int n = 0, i, mode = lq_bits && rolloff == rolloff_medium? io_ratio > 1 || + phase_response != 50 || !lq_Fp0 || Fs0 != 1 : ((int)ceil(bits1) - 6) / 4; + struct half_fir_info const * half_fir_info; + stage_t * s; + + if (io_ratio < 1 && Fs0 - 1 > 1 - Fp0 / tolerance) + return "imaging greater than rolloff"; + if (.002 / tolerance > tbw0 || tbw0 > .5 * tolerance) + return "transition bandwidth not in [0.2,50] % of nyquist"; + if (.5 / tolerance > Fp0 || Fs0 > 1.5 * tolerance) + return "transition band not within [50,150] % of nyquist"; + if (bits!=0 && (15 > bits || bits > 33)) + return "precision not in [15,33] bits"; + if (io_ratio <= 0) + return "resampling factor not positive"; + if (0 > phase_response || phase_response > 100) + return "phase response not in [0=min-phase,100=max-phase] %"; + + p->core = core; + p->io_ratio = io_ratio; + if (bits!=0) while (!n++) { /* Determine stages: */ + int try, L, M, x, maxL = interpolator > 0? 1 : mode? 2048 : + (int)ceil(r_spec->coef_size_kbytes * 1000. / (U100_l * (int)sizeof_real)); + double d, epsilon = 0, frac; + upsample = arbM < 1; + for (i = (int)(.5 * arbM), shr = 0; i >>= 1; arbM *= .5, ++shr); + preM = upsample || (arbM > 1.5 && arbM < 2); + postM = 1 + (arbM > 1 && preM), arbM /= postM; + preL = 1 + (!preM && arbM < 2) + (upsample && mode), arbM *= preL; + if ((frac = arbM - (int)arbM)!=0) + epsilon = fabs(floor(frac * MULT32 + .5) / (frac * MULT32) - 1); + for (i = 1, rational = frac==0; i <= maxL && !rational; ++i) { + d = frac * i, try = (int)(d + .5); + if ((rational = fabs(try / d - 1) <= epsilon)) { /* No long doubles! */ + if (try == i) + arbM = ceil(arbM), shr += x = arbM > 3, arbM /= 1 + x; + else arbM = i * (int)arbM + try, arbL = i; + } + } + L = preL * arbL, M = (int)(arbM * postM), x = (L|M)&1, L >>= !x, M >>= !x; + if (iOpt && postL == 1 && (d = preL * arbL / arbM) > 4 && d != 5) { + for (postL = 4, i = (int)(d / 16); (i >>= 1) && postL < 256; postL <<= 1); + arbM = arbM * postL / arbL / preL, arbL = 1, n = 0; + } else if (rational && (max(L, M) < 3 + 2 * iOpt || L * M < 6 * iOpt)) + preL = L, preM = M, arbM = arbL = postM = 1; + if (!mode && (!rational || !n)) + ++mode, n = 0; + } + + p->num_stages = shr + have_pre_stage + have_arb_stage + have_post_stage; + if (!p->num_stages && multiplier != 1) { + bits = arbL = 0; /* Use cubic_stage in this case. */ + ++p->num_stages; + } + p->stages = calloc((size_t)p->num_stages + 1, sizeof(*p->stages)); + if (!p->stages) + return "out of memory"; + for (i = 0; i < p->num_stages; ++i) { + p->stages[i].num = i; + p->stages[i].shared = shared; + p->stages[i].input_size = 4096; + } + p->stages[0].is_input = true; + + alpha = postM / (io_ratio * (postL << 0)); + + if ((n = p->num_stages) > 1) { /* Att. budget: */ + if (have_arb_stage) + att += linear_to_dB(2.), attArb = att, --n; + att += linear_to_dB((double)n); + } + + half_fir_info = find_half_fir(core->half_firs, core->half_firs_len, att); + for (i = 0, s = p->stages; i < shr; ++i, ++s) { + s->fn = half_fir_info->fn; + s->coefs = half_fir_info->coefs; + s->n = half_fir_info->num_coefs; + s->pre_post = 4 * s->n; + s->preload = s->pre = s->pre_post >> 1; + } + + if (have_pre_stage) { + if (maintain_3dB_pt && have_post_stage) { /* Trans. bands overlapping. */ + double x = tbw0 * lsx_inv_f_resp(-3., att); + x = -lsx_f_resp(x / (max(2 * alpha - Fs0, alpha) - Fp0), att); + if (x > .035) { + tbw_tighten = ((4.3074e-3 - 3.9121e-4 * x) * x - .040009) * x + 1.0014; + lsx_debug("tbw_tighten=%g (%gdB)", tbw_tighten, x); + } + } + Fn1 = preM? max(preL, preM) : arbM / arbL; + dft_stage_init(0, tighten(Fp1), Fs1, Fn1, att, phase_response, s++, preL, + max(preM, 1), &multiplier, r_spec->log2_min_dft_size, + r_spec->log2_large_dft_size, core_flags, core->rdft_cb); + Fp1 /= Fn1, Fs1 /= Fn1; + } + + if (bits==0 && have_arb_stage) { /* `Quick' cubic arb stage: */ + s->fn = core->cubic_stage_fn; + s->mult = multiplier, multiplier = 1; + s->step.whole = (int64_t)(arbM * MULT32 + .5); + s->pre_post = max(3, s->step.integer); + s->preload = s->pre = 1; + s->out_in_ratio = MULT32 / (double)s->step.whole; + } + else if (have_arb_stage) { /* Higher quality arb stage: */ + static const float rolloffs[] = {-.01f, -.3f, 0, -.103f}; + poly_fir_t const * f = &core->poly_firs[6*(upsample+!!preM)+mode-!upsample]; + int order, num_coefs = (int)f->interp[0].scalar, phase_bits, phases; + size_t coefs_size; + double at, Fp = Fp1, Fs, Fn, mult = upsample? 1 : arbM / arbL; + poly_fir1_t const * f1; + + if (!upsample && preM) + Fn = 2 * mult, Fs = 3 + fabs(Fs1 - 1); + else Fn = 1, Fs = 2 - (mode? Fp1 + (Fs1 - Fp1) * .7 : Fs1); + + if (mode) + Fp = Fs - (Fs - Fp) / (1 - lsx_inv_f_resp(rolloffs[rolloff], attArb)); + + i = (interpolator < 0? !rational : max(interpolator, !rational)) - 1; + do { + f1 = &f->interp[++i]; + assert(f1->fn); + if (i) + arbM /= arbL, arbL = 1, rational = false; + phase_bits = (int)ceil(f1->scalar - log(mult)/log(2.)); + phases = !rational? (1 << phase_bits) : arbL; + if (f->interp[0].scalar==0) { + int phases0 = max(phases, 19), n0 = 0; + lsx_design_lpf(Fp, Fs, -Fn, attArb, &n0, phases0, f->beta); + num_coefs = n0 / phases0 + 1, num_coefs += num_coefs & !preM; + } + if ((num_coefs & 1) && rational && (arbL & 1)) + phases <<= 1, arbL <<= 1, arbM *= 2; + at = arbL * (s->phase0 = .5 * (num_coefs & 1)); + order = i + (i && mode > 4); + coefs_size = (size_t)(num_coefs4 * phases * (order+1)) * sizeof_real; + } while (interpolator < 0 && i < 2 && f->interp[i+1].fn && + coefs_size / 1000 > r_spec->coef_size_kbytes); + + if (!s->shared->poly_fir_coefs) { + int num_taps = num_coefs * phases - 1; + double * coefs = lsx_design_lpf( + Fp, Fs, Fn, attArb, &num_taps, phases, f->beta); + s->shared->poly_fir_coefs = prepare_poly_fir_coefs( + coefs, num_coefs, phases, order, multiplier, core_flags, &core->mem); + lsx_debug("fir_len=%i phases=%i coef_interp=%i size=%.3gk", + num_coefs, phases, order, (double)coefs_size / 1000.); + free(coefs); + } + multiplier = 1; + s->fn = f1->fn; + s->pre_post = num_coefs4 - 1; + s->preload = ((num_coefs - 1) >> 1) + (num_coefs4 - num_coefs); + s->n = num_coefs4; + s->phase_bits = phase_bits; + s->L = arbL; + s->use_hi_prec_clock = + mode>1 && (q_spec->flags & SOXR_HI_PREC_CLOCK) && !rational; +#if FLOAT_HI_PREC_CLOCK + if (s->use_hi_prec_clock) { + s->at.flt = at; + s->step.flt = arbM; + s->out_in_ratio = (double)(arbL / s->step.flt); + } else +#endif + { + s->at.whole = (int64_t)(at * MULT32 + .5); +#if !FLOAT_HI_PREC_CLOCK + if (s->use_hi_prec_clock) { + double M = arbM * MULT32; + s->at.fix.ls.parts.ms = 0x80000000ul; + s->step.whole = (int64_t)M; + M -= (double)s->step.whole; + M *= MULT32 * MULT32; + s->step.fix.ls.all = (uint64_t)M; + } else +#endif + s->step.whole = (int64_t)(arbM * MULT32 + .5); + s->out_in_ratio = MULT32 * arbL / (double)s->step.whole; + } + ++s; + } + + if (have_post_stage) + dft_stage_init(1, tighten(Fp0 / (upsample? alpha : 1)), upsample? max(2 - + Fs0 / alpha, 1) : Fs0, (double)max(postL, postM), att, phase_response, + s++, postL, postM, &multiplier, r_spec->log2_min_dft_size, + r_spec->log2_large_dft_size, core_flags, core->rdft_cb); + + lsx_debug("%g: »%i⋅%i/%i⋅%i/%g⋅%i/%i %x", 1/io_ratio, + shr, preL, preM, arbL, arbM, postL, postM, core_flags); + + for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) { + fifo_create(&s->fifo, (int)sizeof_real); + memset(fifo_reserve(&s->fifo, s->preload), 0, + sizeof_real * (size_t)s->preload); + lsx_debug_more("%5i|%-5i preload=%i remL=%i", + s->pre, s->pre_post-s->pre, s->preload, s->at.integer); + } + fifo_create(&s->fifo, (int)sizeof_real); + return 0; +} + +static bool stage_process(stage_t * stage, bool flushing) +{ + fifo_t * fifo = &stage->fifo; + bool done = false; + int want; + while (!done && (want = stage->input_size - fifo_occupancy(fifo)) > 0) { + if (stage->is_input) { + if (flushing) + memset(fifo_reserve(fifo, want), 0, fifo->item_size * (size_t)want); + else done = true; + } + else done = stage_process(stage - 1, flushing); + } + stage->fn(stage, &stage[1].fifo); + return done && fifo_occupancy(fifo) < stage->input_size; +} + +STATIC void _soxr_process(rate_t * p, size_t olen) +{ + int const n = p->flushing? min(-(int)p->samples_out, (int)olen) : (int)olen; + stage_t * stage = &p->stages[p->num_stages]; + fifo_t * fifo = &stage->fifo; + bool done = false; + while (!done && fifo_occupancy(fifo) < (int)n) + done = stage->is_input || stage_process(stage - 1, p->flushing); +} + +STATIC real * _soxr_input(rate_t * p, real const * samples, size_t n) +{ + if (p->flushing) + return 0; + p->samples_in += (int64_t)n; + return fifo_write(&p->stages[0].fifo, (int)n, samples); +} + +STATIC real const * _soxr_output(rate_t * p, real * samples, size_t * n0) +{ + fifo_t * fifo = &p->stages[p->num_stages].fifo; + int n = p->flushing? min(-(int)p->samples_out, (int)*n0) : (int)*n0; + p->samples_out += n = min(n, fifo_occupancy(fifo)); + return fifo_read(fifo, (int)(*n0 = (size_t)n), samples); +} + +STATIC void _soxr_flush(rate_t * p) +{ + if (p->flushing) return; + p->samples_out -= (int64_t)((double)p->samples_in / p->io_ratio + .5); + p->samples_in = 0; + p->flushing = true; +} + +STATIC void _soxr_close(rate_t * p) +{ + if (p->stages) { + fn_t const * const RDFT_CB = p->core->rdft_cb; + rate_shared_t * shared = p->stages[0].shared; + int i; + + for (i = 0; i <= p->num_stages; ++i) { + stage_t * s = &p->stages[i]; + rdft_free(s->dft_scratch); + rdft_free(s->dft_out); + fifo_delete(&s->fifo); + } + if (shared) { + for (i = 0; i < 2; ++i) { + dft_filter_t * f= &shared->dft_filter[i]; + rdft_free(f->coefs); + rdft_delete_setup(f->dft_forward_setup); + rdft_delete_setup(f->dft_backward_setup); + } + p->core->mem.free(shared->poly_fir_coefs); + memset(shared, 0, sizeof(*shared)); + } + free(p->stages); + (void)RDFT_CB; + } +} + +#if defined SOXR_LIB +STATIC double _soxr_delay(rate_t * p) +{ + return (double)p->samples_in / p->io_ratio - (double)p->samples_out; +} + +STATIC void _soxr_sizes(size_t * shared, size_t * channel) +{ + *shared = sizeof(rate_shared_t); + *channel = sizeof(rate_t); +} +#endif diff --git a/src/cr.h b/src/cr.h new file mode 100644 index 0000000..7e20327 --- /dev/null +++ b/src/cr.h @@ -0,0 +1,175 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#if !defined soxr_rate1_included +#define soxr_rate1_included + +#define FIFO_SIZE_T int +#include "fifo.h" + +typedef void real; /* float or double */ +struct stage; +typedef void (* stage_fn_t)(struct stage * input, fifo_t * output); +typedef struct half_fir_info {int num_coefs; real const * coefs; stage_fn_t fn, dfn; float att;} half_fir_info_t; +typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t; +typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t; + +#define U100_l 42 +#define MULT32 (65536. * 65536.) + +/* Conceptually: coef_p is &coefs[num_phases][fir_len][interp_order+1]: */ +#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[\ + (fir_len) * ((interp_order) + 1) * (phase_num) + \ + ((interp_order) + 1) * (fir_coef_num) + \ + ((interp_order) - (coef_interp_num))] + +/* Conceptually: coef_p is &coefs[num_phases][fir_len/4][interp_order+1][4]: */ +#define coef4(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[\ + (fir_len) * ((interp_order) + 1) * (phase_num) + \ + ((interp_order) + 1) * ((fir_coef_num) & ~3) + \ + 4 * ((interp_order) - (coef_interp_num)) + \ + ((fir_coef_num) & 3)] + +typedef union { /* Int64 in parts */ + #if HAVE_BIGENDIAN + struct {int32_t ms; uint32_t ls;} parts; + #else + struct {uint32_t ls; int32_t ms;} parts; + #endif + int64_t all; +} int64p_t; + +typedef union { /* Uint64 in parts */ + #if HAVE_BIGENDIAN + struct {uint32_t ms, ls;} parts; + #else + struct {uint32_t ls, ms;} parts; + #endif + uint64_t all; +} uint64p_t; + +#define FLOAT_HI_PREC_CLOCK 0 /* Non-float hi-prec has ~96 bits. */ +#define float_step_t long double /* __float128 is also a (slow) option */ + +typedef struct { + int dft_length, num_taps, post_peak; + void * dft_forward_setup, * dft_backward_setup; + real * coefs; +} dft_filter_t; + +typedef struct { /* So generated filter coefs may be shared between channels */ + real * poly_fir_coefs; + dft_filter_t dft_filter[2]; +} rate_shared_t; + +typedef union { /* Fixed point arithmetic */ + struct {uint64p_t ls; int64p_t ms;} fix; + float_step_t flt; +} step_t; + +#define CORE_DBL 1 +#define CORE_SIMD_POLY 2 +#define CORE_SIMD_HALF 4 +#define CORE_SIMD_DFT 8 +#define LOG2_SIZEOF_REAL(core_flags) (2 + ((core_flags) & 1)) + +typedef int core_flags_t; + +#if defined SOXR_LIB +#include "rdft_t.h" +#else +typedef void fn_t; +#endif + +typedef struct stage { + int num; + + /* Common to all stage types: */ + core_flags_t core_flags; + stage_fn_t fn; + fifo_t fifo; + int pre; /* Number of past samples to store */ + int pre_post; /* pre + number of future samples to store */ + int preload; /* Number of zero samples to pre-load the fifo */ + double out_in_ratio; /* For buffer management. */ + int input_size; + bool is_input; + + /* For a stage with variable (run-time generated) filter coefs: */ + fn_t const * rdft_cb; + rate_shared_t * shared; + unsigned dft_filter_num; /* Which, if any, of the 2 DFT filters to use */ + real * dft_scratch; + float * dft_out; + real const * coefs; + + /* For a stage with variable L/M: */ + step_t at, step; + bool use_hi_prec_clock; + int L, remM; + int n, phase_bits, block_len; + double mult, phase0; +} stage_t; + +#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post) +#define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre) +#define integer fix.ms.parts.ms +#define fraction fix.ms.parts.ls +#define whole fix.ms.all + + +#define lq_bw0 (1385/2048.) /* ~.67625, FP exact. */ + +typedef enum {rolloff_small, rolloff_medium, rolloff_none} rolloff_t; + + +typedef struct { + void * (* alloc)(size_t); + void * (* calloc)(size_t, size_t); + void (* free)(void *); +} alloc_t; + +typedef struct { + alloc_t mem; + half_fir_info_t const * half_firs; + size_t half_firs_len; + half_fir_info_t const * doub_firs; + size_t doub_firs_len; + stage_fn_t cubic_stage_fn; + poly_fir_t const * poly_firs; + fn_t * rdft_cb; +} cr_core_t; + +typedef struct rate rate_t; +struct rate { + cr_core_t const * core; + double io_ratio; + int64_t samples_in, samples_out; + int num_stages, flushing; + stage_t * stages; +}; + +#if defined SOXR_LIB + +#include "soxr.h" + +char const * _soxr_init( + rate_t * const p, /* Per audio channel. */ + rate_shared_t * const shared, /* Between channels (undergoing same rate change)*/ + double const io_ratio, /* Input rate divided by output rate. */ + soxr_quality_spec_t const * const q_spec, + soxr_runtime_spec_t const * const r_spec, + double multiplier, /* Linear gain to apply during conversion. 1 */ + cr_core_t const * const core, + core_flags_t const); + +void _soxr_process(struct rate * p, size_t olen); +real * _soxr_input(struct rate * p, real const * samples, size_t n); +real const * _soxr_output(struct rate * p, real * samples, size_t * n0); +void _soxr_flush(struct rate * p); +void _soxr_close(struct rate * p); +double _soxr_delay(struct rate * p); +void _soxr_sizes(size_t * shared, size_t * channel); +#endif + +#endif diff --git a/src/cr32.c b/src/cr32.c new file mode 100644 index 0000000..b9eb264 --- /dev/null +++ b/src/cr32.c @@ -0,0 +1,8 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#define RATE_CB _soxr_rate32_cb +#define CORE_STR "cr32" + +#define CORE_TYPE 0 +#include "cr-core.c" diff --git a/src/cr32s.c b/src/cr32s.c new file mode 100644 index 0000000..5de2a43 --- /dev/null +++ b/src/cr32s.c @@ -0,0 +1,8 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#define RATE_CB _soxr_rate32s_cb +#define CORE_STR "cr32s" + +#define CORE_TYPE (CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT) +#include "cr-core.c" diff --git a/src/cr64.c b/src/cr64.c new file mode 100644 index 0000000..518cdd7 --- /dev/null +++ b/src/cr64.c @@ -0,0 +1,8 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#define RATE_CB _soxr_rate64_cb +#define CORE_STR "cr64" + +#define CORE_TYPE CORE_DBL +#include "cr-core.c" diff --git a/src/cr64s.c b/src/cr64s.c new file mode 100644 index 0000000..5dcd6f1 --- /dev/null +++ b/src/cr64s.c @@ -0,0 +1,8 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#define RATE_CB _soxr_rate64s_cb +#define CORE_STR "cr64s" + +#define CORE_TYPE (CORE_DBL|CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT) +#include "cr-core.c" diff --git a/src/data-io.c b/src/data-io.c index 1081000..52144c2 100644 --- a/src/data-io.c +++ b/src/data-io.c @@ -23,7 +23,7 @@ -#if WITH_DOUBLE_PRECISION +#if WITH_CR64 || WITH_CR64S void _soxr_deinterleave(double * * dest, /* Round/clipping not needed here */ soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch) { @@ -40,7 +40,7 @@ void _soxr_deinterleave(double * * dest, /* Round/clipping not needed here */ -#if WITH_SINGLE_PRECISION +#if WITH_CR32 || WITH_CR32S || WITH_VR32 void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch) { @@ -97,7 +97,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ #endif #endif -#if WITH_DOUBLE_PRECISION +#if WITH_CR64 || WITH_CR64S #define FLOATX double #define LSX_RINT_CLIP_2 lsx_rint32_clip_2 @@ -139,7 +139,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ -#if WITH_SINGLE_PRECISION +#if WITH_CR32 || WITH_CR32S || WITH_VR32 #define FLOATX float #define LSX_RINT_CLIP_2 lsx_rint32_clip_2_f @@ -199,7 +199,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */ return 0; \ } while (0) -#if WITH_DOUBLE_PRECISION +#if WITH_CR64 || WITH_CR64S size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0, double const * const * src, size_t n, unsigned ch, unsigned long * seed) { @@ -225,7 +225,7 @@ size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0, } #endif -#if WITH_SINGLE_PRECISION +#if WITH_CR32 || WITH_CR32S || WITH_VR32 size_t /* clips */ _soxr_interleave_f(soxr_datatype_t data_type, void * * dest0, float const * const * src, size_t n, unsigned ch, unsigned long * seed) { diff --git a/src/fft4g.c b/src/fft4g.c index 5fae8a6..cf6293a 100644 --- a/src/fft4g.c +++ b/src/fft4g.c @@ -282,22 +282,16 @@ Appendix : */ -#include +#include "math-wrap.h" #include "fft4g.h" #ifdef FFT4G_FLOAT #define double float #define one_half 0.5f -#if defined _MSC_VER - #define sin (float)sin - #define cos (float)cos - #define atan (float)atan -#else - #define sin sinf - #define cos cosf - #define atan atanf -#endif + #define sin(x) sinf(x) + #define cos(x) cosf(x) + #define atan(x) atanf(x) #define cdft lsx_cdft_f #define rdft lsx_rdft_f @@ -818,7 +812,7 @@ static void bitrv2(int n, int *ip0, double *a) static void bitrv2conj(int n, int *ip0, double *a) { - int j, j1, k, k1, l, m, m2, ip[256]; + int j, j1, k, k1, l, m, m2, ip[512]; double xr, xi, yr, yi; (void)ip0; diff --git a/src/fft4g32.c b/src/fft4g32.c index 8741394..5dcf34d 100644 --- a/src/fft4g32.c +++ b/src/fft4g32.c @@ -1,17 +1,19 @@ /* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ +#include #include "filter.h" #define FFT4G_FLOAT #include "fft4g.c" +#include "rdft_t.h" static void * null(void) {return 0;} static void forward (int length, void * setup, double * H) {lsx_safe_rdft_f(length, 1, H); (void)setup;} static void backward(int length, void * setup, double * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;} static int multiplier(void) {return 2;} static void nothing(void) {} +static int flags(void) {return 0;} -typedef void (* fn_t)(void); fn_t _soxr_rdft32_cb[] = { (fn_t)null, (fn_t)null, @@ -24,4 +26,8 @@ fn_t _soxr_rdft32_cb[] = { (fn_t)_soxr_ordered_partial_convolve_f, (fn_t)multiplier, (fn_t)nothing, + (fn_t)malloc, + (fn_t)calloc, + (fn_t)free, + (fn_t)flags, }; diff --git a/src/fft4g32s.c b/src/fft4g32s.c index 4a95a7d..34dae4b 100644 --- a/src/fft4g32s.c +++ b/src/fft4g32s.c @@ -3,14 +3,15 @@ #include "filter.h" #include "simd.h" +#include "rdft_t.h" static void * null(void) {return 0;} static void nothing(void) {} static void forward (int length, void * setup, float * H) {lsx_safe_rdft_f(length, 1, H); (void)setup;} static void backward(int length, void * setup, float * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;} static int multiplier(void) {return 2;} +static int flags(void) {return RDFT_IS_SIMD;} -typedef void (* fn_t)(void); fn_t _soxr_rdft32s_cb[] = { (fn_t)null, (fn_t)null, @@ -19,8 +20,12 @@ fn_t _soxr_rdft32s_cb[] = { (fn_t)forward, (fn_t)backward, (fn_t)backward, - (fn_t)_soxr_ordered_convolve_simd, - (fn_t)_soxr_ordered_partial_convolve_simd, + (fn_t)ORDERED_CONVOLVE_SIMD, + (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD, (fn_t)multiplier, (fn_t)nothing, + (fn_t)SIMD_ALIGNED_MALLOC, + (fn_t)SIMD_ALIGNED_CALLOC, + (fn_t)SIMD_ALIGNED_FREE, + (fn_t)flags, }; diff --git a/src/fft4g64.c b/src/fft4g64.c index 4acb33b..0018516 100644 --- a/src/fft4g64.c +++ b/src/fft4g64.c @@ -1,16 +1,18 @@ /* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ +#include #include "filter.h" #include "fft4g.c" #include "soxr-config.h" -#if WITH_DOUBLE_PRECISION +#if WITH_CR64 static void * null(void) {return 0;} static void nothing(void) {} static void forward (int length, void * setup, double * H) {lsx_safe_rdft(length, 1, H); (void)setup;} static void backward(int length, void * setup, double * H) {lsx_safe_rdft(length, -1, H); (void)setup;} static int multiplier(void) {return 2;} +static int flags(void) {return 0;} typedef void (* fn_t)(void); fn_t _soxr_rdft64_cb[] = { @@ -25,5 +27,9 @@ fn_t _soxr_rdft64_cb[] = { (fn_t)_soxr_ordered_partial_convolve, (fn_t)multiplier, (fn_t)nothing, + (fn_t)malloc, + (fn_t)calloc, + (fn_t)free, + (fn_t)flags, }; #endif diff --git a/src/fifo.h b/src/fifo.h index b2bda43..19f6c1d 100644 --- a/src/fifo.h +++ b/src/fifo.h @@ -9,6 +9,7 @@ #endif #if !defined FIFO_REALLOC +#include #define FIFO_REALLOC(a,b,c) realloc(a,b) #undef FIFO_FREE #define FIFO_FREE free diff --git a/src/filter.c b/src/filter.c index 482302e..aec0b6e 100644 --- a/src/filter.c +++ b/src/filter.c @@ -1,12 +1,9 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ #include "filter.h" -#include -#if !defined M_PI -#define M_PI 3.14159265358979323846 -#endif +#include "math-wrap.h" #include #include #include @@ -14,7 +11,7 @@ #include "fft4g.h" #include "ccrw2.h" -#if 1 || WITH_DOUBLE_PRECISION /* Always need this, for lsx_fir_to_phase. */ +#if 1 || WITH_CR64 || WITH_CR64S /* Always need this, for lsx_fir_to_phase. */ #define DFT_FLOAT double #define DONE_WITH_FFT_CACHE done_with_fft_cache #define FFT_CACHE_CCRW fft_cache_ccrw @@ -31,7 +28,7 @@ #include "fft4g_cache.h" #endif -#if WITH_SINGLE_PRECISION && !AVCODEC_FOUND +#if WITH_CR32 && !AVCODEC_FOUND #define DFT_FLOAT float #define DONE_WITH_FFT_CACHE done_with_fft_cache_f #define FFT_CACHE_CCRW fft_cache_ccrw_f @@ -48,14 +45,14 @@ #include "fft4g_cache.h" #endif -#if WITH_DOUBLE_PRECISION || !SOXR_LIB +#if WITH_CR64 || WITH_CR64S || !SOXR_LIB #define DFT_FLOAT double #define ORDERED_CONVOLVE lsx_ordered_convolve #define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve #include "rdft.h" #endif -#if WITH_SINGLE_PRECISION +#if WITH_CR32 #define DFT_FLOAT float #define ORDERED_CONVOLVE lsx_ordered_convolve_f #define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve_f @@ -129,6 +126,9 @@ double * lsx_design_lpf( int n = *num_taps, phases = max(k, 1), modulo = max(-k, 1); double tr_bw, Fc, rho = phases == 1? .5 : att < 120? .63 : .75; + lsx_debug_more("./sinctest %-12.7g %-12.7g %g 0 %-5g %i %i 50 %g %g -4 >1", + Fp, Fs, Fn, att, *num_taps, k, beta, rho); + Fp /= fabs(Fn), Fs /= fabs(Fn); /* Normalise to Fn = 1 */ tr_bw = .5 * (Fs - Fp); /* Transition band-width: 6dB to stop points */ tr_bw /= phases, Fs /= phases; @@ -243,3 +243,35 @@ void lsx_fir_to_phase(double * * h, int * len, int * post_len, double phase) work[imp_peak], *len, *post_len, 100 - 100. * *post_len / (*len - 1)); free(pi_wraps), free(work); } + +#define F_x(F,expr) static double F(double x) {return expr;} +F_x(sinePhi, ((2.0517e-07*x-1.1303e-04)*x+.023154)*x+.55924 ) +F_x(sinePsi, ((9.0667e-08*x-5.6114e-05)*x+.013658)*x+1.0977 ) +F_x(sinePow, log(.5)/log(sin(x*.5)) ) +#define dB_to_linear(x) exp((x) * (M_LN10 * 0.05)) + +double lsx_f_resp(double t, double a) +{ + double x; + if (t > (a <= 160? .8 : .82)) { + double a1 = a+15; + double p = .00035*a+.375; + double w = 1/(1-.597)*asin(pow((a1-10.6)/a1,1/p)); + double c = 1+asin(pow(1-a/a1,1/p))/w; + return a1*(pow(sin((c-t)*w),p)-1); + } + if (t > .5) + x = sinePsi(a), x = pow(sin((1-t) * x), sinePow(x)); + else + x = sinePhi(a), x = 1 - pow(sin(t * x), sinePow(x)); + return linear_to_dB(x); +} + +double lsx_inv_f_resp(double drop, double a) +{ + double x = sinePhi(a), s; + drop = dB_to_linear(drop); + s = drop > .5 ? 1 - drop : drop; + x = asin(pow(s, 1/sinePow(x))) / x; + return drop > .5? x : 1 -x; +} diff --git a/src/filter.h b/src/filter.h index 435303b..56333ff 100644 --- a/src/filter.h +++ b/src/filter.h @@ -33,7 +33,12 @@ double * lsx_design_lpf( int * num_taps, /* 0: value will be estimated */ int k, /* >0: number of phases; <0: num_taps ≡ 1 (mod -k) */ double beta); /* <0: value will be estimated */ + void lsx_fir_to_phase(double * * h, int * len, int * post_len, double phase0); +double lsx_f_resp(double t, double a); +double lsx_inv_f_resp(double drop, double a); +#define lsx_to_3dB(a) (1 - lsx_inv_f_resp(-3., a)) + #endif diff --git a/src/filters.h b/src/filters.h deleted file mode 100644 index e9a8011..0000000 --- a/src/filters.h +++ /dev/null @@ -1,151 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#include "half_coefs.h" - -#define FUNCTION h8 -#define CONVOLVE _ _ _ _ _ _ _ _ -#define h8_l 8 -#define COEFS half_fir_coefs_8 -#include "half-fir.h" - -#define FUNCTION h9 -#define CONVOLVE _ _ _ _ _ _ _ _ _ -#define h9_l 9 -#define COEFS half_fir_coefs_9 -#include "half-fir.h" - -#define FUNCTION h10 -#define CONVOLVE _ _ _ _ _ _ _ _ _ _ -#define h10_l 10 -#define COEFS half_fir_coefs_10 -#include "half-fir.h" - -#define FUNCTION h11 -#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ -#define h11_l 11 -#define COEFS half_fir_coefs_11 -#include "half-fir.h" - -#define FUNCTION h12 -#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _ -#define h12_l 12 -#define COEFS half_fir_coefs_12 -#include "half-fir.h" - -#define FUNCTION h13 -#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _ _ -#define h13_l 13 -#define COEFS half_fir_coefs_13 -#include "half-fir.h" - -static struct {int num_coefs; stage_fn_t fn; float att;} const half_firs[] = { - { 8, h8 , 136.51f}, - { 9, h9 , 152.32f}, - {10, h10, 168.07f}, - {11, h11, 183.78f}, - {12, h12, 199.44f}, - {13, h13, 212.75f}, -}; - -#define HI_PREC_CLOCK - -#define VAR_LENGTH p->n -#define VAR_CONVOLVE while (j < FIR_LENGTH) _ -#define VAR_POLY_PHASE_BITS p->phase_bits - -#define FUNCTION vpoly0 -#define FIR_LENGTH VAR_LENGTH -#define CONVOLVE VAR_CONVOLVE -#include "poly-fir0.h" - -#define FUNCTION vpoly1 -#define COEF_INTERP 1 -#define PHASE_BITS VAR_POLY_PHASE_BITS -#define FIR_LENGTH VAR_LENGTH -#define CONVOLVE VAR_CONVOLVE -#include "poly-fir.h" - -#define FUNCTION vpoly2 -#define COEF_INTERP 2 -#define PHASE_BITS VAR_POLY_PHASE_BITS -#define FIR_LENGTH VAR_LENGTH -#define CONVOLVE VAR_CONVOLVE -#include "poly-fir.h" - -#define FUNCTION vpoly3 -#define COEF_INTERP 3 -#define PHASE_BITS VAR_POLY_PHASE_BITS -#define FIR_LENGTH VAR_LENGTH -#define CONVOLVE VAR_CONVOLVE -#include "poly-fir.h" - -#undef HI_PREC_CLOCK - -#define U100_l 42 -#if RATE_SIMD_POLY - #define U100_l_EXTRA _ _ - #define u100_l_EXTRA _ - #define U100_l_EXTRA_LENGTH 2 - #define u100_l_EXTRA_LENGTH 1 -#else - #define U100_l_EXTRA - #define u100_l_EXTRA - #define U100_l_EXTRA_LENGTH 0 - #define u100_l_EXTRA_LENGTH 0 -#endif -#define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ U100_l_EXTRA -#define FUNCTION U100_0 -#define FIR_LENGTH (U100_l + U100_l_EXTRA_LENGTH) -#define CONVOLVE poly_fir_convolve_U100 -#include "poly-fir0.h" - -#define u100_l 11 -#define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _ _ u100_l_EXTRA -#define FUNCTION u100_0 -#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH) -#define CONVOLVE poly_fir_convolve_u100 -#include "poly-fir0.h" - -#define FUNCTION u100_1 -#define COEF_INTERP 1 -#define PHASE_BITS 8 -#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH) -#define CONVOLVE poly_fir_convolve_u100 -#include "poly-fir.h" -#define u100_1_b 8 - -#define FUNCTION u100_2 -#define COEF_INTERP 2 -#define PHASE_BITS 6 -#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH) -#define CONVOLVE poly_fir_convolve_u100 -#include "poly-fir.h" -#define u100_2_b 6 - -typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t; -typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t; - -static poly_fir_t const poly_firs[] = { - {-1, {{0, vpoly0}, { 7.2f, vpoly1}, {5.0f, vpoly2}}}, - {-1, {{0, vpoly0}, { 9.4f, vpoly1}, {6.7f, vpoly2}}}, - {-1, {{0, vpoly0}, {12.4f, vpoly1}, {7.8f, vpoly2}}}, - {-1, {{0, vpoly0}, {13.6f, vpoly1}, {9.3f, vpoly2}}}, - {-1, {{0, vpoly0}, {10.5f, vpoly2}, {8.4f, vpoly3}}}, - {-1, {{0, vpoly0}, {11.85f,vpoly2}, {9.0f, vpoly3}}}, - - {-1, {{0, vpoly0}, { 8.0f, vpoly1}, {5.3f, vpoly2}}}, - {-1, {{0, vpoly0}, { 8.6f, vpoly1}, {5.7f, vpoly2}}}, - {-1, {{0, vpoly0}, {10.6f, vpoly1}, {6.75f,vpoly2}}}, - {-1, {{0, vpoly0}, {12.6f, vpoly1}, {8.6f, vpoly2}}}, - {-1, {{0, vpoly0}, { 9.6f, vpoly2}, {7.6f, vpoly3}}}, - {-1, {{0, vpoly0}, {11.4f, vpoly2}, {8.65f,vpoly3}}}, - - {10.62f, {{U100_l, U100_0}, {0, 0}, {0, 0}}}, - {11.28f, {{u100_l, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}}, - {-1, {{0, vpoly0}, { 9, vpoly1}, { 6, vpoly2}}}, - {-1, {{0, vpoly0}, { 11, vpoly1}, { 7, vpoly2}}}, - {-1, {{0, vpoly0}, { 13, vpoly1}, { 8, vpoly2}}}, - {-1, {{0, vpoly0}, { 10, vpoly2}, { 8, vpoly3}}}, - {-1, {{0, vpoly0}, { 12, vpoly2}, { 9, vpoly3}}}, -}; diff --git a/src/half-coefs.h b/src/half-coefs.h new file mode 100644 index 0000000..a5a0882 --- /dev/null +++ b/src/half-coefs.h @@ -0,0 +1,75 @@ +/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#if defined __GNUC__ + #pragma GCC system_header +#elif defined __SUNPRO_C + #pragma disable_warn +#elif defined _MSC_VER + #pragma warning(push, 1) +#endif + +#if CORE_TYPE & CORE_SIMD_HALF + #define VALIGN vAlign +#else + #define VALIGN +#endif + +#if !(CORE_TYPE & CORE_SIMD_HALF) +static VALIGN const sample_t half_fir_coefs_7[] = { + 3.1062656496657370e-01, -8.4998810699955796e-02, 3.4007044621123500e-02, +-1.2839903789829387e-02, 3.9899380181723145e-03, -8.9355202017945374e-04, + 1.0918292424806546e-04, +}; +#endif + +static VALIGN const sample_t half_fir_coefs_8[] = { + 3.1154652365332069e-01, -8.7344917685739543e-02, 3.6814458353637280e-02, +-1.5189204581464479e-02, 5.4540855610738801e-03, -1.5643862626630416e-03, + 3.1816575906323303e-04, -3.4799449225005688e-05, +}; + +static VALIGN const sample_t half_fir_coefs_9[] = { + 3.1227034755311189e-01, -8.9221517147969526e-02, 3.9139704015071934e-02, +-1.7250558515852023e-02, 6.8589440230476112e-03, -2.3045049636430419e-03, + 6.0963740543348963e-04, -1.1323803957431231e-04, 1.1197769991000046e-05, +}; + +#if CORE_TYPE & CORE_DBL +static VALIGN const sample_t half_fir_coefs_10[] = { + 3.1285456012000523e-01, -9.0756740799292787e-02, 4.1096398104193160e-02, +-1.9066319572525220e-02, 8.1840569787684902e-03, -3.0766876176359834e-03, + 9.6396524429277980e-04, -2.3585679989922018e-04, 4.0252189026627833e-05, +-3.6298196342497932e-06, +}; + +static VALIGN const sample_t half_fir_coefs_11[] = { + 3.1333588822574199e-01, -9.2035898673019811e-02, 4.2765169698406408e-02, +-2.0673580894964429e-02, 9.4225426824512421e-03, -3.8563379950013192e-03, + 1.3634742159642453e-03, -3.9874150714431009e-04, 9.0586723632664806e-05, +-1.4285617244076783e-05, 1.1834642946400529e-06, +}; + +static VALIGN const sample_t half_fir_coefs_12[] = { + 3.1373928463345568e-01, -9.3118180335301962e-02, 4.4205005881659098e-02, +-2.2103860986973051e-02, 1.0574689371162864e-02, -4.6276428065385065e-03, + 1.7936153397572132e-03, -5.9617527051353237e-04, 1.6314517495669067e-04, +-3.4555126770115446e-05, 5.0617615610782593e-06, -3.8768958592971409e-07, +}; + +static VALIGN const sample_t half_fir_coefs_13[] = { + 3.1408224847888910e-01, -9.4045836332667387e-02, 4.5459878763259978e-02, +-2.3383369012219993e-02, 1.1644273044890753e-02, -5.3806714579057013e-03, + 2.2429072878264022e-03, -8.2204347506606424e-04, 2.5724946477840893e-04, +-6.6072709864248668e-05, 1.3099163296288644e-05, -1.7907147069136000e-06, + 1.2750825595240592e-07, +}; +#endif + +#undef VALIGN + +#if defined __SUNPRO_C + #pragma enable_warn +#elif defined _MSC_VER + #pragma warning(pop) +#endif diff --git a/src/half-fir.h b/src/half-fir.h index 0a8ee97..782be1b 100644 --- a/src/half-fir.h +++ b/src/half-fir.h @@ -1,25 +1,61 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ -/* Down-sample by a factor of 2 using a FIR with odd length (LEN).*/ +/* Decimate by 2 using a FIR with odd length (LEN). */ /* Input must be preceded and followed by LEN >> 1 samples. */ -#define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j; -static void FUNCTION(stage_t * p, fifo_t * output_fifo) +#define COEFS ((sample_t const *)p->coefs) + +#if SIMD_SSE + #define BEGINNING v4_t sum, q1, q2, t + #define ____ \ + q1 = _mm_shuffle_ps(t=vLdu(input+2*j),vLdu(input+2*j+4),_MM_SHUFFLE(3,1,3,1)); \ + q2 = _mm_shuffle_ps(vLdu(input-2*j-4),vLdu(input-2*j-8),_MM_SHUFFLE(1,3,1,3)); \ + sum = vAdd(j? sum : vMul(vSet1(.5), t), vMul(vAdd(q1, q2), vLd(COEFS+j))); \ + j += 4; + #define __ \ + q1 = _mm_shuffle_ps(vLdu(input+2*j), vLdu(input-2*j-4), _MM_SHUFFLE(1,3,3,1)); \ + q2 = _mm_loadl_pi(q2, (__m64*)(COEFS+j)), q2 = _mm_movelh_ps(q2, q2); \ + sum = vAdd(sum, vMul(q1, q2)); \ + j += 2; + #define _ \ + q1 = _mm_add_ss(_mm_load_ss(input+2*j+1), _mm_load_ss(input-2*j-1)); \ + sum = _mm_add_ss(sum, _mm_mul_ss(q1, _mm_load_ss(COEFS+j))); \ + ++j; + #define END vStorSum(output+i, sum) +/* #elif SIMD_AVX; No good solution found. */ +/* #elif SIMD_NEON; No need: gcc -O3 does a good job by itself. */ +#else + #define BEGINNING sample_t sum = input[0] * .5f + #define ____ __ __ + #define __ _ _ + #define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j; + #define END output[i] = sum +#endif + + + +static void FUNCTION_H(stage_t * p, fifo_t * output_fifo) { - sample_t const * input = stage_read_p(p); - int i, num_out = (stage_occupancy(p) + 1) / 2; - sample_t * output = fifo_reserve(output_fifo, num_out); + sample_t const * __restrict input = stage_read_p(p); + int num_in = min(stage_occupancy(p), p->input_size); + int i, num_out = (num_in + 1) >> 1; + sample_t * __restrict output = fifo_reserve(output_fifo, num_out); for (i = 0; i < num_out; ++i, input += 2) { int j = 0; - sample_t sum = input[0] * .5f; - CONVOLVE - output[i] = sum; + BEGINNING; CONVOLVE; END; } fifo_read(&p->fifo, 2 * num_out, NULL); } + + + #undef _ +#undef __ +#undef ____ +#undef BEGINNING +#undef END #undef COEFS #undef CONVOLVE -#undef FUNCTION +#undef FUNCTION_H diff --git a/src/half_coefs.h b/src/half_coefs.h deleted file mode 100644 index aac7769..0000000 --- a/src/half_coefs.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if defined __GNUC__ - #pragma GCC system_header -#elif defined __SUNPRO_C - #pragma disable_warn -#elif defined _MSC_VER - #pragma warning(push, 1) -#endif - -static const sample_t half_fir_coefs_8[] = { - 0.3115465451887802, -0.08734497241282892, 0.03681452335604365, - -0.01518925831569441, 0.005454118437408876, -0.001564400922162005, - 0.0003181701445034203, -3.48001341225749e-5, -}; - -static const sample_t half_fir_coefs_9[] = { - 0.3122703613711853, -0.08922155288172305, 0.03913974805854332, - -0.01725059723447163, 0.006858970092378141, -0.002304518467568703, - 0.0006096426006051062, -0.0001132393923815236, 1.119795386287666e-5, -}; - -static const sample_t half_fir_coefs_10[] = { - 0.3128545521327376, -0.09075671986104322, 0.04109637155154835, - -0.01906629512749895, 0.008184039342054333, -0.0030766775017262, - 0.0009639607022414314, -0.0002358552746579827, 4.025184282444155e-5, - -3.629779111541012e-6, -}; - -static const sample_t half_fir_coefs_11[] = { - 0.3133358837508807, -0.09203588680609488, 0.04276515428384758, - -0.02067356614745591, 0.00942253142371517, -0.003856330993895144, - 0.001363470684892284, -0.0003987400965541919, 9.058629923971627e-5, - -1.428553070915318e-5, 1.183455238783835e-6, -}; - -static const sample_t half_fir_coefs_12[] = { - 0.3137392991811407, -0.0931182192961332, 0.0442050575271454, - -0.02210391200618091, 0.01057473015666001, -0.00462766983973885, - 0.001793630226239453, -0.0005961819959665878, 0.0001631475979359577, - -3.45557865639653e-5, 5.06188341942088e-6, -3.877010943315563e-7, -}; - -static const sample_t half_fir_coefs_13[] = { - 0.3140822554324578, -0.0940458550886253, 0.04545990399121566, - -0.02338339450796002, 0.01164429409071052, -0.005380686021429845, - 0.002242915773871009, -0.000822047600000082, 0.0002572510962395222, - -6.607320708956279e-5, 1.309926399120154e-5, -1.790719575255006e-6, - 1.27504961098836e-7, -}; - -#if defined __SUNPRO_C - #pragma enable_warn -#elif defined _MSC_VER - #pragma warning(pop) -#endif diff --git a/src/internal.h b/src/internal.h index 6b4fb24..ee691a0 100644 --- a/src/internal.h +++ b/src/internal.h @@ -6,11 +6,15 @@ #include "std-types.h" + + #undef min #undef max #define min(a, b) ((a) <= (b) ? (a) : (b)) #define max(a, b) ((a) >= (b) ? (a) : (b)) + + #define range_limit(x, lower, upper) (min(max(x, lower), upper)) #define linear_to_dB(x) (log10(x) * 20) #define array_length(a) (sizeof(a)/sizeof(a[0])) @@ -20,12 +24,16 @@ #define iAL(a) (int)AL(a) #define sqr(a) ((a) * (a)) + + #if defined __GNUC__ #define UNUSED __attribute__ ((unused)) #else #define UNUSED #endif + + #if !WITH_DEV_TRACE #ifdef __GNUC__ void lsx_dummy(char const *, ...); @@ -35,10 +43,41 @@ #define lsx_debug if(0) lsx_dummy #define lsx_debug_more lsx_debug #else - int _soxr_trace_level(void); + extern int _soxr_trace_level; void _soxr_trace(char const * fmt, ...); - #define lsx_debug if (_soxr_trace_level() >= 4) _soxr_trace - #define lsx_debug_more if (_soxr_trace_level() >= 5) _soxr_trace + #define lsx_debug if (_soxr_trace_level > 0) _soxr_trace + #define lsx_debug_more if (_soxr_trace_level > 1) _soxr_trace #endif + + +/* soxr_quality_spec_t.flags: */ + +#define SOXR_ROLLOFF_LSR2Q 3u /* Reserved for internal use. */ +#define SOXR_ROLLOFF_MASK 3u /* For masking these bits. */ +#define SOXR_PROMOTE_TO_LQ 64u /* Reserved for internal use. */ + + + +/* soxr_runtime_spec_t.flags: */ + +#define SOXR_STRICT_BUFFERING 4u /* Reserved for future use. */ +#define SOXR_NOSMALLINTOPT 8u /* For test purposes only. */ + + + +/* soxr_quality_spec recipe: */ + +#define SOXR_PRECISIONQ 11 /* Quality specified by the precision parameter. */ + +#define SOXR_PHASE_MASK 0x30 /* For masking these bits. */ + + + +/* soxr_quality_spec flags: */ + +#define RESET_ON_CLEAR (1u<<31) + + + #endif diff --git a/src/math-wrap.h b/src/math-wrap.h new file mode 100644 index 0000000..8a526f1 --- /dev/null +++ b/src/math-wrap.h @@ -0,0 +1,31 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#if !defined soxr_math_wrap_included +#define soxr_math_wrap_included + +#include + +#if defined __STRICT_ANSI__ + #define sinf(x) (float)sin ((double)(x)) + #define cosf(x) (float)cos ((double)(x)) + #define atanf(x) (float)atan((double)(x)) +#endif + +#if !defined M_PI + #define M_PI 3.141592653589793238462643383279502884 +#endif + +#if !defined M_LN10 + #define M_LN10 2.302585092994045684017991454684364208 +#endif + +#if !defined M_SQRT2 + #define M_SQRT2 1.414213562373095048801688724209698079 +#endif + +#if !defined M_LN2 + #define M_LN2 0.693147180559945309417232121458176568 +#endif + +#endif diff --git a/src/pffft-wrap.c b/src/pffft-wrap.c index f410892..806547c 100644 --- a/src/pffft-wrap.c +++ b/src/pffft-wrap.c @@ -3,24 +3,23 @@ #if !defined PFFT_MACROS_ONLY -#include "simd.h" -#include +#include "math-wrap.h" -#define pffft_aligned_free _soxr_simd_aligned_free -#define pffft_aligned_malloc _soxr_simd_aligned_malloc -#define pffft_aligned_calloc _soxr_simd_aligned_calloc +#if PFFFT_DOUBLE + #include "simd64.h" +#else + #include "simd32.h" + #define sin(x) sinf(x) + #define cos(x) cosf(x) +#endif + +#define pffft_aligned_free SIMD_ALIGNED_FREE +#define pffft_aligned_malloc SIMD_ALIGNED_MALLOC +#define pffft_aligned_calloc SIMD_ALIGNED_CALLOC #undef inline #define inline __inline -#if defined _MSC_VER - #define sin (float)sin - #define cos (float)cos -#else - #define sin(x) sinf((float)(x)) - #define cos(x) cosf((float)(x)) -#endif - #endif diff --git a/src/pffft.c b/src/pffft.c index b68f86c..5729274 100644 --- a/src/pffft.c +++ b/src/pffft.c @@ -133,9 +133,11 @@ inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_p */ #elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86)) +# define SIMD_SZ 4 /* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */ + +#if !PFFFT_DOUBLE #include typedef __m128 v4sf; -# define SIMD_SZ 4 /* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */ # define VZERO() _mm_setzero_ps() # define VMUL(a,b) _mm_mul_ps(a,b) # define VADD(a,b) _mm_add_ps(a,b) @@ -148,6 +150,10 @@ typedef __m128 v4sf; # define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0)) # define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0) +#else +#include "avx.h" +#endif + /* ARM NEON support macros */ @@ -181,6 +187,10 @@ typedef float32x4_t v4sf; # endif #endif +#if PFFFT_DOUBLE +#define float double +#endif + /* fallback mode for situations where SSE/Altivec are not available, use scalar mode instead */ #ifdef PFFFT_SIMD_DISABLE typedef float v4sf; @@ -202,6 +212,8 @@ typedef float v4sf; #define SVMUL(f,v) VMUL(LD_PS1(f),v) #endif +#if !defined PFFT_MACROS_ONLY + #if !defined(PFFFT_SIMD_DISABLE) typedef union v4sf_union { v4sf v; @@ -214,7 +226,8 @@ typedef union v4sf_union { #define assertv4(v,f0,f1,f2,f3) assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3)) /* detect bugs with the vector support macros */ -void validate_pffft_simd() { +void validate_pffft_simd(void); +void validate_pffft_simd(void) { float f[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 }; v4sf_union a0, a1, a2, a3, t, u; memcpy(a0.f, f, 4*sizeof(float)); @@ -230,7 +243,6 @@ void validate_pffft_simd() { printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77); t.v = VMADD(a1.v, a2.v,a0.v); printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80); - INTERLEAVE2(a1.v,a2.v,t.v,u.v); printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11); @@ -271,8 +283,6 @@ void pffft_aligned_free(void *p) { int pffft_simd_size() { return SIMD_SZ; } #endif -#if !defined PFFT_MACROS_ONLY - /* passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2 */ diff --git a/src/pffft.h b/src/pffft.h index 5ff8466..63522ca 100644 --- a/src/pffft.h +++ b/src/pffft.h @@ -86,6 +86,10 @@ #ifdef __cplusplus extern "C" { +#endif + +#if PFFFT_DOUBLE +#define float double #endif /* opaque struct holding internal stuff (precomputed twiddle factors) @@ -182,6 +186,8 @@ extern "C" { int pffft_simd_size(); #endif +#undef float + #ifdef __cplusplus } #endif diff --git a/src/pffft32.c b/src/pffft32.c index fb7400f..f480809 100644 --- a/src/pffft32.c +++ b/src/pffft32.c @@ -1,11 +1,14 @@ /* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ -#define _soxr_simd_aligned_free free -#define _soxr_simd_aligned_malloc malloc +#define SIMD_ALIGNED_FREE free +#define SIMD_ALIGNED_MALLOC malloc #define PFFFT_SIMD_DISABLE +#define PFFFT_DOUBLE 0 #include "pffft-wrap.c" + #include "filter.h" +#include "rdft_t.h" static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);} static void delete_setup(void * setup) {pffft_destroy_setup(setup);} @@ -15,8 +18,8 @@ static void backward (int length, void * setup, float * H, float * scratch) {pff static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;} static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H); (void)length;} static int multiplier(void) {return 1;} +static int flags(void) {return RDFT_NEEDS_SCRATCH;} -typedef void (* fn_t)(void); fn_t _soxr_rdft32_cb[] = { (fn_t)setup, (fn_t)setup, @@ -29,4 +32,8 @@ fn_t _soxr_rdft32_cb[] = { (fn_t)_soxr_ordered_partial_convolve_f, (fn_t)multiplier, (fn_t)pffft_reorder_back, + (fn_t)malloc, + (fn_t)calloc, + (fn_t)free, + (fn_t)flags, }; diff --git a/src/pffft32s.c b/src/pffft32s.c index b067ad2..7798a45 100644 --- a/src/pffft32s.c +++ b/src/pffft32s.c @@ -1,17 +1,20 @@ /* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ +#define PFFFT_DOUBLE 0 #include "pffft-wrap.c" +#include "rdft_t.h" + static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);} static void forward (int length, void * setup, float * h, float * scratch) {pffft_transform (setup, h, h, scratch, PFFFT_FORWARD); (void)length;} static void oforward (int length, void * setup, float * h, float * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;} static void backward (int length, void * setup, float * H, float * scratch) {pffft_transform (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;} static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;} -static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H); (void)length;} +static void convolve(int length, void * setup, float * H, float const * with) {pffft_zconvolve(setup, H, with, H); (void)length;} static int multiplier(void) {return 1;} +static int flags(void) {return RDFT_IS_SIMD | RDFT_NEEDS_SCRATCH;} -typedef void (* fn_t)(void); fn_t _soxr_rdft32s_cb[] = { (fn_t)setup, (fn_t)setup, @@ -21,7 +24,11 @@ fn_t _soxr_rdft32s_cb[] = { (fn_t)backward, (fn_t)obackward, (fn_t)convolve, - (fn_t)_soxr_ordered_partial_convolve_simd, + (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD, (fn_t)multiplier, (fn_t)pffft_reorder_back, + (fn_t)SIMD_ALIGNED_MALLOC, + (fn_t)SIMD_ALIGNED_CALLOC, + (fn_t)SIMD_ALIGNED_FREE, + (fn_t)flags, }; diff --git a/src/pffft64s.c b/src/pffft64s.c new file mode 100644 index 0000000..7c37c9d --- /dev/null +++ b/src/pffft64s.c @@ -0,0 +1,34 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#define PFFFT_DOUBLE 1 +#include "pffft-wrap.c" + +#include "rdft_t.h" + +static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);} +static void forward (int length, void * setup, double * h, double * scratch) {pffft_transform (setup, h, h, scratch, PFFFT_FORWARD); (void)length;} +static void oforward (int length, void * setup, double * h, double * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;} +static void backward (int length, void * setup, double * H, double * scratch) {pffft_transform (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;} +static void obackward(int length, void * setup, double * H, double * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;} +static void convolve(int length, void * setup, double * H, double const * with) {pffft_zconvolve(setup, H, with, H); (void)length;} +static int multiplier(void) {return 1;} +static int flags(void) {return RDFT_IS_SIMD | RDFT_NEEDS_SCRATCH;} + +fn_t _soxr_rdft64s_cb[] = { + (fn_t)setup, + (fn_t)setup, + (fn_t)pffft_destroy_setup, + (fn_t)forward, + (fn_t)oforward, + (fn_t)backward, + (fn_t)obackward, + (fn_t)convolve, + (fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD, + (fn_t)multiplier, + (fn_t)pffft_reorder_back, + (fn_t)SIMD_ALIGNED_MALLOC, + (fn_t)SIMD_ALIGNED_CALLOC, + (fn_t)SIMD_ALIGNED_FREE, + (fn_t)flags, +}; diff --git a/src/poly-fir.h b/src/poly-fir.h index f7b4261..94db90e 100644 --- a/src/poly-fir.h +++ b/src/poly-fir.h @@ -1,97 +1,138 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ -/* Resample using an interpolated poly-phase FIR with length LEN.*/ -/* Input must be followed by LEN-1 samples. */ +/* Resample using an interpolated poly-phase FIR with length LEN. */ +/* Input must be followed by FIR_LENGTH-1 samples. */ -#define a (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 0,j)) -#define b (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 1,j)) -#define c (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 2,j)) -#define d (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 3,j)) -#if COEF_INTERP == 0 - #define _ sum += a *in[j], ++j; -#elif COEF_INTERP == 1 - #define _ sum += (b *x + a)*in[j], ++j; -#elif COEF_INTERP == 2 - #define _ sum += ((c *x + b)*x + a)*in[j], ++j; -#elif COEF_INTERP == 3 - #define _ sum += (((d*x + c)*x + b)*x + a)*in[j], ++j; -#else +#if COEF_INTERP != 1 && COEF_INTERP != 2 && COEF_INTERP != 3 #error COEF_INTERP #endif +#if SIMD_AVX || SIMD_SSE || SIMD_NEON + #define N (FIR_LENGTH>>2) + + #if COEF_INTERP == 1 + #define _ sum=vMac(vMac(b,X,a),vLdu(in+j*4),sum), ++j; + #elif COEF_INTERP == 2 + #define _ sum=vMac(vMac(vMac(c,X,b),X,a),vLdu(in+j*4),sum), ++j; + #else + #define _ sum=vMac(vMac(vMac(vMac(d,X,c),X,b),X,a),vLdu(in+j*4),sum), ++j; + #endif + + #define a coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-0)] + #define b coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-1)] + #define c coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-2)] + #define d coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-3)] + + #define BEGINNING v4_t X = vLds(x), sum = vZero(); \ + v4_t const * const __restrict coefs = (v4_t *)COEFS + #define MIDDLE switch (N) {case 3: CONVOLVE(3); break; case 4: CONVOLVE(4); \ + break; case 5: CONVOLVE(5); break; default: CONVOLVE(N); } + #define END vStorSum(output+i, sum) + #define cc(n) case n: core(n); break + #define CORE(n) switch (n) {cc(2); cc(3); cc(4); cc(5); cc(6); default: core(n);} +#else + #define N FIR_LENGTH + + #if COEF_INTERP == 1 + #define _ sum += (b*x + a)*in[j], ++j; + #elif COEF_INTERP == 2 + #define _ sum += ((c*x + b)*x + a)*in[j], ++j; + #else + #define _ sum += (((d*x + c)*x + b)*x + a)*in[j], ++j; + #endif + + #define a (coef(COEFS, COEF_INTERP, N, phase, 0,j)) + #define b (coef(COEFS, COEF_INTERP, N, phase, 1,j)) + #define c (coef(COEFS, COEF_INTERP, N, phase, 2,j)) + #define d (coef(COEFS, COEF_INTERP, N, phase, 3,j)) + + #define BEGINNING sample_t sum = 0 + #define MIDDLE CONVOLVE(N) + #define END output[i] = sum + #define CORE(n) core(n) +#endif + +#define fphpCore(n) \ + if (p->use_hi_prec_clock) { \ + float_step_t at = p->at.flt; \ + for (i = 0; (int)at < num_in; ++i, at += p->step.flt) { \ + sample_t const * const __restrict in = input + (int)at; \ + float_step_t frac = at - (int)at; \ + int phase = (int)(frac * (1 << PHASE_BITS)); \ + sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase); \ + int j = 0; \ + BEGINNING; CONVOLVE(n); END; \ + } \ + fifo_read(&p->fifo, (int)at, NULL); \ + p->at.flt = at - (int)at; \ + } else + +#define hpCore(n) \ + if (p->use_hi_prec_clock) { \ + for (i = 0; p->at.integer < num_in; ++i, \ + p->at.fix.ls.all += p->step.fix.ls.all, \ + p->at.whole += p->step.whole + (p->at.fix.ls.all < p->step.fix.ls.all)) { \ + sample_t const * const __restrict in = input + p->at.integer; \ + uint32_t frac = p->at.fraction; \ + int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ \ + sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); /* low-order bits, scaled to [0,1) */ \ + int j = 0; \ + BEGINNING; CONVOLVE(n); END; \ + } \ + fifo_read(&p->fifo, p->at.integer, NULL); \ + p->at.integer = 0; \ + } else + +#define spCore(n) { \ + for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) { \ + sample_t const * const __restrict in = input + p->at.integer; \ + uint32_t frac = p->at.fraction; \ + int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ \ + sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); /* low-order bits, scaled to [0,1) */ \ + int j = 0; \ + BEGINNING; CONVOLVE(n); END; \ + } \ + fifo_read(&p->fifo, p->at.integer, NULL); \ + p->at.integer = 0; } + +#if defined HI_PREC_CLOCK && FLOAT_HI_PREC_CLOCK + #define core(n) fphpCore(n) spCore(n) +#elif defined HI_PREC_CLOCK + #define core(n) hpCore(n) spCore(n) +#else + #define core(n) spCore(n) +#endif + + + static void FUNCTION(stage_t * p, fifo_t * output_fifo) { sample_t const * input = stage_read_p(p); - int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio); - sample_t * output = fifo_reserve(output_fifo, max_num_out); + int num_in = min(stage_occupancy(p), p->input_size); + int i, max_num_out = 1 + (int)(num_in * p->out_in_ratio); + sample_t * const __restrict output = fifo_reserve(output_fifo, max_num_out); -#if defined HI_PREC_CLOCK -#if FLOAT_HI_PREC_CLOCK - if (p->use_hi_prec_clock) { - float_step_t at = p->at.flt; - for (i = 0; (int)at < num_in; ++i, at += p->step.flt) { - sample_t const * in = input + (int)at; - float_step_t frac = at - (int)at; - int phase = (int)(frac * (1 << PHASE_BITS)); -#if COEF_INTERP > 0 - sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase); -#endif - sample_t sum = 0; - int j = 0; - CONVOLVE - output[i] = sum; - } - fifo_read(&p->fifo, (int)at, NULL); - p->at.flt = at - (int)at; - } else -#else - if (p->use_hi_prec_clock) { - for (i = 0; p->at.integer < num_in; ++i, - p->at.fix.ls.all += p->step.fix.ls.all, - p->at.whole += p->step.whole + (p->at.fix.ls.all < p->step.fix.ls.all)) { - sample_t const * in = input + p->at.integer; - uint32_t frac = p->at.fraction; - int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ -#if COEF_INTERP > 0 /* low-order bits, scaled to [0,1) */ - sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); -#endif - sample_t sum = 0; - int j = 0; - CONVOLVE - output[i] = sum; - } - fifo_read(&p->fifo, p->at.integer, NULL); - p->at.integer = 0; - } else -#endif -#endif - { - for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) { - sample_t const * in = input + p->at.integer; - uint32_t frac = p->at.fraction; - int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ -#if COEF_INTERP > 0 /* low-order bits, scaled to [0,1) */ - sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); -#endif - sample_t sum = 0; - int j = 0; - CONVOLVE - output[i] = sum; - } - fifo_read(&p->fifo, p->at.integer, NULL); - p->at.integer = 0; - } + CORE(N); assert(max_num_out - i >= 0); fifo_trim_by(output_fifo, max_num_out - i); } + + #undef _ #undef a #undef b #undef c #undef d +#undef CORE +#undef cc +#undef core #undef COEF_INTERP +#undef N +#undef BEGINNING +#undef MIDDLE +#undef END #undef CONVOLVE #undef FIR_LENGTH #undef FUNCTION diff --git a/src/poly-fir0.h b/src/poly-fir0.h index 52d85b3..0f28c69 100644 --- a/src/poly-fir0.h +++ b/src/poly-fir0.h @@ -1,32 +1,56 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ -/* Resample using a non-interpolated poly-phase FIR with length LEN.*/ -/* Input must be followed by LEN-1 samples. */ +/* Resample using a non-interpolated poly-phase FIR with length LEN. */ +/* Input must be followed by FIR_LENGTH-1 samples. */ -#define _ sum += (coef(p->shared->poly_fir_coefs, 0, FIR_LENGTH, rem, 0, j)) *at[j], ++j; +#if SIMD_AVX || SIMD_SSE || SIMD_NEON + #define N (FIR_LENGTH>>2) + #define BEGINNING v4_t sum = vZero(); \ + v4_t const * const __restrict coefs = (v4_t *)COEFS + N * rem; + #define _ sum = vMac(vLdu(at+j*4), coefs[j], sum), ++j; + #define END vStorSum(output+i, sum) + #define cc(n) case n: core(n); break + #define CORE(n) switch (n) {cc(2); cc(3); cc(4); cc(5); cc(6); default: core(n);} +#else + #define N FIR_LENGTH + #define BEGINNING sample_t sum = 0; \ + sample_t const * const __restrict coefs = (sample_t *)COEFS + N * rem; + #define _ sum += coefs[j]*at[j], ++j; + #define END output[i] = sum + #define CORE(n) core(n) +#endif + +#define core(n) \ + for (i = 0; p->at.integer < num_in * p->L; ++i, \ + p->at.integer += p->step.integer) { \ + int const div = p->at.integer / p->L, rem = p->at.integer % p->L; \ + sample_t const * const __restrict at = input + div; \ + int j = 0; BEGINNING; CONVOLVE(n); END;} static void FUNCTION(stage_t * p, fifo_t * output_fifo) { - sample_t const * input = stage_read_p(p); - int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio); - sample_t * output = fifo_reserve(output_fifo, max_num_out); + int num_in = min(stage_occupancy(p), p->input_size); + if (num_in) { + sample_t const * input = stage_read_p(p); + int i, num_out = (num_in * p->L - p->at.integer + p->step.integer - 1) / p->step.integer; + sample_t * __restrict output = fifo_reserve(output_fifo, num_out); - for (i = 0; p->at.integer < num_in * p->L; ++i, p->at.integer += p->step.integer) { - int div = p->at.integer / p->L, rem = p->at.integer % p->L; - sample_t const * at = input + div; - sample_t sum = 0; - int j = 0; - CONVOLVE - output[i] = sum; + CORE(N); + assert(i == num_out); + fifo_read(&p->fifo, p->at.integer / p->L, NULL); + p->at.integer = p->at.integer % p->L; } - assert(max_num_out - i >= 0); - fifo_trim_by(output_fifo, max_num_out - i); - fifo_read(&p->fifo, p->at.integer / p->L, NULL); - p->at.integer = p->at.integer % p->L; } #undef _ +#undef CORE +#undef cc +#undef core +#undef N +#undef BEGINNING +#undef MIDDLE +#undef END #undef CONVOLVE #undef FIR_LENGTH #undef FUNCTION diff --git a/src/rate.h b/src/rate.h deleted file mode 100644 index b0598f7..0000000 --- a/src/rate.h +++ /dev/null @@ -1,726 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-14 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#include -#include -#include -#include - -#include "filter.h" - -#if defined SOXR_LIB -#include "internal.h" - -typedef void (* fn_t)(void); -extern fn_t RDFT_CB[11]; - -#define rdft_forward_setup (*(void * (*)(int))RDFT_CB[0]) -#define rdft_backward_setup (*(void * (*)(int))RDFT_CB[1]) -#define rdft_delete_setup (*(void (*)(void *))RDFT_CB[2]) -#define rdft_forward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[3]) -#define rdft_oforward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[4]) -#define rdft_backward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[5]) -#define rdft_obackward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[6]) -#define rdft_convolve (*(void (*)(int, void *, sample_t *, sample_t const *))RDFT_CB[7]) -#define rdft_convolve_portion (*(void (*)(int, sample_t *, sample_t const *))RDFT_CB[8]) -#define rdft_multiplier (*(int (*)(void))RDFT_CB[9]) -#define rdft_reorder_back (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[10]) - -#endif - -#if RATE_SIMD /* Align for SIMD: */ - #include "simd.h" -#if 0 /* Not using this yet. */ - #define RATE_SIMD_POLY 1 - #define num_coefs4 ((num_coefs + 3) & ~3) - #define coefs4_check(i) ((i) < num_coefs) -#else - #define RATE_SIMD_POLY 0 - #define num_coefs4 num_coefs - #define coefs4_check(i) 1 -#endif - - #define aligned_free _soxr_simd_aligned_free - #define aligned_malloc _soxr_simd_aligned_malloc - #define aligned_calloc _soxr_simd_aligned_calloc -#if 0 - #define FIFO_REALLOC aligned_realloc - #define FIFO_MALLOC aligned_malloc - #define FIFO_FREE aligned_free - - static void * aligned_realloc(void * q, size_t nb_bytes, size_t copy_bytes) { - void * p = aligned_malloc(nb_bytes); - if (p) memcpy(p, q, copy_bytes); - aligned_free(q); - return p; - } -#endif -#else - #define RATE_SIMD_POLY 0 - #define num_coefs4 num_coefs - #define coefs4_check(i) 1 - - #define aligned_free free - #define aligned_malloc malloc - #define aligned_calloc calloc -#endif - -#define FIFO_SIZE_T int -#include "fifo.h" - -typedef union { /* Int64 in parts */ - #if HAVE_BIGENDIAN - struct {int32_t ms; uint32_t ls;} parts; - #else - struct {uint32_t ls; int32_t ms;} parts; - #endif - int64_t all; -} int64p_t; - -typedef union { /* Uint64 in parts */ - #if HAVE_BIGENDIAN - struct {uint32_t ms, ls;} parts; - #else - struct {uint32_t ls, ms;} parts; - #endif - uint64_t all; -} uint64p_t; - -#define FLOAT_HI_PREC_CLOCK 0 /* Non-float hi-prec has ~96 bits. */ -#define float_step_t long double /* __float128 is also a (slow) option */ - -#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) coef_p[(fir_len) * ((interp_order) + 1) * (phase_num) + ((interp_order) + 1) * (fir_coef_num) + (interp_order - coef_interp_num)] - -#define raw_coef_t double - -static sample_t * prepare_coefs(raw_coef_t const * coefs, int num_coefs, - int num_phases, int interp_order, double multiplier) -{ - int i, j, length = num_coefs4 * num_phases; - sample_t * result = malloc((size_t)(length * (interp_order + 1)) * sizeof(*result)); - double fm1 = coefs[0], f1 = 0, f2 = 0; - - for (i = num_coefs4 - 1; i >= 0; --i) - for (j = num_phases - 1; j >= 0; --j) { - double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */ - int pos = i * num_phases + j - 1; - fm1 = coefs4_check(i) && pos > 0 ? coefs[pos - 1] * multiplier : 0; - switch (interp_order) { - case 1: b = f1 - f0; break; - case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break; - case 3: c=.5*(f1+fm1)-f0;d=(1/6.)*(f2-f1+fm1-f0-4*c);b=f1-f0-d-c; break; - default: if (interp_order) assert(0); - } - #define coef_coef(x) \ - coef(result, interp_order, num_coefs4, j, x, num_coefs4 - 1 - i) - coef_coef(0) = (sample_t)f0; - if (interp_order > 0) coef_coef(1) = (sample_t)b; - if (interp_order > 1) coef_coef(2) = (sample_t)c; - if (interp_order > 2) coef_coef(3) = (sample_t)d; - #undef coef_coef - f2 = f1, f1 = f0; - } - return result; -} - -typedef struct { - int dft_length, num_taps, post_peak; - void * dft_forward_setup, * dft_backward_setup; - sample_t * coefs; -} dft_filter_t; - -typedef struct { /* So generated filter coefs may be shared between channels */ - sample_t * poly_fir_coefs; - dft_filter_t dft_filter[2]; -} rate_shared_t; - -typedef enum { - irrational_stage = 1, - cubic_stage, - dft_stage, - half_stage, - rational_stage -} stage_type_t; - -struct stage; -typedef void (* stage_fn_t)(struct stage * input, fifo_t * output); -#define MULT32 (65536. * 65536.) - -typedef union { /* Fixed point arithmetic */ - struct {uint64p_t ls; int64p_t ms;} fix; - float_step_t flt; -} step_t; - -typedef struct stage { - /* Common to all stage types: */ - stage_type_t type; - stage_fn_t fn; - fifo_t fifo; - int pre; /* Number of past samples to store */ - int pre_post; /* pre + number of future samples to store */ - int preload; /* Number of zero samples to pre-load the fifo */ - double out_in_ratio; /* For buffer management. */ - - /* For a stage with variable (run-time generated) filter coefs: */ - rate_shared_t * shared; - unsigned dft_filter_num; /* Which, if any, of the 2 DFT filters to use */ - sample_t * dft_scratch, * dft_out; - - /* For a stage with variable L/M: */ - step_t at, step; - bool use_hi_prec_clock; - int L, remM; - int n, phase_bits, block_len; - double mult, phase0; -} stage_t; - -#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post) -#define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre) - -static void cubic_stage_fn(stage_t * p, fifo_t * output_fifo) -{ - int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio); - sample_t const * input = stage_read_p(p); - sample_t * output = fifo_reserve(output_fifo, max_num_out); - -#define integer fix.ms.parts.ms -#define fraction fix.ms.parts.ls -#define whole fix.ms.all - for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) { - sample_t const * s = input + p->at.integer; - double x = p->at.fraction * (1 / MULT32); - double b = .5*(s[1]+s[-1])-*s, a = (1/6.)*(s[2]-s[1]+s[-1]-*s-4*b); - double c = s[1]-*s-a-b; - output[i] = (sample_t)(p->mult * (((a*x + b)*x + c)*x + *s)); - } - assert(max_num_out - i >= 0); - fifo_trim_by(output_fifo, max_num_out - i); - fifo_read(&p->fifo, p->at.integer, NULL); - p->at.integer = 0; -} - -#if RATE_SIMD - #define dft_out p->dft_out -#else - #define dft_out output -#endif - -static void dft_stage_fn(stage_t * p, fifo_t * output_fifo) -{ - sample_t * output; - int i, j, num_in = max(0, fifo_occupancy(&p->fifo)); - rate_shared_t const * s = p->shared; - dft_filter_t const * f = &s->dft_filter[p->dft_filter_num]; - int const overlap = f->num_taps - 1; - - while (p->at.integer + p->L * num_in >= f->dft_length) { - div_t divd = div(f->dft_length - overlap - p->at.integer + p->L - 1, p->L); - sample_t const * input = fifo_read_ptr(&p->fifo); - fifo_read(&p->fifo, divd.quot, NULL); - num_in -= divd.quot; - - output = fifo_reserve(output_fifo, f->dft_length); - - if (lsx_is_power_of_2(p->L)) { /* F-domain */ - int portion = f->dft_length / p->L; - memcpy(dft_out, input, (unsigned)portion * sizeof(*dft_out)); - rdft_oforward(portion, f->dft_forward_setup, dft_out, p->dft_scratch); - for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */ - dft_out[i] = dft_out[(portion << 1) - i], - dft_out[i+1] = -dft_out[(portion << 1) - i + 1]; - dft_out[portion] = dft_out[1]; - dft_out[portion + 1] = 0; - dft_out[1] = dft_out[0]; - - for (portion <<= 1; i < f->dft_length; i += portion, portion <<= 1) { - memcpy(dft_out + i, dft_out, (size_t)portion * sizeof(*dft_out)); - dft_out[i + 1] = 0; - } - if (p->step.integer > 0) - rdft_reorder_back(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch); - } else { - if (p->L == 1) - memcpy(dft_out, input, (size_t)f->dft_length * sizeof(*dft_out)); - else { - memset(dft_out, 0, (size_t)f->dft_length * sizeof(*dft_out)); - for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L) - dft_out[i] = input[j]; - p->at.integer = p->L - 1 - divd.rem; - } - if (p->step.integer > 0) - rdft_forward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch); - else - rdft_oforward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch); - } - - if (p->step.integer > 0) { - rdft_convolve(f->dft_length, f->dft_backward_setup, dft_out, f->coefs); - rdft_backward(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch); -#if RATE_SIMD - if (p->step.integer == 1) - memcpy(output, dft_out, (size_t)f->dft_length * sizeof(sample_t)); -#endif - if (p->step.integer != 1) { - for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j, - i += p->step.integer) - output[j] = dft_out[i]; - p->remM = i - (f->dft_length - overlap); - fifo_trim_by(output_fifo, f->dft_length - j); - } - else fifo_trim_by(output_fifo, overlap); - } - else { /* F-domain */ - int m = -p->step.integer; - rdft_convolve_portion(f->dft_length >> m, dft_out, f->coefs); - rdft_obackward(f->dft_length >> m, f->dft_backward_setup, dft_out, p->dft_scratch); -#if RATE_SIMD - memcpy(output, dft_out, (size_t)(f->dft_length >> m) * sizeof(sample_t)); -#endif - fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >>m); - } - } -} - -#undef dft_out - -/* Set to 4 x nearest power of 2 */ -/* or half of that if danger of causing too many cache misses. */ -static int set_dft_length(int num_taps, int min, int large) -{ - double d = log((double)num_taps) / log(2.); - return 1 << range_limit((int)(d + 2.77), min, max((int)(d + 1.77), large)); -} - -static void dft_stage_init( - unsigned instance, double Fp, double Fs, double Fn, double att, - double phase, stage_t * p, int L, int M, double * multiplier, - int min_dft_size, int large_dft_size) -{ - dft_filter_t * f = &p->shared->dft_filter[instance]; - int num_taps = 0, dft_length = f->dft_length, i; - bool f_domain_m = abs(3-M) == 1 && Fs <= 1; - - if (!dft_length) { - int k = phase == 50 && lsx_is_power_of_2(L) && Fn == L? L << 1 : 4; - double * h = lsx_design_lpf(Fp, Fs, Fn, att, &num_taps, -k, -1.); - - if (phase != 50) - lsx_fir_to_phase(&h, &num_taps, &f->post_peak, phase); - else f->post_peak = num_taps / 2; - - dft_length = set_dft_length(num_taps, min_dft_size, large_dft_size); - f->coefs = aligned_calloc((size_t)dft_length, sizeof(*f->coefs)); - for (i = 0; i < num_taps; ++i) - f->coefs[(i + dft_length - num_taps + 1) & (dft_length - 1)] - = (sample_t)(h[i] * ((1. / dft_length) * rdft_multiplier() * L * *multiplier)); - free(h); - } - -#if RATE_SIMD - p->dft_out = aligned_malloc(sizeof(sample_t) * (size_t)dft_length); -#endif -#if 1 /* In fact, currently, only pffft needs this. */ - p->dft_scratch = aligned_malloc(2 * sizeof(sample_t) * (size_t)dft_length); -#endif - - if (!f->dft_length) { - void * coef_setup = rdft_forward_setup(dft_length); - int Lp = lsx_is_power_of_2(L)? L : 1; - int Mp = f_domain_m? M : 1; - f->dft_forward_setup = rdft_forward_setup(dft_length / Lp); - f->dft_backward_setup = rdft_backward_setup(dft_length / Mp); - if (Mp == 1) - rdft_forward(dft_length, coef_setup, f->coefs, p->dft_scratch); - else - rdft_oforward(dft_length, coef_setup, f->coefs, p->dft_scratch); - rdft_delete_setup(coef_setup); - f->num_taps = num_taps; - f->dft_length = dft_length; - lsx_debug("fir_len=%i dft_length=%i Fp=%g Fs=%g Fn=%g att=%g %i/%i", - num_taps, dft_length, Fp, Fs, Fn, att, L, M); - } - *multiplier = 1; - p->out_in_ratio = (double)L / M; - p->type = dft_stage; - p->fn = dft_stage_fn; - p->preload = f->post_peak / L; - p->at.integer = f->post_peak % L; - p->L = L; - p->step.integer = f_domain_m? -M/2 : M; - p->dft_filter_num = instance; - p->block_len = f->dft_length - (f->num_taps - 1); - p->phase0 = p->at.integer / p->L; -} - -#include "filters.h" - -typedef struct { - double factor; - uint64_t samples_in, samples_out; - int num_stages; - stage_t * stages; -} rate_t; - -#define pre_stage p->stages[shift] -#define arb_stage p->stages[shift + have_pre_stage] -#define post_stage p->stages[shift + have_pre_stage + have_arb_stage] -#define have_pre_stage (preM * preL != 1) -#define have_arb_stage (arbM * arbL != 1) -#define have_post_stage (postM * postL != 1) - -#define TO_3dB(a) ((1.6e-6*a-7.5e-4)*a+.646) -#define LOW_Q_BW0 (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */ - -typedef enum { - rolloff_none, rolloff_small /* <= 0.01 dB */, rolloff_medium /* <= 0.35 dB */ -} rolloff_t; - - -static char const * rate_init( - /* Private work areas (to be supplied by the client): */ - rate_t * p, /* Per audio channel. */ - rate_shared_t * shared, /* Between channels (undergoing same rate change)*/ - - /* Public parameters: Typically */ - double factor, /* Input rate divided by output rate. */ - double bits, /* Required bit-accuracy (pass + stop) 16|20|28 */ - double phase, /* Linear/minimum etc. filter phase. 50 */ - double passband_end, /* 0dB pt. bandwidth to preserve; nyquist=1 0.913*/ - double stopband_begin, /* Aliasing/imaging control; > passband_end 1 */ - rolloff_t rolloff, /* Pass-band roll-off small */ - bool maintain_3dB_pt, /* true */ - double multiplier, /* Linear gain to apply during conversion. 1 */ - - /* Primarily for test/development purposes: */ - bool use_hi_prec_clock, /* Increase irrational ratio accuracy. false */ - int interpolator, /* Force a particular coef interpolator. -1 */ - size_t max_coefs_size, /* k bytes of coefs to try to keep below. 400 */ - bool noSmallIntOpt, /* Disable small integer optimisations. false */ - int log2_min_dft_size, - int log2_large_dft_size) -{ - double att = (bits + 1) * linear_to_dB(2.), attArb = att; /* pass + stop */ - double tbw0 = 1 - passband_end, Fs_a = stopband_begin; - double arbM = factor, tbw_tighten = 1; - int n = 0, i, preL = 1, preM = 1, shift = 0, arbL = 1, postL = 1, postM = 1; - bool upsample = false, rational = false, iOpt = !noSmallIntOpt; - int mode = rolloff > rolloff_small? factor > 1 || passband_end > LOW_Q_BW0: - (int)ceil(2 + (bits - 17) / 4); - stage_t * s; - - assert(factor > 0); - assert(!bits || (15 <= bits && bits <= 33)); - assert(0 <= phase && phase <= 100); - assert(.53 <= passband_end); - assert(stopband_begin <= 1.2); - assert(passband_end + .005 < stopband_begin); - - p->factor = factor; - if (bits!=0) while (!n++) { /* Determine stages: */ - int try, L, M, x, maxL = interpolator > 0? 1 : mode? 2048 : - (int)ceil((double)max_coefs_size * 1000. / (U100_l * sizeof(sample_t))); - double d, epsilon = 0, frac; - upsample = arbM < 1; - for (i = (int)(arbM * .5), shift = 0; i >>= 1; arbM *= .5, ++shift); - preM = upsample || (arbM > 1.5 && arbM < 2); - postM = 1 + (arbM > 1 && preM), arbM /= postM; - preL = 1 + (!preM && arbM < 2) + (upsample && mode), arbM *= preL; - if ((frac = arbM - (int)arbM)!=0) - epsilon = fabs(floor(frac * MULT32 + .5) / (frac * MULT32) - 1); - for (i = 1, rational = frac==0; i <= maxL && !rational; ++i) { - d = frac * i, try = (int)(d + .5); - if ((rational = fabs(try / d - 1) <= epsilon)) { /* No long doubles! */ - if (try == i) - arbM = ceil(arbM), shift += x = arbM > 3, arbM /= 1 + x; - else arbM = i * (int)arbM + try, arbL = i; - } - } - L = preL * arbL, M = (int)(arbM * postM), x = (L|M)&1, L >>= !x, M >>= !x; - if (iOpt && postL == 1 && (d = preL * arbL / arbM) > 4 && d != 5) { - for (postL = 4, i = (int)(d / 16); (i >>= 1) && postL < 256; postL <<= 1); - arbM = arbM * postL / arbL / preL, arbL = 1, n = 0; - } else if (rational && (max(L, M) < 3 + 2 * iOpt || L * M < 6 * iOpt)) - preL = L, preM = M, arbM = arbL = postM = 1; - if (!mode && (!rational || !n)) - ++mode, n = 0; - } - - p->num_stages = shift + have_pre_stage + have_arb_stage + have_post_stage; - if (!p->num_stages && multiplier != 1) { - bits = arbL = 0; /* Use cubic_stage in this case. */ - ++p->num_stages; - } - p->stages = calloc((size_t)p->num_stages + 1, sizeof(*p->stages)); - for (i = 0; i < p->num_stages; ++i) - p->stages[i].shared = shared; - - if ((n = p->num_stages) > 1) { /* Att. budget: */ - if (have_arb_stage) - att += linear_to_dB(2.), attArb = att, --n; - att += linear_to_dB((double)n); - } - - for (n = 0; (size_t)n + 1 < array_length(half_firs) && att > half_firs[n].att; ++n); - for (i = 0, s = p->stages; i < shift; ++i, ++s) { - s->type = half_stage; - s->fn = half_firs[n].fn; - s->pre_post = 4 * half_firs[n].num_coefs; - s->preload = s->pre = s->pre_post >> 1; - } - - if (have_pre_stage) { - if (maintain_3dB_pt && have_post_stage) { /* Trans. bands overlapping. */ - double tbw3 = tbw0 * TO_3dB(att); /* FFS: consider Fs_a. */ - double x = ((2.1429e-4 - 5.2083e-7 * att) * att - .015863) * att + 3.95; - x = att * pow((tbw0 - tbw3) / (postM / (factor * postL) - 1 + tbw0), x); - if (x > .035) { - tbw_tighten = ((4.3074e-3 - 3.9121e-4 * x) * x - .040009) * x + 1.0014; - lsx_debug("x=%g tbw_tighten=%g", x, tbw_tighten); - } - } - dft_stage_init(0, 1 - tbw0 * tbw_tighten, Fs_a, preM? max(preL, preM) : - arbM / arbL, att, phase, &pre_stage, preL, max(preM, 1), &multiplier, - log2_min_dft_size, log2_large_dft_size); - } - - if (bits==0 && have_arb_stage) { /* `Quick' cubic arb stage: */ - arb_stage.type = cubic_stage; - arb_stage.fn = cubic_stage_fn; - arb_stage.mult = multiplier, multiplier = 1; - arb_stage.step.whole = (int64_t)(arbM * MULT32 + .5); - arb_stage.pre_post = max(3, arb_stage.step.integer); - arb_stage.preload = arb_stage.pre = 1; - arb_stage.out_in_ratio = MULT32 / (double)arb_stage.step.whole; - } - else if (have_arb_stage) { /* Higher quality arb stage: */ - poly_fir_t const * f = &poly_firs[6*(upsample + !!preM) + mode - !upsample]; - int order, num_coefs = (int)f->interp[0].scalar, phase_bits, phases; - size_t coefs_size; - double x = .5, at, Fp, Fs, Fn, mult = upsample? 1 : arbL / arbM; - poly_fir1_t const * f1; - - Fn = !upsample && preM? x = arbM / arbL : 1; - Fp = !preM? mult : mode? .5 : 1; - Fs = 2 - Fp; /* Ignore Fs_a; it would have little benefit here. */ - Fp *= 1 - tbw0; - if (rolloff > rolloff_small && mode) - Fp = !preM? mult * .5 - .125 : mult * .05 + .1; - else if (rolloff == rolloff_small) - Fp = Fs - (Fs - .148 * x - Fp * .852) * (.00813 * bits + .973); - - i = (interpolator < 0? !rational : max(interpolator, !rational)) - 1; - do { - f1 = &f->interp[++i]; - assert(f1->fn); - if (i) - arbM /= arbL, arbL = 1, rational = false; - phase_bits = (int)ceil(f1->scalar + log(mult)/log(2.)); - phases = !rational? (1 << phase_bits) : arbL; - if (f->interp[0].scalar==0) { - int phases0 = max(phases, 19), n0 = 0; - lsx_design_lpf(Fp, Fs, -Fn, attArb, &n0, phases0, f->beta); - num_coefs = n0 / phases0 + 1, num_coefs += num_coefs & !preM; - } - if ((num_coefs & 1) && rational && (arbL & 1)) - phases <<= 1, arbL <<= 1, arbM *= 2; - at = arbL * (arb_stage.phase0 = .5 * (num_coefs & 1)); - order = i + (i && mode > 4); - coefs_size = (size_t)(num_coefs4 * phases * (order + 1)) * sizeof(sample_t); - } while (interpolator < 0 && i < 2 && f->interp[i+1].fn && - coefs_size / 1000 > max_coefs_size); - - if (!arb_stage.shared->poly_fir_coefs) { - int num_taps = num_coefs * phases - 1; - raw_coef_t * coefs = lsx_design_lpf( - Fp, Fs, Fn, attArb, &num_taps, phases, f->beta); - arb_stage.shared->poly_fir_coefs = prepare_coefs( - coefs, num_coefs, phases, order, multiplier); - lsx_debug("fir_len=%i phases=%i coef_interp=%i size=%.3gk", - num_coefs, phases, order, (double)coefs_size / 1000.); - free(coefs); - } - multiplier = 1; - arb_stage.type = rational? rational_stage : irrational_stage; - arb_stage.fn = f1->fn; - arb_stage.pre_post = num_coefs4 - 1; - arb_stage.preload = ((num_coefs - 1) >> 1) + (num_coefs4 - num_coefs); - arb_stage.n = num_coefs4; - arb_stage.phase_bits = phase_bits; - arb_stage.L = arbL; - arb_stage.use_hi_prec_clock = mode > 1 && use_hi_prec_clock && !rational; -#if FLOAT_HI_PREC_CLOCK - if (arb_stage.use_hi_prec_clock) { - arb_stage.at.flt = at; - arb_stage.step.flt = arbM; - arb_stage.out_in_ratio = (double)(arbL / arb_stage.step.flt); - } else -#endif - { - arb_stage.at.whole = (int64_t)(at * MULT32 + .5); -#if !FLOAT_HI_PREC_CLOCK - if (arb_stage.use_hi_prec_clock) { - arb_stage.at.fix.ls.parts.ms = 0x80000000ul; - arbM *= MULT32; - arb_stage.step.whole = (int64_t)arbM; - arbM -= (double)arb_stage.step.whole; - arbM *= MULT32 * MULT32; - arb_stage.step.fix.ls.all = (uint64_t)arbM; - } else -#endif - arb_stage.step.whole = (int64_t)(arbM * MULT32 + .5); - arb_stage.out_in_ratio = MULT32 * arbL / (double)arb_stage.step.whole; - } - } - - if (have_post_stage) - dft_stage_init(1, 1 - (1 - (1 - tbw0) * - (upsample? factor * postL / postM : 1)) * tbw_tighten, Fs_a, - (double)max(postL, postM), att, phase, &post_stage, postL, postM, - &multiplier, log2_min_dft_size, log2_large_dft_size); - - - lsx_debug("%g: »%i⋅%i/%i⋅%i/%g⋅%i/%i", - 1/factor, shift, preL, preM, arbL, arbM, postL, postM); - for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) { - fifo_create(&s->fifo, (int)sizeof(sample_t)); - memset(fifo_reserve(&s->fifo, s->preload), 0, sizeof(sample_t) * (size_t)s->preload); - lsx_debug("%5i|%-5i preload=%i remL=%i o/i=%g", - s->pre, s->pre_post - s->pre, s->preload, s->at.integer, s->out_in_ratio); - } - fifo_create(&s->fifo, (int)sizeof(sample_t)); - return 0; -} - -static void rate_process(rate_t * p) -{ - stage_t * stage = p->stages; - int i; - for (i = 0; i < p->num_stages; ++i, ++stage) - stage->fn(stage, &(stage+1)->fifo); -} - -static sample_t * rate_input(rate_t * p, sample_t const * samples, size_t n) -{ - p->samples_in += n; - return fifo_write(&p->stages[0].fifo, (int)n, samples); -} - -static sample_t const * rate_output(rate_t * p, sample_t * samples, size_t * n) -{ - fifo_t * fifo = &p->stages[p->num_stages].fifo; - p->samples_out += *n = min(*n, (size_t)fifo_occupancy(fifo)); - return fifo_read(fifo, (int)*n, samples); -} - -static void rate_flush(rate_t * p) -{ - fifo_t * fifo = &p->stages[p->num_stages].fifo; -#if defined _MSC_VER && _MSC_VER == 1200 - uint64_t samples_out = (uint64_t)(int64_t)((double)(int64_t)p->samples_in / p->factor + .5); -#else - uint64_t samples_out = (uint64_t)((double)p->samples_in / p->factor + .5); -#endif - size_t remaining = (size_t)(samples_out - p->samples_out); - - if ((size_t)fifo_occupancy(fifo) < remaining) { - uint64_t samples_in = p->samples_in; - sample_t * buff = calloc(1024, sizeof(*buff)); - - while ((size_t)fifo_occupancy(fifo) < remaining) { - rate_input(p, buff, 1024); - rate_process(p); - } - fifo_trim_to(fifo, (int)remaining); - p->samples_in = samples_in; - free(buff); - } -} - -static void rate_close(rate_t * p) -{ - rate_shared_t * shared = p->stages[0].shared; - int i; - - for (i = 0; i <= p->num_stages; ++i) { - stage_t * s = &p->stages[i]; - aligned_free(s->dft_scratch); - aligned_free(s->dft_out); - fifo_delete(&s->fifo); - } - if (shared) { - for (i = 0; i < 2; ++i) { - dft_filter_t * f= &shared->dft_filter[i]; - aligned_free(f->coefs); - rdft_delete_setup(f->dft_forward_setup); - rdft_delete_setup(f->dft_backward_setup); - } - free(shared->poly_fir_coefs); - memset(shared, 0, sizeof(*shared)); - } - free(p->stages); -} - -#if defined SOXR_LIB -static double rate_delay(rate_t * p) -{ -#if defined _MSC_VER && _MSC_VER == 1200 - double samples_out = (double)(int64_t)p->samples_in / p->factor; - return max(0, samples_out - (double)(int64_t)p->samples_out); -#else - double samples_out = (double)p->samples_in / p->factor; - return max(0, samples_out - (double)p->samples_out); -#endif -} - -static void rate_sizes(size_t * shared, size_t * channel) -{ - *shared = sizeof(rate_shared_t); - *channel = sizeof(rate_t); -} - -#include "soxr.h" - -static char const * rate_create( - void * channel, - void * shared, - double io_ratio, - soxr_quality_spec_t * q_spec, - soxr_runtime_spec_t * r_spec, - double scale) -{ - return rate_init( - channel, shared, - io_ratio, - q_spec->precision, - q_spec->phase_response, - q_spec->passband_end, - q_spec->stopband_begin, - (rolloff_t)"\1\2\0"[q_spec->flags & 3], - !!(q_spec->flags & SOXR_MAINTAIN_3DB_PT), - scale, - !!(q_spec->flags & SOXR_HI_PREC_CLOCK), - (int)(r_spec->flags & 3) - 1, - r_spec->coef_size_kbytes, - !!(r_spec->flags & SOXR_NOSMALLINTOPT), - (int)r_spec->log2_min_dft_size, - (int)r_spec->log2_large_dft_size); -} - -static char const * id(void) -{ - return RATE_ID; -} - -fn_t RATE_CB[] = { - (fn_t)rate_input, - (fn_t)rate_process, - (fn_t)rate_output, - (fn_t)rate_flush, - (fn_t)rate_close, - (fn_t)rate_delay, - (fn_t)rate_sizes, - (fn_t)rate_create, - (fn_t)0, - (fn_t)id, -}; -#endif diff --git a/src/rate32.c b/src/rate32.c deleted file mode 100644 index fe85bae..0000000 --- a/src/rate32.c +++ /dev/null @@ -1,9 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define sample_t float -#define RATE_SIMD 0 -#define RDFT_CB _soxr_rdft32_cb -#define RATE_CB _soxr_rate32_cb -#define RATE_ID "cr32" -#include "rate.h" diff --git a/src/rate32s.c b/src/rate32s.c deleted file mode 100644 index 3acfcb4..0000000 --- a/src/rate32s.c +++ /dev/null @@ -1,9 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define sample_t float -#define RATE_SIMD 1 -#define RDFT_CB _soxr_rdft32s_cb -#define RATE_CB _soxr_rate32s_cb -#define RATE_ID "cr32s" -#include "rate.h" diff --git a/src/rate64.c b/src/rate64.c deleted file mode 100644 index 6f25143..0000000 --- a/src/rate64.c +++ /dev/null @@ -1,9 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define sample_t double -#define RATE_SIMD 0 -#define RDFT_CB _soxr_rdft64_cb -#define RATE_CB _soxr_rate64_cb -#define RATE_ID "cr64" -#include "rate.h" diff --git a/src/rdft_t.h b/src/rdft_t.h new file mode 100644 index 0000000..293d9c3 --- /dev/null +++ b/src/rdft_t.h @@ -0,0 +1,24 @@ +/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +typedef void (* fn_t)(void); + +#define rdft_forward_setup (*(void * (*)(int))RDFT_CB[0]) +#define rdft_backward_setup (*(void * (*)(int))RDFT_CB[1]) +#define rdft_delete_setup (*(void (*)(void *))RDFT_CB[2]) +#define rdft_forward (*(void (*)(int, void *, void *, void *))RDFT_CB[3]) +#define rdft_oforward (*(void (*)(int, void *, void *, void *))RDFT_CB[4]) +#define rdft_backward (*(void (*)(int, void *, void *, void *))RDFT_CB[5]) +#define rdft_obackward (*(void (*)(int, void *, void *, void *))RDFT_CB[6]) +#define rdft_convolve (*(void (*)(int, void *, void *, void const *))RDFT_CB[7]) +#define rdft_convolve_portion (*(void (*)(int, void *, void const *))RDFT_CB[8]) +#define rdft_multiplier (*(int (*)(void))RDFT_CB[9]) +#define rdft_reorder_back (*(void (*)(int, void *, void *, void *))RDFT_CB[10]) +#define rdft_malloc (*(void * (*)(size_t))RDFT_CB[11]) +#define rdft_calloc (*(void * (*)(size_t, size_t))RDFT_CB[12]) +#define rdft_free (*(void (*)(void *))RDFT_CB[13]) +#define rdft_flags (*(int (*)(void))RDFT_CB[14]) + +/* Flag templates: */ +#define RDFT_IS_SIMD 1 +#define RDFT_NEEDS_SCRATCH 2 diff --git a/src/simd-dev.h b/src/simd-dev.h deleted file mode 100644 index 019325c..0000000 --- a/src/simd-dev.h +++ /dev/null @@ -1,5 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#define PFFT_MACROS_ONLY -#include "pffft.c" diff --git a/src/simd.c b/src/simd.c index 48d440f..ec548fd 100644 --- a/src/simd.c +++ b/src/simd.c @@ -1,21 +1,15 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ #include #include #include -#include "simd.h" -#include "simd-dev.h" + #include "soxr-config.h" -#if AVCODEC_FOUND - #define SIMD_ALIGNMENT (sizeof(double) * 4) -#else - #define SIMD_ALIGNMENT (sizeof(float) * 4) -#endif +#define SIMD_ALIGNMENT (sizeof(float) * (1 + (PFFFT_DOUBLE|AVCODEC_FOUND)) * 4) - -void * _soxr_simd_aligned_malloc(size_t size) +void * SIMD_ALIGNED_MALLOC(size_t size) { char * p1 = 0, * p = malloc(size + SIMD_ALIGNMENT); if (p) { @@ -27,9 +21,9 @@ void * _soxr_simd_aligned_malloc(size_t size) -void * _soxr_simd_aligned_calloc(size_t nmemb, size_t size) +void * SIMD_ALIGNED_CALLOC(size_t nmemb, size_t size) { - void * p = _soxr_simd_aligned_malloc(nmemb * size); + void * p = SIMD_ALIGNED_MALLOC(nmemb * size); if (p) memset(p, 0, nmemb * size); return p; @@ -37,7 +31,7 @@ void * _soxr_simd_aligned_calloc(size_t nmemb, size_t size) -void _soxr_simd_aligned_free(void * p1) +void SIMD_ALIGNED_FREE(void * p1) { if (p1) free(*((void * *)p1 - 1)); @@ -45,11 +39,16 @@ void _soxr_simd_aligned_free(void * p1) -void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float * b) +#define PFFT_MACROS_ONLY +#include "pffft.c" + + + +void ORDERED_CONVOLVE_SIMD(int n, void * not_used, float * a, float const * b) { int i; float ab0, ab1; - v4sf * /*RESTRICT*/ va = (v4sf *)a; + v4sf * RESTRICT va = (v4sf *)a; v4sf const * RESTRICT vb = (v4sf const *)b; assert(VALIGNED(a) && VALIGNED(b)); ab0 = a[0] * b[0], ab1 = a[1] * b[1]; @@ -68,11 +67,11 @@ void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float -void _soxr_ordered_partial_convolve_simd(int n, float * a, const float * b) +void ORDERED_PARTIAL_CONVOLVE_SIMD(int n, float * a, float const * b) { int i; float ab0; - v4sf * /*RESTRICT*/ va = (v4sf *)a; + v4sf * RESTRICT va = (v4sf *)a; v4sf const * RESTRICT vb = (v4sf const *)b; assert(VALIGNED(a) && VALIGNED(b)); ab0 = a[0] * b[0]; diff --git a/src/simd.h b/src/simd.h deleted file mode 100644 index 71eefc6..0000000 --- a/src/simd.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net - * Licence for this file: LGPL v2.1 See LICENCE for details. */ - -#if !defined simd_included -#define simd_included - -#include - -void * _soxr_simd_aligned_malloc(size_t); -void * _soxr_simd_aligned_calloc(size_t, size_t); -void _soxr_simd_aligned_free(void *); - -void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float * b); -void _soxr_ordered_partial_convolve_simd(int n, float * a, const float * b); - -#endif diff --git a/src/simd32-dev.h b/src/simd32-dev.h new file mode 100644 index 0000000..0408758 --- /dev/null +++ b/src/simd32-dev.h @@ -0,0 +1,54 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#if !defined soxr_simd_dev_included +#define soxr_simd_dev_included + +#if defined __GNUC__ + #define SIMD_INLINE(T) static __inline T __attribute__((always_inline)) + #define vAlign __attribute__((aligned (16))) +#elif defined _MSC_VER + #define SIMD_INLINE(T) static __forceinline T + #define vAlign __declspec(align(16)) +#endif + +#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86 + +#include + +#define vZero() _mm_setzero_ps() +#define vSet1(a) _mm_set_ss(a) +#define vMul(a,b) _mm_mul_ps(a,b) +#define vAdd(a,b) _mm_add_ps(a,b) +#define vMac(a,b,c) vAdd(vMul(a,b),c) +#define vLds(a) _mm_set1_ps(a) +#define vLd(a) _mm_load_ps(a) +#define vLdu(a) _mm_loadu_ps(a) + +typedef __m128 v4_t; + +SIMD_INLINE(void) vStorSum(float * a, v4_t b) { + v4_t t = vAdd(_mm_movehl_ps(b, b), b); + _mm_store_ss(a, vAdd(t, _mm_shuffle_ps(t,t,1)));} + +#elif defined __arm__ + +#include + +#define vZero() vdupq_n_f32(0) +#define vMul(a,b) vmulq_f32(a,b) +#define vAdd(a,b) vaddq_f32(a,b) +#define vMac(a,b,c) vmlaq_f32(c,a,b) +#define vLds(a) vld1q_dup_f32(&(a)) +#define vLd(a) vld1q_f32(a) +#define vLdu(a) vld1q_f32(a) + +typedef float32x4_t v4_t; + +SIMD_INLINE(void) vStorSum(float * a, v4_t b) { + float32x2_t t = vadd_f32(vget_high_f32(b), vget_low_f32(b)); + *a = vget_lane_f32(vpadd_f32(t, t), 0);} + +#endif + +#endif diff --git a/src/simd32.c b/src/simd32.c new file mode 100644 index 0000000..3f6cb81 --- /dev/null +++ b/src/simd32.c @@ -0,0 +1,8 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#define PFFFT_DOUBLE 0 + +#include "simd32.h" + +#include "simd.c" diff --git a/src/simd32.h b/src/simd32.h new file mode 100644 index 0000000..bc185da --- /dev/null +++ b/src/simd32.h @@ -0,0 +1,23 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#if !defined simd32_included +#define simd32_included + +#include + +void * _soxr_simd32_aligned_malloc(size_t); +void * _soxr_simd32_aligned_calloc(size_t, size_t); +void _soxr_simd32_aligned_free(void *); + +#define SIMD_ALIGNED_MALLOC _soxr_simd32_aligned_malloc +#define SIMD_ALIGNED_CALLOC _soxr_simd32_aligned_calloc +#define SIMD_ALIGNED_FREE _soxr_simd32_aligned_free + +void _soxr_ordered_convolve_simd32(int n, void * not_used, float * a, float const * b); +void _soxr_ordered_partial_convolve_simd32(int n, float * a, float const * b); + +#define ORDERED_CONVOLVE_SIMD _soxr_ordered_convolve_simd32 +#define ORDERED_PARTIAL_CONVOLVE_SIMD _soxr_ordered_partial_convolve_simd32 + +#endif diff --git a/src/simd64-dev.h b/src/simd64-dev.h new file mode 100644 index 0000000..37484e4 --- /dev/null +++ b/src/simd64-dev.h @@ -0,0 +1,42 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#if !defined soxr_simd64_dev_included +#define soxr_simd64_dev_included + +#if defined __GNUC__ + #define SIMD_INLINE(T) static __inline T __attribute__((always_inline)) + #define vAlign __attribute__((aligned (32))) +#elif defined _MSC_VER + #define SIMD_INLINE(T) static __forceinline T + #define vAlign __declspec(align(32)) +#else + #define SIMD_INLINE(T) static __inline T +#endif + +#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86 + +#include + +#if defined __AVX__ + +#define vZero() _mm256_setzero_pd() +#define vSet1(a) _mm256_set_pd(0,0,0,a) +#define vMul(a,b) _mm256_mul_pd(a,b) +#define vAdd(a,b) _mm256_add_pd(a,b) +#define vMac(a,b,c) vAdd(vMul(a,b),c) /* Note: gcc -mfma will `fuse' these */ +#define vLds(a) _mm256_set1_pd(a) +#define vLd(a) _mm256_load_pd(a) +#define vLdu(a) _mm256_loadu_pd(a) + +typedef __m256d v4_t; + +SIMD_INLINE(void) vStorSum(double * a, v4_t b) { + b = _mm256_hadd_pd(b, _mm256_permute2f128_pd(b,b,1)); + _mm_store_sd(a, _mm256_castpd256_pd128(_mm256_hadd_pd(b,b)));} + +#endif + +#endif + +#endif diff --git a/src/simd64.c b/src/simd64.c new file mode 100644 index 0000000..b601750 --- /dev/null +++ b/src/simd64.c @@ -0,0 +1,8 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#define PFFFT_DOUBLE 1 + +#include "simd64.h" + +#include "simd.c" diff --git a/src/simd64.h b/src/simd64.h new file mode 100644 index 0000000..0ebc439 --- /dev/null +++ b/src/simd64.h @@ -0,0 +1,23 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#if !defined simd64_included +#define simd64_included + +#include + +void * _soxr_simd64_aligned_malloc(size_t); +void * _soxr_simd64_aligned_calloc(size_t, size_t); +void _soxr_simd64_aligned_free(void *); + +#define SIMD_ALIGNED_MALLOC _soxr_simd64_aligned_malloc +#define SIMD_ALIGNED_CALLOC _soxr_simd64_aligned_calloc +#define SIMD_ALIGNED_FREE _soxr_simd64_aligned_free + +void _soxr_ordered_convolve_simd64(int n, void * not_used, double * a, double const * b); +void _soxr_ordered_partial_convolve_simd64(int n, double * a, double const * b); + +#define ORDERED_CONVOLVE_SIMD _soxr_ordered_convolve_simd64 +#define ORDERED_PARTIAL_CONVOLVE_SIMD _soxr_ordered_partial_convolve_simd64 + +#endif diff --git a/src/soxr-lsr.h b/src/soxr-lsr.h index c0923aa..2acd138 100644 --- a/src/soxr-lsr.h +++ b/src/soxr-lsr.h @@ -37,9 +37,9 @@ #endif typedef float SRC_SAMPLE; -#if !defined SOXR_LIB enum SRC_SRCTYPE_e {SRC_SINC_BEST_QUALITY, SRC_SINC_MEDIUM_QUALITY, SRC_SINC_FASTEST, SRC_ZERO_ORDER_HOLD, SRC_LINEAR}; +#if !defined SOXR_LIB typedef int SRC_SRCTYPE; typedef int SRC_ERROR; typedef long (* src_callback_t)(void *, SRC_SAMPLE * *); diff --git a/src/soxr.c b/src/soxr.c index d36891a..d963bec 100644 --- a/src/soxr.c +++ b/src/soxr.c @@ -1,10 +1,8 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ #include -#include #include -#include #include #include @@ -13,20 +11,17 @@ #include "internal.h" #if AVUTIL_FOUND -#include + #include #endif #if WITH_DEV_TRACE -int _soxr_trace_level(void) -{ - char const * e = getenv("SOXR_TRACE"); - return e? atoi(e) : 4; -} - +#include +#include +int _soxr_trace_level; void _soxr_trace(char const * fmt, ...) { @@ -96,52 +91,6 @@ struct soxr { -#define RESET_ON_CLEAR (1u<<31) - -/* TODO: these should not be here. */ -#define TO_3dB(a) ((1.6e-6*a-7.5e-4)*a+.646) -#define LOW_Q_BW0 (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */ - - - -soxr_quality_spec_t soxr_quality_spec(unsigned long recipe, unsigned long flags) -{ - soxr_quality_spec_t spec, * p = &spec; - unsigned quality = recipe & 0xf; - double rej; - memset(p, 0, sizeof(*p)); - if (quality > 13) { - p->e = "invalid quality type"; - return spec; - } - flags |= quality < SOXR_LSR0Q? RESET_ON_CLEAR : 0; - if (quality == 13) - quality = 6; - else if (quality > 10) - quality = 0; - p->phase_response = "\62\31\144"[(recipe & 0x30) >> 4]; - p->stopband_begin = 1; - p->precision = !quality? 0: quality < 3? 16 : quality < 8? 4 + quality * 4 : 55 - quality * 4; - rej = p->precision * linear_to_dB(2.); - p->flags = flags; - if (quality < 8) { - p->passband_end = quality == 1? LOW_Q_BW0 : 1 - .05 / TO_3dB(rej); - if (quality <= 2) - p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM; - } - else { - static float const bw[] = {.931f, .832f, .663f}; - p->passband_end = bw[quality - 8]; - if (quality - 8 == 2) - p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM; - } - if (recipe & SOXR_STEEP_FILTER) - p->passband_end = 1 - .01 / TO_3dB(rej); - return spec; -} - - - char const * soxr_engine(soxr_t p) { return resampler_id(); @@ -163,82 +112,132 @@ soxr_error_t soxr_error(soxr_t p) -soxr_runtime_spec_t soxr_runtime_spec(unsigned num_threads) -{ - soxr_runtime_spec_t spec, * p = &spec; - memset(p, 0, sizeof(*p)); - p->log2_min_dft_size = 10; - p->log2_large_dft_size = 17; - p->coef_size_kbytes = 400; - p->num_threads = num_threads; - return spec; -} - - - -soxr_io_spec_t soxr_io_spec( - soxr_datatype_t itype, - soxr_datatype_t otype) -{ - soxr_io_spec_t spec, * p = &spec; - memset(p, 0, sizeof(*p)); - if ((itype | otype) >= SOXR_SPLIT * 2) - p->e = "invalid io datatype(s)"; - else { - p->itype = itype; - p->otype = otype; - p->scale = 1; - } - return spec; -} - - - -#if SIMD_FOUND -static bool cpu_has_simd(void) -{ -#if defined __x86_64__ || defined _M_X64 - return true; -#elif defined __GNUC__ && defined i386 - uint32_t eax, ebx, ecx, edx; - __asm__ __volatile__ ( - "pushl %%ebx \n\t" - "cpuid \n\t" - "movl %%ebx, %1\n\t" - "popl %%ebx \n\t" - : "=a"(eax), "=r"(ebx), "=c"(ecx), "=d"(edx) - : "a"(1) - : "cc" ); - return !!(edx & 0x06000000); -#elif defined _MSC_VER && defined _M_IX86 - uint32_t d; - __asm { - xor eax, eax - inc eax - push ebx - cpuid - pop ebx - mov d, edx - } - return !!(d & 0x06000000); -#elif defined AV_CPU_FLAG_NEON - return !!(av_get_cpu_flags() & AV_CPU_FLAG_NEON); -#endif - return false; -} - - - -static bool should_use_simd(void) -{ - char const * e = getenv("SOXR_USE_SIMD"); - return e? !!atoi(e) : cpu_has_simd(); -} +#if WITH_CR32S || WITH_CR64S + #if defined __GNUC__ && defined __x86_64__ + #define CPUID(type, eax_, ebx_, ecx_, edx_) \ + __asm__ __volatile__ ( \ + "cpuid \n\t" \ + : "=a" (eax_), "=b" (ebx_), "=c" (ecx_), "=d" (edx_) \ + : "a" (type), "c" (0)); + #elif defined __GNUC__ && defined __i386__ + #define CPUID(type, eax_, ebx_, ecx_, edx_) \ + __asm__ __volatile__ ( \ + "mov %%ebx, %%edi \n\t" \ + "cpuid \n\t" \ + "xchg %%edi, %%ebx \n\t" \ + : "=a" (eax_), "=D" (ebx_), "=c" (ecx_), "=d" (edx_) \ + : "a" (type), "c" (0)); + #elif defined _M_X64 && defined _MSC_VER && _MSC_VER > 1500 + void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue); + #pragma intrinsic(__cpuidex) + #define CPUID(type, eax_, ebx_, ecx_, edx_) do { \ + int regs[4]; \ + __cpuidex(regs, type, 0); \ + eax_ = regs[0], ebx_ = regs[1], ecx_ = regs[2], edx_ = regs[3]; \ + } while(0) + #elif defined _M_X64 && defined _MSC_VER + void __cpuidex(int CPUInfo[4], int info_type); + #pragma intrinsic(__cpuidex) + #define CPUID(type, eax_, ebx_, ecx_, edx_) do { \ + int regs[4]; \ + __cpuidex(regs, type); \ + eax_ = regs[0], ebx_ = regs[1], ecx_ = regs[2], edx_ = regs[3]; \ + } while(0) + #elif defined _M_IX86 && defined _MSC_VER + #define CPUID(type, eax_, ebx_, ecx_, edx_) \ + __asm pushad \ + __asm mov eax, type \ + __asm xor ecx, ecx \ + __asm cpuid \ + __asm mov eax_, eax \ + __asm mov ebx_, ebx \ + __asm mov ecx_, ecx \ + __asm mov edx_, edx \ + __asm popad + #endif #endif -extern control_block_t _soxr_rate32s_cb, _soxr_rate32_cb, _soxr_rate64_cb, _soxr_vr32_cb; +#if WITH_CR32S + static bool cpu_has_simd32(void) + { + #if defined __x86_64__ || defined _M_X64 + return true; + #elif defined __i386__ || defined _M_IX86 + enum {SSE = 1 << 25, SSE2 = 1 << 26}; + unsigned eax_, ebx_, ecx_, edx_; + CPUID(1, eax_, ebx_, ecx_, edx_); + return (edx_ & (SSE|SSE2)) != 0; + #elif defined AV_CPU_FLAG_NEON + return !!(av_get_cpu_flags() & AV_CPU_FLAG_NEON); + #else + return false; + #endif + } + + static bool should_use_simd32(void) + { + char const * e = getenv("SOXR_USE_SIMD32"); + return e? !!atoi(e) : cpu_has_simd32(); + } +#endif + + + +#if WITH_CR64S + #if defined __GNUC__ + #define XGETBV(type, eax_, edx_) \ + __asm__ __volatile__ ( \ + ".byte 0x0f, 0x01, 0xd0\n" \ + : "=a"(eax_), "=d"(edx_) : "c" (type)); + #elif defined _M_X64 && defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219 + #include + #define XGETBV(type, eax_, edx_) do { \ + union {uint64_t x; uint32_t y[2];} a = {_xgetbv(0)}; \ + eax_ = a.y[0], edx_ = a.y[1]; \ + } while(0) + #elif defined _M_IX86 && defined _MSC_VER + #define XGETBV(type, eax_, edx_) \ + __asm pushad \ + __asm mov ecx, type \ + __asm _emit 0x0f \ + __asm _emit 0x01 \ + __asm _emit 0xd0 \ + __asm mov eax_, eax \ + __asm mov edx_, edx \ + __asm popad + #else + #define XGETBV(type, eax_, edx_) eax_ = edx_ = 0 + #endif + + static bool cpu_has_simd64(void) + { + enum {OSXSAVE = 1 << 27, AVX = 1 << 28}; + unsigned eax_, ebx_, ecx_, edx_; + CPUID(1, eax_, ebx_, ecx_, edx_); + if ((ecx_ & (OSXSAVE|AVX)) == (OSXSAVE|AVX)) { + XGETBV(0, eax_, edx_); + return (eax_ & 6) == 6; + } + return false; + } + + static bool should_use_simd64(void) + { + char const * e = getenv("SOXR_USE_SIMD64"); + return e? !!atoi(e) : cpu_has_simd64(); + } +#endif + + + +extern control_block_t + _soxr_rate32_cb, + _soxr_rate32s_cb, + _soxr_rate64_cb, + _soxr_rate64s_cb, + _soxr_vr32_cb; @@ -280,6 +279,11 @@ soxr_t soxr_create( soxr_t p = 0; soxr_error_t error = 0; +#if WITH_DEV_TRACE + char const * e = getenv("SOXR_TRACE"); + _soxr_trace_level = e? atoi(e) : 0; +#endif + if (q_spec && q_spec->e) error = q_spec->e; else if (io_spec && (io_spec->itype | io_spec->otype) >= SOXR_SPLIT * 2) error = "invalid io datatype(s)"; @@ -319,27 +323,39 @@ soxr_t soxr_create( p->seed = (unsigned long)time(0) ^ (unsigned long)(size_t)p; -#if WITH_SINGLE_PRECISION - if (!WITH_DOUBLE_PRECISION || (p->q_spec.precision <= 20 && !(p->q_spec.flags & SOXR_DOUBLE_PRECISION)) - || (p->q_spec.flags & SOXR_VR)) { +#if WITH_CR32 || WITH_CR32S || WITH_VR32 + if (0 +#if WITH_VR32 + || ((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR)) +#endif +#if WITH_CR32 || WITH_CR32S + || !(WITH_CR64 || WITH_CR64S) || (p->q_spec.precision <= 20 && !(p->q_spec.flags & SOXR_DOUBLE_PRECISION)) +#endif + ) { p->deinterleave = (deinterleave_t)_soxr_deinterleave_f; p->interleave = (interleave_t)_soxr_interleave_f; memcpy(&p->control_block, - (p->q_spec.flags & SOXR_VR)? &_soxr_vr32_cb : -#if SIMD_FOUND - should_use_simd()? &_soxr_rate32s_cb : +#if WITH_VR32 + ((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR))? &_soxr_vr32_cb : +#endif +#if WITH_CR32S + !WITH_CR32 || should_use_simd32()? &_soxr_rate32s_cb : #endif &_soxr_rate32_cb, sizeof(p->control_block)); } -#if WITH_DOUBLE_PRECISION +#if WITH_CR64 || WITH_CR64S else #endif #endif -#if WITH_DOUBLE_PRECISION +#if WITH_CR64 || WITH_CR64S { p->deinterleave = (deinterleave_t)_soxr_deinterleave; p->interleave = (interleave_t)_soxr_interleave; - memcpy(&p->control_block, &_soxr_rate64_cb, sizeof(p->control_block)); + memcpy(&p->control_block, +#if WITH_CR64S + !WITH_CR64 || should_use_simd64()? &_soxr_rate64s_cb : +#endif + &_soxr_rate64_cb, sizeof(p->control_block)); } #endif diff --git a/src/soxr.h b/src/soxr.h index 56132cb..640b698 100644 --- a/src/soxr.h +++ b/src/soxr.h @@ -1,4 +1,4 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by @@ -269,9 +269,6 @@ struct soxr_runtime_spec { /* Typically */ #define SOXR_COEF_INTERP_LOW 2u /* Man. select: less CPU, more memory. */ #define SOXR_COEF_INTERP_HIGH 3u /* Man. select: more CPU, less memory. */ -#define SOXR_STRICT_BUFFERING 4u /* Reserved for future use. */ -#define SOXR_NOSMALLINTOPT 8u /* For test purposes only. */ - /* -------------------------- API type constructors ------------------------- */ @@ -296,7 +293,7 @@ SOXR soxr_quality_spec_t soxr_quality_spec( #define SOXR_24_BITQ 5 #define SOXR_28_BITQ 6 #define SOXR_32_BITQ 7 - /* Libsamplerate equivalent qualities: */ + /* For internal use only; to be removed: */ #define SOXR_LSR0Q 8 /* 'Best sinc'. */ #define SOXR_LSR1Q 9 /* 'Medium sinc'. */ #define SOXR_LSR2Q 10 /* 'Fast sinc'. */ @@ -304,8 +301,8 @@ SOXR soxr_quality_spec_t soxr_quality_spec( #define SOXR_LINEAR_PHASE 0x00 #define SOXR_INTERMEDIATE_PHASE 0x10 #define SOXR_MINIMUM_PHASE 0x30 + #define SOXR_STEEP_FILTER 0x40 -#define SOXR_ALLOW_ALIASING 0x80 /* Reserved for future use. */ diff --git a/src/vr32.c b/src/vr32.c index 9ad17d1..8b1a259 100644 --- a/src/vr32.c +++ b/src/vr32.c @@ -1,16 +1,10 @@ -/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net * Licence for this file: LGPL v2.1 See LICENCE for details. */ /* Variable-rate resampling. */ #include -#include -#if !defined M_PI -#define M_PI 3.14159265358979323846 -#endif -#if !defined M_LN2 -#define M_LN2 0.69314718055994530942 -#endif +#include "math-wrap.h" #include #include #include "internal.h" diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0eba7e0..3c28f9c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,7 +10,10 @@ foreach (fe ${SOURCES}) add_executable (${f} ${fe}) endforeach () -enable_testing () +# Can't use c89 for this file: +if (CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang") + set_property (SOURCE throughput APPEND_STRING PROPERTY COMPILE_FLAGS "-std=gnu89") +endif () set (sweep_to_freq 22050) set (leader 1) @@ -20,33 +23,36 @@ math (EXPR base_rate "${sweep_to_freq} + ${sweep_to_freq}") macro (add_vector r) set (output ${CMAKE_CURRENT_BINARY_DIR}/ref-${r}.s32) add_custom_command (OUTPUT ${output} DEPENDS vector-gen ${CMAKE_CURRENT_LIST_FILE} - COMMAND vector-gen ${r} ${leader} ${len} ${sweep_to_freq} 1 ${output}) + COMMAND vector-gen ${r} ${leader} ${len} 0 ${sweep_to_freq} 1 ${output}) set (vectors ${output} ${vectors}) endmacro () -macro (add_cmp_test from to bits) - set (name ${bits}-bit-perfect-${from}-${to}) - add_test (NAME ${name} COMMAND ${CMAKE_COMMAND} -Dbits=${bits} -DBIN=${BIN} -DEXAMPLES_BIN=${EXAMPLES_BIN} -Dleader=${leader} -Dto=${to} - -Dfrom=${from} -Dlen=${len} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmp-test.cmake) - add_vector (${from}) - add_vector (${to}) +macro (add_cmp_test irate orate bits) + set (name ${bits}-bit-perfect-${irate}-${orate}) + add_test (NAME ${name} COMMAND ${CMAKE_COMMAND} -Dbits=${bits} -DBIN=${BIN} + -DEXAMPLES_BIN=${EXAMPLES_BIN} -DlenToSkip=${leader} -Dorate=${orate} + -Dirate=${irate} -Dlen=${len} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmp-test.cmake) + add_vector (${irate}) + add_vector (${orate}) endmacro () unset (test_bits) -if (WITH_SINGLE_PRECISION) +if (WITH_CR32 OR WITH_CR32S OR WITH_CR64 OR WITH_CR64S) set (test_bits 20) endif () -if (WITH_DOUBLE_PRECISION) - set (test_bits ${test_bits} 24) +if (WITH_CR64 OR WITH_CR64S) + set (test_bits ${test_bits} 28) endif () foreach (b ${test_bits}) - foreach (r 96000 65537) + foreach (r 192000 65537) add_cmp_test (${base_rate} ${r} ${b}) add_cmp_test (${r} ${base_rate} ${b}) endforeach () endforeach () -add_custom_target (test-vectors ALL DEPENDS ${vectors}) +if (NOT CMAKE_CROSSCOMPILING) + add_custom_target (test-vectors ALL DEPENDS ${vectors}) +endif () add_test (1-delay-clear ${BIN}1-delay-clear) diff --git a/tests/cmp-test.cmake b/tests/cmp-test.cmake index 8db76c5..a836322 100644 --- a/tests/cmp-test.cmake +++ b/tests/cmp-test.cmake @@ -1,17 +1,13 @@ # SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net # Licence for this file: LGPL v2.1 See LICENCE for details. -if (${bits} STREQUAL 24) - set (quality 45) -else () - set (quality 44) -endif () +math (EXPR quality "43 + (${bits} - 13) / 4") +set (ofile ${irate}-${orate}-${quality}.s32) +#message (STATUS "Output file = [${ofile}]") -set (output ${from}-${to}-${quality}.s32) - -execute_process(COMMAND ${EXAMPLES_BIN}3-options-input-fn ${from} ${to} 1 2 2 ${quality} a - INPUT_FILE ref-${from}.s32 - OUTPUT_FILE ${output} +execute_process(COMMAND ${EXAMPLES_BIN}3-options-input-fn ${irate} ${orate} 1 2 2 ${quality} a + INPUT_FILE ref-${irate}.s32 + OUTPUT_FILE ${ofile} ERROR_VARIABLE test_error RESULT_VARIABLE test_result) @@ -19,7 +15,11 @@ if (test_result) message (FATAL_ERROR "Resampling failure: ${test_error}") endif () -execute_process(COMMAND ${BIN}vector-cmp ref-${to}.s32 ${output} ${to} ${leader} ${len} ${bits} 98 +set (percentageToCheck 98) +math (EXPR lenToCheck "${len} * ${percentageToCheck}") +string (REGEX REPLACE "(..)$" ".\\1" lenToCheck "${lenToCheck}") # Divide by 100 + +execute_process(COMMAND ${BIN}vector-cmp ref-${orate}.s32 ${ofile} ${orate} ${lenToSkip} ${lenToCheck} ${bits} OUTPUT_VARIABLE test_output RESULT_VARIABLE test_result) diff --git a/tests/io-test b/tests/io-test index 4205c71..608bc9a 100755 --- a/tests/io-test +++ b/tests/io-test @@ -35,7 +35,7 @@ test z$1 != z && j=$1 || j=1 for c in `seq 1 $j`; do for n in `seq 0 3`; do - sox -r $ir -n $c.${types[$n]} synth $len sin $f gain -.1 + sox -R -r $ir -n $c.${types[$n]} synth $len sin $f gain -.1 done n=0 diff --git a/tests/large-ratio-test b/tests/large-ratio-test index 74cd0a4..540c5df 100755 --- a/tests/large-ratio-test +++ b/tests/large-ratio-test @@ -1,22 +1,22 @@ #!/usr/bin/env bash set -e -# SoX Resampler Library Copyright (c) 2007-15 robs@users.sourceforge.net +# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net # Licence for this file: LGPL v2.1 See LICENCE for details. # Tests interpolating then decimating by the same, large ratio. tool=../examples/3-options-input-fn w=$(echo -e "`sox --ver |sed 's/.*SoX v//'` d\n14.4.1 k"|sort -Vr|head -1|sed 's/.* //') -q=6 -ratio=2e4 -srate=8000 -nrate=`expr $srate / 2` +q=4 +test x$1 = x && ratio=1e5 || ratio=$1 +test x$2 = x && rate=8000 || rate=$2 -../tests/vector-gen $srate 0 8 $nrate .9375 1.s32 +sox -r$rate -n 1.s32 synth 10 sin 0:`expr $rate / 2` vol .9375 +sync -$tool 1 $ratio 1 2 1 $q < 1.s32 | $tool $ratio 1 1 1 2 $q > 2.s32 +time { $tool 1 $ratio 1 2 1 $q a < 1.s32 | $tool $ratio 1 1 1 2 $q a > 2.s32;} -sox -M -r $srate -c1 1.s32 -r $srate -c1 2.s32 -n spectrogram -hw$w -Z-10 -z180 -o lr-$w.png -c "large-ratio-test q:$q ratio:$ratio" +sox -mv-1 -r$rate -c1 1.s32 -r$rate -c1 2.s32 -n spectrogram -hw$w -z150 -o lr-$w.png -c "large-ratio-test q:$q ratio:$ratio" rm [12].s32 diff --git a/tests/throughput-test b/tests/throughput-test new file mode 100755 index 0000000..544c620 --- /dev/null +++ b/tests/throughput-test @@ -0,0 +1,4 @@ +#!/bin/sh +set -e + +for n in `seq 0 3`; do ./throughput 44.1 48 1 0 $n; done diff --git a/tests/throughput-test.bat b/tests/throughput-test.bat new file mode 100644 index 0000000..a183881 --- /dev/null +++ b/tests/throughput-test.bat @@ -0,0 +1 @@ +for /L %%i in (0,1,3) DO throughput 44.1 48 1 0 %%i diff --git a/tests/throughput.c b/tests/throughput.c new file mode 100644 index 0000000..21a4c32 --- /dev/null +++ b/tests/throughput.c @@ -0,0 +1,111 @@ +/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net + * Licence for this file: LGPL v2.1 See LICENCE for details. */ + +#include +#include "rint.h" +#include "../examples/examples-common.h" + +#define k 1000 +#if defined _WIN32 + #define WIN32_LEAN_AND_MEAN + #include + #define timerDecl LARGE_INTEGER start, stop, tmp + #define timerStart(msecs) QueryPerformanceCounter(&start), \ + QueryPerformanceFrequency(&tmp), \ + stop.QuadPart = (msecs * tmp.QuadPart + k/2) / k + #define timerRunning() (QueryPerformanceCounter(&tmp), \ + (tmp.QuadPart-start.QuadPart < stop.QuadPart)) +#else + #include + #define timerDecl struct timespec stop, tmp + #define timerStart(msecs) clock_gettime(CLOCK_MONOTONIC, &stop), \ + stop.tv_nsec += (msecs%k)*(k*k), \ + stop.tv_sec += msecs/k + stop.tv_nsec/(k*k*k), \ + stop.tv_nsec %= k*k*k + #define timerRunning() (clock_gettime(CLOCK_MONOTONIC, &tmp), \ + (tmp.tv_sec < stop.tv_sec || tmp.tv_nsec < stop.tv_nsec)) +#endif + +int main(int n, char const * arg[]) +{ + char const * const arg0 = n? --n, *arg++ : "", * engine = ""; + double const irate = n? --n, atof(*arg++) : 96000.; + double const orate = n? --n, atof(*arg++) : 44100.; + unsigned const chans = n? --n, (unsigned)atoi(*arg++) : 1; + soxr_datatype_t const itype = n? --n, (soxr_datatype_t)atoi(*arg++) : 0; + unsigned const ospec = n? --n, (soxr_datatype_t)atoi(*arg++) : 0; + unsigned long const q_recipe= n? --n, strtoul(*arg++, 0, 16) : SOXR_HQ; + unsigned long const q_flags = n? --n, strtoul(*arg++, 0, 16) : 0; + double const passband_end = n? --n, atof(*arg++) : 0; + double const stopband_begin = n? --n, atof(*arg++) : 0; + double const phase_response = n? --n, atof(*arg++) : -1; + int const use_threads = n? --n, atoi(*arg++) : 1; + soxr_datatype_t const otype = ospec & 3; + + soxr_quality_spec_t q_spec = soxr_quality_spec(q_recipe, q_flags); + soxr_io_spec_t io_spec = soxr_io_spec(itype, otype); + soxr_runtime_spec_t const runtime_spec = soxr_runtime_spec(!use_threads); + + /* Allocate resampling input and output buffers in proportion to the input + * and output rates: */ + #define buf_total_len 15000 /* In samples per channel. */ + size_t const osize = soxr_datatype_size(otype) * chans; + size_t const isize = soxr_datatype_size(itype) * chans; + size_t const olen0= (size_t)(orate * buf_total_len / (irate + orate) + .5); + size_t const olen = min(max(olen0, 1), buf_total_len - 1); + size_t const ilen = buf_total_len - olen; + void * const obuf = malloc(osize * olen); + void * const ibuf = malloc(isize * ilen); + + size_t odone = 0, clips = 0, omax = 0, i; + soxr_error_t error; + soxr_t soxr; + + + /* Overrides (if given): */ + if (passband_end > 0) q_spec.passband_end = passband_end / 100; + if (stopband_begin > 0) q_spec.stopband_begin = stopband_begin / 100; + if (phase_response >=0) q_spec.phase_response = phase_response; + io_spec.flags = ospec & ~7u; + + /* Create a stream resampler: */ + soxr = soxr_create( + irate, orate, chans, /* Input rate, output rate, # of channels. */ + &error, /* To report any error during creation. */ + &io_spec, &q_spec, &runtime_spec); + + if (!error) { /* If all is well, run the resampler: */ + engine = soxr_engine(soxr); +#define RAND ((rand()*(1./RAND_MAX)-.5)*1) + switch (itype & 3) { + case 0: for (i=0;i #include -#include #include "../src/rint.h" +#include "../examples/examples-common.h" -int main(int bit, char const * arg[]) +#define TYPE 0 /* As vector-gen */ + +#if TYPE + #define sample_t double + #define N 50 + #define DIFF(s1,s2) abs(rint32((s1-s2)*ldexp(1,N-1))) +#else + #define sample_t int32_t + #define N 32 + #define DIFF(s1,s2) abs((int)(s1-s2)) +#endif + +int main(int argc, char const * arg[]) { - FILE * f1 = fopen(arg[1], "rb"), - * f2 = fopen(arg[2], "rb"); - double rate = atof (arg[3]), /* Rate for this vector */ - leader_len = atof (arg[4]), /* Leader length in seconds */ - len = atof (arg[5]), /* Sweep length (excl. leader_len) */ - expect_bits= atof (arg[6]), - expect_bw = atof (arg[7]); + int two = !!arg[2][0]; + FILE * f1 = fopen(arg[1], "rb"), * f2 = two? fopen(arg[2], "rb") : 0; + double rate = atof (arg[3]), /* Sample-rate */ + skip_len = atof (arg[4]), /* Skip length in seconds */ + len = atof (arg[5]), /* Compare length in seconds */ r; + int i = 0, count = rint32(rate * len), max = 0, diff; + sample_t s1, s2; - int32_t s1, s2; - long count = 0; - static long thresh[32]; - double bw, prev = 0; - - for (; fread(&s1, sizeof(s1), 1, f1) == 1 && - fread(&s2, sizeof(s2), 1, f2) == 1; ++count) { - long diff = abs((int)(s1 - s2)); - for (bit = 0; diff && bit < 32; bit++, diff >>= 1) - if ((diff & 1) && !thresh[bit]) - thresh[bit] = count + 1; - } - - if (count != (long)((leader_len + len) * rate + .5)) { - printf("incorrect file length\n"); - exit(1); - } - - for (bit = 0; bit < 32; ++bit) { - bw = ((double)thresh[bit] - 1) / rate - leader_len; - if (bit && bw >= 0 && (bw - prev) * 100 / len < .08) { - --bit; - break; + fseek(f1, rint32(rate * skip_len) * (int)sizeof(s1), SEEK_CUR); + if (two) { + fseek(f2, rint32(rate * skip_len) * (int)sizeof(s2), SEEK_CUR); + for (; i < count && + fread(&s1, sizeof(s1), 1, f1) && + fread(&s2, sizeof(s2), 1, f2); ++i) { + diff = DIFF(s1, s2); + max = max(max, diff); } - prev = bw; } - bit = 32 - bit; - bw = bw * 100 / len; - printf("Bit perfect to %i bits, from DC to %.2f%% nyquist.\n", bit, bw); - return !(bit >= expect_bits && bw >= expect_bw); + else for (; i < count && fread(&s1, sizeof(s1), 1, f1); ++i) { + diff = DIFF(s1, 0); + max = max(max, diff); + } + + if (i != count) { + fprintf(stderr, "incorrect file length\n"); + return 1; + } + printf("%f\n", r = N-log(max)/log(2)); + return argc>6? r 1 #include #endif -#include +#include "math-wrap.h" #include #include -#if !defined M_PI - #define M_PI 3.14159265358979323846 -#endif -#if QUAD - #define modf modfq - #define cos cosq - #define sin sinq - #undef M_PI - #define M_PI M_PIq - #define real __float128 - #define atof(x) strtoflt128(x, 0) +#if TYPE + #if TYPE > 1 + #define modf modfq + #define cos cosq + #define sin sinq + #define PI M_PIq + #define real __float128 + #define atof(x) strtoflt128(x, 0) + #else + #define modf modfl + #define cos cosl + #define sin sinl + #define PI M_PIl + #define real long double + #endif + #define MULT 1 + #define OUT(d) double output = d #else + #define PI M_PI #define real double #include "rint.h" + #define MULT (32768. * 65536 - 1/scale) + #define OUT(d) int32_t output = rint32(d) #endif -int main(int i, char const * argv[]) +int main(int argc, char const * argv[]) { - real rate = atof(argv[1]), /* Rate for this vector */ - lead_in_len = atof(argv[2]), /* Lead-in length in seconds */ - len = atof(argv[3]), /* Sweep length (excl. lead_in_len) */ - sweep_to_freq = atof(argv[4]), /* Sweep from DC to this freq. */ - multiplier = atof(argv[5]), /* For headroom */ - f1 = -sweep_to_freq / len * lead_in_len, f2 = sweep_to_freq, - n1 = rate * -lead_in_len, n2 = rate * len, - m = (f2 - f1) / (n2 - n1) / 2, dummy; - FILE * file = fopen(argv[6], "wb"); - i = (int)n1; - if (!file || i != n1) - exit(1); - for (; i < (int)(n2 + .5); ++i) { - double d1 = multiplier * sin(2 * M_PI * modf(i * m * i / rate, &dummy)); - double d = i < 0? d1 * (1 - cos(M_PI * (i + n1) / n1)) * .5 : d1; -#if QUAD - size_t actual = fwrite(&d, sizeof(d), 1, file); -#else - int32_t out = rint32(d * (32768. * 65536 - 1)); - size_t actual = fwrite(&out, sizeof(out), 1, file); -#endif - if (actual != 1) - return 1; + real rate = atof(argv[1]), /* Rate for this vector */ + lead_in_len = atof(argv[2]), /* Lead-in length in seconds */ + len = atof(argv[3]), /* Sweep length (excl. lead_in_len) */ + f1 = atof(argv[4]), + f2 = atof(argv[5]), + scale = atof(argv[6]), /* For headroom */ + n1 = rate * -lead_in_len, + m = (f2 - f1) / (rate * len * 2), dummy; + FILE * file = fopen(argv[7], "wb"); + int i = (int)n1, err = !file || i != n1; + for (; !err && i < (int)(rate*(len+lead_in_len)+.5); ++i) { + real d = sin(2 * PI * modf((f1 + i * m) * i / rate, &dummy)); + OUT((double)(scale * MULT * d)); + err = fwrite(&output, sizeof(output), 1, file) != 1; } - return 0; + return err |!argc; }