reorg/SIMD

master
Rob Sykes 2016-05-23 07:55:16 +01:00
parent 61cf151ebd
commit 52888cd410
83 changed files with 2711 additions and 1762 deletions

View File

@ -4,7 +4,8 @@
cmake_minimum_required (VERSION 3.0 FATAL_ERROR)
project (soxr C)
set (DESCRIPTION_SUMMARY "High quality, one-dimensional sample-rate conversion library")
set (DESCRIPTION_SUMMARY
"High quality, one-dimensional sample-rate conversion library")
@ -15,15 +16,20 @@ set (PROJECT_VERSION_MINOR 1)
set (PROJECT_VERSION_PATCH 2)
# For shared-object; if, since the last public release:
# * library code changed at all: ++revision
# * interfaces changed at all: ++current, revision = 0
# * interfaces added: ++age
# * interfaces removed: age = 0
#
# 1) library code changed at all: ++revision
# 2) interfaces changed at all: ++current, revision = 0
# 3) interfaces added: ++age
# 4) interfaces removed: age = 0
set (SO_VERSION_CURRENT 1)
set (SO_VERSION_REVISION 1)
set (SO_VERSION_AGE 1)
math (EXPR SO_VERSION_MAJOR "${SO_VERSION_CURRENT} - ${SO_VERSION_AGE}")
math (EXPR SO_VERSION_MINOR "${SO_VERSION_AGE}")
math (EXPR SO_VERSION_PATCH "${SO_VERSION_REVISION}")
# Main options:
@ -31,31 +37,45 @@ set (SO_VERSION_AGE 1)
include (CMakeDependentOption)
if (NOT CMAKE_BUILD_TYPE)
set (CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
set (CMAKE_BUILD_TYPE Release CACHE STRING
"Build type, one of: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif ()
option (BUILD_SHARED_LIBS "Build shared libraries." ON)
option (BUILD_TESTS "Build sanity-tests." ON)
option (BUILD_EXAMPLES "Build examples." OFF)
option (WITH_OPENMP "Include OpenMP threading." ON)
option (WITH_LSR_BINDINGS "Include a `libsamplerate'-like interface." ON)
cmake_dependent_option (BUILD_TESTS "Build sanity-tests." ON
"NOT CMAKE_CROSSCOMPILING" OFF)
cmake_dependent_option (WITH_SINGLE_PRECISION "Build with single precision (for up to 20-bit accuracy)." ON
"WITH_DOUBLE_PRECISION" ON)
cmake_dependent_option (WITH_DOUBLE_PRECISION "Build with double precision (for up to 32-bit accuracy)." ON
"WITH_SINGLE_PRECISION" ON)
cmake_dependent_option (WITH_SIMD "Use SIMD (for faster single precision)." ON
"WITH_SINGLE_PRECISION" OFF)
cmake_dependent_option (WITH_AVFFT "Use libavcodec (LGPL) for SIMD DFT." OFF
"WITH_SIMD;NOT WITH_PFFFT" OFF)
cmake_dependent_option (WITH_PFFFT "Use PFFFT (BSD-like licence) for SIMD DFT." ON
"WITH_SIMD;NOT WITH_AVFFT" OFF)
cmake_dependent_option (BUILD_SHARED_LIBS
"Build shared (dynamic) soxr libraries." ON
"NOT WITH_DEV_GPROF" OFF)
cmake_dependent_option (WITH_VR32
"Include HQ variable-rate resampling engine." ON
"WITH_CR32 OR WITH_CR64 OR WITH_CR32S OR WITH_CR64S OR NOT DEFINED WITH_VR32" ON)
cmake_dependent_option (WITH_CR32
"Include HQ constant-rate resampling engine." ON
"WITH_VR32 OR WITH_CR64 OR WITH_CR32S OR WITH_CR64S" ON)
cmake_dependent_option (WITH_CR64
"Include VHQ constant-rate resampling engine." ON
"WITH_VR32 OR WITH_CR32 OR WITH_CR32S OR WITH_CR64S" ON)
cmake_dependent_option (WITH_CR64S
"Include VHQ SIMD constant-rate resampling engine." ON
"WITH_VR32 OR WITH_CR32 OR WITH_CR32S OR WITH_CR64" ON)
cmake_dependent_option (WITH_CR32S
"Include HQ SIMD constant-rate resampling engine." ON
"WITH_VR32 OR WITH_CR64 OR WITH_CR32 OR WITH_CR64S" ON)
cmake_dependent_option (WITH_AVFFT
"Use libavcodec (LGPL) for HQ SIMD DFT." OFF
"WITH_CR32S;NOT WITH_PFFFT" OFF)
cmake_dependent_option (WITH_PFFFT
"Use PFFFT (BSD-like licence) for HQ SIMD DFT." ON
"WITH_CR32S;NOT WITH_AVFFT" OFF)
cmake_dependent_option (BUILD_LSR_TESTS "Build LSR tests." OFF
"UNIX;NOT CMAKE_CROSSCOMPILING;EXISTS ${PROJECT_SOURCE_DIR}/lsr-tests;WITH_LSR_BINDINGS" OFF)
option (WITH_DEV_TRACE "Enable developer trace output." OFF)
mark_as_advanced (WITH_DEV_TRACE)
option (WITH_DEV_TRACE "Enable developer trace capability." ON)
option (WITH_DEV_GPROF "Enable developer grpof output." OFF)
mark_as_advanced (WITH_DEV_TRACE WITH_DEV_GPROF)
@ -79,15 +99,23 @@ endif ()
if (WITH_OPENMP)
find_package (OpenMP)
if (OPENMP_FOUND)
if (OpenMP_FOUND)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
set (CMAKE_EXE_LINKER_FLAGS
"${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
set (CMAKE_SHARED_LINKER_FLAGS
"${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
endif ()
endif ()
if (WITH_SIMD)
find_package (SIMD)
if (WITH_CR32S)
find_package (SIMD32)
set (WITH_CR32S ${SIMD32_FOUND})
endif ()
if (WITH_CR64S)
find_package (SIMD64)
set (WITH_CR64S ${SIMD64_FOUND})
endif ()
if (WITH_AVFFT)
@ -98,7 +126,7 @@ if (WITH_AVFFT)
endif ()
endif ()
if (WITH_AVFFT OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm" AND SIMD_FOUND))
if (WITH_AVFFT OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm" AND SIMD32_FOUND))
find_package (LibAVUtil)
if (AVUTIL_FOUND)
include_directories (${AVUTIL_INCLUDE_DIRS})
@ -117,13 +145,22 @@ test_big_endian (HAVE_BIGENDIAN)
# Compiler configuration:
if (CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang")
set (PROJECT_CXX_FLAGS "-Wconversion -Wall -W -pedantic -Wundef -Wcast-align -Wpointer-arith -Wno-long-long")
set (PROJECT_C_FLAGS "${PROJECT_CXX_FLAGS} -std=gnu89 -Wnested-externs -Wmissing-prototypes -Wstrict-prototypes")
set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -Wconversion -Wall -Wextra")
set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -pedantic -Wundef -Wpointer-arith")
set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -Wno-long-long -Wno-keyword-macro")
if (WITH_DEV_GPROF)
set (PROJECT_CXX_FLAGS "${PROJECT_CXX_FLAGS} -pg")
endif ()
# Can use std=c89, but gnu89 should give faster sinf, cosf, etc.:
set (PROJECT_C_FLAGS "${PROJECT_CXX_FLAGS} -std=gnu89 -Wnested-externs")
set (PROJECT_C_FLAGS "${PROJECT_C_FLAGS} -Wmissing-prototypes -Wstrict-prototypes")
if (CMAKE_BUILD_TYPE STREQUAL "Release")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s") # strip
endif ()
cmake_dependent_option (VISIBILITY_HIDDEN "Build with -fvisibility=hidden." ON
cmake_dependent_option (VISIBILITY_HIDDEN
"Build shared libraries with -fvisibility=hidden." ON
"BUILD_SHARED_LIBS" OFF)
mark_as_advanced (VISIBILITY_HIDDEN)
if (VISIBILITY_HIDDEN)
add_definitions (-fvisibility=hidden -DSOXR_VISIBILITY)
endif ()
@ -131,9 +168,14 @@ endif ()
if (MSVC)
add_definitions (-D_USE_MATH_DEFINES -D_CRT_SECURE_NO_WARNINGS)
option (ENABLE_STATIC_RUNTIME "Visual Studio, link with runtime statically." OFF)
if (ENABLE_STATIC_RUNTIME)
foreach (flag_var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
option (BUILD_SHARED_RUNTIME "MSVC, link with runtime dynamically." ON)
if (NOT BUILD_SHARED_RUNTIME)
foreach (flag_var
CMAKE_C_FLAGS CMAKE_CXX_FLAGS
CMAKE_C_FLAGS_DEBUG CMAKE_CXX_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE CMAKE_CXX_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_C_FLAGS_RELWITHDEBINFO CMAKE_CXX_FLAGS_RELWITHDEBINFO)
string (REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endforeach ()
endif ()
@ -147,7 +189,8 @@ endif ()
# Build configuration:
if (${BUILD_SHARED_LIBS} AND ${CMAKE_SYSTEM_NAME} STREQUAL Windows) # Allow exes to find dlls:
if (${BUILD_SHARED_LIBS} AND ${CMAKE_SYSTEM_NAME} STREQUAL Windows)
# Allow exes to find dlls:
set (BIN ${PROJECT_BINARY_DIR}/bin/)
set (EXAMPLES_BIN ${BIN})
set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${BIN})
@ -188,17 +231,16 @@ endif ()
if (APPLE)
option (BUILD_FRAMEWORK "Build an OS X framework." OFF)
set (FRAMEWORK_INSTALL_DIR "/Library/Frameworks" CACHE STRING "Directory to install frameworks to.")
set (FRAMEWORK_INSTALL_DIR
"/Library/Frameworks" CACHE STRING "Directory to install frameworks to.")
endif ()
# Top-level:
set (PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH})
math (EXPR SO_VERSION_MAJOR "${SO_VERSION_CURRENT} - ${SO_VERSION_AGE}")
math (EXPR SO_VERSION_MINOR "${SO_VERSION_AGE}")
math (EXPR SO_VERSION_PATCH "${SO_VERSION_REVISION}")
set (PROJECT_VERSION
${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH})
set (SO_VERSION ${SO_VERSION_MAJOR}.${SO_VERSION_MINOR}.${SO_VERSION_PATCH})
configure_file (
@ -206,7 +248,7 @@ configure_file (
${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.h)
include_directories (${PROJECT_BINARY_DIR})
if (BUILD_TESTS OR BUILD_LSR_TESTS)
if (NOT CMAKE_CROSSCOMPILING AND (BUILD_TESTS OR BUILD_LSR_TESTS))
enable_testing ()
endif ()

51
INSTALL
View File

@ -78,32 +78,27 @@ To list help on the available options, enter:
Options, if given, should be preceded with '-D', e.g.
cmake -DWITH_SIMD:BOOL=OFF ..
cmake -DBUILD_SHARED_LIBS:BOOL=OFF ..
Resampling engines
As available on a given system, the library may include up to four resampling
engines, as follows:
As available on a given system, options for including up to five resampling
engines are available (per above) as follows:
cr32: for constant-rate resampling with precision up to 20 bits,
cr32s: SIMD variant of cr32,
cr64: for constant-rate resampling with precision greater than 20 bits,
vr32: for variable-rate resampling.
WITH_CR32: for constant-rate HQ resampling,
WITH_CR32S: SIMD variant of previous,
WITH_CR64: for constant-rate VHQ resampling,
WITH_CR64S: SIMD variant of previous,
WITH_VR32: for variable-rate HQ resampling.
Engine inclusion is controlled (as above) by the following cmake option
variables:
By default, these options are all set to ON.
cr32: WITH_SINGLE_PRECISION
cr32s: WITH_SINGLE_PRECISION, WITH_SIMD
cr32: WITH_DOUBLE_PRECISION
By default, these variables are all set to ON.
When both cr32 and cr32s engines are included, run-time selection is automatic
(based on CPU capability) for x86 CPUs, and can be automatic for ARM CPUs if
the 3rd-party library `libavutil' is available at libsoxr build-time.
When both SIMD and non-SIMD engine variants are included, run-time selection
is automatic (based on CPU capability) for x86 CPUs, and can be automatic for
ARM CPUs if the 3rd-party library `libavutil' is available at libsoxr
build-time.
@ -114,13 +109,13 @@ E.g. targeting a Linux ARM system:
mkdir build
cd build
cmake -DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc-5 \
-DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc \
..
or, also building the examples (one of which uses C++):
cmake -DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc-5 \
-DCMAKE_CXX_COMPILER=arm-linux-gnueabi-g++-5 \
-DCMAKE_C_COMPILER=arm-linux-gnueabi-gcc \
-DCMAKE_CXX_COMPILER=arm-linux-gnueabi-g++ \
-DBUILD_EXAMPLES=1 \
..
@ -158,7 +153,7 @@ Autotools-based systems might find it useful to create a file called
cmake -DBUILD_SHARED_LIBS=OFF .
(or with other build options as required).
For MS visual studio, see msvc/README.
For MS Visual C++, see msvc/README.
@ -168,7 +163,7 @@ The libsoxr API structure soxr_runtime_spec_t allows application developer
to optimise some aspects of libsoxrs operation for a particular application.
However, since optimal performance might depend on an individual end-users
run-time system and the end-users preferences, environment variables are
available to set (override) the run-time parameters:
available to set (override) run-time parameters as follows:
Env. variable Equivalent soxr_runtime_spec_t item
------------------ -----------------------------------
@ -178,8 +173,8 @@ available to set (override) the run-time parameters:
SOXR_MIN_DFT_SIZE log2_min_dft_size
SOXR_NUM_THREADS num_threads
Additionally, the SOXR_USE_SIMD environment variable may be used to override
automatic selection (or to provide manual selection where automatic selection
is not available) between the cr32 and cr32s resampling engines. (Which engine
is selected for a specific configuration of libsoxr can be checked using
example #3, which reports it.)
Additionally, the SOXR_USE_SIMD32 and SOXR_USE_SIMD64 environment variables
may be used to override automatic selection (or to provide manual selection
where automatic selection is not available) between SIMD and non-SIMD engine
variants. (Which engine is selected for a specific configuration of libsoxr
can be checked using example #3, which reports it.)

View File

@ -1,4 +1,4 @@
SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
This library is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
@ -11,8 +11,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
along with this library; if not, see <http://www.gnu.org/licenses/>.
Notes

4
NEWS
View File

@ -1,6 +1,8 @@
Version 0.1.3 (2016-mm-dd)
* Better support for clang, ARM+SIMD, and cross-compilation.
* SIMD enhancements: SSE, AVX, Neon.
* Improved support for clang, ARM, and cross-compilation.
* Other minor fixes/improvements to build/tests/documentation.
* N.B. some cmake configuration variable name changes.
Version 0.1.2 (2015-09-05)
* Fix conversion failure when I/O types differ but I/O rates don't.

2
README
View File

@ -1,4 +1,4 @@
SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
The SoX Resampler library `libsoxr' performs one-dimensional sample-rate
conversion -- it may be used, for example, to resample PCM-encoded audio.

1
TODO
View File

@ -1,3 +1,2 @@
* SOXR_ALLOW_ALIASING
* Explicit flush API fn, perhaps.
* More SIMD.

View File

@ -0,0 +1,35 @@
# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
# - Function to find C compiler feature flags
include (CheckCSourceCompiles)
include (FindPackageHandleStandardArgs)
function (FindCFlags PKG_NAME PKG_DESC TRIAL_C_FLAGS TEST_C_SOURCE)
foreach (TRIAL_C_FLAG ${TRIAL_C_FLAGS})
message (STATUS "Trying ${PKG_NAME} C flags: ${TRIAL_C_FLAG}")
unset (DETECT_${PKG_NAME}_C_FLAGS CACHE) #displayed by check_c_source_compiles
set (TMP "${CMAKE_REQUIRED_FLAGS}")
set (CMAKE_REQUIRED_FLAGS "${TRIAL_C_FLAG}")
check_c_source_compiles ("${TEST_C_SOURCE}" DETECT_${PKG_NAME}_C_FLAGS)
set (CMAKE_REQUIRED_FLAGS "${TMP}")
if (DETECT_${PKG_NAME}_C_FLAGS)
set (DETECTED_C_FLAGS "${TRIAL_C_FLAG}")
break ()
endif ()
endforeach ()
# N.B. Will not overwrite existing cache variable:
set (${PKG_NAME}_C_FLAGS "${DETECTED_C_FLAGS}"
CACHE STRING "C compiler flags for ${PKG_DESC}")
find_package_handle_standard_args (
${PKG_NAME} DEFAULT_MSG ${PKG_NAME}_C_FLAGS ${PKG_NAME}_C_FLAGS)
mark_as_advanced (${PKG_NAME}_C_FLAGS)
set (${PKG_NAME}_FOUND ${${PKG_NAME}_FOUND} PARENT_SCOPE)
endfunction ()

View File

@ -1,117 +1,38 @@
# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
# - Finds OpenMP support
# This module can be used to detect OpenMP support in a compiler.
# If the compiler supports OpenMP, the flags required to compile with
# openmp support are set.
#
# The following variables are set:
# OpenMP_C_FLAGS - flags to add to the C compiler for OpenMP support
# OPENMP_FOUND - true if openmp is detected
#
# Supported compilers can be found at http://openmp.org/wp/openmp-compilers/
#
# Modifications for soxr:
# * don't rely on presence of C++ compiler
# * support MINGW
#
#=============================================================================
# Copyright 2009 Kitware, Inc.
# Copyright 2008-2009 André Rigland Brodtkorb <Andre.Brodtkorb@ifi.uio.no>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * The names of Kitware, Inc., the Insight Consortium, or the names of
# any consortium members, or of any contributors, may not be used to
# endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# OpenMP_C_FLAGS - flags to add to the C compiler for this package.
# OpenMP_FOUND - true if support for this package is found.
include (CheckCSourceCompiles)
include (FindPackageHandleStandardArgs)
set (OpenMP_C_FLAG_CANDIDATES
#Gnu
"-fopenmp"
#Clang
"-fopenmp=libiomp5"
#Microsoft Visual Studio
"/openmp"
#Intel windows
"-Qopenmp"
#Intel
"-openmp"
#Empty, if compiler automatically accepts openmp
" "
#Sun
"-xopenmp"
#HP
"+Oopenmp"
#IBM XL C/c++
"-qsmp"
#Portland Group
"-mp"
)
# sample openmp source code to test
set (OpenMP_C_TEST_SOURCE
"
#include <omp.h>
int main() {
#ifdef _OPENMP
return 0;
#else
breaks_on_purpose
#endif
}
")
# if these are set then do not try to find them again,
# by avoiding any try_compiles for the flags
if (DEFINED OpenMP_C_FLAGS)
set (OpenMP_C_FLAG_CANDIDATES)
endif (DEFINED OpenMP_C_FLAGS)
set (TRIAL_C_FLAGS)
else ()
set (TRIAL_C_FLAGS
"-fopenmp" # Gnu
"-fopenmp=libiomp5" # Clang
"/openmp" # MSVC
" "
)
# check c compiler
foreach (FLAG ${OpenMP_C_FLAG_CANDIDATES})
set (SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
set (CMAKE_REQUIRED_FLAGS "${FLAG}")
unset (OpenMP_FLAG_DETECTED CACHE)
message (STATUS "Try OpenMP C flag = [${FLAG}]")
check_c_source_compiles ("${OpenMP_C_TEST_SOURCE}" OpenMP_FLAG_DETECTED)
set (CMAKE_REQUIRED_FLAGS "${SAFE_CMAKE_REQUIRED_FLAGS}")
if (OpenMP_FLAG_DETECTED)
set (OpenMP_C_FLAGS_INTERNAL "${FLAG}")
break ()
endif (OpenMP_FLAG_DETECTED)
endforeach (FLAG ${OpenMP_C_FLAG_CANDIDATES})
set (TEST_C_SOURCE "
#ifndef _OPENMP
#error
#endif
#include <omp.h>
int main() {return 0;}
")
endif ()
set (OpenMP_C_FLAGS "${OpenMP_C_FLAGS_INTERNAL}"
CACHE STRING "C compiler flags for OpenMP parallization")
include (FindCFlags)
# handle the standard arguments for find_package
find_package_handle_standard_args (OpenMP DEFAULT_MSG
OpenMP_C_FLAGS)
FindCFlags ("OpenMP" "OpenMP threading"
"${TRIAL_C_FLAGS}" "${TEST_C_SOURCE}")
if (MINGW)
set (OpenMP_SHARED_LINKER_FLAGS "${OpenMP_SHARED_LINKER_FLAGS} ${OpenMP_C_FLAGS}")
set (OpenMP_EXE_LINKER_FLAGS "${OpenMP_EXE_LINKER_FLAGS} ${OpenMP_C_FLAGS}")
mark_as_advanced (OpenMP_SHARED_LINKER_FLAGS OpenMP_EXE_LINKER_FLAGS)
endif ()
mark_as_advanced (OpenMP_C_FLAGS OpenMP_SHARED_LINKER_FLAGS OpenMP_EXE_LINKER_FLAGS)

View File

@ -1,104 +0,0 @@
# - Finds SIMD support
#
# The following variables are set:
# SIMD_C_FLAGS - flags to add to the C compiler for this package.
# SIMD_FOUND - true if support for this package is found.
#
#=============================================================================
# Based on FindOpenMP.cmake, which is:
#
# Copyright 2009 Kitware, Inc.
# Copyright 2008-2009 André Rigland Brodtkorb <Andre.Brodtkorb@ifi.uio.no>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * The names of Kitware, Inc., the Insight Consortium, or the names of
# any consortium members, or of any contributors, may not be used to
# endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include (CheckCSourceCompiles)
include (FindPackageHandleStandardArgs)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
set (SIMD_C_FLAG_CANDIDATES
# Gcc
"-Wno-cast-align -mfpu=neon-vfpv4 -mcpu=cortex-a7"
"-Wno-cast-align -mfpu=neon -mfloat-abi=hard"
"-Wno-cast-align -mfpu=neon -mfloat-abi=softfp"
"-Wno-cast-align -mfpu=neon -mfloat-abi=soft"
)
set (SIMD_C_TEST_SOURCE "
#include <arm_neon.h>
int main() {
float32x4_t a = vdupq_n_f32(0), b = a, c = vaddq_f32(a,b);
return 0;
}
")
else ()
if (WIN32) # Safety for when mixed lib/app compilers (but performance hit)
set (GCC_WIN32_SIMD_OPTS "-mincoming-stack-boundary=2")
endif ()
set (SIMD_C_FLAG_CANDIDATES
# x64
"-Wno-cast-align"
" "
# Microsoft Visual Studio x86
"/arch:SSE /fp:fast -D__SSE__"
# Gcc x86
"-Wno-cast-align -msse -mfpmath=sse ${GCC_WIN32_SIMD_OPTS}"
# Gcc x86 (old versions)
"-msse -mfpmath=sse"
)
set (SIMD_C_TEST_SOURCE "
#include <xmmintrin.h>
int main() {
__m128 a = _mm_setzero_ps(), b = a, c = _mm_add_ps(a,b);
return 0;
}
")
endif ()
if (DEFINED SIMD_C_FLAGS)
set (SIMD_C_FLAG_CANDIDATES)
endif ()
foreach (FLAG ${SIMD_C_FLAG_CANDIDATES})
set (SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
set (CMAKE_REQUIRED_FLAGS "${FLAG}")
unset (SIMD_FLAG_DETECTED CACHE)
message (STATUS "Try SIMD C flag = [${FLAG}]")
check_c_source_compiles ("${SIMD_C_TEST_SOURCE}" SIMD_FLAG_DETECTED)
set (CMAKE_REQUIRED_FLAGS "${SAFE_CMAKE_REQUIRED_FLAGS}")
if (SIMD_FLAG_DETECTED)
set (SIMD_C_FLAGS_INTERNAL "${FLAG}")
break ()
endif ()
endforeach ()
set (SIMD_C_FLAGS "${SIMD_C_FLAGS_INTERNAL}"
CACHE STRING "C compiler flags for SIMD vectorization")
find_package_handle_standard_args (SIMD DEFAULT_MSG SIMD_C_FLAGS SIMD_C_FLAGS)
mark_as_advanced (SIMD_C_FLAGS)

View File

@ -0,0 +1,54 @@
# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
# - Finds SIMD32 support
#
# The following variables are set:
# SIMD32_C_FLAGS - flags to add to the C compiler for this package.
# SIMD32_FOUND - true if support for this package is found.
if (DEFINED SIMD32_C_FLAGS)
set (TRIAL_C_FLAGS)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
set (TRIAL_C_FLAGS
# Gcc
"-mfpu=neon-vfpv4 -mcpu=cortex-a7"
"-mfpu=neon -mfloat-abi=hard"
"-mfpu=neon -mfloat-abi=softfp"
"-mfpu=neon -mfloat-abi=soft"
)
set (TEST_C_SOURCE "
#include <arm_neon.h>
int main() {
float32x4_t a = vdupq_n_f32(0), b = a, c = vaddq_f32(a,b);
return 0;
}
")
else ()
if (WIN32) # Safety for when mixed lib/app compilers (but performance hit)
set (GCC_WIN32_SIMD32_OPTS "-mincoming-stack-boundary=2")
endif ()
set (TRIAL_C_FLAGS
# x64
" "
# MSVC x86
"/arch:SSE /fp:fast -D__SSE__"
# Gcc x86
"-Wno-cast-align -msse -mfpmath=sse ${GCC_WIN32_SIMD32_OPTS}"
# Gcc x86 (old versions)
"-msse -mfpmath=sse"
)
set (TEST_C_SOURCE "
#include <xmmintrin.h>
int main() {
__m128 a = _mm_setzero_ps(), b = a, c = _mm_add_ps(a,b);
return 0;
}
")
endif ()
include (FindCFlags)
FindCFlags ("SIMD32" "FLOAT-32 (single-precision) SIMD vectorization"
"${TRIAL_C_FLAGS}" "${TEST_C_SOURCE}")

View File

@ -0,0 +1,29 @@
# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
# - Finds SIMD64 support
#
# The following variables are set:
# SIMD64_C_FLAGS - flags to add to the C compiler for this package.
# SIMD64_FOUND - true if support for this package is found.
if (DEFINED SIMD64_C_FLAGS OR CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
set (TRIAL_C_FLAGS)
else ()
set (TRIAL_C_FLAGS
"-mavx" # Gcc
"/arch:AVX" # MSVC
)
set (TEST_C_SOURCE "
#include <immintrin.h>
int main() {
__m256d a = _mm256_setzero_pd(), b = a, c = _mm256_add_pd(a,b);
return 0;
}
")
endif ()
include (FindCFlags)
FindCFlags ("SIMD64" "FLOAT-64 (double-precision) SIMD vectorization"
"${TRIAL_C_FLAGS}" "${TEST_C_SOURCE}")

View File

@ -5,13 +5,16 @@
macro (set_system_processor)
if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "")
unset(CMAKE_SYSTEM_PROCESSOR)
endif ()
if (NOT DEFINED CMAKE_SYSTEM_PROCESSOR)
include (CheckCSourceCompiles)
set (CPU_CANDIDATES
set (CPU_LINES
"#if defined __x86_64__ || defined _M_X64 /*\;x86_64\;*/"
"#if defined __i386__ || defined _M_IX86 /*\;x86_32\;*/"
"#if defined __arm__ || defined _M_ARM /*\;arm\;*/"
)
foreach (CPU_LINE ${CPU_CANDIDATES})
foreach (CPU_LINE ${CPU_LINES})
string (CONCAT CPU_SOURCE "${CPU_LINE}" "
int main() {return 0;}
#endif
@ -19,10 +22,14 @@ macro (set_system_processor)
unset (SYSTEM_PROCESSOR_DETECTED CACHE)
check_c_source_compiles ("${CPU_SOURCE}" SYSTEM_PROCESSOR_DETECTED)
if (SYSTEM_PROCESSOR_DETECTED)
list (GET CPU_LINE 1 CMAKE_SYSTEM_PROCESSOR)
message (STATUS "CMAKE_SYSTEM_PROCESSOR set to ${CMAKE_SYSTEM_PROCESSOR}")
list (GET CPU_LINE 1 DETECTED_SYSTEM_PROCESSOR)
message (STATUS "CMAKE_SYSTEM_PROCESSOR is ${DETECTED_SYSTEM_PROCESSOR}")
break ()
endif ()
endforeach ()
endif ()
# N.B. Will not overwrite existing cache variable:
set (CMAKE_SYSTEM_PROCESSOR "${DETECTED_SYSTEM_PROCESSOR}"
CACHE STRING "Target system processor")
endmacro ()

View File

@ -1,16 +1,18 @@
# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
if (${BUILD_EXAMPLES} OR ${BUILD_TESTS})
set (SOURCES 3-options-input-fn)
if (${WITH_LSR_BINDINGS})
set (LSR_SOURCES 1a-lsr)
endif ()
endif ()
if (${BUILD_EXAMPLES})
project (soxr) # Adds c++ compiler
file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/[1-9]-*.[cC])
elseif (${BUILD_TESTS})
file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/3*.c)
endif ()
if (${BUILD_EXAMPLES} OR ${BUILD_TESTS})
if (${WITH_LSR_BINDINGS})
set (LSR_SOURCES 1a-lsr.c)
list (APPEND SOURCES 1-single-block 2-stream 4-split-channels)
if (${WITH_VR32})
list (APPEND SOURCES 5-variable-rate)
endif ()
endif ()
@ -34,4 +36,5 @@ if (${BUILD_TESTS} AND ${WITH_LSR_BINDINGS})
endif ()
file (GLOB INSTALL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.[cCh])
install (FILES ${INSTALL_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/README DESTINATION ${DOC_INSTALL_DIR}/examples)
install (FILES ${INSTALL_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/README
DESTINATION ${DOC_INSTALL_DIR}/examples)

View File

@ -1,4 +1,4 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
/* Common includes etc. for the examples. */
@ -6,7 +6,7 @@
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include "math-wrap.h"
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
@ -17,10 +17,7 @@
#include <io.h>
#include <fcntl.h>
#define USE_STD_STDIO _setmode(_fileno(stdout), _O_BINARY), \
_setmode(_fileno(stdin ), _O_BINARY);
/* Sometimes missing, so ensure that it is defined: */
#undef M_PI
#define M_PI 3.14159265358979323846
_setmode(_fileno(stdin ), _O_BINARY)
#else
#define USE_STD_STDIO
#endif

View File

@ -35,7 +35,7 @@ set (tests
callback_hang_test callback_test downsample_test
float_short_test misc_test multi_channel_test
reset_test simple_test termination_test varispeed_test)
if (WITH_DOUBLE_PRECISION)
if (WITH_CR64 OR WITH_CR64S)
set (tests ${tests} snr_bw_test)
endif ()

View File

@ -1,4 +1,4 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#if !defined soxsrc_lsr_tests_config_included

View File

@ -118,9 +118,9 @@ main (int argc, char *argv [])
BOOLEAN_TRUE,
{ { 1, { 0.01111111111 }, 3.0, 1, 130.0, 1.0 },
{ 1, { 0.01111111111 }, 0.6, 1, 132.0, 1.0 },
{ 1, { 0.01111111111 }, 0.3, 1, 138.0, 1.0 },
{ 1, { 0.01111111111 }, 0.3, 1, 135.0, 1.0 },
{ 1, { 0.01111111111 }, 1.0, 1, 155.0, 1.0 },
{ 1, { 0.01111111111 }, 1.001, 1, 134.0, 1.0 },
{ 1, { 0.01111111111 }, 1.001, 1, 133.0, 1.0 },
{ 2, { 0.011111, 0.324 }, 1.9999, 2, 127.0, 1.0 },
{ 2, { 0.012345, 0.457 }, 0.456789, 1, 124.0, 0.5 },
{ 2, { 0.011111, 0.45 }, 0.6, 1, 126.0, 0.5 },
@ -135,10 +135,10 @@ main (int argc, char *argv [])
{ 1, { 0.01111111111 }, 0.6, 1, 147.0, 1.0 },
{ 1, { 0.01111111111 }, 0.3, 1, 147.0, 1.0 },
{ 1, { 0.01111111111 }, 1.0, 1, 155.0, 1.0 },
{ 1, { 0.01111111111 }, 1.001, 1, 147.0, 1.0 },
{ 1, { 0.01111111111 }, 1.001, 1, 146.0, 1.0 },
{ 2, { 0.011111, 0.324 }, 1.9999, 2, 147.0, 1.0 },
{ 2, { 0.012345, 0.457 }, 0.456789, 1, 148.0, 0.5 },
{ 2, { 0.011111, 0.45 }, 0.6, 1, 149.0, 0.5 },
{ 2, { 0.011111, 0.45 }, 0.6, 1, 145.0, 0.5 },
{ 1, { 0.43111111111 }, 1.33, 1, 145.0, 1.0 }
}
},

View File

@ -60,6 +60,12 @@
</References>
<Files>
<Filter Name="Source Files" >
<File RelativePath="../src/constructors.c" />
<File RelativePath="../src/cr.c" />
<File RelativePath="../src/cr32.c" />
<File RelativePath="../src/cr32s.c" />
<File RelativePath="../src/cr64.c" />
<File RelativePath="../src/cr64s.c" />
<File RelativePath="../src/data-io.c" />
<File RelativePath="../src/dbesi0.c" />
<File RelativePath="../src/fft4g32.c" />
@ -67,10 +73,9 @@
<File RelativePath="../src/filter.c" />
<File RelativePath="../src/lsr.c" />
<File RelativePath="../src/pffft32s.c" />
<File RelativePath="../src/rate32.c" />
<File RelativePath="../src/rate32s.c" />
<File RelativePath="../src/rate64.c" />
<File RelativePath="../src/simd.c" />
<File RelativePath="../src/pffft64s.c" />
<File RelativePath="../src/simd32.c" />
<File RelativePath="../src/simd64.c" />
<File RelativePath="../src/soxr.c" />
<File RelativePath="../src/vr32.c" />
</Filter>

View File

@ -7,12 +7,14 @@
#if !defined soxr_config_included
#define soxr_config_included
#define WITH_SINGLE_PRECISION 1
#define WITH_DOUBLE_PRECISION 1
#define WITH_CR32 1
#define WITH_CR32S 1
#define WITH_CR64 1
#define WITH_CR64S 1
#define WITH_VR32 1
#define AVCODEC_FOUND 0
#define AVUTIL_FOUND 0
#define SIMD_FOUND 1
#define HAVE_FENV_H 0
#define HAVE_STDBOOL_H 0

29
multi-arch 100755
View File

@ -0,0 +1,29 @@
#!/bin/sh
set -e
# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
rm -f CMakeCache.txt # Prevent interference from any in-tree build
j=-j4
build=Release
for n in \
cc: \
clang: \
arm-linux-gnueabi-gcc:Linux \
x86_64-w64-mingw32-gcc:Windows \
i686-w64-mingw32-gcc:Windows \
; do
compiler=$(echo $n | sed 's/:.*//')
system=$(echo $n | sed 's/.*://')
dir=$build-$compiler
which $compiler > /dev/null || echo $compiler not found && (
echo "***" $dir
mkdir -p $dir
cd $dir
cmake -DCMAKE_BUILD_TYPE=$build -DCMAKE_C_COMPILER=$compiler -DCMAKE_SYSTEM_NAME="$system" ..
make $j && [ /$system = / ] && ctest -j || true
)
done

View File

@ -4,12 +4,14 @@
#if !defined soxr_config_included
#define soxr_config_included
#cmakedefine01 WITH_SINGLE_PRECISION
#cmakedefine01 WITH_DOUBLE_PRECISION
#cmakedefine01 WITH_CR32
#cmakedefine01 WITH_CR32S
#cmakedefine01 WITH_CR64
#cmakedefine01 WITH_CR64S
#cmakedefine01 WITH_VR32
#cmakedefine01 AVCODEC_FOUND
#cmakedefine01 AVUTIL_FOUND
#cmakedefine01 SIMD_FOUND
#cmakedefine01 HAVE_FENV_H
#cmakedefine01 HAVE_STDBOOL_H

View File

@ -1,4 +1,4 @@
# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
@ -7,8 +7,10 @@
if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/vr-coefs.h)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
set_property(SOURCE vr32.c APPEND PROPERTY OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h)
set_property(SOURCE vr32.c
APPEND PROPERTY OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h)
add_executable (vr-coefs vr-coefs.c)
target_link_libraries (vr-coefs ${LIBM_LIBRARIES})
ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h
COMMAND vr-coefs > ${CMAKE_CURRENT_BINARY_DIR}/vr-coefs.h
DEPENDS vr-coefs)
@ -29,22 +31,41 @@ if (AVCODEC_FOUND)
elseif (WITH_PFFFT)
#set (RDFT32 pffft32)
set (RDFT32S pffft32s)
elseif (SIMD_FOUND)
set (RDFT32S fft4g32s)
elseif (WITH_CR32S)
set (RDFT32S fft4g32s fft4g32)
endif ()
if (WITH_DOUBLE_PRECISION)
set (DP_SOURCES rate64)
set (SOURCES ${PROJECT_NAME}.c constructors data-io)
if (WITH_CR32 OR WITH_CR32S OR WITH_CR64 OR WITH_CR64S)
list (APPEND SOURCES dbesi0 filter fft4g64 cr.c)
endif ()
if (WITH_SINGLE_PRECISION)
set (SP_SOURCES rate32 ${RDFT32})
if (WITH_CR32)
list (APPEND SOURCES cr32 ${RDFT32})
endif ()
if (SIMD_FOUND)
set (SIMD_SOURCES rate32s ${RDFT32S} simd)
foreach (source ${SIMD_SOURCES})
set_property (SOURCE ${source} PROPERTY COMPILE_FLAGS ${SIMD_C_FLAGS})
if (WITH_CR64)
list (APPEND SOURCES cr64)
endif ()
if (WITH_VR32)
list (APPEND SOURCES vr32)
endif ()
if (WITH_CR32S)
foreach (source cr32s ${RDFT32S} simd32)
list (APPEND SOURCES ${source})
set_property (SOURCE ${source}
APPEND_STRING PROPERTY COMPILE_FLAGS ${SIMD32_C_FLAGS})
endforeach ()
endif ()
if (WITH_CR64S)
foreach (source cr64s pffft64s simd64)
list (APPEND SOURCES ${source})
set_property (SOURCE ${source}
APPEND_STRING PROPERTY COMPILE_FLAGS ${SIMD64_C_FLAGS})
endforeach ()
endif ()
@ -52,8 +73,7 @@ endif ()
# Libsoxr:
add_library (${PROJECT_NAME} ${LIB_TYPE} ${PROJECT_NAME}.c data-io dbesi0 filter fft4g64
${SP_SOURCES} vr32 ${DP_SOURCES} ${SIMD_SOURCES})
add_library (${PROJECT_NAME} ${LIB_TYPE} ${SOURCES})
target_link_libraries (${PROJECT_NAME} PRIVATE ${LIBS} ${LIBM_LIBRARIES})
set_target_properties (${PROJECT_NAME} PROPERTIES
VERSION "${SO_VERSION}"

View File

@ -18,8 +18,10 @@
#define lsx_dfst_f _soxr_dfst_f
#define lsx_dfst _soxr_dfst
#define lsx_fir_to_phase _soxr_fir_to_phase
#define lsx_f_resp _soxr_f_resp
#define lsx_init_fft_cache_f _soxr_init_fft_cache_f
#define lsx_init_fft_cache _soxr_init_fft_cache
#define lsx_inv_f_resp _soxr_inv_f_resp
#define lsx_kaiser_beta _soxr_kaiser_beta
#define lsx_kaiser_params _soxr_kaiser_params
#define lsx_make_lpf _soxr_make_lpf

View File

@ -1,17 +1,19 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include <stdlib.h>
#include <math.h>
#include <libavcodec/avfft.h>
#include "filter.h"
#include "rdft_t.h"
static void * forward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),DFT_R2C);}
static void * backward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),IDFT_C2R);}
static void rdft(int length, void * setup, float * h) {av_rdft_calc(setup, h); (void)length;}
static int multiplier(void) {return 2;}
static void nothing(void) {}
static int flags(void) {return 0;}
typedef void (* fn_t)(void);
fn_t _soxr_rdft32_cb[] = {
(fn_t)forward_setup,
(fn_t)backward_setup,
@ -24,4 +26,8 @@ fn_t _soxr_rdft32_cb[] = {
(fn_t)_soxr_ordered_partial_convolve_f,
(fn_t)multiplier,
(fn_t)nothing,
(fn_t)malloc,
(fn_t)calloc,
(fn_t)free,
(fn_t)flags,
};

View File

@ -3,15 +3,16 @@
#include <math.h>
#include <libavcodec/avfft.h>
#include "simd.h"
#include "simd32.h"
#include "rdft_t.h"
static void * forward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),DFT_R2C);}
static void * backward_setup(int len) {return av_rdft_init((int)(log(len)/log(2)+.5),IDFT_C2R);}
static void rdft(int length, void * setup, float * h) {av_rdft_calc(setup, h); (void)length;}
static int multiplier(void) {return 2;}
static void nothing(void) {}
static int flags(void) {return RDFT_IS_SIMD;}
typedef void (* fn_t)(void);
fn_t _soxr_rdft32s_cb[] = {
(fn_t)forward_setup,
(fn_t)backward_setup,
@ -20,8 +21,12 @@ fn_t _soxr_rdft32s_cb[] = {
(fn_t)rdft,
(fn_t)rdft,
(fn_t)rdft,
(fn_t)_soxr_ordered_convolve_simd,
(fn_t)_soxr_ordered_partial_convolve_simd,
(fn_t)ORDERED_CONVOLVE_SIMD,
(fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
(fn_t)multiplier,
(fn_t)nothing,
(fn_t)SIMD_ALIGNED_MALLOC,
(fn_t)SIMD_ALIGNED_CALLOC,
(fn_t)SIMD_ALIGNED_FREE,
(fn_t)flags,
};

40
src/avx.h 100644
View File

@ -0,0 +1,40 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
/* AVX support macros */
#if !defined soxr_avx_included
#define soxr_avx_included
#include <immintrin.h>
typedef __m256d v4sf;
#define VZERO() _mm256_setzero_pd()
#define VMUL(a,b) _mm256_mul_pd(a,b)
#define VADD(a,b) _mm256_add_pd(a,b)
#define VMADD(a,b,c) VADD(VMUL(a,b),c) /* Note: gcc -mfma will `fuse' these */
#define VSUB(a,b) _mm256_sub_pd(a,b)
#define LD_PS1(p) _mm256_set1_pd(p)
#define INTERLEAVE2(in1, in2, out1, out2) {v4sf \
t1 = _mm256_unpacklo_pd(in1, in2), \
t2 = _mm256_unpackhi_pd(in1, in2); \
out1 = _mm256_permute2f128_pd(t1,t2,0x20); \
out2 = _mm256_permute2f128_pd(t1,t2,0x31); }
#define UNINTERLEAVE2(in1, in2, out1, out2) {v4sf \
t1 = _mm256_permute2f128_pd(in1,in2,0x20), \
t2 = _mm256_permute2f128_pd(in1,in2,0x31); \
out1 = _mm256_unpacklo_pd(t1, t2); \
out2 = _mm256_unpackhi_pd(t1, t2);}
#define VTRANSPOSE4(x0,x1,x2,x3) {v4sf \
t0 = _mm256_shuffle_pd(x0,x1, 0x0), \
t2 = _mm256_shuffle_pd(x0,x1, 0xf), \
t1 = _mm256_shuffle_pd(x2,x3, 0x0), \
t3 = _mm256_shuffle_pd(x2,x3, 0xf); \
x0 = _mm256_permute2f128_pd(t0,t1, 0x20); \
x1 = _mm256_permute2f128_pd(t2,t3, 0x20); \
x2 = _mm256_permute2f128_pd(t0,t1, 0x31); \
x3 = _mm256_permute2f128_pd(t2,t3, 0x31);}
#define VSWAPHL(a,b) _mm256_permute2f128_pd(b, a, 0x30)
#define VALIGNED(ptr) ((((long)(ptr)) & 0x1F) == 0)
#endif

85
src/constructors.c 100644
View File

@ -0,0 +1,85 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include "soxr.h"
#include "filter.h"
#include "internal.h"
#include <math.h>
#include <stdarg.h>
#include <string.h>
#if !WITH_CR32 && !WITH_CR32S && !WITH_CR64 && !WITH_CR64S
#undef lsx_to_3dB
#define lsx_to_3dB(x) ((x)/(x))
#endif
soxr_quality_spec_t soxr_quality_spec(unsigned long recipe, unsigned long flags)
{
soxr_quality_spec_t spec, * p = &spec;
unsigned quality = recipe & 0xf;
double rej;
memset(p, 0, sizeof(*p));
if (quality > SOXR_PRECISIONQ) {
p->e = "invalid quality type";
return spec;
}
flags |= quality < SOXR_LSR0Q ? RESET_ON_CLEAR : 0;
p->phase_response = "\62\31\144"[(recipe & 0x30)>>4];
p->stopband_begin = 1;
p->precision =
quality == SOXR_QQ ? 0 :
quality <= SOXR_16_BITQ ? 16 :
quality <= SOXR_32_BITQ ? 4 + quality * 4 :
quality <= SOXR_LSR2Q ? 55 - quality * 4 : /* TODO: move to lsr.c */
0;
rej = p->precision * linear_to_dB(2.);
p->flags = flags;
if (quality <= SOXR_32_BITQ || quality == SOXR_PRECISIONQ) {
#define LOW_Q_BW0 (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */
p->passband_end = quality == 1? LOW_Q_BW0 : 1 - .05 / lsx_to_3dB(rej);
if (quality <= 2)
p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM;
}
else { /* TODO: move to lsr.c */
static float const bw[] = {.931f, .832f, .663f};
p->passband_end = bw[quality - SOXR_LSR0Q];
if (quality == SOXR_LSR2Q)
p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_LSR2Q | SOXR_PROMOTE_TO_LQ;
}
if (recipe & SOXR_STEEP_FILTER)
p->passband_end = 1 - .01 / lsx_to_3dB(rej);
return spec;
}
soxr_runtime_spec_t soxr_runtime_spec(unsigned num_threads)
{
soxr_runtime_spec_t spec, * p = &spec;
memset(p, 0, sizeof(*p));
p->log2_min_dft_size = 10;
p->log2_large_dft_size = 17;
p->coef_size_kbytes = 400;
p->num_threads = num_threads;
return spec;
}
soxr_io_spec_t soxr_io_spec(
soxr_datatype_t itype,
soxr_datatype_t otype)
{
soxr_io_spec_t spec, * p = &spec;
memset(p, 0, sizeof(*p));
if ((itype | otype) >= SOXR_SPLIT * 2)
p->e = "invalid io datatype(s)";
else {
p->itype = itype;
p->otype = otype;
p->scale = 1;
}
return spec;
}

297
src/cr-core.c 100644
View File

@ -0,0 +1,297 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details.
*
* Constant-rate resampling engine-specific code. */
#include <math.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include "filter.h"
#if defined SOXR_LIB
#include "internal.h"
#include "cr.h"
#if CORE_TYPE & CORE_DBL
typedef double sample_t;
#if CORE_TYPE & CORE_SIMD_DFT
#define RDFT_CB _soxr_rdft64s_cb
#else
#define RDFT_CB _soxr_rdft64_cb
#endif
#else
typedef float sample_t;
#if CORE_TYPE & CORE_SIMD_DFT
#define RDFT_CB _soxr_rdft32s_cb
#else
#define RDFT_CB _soxr_rdft32_cb
#endif
#endif
#if CORE_TYPE & (CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT)
#if CORE_TYPE & CORE_DBL
#include "simd64.h"
#include "simd64-dev.h"
#else
#include "simd32.h"
#include "simd32-dev.h"
#endif
#endif
extern fn_t RDFT_CB[];
#else
#define RDFT_CB 0
#endif
static void cubic_stage_fn(stage_t * p, fifo_t * output_fifo)
{
sample_t const * input = stage_read_p(p);
int num_in = min(stage_occupancy(p), p->input_size);
int i, max_num_out = 1 + (int)(num_in * p->out_in_ratio);
sample_t * output = fifo_reserve(output_fifo, max_num_out);
for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) {
sample_t const * s = input + p->at.integer;
double x = p->at.fraction * (1 / MULT32);
double b = .5*(s[1]+s[-1])-*s, a = (1/6.)*(s[2]-s[1]+s[-1]-*s-4*b);
double c = s[1]-*s-a-b;
output[i] = (sample_t)(p->mult * (((a*x + b)*x + c)*x + *s));
}
assert(max_num_out - i >= 0);
fifo_trim_by(output_fifo, max_num_out - i);
fifo_read(&p->fifo, p->at.integer, NULL);
p->at.integer = 0;
}
#if CORE_TYPE & CORE_DBL
#define SIMD_AVX ((CORE_TYPE & CORE_SIMD_HALF) && defined __AVX__)
#define SIMD_SSE 0
#else
#define SIMD_SSE ((CORE_TYPE & CORE_SIMD_HALF) && (defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86))
#define SIMD_AVX 0
#endif
#define SIMD_NEON ((CORE_TYPE & CORE_SIMD_HALF) && defined __arm__)
#include "half-coefs.h"
#if !(CORE_TYPE & CORE_SIMD_HALF)
#define FUNCTION_H h7
#define CONVOLVE ____ __ _
#include "half-fir.h"
#endif
#define FUNCTION_H h8
#define CONVOLVE ____ ____
#include "half-fir.h"
#define FUNCTION_H h9
#define CONVOLVE ____ ____ _
#include "half-fir.h"
#if CORE_TYPE & CORE_DBL
#define FUNCTION_H h10
#define CONVOLVE ____ ____ __
#include "half-fir.h"
#define FUNCTION_H h11
#define CONVOLVE ____ ____ __ _
#include "half-fir.h"
#define FUNCTION_H h12
#define CONVOLVE ____ ____ ____
#include "half-fir.h"
#define FUNCTION_H h13
#define CONVOLVE ____ ____ ____ _
#include "half-fir.h"
#endif
static half_fir_info_t const half_firs[] = {
#if !(CORE_TYPE & CORE_SIMD_HALF)
{ 7, half_fir_coefs_7 , h7 , 0 , 120.65f},
#endif
{ 8, half_fir_coefs_8 , h8 , 0 , 136.51f},
{ 9, half_fir_coefs_9 , h9 , 0 , 152.32f},
#if CORE_TYPE & CORE_DBL
{10, half_fir_coefs_10, h10, 0 , 168.08f},
{11, half_fir_coefs_11, h11, 0 , 183.79f},
{12, half_fir_coefs_12, h12, 0 , 199.46f},
{13, half_fir_coefs_13, h13, 0 , 215.12f},
#endif
};
#undef SIMD_AVX
#undef SIMD_NEON
#undef SIMD_SSE
#if CORE_TYPE & CORE_DBL
#define SIMD_AVX ((CORE_TYPE & CORE_SIMD_POLY) && defined __AVX__)
#define SIMD_SSE 0
#else
#define SIMD_SSE ((CORE_TYPE & CORE_SIMD_POLY) && (defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86))
#define SIMD_AVX 0
#endif
#define SIMD_NEON ((CORE_TYPE & CORE_SIMD_POLY) && defined __arm__)
#define HI_PREC_CLOCK
#define COEFS (sample_t * __restrict)p->shared->poly_fir_coefs
#define VAR_LENGTH p->n
#define VAR_CONVOLVE(n) while (j < (n)) _
#define VAR_POLY_PHASE_BITS p->phase_bits
#define FUNCTION vpoly0
#define FIR_LENGTH VAR_LENGTH
#define CONVOLVE(n) VAR_CONVOLVE(n)
#include "poly-fir0.h"
#define FUNCTION vpoly1
#define COEF_INTERP 1
#define PHASE_BITS VAR_POLY_PHASE_BITS
#define FIR_LENGTH VAR_LENGTH
#define CONVOLVE(n) VAR_CONVOLVE(n)
#include "poly-fir.h"
#define FUNCTION vpoly2
#define COEF_INTERP 2
#define PHASE_BITS VAR_POLY_PHASE_BITS
#define FIR_LENGTH VAR_LENGTH
#define CONVOLVE(n) VAR_CONVOLVE(n)
#include "poly-fir.h"
#define FUNCTION vpoly3
#define COEF_INTERP 3
#define PHASE_BITS VAR_POLY_PHASE_BITS
#define FIR_LENGTH VAR_LENGTH
#define CONVOLVE(n) VAR_CONVOLVE(n)
#include "poly-fir.h"
#if !(CORE_TYPE & CORE_SIMD_POLY)
#define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
#define FUNCTION U100_0
#define FIR_LENGTH U100_l
#define CONVOLVE(n) poly_fir_convolve_U100
#include "poly-fir0.h"
#define u100_l 11
#define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _ _
#define FUNCTION u100_0
#define FIR_LENGTH u100_l
#define CONVOLVE(n) poly_fir_convolve_u100
#include "poly-fir0.h"
#define FUNCTION u100_1
#define COEF_INTERP 1
#define PHASE_BITS 8
#define FIR_LENGTH u100_l
#define CONVOLVE(n) poly_fir_convolve_u100
#include "poly-fir.h"
#define FUNCTION u100_2
#define COEF_INTERP 2
#define PHASE_BITS 6
#define FIR_LENGTH u100_l
#define CONVOLVE(n) poly_fir_convolve_u100
#include "poly-fir.h"
#endif
#define u100_1_b 8
#define u100_2_b 6
static poly_fir_t const poly_firs[] = {
{-1, {{0, vpoly0}, { 7.2f, vpoly1}, {5.0f, vpoly2}}},
{-1, {{0, vpoly0}, { 9.4f, vpoly1}, {6.7f, vpoly2}}},
{-1, {{0, vpoly0}, {12.4f, vpoly1}, {7.8f, vpoly2}}},
{-1, {{0, vpoly0}, {13.6f, vpoly1}, {9.3f, vpoly2}}},
{-1, {{0, vpoly0}, {10.5f, vpoly2}, {8.4f, vpoly3}}},
{-1, {{0, vpoly0}, {11.85f,vpoly2}, {9.0f, vpoly3}}},
{-1, {{0, vpoly0}, { 8.0f, vpoly1}, {5.3f, vpoly2}}},
{-1, {{0, vpoly0}, { 8.6f, vpoly1}, {5.7f, vpoly2}}},
{-1, {{0, vpoly0}, {10.6f, vpoly1}, {6.75f,vpoly2}}},
{-1, {{0, vpoly0}, {12.6f, vpoly1}, {8.6f, vpoly2}}},
{-1, {{0, vpoly0}, { 9.6f, vpoly2}, {7.6f, vpoly3}}},
{-1, {{0, vpoly0}, {11.4f, vpoly2}, {8.65f,vpoly3}}},
#if CORE_TYPE & CORE_SIMD_POLY
{10.62f, {{0, vpoly0}, {0, 0}, {0, 0}}},
{-1, {{0, vpoly0}, {u100_1_b, vpoly1}, {u100_2_b, vpoly2}}},
#else
{10.62f, {{U100_l, U100_0}, {0, 0}, {0, 0}}},
{11.28f, {{u100_l, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}},
#endif
{-1, {{0, vpoly0}, { 9, vpoly1}, { 6, vpoly2}}},
{-1, {{0, vpoly0}, { 11, vpoly1}, { 7, vpoly2}}},
{-1, {{0, vpoly0}, { 13, vpoly1}, { 8, vpoly2}}},
{-1, {{0, vpoly0}, { 10, vpoly2}, { 8, vpoly3}}},
{-1, {{0, vpoly0}, { 12, vpoly2}, { 9, vpoly3}}},
};
static cr_core_t const cr_core = {
#if CORE_TYPE & CORE_SIMD_POLY
{SIMD_ALIGNED_MALLOC, SIMD_ALIGNED_CALLOC, SIMD_ALIGNED_FREE},
#else
{malloc, calloc, free},
#endif
half_firs, array_length(half_firs),
0, 0,
cubic_stage_fn,
poly_firs, RDFT_CB
};
#if defined SOXR_LIB
#include "soxr.h"
static char const * rate_create(void * channel, void * shared, double io_ratio,
soxr_quality_spec_t * q_spec, soxr_runtime_spec_t * r_spec, double scale)
{
return _soxr_init(channel, shared, io_ratio, q_spec, r_spec, scale,
&cr_core, CORE_TYPE);
}
static char const * id(void) {return CORE_STR;}
fn_t RATE_CB[] = {
(fn_t)_soxr_input,
(fn_t)_soxr_process,
(fn_t)_soxr_output,
(fn_t)_soxr_flush,
(fn_t)_soxr_close,
(fn_t)_soxr_delay,
(fn_t)_soxr_sizes,
(fn_t)rate_create,
(fn_t)0,
(fn_t)id,
};
#endif

581
src/cr.c 100644
View File

@ -0,0 +1,581 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details.
*
* Constant-rate resampling common code. */
#include <math.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include "filter.h"
#if defined SOXR_LIB
#include "internal.h"
#define STATIC
#endif
#include "cr.h"
#define num_coefs4 ((core_flags&CORE_SIMD_POLY)? ((num_coefs+3)&~3) : num_coefs)
#define coef_coef(C,T,x) \
C((T*)result, interp_order, num_coefs4, j, x, num_coefs4 - 1 - i)
#define STORE(C,T) { \
if (interp_order > 2) coef_coef(C,T,3) = (T)d; \
if (interp_order > 1) coef_coef(C,T,2) = (T)c; \
if (interp_order > 0) coef_coef(C,T,1) = (T)b; \
coef_coef(C,T,0) = (T)f0;}
static real * prepare_poly_fir_coefs(double const * coefs, int num_coefs,
int num_phases, int interp_order, double multiplier,
core_flags_t core_flags, alloc_t const * mem)
{
int i, j, length = num_coefs4 * num_phases * (interp_order + 1);
real * result = mem->calloc(1,(size_t)length << LOG2_SIZEOF_REAL(core_flags));
double fm1 = coefs[0], f1 = 0, f2 = 0;
for (i = num_coefs - 1; i >= 0; --i)
for (j = num_phases - 1; j >= 0; --j) {
double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */
int pos = i * num_phases + j - 1;
fm1 = pos > 0 ? coefs[pos - 1] * multiplier : 0;
switch (interp_order) {
case 1: b = f1 - f0; break;
case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break;
case 3: c=.5*(f1+fm1)-f0;d=(1/6.)*(f2-f1+fm1-f0-4*c);b=f1-f0-d-c; break;
default: assert(!interp_order);
}
switch (core_flags & 3) {
case 0: if (WITH_CR32 ) STORE(coef , float ); break;
case 1: if (WITH_CR64 ) STORE(coef , double); break;
case 2: if (WITH_CR32S) STORE(coef4, float ); break;
default:if (WITH_CR64S) STORE(coef4, double); break;
}
f2 = f1, f1 = f0;
}
return result;
}
#undef STORE
#undef coef_coef
#define IS_FLOAT32 (WITH_CR32 || WITH_CR32S) && \
(!(WITH_CR64 || WITH_CR64S) || sizeof_real == sizeof(float))
#define WITH_FLOAT64 WITH_CR64 || WITH_CR64S
static void dft_stage_fn(stage_t * p, fifo_t * output_fifo)
{
real * output, * dft_out;
int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
rate_shared_t const * s = p->shared;
dft_filter_t const * f = &s->dft_filter[p->dft_filter_num];
int const overlap = f->num_taps - 1;
if (p->at.integer + p->L * num_in >= f->dft_length) {
fn_t const * const RDFT_CB = p->rdft_cb;
size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(p->core_flags);
div_t divd = div(f->dft_length - overlap - p->at.integer + p->L - 1, p->L);
real const * input = fifo_read_ptr(&p->fifo);
fifo_read(&p->fifo, divd.quot, NULL);
num_in -= divd.quot;
output = fifo_reserve(output_fifo, f->dft_length);
dft_out = (p->core_flags & CORE_SIMD_DFT)? p->dft_out : output;
if (lsx_is_power_of_2(p->L)) { /* F-domain */
int portion = f->dft_length / p->L;
memcpy(dft_out, input, (unsigned)portion * sizeof_real);
rdft_oforward(portion, f->dft_forward_setup, dft_out, p->dft_scratch);
if (IS_FLOAT32) {
#define dft_out ((float *)dft_out)
for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */
dft_out[i] = dft_out[(portion << 1) - i],
dft_out[i+1] = -dft_out[(portion << 1) - i + 1];
dft_out[portion] = dft_out[1];
dft_out[portion + 1] = 0;
dft_out[1] = dft_out[0];
#undef dft_out
}
else if (WITH_FLOAT64) {
#define dft_out ((double *)dft_out)
for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */
dft_out[i] = dft_out[(portion << 1) - i],
dft_out[i+1] = -dft_out[(portion << 1) - i + 1];
dft_out[portion] = dft_out[1];
dft_out[portion + 1] = 0;
dft_out[1] = dft_out[0];
#undef dft_out
}
for (portion <<= 1; i < f->dft_length; i += portion, portion <<= 1) {
memcpy((char *)dft_out + (size_t)i * sizeof_real, dft_out, (size_t)portion * sizeof_real);
((char *)dft_out)[((size_t)i + 1) * sizeof_real] = 0;
}
if (p->step.integer > 0)
rdft_reorder_back(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
} else {
if (p->L == 1)
memcpy(dft_out, input, (size_t)f->dft_length * sizeof_real);
else {
memset(dft_out, 0, (size_t)f->dft_length * sizeof_real);
if (IS_FLOAT32)
for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L)
((float *)dft_out)[i] = ((float *)input)[j];
else if (WITH_FLOAT64)
for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L)
((double *)dft_out)[i] = ((double *)input)[j];
p->at.integer = p->L - 1 - divd.rem;
}
if (p->step.integer > 0)
rdft_forward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
else
rdft_oforward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
}
if (p->step.integer > 0) {
rdft_convolve(f->dft_length, f->dft_backward_setup, dft_out, f->coefs);
rdft_backward(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
if ((p->core_flags & CORE_SIMD_DFT) && p->step.integer == 1)
memcpy(output, dft_out, (size_t)f->dft_length * sizeof_real);
if (p->step.integer != 1) {
if (IS_FLOAT32)
for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j,
i += p->step.integer)
((float *)output)[j] = ((float *)dft_out)[i];
else if (WITH_FLOAT64)
for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j,
i += p->step.integer)
((double *)output)[j] = ((double *)dft_out)[i];
p->remM = i - (f->dft_length - overlap);
fifo_trim_by(output_fifo, f->dft_length - j);
}
else fifo_trim_by(output_fifo, overlap);
}
else { /* F-domain */
int m = -p->step.integer;
rdft_convolve_portion(f->dft_length >> m, dft_out, f->coefs);
rdft_obackward(f->dft_length >> m, f->dft_backward_setup, dft_out, p->dft_scratch);
if (p->core_flags & CORE_SIMD_DFT)
memcpy(output, dft_out, (size_t)(f->dft_length >> m) * sizeof_real);
fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >>m);
}
(void)RDFT_CB;
}
p->input_size = (f->dft_length - p->at.integer + p->L - 1) / p->L;
}
/* Set to 4 x nearest power of 2 or half of that */
/* if danger of causing too many cache misses. */
static int set_dft_length(int num_taps, int min, int large)
{
double d = log((double)num_taps) / log(2.);
return 1 << range_limit((int)(d + 2.77), min, max((int)(d + 1.77), large));
}
static void dft_stage_init(
unsigned instance, double Fp, double Fs, double Fn, double att,
double phase_response, stage_t * p, int L, int M, double * multiplier,
unsigned min_dft_size, unsigned large_dft_size, core_flags_t core_flags,
fn_t const * RDFT_CB)
{
dft_filter_t * f = &p->shared->dft_filter[instance];
int num_taps = 0, dft_length = f->dft_length, i, offset;
bool f_domain_m = abs(3-M) == 1 && Fs <= 1;
size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(core_flags);
if (!dft_length) {
int k = phase_response == 50 && lsx_is_power_of_2(L) && Fn == L? L << 1 : 4;
double m, * h = lsx_design_lpf(Fp, Fs, Fn, att, &num_taps, -k, -1.);
if (phase_response != 50)
lsx_fir_to_phase(&h, &num_taps, &f->post_peak, phase_response);
else f->post_peak = num_taps / 2;
dft_length = set_dft_length(num_taps, (int)min_dft_size, (int)large_dft_size);
f->coefs = rdft_calloc((size_t)dft_length, sizeof_real);
offset = dft_length - num_taps + 1;
m = (1. / dft_length) * rdft_multiplier() * L * *multiplier;
if (IS_FLOAT32) for (i = 0; i < num_taps; ++i)
((float *)f->coefs)[(i + offset) & (dft_length - 1)] =(float)(h[i] * m);
else if (WITH_FLOAT64) for (i = 0; i < num_taps; ++i)
((double *)f->coefs)[(i + offset) & (dft_length - 1)] = h[i] * m;
free(h);
}
if (rdft_flags() & RDFT_IS_SIMD)
p->dft_out = rdft_malloc(sizeof_real * (size_t)dft_length);
if (rdft_flags() & RDFT_NEEDS_SCRATCH)
p->dft_scratch = rdft_malloc(2 * sizeof_real * (size_t)dft_length);
if (!f->dft_length) {
void * coef_setup = rdft_forward_setup(dft_length);
int Lp = lsx_is_power_of_2(L)? L : 1;
int Mp = f_domain_m? M : 1;
f->dft_forward_setup = rdft_forward_setup(dft_length / Lp);
f->dft_backward_setup = rdft_backward_setup(dft_length / Mp);
if (Mp == 1)
rdft_forward(dft_length, coef_setup, f->coefs, p->dft_scratch);
else
rdft_oforward(dft_length, coef_setup, f->coefs, p->dft_scratch);
rdft_delete_setup(coef_setup);
f->num_taps = num_taps;
f->dft_length = dft_length;
lsx_debug("fir_len=%i dft_length=%i Fp=%g Fs=%g Fn=%g att=%g %i/%i",
num_taps, dft_length, Fp, Fs, Fn, att, L, M);
}
*multiplier = 1;
p->out_in_ratio = (double)L / M;
p->core_flags = core_flags;
p->rdft_cb = RDFT_CB;
p->fn = dft_stage_fn;
p->preload = f->post_peak / L;
p->at.integer = f->post_peak % L;
p->L = L;
p->step.integer = f_domain_m? -M/2 : M;
p->dft_filter_num = instance;
p->block_len = f->dft_length - (f->num_taps - 1);
p->phase0 = p->at.integer / p->L;
p->input_size = (f->dft_length - p->at.integer + p->L - 1) / p->L;
}
static struct half_fir_info const * find_half_fir(
struct half_fir_info const * firs, size_t len, double att)
{
size_t i;
for (i = 0; i + 1 < len && att > firs[i].att; ++i);
return &firs[i];
}
#define have_pre_stage (preM * preL != 1)
#define have_arb_stage (arbM * arbL != 1)
#define have_post_stage (postM * postL != 1)
#include "soxr.h"
STATIC char const * _soxr_init(
rate_t * const p, /* Per audio channel. */
rate_shared_t * const shared, /* By channels undergoing same rate change. */
double const io_ratio, /* Input rate divided by output rate. */
soxr_quality_spec_t const * const q_spec,
soxr_runtime_spec_t const * const r_spec,
double multiplier, /* Linear gain to apply during conversion. */
cr_core_t const * const core,
core_flags_t const core_flags)
{
size_t const sizeof_real = sizeof(char) << LOG2_SIZEOF_REAL(core_flags);
double const tolerance = 1 + 1e-5;
double bits = q_spec->precision;
rolloff_t const rolloff = (rolloff_t)(q_spec->flags & 3);
int interpolator = (int)(r_spec->flags & 3) - 1;
double const Fp0 = q_spec->passband_end, Fs0 = q_spec->stopband_begin;
double const phase_response = q_spec->phase_response, tbw0 = Fs0-Fp0;
bool const maintain_3dB_pt = !!(q_spec->flags & SOXR_MAINTAIN_3DB_PT);
double tbw_tighten = 1, alpha;
#define tighten(x) (Fs0-(Fs0-(x))*tbw_tighten)
double arbM = io_ratio, Fn1, Fp1 = Fp0, Fs1 = Fs0, bits1 = min(bits,33);
double att = (bits1 + 1) * linear_to_dB(2.), attArb = att; /* +1: pass+stop */
int preL = 1, preM = 1, shr = 0, arbL = 1, postL = 1, postM = 1;
bool upsample=false, rational=false, iOpt=!(r_spec->flags&SOXR_NOSMALLINTOPT);
bool lq_bits= (q_spec->flags & SOXR_PROMOTE_TO_LQ)? bits <= 16 : bits == 16;
bool lq_Fp0 = (q_spec->flags & SOXR_PROMOTE_TO_LQ)? Fp0<=lq_bw0 : Fp0==lq_bw0;
int n = 0, i, mode = lq_bits && rolloff == rolloff_medium? io_ratio > 1 ||
phase_response != 50 || !lq_Fp0 || Fs0 != 1 : ((int)ceil(bits1) - 6) / 4;
struct half_fir_info const * half_fir_info;
stage_t * s;
if (io_ratio < 1 && Fs0 - 1 > 1 - Fp0 / tolerance)
return "imaging greater than rolloff";
if (.002 / tolerance > tbw0 || tbw0 > .5 * tolerance)
return "transition bandwidth not in [0.2,50] % of nyquist";
if (.5 / tolerance > Fp0 || Fs0 > 1.5 * tolerance)
return "transition band not within [50,150] % of nyquist";
if (bits!=0 && (15 > bits || bits > 33))
return "precision not in [15,33] bits";
if (io_ratio <= 0)
return "resampling factor not positive";
if (0 > phase_response || phase_response > 100)
return "phase response not in [0=min-phase,100=max-phase] %";
p->core = core;
p->io_ratio = io_ratio;
if (bits!=0) while (!n++) { /* Determine stages: */
int try, L, M, x, maxL = interpolator > 0? 1 : mode? 2048 :
(int)ceil(r_spec->coef_size_kbytes * 1000. / (U100_l * (int)sizeof_real));
double d, epsilon = 0, frac;
upsample = arbM < 1;
for (i = (int)(.5 * arbM), shr = 0; i >>= 1; arbM *= .5, ++shr);
preM = upsample || (arbM > 1.5 && arbM < 2);
postM = 1 + (arbM > 1 && preM), arbM /= postM;
preL = 1 + (!preM && arbM < 2) + (upsample && mode), arbM *= preL;
if ((frac = arbM - (int)arbM)!=0)
epsilon = fabs(floor(frac * MULT32 + .5) / (frac * MULT32) - 1);
for (i = 1, rational = frac==0; i <= maxL && !rational; ++i) {
d = frac * i, try = (int)(d + .5);
if ((rational = fabs(try / d - 1) <= epsilon)) { /* No long doubles! */
if (try == i)
arbM = ceil(arbM), shr += x = arbM > 3, arbM /= 1 + x;
else arbM = i * (int)arbM + try, arbL = i;
}
}
L = preL * arbL, M = (int)(arbM * postM), x = (L|M)&1, L >>= !x, M >>= !x;
if (iOpt && postL == 1 && (d = preL * arbL / arbM) > 4 && d != 5) {
for (postL = 4, i = (int)(d / 16); (i >>= 1) && postL < 256; postL <<= 1);
arbM = arbM * postL / arbL / preL, arbL = 1, n = 0;
} else if (rational && (max(L, M) < 3 + 2 * iOpt || L * M < 6 * iOpt))
preL = L, preM = M, arbM = arbL = postM = 1;
if (!mode && (!rational || !n))
++mode, n = 0;
}
p->num_stages = shr + have_pre_stage + have_arb_stage + have_post_stage;
if (!p->num_stages && multiplier != 1) {
bits = arbL = 0; /* Use cubic_stage in this case. */
++p->num_stages;
}
p->stages = calloc((size_t)p->num_stages + 1, sizeof(*p->stages));
if (!p->stages)
return "out of memory";
for (i = 0; i < p->num_stages; ++i) {
p->stages[i].num = i;
p->stages[i].shared = shared;
p->stages[i].input_size = 4096;
}
p->stages[0].is_input = true;
alpha = postM / (io_ratio * (postL << 0));
if ((n = p->num_stages) > 1) { /* Att. budget: */
if (have_arb_stage)
att += linear_to_dB(2.), attArb = att, --n;
att += linear_to_dB((double)n);
}
half_fir_info = find_half_fir(core->half_firs, core->half_firs_len, att);
for (i = 0, s = p->stages; i < shr; ++i, ++s) {
s->fn = half_fir_info->fn;
s->coefs = half_fir_info->coefs;
s->n = half_fir_info->num_coefs;
s->pre_post = 4 * s->n;
s->preload = s->pre = s->pre_post >> 1;
}
if (have_pre_stage) {
if (maintain_3dB_pt && have_post_stage) { /* Trans. bands overlapping. */
double x = tbw0 * lsx_inv_f_resp(-3., att);
x = -lsx_f_resp(x / (max(2 * alpha - Fs0, alpha) - Fp0), att);
if (x > .035) {
tbw_tighten = ((4.3074e-3 - 3.9121e-4 * x) * x - .040009) * x + 1.0014;
lsx_debug("tbw_tighten=%g (%gdB)", tbw_tighten, x);
}
}
Fn1 = preM? max(preL, preM) : arbM / arbL;
dft_stage_init(0, tighten(Fp1), Fs1, Fn1, att, phase_response, s++, preL,
max(preM, 1), &multiplier, r_spec->log2_min_dft_size,
r_spec->log2_large_dft_size, core_flags, core->rdft_cb);
Fp1 /= Fn1, Fs1 /= Fn1;
}
if (bits==0 && have_arb_stage) { /* `Quick' cubic arb stage: */
s->fn = core->cubic_stage_fn;
s->mult = multiplier, multiplier = 1;
s->step.whole = (int64_t)(arbM * MULT32 + .5);
s->pre_post = max(3, s->step.integer);
s->preload = s->pre = 1;
s->out_in_ratio = MULT32 / (double)s->step.whole;
}
else if (have_arb_stage) { /* Higher quality arb stage: */
static const float rolloffs[] = {-.01f, -.3f, 0, -.103f};
poly_fir_t const * f = &core->poly_firs[6*(upsample+!!preM)+mode-!upsample];
int order, num_coefs = (int)f->interp[0].scalar, phase_bits, phases;
size_t coefs_size;
double at, Fp = Fp1, Fs, Fn, mult = upsample? 1 : arbM / arbL;
poly_fir1_t const * f1;
if (!upsample && preM)
Fn = 2 * mult, Fs = 3 + fabs(Fs1 - 1);
else Fn = 1, Fs = 2 - (mode? Fp1 + (Fs1 - Fp1) * .7 : Fs1);
if (mode)
Fp = Fs - (Fs - Fp) / (1 - lsx_inv_f_resp(rolloffs[rolloff], attArb));
i = (interpolator < 0? !rational : max(interpolator, !rational)) - 1;
do {
f1 = &f->interp[++i];
assert(f1->fn);
if (i)
arbM /= arbL, arbL = 1, rational = false;
phase_bits = (int)ceil(f1->scalar - log(mult)/log(2.));
phases = !rational? (1 << phase_bits) : arbL;
if (f->interp[0].scalar==0) {
int phases0 = max(phases, 19), n0 = 0;
lsx_design_lpf(Fp, Fs, -Fn, attArb, &n0, phases0, f->beta);
num_coefs = n0 / phases0 + 1, num_coefs += num_coefs & !preM;
}
if ((num_coefs & 1) && rational && (arbL & 1))
phases <<= 1, arbL <<= 1, arbM *= 2;
at = arbL * (s->phase0 = .5 * (num_coefs & 1));
order = i + (i && mode > 4);
coefs_size = (size_t)(num_coefs4 * phases * (order+1)) * sizeof_real;
} while (interpolator < 0 && i < 2 && f->interp[i+1].fn &&
coefs_size / 1000 > r_spec->coef_size_kbytes);
if (!s->shared->poly_fir_coefs) {
int num_taps = num_coefs * phases - 1;
double * coefs = lsx_design_lpf(
Fp, Fs, Fn, attArb, &num_taps, phases, f->beta);
s->shared->poly_fir_coefs = prepare_poly_fir_coefs(
coefs, num_coefs, phases, order, multiplier, core_flags, &core->mem);
lsx_debug("fir_len=%i phases=%i coef_interp=%i size=%.3gk",
num_coefs, phases, order, (double)coefs_size / 1000.);
free(coefs);
}
multiplier = 1;
s->fn = f1->fn;
s->pre_post = num_coefs4 - 1;
s->preload = ((num_coefs - 1) >> 1) + (num_coefs4 - num_coefs);
s->n = num_coefs4;
s->phase_bits = phase_bits;
s->L = arbL;
s->use_hi_prec_clock =
mode>1 && (q_spec->flags & SOXR_HI_PREC_CLOCK) && !rational;
#if FLOAT_HI_PREC_CLOCK
if (s->use_hi_prec_clock) {
s->at.flt = at;
s->step.flt = arbM;
s->out_in_ratio = (double)(arbL / s->step.flt);
} else
#endif
{
s->at.whole = (int64_t)(at * MULT32 + .5);
#if !FLOAT_HI_PREC_CLOCK
if (s->use_hi_prec_clock) {
double M = arbM * MULT32;
s->at.fix.ls.parts.ms = 0x80000000ul;
s->step.whole = (int64_t)M;
M -= (double)s->step.whole;
M *= MULT32 * MULT32;
s->step.fix.ls.all = (uint64_t)M;
} else
#endif
s->step.whole = (int64_t)(arbM * MULT32 + .5);
s->out_in_ratio = MULT32 * arbL / (double)s->step.whole;
}
++s;
}
if (have_post_stage)
dft_stage_init(1, tighten(Fp0 / (upsample? alpha : 1)), upsample? max(2 -
Fs0 / alpha, 1) : Fs0, (double)max(postL, postM), att, phase_response,
s++, postL, postM, &multiplier, r_spec->log2_min_dft_size,
r_spec->log2_large_dft_size, core_flags, core->rdft_cb);
lsx_debug("%g: »%i⋅%i/%i⋅%i/%g⋅%i/%i %x", 1/io_ratio,
shr, preL, preM, arbL, arbM, postL, postM, core_flags);
for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) {
fifo_create(&s->fifo, (int)sizeof_real);
memset(fifo_reserve(&s->fifo, s->preload), 0,
sizeof_real * (size_t)s->preload);
lsx_debug_more("%5i|%-5i preload=%i remL=%i",
s->pre, s->pre_post-s->pre, s->preload, s->at.integer);
}
fifo_create(&s->fifo, (int)sizeof_real);
return 0;
}
static bool stage_process(stage_t * stage, bool flushing)
{
fifo_t * fifo = &stage->fifo;
bool done = false;
int want;
while (!done && (want = stage->input_size - fifo_occupancy(fifo)) > 0) {
if (stage->is_input) {
if (flushing)
memset(fifo_reserve(fifo, want), 0, fifo->item_size * (size_t)want);
else done = true;
}
else done = stage_process(stage - 1, flushing);
}
stage->fn(stage, &stage[1].fifo);
return done && fifo_occupancy(fifo) < stage->input_size;
}
STATIC void _soxr_process(rate_t * p, size_t olen)
{
int const n = p->flushing? min(-(int)p->samples_out, (int)olen) : (int)olen;
stage_t * stage = &p->stages[p->num_stages];
fifo_t * fifo = &stage->fifo;
bool done = false;
while (!done && fifo_occupancy(fifo) < (int)n)
done = stage->is_input || stage_process(stage - 1, p->flushing);
}
STATIC real * _soxr_input(rate_t * p, real const * samples, size_t n)
{
if (p->flushing)
return 0;
p->samples_in += (int64_t)n;
return fifo_write(&p->stages[0].fifo, (int)n, samples);
}
STATIC real const * _soxr_output(rate_t * p, real * samples, size_t * n0)
{
fifo_t * fifo = &p->stages[p->num_stages].fifo;
int n = p->flushing? min(-(int)p->samples_out, (int)*n0) : (int)*n0;
p->samples_out += n = min(n, fifo_occupancy(fifo));
return fifo_read(fifo, (int)(*n0 = (size_t)n), samples);
}
STATIC void _soxr_flush(rate_t * p)
{
if (p->flushing) return;
p->samples_out -= (int64_t)((double)p->samples_in / p->io_ratio + .5);
p->samples_in = 0;
p->flushing = true;
}
STATIC void _soxr_close(rate_t * p)
{
if (p->stages) {
fn_t const * const RDFT_CB = p->core->rdft_cb;
rate_shared_t * shared = p->stages[0].shared;
int i;
for (i = 0; i <= p->num_stages; ++i) {
stage_t * s = &p->stages[i];
rdft_free(s->dft_scratch);
rdft_free(s->dft_out);
fifo_delete(&s->fifo);
}
if (shared) {
for (i = 0; i < 2; ++i) {
dft_filter_t * f= &shared->dft_filter[i];
rdft_free(f->coefs);
rdft_delete_setup(f->dft_forward_setup);
rdft_delete_setup(f->dft_backward_setup);
}
p->core->mem.free(shared->poly_fir_coefs);
memset(shared, 0, sizeof(*shared));
}
free(p->stages);
(void)RDFT_CB;
}
}
#if defined SOXR_LIB
STATIC double _soxr_delay(rate_t * p)
{
return (double)p->samples_in / p->io_ratio - (double)p->samples_out;
}
STATIC void _soxr_sizes(size_t * shared, size_t * channel)
{
*shared = sizeof(rate_shared_t);
*channel = sizeof(rate_t);
}
#endif

175
src/cr.h 100644
View File

@ -0,0 +1,175 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#if !defined soxr_rate1_included
#define soxr_rate1_included
#define FIFO_SIZE_T int
#include "fifo.h"
typedef void real; /* float or double */
struct stage;
typedef void (* stage_fn_t)(struct stage * input, fifo_t * output);
typedef struct half_fir_info {int num_coefs; real const * coefs; stage_fn_t fn, dfn; float att;} half_fir_info_t;
typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t;
typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t;
#define U100_l 42
#define MULT32 (65536. * 65536.)
/* Conceptually: coef_p is &coefs[num_phases][fir_len][interp_order+1]: */
#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[\
(fir_len) * ((interp_order) + 1) * (phase_num) + \
((interp_order) + 1) * (fir_coef_num) + \
((interp_order) - (coef_interp_num))]
/* Conceptually: coef_p is &coefs[num_phases][fir_len/4][interp_order+1][4]: */
#define coef4(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[\
(fir_len) * ((interp_order) + 1) * (phase_num) + \
((interp_order) + 1) * ((fir_coef_num) & ~3) + \
4 * ((interp_order) - (coef_interp_num)) + \
((fir_coef_num) & 3)]
typedef union { /* Int64 in parts */
#if HAVE_BIGENDIAN
struct {int32_t ms; uint32_t ls;} parts;
#else
struct {uint32_t ls; int32_t ms;} parts;
#endif
int64_t all;
} int64p_t;
typedef union { /* Uint64 in parts */
#if HAVE_BIGENDIAN
struct {uint32_t ms, ls;} parts;
#else
struct {uint32_t ls, ms;} parts;
#endif
uint64_t all;
} uint64p_t;
#define FLOAT_HI_PREC_CLOCK 0 /* Non-float hi-prec has ~96 bits. */
#define float_step_t long double /* __float128 is also a (slow) option */
typedef struct {
int dft_length, num_taps, post_peak;
void * dft_forward_setup, * dft_backward_setup;
real * coefs;
} dft_filter_t;
typedef struct { /* So generated filter coefs may be shared between channels */
real * poly_fir_coefs;
dft_filter_t dft_filter[2];
} rate_shared_t;
typedef union { /* Fixed point arithmetic */
struct {uint64p_t ls; int64p_t ms;} fix;
float_step_t flt;
} step_t;
#define CORE_DBL 1
#define CORE_SIMD_POLY 2
#define CORE_SIMD_HALF 4
#define CORE_SIMD_DFT 8
#define LOG2_SIZEOF_REAL(core_flags) (2 + ((core_flags) & 1))
typedef int core_flags_t;
#if defined SOXR_LIB
#include "rdft_t.h"
#else
typedef void fn_t;
#endif
typedef struct stage {
int num;
/* Common to all stage types: */
core_flags_t core_flags;
stage_fn_t fn;
fifo_t fifo;
int pre; /* Number of past samples to store */
int pre_post; /* pre + number of future samples to store */
int preload; /* Number of zero samples to pre-load the fifo */
double out_in_ratio; /* For buffer management. */
int input_size;
bool is_input;
/* For a stage with variable (run-time generated) filter coefs: */
fn_t const * rdft_cb;
rate_shared_t * shared;
unsigned dft_filter_num; /* Which, if any, of the 2 DFT filters to use */
real * dft_scratch;
float * dft_out;
real const * coefs;
/* For a stage with variable L/M: */
step_t at, step;
bool use_hi_prec_clock;
int L, remM;
int n, phase_bits, block_len;
double mult, phase0;
} stage_t;
#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post)
#define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre)
#define integer fix.ms.parts.ms
#define fraction fix.ms.parts.ls
#define whole fix.ms.all
#define lq_bw0 (1385/2048.) /* ~.67625, FP exact. */
typedef enum {rolloff_small, rolloff_medium, rolloff_none} rolloff_t;
typedef struct {
void * (* alloc)(size_t);
void * (* calloc)(size_t, size_t);
void (* free)(void *);
} alloc_t;
typedef struct {
alloc_t mem;
half_fir_info_t const * half_firs;
size_t half_firs_len;
half_fir_info_t const * doub_firs;
size_t doub_firs_len;
stage_fn_t cubic_stage_fn;
poly_fir_t const * poly_firs;
fn_t * rdft_cb;
} cr_core_t;
typedef struct rate rate_t;
struct rate {
cr_core_t const * core;
double io_ratio;
int64_t samples_in, samples_out;
int num_stages, flushing;
stage_t * stages;
};
#if defined SOXR_LIB
#include "soxr.h"
char const * _soxr_init(
rate_t * const p, /* Per audio channel. */
rate_shared_t * const shared, /* Between channels (undergoing same rate change)*/
double const io_ratio, /* Input rate divided by output rate. */
soxr_quality_spec_t const * const q_spec,
soxr_runtime_spec_t const * const r_spec,
double multiplier, /* Linear gain to apply during conversion. 1 */
cr_core_t const * const core,
core_flags_t const);
void _soxr_process(struct rate * p, size_t olen);
real * _soxr_input(struct rate * p, real const * samples, size_t n);
real const * _soxr_output(struct rate * p, real * samples, size_t * n0);
void _soxr_flush(struct rate * p);
void _soxr_close(struct rate * p);
double _soxr_delay(struct rate * p);
void _soxr_sizes(size_t * shared, size_t * channel);
#endif
#endif

8
src/cr32.c 100644
View File

@ -0,0 +1,8 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define RATE_CB _soxr_rate32_cb
#define CORE_STR "cr32"
#define CORE_TYPE 0
#include "cr-core.c"

8
src/cr32s.c 100644
View File

@ -0,0 +1,8 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define RATE_CB _soxr_rate32s_cb
#define CORE_STR "cr32s"
#define CORE_TYPE (CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT)
#include "cr-core.c"

8
src/cr64.c 100644
View File

@ -0,0 +1,8 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define RATE_CB _soxr_rate64_cb
#define CORE_STR "cr64"
#define CORE_TYPE CORE_DBL
#include "cr-core.c"

8
src/cr64s.c 100644
View File

@ -0,0 +1,8 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define RATE_CB _soxr_rate64s_cb
#define CORE_STR "cr64s"
#define CORE_TYPE (CORE_DBL|CORE_SIMD_POLY|CORE_SIMD_HALF|CORE_SIMD_DFT)
#include "cr-core.c"

View File

@ -23,7 +23,7 @@
#if WITH_DOUBLE_PRECISION
#if WITH_CR64 || WITH_CR64S
void _soxr_deinterleave(double * * dest, /* Round/clipping not needed here */
soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch)
{
@ -40,7 +40,7 @@ void _soxr_deinterleave(double * * dest, /* Round/clipping not needed here */
#if WITH_SINGLE_PRECISION
#if WITH_CR32 || WITH_CR32S || WITH_VR32
void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch)
{
@ -97,7 +97,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
#endif
#endif
#if WITH_DOUBLE_PRECISION
#if WITH_CR64 || WITH_CR64S
#define FLOATX double
#define LSX_RINT_CLIP_2 lsx_rint32_clip_2
@ -139,7 +139,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
#if WITH_SINGLE_PRECISION
#if WITH_CR32 || WITH_CR32S || WITH_VR32
#define FLOATX float
#define LSX_RINT_CLIP_2 lsx_rint32_clip_2_f
@ -199,7 +199,7 @@ void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
return 0; \
} while (0)
#if WITH_DOUBLE_PRECISION
#if WITH_CR64 || WITH_CR64S
size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0,
double const * const * src, size_t n, unsigned ch, unsigned long * seed)
{
@ -225,7 +225,7 @@ size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0,
}
#endif
#if WITH_SINGLE_PRECISION
#if WITH_CR32 || WITH_CR32S || WITH_VR32
size_t /* clips */ _soxr_interleave_f(soxr_datatype_t data_type, void * * dest0,
float const * const * src, size_t n, unsigned ch, unsigned long * seed)
{

View File

@ -282,22 +282,16 @@ Appendix :
*/
#include <math.h>
#include "math-wrap.h"
#include "fft4g.h"
#ifdef FFT4G_FLOAT
#define double float
#define one_half 0.5f
#if defined _MSC_VER
#define sin (float)sin
#define cos (float)cos
#define atan (float)atan
#else
#define sin sinf
#define cos cosf
#define atan atanf
#endif
#define sin(x) sinf(x)
#define cos(x) cosf(x)
#define atan(x) atanf(x)
#define cdft lsx_cdft_f
#define rdft lsx_rdft_f
@ -818,7 +812,7 @@ static void bitrv2(int n, int *ip0, double *a)
static void bitrv2conj(int n, int *ip0, double *a)
{
int j, j1, k, k1, l, m, m2, ip[256];
int j, j1, k, k1, l, m, m2, ip[512];
double xr, xi, yr, yi;
(void)ip0;

View File

@ -1,17 +1,19 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include <stdlib.h>
#include "filter.h"
#define FFT4G_FLOAT
#include "fft4g.c"
#include "rdft_t.h"
static void * null(void) {return 0;}
static void forward (int length, void * setup, double * H) {lsx_safe_rdft_f(length, 1, H); (void)setup;}
static void backward(int length, void * setup, double * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;}
static int multiplier(void) {return 2;}
static void nothing(void) {}
static int flags(void) {return 0;}
typedef void (* fn_t)(void);
fn_t _soxr_rdft32_cb[] = {
(fn_t)null,
(fn_t)null,
@ -24,4 +26,8 @@ fn_t _soxr_rdft32_cb[] = {
(fn_t)_soxr_ordered_partial_convolve_f,
(fn_t)multiplier,
(fn_t)nothing,
(fn_t)malloc,
(fn_t)calloc,
(fn_t)free,
(fn_t)flags,
};

View File

@ -3,14 +3,15 @@
#include "filter.h"
#include "simd.h"
#include "rdft_t.h"
static void * null(void) {return 0;}
static void nothing(void) {}
static void forward (int length, void * setup, float * H) {lsx_safe_rdft_f(length, 1, H); (void)setup;}
static void backward(int length, void * setup, float * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;}
static int multiplier(void) {return 2;}
static int flags(void) {return RDFT_IS_SIMD;}
typedef void (* fn_t)(void);
fn_t _soxr_rdft32s_cb[] = {
(fn_t)null,
(fn_t)null,
@ -19,8 +20,12 @@ fn_t _soxr_rdft32s_cb[] = {
(fn_t)forward,
(fn_t)backward,
(fn_t)backward,
(fn_t)_soxr_ordered_convolve_simd,
(fn_t)_soxr_ordered_partial_convolve_simd,
(fn_t)ORDERED_CONVOLVE_SIMD,
(fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
(fn_t)multiplier,
(fn_t)nothing,
(fn_t)SIMD_ALIGNED_MALLOC,
(fn_t)SIMD_ALIGNED_CALLOC,
(fn_t)SIMD_ALIGNED_FREE,
(fn_t)flags,
};

View File

@ -1,16 +1,18 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include <stdlib.h>
#include "filter.h"
#include "fft4g.c"
#include "soxr-config.h"
#if WITH_DOUBLE_PRECISION
#if WITH_CR64
static void * null(void) {return 0;}
static void nothing(void) {}
static void forward (int length, void * setup, double * H) {lsx_safe_rdft(length, 1, H); (void)setup;}
static void backward(int length, void * setup, double * H) {lsx_safe_rdft(length, -1, H); (void)setup;}
static int multiplier(void) {return 2;}
static int flags(void) {return 0;}
typedef void (* fn_t)(void);
fn_t _soxr_rdft64_cb[] = {
@ -25,5 +27,9 @@ fn_t _soxr_rdft64_cb[] = {
(fn_t)_soxr_ordered_partial_convolve,
(fn_t)multiplier,
(fn_t)nothing,
(fn_t)malloc,
(fn_t)calloc,
(fn_t)free,
(fn_t)flags,
};
#endif

View File

@ -9,6 +9,7 @@
#endif
#if !defined FIFO_REALLOC
#include <stdlib.h>
#define FIFO_REALLOC(a,b,c) realloc(a,b)
#undef FIFO_FREE
#define FIFO_FREE free

View File

@ -1,12 +1,9 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include "filter.h"
#include <math.h>
#if !defined M_PI
#define M_PI 3.14159265358979323846
#endif
#include "math-wrap.h"
#include <assert.h>
#include <string.h>
#include <stdlib.h>
@ -14,7 +11,7 @@
#include "fft4g.h"
#include "ccrw2.h"
#if 1 || WITH_DOUBLE_PRECISION /* Always need this, for lsx_fir_to_phase. */
#if 1 || WITH_CR64 || WITH_CR64S /* Always need this, for lsx_fir_to_phase. */
#define DFT_FLOAT double
#define DONE_WITH_FFT_CACHE done_with_fft_cache
#define FFT_CACHE_CCRW fft_cache_ccrw
@ -31,7 +28,7 @@
#include "fft4g_cache.h"
#endif
#if WITH_SINGLE_PRECISION && !AVCODEC_FOUND
#if WITH_CR32 && !AVCODEC_FOUND
#define DFT_FLOAT float
#define DONE_WITH_FFT_CACHE done_with_fft_cache_f
#define FFT_CACHE_CCRW fft_cache_ccrw_f
@ -48,14 +45,14 @@
#include "fft4g_cache.h"
#endif
#if WITH_DOUBLE_PRECISION || !SOXR_LIB
#if WITH_CR64 || WITH_CR64S || !SOXR_LIB
#define DFT_FLOAT double
#define ORDERED_CONVOLVE lsx_ordered_convolve
#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve
#include "rdft.h"
#endif
#if WITH_SINGLE_PRECISION
#if WITH_CR32
#define DFT_FLOAT float
#define ORDERED_CONVOLVE lsx_ordered_convolve_f
#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve_f
@ -129,6 +126,9 @@ double * lsx_design_lpf(
int n = *num_taps, phases = max(k, 1), modulo = max(-k, 1);
double tr_bw, Fc, rho = phases == 1? .5 : att < 120? .63 : .75;
lsx_debug_more("./sinctest %-12.7g %-12.7g %g 0 %-5g %i %i 50 %g %g -4 >1",
Fp, Fs, Fn, att, *num_taps, k, beta, rho);
Fp /= fabs(Fn), Fs /= fabs(Fn); /* Normalise to Fn = 1 */
tr_bw = .5 * (Fs - Fp); /* Transition band-width: 6dB to stop points */
tr_bw /= phases, Fs /= phases;
@ -243,3 +243,35 @@ void lsx_fir_to_phase(double * * h, int * len, int * post_len, double phase)
work[imp_peak], *len, *post_len, 100 - 100. * *post_len / (*len - 1));
free(pi_wraps), free(work);
}
#define F_x(F,expr) static double F(double x) {return expr;}
F_x(sinePhi, ((2.0517e-07*x-1.1303e-04)*x+.023154)*x+.55924 )
F_x(sinePsi, ((9.0667e-08*x-5.6114e-05)*x+.013658)*x+1.0977 )
F_x(sinePow, log(.5)/log(sin(x*.5)) )
#define dB_to_linear(x) exp((x) * (M_LN10 * 0.05))
double lsx_f_resp(double t, double a)
{
double x;
if (t > (a <= 160? .8 : .82)) {
double a1 = a+15;
double p = .00035*a+.375;
double w = 1/(1-.597)*asin(pow((a1-10.6)/a1,1/p));
double c = 1+asin(pow(1-a/a1,1/p))/w;
return a1*(pow(sin((c-t)*w),p)-1);
}
if (t > .5)
x = sinePsi(a), x = pow(sin((1-t) * x), sinePow(x));
else
x = sinePhi(a), x = 1 - pow(sin(t * x), sinePow(x));
return linear_to_dB(x);
}
double lsx_inv_f_resp(double drop, double a)
{
double x = sinePhi(a), s;
drop = dB_to_linear(drop);
s = drop > .5 ? 1 - drop : drop;
x = asin(pow(s, 1/sinePow(x))) / x;
return drop > .5? x : 1 -x;
}

View File

@ -33,7 +33,12 @@ double * lsx_design_lpf(
int * num_taps, /* 0: value will be estimated */
int k, /* >0: number of phases; <0: num_taps ≡ 1 (mod -k) */
double beta); /* <0: value will be estimated */
void lsx_fir_to_phase(double * * h, int * len,
int * post_len, double phase0);
double lsx_f_resp(double t, double a);
double lsx_inv_f_resp(double drop, double a);
#define lsx_to_3dB(a) (1 - lsx_inv_f_resp(-3., a))
#endif

View File

@ -1,151 +0,0 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include "half_coefs.h"
#define FUNCTION h8
#define CONVOLVE _ _ _ _ _ _ _ _
#define h8_l 8
#define COEFS half_fir_coefs_8
#include "half-fir.h"
#define FUNCTION h9
#define CONVOLVE _ _ _ _ _ _ _ _ _
#define h9_l 9
#define COEFS half_fir_coefs_9
#include "half-fir.h"
#define FUNCTION h10
#define CONVOLVE _ _ _ _ _ _ _ _ _ _
#define h10_l 10
#define COEFS half_fir_coefs_10
#include "half-fir.h"
#define FUNCTION h11
#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _
#define h11_l 11
#define COEFS half_fir_coefs_11
#include "half-fir.h"
#define FUNCTION h12
#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _
#define h12_l 12
#define COEFS half_fir_coefs_12
#include "half-fir.h"
#define FUNCTION h13
#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _ _
#define h13_l 13
#define COEFS half_fir_coefs_13
#include "half-fir.h"
static struct {int num_coefs; stage_fn_t fn; float att;} const half_firs[] = {
{ 8, h8 , 136.51f},
{ 9, h9 , 152.32f},
{10, h10, 168.07f},
{11, h11, 183.78f},
{12, h12, 199.44f},
{13, h13, 212.75f},
};
#define HI_PREC_CLOCK
#define VAR_LENGTH p->n
#define VAR_CONVOLVE while (j < FIR_LENGTH) _
#define VAR_POLY_PHASE_BITS p->phase_bits
#define FUNCTION vpoly0
#define FIR_LENGTH VAR_LENGTH
#define CONVOLVE VAR_CONVOLVE
#include "poly-fir0.h"
#define FUNCTION vpoly1
#define COEF_INTERP 1
#define PHASE_BITS VAR_POLY_PHASE_BITS
#define FIR_LENGTH VAR_LENGTH
#define CONVOLVE VAR_CONVOLVE
#include "poly-fir.h"
#define FUNCTION vpoly2
#define COEF_INTERP 2
#define PHASE_BITS VAR_POLY_PHASE_BITS
#define FIR_LENGTH VAR_LENGTH
#define CONVOLVE VAR_CONVOLVE
#include "poly-fir.h"
#define FUNCTION vpoly3
#define COEF_INTERP 3
#define PHASE_BITS VAR_POLY_PHASE_BITS
#define FIR_LENGTH VAR_LENGTH
#define CONVOLVE VAR_CONVOLVE
#include "poly-fir.h"
#undef HI_PREC_CLOCK
#define U100_l 42
#if RATE_SIMD_POLY
#define U100_l_EXTRA _ _
#define u100_l_EXTRA _
#define U100_l_EXTRA_LENGTH 2
#define u100_l_EXTRA_LENGTH 1
#else
#define U100_l_EXTRA
#define u100_l_EXTRA
#define U100_l_EXTRA_LENGTH 0
#define u100_l_EXTRA_LENGTH 0
#endif
#define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ U100_l_EXTRA
#define FUNCTION U100_0
#define FIR_LENGTH (U100_l + U100_l_EXTRA_LENGTH)
#define CONVOLVE poly_fir_convolve_U100
#include "poly-fir0.h"
#define u100_l 11
#define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _ _ u100_l_EXTRA
#define FUNCTION u100_0
#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
#define CONVOLVE poly_fir_convolve_u100
#include "poly-fir0.h"
#define FUNCTION u100_1
#define COEF_INTERP 1
#define PHASE_BITS 8
#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
#define CONVOLVE poly_fir_convolve_u100
#include "poly-fir.h"
#define u100_1_b 8
#define FUNCTION u100_2
#define COEF_INTERP 2
#define PHASE_BITS 6
#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
#define CONVOLVE poly_fir_convolve_u100
#include "poly-fir.h"
#define u100_2_b 6
typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t;
typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t;
static poly_fir_t const poly_firs[] = {
{-1, {{0, vpoly0}, { 7.2f, vpoly1}, {5.0f, vpoly2}}},
{-1, {{0, vpoly0}, { 9.4f, vpoly1}, {6.7f, vpoly2}}},
{-1, {{0, vpoly0}, {12.4f, vpoly1}, {7.8f, vpoly2}}},
{-1, {{0, vpoly0}, {13.6f, vpoly1}, {9.3f, vpoly2}}},
{-1, {{0, vpoly0}, {10.5f, vpoly2}, {8.4f, vpoly3}}},
{-1, {{0, vpoly0}, {11.85f,vpoly2}, {9.0f, vpoly3}}},
{-1, {{0, vpoly0}, { 8.0f, vpoly1}, {5.3f, vpoly2}}},
{-1, {{0, vpoly0}, { 8.6f, vpoly1}, {5.7f, vpoly2}}},
{-1, {{0, vpoly0}, {10.6f, vpoly1}, {6.75f,vpoly2}}},
{-1, {{0, vpoly0}, {12.6f, vpoly1}, {8.6f, vpoly2}}},
{-1, {{0, vpoly0}, { 9.6f, vpoly2}, {7.6f, vpoly3}}},
{-1, {{0, vpoly0}, {11.4f, vpoly2}, {8.65f,vpoly3}}},
{10.62f, {{U100_l, U100_0}, {0, 0}, {0, 0}}},
{11.28f, {{u100_l, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}},
{-1, {{0, vpoly0}, { 9, vpoly1}, { 6, vpoly2}}},
{-1, {{0, vpoly0}, { 11, vpoly1}, { 7, vpoly2}}},
{-1, {{0, vpoly0}, { 13, vpoly1}, { 8, vpoly2}}},
{-1, {{0, vpoly0}, { 10, vpoly2}, { 8, vpoly3}}},
{-1, {{0, vpoly0}, { 12, vpoly2}, { 9, vpoly3}}},
};

75
src/half-coefs.h 100644
View File

@ -0,0 +1,75 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#if defined __GNUC__
#pragma GCC system_header
#elif defined __SUNPRO_C
#pragma disable_warn
#elif defined _MSC_VER
#pragma warning(push, 1)
#endif
#if CORE_TYPE & CORE_SIMD_HALF
#define VALIGN vAlign
#else
#define VALIGN
#endif
#if !(CORE_TYPE & CORE_SIMD_HALF)
static VALIGN const sample_t half_fir_coefs_7[] = {
3.1062656496657370e-01, -8.4998810699955796e-02, 3.4007044621123500e-02,
-1.2839903789829387e-02, 3.9899380181723145e-03, -8.9355202017945374e-04,
1.0918292424806546e-04,
};
#endif
static VALIGN const sample_t half_fir_coefs_8[] = {
3.1154652365332069e-01, -8.7344917685739543e-02, 3.6814458353637280e-02,
-1.5189204581464479e-02, 5.4540855610738801e-03, -1.5643862626630416e-03,
3.1816575906323303e-04, -3.4799449225005688e-05,
};
static VALIGN const sample_t half_fir_coefs_9[] = {
3.1227034755311189e-01, -8.9221517147969526e-02, 3.9139704015071934e-02,
-1.7250558515852023e-02, 6.8589440230476112e-03, -2.3045049636430419e-03,
6.0963740543348963e-04, -1.1323803957431231e-04, 1.1197769991000046e-05,
};
#if CORE_TYPE & CORE_DBL
static VALIGN const sample_t half_fir_coefs_10[] = {
3.1285456012000523e-01, -9.0756740799292787e-02, 4.1096398104193160e-02,
-1.9066319572525220e-02, 8.1840569787684902e-03, -3.0766876176359834e-03,
9.6396524429277980e-04, -2.3585679989922018e-04, 4.0252189026627833e-05,
-3.6298196342497932e-06,
};
static VALIGN const sample_t half_fir_coefs_11[] = {
3.1333588822574199e-01, -9.2035898673019811e-02, 4.2765169698406408e-02,
-2.0673580894964429e-02, 9.4225426824512421e-03, -3.8563379950013192e-03,
1.3634742159642453e-03, -3.9874150714431009e-04, 9.0586723632664806e-05,
-1.4285617244076783e-05, 1.1834642946400529e-06,
};
static VALIGN const sample_t half_fir_coefs_12[] = {
3.1373928463345568e-01, -9.3118180335301962e-02, 4.4205005881659098e-02,
-2.2103860986973051e-02, 1.0574689371162864e-02, -4.6276428065385065e-03,
1.7936153397572132e-03, -5.9617527051353237e-04, 1.6314517495669067e-04,
-3.4555126770115446e-05, 5.0617615610782593e-06, -3.8768958592971409e-07,
};
static VALIGN const sample_t half_fir_coefs_13[] = {
3.1408224847888910e-01, -9.4045836332667387e-02, 4.5459878763259978e-02,
-2.3383369012219993e-02, 1.1644273044890753e-02, -5.3806714579057013e-03,
2.2429072878264022e-03, -8.2204347506606424e-04, 2.5724946477840893e-04,
-6.6072709864248668e-05, 1.3099163296288644e-05, -1.7907147069136000e-06,
1.2750825595240592e-07,
};
#endif
#undef VALIGN
#if defined __SUNPRO_C
#pragma enable_warn
#elif defined _MSC_VER
#pragma warning(pop)
#endif

View File

@ -1,25 +1,61 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
/* Down-sample by a factor of 2 using a FIR with odd length (LEN).*/
/* Decimate by 2 using a FIR with odd length (LEN). */
/* Input must be preceded and followed by LEN >> 1 samples. */
#define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j;
static void FUNCTION(stage_t * p, fifo_t * output_fifo)
#define COEFS ((sample_t const *)p->coefs)
#if SIMD_SSE
#define BEGINNING v4_t sum, q1, q2, t
#define ____ \
q1 = _mm_shuffle_ps(t=vLdu(input+2*j),vLdu(input+2*j+4),_MM_SHUFFLE(3,1,3,1)); \
q2 = _mm_shuffle_ps(vLdu(input-2*j-4),vLdu(input-2*j-8),_MM_SHUFFLE(1,3,1,3)); \
sum = vAdd(j? sum : vMul(vSet1(.5), t), vMul(vAdd(q1, q2), vLd(COEFS+j))); \
j += 4;
#define __ \
q1 = _mm_shuffle_ps(vLdu(input+2*j), vLdu(input-2*j-4), _MM_SHUFFLE(1,3,3,1)); \
q2 = _mm_loadl_pi(q2, (__m64*)(COEFS+j)), q2 = _mm_movelh_ps(q2, q2); \
sum = vAdd(sum, vMul(q1, q2)); \
j += 2;
#define _ \
q1 = _mm_add_ss(_mm_load_ss(input+2*j+1), _mm_load_ss(input-2*j-1)); \
sum = _mm_add_ss(sum, _mm_mul_ss(q1, _mm_load_ss(COEFS+j))); \
++j;
#define END vStorSum(output+i, sum)
/* #elif SIMD_AVX; No good solution found. */
/* #elif SIMD_NEON; No need: gcc -O3 does a good job by itself. */
#else
#define BEGINNING sample_t sum = input[0] * .5f
#define ____ __ __
#define __ _ _
#define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j;
#define END output[i] = sum
#endif
static void FUNCTION_H(stage_t * p, fifo_t * output_fifo)
{
sample_t const * input = stage_read_p(p);
int i, num_out = (stage_occupancy(p) + 1) / 2;
sample_t * output = fifo_reserve(output_fifo, num_out);
sample_t const * __restrict input = stage_read_p(p);
int num_in = min(stage_occupancy(p), p->input_size);
int i, num_out = (num_in + 1) >> 1;
sample_t * __restrict output = fifo_reserve(output_fifo, num_out);
for (i = 0; i < num_out; ++i, input += 2) {
int j = 0;
sample_t sum = input[0] * .5f;
CONVOLVE
output[i] = sum;
BEGINNING; CONVOLVE; END;
}
fifo_read(&p->fifo, 2 * num_out, NULL);
}
#undef _
#undef __
#undef ____
#undef BEGINNING
#undef END
#undef COEFS
#undef CONVOLVE
#undef FUNCTION
#undef FUNCTION_H

View File

@ -1,57 +0,0 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#if defined __GNUC__
#pragma GCC system_header
#elif defined __SUNPRO_C
#pragma disable_warn
#elif defined _MSC_VER
#pragma warning(push, 1)
#endif
static const sample_t half_fir_coefs_8[] = {
0.3115465451887802, -0.08734497241282892, 0.03681452335604365,
-0.01518925831569441, 0.005454118437408876, -0.001564400922162005,
0.0003181701445034203, -3.48001341225749e-5,
};
static const sample_t half_fir_coefs_9[] = {
0.3122703613711853, -0.08922155288172305, 0.03913974805854332,
-0.01725059723447163, 0.006858970092378141, -0.002304518467568703,
0.0006096426006051062, -0.0001132393923815236, 1.119795386287666e-5,
};
static const sample_t half_fir_coefs_10[] = {
0.3128545521327376, -0.09075671986104322, 0.04109637155154835,
-0.01906629512749895, 0.008184039342054333, -0.0030766775017262,
0.0009639607022414314, -0.0002358552746579827, 4.025184282444155e-5,
-3.629779111541012e-6,
};
static const sample_t half_fir_coefs_11[] = {
0.3133358837508807, -0.09203588680609488, 0.04276515428384758,
-0.02067356614745591, 0.00942253142371517, -0.003856330993895144,
0.001363470684892284, -0.0003987400965541919, 9.058629923971627e-5,
-1.428553070915318e-5, 1.183455238783835e-6,
};
static const sample_t half_fir_coefs_12[] = {
0.3137392991811407, -0.0931182192961332, 0.0442050575271454,
-0.02210391200618091, 0.01057473015666001, -0.00462766983973885,
0.001793630226239453, -0.0005961819959665878, 0.0001631475979359577,
-3.45557865639653e-5, 5.06188341942088e-6, -3.877010943315563e-7,
};
static const sample_t half_fir_coefs_13[] = {
0.3140822554324578, -0.0940458550886253, 0.04545990399121566,
-0.02338339450796002, 0.01164429409071052, -0.005380686021429845,
0.002242915773871009, -0.000822047600000082, 0.0002572510962395222,
-6.607320708956279e-5, 1.309926399120154e-5, -1.790719575255006e-6,
1.27504961098836e-7,
};
#if defined __SUNPRO_C
#pragma enable_warn
#elif defined _MSC_VER
#pragma warning(pop)
#endif

View File

@ -6,11 +6,15 @@
#include "std-types.h"
#undef min
#undef max
#define min(a, b) ((a) <= (b) ? (a) : (b))
#define max(a, b) ((a) >= (b) ? (a) : (b))
#define range_limit(x, lower, upper) (min(max(x, lower), upper))
#define linear_to_dB(x) (log10(x) * 20)
#define array_length(a) (sizeof(a)/sizeof(a[0]))
@ -20,12 +24,16 @@
#define iAL(a) (int)AL(a)
#define sqr(a) ((a) * (a))
#if defined __GNUC__
#define UNUSED __attribute__ ((unused))
#else
#define UNUSED
#endif
#if !WITH_DEV_TRACE
#ifdef __GNUC__
void lsx_dummy(char const *, ...);
@ -35,10 +43,41 @@
#define lsx_debug if(0) lsx_dummy
#define lsx_debug_more lsx_debug
#else
int _soxr_trace_level(void);
extern int _soxr_trace_level;
void _soxr_trace(char const * fmt, ...);
#define lsx_debug if (_soxr_trace_level() >= 4) _soxr_trace
#define lsx_debug_more if (_soxr_trace_level() >= 5) _soxr_trace
#define lsx_debug if (_soxr_trace_level > 0) _soxr_trace
#define lsx_debug_more if (_soxr_trace_level > 1) _soxr_trace
#endif
/* soxr_quality_spec_t.flags: */
#define SOXR_ROLLOFF_LSR2Q 3u /* Reserved for internal use. */
#define SOXR_ROLLOFF_MASK 3u /* For masking these bits. */
#define SOXR_PROMOTE_TO_LQ 64u /* Reserved for internal use. */
/* soxr_runtime_spec_t.flags: */
#define SOXR_STRICT_BUFFERING 4u /* Reserved for future use. */
#define SOXR_NOSMALLINTOPT 8u /* For test purposes only. */
/* soxr_quality_spec recipe: */
#define SOXR_PRECISIONQ 11 /* Quality specified by the precision parameter. */
#define SOXR_PHASE_MASK 0x30 /* For masking these bits. */
/* soxr_quality_spec flags: */
#define RESET_ON_CLEAR (1u<<31)
#endif

31
src/math-wrap.h 100644
View File

@ -0,0 +1,31 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#if !defined soxr_math_wrap_included
#define soxr_math_wrap_included
#include <math.h>
#if defined __STRICT_ANSI__
#define sinf(x) (float)sin ((double)(x))
#define cosf(x) (float)cos ((double)(x))
#define atanf(x) (float)atan((double)(x))
#endif
#if !defined M_PI
#define M_PI 3.141592653589793238462643383279502884
#endif
#if !defined M_LN10
#define M_LN10 2.302585092994045684017991454684364208
#endif
#if !defined M_SQRT2
#define M_SQRT2 1.414213562373095048801688724209698079
#endif
#if !defined M_LN2
#define M_LN2 0.693147180559945309417232121458176568
#endif
#endif

View File

@ -3,24 +3,23 @@
#if !defined PFFT_MACROS_ONLY
#include "simd.h"
#include <math.h>
#include "math-wrap.h"
#define pffft_aligned_free _soxr_simd_aligned_free
#define pffft_aligned_malloc _soxr_simd_aligned_malloc
#define pffft_aligned_calloc _soxr_simd_aligned_calloc
#if PFFFT_DOUBLE
#include "simd64.h"
#else
#include "simd32.h"
#define sin(x) sinf(x)
#define cos(x) cosf(x)
#endif
#define pffft_aligned_free SIMD_ALIGNED_FREE
#define pffft_aligned_malloc SIMD_ALIGNED_MALLOC
#define pffft_aligned_calloc SIMD_ALIGNED_CALLOC
#undef inline
#define inline __inline
#if defined _MSC_VER
#define sin (float)sin
#define cos (float)cos
#else
#define sin(x) sinf((float)(x))
#define cos(x) cosf((float)(x))
#endif
#endif

View File

@ -133,9 +133,11 @@ inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_p
*/
#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86))
# define SIMD_SZ 4 /* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */
#if !PFFFT_DOUBLE
#include <xmmintrin.h>
typedef __m128 v4sf;
# define SIMD_SZ 4 /* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */
# define VZERO() _mm_setzero_ps()
# define VMUL(a,b) _mm_mul_ps(a,b)
# define VADD(a,b) _mm_add_ps(a,b)
@ -148,6 +150,10 @@ typedef __m128 v4sf;
# define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0))
# define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0)
#else
#include "avx.h"
#endif
/*
ARM NEON support macros
*/
@ -181,6 +187,10 @@ typedef float32x4_t v4sf;
# endif
#endif
#if PFFFT_DOUBLE
#define float double
#endif
/* fallback mode for situations where SSE/Altivec are not available, use scalar mode instead */
#ifdef PFFFT_SIMD_DISABLE
typedef float v4sf;
@ -202,6 +212,8 @@ typedef float v4sf;
#define SVMUL(f,v) VMUL(LD_PS1(f),v)
#endif
#if !defined PFFT_MACROS_ONLY
#if !defined(PFFFT_SIMD_DISABLE)
typedef union v4sf_union {
v4sf v;
@ -214,7 +226,8 @@ typedef union v4sf_union {
#define assertv4(v,f0,f1,f2,f3) assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3))
/* detect bugs with the vector support macros */
void validate_pffft_simd() {
void validate_pffft_simd(void);
void validate_pffft_simd(void) {
float f[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 };
v4sf_union a0, a1, a2, a3, t, u;
memcpy(a0.f, f, 4*sizeof(float));
@ -230,7 +243,6 @@ void validate_pffft_simd() {
printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77);
t.v = VMADD(a1.v, a2.v,a0.v);
printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80);
INTERLEAVE2(a1.v,a2.v,t.v,u.v);
printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
@ -271,8 +283,6 @@ void pffft_aligned_free(void *p) {
int pffft_simd_size() { return SIMD_SZ; }
#endif
#if !defined PFFT_MACROS_ONLY
/*
passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2
*/

View File

@ -86,6 +86,10 @@
#ifdef __cplusplus
extern "C" {
#endif
#if PFFFT_DOUBLE
#define float double
#endif
/* opaque struct holding internal stuff (precomputed twiddle factors)
@ -182,6 +186,8 @@ extern "C" {
int pffft_simd_size();
#endif
#undef float
#ifdef __cplusplus
}
#endif

View File

@ -1,11 +1,14 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define _soxr_simd_aligned_free free
#define _soxr_simd_aligned_malloc malloc
#define SIMD_ALIGNED_FREE free
#define SIMD_ALIGNED_MALLOC malloc
#define PFFFT_SIMD_DISABLE
#define PFFFT_DOUBLE 0
#include "pffft-wrap.c"
#include "filter.h"
#include "rdft_t.h"
static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);}
static void delete_setup(void * setup) {pffft_destroy_setup(setup);}
@ -15,8 +18,8 @@ static void backward (int length, void * setup, float * H, float * scratch) {pff
static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H); (void)length;}
static int multiplier(void) {return 1;}
static int flags(void) {return RDFT_NEEDS_SCRATCH;}
typedef void (* fn_t)(void);
fn_t _soxr_rdft32_cb[] = {
(fn_t)setup,
(fn_t)setup,
@ -29,4 +32,8 @@ fn_t _soxr_rdft32_cb[] = {
(fn_t)_soxr_ordered_partial_convolve_f,
(fn_t)multiplier,
(fn_t)pffft_reorder_back,
(fn_t)malloc,
(fn_t)calloc,
(fn_t)free,
(fn_t)flags,
};

View File

@ -1,17 +1,20 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define PFFFT_DOUBLE 0
#include "pffft-wrap.c"
#include "rdft_t.h"
static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);}
static void forward (int length, void * setup, float * h, float * scratch) {pffft_transform (setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
static void oforward (int length, void * setup, float * h, float * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
static void backward (int length, void * setup, float * H, float * scratch) {pffft_transform (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H); (void)length;}
static void convolve(int length, void * setup, float * H, float const * with) {pffft_zconvolve(setup, H, with, H); (void)length;}
static int multiplier(void) {return 1;}
static int flags(void) {return RDFT_IS_SIMD | RDFT_NEEDS_SCRATCH;}
typedef void (* fn_t)(void);
fn_t _soxr_rdft32s_cb[] = {
(fn_t)setup,
(fn_t)setup,
@ -21,7 +24,11 @@ fn_t _soxr_rdft32s_cb[] = {
(fn_t)backward,
(fn_t)obackward,
(fn_t)convolve,
(fn_t)_soxr_ordered_partial_convolve_simd,
(fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
(fn_t)multiplier,
(fn_t)pffft_reorder_back,
(fn_t)SIMD_ALIGNED_MALLOC,
(fn_t)SIMD_ALIGNED_CALLOC,
(fn_t)SIMD_ALIGNED_FREE,
(fn_t)flags,
};

34
src/pffft64s.c 100644
View File

@ -0,0 +1,34 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define PFFFT_DOUBLE 1
#include "pffft-wrap.c"
#include "rdft_t.h"
static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);}
static void forward (int length, void * setup, double * h, double * scratch) {pffft_transform (setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
static void oforward (int length, void * setup, double * h, double * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
static void backward (int length, void * setup, double * H, double * scratch) {pffft_transform (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
static void obackward(int length, void * setup, double * H, double * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
static void convolve(int length, void * setup, double * H, double const * with) {pffft_zconvolve(setup, H, with, H); (void)length;}
static int multiplier(void) {return 1;}
static int flags(void) {return RDFT_IS_SIMD | RDFT_NEEDS_SCRATCH;}
fn_t _soxr_rdft64s_cb[] = {
(fn_t)setup,
(fn_t)setup,
(fn_t)pffft_destroy_setup,
(fn_t)forward,
(fn_t)oforward,
(fn_t)backward,
(fn_t)obackward,
(fn_t)convolve,
(fn_t)ORDERED_PARTIAL_CONVOLVE_SIMD,
(fn_t)multiplier,
(fn_t)pffft_reorder_back,
(fn_t)SIMD_ALIGNED_MALLOC,
(fn_t)SIMD_ALIGNED_CALLOC,
(fn_t)SIMD_ALIGNED_FREE,
(fn_t)flags,
};

View File

@ -1,97 +1,138 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
/* Resample using an interpolated poly-phase FIR with length LEN.*/
/* Input must be followed by LEN-1 samples. */
/* Resample using an interpolated poly-phase FIR with length LEN. */
/* Input must be followed by FIR_LENGTH-1 samples. */
#define a (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 0,j))
#define b (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 1,j))
#define c (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 2,j))
#define d (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 3,j))
#if COEF_INTERP == 0
#define _ sum += a *in[j], ++j;
#elif COEF_INTERP == 1
#define _ sum += (b *x + a)*in[j], ++j;
#elif COEF_INTERP == 2
#define _ sum += ((c *x + b)*x + a)*in[j], ++j;
#elif COEF_INTERP == 3
#define _ sum += (((d*x + c)*x + b)*x + a)*in[j], ++j;
#else
#if COEF_INTERP != 1 && COEF_INTERP != 2 && COEF_INTERP != 3
#error COEF_INTERP
#endif
#if SIMD_AVX || SIMD_SSE || SIMD_NEON
#define N (FIR_LENGTH>>2)
#if COEF_INTERP == 1
#define _ sum=vMac(vMac(b,X,a),vLdu(in+j*4),sum), ++j;
#elif COEF_INTERP == 2
#define _ sum=vMac(vMac(vMac(c,X,b),X,a),vLdu(in+j*4),sum), ++j;
#else
#define _ sum=vMac(vMac(vMac(vMac(d,X,c),X,b),X,a),vLdu(in+j*4),sum), ++j;
#endif
#define a coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-0)]
#define b coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-1)]
#define c coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-2)]
#define d coefs[(COEF_INTERP+1)*(N*phase+j)+(COEF_INTERP-3)]
#define BEGINNING v4_t X = vLds(x), sum = vZero(); \
v4_t const * const __restrict coefs = (v4_t *)COEFS
#define MIDDLE switch (N) {case 3: CONVOLVE(3); break; case 4: CONVOLVE(4); \
break; case 5: CONVOLVE(5); break; default: CONVOLVE(N); }
#define END vStorSum(output+i, sum)
#define cc(n) case n: core(n); break
#define CORE(n) switch (n) {cc(2); cc(3); cc(4); cc(5); cc(6); default: core(n);}
#else
#define N FIR_LENGTH
#if COEF_INTERP == 1
#define _ sum += (b*x + a)*in[j], ++j;
#elif COEF_INTERP == 2
#define _ sum += ((c*x + b)*x + a)*in[j], ++j;
#else
#define _ sum += (((d*x + c)*x + b)*x + a)*in[j], ++j;
#endif
#define a (coef(COEFS, COEF_INTERP, N, phase, 0,j))
#define b (coef(COEFS, COEF_INTERP, N, phase, 1,j))
#define c (coef(COEFS, COEF_INTERP, N, phase, 2,j))
#define d (coef(COEFS, COEF_INTERP, N, phase, 3,j))
#define BEGINNING sample_t sum = 0
#define MIDDLE CONVOLVE(N)
#define END output[i] = sum
#define CORE(n) core(n)
#endif
#define fphpCore(n) \
if (p->use_hi_prec_clock) { \
float_step_t at = p->at.flt; \
for (i = 0; (int)at < num_in; ++i, at += p->step.flt) { \
sample_t const * const __restrict in = input + (int)at; \
float_step_t frac = at - (int)at; \
int phase = (int)(frac * (1 << PHASE_BITS)); \
sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase); \
int j = 0; \
BEGINNING; CONVOLVE(n); END; \
} \
fifo_read(&p->fifo, (int)at, NULL); \
p->at.flt = at - (int)at; \
} else
#define hpCore(n) \
if (p->use_hi_prec_clock) { \
for (i = 0; p->at.integer < num_in; ++i, \
p->at.fix.ls.all += p->step.fix.ls.all, \
p->at.whole += p->step.whole + (p->at.fix.ls.all < p->step.fix.ls.all)) { \
sample_t const * const __restrict in = input + p->at.integer; \
uint32_t frac = p->at.fraction; \
int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ \
sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); /* low-order bits, scaled to [0,1) */ \
int j = 0; \
BEGINNING; CONVOLVE(n); END; \
} \
fifo_read(&p->fifo, p->at.integer, NULL); \
p->at.integer = 0; \
} else
#define spCore(n) { \
for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) { \
sample_t const * const __restrict in = input + p->at.integer; \
uint32_t frac = p->at.fraction; \
int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */ \
sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32)); /* low-order bits, scaled to [0,1) */ \
int j = 0; \
BEGINNING; CONVOLVE(n); END; \
} \
fifo_read(&p->fifo, p->at.integer, NULL); \
p->at.integer = 0; }
#if defined HI_PREC_CLOCK && FLOAT_HI_PREC_CLOCK
#define core(n) fphpCore(n) spCore(n)
#elif defined HI_PREC_CLOCK
#define core(n) hpCore(n) spCore(n)
#else
#define core(n) spCore(n)
#endif
static void FUNCTION(stage_t * p, fifo_t * output_fifo)
{
sample_t const * input = stage_read_p(p);
int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio);
sample_t * output = fifo_reserve(output_fifo, max_num_out);
int num_in = min(stage_occupancy(p), p->input_size);
int i, max_num_out = 1 + (int)(num_in * p->out_in_ratio);
sample_t * const __restrict output = fifo_reserve(output_fifo, max_num_out);
#if defined HI_PREC_CLOCK
#if FLOAT_HI_PREC_CLOCK
if (p->use_hi_prec_clock) {
float_step_t at = p->at.flt;
for (i = 0; (int)at < num_in; ++i, at += p->step.flt) {
sample_t const * in = input + (int)at;
float_step_t frac = at - (int)at;
int phase = (int)(frac * (1 << PHASE_BITS));
#if COEF_INTERP > 0
sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase);
#endif
sample_t sum = 0;
int j = 0;
CONVOLVE
output[i] = sum;
}
fifo_read(&p->fifo, (int)at, NULL);
p->at.flt = at - (int)at;
} else
#else
if (p->use_hi_prec_clock) {
for (i = 0; p->at.integer < num_in; ++i,
p->at.fix.ls.all += p->step.fix.ls.all,
p->at.whole += p->step.whole + (p->at.fix.ls.all < p->step.fix.ls.all)) {
sample_t const * in = input + p->at.integer;
uint32_t frac = p->at.fraction;
int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */
#if COEF_INTERP > 0 /* low-order bits, scaled to [0,1) */
sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32));
#endif
sample_t sum = 0;
int j = 0;
CONVOLVE
output[i] = sum;
}
fifo_read(&p->fifo, p->at.integer, NULL);
p->at.integer = 0;
} else
#endif
#endif
{
for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) {
sample_t const * in = input + p->at.integer;
uint32_t frac = p->at.fraction;
int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */
#if COEF_INTERP > 0 /* low-order bits, scaled to [0,1) */
sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32));
#endif
sample_t sum = 0;
int j = 0;
CONVOLVE
output[i] = sum;
}
fifo_read(&p->fifo, p->at.integer, NULL);
p->at.integer = 0;
}
CORE(N);
assert(max_num_out - i >= 0);
fifo_trim_by(output_fifo, max_num_out - i);
}
#undef _
#undef a
#undef b
#undef c
#undef d
#undef CORE
#undef cc
#undef core
#undef COEF_INTERP
#undef N
#undef BEGINNING
#undef MIDDLE
#undef END
#undef CONVOLVE
#undef FIR_LENGTH
#undef FUNCTION

View File

@ -1,32 +1,56 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
/* Resample using a non-interpolated poly-phase FIR with length LEN.*/
/* Input must be followed by LEN-1 samples. */
/* Resample using a non-interpolated poly-phase FIR with length LEN. */
/* Input must be followed by FIR_LENGTH-1 samples. */
#define _ sum += (coef(p->shared->poly_fir_coefs, 0, FIR_LENGTH, rem, 0, j)) *at[j], ++j;
#if SIMD_AVX || SIMD_SSE || SIMD_NEON
#define N (FIR_LENGTH>>2)
#define BEGINNING v4_t sum = vZero(); \
v4_t const * const __restrict coefs = (v4_t *)COEFS + N * rem;
#define _ sum = vMac(vLdu(at+j*4), coefs[j], sum), ++j;
#define END vStorSum(output+i, sum)
#define cc(n) case n: core(n); break
#define CORE(n) switch (n) {cc(2); cc(3); cc(4); cc(5); cc(6); default: core(n);}
#else
#define N FIR_LENGTH
#define BEGINNING sample_t sum = 0; \
sample_t const * const __restrict coefs = (sample_t *)COEFS + N * rem;
#define _ sum += coefs[j]*at[j], ++j;
#define END output[i] = sum
#define CORE(n) core(n)
#endif
#define core(n) \
for (i = 0; p->at.integer < num_in * p->L; ++i, \
p->at.integer += p->step.integer) { \
int const div = p->at.integer / p->L, rem = p->at.integer % p->L; \
sample_t const * const __restrict at = input + div; \
int j = 0; BEGINNING; CONVOLVE(n); END;}
static void FUNCTION(stage_t * p, fifo_t * output_fifo)
{
sample_t const * input = stage_read_p(p);
int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio);
sample_t * output = fifo_reserve(output_fifo, max_num_out);
int num_in = min(stage_occupancy(p), p->input_size);
if (num_in) {
sample_t const * input = stage_read_p(p);
int i, num_out = (num_in * p->L - p->at.integer + p->step.integer - 1) / p->step.integer;
sample_t * __restrict output = fifo_reserve(output_fifo, num_out);
for (i = 0; p->at.integer < num_in * p->L; ++i, p->at.integer += p->step.integer) {
int div = p->at.integer / p->L, rem = p->at.integer % p->L;
sample_t const * at = input + div;
sample_t sum = 0;
int j = 0;
CONVOLVE
output[i] = sum;
CORE(N);
assert(i == num_out);
fifo_read(&p->fifo, p->at.integer / p->L, NULL);
p->at.integer = p->at.integer % p->L;
}
assert(max_num_out - i >= 0);
fifo_trim_by(output_fifo, max_num_out - i);
fifo_read(&p->fifo, p->at.integer / p->L, NULL);
p->at.integer = p->at.integer % p->L;
}
#undef _
#undef CORE
#undef cc
#undef core
#undef N
#undef BEGINNING
#undef MIDDLE
#undef END
#undef CONVOLVE
#undef FIR_LENGTH
#undef FUNCTION

View File

@ -1,726 +0,0 @@
/* SoX Resampler Library Copyright (c) 2007-14 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include <math.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include "filter.h"
#if defined SOXR_LIB
#include "internal.h"
typedef void (* fn_t)(void);
extern fn_t RDFT_CB[11];
#define rdft_forward_setup (*(void * (*)(int))RDFT_CB[0])
#define rdft_backward_setup (*(void * (*)(int))RDFT_CB[1])
#define rdft_delete_setup (*(void (*)(void *))RDFT_CB[2])
#define rdft_forward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[3])
#define rdft_oforward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[4])
#define rdft_backward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[5])
#define rdft_obackward (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[6])
#define rdft_convolve (*(void (*)(int, void *, sample_t *, sample_t const *))RDFT_CB[7])
#define rdft_convolve_portion (*(void (*)(int, sample_t *, sample_t const *))RDFT_CB[8])
#define rdft_multiplier (*(int (*)(void))RDFT_CB[9])
#define rdft_reorder_back (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[10])
#endif
#if RATE_SIMD /* Align for SIMD: */
#include "simd.h"
#if 0 /* Not using this yet. */
#define RATE_SIMD_POLY 1
#define num_coefs4 ((num_coefs + 3) & ~3)
#define coefs4_check(i) ((i) < num_coefs)
#else
#define RATE_SIMD_POLY 0
#define num_coefs4 num_coefs
#define coefs4_check(i) 1
#endif
#define aligned_free _soxr_simd_aligned_free
#define aligned_malloc _soxr_simd_aligned_malloc
#define aligned_calloc _soxr_simd_aligned_calloc
#if 0
#define FIFO_REALLOC aligned_realloc
#define FIFO_MALLOC aligned_malloc
#define FIFO_FREE aligned_free
static void * aligned_realloc(void * q, size_t nb_bytes, size_t copy_bytes) {
void * p = aligned_malloc(nb_bytes);
if (p) memcpy(p, q, copy_bytes);
aligned_free(q);
return p;
}
#endif
#else
#define RATE_SIMD_POLY 0
#define num_coefs4 num_coefs
#define coefs4_check(i) 1
#define aligned_free free
#define aligned_malloc malloc
#define aligned_calloc calloc
#endif
#define FIFO_SIZE_T int
#include "fifo.h"
typedef union { /* Int64 in parts */
#if HAVE_BIGENDIAN
struct {int32_t ms; uint32_t ls;} parts;
#else
struct {uint32_t ls; int32_t ms;} parts;
#endif
int64_t all;
} int64p_t;
typedef union { /* Uint64 in parts */
#if HAVE_BIGENDIAN
struct {uint32_t ms, ls;} parts;
#else
struct {uint32_t ls, ms;} parts;
#endif
uint64_t all;
} uint64p_t;
#define FLOAT_HI_PREC_CLOCK 0 /* Non-float hi-prec has ~96 bits. */
#define float_step_t long double /* __float128 is also a (slow) option */
#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) coef_p[(fir_len) * ((interp_order) + 1) * (phase_num) + ((interp_order) + 1) * (fir_coef_num) + (interp_order - coef_interp_num)]
#define raw_coef_t double
static sample_t * prepare_coefs(raw_coef_t const * coefs, int num_coefs,
int num_phases, int interp_order, double multiplier)
{
int i, j, length = num_coefs4 * num_phases;
sample_t * result = malloc((size_t)(length * (interp_order + 1)) * sizeof(*result));
double fm1 = coefs[0], f1 = 0, f2 = 0;
for (i = num_coefs4 - 1; i >= 0; --i)
for (j = num_phases - 1; j >= 0; --j) {
double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */
int pos = i * num_phases + j - 1;
fm1 = coefs4_check(i) && pos > 0 ? coefs[pos - 1] * multiplier : 0;
switch (interp_order) {
case 1: b = f1 - f0; break;
case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break;
case 3: c=.5*(f1+fm1)-f0;d=(1/6.)*(f2-f1+fm1-f0-4*c);b=f1-f0-d-c; break;
default: if (interp_order) assert(0);
}
#define coef_coef(x) \
coef(result, interp_order, num_coefs4, j, x, num_coefs4 - 1 - i)
coef_coef(0) = (sample_t)f0;
if (interp_order > 0) coef_coef(1) = (sample_t)b;
if (interp_order > 1) coef_coef(2) = (sample_t)c;
if (interp_order > 2) coef_coef(3) = (sample_t)d;
#undef coef_coef
f2 = f1, f1 = f0;
}
return result;
}
typedef struct {
int dft_length, num_taps, post_peak;
void * dft_forward_setup, * dft_backward_setup;
sample_t * coefs;
} dft_filter_t;
typedef struct { /* So generated filter coefs may be shared between channels */
sample_t * poly_fir_coefs;
dft_filter_t dft_filter[2];
} rate_shared_t;
typedef enum {
irrational_stage = 1,
cubic_stage,
dft_stage,
half_stage,
rational_stage
} stage_type_t;
struct stage;
typedef void (* stage_fn_t)(struct stage * input, fifo_t * output);
#define MULT32 (65536. * 65536.)
typedef union { /* Fixed point arithmetic */
struct {uint64p_t ls; int64p_t ms;} fix;
float_step_t flt;
} step_t;
typedef struct stage {
/* Common to all stage types: */
stage_type_t type;
stage_fn_t fn;
fifo_t fifo;
int pre; /* Number of past samples to store */
int pre_post; /* pre + number of future samples to store */
int preload; /* Number of zero samples to pre-load the fifo */
double out_in_ratio; /* For buffer management. */
/* For a stage with variable (run-time generated) filter coefs: */
rate_shared_t * shared;
unsigned dft_filter_num; /* Which, if any, of the 2 DFT filters to use */
sample_t * dft_scratch, * dft_out;
/* For a stage with variable L/M: */
step_t at, step;
bool use_hi_prec_clock;
int L, remM;
int n, phase_bits, block_len;
double mult, phase0;
} stage_t;
#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post)
#define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre)
static void cubic_stage_fn(stage_t * p, fifo_t * output_fifo)
{
int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio);
sample_t const * input = stage_read_p(p);
sample_t * output = fifo_reserve(output_fifo, max_num_out);
#define integer fix.ms.parts.ms
#define fraction fix.ms.parts.ls
#define whole fix.ms.all
for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) {
sample_t const * s = input + p->at.integer;
double x = p->at.fraction * (1 / MULT32);
double b = .5*(s[1]+s[-1])-*s, a = (1/6.)*(s[2]-s[1]+s[-1]-*s-4*b);
double c = s[1]-*s-a-b;
output[i] = (sample_t)(p->mult * (((a*x + b)*x + c)*x + *s));
}
assert(max_num_out - i >= 0);
fifo_trim_by(output_fifo, max_num_out - i);
fifo_read(&p->fifo, p->at.integer, NULL);
p->at.integer = 0;
}
#if RATE_SIMD
#define dft_out p->dft_out
#else
#define dft_out output
#endif
static void dft_stage_fn(stage_t * p, fifo_t * output_fifo)
{
sample_t * output;
int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
rate_shared_t const * s = p->shared;
dft_filter_t const * f = &s->dft_filter[p->dft_filter_num];
int const overlap = f->num_taps - 1;
while (p->at.integer + p->L * num_in >= f->dft_length) {
div_t divd = div(f->dft_length - overlap - p->at.integer + p->L - 1, p->L);
sample_t const * input = fifo_read_ptr(&p->fifo);
fifo_read(&p->fifo, divd.quot, NULL);
num_in -= divd.quot;
output = fifo_reserve(output_fifo, f->dft_length);
if (lsx_is_power_of_2(p->L)) { /* F-domain */
int portion = f->dft_length / p->L;
memcpy(dft_out, input, (unsigned)portion * sizeof(*dft_out));
rdft_oforward(portion, f->dft_forward_setup, dft_out, p->dft_scratch);
for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */
dft_out[i] = dft_out[(portion << 1) - i],
dft_out[i+1] = -dft_out[(portion << 1) - i + 1];
dft_out[portion] = dft_out[1];
dft_out[portion + 1] = 0;
dft_out[1] = dft_out[0];
for (portion <<= 1; i < f->dft_length; i += portion, portion <<= 1) {
memcpy(dft_out + i, dft_out, (size_t)portion * sizeof(*dft_out));
dft_out[i + 1] = 0;
}
if (p->step.integer > 0)
rdft_reorder_back(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
} else {
if (p->L == 1)
memcpy(dft_out, input, (size_t)f->dft_length * sizeof(*dft_out));
else {
memset(dft_out, 0, (size_t)f->dft_length * sizeof(*dft_out));
for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L)
dft_out[i] = input[j];
p->at.integer = p->L - 1 - divd.rem;
}
if (p->step.integer > 0)
rdft_forward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
else
rdft_oforward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
}
if (p->step.integer > 0) {
rdft_convolve(f->dft_length, f->dft_backward_setup, dft_out, f->coefs);
rdft_backward(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
#if RATE_SIMD
if (p->step.integer == 1)
memcpy(output, dft_out, (size_t)f->dft_length * sizeof(sample_t));
#endif
if (p->step.integer != 1) {
for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j,
i += p->step.integer)
output[j] = dft_out[i];
p->remM = i - (f->dft_length - overlap);
fifo_trim_by(output_fifo, f->dft_length - j);
}
else fifo_trim_by(output_fifo, overlap);
}
else { /* F-domain */
int m = -p->step.integer;
rdft_convolve_portion(f->dft_length >> m, dft_out, f->coefs);
rdft_obackward(f->dft_length >> m, f->dft_backward_setup, dft_out, p->dft_scratch);
#if RATE_SIMD
memcpy(output, dft_out, (size_t)(f->dft_length >> m) * sizeof(sample_t));
#endif
fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >>m);
}
}
}
#undef dft_out
/* Set to 4 x nearest power of 2 */
/* or half of that if danger of causing too many cache misses. */
static int set_dft_length(int num_taps, int min, int large)
{
double d = log((double)num_taps) / log(2.);
return 1 << range_limit((int)(d + 2.77), min, max((int)(d + 1.77), large));
}
static void dft_stage_init(
unsigned instance, double Fp, double Fs, double Fn, double att,
double phase, stage_t * p, int L, int M, double * multiplier,
int min_dft_size, int large_dft_size)
{
dft_filter_t * f = &p->shared->dft_filter[instance];
int num_taps = 0, dft_length = f->dft_length, i;
bool f_domain_m = abs(3-M) == 1 && Fs <= 1;
if (!dft_length) {
int k = phase == 50 && lsx_is_power_of_2(L) && Fn == L? L << 1 : 4;
double * h = lsx_design_lpf(Fp, Fs, Fn, att, &num_taps, -k, -1.);
if (phase != 50)
lsx_fir_to_phase(&h, &num_taps, &f->post_peak, phase);
else f->post_peak = num_taps / 2;
dft_length = set_dft_length(num_taps, min_dft_size, large_dft_size);
f->coefs = aligned_calloc((size_t)dft_length, sizeof(*f->coefs));
for (i = 0; i < num_taps; ++i)
f->coefs[(i + dft_length - num_taps + 1) & (dft_length - 1)]
= (sample_t)(h[i] * ((1. / dft_length) * rdft_multiplier() * L * *multiplier));
free(h);
}
#if RATE_SIMD
p->dft_out = aligned_malloc(sizeof(sample_t) * (size_t)dft_length);
#endif
#if 1 /* In fact, currently, only pffft needs this. */
p->dft_scratch = aligned_malloc(2 * sizeof(sample_t) * (size_t)dft_length);
#endif
if (!f->dft_length) {
void * coef_setup = rdft_forward_setup(dft_length);
int Lp = lsx_is_power_of_2(L)? L : 1;
int Mp = f_domain_m? M : 1;
f->dft_forward_setup = rdft_forward_setup(dft_length / Lp);
f->dft_backward_setup = rdft_backward_setup(dft_length / Mp);
if (Mp == 1)
rdft_forward(dft_length, coef_setup, f->coefs, p->dft_scratch);
else
rdft_oforward(dft_length, coef_setup, f->coefs, p->dft_scratch);
rdft_delete_setup(coef_setup);
f->num_taps = num_taps;
f->dft_length = dft_length;
lsx_debug("fir_len=%i dft_length=%i Fp=%g Fs=%g Fn=%g att=%g %i/%i",
num_taps, dft_length, Fp, Fs, Fn, att, L, M);
}
*multiplier = 1;
p->out_in_ratio = (double)L / M;
p->type = dft_stage;
p->fn = dft_stage_fn;
p->preload = f->post_peak / L;
p->at.integer = f->post_peak % L;
p->L = L;
p->step.integer = f_domain_m? -M/2 : M;
p->dft_filter_num = instance;
p->block_len = f->dft_length - (f->num_taps - 1);
p->phase0 = p->at.integer / p->L;
}
#include "filters.h"
typedef struct {
double factor;
uint64_t samples_in, samples_out;
int num_stages;
stage_t * stages;
} rate_t;
#define pre_stage p->stages[shift]
#define arb_stage p->stages[shift + have_pre_stage]
#define post_stage p->stages[shift + have_pre_stage + have_arb_stage]
#define have_pre_stage (preM * preL != 1)
#define have_arb_stage (arbM * arbL != 1)
#define have_post_stage (postM * postL != 1)
#define TO_3dB(a) ((1.6e-6*a-7.5e-4)*a+.646)
#define LOW_Q_BW0 (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */
typedef enum {
rolloff_none, rolloff_small /* <= 0.01 dB */, rolloff_medium /* <= 0.35 dB */
} rolloff_t;
static char const * rate_init(
/* Private work areas (to be supplied by the client): */
rate_t * p, /* Per audio channel. */
rate_shared_t * shared, /* Between channels (undergoing same rate change)*/
/* Public parameters: Typically */
double factor, /* Input rate divided by output rate. */
double bits, /* Required bit-accuracy (pass + stop) 16|20|28 */
double phase, /* Linear/minimum etc. filter phase. 50 */
double passband_end, /* 0dB pt. bandwidth to preserve; nyquist=1 0.913*/
double stopband_begin, /* Aliasing/imaging control; > passband_end 1 */
rolloff_t rolloff, /* Pass-band roll-off small */
bool maintain_3dB_pt, /* true */
double multiplier, /* Linear gain to apply during conversion. 1 */
/* Primarily for test/development purposes: */
bool use_hi_prec_clock, /* Increase irrational ratio accuracy. false */
int interpolator, /* Force a particular coef interpolator. -1 */
size_t max_coefs_size, /* k bytes of coefs to try to keep below. 400 */
bool noSmallIntOpt, /* Disable small integer optimisations. false */
int log2_min_dft_size,
int log2_large_dft_size)
{
double att = (bits + 1) * linear_to_dB(2.), attArb = att; /* pass + stop */
double tbw0 = 1 - passband_end, Fs_a = stopband_begin;
double arbM = factor, tbw_tighten = 1;
int n = 0, i, preL = 1, preM = 1, shift = 0, arbL = 1, postL = 1, postM = 1;
bool upsample = false, rational = false, iOpt = !noSmallIntOpt;
int mode = rolloff > rolloff_small? factor > 1 || passband_end > LOW_Q_BW0:
(int)ceil(2 + (bits - 17) / 4);
stage_t * s;
assert(factor > 0);
assert(!bits || (15 <= bits && bits <= 33));
assert(0 <= phase && phase <= 100);
assert(.53 <= passband_end);
assert(stopband_begin <= 1.2);
assert(passband_end + .005 < stopband_begin);
p->factor = factor;
if (bits!=0) while (!n++) { /* Determine stages: */
int try, L, M, x, maxL = interpolator > 0? 1 : mode? 2048 :
(int)ceil((double)max_coefs_size * 1000. / (U100_l * sizeof(sample_t)));
double d, epsilon = 0, frac;
upsample = arbM < 1;
for (i = (int)(arbM * .5), shift = 0; i >>= 1; arbM *= .5, ++shift);
preM = upsample || (arbM > 1.5 && arbM < 2);
postM = 1 + (arbM > 1 && preM), arbM /= postM;
preL = 1 + (!preM && arbM < 2) + (upsample && mode), arbM *= preL;
if ((frac = arbM - (int)arbM)!=0)
epsilon = fabs(floor(frac * MULT32 + .5) / (frac * MULT32) - 1);
for (i = 1, rational = frac==0; i <= maxL && !rational; ++i) {
d = frac * i, try = (int)(d + .5);
if ((rational = fabs(try / d - 1) <= epsilon)) { /* No long doubles! */
if (try == i)
arbM = ceil(arbM), shift += x = arbM > 3, arbM /= 1 + x;
else arbM = i * (int)arbM + try, arbL = i;
}
}
L = preL * arbL, M = (int)(arbM * postM), x = (L|M)&1, L >>= !x, M >>= !x;
if (iOpt && postL == 1 && (d = preL * arbL / arbM) > 4 && d != 5) {
for (postL = 4, i = (int)(d / 16); (i >>= 1) && postL < 256; postL <<= 1);
arbM = arbM * postL / arbL / preL, arbL = 1, n = 0;
} else if (rational && (max(L, M) < 3 + 2 * iOpt || L * M < 6 * iOpt))
preL = L, preM = M, arbM = arbL = postM = 1;
if (!mode && (!rational || !n))
++mode, n = 0;
}
p->num_stages = shift + have_pre_stage + have_arb_stage + have_post_stage;
if (!p->num_stages && multiplier != 1) {
bits = arbL = 0; /* Use cubic_stage in this case. */
++p->num_stages;
}
p->stages = calloc((size_t)p->num_stages + 1, sizeof(*p->stages));
for (i = 0; i < p->num_stages; ++i)
p->stages[i].shared = shared;
if ((n = p->num_stages) > 1) { /* Att. budget: */
if (have_arb_stage)
att += linear_to_dB(2.), attArb = att, --n;
att += linear_to_dB((double)n);
}
for (n = 0; (size_t)n + 1 < array_length(half_firs) && att > half_firs[n].att; ++n);
for (i = 0, s = p->stages; i < shift; ++i, ++s) {
s->type = half_stage;
s->fn = half_firs[n].fn;
s->pre_post = 4 * half_firs[n].num_coefs;
s->preload = s->pre = s->pre_post >> 1;
}
if (have_pre_stage) {
if (maintain_3dB_pt && have_post_stage) { /* Trans. bands overlapping. */
double tbw3 = tbw0 * TO_3dB(att); /* FFS: consider Fs_a. */
double x = ((2.1429e-4 - 5.2083e-7 * att) * att - .015863) * att + 3.95;
x = att * pow((tbw0 - tbw3) / (postM / (factor * postL) - 1 + tbw0), x);
if (x > .035) {
tbw_tighten = ((4.3074e-3 - 3.9121e-4 * x) * x - .040009) * x + 1.0014;
lsx_debug("x=%g tbw_tighten=%g", x, tbw_tighten);
}
}
dft_stage_init(0, 1 - tbw0 * tbw_tighten, Fs_a, preM? max(preL, preM) :
arbM / arbL, att, phase, &pre_stage, preL, max(preM, 1), &multiplier,
log2_min_dft_size, log2_large_dft_size);
}
if (bits==0 && have_arb_stage) { /* `Quick' cubic arb stage: */
arb_stage.type = cubic_stage;
arb_stage.fn = cubic_stage_fn;
arb_stage.mult = multiplier, multiplier = 1;
arb_stage.step.whole = (int64_t)(arbM * MULT32 + .5);
arb_stage.pre_post = max(3, arb_stage.step.integer);
arb_stage.preload = arb_stage.pre = 1;
arb_stage.out_in_ratio = MULT32 / (double)arb_stage.step.whole;
}
else if (have_arb_stage) { /* Higher quality arb stage: */
poly_fir_t const * f = &poly_firs[6*(upsample + !!preM) + mode - !upsample];
int order, num_coefs = (int)f->interp[0].scalar, phase_bits, phases;
size_t coefs_size;
double x = .5, at, Fp, Fs, Fn, mult = upsample? 1 : arbL / arbM;
poly_fir1_t const * f1;
Fn = !upsample && preM? x = arbM / arbL : 1;
Fp = !preM? mult : mode? .5 : 1;
Fs = 2 - Fp; /* Ignore Fs_a; it would have little benefit here. */
Fp *= 1 - tbw0;
if (rolloff > rolloff_small && mode)
Fp = !preM? mult * .5 - .125 : mult * .05 + .1;
else if (rolloff == rolloff_small)
Fp = Fs - (Fs - .148 * x - Fp * .852) * (.00813 * bits + .973);
i = (interpolator < 0? !rational : max(interpolator, !rational)) - 1;
do {
f1 = &f->interp[++i];
assert(f1->fn);
if (i)
arbM /= arbL, arbL = 1, rational = false;
phase_bits = (int)ceil(f1->scalar + log(mult)/log(2.));
phases = !rational? (1 << phase_bits) : arbL;
if (f->interp[0].scalar==0) {
int phases0 = max(phases, 19), n0 = 0;
lsx_design_lpf(Fp, Fs, -Fn, attArb, &n0, phases0, f->beta);
num_coefs = n0 / phases0 + 1, num_coefs += num_coefs & !preM;
}
if ((num_coefs & 1) && rational && (arbL & 1))
phases <<= 1, arbL <<= 1, arbM *= 2;
at = arbL * (arb_stage.phase0 = .5 * (num_coefs & 1));
order = i + (i && mode > 4);
coefs_size = (size_t)(num_coefs4 * phases * (order + 1)) * sizeof(sample_t);
} while (interpolator < 0 && i < 2 && f->interp[i+1].fn &&
coefs_size / 1000 > max_coefs_size);
if (!arb_stage.shared->poly_fir_coefs) {
int num_taps = num_coefs * phases - 1;
raw_coef_t * coefs = lsx_design_lpf(
Fp, Fs, Fn, attArb, &num_taps, phases, f->beta);
arb_stage.shared->poly_fir_coefs = prepare_coefs(
coefs, num_coefs, phases, order, multiplier);
lsx_debug("fir_len=%i phases=%i coef_interp=%i size=%.3gk",
num_coefs, phases, order, (double)coefs_size / 1000.);
free(coefs);
}
multiplier = 1;
arb_stage.type = rational? rational_stage : irrational_stage;
arb_stage.fn = f1->fn;
arb_stage.pre_post = num_coefs4 - 1;
arb_stage.preload = ((num_coefs - 1) >> 1) + (num_coefs4 - num_coefs);
arb_stage.n = num_coefs4;
arb_stage.phase_bits = phase_bits;
arb_stage.L = arbL;
arb_stage.use_hi_prec_clock = mode > 1 && use_hi_prec_clock && !rational;
#if FLOAT_HI_PREC_CLOCK
if (arb_stage.use_hi_prec_clock) {
arb_stage.at.flt = at;
arb_stage.step.flt = arbM;
arb_stage.out_in_ratio = (double)(arbL / arb_stage.step.flt);
} else
#endif
{
arb_stage.at.whole = (int64_t)(at * MULT32 + .5);
#if !FLOAT_HI_PREC_CLOCK
if (arb_stage.use_hi_prec_clock) {
arb_stage.at.fix.ls.parts.ms = 0x80000000ul;
arbM *= MULT32;
arb_stage.step.whole = (int64_t)arbM;
arbM -= (double)arb_stage.step.whole;
arbM *= MULT32 * MULT32;
arb_stage.step.fix.ls.all = (uint64_t)arbM;
} else
#endif
arb_stage.step.whole = (int64_t)(arbM * MULT32 + .5);
arb_stage.out_in_ratio = MULT32 * arbL / (double)arb_stage.step.whole;
}
}
if (have_post_stage)
dft_stage_init(1, 1 - (1 - (1 - tbw0) *
(upsample? factor * postL / postM : 1)) * tbw_tighten, Fs_a,
(double)max(postL, postM), att, phase, &post_stage, postL, postM,
&multiplier, log2_min_dft_size, log2_large_dft_size);
lsx_debug("%g: »%i⋅%i/%i⋅%i/%g⋅%i/%i",
1/factor, shift, preL, preM, arbL, arbM, postL, postM);
for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) {
fifo_create(&s->fifo, (int)sizeof(sample_t));
memset(fifo_reserve(&s->fifo, s->preload), 0, sizeof(sample_t) * (size_t)s->preload);
lsx_debug("%5i|%-5i preload=%i remL=%i o/i=%g",
s->pre, s->pre_post - s->pre, s->preload, s->at.integer, s->out_in_ratio);
}
fifo_create(&s->fifo, (int)sizeof(sample_t));
return 0;
}
static void rate_process(rate_t * p)
{
stage_t * stage = p->stages;
int i;
for (i = 0; i < p->num_stages; ++i, ++stage)
stage->fn(stage, &(stage+1)->fifo);
}
static sample_t * rate_input(rate_t * p, sample_t const * samples, size_t n)
{
p->samples_in += n;
return fifo_write(&p->stages[0].fifo, (int)n, samples);
}
static sample_t const * rate_output(rate_t * p, sample_t * samples, size_t * n)
{
fifo_t * fifo = &p->stages[p->num_stages].fifo;
p->samples_out += *n = min(*n, (size_t)fifo_occupancy(fifo));
return fifo_read(fifo, (int)*n, samples);
}
static void rate_flush(rate_t * p)
{
fifo_t * fifo = &p->stages[p->num_stages].fifo;
#if defined _MSC_VER && _MSC_VER == 1200
uint64_t samples_out = (uint64_t)(int64_t)((double)(int64_t)p->samples_in / p->factor + .5);
#else
uint64_t samples_out = (uint64_t)((double)p->samples_in / p->factor + .5);
#endif
size_t remaining = (size_t)(samples_out - p->samples_out);
if ((size_t)fifo_occupancy(fifo) < remaining) {
uint64_t samples_in = p->samples_in;
sample_t * buff = calloc(1024, sizeof(*buff));
while ((size_t)fifo_occupancy(fifo) < remaining) {
rate_input(p, buff, 1024);
rate_process(p);
}
fifo_trim_to(fifo, (int)remaining);
p->samples_in = samples_in;
free(buff);
}
}
static void rate_close(rate_t * p)
{
rate_shared_t * shared = p->stages[0].shared;
int i;
for (i = 0; i <= p->num_stages; ++i) {
stage_t * s = &p->stages[i];
aligned_free(s->dft_scratch);
aligned_free(s->dft_out);
fifo_delete(&s->fifo);
}
if (shared) {
for (i = 0; i < 2; ++i) {
dft_filter_t * f= &shared->dft_filter[i];
aligned_free(f->coefs);
rdft_delete_setup(f->dft_forward_setup);
rdft_delete_setup(f->dft_backward_setup);
}
free(shared->poly_fir_coefs);
memset(shared, 0, sizeof(*shared));
}
free(p->stages);
}
#if defined SOXR_LIB
static double rate_delay(rate_t * p)
{
#if defined _MSC_VER && _MSC_VER == 1200
double samples_out = (double)(int64_t)p->samples_in / p->factor;
return max(0, samples_out - (double)(int64_t)p->samples_out);
#else
double samples_out = (double)p->samples_in / p->factor;
return max(0, samples_out - (double)p->samples_out);
#endif
}
static void rate_sizes(size_t * shared, size_t * channel)
{
*shared = sizeof(rate_shared_t);
*channel = sizeof(rate_t);
}
#include "soxr.h"
static char const * rate_create(
void * channel,
void * shared,
double io_ratio,
soxr_quality_spec_t * q_spec,
soxr_runtime_spec_t * r_spec,
double scale)
{
return rate_init(
channel, shared,
io_ratio,
q_spec->precision,
q_spec->phase_response,
q_spec->passband_end,
q_spec->stopband_begin,
(rolloff_t)"\1\2\0"[q_spec->flags & 3],
!!(q_spec->flags & SOXR_MAINTAIN_3DB_PT),
scale,
!!(q_spec->flags & SOXR_HI_PREC_CLOCK),
(int)(r_spec->flags & 3) - 1,
r_spec->coef_size_kbytes,
!!(r_spec->flags & SOXR_NOSMALLINTOPT),
(int)r_spec->log2_min_dft_size,
(int)r_spec->log2_large_dft_size);
}
static char const * id(void)
{
return RATE_ID;
}
fn_t RATE_CB[] = {
(fn_t)rate_input,
(fn_t)rate_process,
(fn_t)rate_output,
(fn_t)rate_flush,
(fn_t)rate_close,
(fn_t)rate_delay,
(fn_t)rate_sizes,
(fn_t)rate_create,
(fn_t)0,
(fn_t)id,
};
#endif

View File

@ -1,9 +0,0 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define sample_t float
#define RATE_SIMD 0
#define RDFT_CB _soxr_rdft32_cb
#define RATE_CB _soxr_rate32_cb
#define RATE_ID "cr32"
#include "rate.h"

View File

@ -1,9 +0,0 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define sample_t float
#define RATE_SIMD 1
#define RDFT_CB _soxr_rdft32s_cb
#define RATE_CB _soxr_rate32s_cb
#define RATE_ID "cr32s"
#include "rate.h"

View File

@ -1,9 +0,0 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define sample_t double
#define RATE_SIMD 0
#define RDFT_CB _soxr_rdft64_cb
#define RATE_CB _soxr_rate64_cb
#define RATE_ID "cr64"
#include "rate.h"

24
src/rdft_t.h 100644
View File

@ -0,0 +1,24 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
typedef void (* fn_t)(void);
#define rdft_forward_setup (*(void * (*)(int))RDFT_CB[0])
#define rdft_backward_setup (*(void * (*)(int))RDFT_CB[1])
#define rdft_delete_setup (*(void (*)(void *))RDFT_CB[2])
#define rdft_forward (*(void (*)(int, void *, void *, void *))RDFT_CB[3])
#define rdft_oforward (*(void (*)(int, void *, void *, void *))RDFT_CB[4])
#define rdft_backward (*(void (*)(int, void *, void *, void *))RDFT_CB[5])
#define rdft_obackward (*(void (*)(int, void *, void *, void *))RDFT_CB[6])
#define rdft_convolve (*(void (*)(int, void *, void *, void const *))RDFT_CB[7])
#define rdft_convolve_portion (*(void (*)(int, void *, void const *))RDFT_CB[8])
#define rdft_multiplier (*(int (*)(void))RDFT_CB[9])
#define rdft_reorder_back (*(void (*)(int, void *, void *, void *))RDFT_CB[10])
#define rdft_malloc (*(void * (*)(size_t))RDFT_CB[11])
#define rdft_calloc (*(void * (*)(size_t, size_t))RDFT_CB[12])
#define rdft_free (*(void (*)(void *))RDFT_CB[13])
#define rdft_flags (*(int (*)(void))RDFT_CB[14])
/* Flag templates: */
#define RDFT_IS_SIMD 1
#define RDFT_NEEDS_SCRATCH 2

View File

@ -1,5 +0,0 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define PFFT_MACROS_ONLY
#include "pffft.c"

View File

@ -1,21 +1,15 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include "simd.h"
#include "simd-dev.h"
#include "soxr-config.h"
#if AVCODEC_FOUND
#define SIMD_ALIGNMENT (sizeof(double) * 4)
#else
#define SIMD_ALIGNMENT (sizeof(float) * 4)
#endif
#define SIMD_ALIGNMENT (sizeof(float) * (1 + (PFFFT_DOUBLE|AVCODEC_FOUND)) * 4)
void * _soxr_simd_aligned_malloc(size_t size)
void * SIMD_ALIGNED_MALLOC(size_t size)
{
char * p1 = 0, * p = malloc(size + SIMD_ALIGNMENT);
if (p) {
@ -27,9 +21,9 @@ void * _soxr_simd_aligned_malloc(size_t size)
void * _soxr_simd_aligned_calloc(size_t nmemb, size_t size)
void * SIMD_ALIGNED_CALLOC(size_t nmemb, size_t size)
{
void * p = _soxr_simd_aligned_malloc(nmemb * size);
void * p = SIMD_ALIGNED_MALLOC(nmemb * size);
if (p)
memset(p, 0, nmemb * size);
return p;
@ -37,7 +31,7 @@ void * _soxr_simd_aligned_calloc(size_t nmemb, size_t size)
void _soxr_simd_aligned_free(void * p1)
void SIMD_ALIGNED_FREE(void * p1)
{
if (p1)
free(*((void * *)p1 - 1));
@ -45,11 +39,16 @@ void _soxr_simd_aligned_free(void * p1)
void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float * b)
#define PFFT_MACROS_ONLY
#include "pffft.c"
void ORDERED_CONVOLVE_SIMD(int n, void * not_used, float * a, float const * b)
{
int i;
float ab0, ab1;
v4sf * /*RESTRICT*/ va = (v4sf *)a;
v4sf * RESTRICT va = (v4sf *)a;
v4sf const * RESTRICT vb = (v4sf const *)b;
assert(VALIGNED(a) && VALIGNED(b));
ab0 = a[0] * b[0], ab1 = a[1] * b[1];
@ -68,11 +67,11 @@ void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float
void _soxr_ordered_partial_convolve_simd(int n, float * a, const float * b)
void ORDERED_PARTIAL_CONVOLVE_SIMD(int n, float * a, float const * b)
{
int i;
float ab0;
v4sf * /*RESTRICT*/ va = (v4sf *)a;
v4sf * RESTRICT va = (v4sf *)a;
v4sf const * RESTRICT vb = (v4sf const *)b;
assert(VALIGNED(a) && VALIGNED(b));
ab0 = a[0] * b[0];

View File

@ -1,16 +0,0 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#if !defined simd_included
#define simd_included
#include <stddef.h>
void * _soxr_simd_aligned_malloc(size_t);
void * _soxr_simd_aligned_calloc(size_t, size_t);
void _soxr_simd_aligned_free(void *);
void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float * b);
void _soxr_ordered_partial_convolve_simd(int n, float * a, const float * b);
#endif

54
src/simd32-dev.h 100644
View File

@ -0,0 +1,54 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#if !defined soxr_simd_dev_included
#define soxr_simd_dev_included
#if defined __GNUC__
#define SIMD_INLINE(T) static __inline T __attribute__((always_inline))
#define vAlign __attribute__((aligned (16)))
#elif defined _MSC_VER
#define SIMD_INLINE(T) static __forceinline T
#define vAlign __declspec(align(16))
#endif
#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86
#include <xmmintrin.h>
#define vZero() _mm_setzero_ps()
#define vSet1(a) _mm_set_ss(a)
#define vMul(a,b) _mm_mul_ps(a,b)
#define vAdd(a,b) _mm_add_ps(a,b)
#define vMac(a,b,c) vAdd(vMul(a,b),c)
#define vLds(a) _mm_set1_ps(a)
#define vLd(a) _mm_load_ps(a)
#define vLdu(a) _mm_loadu_ps(a)
typedef __m128 v4_t;
SIMD_INLINE(void) vStorSum(float * a, v4_t b) {
v4_t t = vAdd(_mm_movehl_ps(b, b), b);
_mm_store_ss(a, vAdd(t, _mm_shuffle_ps(t,t,1)));}
#elif defined __arm__
#include <arm_neon.h>
#define vZero() vdupq_n_f32(0)
#define vMul(a,b) vmulq_f32(a,b)
#define vAdd(a,b) vaddq_f32(a,b)
#define vMac(a,b,c) vmlaq_f32(c,a,b)
#define vLds(a) vld1q_dup_f32(&(a))
#define vLd(a) vld1q_f32(a)
#define vLdu(a) vld1q_f32(a)
typedef float32x4_t v4_t;
SIMD_INLINE(void) vStorSum(float * a, v4_t b) {
float32x2_t t = vadd_f32(vget_high_f32(b), vget_low_f32(b));
*a = vget_lane_f32(vpadd_f32(t, t), 0);}
#endif
#endif

8
src/simd32.c 100644
View File

@ -0,0 +1,8 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define PFFFT_DOUBLE 0
#include "simd32.h"
#include "simd.c"

23
src/simd32.h 100644
View File

@ -0,0 +1,23 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#if !defined simd32_included
#define simd32_included
#include <stddef.h>
void * _soxr_simd32_aligned_malloc(size_t);
void * _soxr_simd32_aligned_calloc(size_t, size_t);
void _soxr_simd32_aligned_free(void *);
#define SIMD_ALIGNED_MALLOC _soxr_simd32_aligned_malloc
#define SIMD_ALIGNED_CALLOC _soxr_simd32_aligned_calloc
#define SIMD_ALIGNED_FREE _soxr_simd32_aligned_free
void _soxr_ordered_convolve_simd32(int n, void * not_used, float * a, float const * b);
void _soxr_ordered_partial_convolve_simd32(int n, float * a, float const * b);
#define ORDERED_CONVOLVE_SIMD _soxr_ordered_convolve_simd32
#define ORDERED_PARTIAL_CONVOLVE_SIMD _soxr_ordered_partial_convolve_simd32
#endif

42
src/simd64-dev.h 100644
View File

@ -0,0 +1,42 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#if !defined soxr_simd64_dev_included
#define soxr_simd64_dev_included
#if defined __GNUC__
#define SIMD_INLINE(T) static __inline T __attribute__((always_inline))
#define vAlign __attribute__((aligned (32)))
#elif defined _MSC_VER
#define SIMD_INLINE(T) static __forceinline T
#define vAlign __declspec(align(32))
#else
#define SIMD_INLINE(T) static __inline T
#endif
#if defined __x86_64__ || defined _M_X64 || defined i386 || defined _M_IX86
#include <immintrin.h>
#if defined __AVX__
#define vZero() _mm256_setzero_pd()
#define vSet1(a) _mm256_set_pd(0,0,0,a)
#define vMul(a,b) _mm256_mul_pd(a,b)
#define vAdd(a,b) _mm256_add_pd(a,b)
#define vMac(a,b,c) vAdd(vMul(a,b),c) /* Note: gcc -mfma will `fuse' these */
#define vLds(a) _mm256_set1_pd(a)
#define vLd(a) _mm256_load_pd(a)
#define vLdu(a) _mm256_loadu_pd(a)
typedef __m256d v4_t;
SIMD_INLINE(void) vStorSum(double * a, v4_t b) {
b = _mm256_hadd_pd(b, _mm256_permute2f128_pd(b,b,1));
_mm_store_sd(a, _mm256_castpd256_pd128(_mm256_hadd_pd(b,b)));}
#endif
#endif
#endif

8
src/simd64.c 100644
View File

@ -0,0 +1,8 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#define PFFFT_DOUBLE 1
#include "simd64.h"
#include "simd.c"

23
src/simd64.h 100644
View File

@ -0,0 +1,23 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#if !defined simd64_included
#define simd64_included
#include <stddef.h>
void * _soxr_simd64_aligned_malloc(size_t);
void * _soxr_simd64_aligned_calloc(size_t, size_t);
void _soxr_simd64_aligned_free(void *);
#define SIMD_ALIGNED_MALLOC _soxr_simd64_aligned_malloc
#define SIMD_ALIGNED_CALLOC _soxr_simd64_aligned_calloc
#define SIMD_ALIGNED_FREE _soxr_simd64_aligned_free
void _soxr_ordered_convolve_simd64(int n, void * not_used, double * a, double const * b);
void _soxr_ordered_partial_convolve_simd64(int n, double * a, double const * b);
#define ORDERED_CONVOLVE_SIMD _soxr_ordered_convolve_simd64
#define ORDERED_PARTIAL_CONVOLVE_SIMD _soxr_ordered_partial_convolve_simd64
#endif

View File

@ -37,9 +37,9 @@
#endif
typedef float SRC_SAMPLE;
#if !defined SOXR_LIB
enum SRC_SRCTYPE_e {SRC_SINC_BEST_QUALITY, SRC_SINC_MEDIUM_QUALITY,
SRC_SINC_FASTEST, SRC_ZERO_ORDER_HOLD, SRC_LINEAR};
#if !defined SOXR_LIB
typedef int SRC_SRCTYPE;
typedef int SRC_ERROR;
typedef long (* src_callback_t)(void *, SRC_SAMPLE * *);

View File

@ -1,10 +1,8 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include <math.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
@ -13,20 +11,17 @@
#include "internal.h"
#if AVUTIL_FOUND
#include <libavutil/cpu.h>
#include <libavutil/cpu.h>
#endif
#if WITH_DEV_TRACE
int _soxr_trace_level(void)
{
char const * e = getenv("SOXR_TRACE");
return e? atoi(e) : 4;
}
#include <stdarg.h>
#include <stdio.h>
int _soxr_trace_level;
void _soxr_trace(char const * fmt, ...)
{
@ -96,52 +91,6 @@ struct soxr {
#define RESET_ON_CLEAR (1u<<31)
/* TODO: these should not be here. */
#define TO_3dB(a) ((1.6e-6*a-7.5e-4)*a+.646)
#define LOW_Q_BW0 (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */
soxr_quality_spec_t soxr_quality_spec(unsigned long recipe, unsigned long flags)
{
soxr_quality_spec_t spec, * p = &spec;
unsigned quality = recipe & 0xf;
double rej;
memset(p, 0, sizeof(*p));
if (quality > 13) {
p->e = "invalid quality type";
return spec;
}
flags |= quality < SOXR_LSR0Q? RESET_ON_CLEAR : 0;
if (quality == 13)
quality = 6;
else if (quality > 10)
quality = 0;
p->phase_response = "\62\31\144"[(recipe & 0x30) >> 4];
p->stopband_begin = 1;
p->precision = !quality? 0: quality < 3? 16 : quality < 8? 4 + quality * 4 : 55 - quality * 4;
rej = p->precision * linear_to_dB(2.);
p->flags = flags;
if (quality < 8) {
p->passband_end = quality == 1? LOW_Q_BW0 : 1 - .05 / TO_3dB(rej);
if (quality <= 2)
p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM;
}
else {
static float const bw[] = {.931f, .832f, .663f};
p->passband_end = bw[quality - 8];
if (quality - 8 == 2)
p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM;
}
if (recipe & SOXR_STEEP_FILTER)
p->passband_end = 1 - .01 / TO_3dB(rej);
return spec;
}
char const * soxr_engine(soxr_t p)
{
return resampler_id();
@ -163,82 +112,132 @@ soxr_error_t soxr_error(soxr_t p)
soxr_runtime_spec_t soxr_runtime_spec(unsigned num_threads)
{
soxr_runtime_spec_t spec, * p = &spec;
memset(p, 0, sizeof(*p));
p->log2_min_dft_size = 10;
p->log2_large_dft_size = 17;
p->coef_size_kbytes = 400;
p->num_threads = num_threads;
return spec;
}
soxr_io_spec_t soxr_io_spec(
soxr_datatype_t itype,
soxr_datatype_t otype)
{
soxr_io_spec_t spec, * p = &spec;
memset(p, 0, sizeof(*p));
if ((itype | otype) >= SOXR_SPLIT * 2)
p->e = "invalid io datatype(s)";
else {
p->itype = itype;
p->otype = otype;
p->scale = 1;
}
return spec;
}
#if SIMD_FOUND
static bool cpu_has_simd(void)
{
#if defined __x86_64__ || defined _M_X64
return true;
#elif defined __GNUC__ && defined i386
uint32_t eax, ebx, ecx, edx;
__asm__ __volatile__ (
"pushl %%ebx \n\t"
"cpuid \n\t"
"movl %%ebx, %1\n\t"
"popl %%ebx \n\t"
: "=a"(eax), "=r"(ebx), "=c"(ecx), "=d"(edx)
: "a"(1)
: "cc" );
return !!(edx & 0x06000000);
#elif defined _MSC_VER && defined _M_IX86
uint32_t d;
__asm {
xor eax, eax
inc eax
push ebx
cpuid
pop ebx
mov d, edx
}
return !!(d & 0x06000000);
#elif defined AV_CPU_FLAG_NEON
return !!(av_get_cpu_flags() & AV_CPU_FLAG_NEON);
#endif
return false;
}
static bool should_use_simd(void)
{
char const * e = getenv("SOXR_USE_SIMD");
return e? !!atoi(e) : cpu_has_simd();
}
#if WITH_CR32S || WITH_CR64S
#if defined __GNUC__ && defined __x86_64__
#define CPUID(type, eax_, ebx_, ecx_, edx_) \
__asm__ __volatile__ ( \
"cpuid \n\t" \
: "=a" (eax_), "=b" (ebx_), "=c" (ecx_), "=d" (edx_) \
: "a" (type), "c" (0));
#elif defined __GNUC__ && defined __i386__
#define CPUID(type, eax_, ebx_, ecx_, edx_) \
__asm__ __volatile__ ( \
"mov %%ebx, %%edi \n\t" \
"cpuid \n\t" \
"xchg %%edi, %%ebx \n\t" \
: "=a" (eax_), "=D" (ebx_), "=c" (ecx_), "=d" (edx_) \
: "a" (type), "c" (0));
#elif defined _M_X64 && defined _MSC_VER && _MSC_VER > 1500
void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue);
#pragma intrinsic(__cpuidex)
#define CPUID(type, eax_, ebx_, ecx_, edx_) do { \
int regs[4]; \
__cpuidex(regs, type, 0); \
eax_ = regs[0], ebx_ = regs[1], ecx_ = regs[2], edx_ = regs[3]; \
} while(0)
#elif defined _M_X64 && defined _MSC_VER
void __cpuidex(int CPUInfo[4], int info_type);
#pragma intrinsic(__cpuidex)
#define CPUID(type, eax_, ebx_, ecx_, edx_) do { \
int regs[4]; \
__cpuidex(regs, type); \
eax_ = regs[0], ebx_ = regs[1], ecx_ = regs[2], edx_ = regs[3]; \
} while(0)
#elif defined _M_IX86 && defined _MSC_VER
#define CPUID(type, eax_, ebx_, ecx_, edx_) \
__asm pushad \
__asm mov eax, type \
__asm xor ecx, ecx \
__asm cpuid \
__asm mov eax_, eax \
__asm mov ebx_, ebx \
__asm mov ecx_, ecx \
__asm mov edx_, edx \
__asm popad
#endif
#endif
extern control_block_t _soxr_rate32s_cb, _soxr_rate32_cb, _soxr_rate64_cb, _soxr_vr32_cb;
#if WITH_CR32S
static bool cpu_has_simd32(void)
{
#if defined __x86_64__ || defined _M_X64
return true;
#elif defined __i386__ || defined _M_IX86
enum {SSE = 1 << 25, SSE2 = 1 << 26};
unsigned eax_, ebx_, ecx_, edx_;
CPUID(1, eax_, ebx_, ecx_, edx_);
return (edx_ & (SSE|SSE2)) != 0;
#elif defined AV_CPU_FLAG_NEON
return !!(av_get_cpu_flags() & AV_CPU_FLAG_NEON);
#else
return false;
#endif
}
static bool should_use_simd32(void)
{
char const * e = getenv("SOXR_USE_SIMD32");
return e? !!atoi(e) : cpu_has_simd32();
}
#endif
#if WITH_CR64S
#if defined __GNUC__
#define XGETBV(type, eax_, edx_) \
__asm__ __volatile__ ( \
".byte 0x0f, 0x01, 0xd0\n" \
: "=a"(eax_), "=d"(edx_) : "c" (type));
#elif defined _M_X64 && defined _MSC_FULL_VER && _MSC_FULL_VER >= 160040219
#include <immintrin.h>
#define XGETBV(type, eax_, edx_) do { \
union {uint64_t x; uint32_t y[2];} a = {_xgetbv(0)}; \
eax_ = a.y[0], edx_ = a.y[1]; \
} while(0)
#elif defined _M_IX86 && defined _MSC_VER
#define XGETBV(type, eax_, edx_) \
__asm pushad \
__asm mov ecx, type \
__asm _emit 0x0f \
__asm _emit 0x01 \
__asm _emit 0xd0 \
__asm mov eax_, eax \
__asm mov edx_, edx \
__asm popad
#else
#define XGETBV(type, eax_, edx_) eax_ = edx_ = 0
#endif
static bool cpu_has_simd64(void)
{
enum {OSXSAVE = 1 << 27, AVX = 1 << 28};
unsigned eax_, ebx_, ecx_, edx_;
CPUID(1, eax_, ebx_, ecx_, edx_);
if ((ecx_ & (OSXSAVE|AVX)) == (OSXSAVE|AVX)) {
XGETBV(0, eax_, edx_);
return (eax_ & 6) == 6;
}
return false;
}
static bool should_use_simd64(void)
{
char const * e = getenv("SOXR_USE_SIMD64");
return e? !!atoi(e) : cpu_has_simd64();
}
#endif
extern control_block_t
_soxr_rate32_cb,
_soxr_rate32s_cb,
_soxr_rate64_cb,
_soxr_rate64s_cb,
_soxr_vr32_cb;
@ -280,6 +279,11 @@ soxr_t soxr_create(
soxr_t p = 0;
soxr_error_t error = 0;
#if WITH_DEV_TRACE
char const * e = getenv("SOXR_TRACE");
_soxr_trace_level = e? atoi(e) : 0;
#endif
if (q_spec && q_spec->e) error = q_spec->e;
else if (io_spec && (io_spec->itype | io_spec->otype) >= SOXR_SPLIT * 2)
error = "invalid io datatype(s)";
@ -319,27 +323,39 @@ soxr_t soxr_create(
p->seed = (unsigned long)time(0) ^ (unsigned long)(size_t)p;
#if WITH_SINGLE_PRECISION
if (!WITH_DOUBLE_PRECISION || (p->q_spec.precision <= 20 && !(p->q_spec.flags & SOXR_DOUBLE_PRECISION))
|| (p->q_spec.flags & SOXR_VR)) {
#if WITH_CR32 || WITH_CR32S || WITH_VR32
if (0
#if WITH_VR32
|| ((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR))
#endif
#if WITH_CR32 || WITH_CR32S
|| !(WITH_CR64 || WITH_CR64S) || (p->q_spec.precision <= 20 && !(p->q_spec.flags & SOXR_DOUBLE_PRECISION))
#endif
) {
p->deinterleave = (deinterleave_t)_soxr_deinterleave_f;
p->interleave = (interleave_t)_soxr_interleave_f;
memcpy(&p->control_block,
(p->q_spec.flags & SOXR_VR)? &_soxr_vr32_cb :
#if SIMD_FOUND
should_use_simd()? &_soxr_rate32s_cb :
#if WITH_VR32
((!WITH_CR32 && !WITH_CR32S) || (p->q_spec.flags & SOXR_VR))? &_soxr_vr32_cb :
#endif
#if WITH_CR32S
!WITH_CR32 || should_use_simd32()? &_soxr_rate32s_cb :
#endif
&_soxr_rate32_cb, sizeof(p->control_block));
}
#if WITH_DOUBLE_PRECISION
#if WITH_CR64 || WITH_CR64S
else
#endif
#endif
#if WITH_DOUBLE_PRECISION
#if WITH_CR64 || WITH_CR64S
{
p->deinterleave = (deinterleave_t)_soxr_deinterleave;
p->interleave = (interleave_t)_soxr_interleave;
memcpy(&p->control_block, &_soxr_rate64_cb, sizeof(p->control_block));
memcpy(&p->control_block,
#if WITH_CR64S
!WITH_CR64 || should_use_simd64()? &_soxr_rate64s_cb :
#endif
&_soxr_rate64_cb, sizeof(p->control_block));
}
#endif

View File

@ -1,4 +1,4 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
@ -269,9 +269,6 @@ struct soxr_runtime_spec { /* Typically */
#define SOXR_COEF_INTERP_LOW 2u /* Man. select: less CPU, more memory. */
#define SOXR_COEF_INTERP_HIGH 3u /* Man. select: more CPU, less memory. */
#define SOXR_STRICT_BUFFERING 4u /* Reserved for future use. */
#define SOXR_NOSMALLINTOPT 8u /* For test purposes only. */
/* -------------------------- API type constructors ------------------------- */
@ -296,7 +293,7 @@ SOXR soxr_quality_spec_t soxr_quality_spec(
#define SOXR_24_BITQ 5
#define SOXR_28_BITQ 6
#define SOXR_32_BITQ 7
/* Libsamplerate equivalent qualities: */
/* For internal use only; to be removed: */
#define SOXR_LSR0Q 8 /* 'Best sinc'. */
#define SOXR_LSR1Q 9 /* 'Medium sinc'. */
#define SOXR_LSR2Q 10 /* 'Fast sinc'. */
@ -304,8 +301,8 @@ SOXR soxr_quality_spec_t soxr_quality_spec(
#define SOXR_LINEAR_PHASE 0x00
#define SOXR_INTERMEDIATE_PHASE 0x10
#define SOXR_MINIMUM_PHASE 0x30
#define SOXR_STEEP_FILTER 0x40
#define SOXR_ALLOW_ALIASING 0x80 /* Reserved for future use. */

View File

@ -1,16 +1,10 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
/* Variable-rate resampling. */
#include <assert.h>
#include <math.h>
#if !defined M_PI
#define M_PI 3.14159265358979323846
#endif
#if !defined M_LN2
#define M_LN2 0.69314718055994530942
#endif
#include "math-wrap.h"
#include <string.h>
#include <stdlib.h>
#include "internal.h"

View File

@ -10,7 +10,10 @@ foreach (fe ${SOURCES})
add_executable (${f} ${fe})
endforeach ()
enable_testing ()
# Can't use c89 for this file:
if (CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang")
set_property (SOURCE throughput APPEND_STRING PROPERTY COMPILE_FLAGS "-std=gnu89")
endif ()
set (sweep_to_freq 22050)
set (leader 1)
@ -20,33 +23,36 @@ math (EXPR base_rate "${sweep_to_freq} + ${sweep_to_freq}")
macro (add_vector r)
set (output ${CMAKE_CURRENT_BINARY_DIR}/ref-${r}.s32)
add_custom_command (OUTPUT ${output} DEPENDS vector-gen ${CMAKE_CURRENT_LIST_FILE}
COMMAND vector-gen ${r} ${leader} ${len} ${sweep_to_freq} 1 ${output})
COMMAND vector-gen ${r} ${leader} ${len} 0 ${sweep_to_freq} 1 ${output})
set (vectors ${output} ${vectors})
endmacro ()
macro (add_cmp_test from to bits)
set (name ${bits}-bit-perfect-${from}-${to})
add_test (NAME ${name} COMMAND ${CMAKE_COMMAND} -Dbits=${bits} -DBIN=${BIN} -DEXAMPLES_BIN=${EXAMPLES_BIN} -Dleader=${leader} -Dto=${to}
-Dfrom=${from} -Dlen=${len} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmp-test.cmake)
add_vector (${from})
add_vector (${to})
macro (add_cmp_test irate orate bits)
set (name ${bits}-bit-perfect-${irate}-${orate})
add_test (NAME ${name} COMMAND ${CMAKE_COMMAND} -Dbits=${bits} -DBIN=${BIN}
-DEXAMPLES_BIN=${EXAMPLES_BIN} -DlenToSkip=${leader} -Dorate=${orate}
-Dirate=${irate} -Dlen=${len} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmp-test.cmake)
add_vector (${irate})
add_vector (${orate})
endmacro ()
unset (test_bits)
if (WITH_SINGLE_PRECISION)
if (WITH_CR32 OR WITH_CR32S OR WITH_CR64 OR WITH_CR64S)
set (test_bits 20)
endif ()
if (WITH_DOUBLE_PRECISION)
set (test_bits ${test_bits} 24)
if (WITH_CR64 OR WITH_CR64S)
set (test_bits ${test_bits} 28)
endif ()
foreach (b ${test_bits})
foreach (r 96000 65537)
foreach (r 192000 65537)
add_cmp_test (${base_rate} ${r} ${b})
add_cmp_test (${r} ${base_rate} ${b})
endforeach ()
endforeach ()
add_custom_target (test-vectors ALL DEPENDS ${vectors})
if (NOT CMAKE_CROSSCOMPILING)
add_custom_target (test-vectors ALL DEPENDS ${vectors})
endif ()
add_test (1-delay-clear ${BIN}1-delay-clear)

View File

@ -1,17 +1,13 @@
# SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
if (${bits} STREQUAL 24)
set (quality 45)
else ()
set (quality 44)
endif ()
math (EXPR quality "43 + (${bits} - 13) / 4")
set (ofile ${irate}-${orate}-${quality}.s32)
#message (STATUS "Output file = [${ofile}]")
set (output ${from}-${to}-${quality}.s32)
execute_process(COMMAND ${EXAMPLES_BIN}3-options-input-fn ${from} ${to} 1 2 2 ${quality} a
INPUT_FILE ref-${from}.s32
OUTPUT_FILE ${output}
execute_process(COMMAND ${EXAMPLES_BIN}3-options-input-fn ${irate} ${orate} 1 2 2 ${quality} a
INPUT_FILE ref-${irate}.s32
OUTPUT_FILE ${ofile}
ERROR_VARIABLE test_error
RESULT_VARIABLE test_result)
@ -19,7 +15,11 @@ if (test_result)
message (FATAL_ERROR "Resampling failure: ${test_error}")
endif ()
execute_process(COMMAND ${BIN}vector-cmp ref-${to}.s32 ${output} ${to} ${leader} ${len} ${bits} 98
set (percentageToCheck 98)
math (EXPR lenToCheck "${len} * ${percentageToCheck}")
string (REGEX REPLACE "(..)$" ".\\1" lenToCheck "${lenToCheck}") # Divide by 100
execute_process(COMMAND ${BIN}vector-cmp ref-${orate}.s32 ${ofile} ${orate} ${lenToSkip} ${lenToCheck} ${bits}
OUTPUT_VARIABLE test_output
RESULT_VARIABLE test_result)

View File

@ -35,7 +35,7 @@ test z$1 != z && j=$1 || j=1
for c in `seq 1 $j`; do
for n in `seq 0 3`; do
sox -r $ir -n $c.${types[$n]} synth $len sin $f gain -.1
sox -R -r $ir -n $c.${types[$n]} synth $len sin $f gain -.1
done
n=0

View File

@ -1,22 +1,22 @@
#!/usr/bin/env bash
set -e
# SoX Resampler Library Copyright (c) 2007-15 robs@users.sourceforge.net
# SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
# Licence for this file: LGPL v2.1 See LICENCE for details.
# Tests interpolating then decimating by the same, large ratio.
tool=../examples/3-options-input-fn
w=$(echo -e "`sox --ver |sed 's/.*SoX v//'` d\n14.4.1 k"|sort -Vr|head -1|sed 's/.* //')
q=6
ratio=2e4
srate=8000
nrate=`expr $srate / 2`
q=4
test x$1 = x && ratio=1e5 || ratio=$1
test x$2 = x && rate=8000 || rate=$2
../tests/vector-gen $srate 0 8 $nrate .9375 1.s32
sox -r$rate -n 1.s32 synth 10 sin 0:`expr $rate / 2` vol .9375
sync
$tool 1 $ratio 1 2 1 $q < 1.s32 | $tool $ratio 1 1 1 2 $q > 2.s32
time { $tool 1 $ratio 1 2 1 $q a < 1.s32 | $tool $ratio 1 1 1 2 $q a > 2.s32;}
sox -M -r $srate -c1 1.s32 -r $srate -c1 2.s32 -n spectrogram -hw$w -Z-10 -z180 -o lr-$w.png -c "large-ratio-test q:$q ratio:$ratio"
sox -mv-1 -r$rate -c1 1.s32 -r$rate -c1 2.s32 -n spectrogram -hw$w -z150 -o lr-$w.png -c "large-ratio-test q:$q ratio:$ratio"
rm [12].s32

View File

@ -0,0 +1,4 @@
#!/bin/sh
set -e
for n in `seq 0 3`; do ./throughput 44.1 48 1 0 $n; done

View File

@ -0,0 +1 @@
for /L %%i in (0,1,3) DO throughput 44.1 48 1 0 %%i

111
tests/throughput.c 100644
View File

@ -0,0 +1,111 @@
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
#include <soxr.h>
#include "rint.h"
#include "../examples/examples-common.h"
#define k 1000
#if defined _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#define timerDecl LARGE_INTEGER start, stop, tmp
#define timerStart(msecs) QueryPerformanceCounter(&start), \
QueryPerformanceFrequency(&tmp), \
stop.QuadPart = (msecs * tmp.QuadPart + k/2) / k
#define timerRunning() (QueryPerformanceCounter(&tmp), \
(tmp.QuadPart-start.QuadPart < stop.QuadPart))
#else
#include <time.h>
#define timerDecl struct timespec stop, tmp
#define timerStart(msecs) clock_gettime(CLOCK_MONOTONIC, &stop), \
stop.tv_nsec += (msecs%k)*(k*k), \
stop.tv_sec += msecs/k + stop.tv_nsec/(k*k*k), \
stop.tv_nsec %= k*k*k
#define timerRunning() (clock_gettime(CLOCK_MONOTONIC, &tmp), \
(tmp.tv_sec < stop.tv_sec || tmp.tv_nsec < stop.tv_nsec))
#endif
int main(int n, char const * arg[])
{
char const * const arg0 = n? --n, *arg++ : "", * engine = "";
double const irate = n? --n, atof(*arg++) : 96000.;
double const orate = n? --n, atof(*arg++) : 44100.;
unsigned const chans = n? --n, (unsigned)atoi(*arg++) : 1;
soxr_datatype_t const itype = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
unsigned const ospec = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
unsigned long const q_recipe= n? --n, strtoul(*arg++, 0, 16) : SOXR_HQ;
unsigned long const q_flags = n? --n, strtoul(*arg++, 0, 16) : 0;
double const passband_end = n? --n, atof(*arg++) : 0;
double const stopband_begin = n? --n, atof(*arg++) : 0;
double const phase_response = n? --n, atof(*arg++) : -1;
int const use_threads = n? --n, atoi(*arg++) : 1;
soxr_datatype_t const otype = ospec & 3;
soxr_quality_spec_t q_spec = soxr_quality_spec(q_recipe, q_flags);
soxr_io_spec_t io_spec = soxr_io_spec(itype, otype);
soxr_runtime_spec_t const runtime_spec = soxr_runtime_spec(!use_threads);
/* Allocate resampling input and output buffers in proportion to the input
* and output rates: */
#define buf_total_len 15000 /* In samples per channel. */
size_t const osize = soxr_datatype_size(otype) * chans;
size_t const isize = soxr_datatype_size(itype) * chans;
size_t const olen0= (size_t)(orate * buf_total_len / (irate + orate) + .5);
size_t const olen = min(max(olen0, 1), buf_total_len - 1);
size_t const ilen = buf_total_len - olen;
void * const obuf = malloc(osize * olen);
void * const ibuf = malloc(isize * ilen);
size_t odone = 0, clips = 0, omax = 0, i;
soxr_error_t error;
soxr_t soxr;
/* Overrides (if given): */
if (passband_end > 0) q_spec.passband_end = passband_end / 100;
if (stopband_begin > 0) q_spec.stopband_begin = stopband_begin / 100;
if (phase_response >=0) q_spec.phase_response = phase_response;
io_spec.flags = ospec & ~7u;
/* Create a stream resampler: */
soxr = soxr_create(
irate, orate, chans, /* Input rate, output rate, # of channels. */
&error, /* To report any error during creation. */
&io_spec, &q_spec, &runtime_spec);
if (!error) { /* If all is well, run the resampler: */
engine = soxr_engine(soxr);
#define RAND ((rand()*(1./RAND_MAX)-.5)*1)
switch (itype & 3) {
case 0: for (i=0;i<ilen*chans; ((float *)ibuf)[i]=(float )RAND, ++i); break;
case 1: for (i=0;i<ilen*chans; ((double *)ibuf)[i]=(double )RAND, ++i); break;
case 2: for (i=0;i<ilen*chans; ((int32_t *)ibuf)[i]=rint32(65536.*32768*RAND), ++i); break;
case 3: for (i=0;i<ilen*chans; ((int16_t *)ibuf)[i]=rint16( 1.*32768*RAND), ++i); break;
}
/* Resample in blocks: */
for (i=0; i<8; ++i) {
size_t itotal = 0, ototal = 0;
timerDecl;
#define MSECS 125
timerStart(MSECS);
do {
size_t const ilen1 = odone < olen? ilen : 0;
error = soxr_process(soxr, ibuf, ilen1, NULL, obuf, olen, &odone);
itotal += ilen1;
ototal += odone;
} while (!error && timerRunning());
omax = max(omax, ototal);
}
}
/* Tidy up: */
clips = *soxr_num_clips(soxr); /* Can occur only with integer output. */
soxr_delete(soxr);
free(obuf), free(ibuf);
/* Diagnostics: */
fprintf(stderr, "%-26s %s; %lu clips; I/O: %s (%s) %.2fMs/s\n",
arg0, soxr_strerror(error), (long unsigned)clips,
ferror(stdin) || ferror(stdout)? strerror(errno) : "no error", engine,
1e-6*k/MSECS*chans*(double)omax);
return !!error;
}

View File

@ -1,53 +1,56 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
/* Utility used to help test the library; not for general consumption.
*
* Compare two swept-sine files. */
* Measure the peak bit difference between two files. */
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "../src/rint.h"
#include "../examples/examples-common.h"
int main(int bit, char const * arg[])
#define TYPE 0 /* As vector-gen */
#if TYPE
#define sample_t double
#define N 50
#define DIFF(s1,s2) abs(rint32((s1-s2)*ldexp(1,N-1)))
#else
#define sample_t int32_t
#define N 32
#define DIFF(s1,s2) abs((int)(s1-s2))
#endif
int main(int argc, char const * arg[])
{
FILE * f1 = fopen(arg[1], "rb"),
* f2 = fopen(arg[2], "rb");
double rate = atof (arg[3]), /* Rate for this vector */
leader_len = atof (arg[4]), /* Leader length in seconds */
len = atof (arg[5]), /* Sweep length (excl. leader_len) */
expect_bits= atof (arg[6]),
expect_bw = atof (arg[7]);
int two = !!arg[2][0];
FILE * f1 = fopen(arg[1], "rb"), * f2 = two? fopen(arg[2], "rb") : 0;
double rate = atof (arg[3]), /* Sample-rate */
skip_len = atof (arg[4]), /* Skip length in seconds */
len = atof (arg[5]), /* Compare length in seconds */ r;
int i = 0, count = rint32(rate * len), max = 0, diff;
sample_t s1, s2;
int32_t s1, s2;
long count = 0;
static long thresh[32];
double bw, prev = 0;
for (; fread(&s1, sizeof(s1), 1, f1) == 1 &&
fread(&s2, sizeof(s2), 1, f2) == 1; ++count) {
long diff = abs((int)(s1 - s2));
for (bit = 0; diff && bit < 32; bit++, diff >>= 1)
if ((diff & 1) && !thresh[bit])
thresh[bit] = count + 1;
}
if (count != (long)((leader_len + len) * rate + .5)) {
printf("incorrect file length\n");
exit(1);
}
for (bit = 0; bit < 32; ++bit) {
bw = ((double)thresh[bit] - 1) / rate - leader_len;
if (bit && bw >= 0 && (bw - prev) * 100 / len < .08) {
--bit;
break;
fseek(f1, rint32(rate * skip_len) * (int)sizeof(s1), SEEK_CUR);
if (two) {
fseek(f2, rint32(rate * skip_len) * (int)sizeof(s2), SEEK_CUR);
for (; i < count &&
fread(&s1, sizeof(s1), 1, f1) &&
fread(&s2, sizeof(s2), 1, f2); ++i) {
diff = DIFF(s1, s2);
max = max(max, diff);
}
prev = bw;
}
bit = 32 - bit;
bw = bw * 100 / len;
printf("Bit perfect to %i bits, from DC to %.2f%% nyquist.\n", bit, bw);
return !(bit >= expect_bits && bw >= expect_bw);
else for (; i < count && fread(&s1, sizeof(s1), 1, f1); ++i) {
diff = DIFF(s1, 0);
max = max(max, diff);
}
if (i != count) {
fprintf(stderr, "incorrect file length\n");
return 1;
}
printf("%f\n", r = N-log(max)/log(2));
return argc>6? r<atof(arg[6]) : 0;
}

View File

@ -1,61 +1,61 @@
/* SoX Resampler Library Copyright (c) 2007-13 robs@users.sourceforge.net
/* SoX Resampler Library Copyright (c) 2007-16 robs@users.sourceforge.net
* Licence for this file: LGPL v2.1 See LICENCE for details. */
/* Utility used to help test the library; not for general consumption.
*
* Generate a swept sine to a file, with faded `lead-in' section. */
* Generate a swept sine to a file, with `lead-in' section. */
#define QUAD 0
#define TYPE 0 /* calc/store: 0:flt64/int32 1:flt80/flt64 2:flt128/flt64 */
#if QUAD
#if TYPE > 1
#include <quadmath.h>
#endif
#include <math.h>
#include "math-wrap.h"
#include <stdlib.h>
#include <stdio.h>
#if !defined M_PI
#define M_PI 3.14159265358979323846
#endif
#if QUAD
#define modf modfq
#define cos cosq
#define sin sinq
#undef M_PI
#define M_PI M_PIq
#define real __float128
#define atof(x) strtoflt128(x, 0)
#if TYPE
#if TYPE > 1
#define modf modfq
#define cos cosq
#define sin sinq
#define PI M_PIq
#define real __float128
#define atof(x) strtoflt128(x, 0)
#else
#define modf modfl
#define cos cosl
#define sin sinl
#define PI M_PIl
#define real long double
#endif
#define MULT 1
#define OUT(d) double output = d
#else
#define PI M_PI
#define real double
#include "rint.h"
#define MULT (32768. * 65536 - 1/scale)
#define OUT(d) int32_t output = rint32(d)
#endif
int main(int i, char const * argv[])
int main(int argc, char const * argv[])
{
real rate = atof(argv[1]), /* Rate for this vector */
lead_in_len = atof(argv[2]), /* Lead-in length in seconds */
len = atof(argv[3]), /* Sweep length (excl. lead_in_len) */
sweep_to_freq = atof(argv[4]), /* Sweep from DC to this freq. */
multiplier = atof(argv[5]), /* For headroom */
f1 = -sweep_to_freq / len * lead_in_len, f2 = sweep_to_freq,
n1 = rate * -lead_in_len, n2 = rate * len,
m = (f2 - f1) / (n2 - n1) / 2, dummy;
FILE * file = fopen(argv[6], "wb");
i = (int)n1;
if (!file || i != n1)
exit(1);
for (; i < (int)(n2 + .5); ++i) {
double d1 = multiplier * sin(2 * M_PI * modf(i * m * i / rate, &dummy));
double d = i < 0? d1 * (1 - cos(M_PI * (i + n1) / n1)) * .5 : d1;
#if QUAD
size_t actual = fwrite(&d, sizeof(d), 1, file);
#else
int32_t out = rint32(d * (32768. * 65536 - 1));
size_t actual = fwrite(&out, sizeof(out), 1, file);
#endif
if (actual != 1)
return 1;
real rate = atof(argv[1]), /* Rate for this vector */
lead_in_len = atof(argv[2]), /* Lead-in length in seconds */
len = atof(argv[3]), /* Sweep length (excl. lead_in_len) */
f1 = atof(argv[4]),
f2 = atof(argv[5]),
scale = atof(argv[6]), /* For headroom */
n1 = rate * -lead_in_len,
m = (f2 - f1) / (rate * len * 2), dummy;
FILE * file = fopen(argv[7], "wb");
int i = (int)n1, err = !file || i != n1;
for (; !err && i < (int)(rate*(len+lead_in_len)+.5); ++i) {
real d = sin(2 * PI * modf((f1 + i * m) * i / rate, &dummy));
OUT((double)(scale * MULT * d));
err = fwrite(&output, sizeof(output), 1, file) != 1;
}
return 0;
return err |!argc;
}