From e36d0f26d2924498376469da8822c9c2a06ec35d Mon Sep 17 00:00:00 2001
From: Rob Sykes <rob@rob-Ideapad-S205.(none)>
Date: Mon, 3 Dec 2012 17:25:33 +0000
Subject: [PATCH] Initial commit

---
 .gitignore                                |    2 +
 AUTHORS                                   |    1 +
 CMakeLists.txt                            |  280 ++++
 COPYING.LGPL                              |  502 ++++++
 INSTALL                                   |   66 +
 LICENCE                                   |   24 +
 README                                    |   50 +
 TODO                                      |    3 +
 cmake/Modules/FindLibAVCodec.cmake        |   23 +
 cmake/Modules/FindOpenMP.cmake            |   91 ++
 cmake/Modules/FindSIMD.cmake              |   53 +
 cmake/Modules/TestBigEndian.cmake         |   15 +
 configure                                 |   13 +
 deinstall.cmake.in                        |   25 +
 examples/1-single-block.c                 |   48 +
 examples/2-stream.c                       |   78 +
 examples/3-options-input-fn.c             |   97 ++
 examples/4-split-channels.c               |  147 ++
 examples/5-variable-rate.c                |   94 ++
 examples/CMakeLists.txt                   |   21 +
 examples/README                           |   18 +
 examples/examples-common.h                |   45 +
 go                                        |   15 +
 go.bat                                    |   24 +
 lsr-tests/CMakeLists.txt                  |   49 +
 lsr-tests/COPYING                         |  340 ++++
 lsr-tests/README                          |    8 +
 lsr-tests/calc_snr.c                      |  242 +++
 lsr-tests/callback_hang_test.c            |  131 ++
 lsr-tests/callback_test.c                 |  243 +++
 lsr-tests/cmake/Modules/FindFFTW.cmake    |   23 +
 lsr-tests/cmake/Modules/Findsndfile.cmake |   23 +
 lsr-tests/config.h.in                     |   24 +
 lsr-tests/downsample_test.c               |   61 +
 lsr-tests/float_cast.h                    |  281 ++++
 lsr-tests/float_short_test.c              |  192 +++
 lsr-tests/misc_test.c                     |  175 +++
 lsr-tests/multi_channel_test.c            |  364 +++++
 lsr-tests/multichan_throughput_test.c     |  216 +++
 lsr-tests/reset_test.c                    |  238 +++
 lsr-tests/simple_test.c                   |  117 ++
 lsr-tests/sndfile-resample.c              |  332 ++++
 lsr-tests/snr_bw_test.c                   |  401 +++++
 lsr-tests/termination_test.c              |  339 ++++
 lsr-tests/throughput_test.c               |  212 +++
 lsr-tests/util.c                          |  230 +++
 lsr-tests/util.h                          |   50 +
 lsr-tests/varispeed_test.c                |  152 ++
 msvc/README                               |    9 +
 msvc/libsoxr.vcproj                       |   80 +
 msvc/soxr-config.h                        |   57 +
 soxr-config.h.in                          |   50 +
 src/CMakeLists.txt                        |  112 ++
 src/aliases.h                             |   37 +
 src/avfft32.c                             |   27 +
 src/avfft32s.c                            |   27 +
 src/ccrw2.h                               |   73 +
 src/data-io.c                             |  249 +++
 src/data-io.h                             |   39 +
 src/dbesi0.c                              |  149 ++
 src/fft4g.c                               | 1352 ++++++++++++++++
 src/fft4g.h                               |   23 +
 src/fft4g32.c                             |   27 +
 src/fft4g32s.c                            |   26 +
 src/fft4g64.c                             |   29 +
 src/fft4g_cache.h                         |   92 ++
 src/fifo.h                                |  124 ++
 src/filter.c                              |  245 +++
 src/filter.h                              |   39 +
 src/filters.h                             |  151 ++
 src/half-fir.h                            |   25 +
 src/half_coefs.h                          |   57 +
 src/internal.h                            |   43 +
 src/libsoxr-dev.src.in                    |    2 +
 src/libsoxr-lsr.pc.in                     |   11 +
 src/libsoxr.pc.in                         |   11 +
 src/libsoxr.src.in                        |    1 +
 src/lsr.c                                 |  114 ++
 src/pffft.c                               | 1729 +++++++++++++++++++++
 src/pffft.h                               |  177 +++
 src/pffft32.c                             |   30 +
 src/pffft32s.c                            |   27 +
 src/poly-fir.h                            |   98 ++
 src/poly-fir0.h                           |   32 +
 src/rate.h                                |  734 +++++++++
 src/rate32.c                              |    9 +
 src/rate32s.c                             |    9 +
 src/rate64.c                              |    9 +
 src/rdft.h                                |   31 +
 src/rint-clip.h                           |  153 ++
 src/rint.h                                |   68 +
 src/samplerate.h                          |    1 +
 src/simd-dev.h                            |    5 +
 src/simd.c                                |   84 +
 src/simd.h                                |   16 +
 src/soxr-lsr.h                            |   78 +
 src/soxr.c                                |  643 ++++++++
 src/soxr.h                                |  318 ++++
 src/vr32.c                                |  771 +++++++++
 tests/CMakeLists.txt                      |   50 +
 tests/README                              |    1 +
 tests/cmp-test.cmake                      |   28 +
 tests/eg-test                             |   29 +
 tests/io-test                             |   40 +
 tests/large-ratio                         |   21 +
 tests/vector-cmp.c                        |   53 +
 tests/vector-gen.c                        |   56 +
 107 files changed, 14759 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 AUTHORS
 create mode 100644 CMakeLists.txt
 create mode 100644 COPYING.LGPL
 create mode 100644 INSTALL
 create mode 100644 LICENCE
 create mode 100644 README
 create mode 100644 TODO
 create mode 100644 cmake/Modules/FindLibAVCodec.cmake
 create mode 100644 cmake/Modules/FindOpenMP.cmake
 create mode 100644 cmake/Modules/FindSIMD.cmake
 create mode 100644 cmake/Modules/TestBigEndian.cmake
 create mode 100755 configure
 create mode 100644 deinstall.cmake.in
 create mode 100644 examples/1-single-block.c
 create mode 100644 examples/2-stream.c
 create mode 100644 examples/3-options-input-fn.c
 create mode 100644 examples/4-split-channels.c
 create mode 100644 examples/5-variable-rate.c
 create mode 100644 examples/CMakeLists.txt
 create mode 100644 examples/README
 create mode 100644 examples/examples-common.h
 create mode 100755 go
 create mode 100644 go.bat
 create mode 100644 lsr-tests/CMakeLists.txt
 create mode 100644 lsr-tests/COPYING
 create mode 100644 lsr-tests/README
 create mode 100644 lsr-tests/calc_snr.c
 create mode 100644 lsr-tests/callback_hang_test.c
 create mode 100644 lsr-tests/callback_test.c
 create mode 100644 lsr-tests/cmake/Modules/FindFFTW.cmake
 create mode 100644 lsr-tests/cmake/Modules/Findsndfile.cmake
 create mode 100644 lsr-tests/config.h.in
 create mode 100644 lsr-tests/downsample_test.c
 create mode 100644 lsr-tests/float_cast.h
 create mode 100644 lsr-tests/float_short_test.c
 create mode 100644 lsr-tests/misc_test.c
 create mode 100644 lsr-tests/multi_channel_test.c
 create mode 100644 lsr-tests/multichan_throughput_test.c
 create mode 100644 lsr-tests/reset_test.c
 create mode 100644 lsr-tests/simple_test.c
 create mode 100644 lsr-tests/sndfile-resample.c
 create mode 100644 lsr-tests/snr_bw_test.c
 create mode 100644 lsr-tests/termination_test.c
 create mode 100644 lsr-tests/throughput_test.c
 create mode 100644 lsr-tests/util.c
 create mode 100644 lsr-tests/util.h
 create mode 100644 lsr-tests/varispeed_test.c
 create mode 100644 msvc/README
 create mode 100644 msvc/libsoxr.vcproj
 create mode 100644 msvc/soxr-config.h
 create mode 100644 soxr-config.h.in
 create mode 100644 src/CMakeLists.txt
 create mode 100644 src/aliases.h
 create mode 100644 src/avfft32.c
 create mode 100644 src/avfft32s.c
 create mode 100644 src/ccrw2.h
 create mode 100644 src/data-io.c
 create mode 100644 src/data-io.h
 create mode 100644 src/dbesi0.c
 create mode 100644 src/fft4g.c
 create mode 100644 src/fft4g.h
 create mode 100644 src/fft4g32.c
 create mode 100644 src/fft4g32s.c
 create mode 100644 src/fft4g64.c
 create mode 100644 src/fft4g_cache.h
 create mode 100644 src/fifo.h
 create mode 100644 src/filter.c
 create mode 100644 src/filter.h
 create mode 100644 src/filters.h
 create mode 100644 src/half-fir.h
 create mode 100644 src/half_coefs.h
 create mode 100644 src/internal.h
 create mode 100644 src/libsoxr-dev.src.in
 create mode 100644 src/libsoxr-lsr.pc.in
 create mode 100644 src/libsoxr.pc.in
 create mode 100644 src/libsoxr.src.in
 create mode 100644 src/lsr.c
 create mode 100644 src/pffft.c
 create mode 100644 src/pffft.h
 create mode 100644 src/pffft32.c
 create mode 100644 src/pffft32s.c
 create mode 100644 src/poly-fir.h
 create mode 100644 src/poly-fir0.h
 create mode 100644 src/rate.h
 create mode 100644 src/rate32.c
 create mode 100644 src/rate32s.c
 create mode 100644 src/rate64.c
 create mode 100644 src/rdft.h
 create mode 100644 src/rint-clip.h
 create mode 100644 src/rint.h
 create mode 100644 src/samplerate.h
 create mode 100644 src/simd-dev.h
 create mode 100644 src/simd.c
 create mode 100644 src/simd.h
 create mode 100644 src/soxr-lsr.h
 create mode 100644 src/soxr.c
 create mode 100644 src/soxr.h
 create mode 100644 src/vr32.c
 create mode 100644 tests/CMakeLists.txt
 create mode 100644 tests/README
 create mode 100644 tests/cmp-test.cmake
 create mode 100755 tests/eg-test
 create mode 100755 tests/io-test
 create mode 100755 tests/large-ratio
 create mode 100644 tests/vector-cmp.c
 create mode 100644 tests/vector-gen.c

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7c4baa5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+Release/
+Debug/
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..2ba76d3
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1 @@
+Rob Sykes <robs@users.sourceforge.net>
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..5bd53be
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,280 @@
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+cmake_minimum_required (VERSION 2.8 FATAL_ERROR)
+
+project (soxr C)
+set (DESCRIPTION_SUMMARY "One-dimensional sample-rate conversion library")
+
+
+
+# Release versioning (joined further down into PROJECT_VERSION as MAJOR.MINOR.PATCH):
+
+set (PROJECT_VERSION_MAJOR 0)
+set (PROJECT_VERSION_MINOR 0)
+set (PROJECT_VERSION_PATCH 5)
+
+# For shared-object; if, since the last public release:
+#  * library code changed at all: ++revision
+#  * interfaces changed at all:   ++current, revision = 0
+#  * interfaces added:            ++age
+#  * interfaces removed:          age = 0
+
+set (SO_VERSION_CURRENT  0)  # SO_VERSION_MAJOR is later computed as current - age.
+set (SO_VERSION_REVISION 0)  # Later copied into SO_VERSION_PATCH.
+set (SO_VERSION_AGE      0)  # Later copied into SO_VERSION_MINOR.
+
+
+
+# Main options (user-settable cache entries that select what gets built):
+
+include (CMakeDependentOption)
+
+if (NOT CMAKE_BUILD_TYPE)  # No build type requested: default to Release.
+  set (CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
+endif ()
+
+option (BUILD_TESTS "Build sanity-tests."  OFF)
+option (BUILD_SHARED_LIBS "Build shared libraries." ON)
+option (BUILD_EXAMPLES "Build examples." OFF)
+option (WITH_OPENMP "Include OpenMP threading." ON)
+option (WITH_LSR_BINDINGS "Include a `libsamplerate'-like interface." ON)
+cmake_dependent_option (WITH_SINGLE_PRECISION "Build with single precision (for up to 20-bit accuracy)." ON
+  "WITH_DOUBLE_PRECISION" ON)  # Forced ON whenever double precision is disabled.
+cmake_dependent_option (WITH_DOUBLE_PRECISION "Build with double precision (for up to 32-bit accuracy)." ON
+  "WITH_SINGLE_PRECISION" ON)  # Forced ON whenever single precision is disabled.
+cmake_dependent_option (WITH_SIMD "Use SIMD (for faster single precision)." ON
+  "WITH_SINGLE_PRECISION" OFF)  # Forced OFF when single precision is disabled.
+cmake_dependent_option (WITH_AVFFT "Use libavcodec (LGPL) for SIMD DFT." OFF
+  "WITH_SIMD;NOT WITH_PFFFT" OFF)  # Only selectable with SIMD on and PFFFT off.
+cmake_dependent_option (WITH_PFFFT "Use PFFFT (BSD-like licence) for SIMD DFT." ON
+  "WITH_SIMD;NOT WITH_AVFFT" OFF)  # Only selectable with SIMD on and AVFFT off.
+if (UNIX)  # The LSR-tests option is only defined on UNIX.
+  cmake_dependent_option (BUILD_LSR_TESTS "Build LSR tests." OFF
+    "WITH_LSR_BINDINGS" OFF)  # Forced OFF when the LSR bindings are not built.
+endif ()
+
+
+
+# Introspection (probe the build host for libraries, functions and headers):
+
+list (APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)  # PROJECT_ (not CMAKE_) so this also works via add_subdirectory.
+
+include (CheckFunctionExists)
+include (CheckIncludeFiles)
+include (CheckLibraryExists)
+include (TestBigEndian)
+
+check_library_exists (m pow "" NEED_LIBM)  # Is pow() available from a separate library `m'?
+if (NEED_LIBM)
+  set (CMAKE_REQUIRED_LIBRARIES "m;${CMAKE_REQUIRED_LIBRARIES}")  # Later check_* probes link against m too.
+  link_libraries (m)  # Targets created after this point link against m.
+endif ()
+
+if (WITH_OPENMP)  # Only look for OpenMP when the user has asked for it.
+  find_package (OpenMP)
+endif ()
+if (OPENMP_FOUND)  # Append the flags reported by find_package to the C and executable-link flags.
+  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+  set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+endif ()
+
+if (WITH_SIMD)
+  find_package (SIMD)
+  if (SIMD_FOUND)  # HAVE_SIMD is set only if SIMD was both requested and found.
+    set (HAVE_SIMD 1)
+  endif ()
+endif ()
+
+if (WITH_SINGLE_PRECISION)  # Mirror the WITH_* option into a HAVE_* variable.
+  set (HAVE_SINGLE_PRECISION 1)
+endif ()
+
+if (WITH_DOUBLE_PRECISION)  # Mirror the WITH_* option into a HAVE_* variable.
+  set (HAVE_DOUBLE_PRECISION 1)
+endif ()
+
+if (WITH_AVFFT)
+  find_package (LibAVCodec)
+  if (AVCODEC_FOUND)  # HAVE_AVFFT is set only if libavcodec was both requested and found.
+    include_directories (${AVCODEC_INCLUDE_DIRS})
+    link_libraries (${AVCODEC_LIBRARIES})
+    set (HAVE_AVFFT 1)
+  endif ()
+endif ()
+
+if (EXISTS ${PROJECT_SOURCE_DIR}/src/vr32.c)  # HAVE_VR depends solely on this source file being present.
+  set (HAVE_VR 1)
+endif ()
+
+check_function_exists (lrint HAVE_LRINT)
+check_include_files (fenv.h HAVE_FENV_H)
+test_big_endian (WORDS_BIGENDIAN)
+
+macro (make_exist)  # For each variable name given, set the variable to 0 unless it already holds a true value.
+  foreach (x ${ARGN})
+    if (NOT ${x})  # ${x} expands to the variable's name, which if() then evaluates.
+      set (${x} 0)
+    endif ()
+  endforeach ()
+endmacro ()
+
+make_exist (HAVE_LRINT HAVE_FENV_H WORDS_BIGENDIAN HAVE_SIMD HAVE_VR)  # NOTE(review): presumably so the config-header template always sees 0 or 1 -- confirm.
+make_exist (HAVE_SINGLE_PRECISION HAVE_DOUBLE_PRECISION HAVE_AVFFT)
+
+
+
+# Compiler configuration (GNU warning sets, stripping, optional hidden visibility):
+
+if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
+  set (PROJECT_CXX_FLAGS "-Wconversion -Wall -W -pedantic -Wundef -Wcast-align -Wpointer-arith -Wno-long-long")
+  set (PROJECT_C_FLAGS "${PROJECT_CXX_FLAGS} -Wnested-externs -Wmissing-prototypes -Wstrict-prototypes")  # C-only warnings on top of the common set.
+  if (CMAKE_BUILD_TYPE STREQUAL "Release")
+    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s") # strip; appended so that user-supplied linker flags are kept
+  endif ()
+  #option (VISIBILITY_HIDDEN "Build with -fvisibility=hidden." ON)
+  if (VISIBILITY_HIDDEN)  # With the option above commented out, this is true only if set externally (e.g. -DVISIBILITY_HIDDEN=ON).
+    add_definitions (-fvisibility=hidden)
+  endif ()
+endif ()
+
+if (MSVC)  # Visual Studio specifics: CRT macros, optional static runtime, installer warnings.
+  add_definitions (-D_USE_MATH_DEFINES -D_CRT_SECURE_NO_WARNINGS)
+  option (ENABLE_STATIC_RUNTIME "Visual Studio, link with runtime statically."  OFF)
+  if (ENABLE_STATIC_RUNTIME)  # This is a C project, so the C flag sets must be rewritten too; the CXX ones alone have no effect.
+    foreach (flag_var CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+      string (REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")  # /MD (DLL runtime) -> /MT (static runtime); also turns /MDd into /MTd.
+    endforeach ()
+  endif ()
+  # By default, do not warn when built on machines using only VS Express:
+  if (NOT DEFINED CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS)
+    set (CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
+  endif ()
+endif ()
+
+
+
+# Build configuration (output directories and library type):
+
+if (BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL "Windows") # Allow exes to find dlls (variables named, not pre-expanded, so empty values cannot break the if):
+  set (BIN ${PROJECT_BINARY_DIR}/bin/)
+  set (EXAMPLES_BIN ${BIN})
+  set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${BIN})
+  set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BIN})
+else ()  # Otherwise keep CMake's default output locations and use relative paths.
+  set (BIN ./)
+  set (EXAMPLES_BIN ../examples/)
+endif ()
+
+set (LIB_TYPE STATIC)  # Default; overridden just below for shared builds.
+if (BUILD_SHARED_LIBS)
+  set (LIB_TYPE SHARED)
+  if (MSVC)
+    add_definitions (-DSOXR_DLL)  # NOTE(review): presumably selects DLL import/export declarations in the headers -- confirm in soxr.h.
+  endif ()
+endif ()
+
+
+
+# Installation configuration (install sub-directories, as cache entries):
+
+set (LIB_SUFFIX "" CACHE STRING "Define suffix of libraries directory name (32 or 64).")
+set (BIN_INSTALL_DIR "bin" CACHE PATH "The subdirectory to the binaries." FORCE)  # FORCE: rewritten on every configure run.
+set (LIB_INSTALL_DIR "lib${LIB_SUFFIX}" CACHE PATH "The subdirectory to the libraries." FORCE)  # FORCE keeps this in step with LIB_SUFFIX.
+set (INCLUDE_INSTALL_DIR "include" CACHE PATH "The subdirectory to the headers." FORCE)  # NOTE(review): FORCE also discards user overrides of these three -- confirm intended.
+
+if (APPLE)  # The framework options are only defined on Apple platforms.
+  option (BUILD_FRAMEWORK "Build an OS X framework." OFF)
+  set (FRAMEWORK_INSTALL_DIR "/Library/Frameworks" CACHE STRING "Directory to install frameworks to.")
+endif ()
+
+
+
+# Top-level (derived version strings, generated config header, test enablement):
+
+set (PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH})
+math (EXPR SO_VERSION_MAJOR "${SO_VERSION_CURRENT} - ${SO_VERSION_AGE}")  # major = current - age
+math (EXPR SO_VERSION_MINOR "${SO_VERSION_AGE}")  # minor = age
+math (EXPR SO_VERSION_PATCH "${SO_VERSION_REVISION}")  # patch = revision
+set (SO_VERSION ${SO_VERSION_MAJOR}.${SO_VERSION_MINOR}.${SO_VERSION_PATCH})
+
+configure_file (
+  ${PROJECT_SOURCE_DIR}/${PROJECT_NAME}-config.h.in
+  ${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.h)  # Writes soxr-config.h into the build tree from the template.
+include_directories (${PROJECT_BINARY_DIR})  # So that the generated header is on the include path.
+
+if (BUILD_TESTS OR BUILD_LSR_TESTS)  # Enable CTest only when some test suite is being built.
+  enable_testing ()
+endif ()
+
+
+
+# Subdirectories (the library is always built; the rest depend on options):
+
+include_directories (${PROJECT_SOURCE_DIR}/src)  # Puts src/ on the include path for every sub-directory below.
+
+add_subdirectory (src)
+if (BUILD_TESTS)
+  add_subdirectory (tests)
+endif ()
+if (BUILD_LSR_TESTS)
+  add_subdirectory (lsr-tests)
+endif ()
+if (BUILD_EXAMPLES OR BUILD_TESTS)  # Examples are built for BUILD_TESTS too; NOTE(review): presumably the tests run them -- confirm in tests/.
+  add_subdirectory (examples)
+endif ()
+
+
+
+# Rough-and-ready distclean for anyone still doing in-tree builds:
+
+if (UNIX)  # The command below relies on `make' and `rm', so the target is only defined on UNIX.
+  add_custom_target (distclean  # Runs `make clean', then deletes the generated files listed below.
+    COMMAND make clean && rm -rf
+      CMakeCache.txt
+      CMakeFiles
+      cmake_install.cmake
+      CPackConfig.cmake
+      CPackSourceConfig.cmake
+      deinstall.cmake
+      Makefile
+      soxr-config.h
+      src/CMakeFiles
+      src/cmake_install.cmake
+      src/libsoxr-dev.src
+      src/libsoxr-lsr.pc
+      src/libsoxr.pc
+      src/libsoxr.src
+      src/Makefile)
+endif ()
+
+
+
+# Deinstallation (generate deinstall.cmake from its template; run it via the `deinstall' target):
+
+configure_file (
+  "${CMAKE_CURRENT_SOURCE_DIR}/deinstall.cmake.in"
+  "${CMAKE_CURRENT_BINARY_DIR}/deinstall.cmake"
+  IMMEDIATE @ONLY)  # @ONLY: substitute @VAR@ references only, leaving ${...} text untouched.
+
+add_custom_target (deinstall
+  COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/deinstall.cmake")  # -P runs the generated file as a CMake script.
+
+
+
+# Packaging (CPack set-up; only configured on UNIX):
+
+if (UNIX)
+  set (CPACK_PACKAGE_VERSION_MAJOR "${PROJECT_VERSION_MAJOR}")
+  set (CPACK_PACKAGE_VERSION_MINOR "${PROJECT_VERSION_MINOR}")
+  set (CPACK_PACKAGE_VERSION_PATCH "${PROJECT_VERSION_PATCH}")
+
+  set (CPACK_SOURCE_GENERATOR "TBZ2")  # Source packages are produced as .tar.bz2 archives.
+  set (CPACK_SOURCE_IGNORE_FILES "/Debug/;/Release/;/cpack/;\\\\.swp$;\\\\.gitignore")  # Patterns for paths left out of the source package.
+
+  include (CPack)  # Must follow the CPACK_* settings above, which it reads.
+
+  if (IS_DIRECTORY ${PROJECT_SOURCE_DIR}/cpack)  # The cpack/ sub-directory is optional; add it only if present.
+    add_subdirectory (cpack)
+  endif ()
+endif ()
diff --git a/COPYING.LGPL b/COPYING.LGPL
new file mode 100644
index 0000000..551cb4a
--- /dev/null
+++ b/COPYING.LGPL
@@ -0,0 +1,502 @@
+		  GNU LESSER GENERAL PUBLIC LICENSE
+		       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard.  To achieve this, non-free programs must be
+allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+		  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at
+    least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more
+    than the cost of performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+			    NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.  It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..5412815
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,66 @@
+SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+
+1. Prerequisites:
+
+    Before you can build this library, you need to have available on your
+    system:
+
+    * A C-compiler with 64-bit integer support and, optionally, OpenMP, SIMD.
+
+    * A 'make' utility (most compiler installations already have one of these).
+
+    * CMake: http://www.cmake.org/cmake/resources/software.html
+
+
+2. Build:
+
+    At a command prompt, change directory (`cd') to the one containing this
+    file, then enter:
+
+        go                          (on MS-Windows with nmake)
+    or
+        ./go                        (on unix-like systems)
+
+    This should build the library and run a few sanity tests.
+
+
+3. Installation:
+
+    Note that this step may need to be performed by a system
+    administrator.  Enter:
+
+        nmake install               (on MS-Windows)
+    or
+        cd Release; make install    (on unix)
+
+
+4. Configuration:
+
+    To use the library you may need to set up appropriate paths to the
+    library and its header file in your development environment.
+
+
+5. Installation test
+
+    To test the installation, build and run the first example programme (see
+    examples/README).
+
+
+If it is necessary to customise the build, then steps 2 and 3 above may be
+substituted as follows.  Change directory to the one containing this file,
+then enter commands along the lines of:
+
+    mkdir build
+    cd build
+    cmake [OPTIONS] ..
+    make
+    make test
+    sudo make install
+
+To list help on the available options, enter:
+
+    cmake -LH ..
+
+Options, if given, should be preceded with '-D', e.g.
+
+    cmake -DWITH_SIMD:BOOL=OFF ..
diff --git a/LICENCE b/LICENCE
new file mode 100644
index 0000000..ded4ca3
--- /dev/null
+++ b/LICENCE
@@ -0,0 +1,24 @@
+SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+
+This library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at
+your option) any later version.
+
+This library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+
+
+Notes
+
+1. Re software in the `examples' directory: works that are not resampling
+examples but are based on the given examples -- for example, applications using
+the library -- shall not be considered to be derivative works of the examples.
+
+2. If building with pffft.c, see the licence embedded in that file.
diff --git a/README b/README
new file mode 100644
index 0000000..5e1a51c
--- /dev/null
+++ b/README
@@ -0,0 +1,50 @@
+SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+
+The SoX Resampler library `libsoxr' performs one-dimensional sample-rate
+conversion -- it may be used, for example, to resample PCM-encoded audio.
+For higher-dimensional resampling, such as for visual-image processing, you
+should look elsewhere.
+
+It aims to give fast¹ and very high quality² results for any constant
+(rational or irrational) resampling ratio.  Phase-response, preserved
+bandwidth, aliasing, and rejection level parameters are all configurable;
+alternatively, simple `preset' configurations may be selected.  An
+experimental, variable-rate resampling mode of operation is also included.
+
+The resampler is currently available either as part of `libsox' (the audio
+file-format and effect library), or stand-alone as `libsoxr' (this package).
+The interfaces to libsox and libsoxr are slightly different, with that of
+libsoxr designed specifically for resampling.  An application requiring
+support for other effects, or for reading-from or writing-to audio files or
+devices, should use libsox (or other libraries such as libsndfile or
+libavformat).
+
+Libsoxr provides a simple API that allows interfacing using the most
+commonly-used sample formats and buffering schemes: sample-formats may be
+either floating-point or integer, and multiple channels either interleaved
+or split in separate buffers.  The API is documented in the header file
+`soxr.h', together with sample code found in the 'examples' directory.
+
+For compatibility with the popular `libsamplerate' library, the header file
+`soxr-lsr.h' is provided and may be used as an alternative API.³  Note
+however, that libsoxr does not provide a full emulation of libsamplerate
+and that using this approach, only a sub-set of libsoxr's features are
+available.
+
+The design was inspired by Laurent De Soras' paper `The Quest For The
+Perfect Resampler', http://ldesoras.free.fr/doc/articles/resampler-en.pdf;
+in essence, it combines Julius O. Smith's `Bandlimited Interpolation'
+technique (https://ccrma.stanford.edu/~jos/resample/resample.pdf) with FFT-
+based over-sampling.
+
+Note that for real-time resampling, libsoxr may have a higher latency
+than non-FFT based resamplers.  For example, when using the `High Quality'
+configuration to resample between 44100Hz and 48000Hz, the latency is
+around 1000 output samples, i.e. roughly 20ms.
+
+For build and installation instructions, see the file `INSTALL'; for
+copyright and licensing information, see the file `LICENCE'.
+________
+¹ For example, multi-channel resampling can utilise multiple CPU-cores.
+² Bit-perfect within practical occupied-bandwidth limits.
+³ For details of that API, see http://www.mega-nerd.com/SRC/api.html.
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..1c4a31b
--- /dev/null
+++ b/TODO
@@ -0,0 +1,3 @@
+* SOXR_ALLOW_ALIASING
+* Explicit flush API fn, perhaps.
+* More SIMD.
diff --git a/cmake/Modules/FindLibAVCodec.cmake b/cmake/Modules/FindLibAVCodec.cmake
new file mode 100644
index 0000000..be15051
--- /dev/null
+++ b/cmake/Modules/FindLibAVCodec.cmake
@@ -0,0 +1,23 @@
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# - Find AVCODEC
+# Find the native installation of this package: includes and libraries.
+#
+#  AVCODEC_INCLUDES    - where to find headers for this package.
+#  AVCODEC_LIBRARIES   - List of libraries when using this package.
+#  AVCODEC_FOUND       - True if this package can be found.
+
+if (AVCODEC_INCLUDES)  # Header location already known (e.g. from the cache):
+  set (AVCODEC_FIND_QUIETLY TRUE)  # so ask for the result not to be re-reported.
+endif (AVCODEC_INCLUDES)
+# Look for the directory that contains libavcodec/avcodec.h:
+find_path (AVCODEC_INCLUDES libavcodec/avcodec.h)
+# Look for the library itself:
+find_library (AVCODEC_LIBRARIES NAMES avcodec)
+# Derive AVCODEC_FOUND from the outcome of the two searches above:
+include (FindPackageHandleStandardArgs)
+find_package_handle_standard_args (
+  AVCODEC DEFAULT_MSG AVCODEC_LIBRARIES AVCODEC_INCLUDES)
+# Mark both result variables as `advanced' cache entries:
+mark_as_advanced (AVCODEC_LIBRARIES AVCODEC_INCLUDES)
diff --git a/cmake/Modules/FindOpenMP.cmake b/cmake/Modules/FindOpenMP.cmake
new file mode 100644
index 0000000..465f1b0
--- /dev/null
+++ b/cmake/Modules/FindOpenMP.cmake
@@ -0,0 +1,91 @@
+# - Finds OpenMP support
+# This module can be used to detect OpenMP support in a compiler.
+# If the compiler supports OpenMP, the flags required to compile with
+# openmp support are set.
+#
+# The following variables are set:
+#   OpenMP_C_FLAGS - flags to add to the C compiler for OpenMP support
+#   OPENMP_FOUND - true if openmp is detected
+#
+# Supported compilers can be found at http://openmp.org/wp/openmp-compilers/
+
+#=============================================================================
+# Copyright 2009 Kitware, Inc.
+# Copyright 2008-2009 André Rigland Brodtkorb <Andre.Brodtkorb@ifi.uio.no>
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#
+# Modified for libsoxr not to rely on presence of C++ compiler.
+#=============================================================================
+# (To distribute this file outside of CMake, substitute the full
+#  License text for the above reference.)
+
+include (CheckCSourceCompiles)
+include (FindPackageHandleStandardArgs)
+
+set (OpenMP_C_FLAG_CANDIDATES
+  #Gnu
+  "-fopenmp"
+  #Microsoft Visual Studio
+  "/openmp"
+  #Intel windows
+  "-Qopenmp"
+  #Intel
+  "-openmp"
+  #Empty, if compiler automatically accepts openmp
+  " "
+  #Sun
+  "-xopenmp"
+  #HP
+  "+Oopenmp"
+  #IBM XL C/c++
+  "-qsmp"
+  #Portland Group
+  "-mp"
+)
+# Sample OpenMP source code to test with: it includes <omp.h> and compiles
+# only if _OPENMP is defined (the #else branch is deliberately invalid C).
+set (OpenMP_C_TEST_SOURCE
+"
+#include <omp.h>
+int main() {
+#ifdef _OPENMP
+  return 0;
+#else
+  breaks_on_purpose
+#endif
+}
+")
+# If the flags are already defined (e.g. cached by an earlier run), do not
+# look for them again: clearing the candidate list avoids any try-compiles.
+if (DEFINED OpenMP_C_FLAGS)
+  set (OpenMP_C_FLAG_CANDIDATES)
+endif (DEFINED OpenMP_C_FLAGS)
+
+# Check the C compiler: the first candidate that compiles the test source wins.
+foreach (FLAG ${OpenMP_C_FLAG_CANDIDATES})
+  set (SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+  set (CMAKE_REQUIRED_FLAGS "${FLAG}")
+  unset (OpenMP_FLAG_DETECTED CACHE)
+  message (STATUS "Try OpenMP C flag = [${FLAG}]")
+  check_c_source_compiles ("${OpenMP_C_TEST_SOURCE}" OpenMP_FLAG_DETECTED)
+  set (CMAKE_REQUIRED_FLAGS "${SAFE_CMAKE_REQUIRED_FLAGS}")
+  if (OpenMP_FLAG_DETECTED)
+    set (OpenMP_C_FLAGS_INTERNAL "${FLAG}")
+    break ()
+  endif (OpenMP_FLAG_DETECTED)
+endforeach (FLAG ${OpenMP_C_FLAG_CANDIDATES})
+
+set (OpenMP_C_FLAGS "${OpenMP_C_FLAGS_INTERNAL}"
+  CACHE STRING "C compiler flags for OpenMP parallelization")
+
+# Handle the standard arguments for find_package (this sets OPENMP_FOUND):
+find_package_handle_standard_args (OpenMP DEFAULT_MSG
+  OpenMP_C_FLAGS)
+
+mark_as_advanced (OpenMP_C_FLAGS)
diff --git a/cmake/Modules/FindSIMD.cmake b/cmake/Modules/FindSIMD.cmake
new file mode 100644
index 0000000..16f39f6
--- /dev/null
+++ b/cmake/Modules/FindSIMD.cmake
@@ -0,0 +1,53 @@
+# - Finds SIMD support
+#
+# The following variables are set:
+#   SIMD_C_FLAGS - flags to add to the C compiler for this package.
+#   SIMD_FOUND - true if support for this package is found.
+
+include (CheckCSourceCompiles)
+include (FindPackageHandleStandardArgs)
+# Candidate flag sets, tried in the order listed:
+set (SIMD_C_FLAG_CANDIDATES
+  #Microsoft Visual Studio
+  "/arch:SSE /fp:fast -D__SSE__"
+  #Gnu
+  "-msse -mfpmath=sse"
+)
+# Test program: loads, adds and stores 4 floats using <xmmintrin.h> intrinsics.
+set (SIMD_C_TEST_SOURCE
+"
+#include <xmmintrin.h>
+int main()
+{
+  __m128 a, b;
+  float vals[4] = {0};
+  a = _mm_loadu_ps (vals);
+  b = a;
+  b = _mm_add_ps (a,b);
+  _mm_storeu_ps (vals,b);
+  return 0;
+}
+")
+# If SIMD_C_FLAGS is already defined, skip the probing by clearing the list:
+if (DEFINED SIMD_C_FLAGS)
+  set (SIMD_C_FLAG_CANDIDATES)
+endif ()
+# Try each candidate; stop at the first with which the test program compiles:
+foreach (FLAG ${SIMD_C_FLAG_CANDIDATES})
+  set (SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+  set (CMAKE_REQUIRED_FLAGS "${FLAG}")
+  unset (SIMD_FLAG_DETECTED CACHE)
+  message (STATUS "Try SIMD C flag = [${FLAG}]")
+  check_c_source_compiles ("${SIMD_C_TEST_SOURCE}" SIMD_FLAG_DETECTED)
+  set (CMAKE_REQUIRED_FLAGS "${SAFE_CMAKE_REQUIRED_FLAGS}")
+  if (SIMD_FLAG_DETECTED)
+    set (SIMD_C_FLAGS_INTERNAL "${FLAG}")
+    break ()
+  endif ()
+endforeach ()
+# Store the chosen flags in the cache:
+set (SIMD_C_FLAGS "${SIMD_C_FLAGS_INTERNAL}"
+  CACHE STRING "C compiler flags for SIMD vectorization")
+# NOTE(review): SIMD_C_FLAGS is listed twice below; once would presumably do.
+find_package_handle_standard_args (SIMD DEFAULT_MSG SIMD_C_FLAGS SIMD_C_FLAGS)
+mark_as_advanced (SIMD_C_FLAGS)
diff --git a/cmake/Modules/TestBigEndian.cmake b/cmake/Modules/TestBigEndian.cmake
new file mode 100644
index 0000000..d5f52d1
--- /dev/null
+++ b/cmake/Modules/TestBigEndian.cmake
@@ -0,0 +1,15 @@
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# - Macro to determine endian type
+#  test_big_endian (VARIABLE)
+#  VARIABLE - cache variable to store the result to (true if big endian).  The
+#  test program exits with 0 only if the first byte of a long set to 1 is 0.
+macro (test_big_endian VARIABLE)
+  if ("HAVE_${VARIABLE}" MATCHES "^HAVE_${VARIABLE}$")
+    include (CheckCSourceRuns)
+    check_c_source_runs ("int main() {union {long i; char c[sizeof(long)];}
+      const u = {1}; return !!u.c[0];}" HAVE_${VARIABLE})
+    set (${VARIABLE} "${HAVE_${VARIABLE}}" CACHE INTERNAL "1 if system is big endian" FORCE)
+  endif ()
+endmacro ()
diff --git a/configure b/configure
new file mode 100755
index 0000000..abe9baa
--- /dev/null
+++ b/configure
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# Wrapper to allow easier integration with projects using autotools.
+
+# Such projects will probably be using static libs so should pass
+#  -DBUILD_SHARED_LIBS=OFF amongst any other options needed.
+
+# Autotools options should not be passed to this script.
+
+cmake $* .
diff --git a/deinstall.cmake.in b/deinstall.cmake.in
new file mode 100644
index 0000000..a017418
--- /dev/null
+++ b/deinstall.cmake.in
@@ -0,0 +1,25 @@
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+if (NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
+  message (FATAL_ERROR "Cannot find install manifest")
+endif ()
+# Remove each file listed in the manifest (one per line), under any DESTDIR:
+file (READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
+string (REGEX REPLACE "\n" ";" files "${files}")
+foreach (file ${files})
+  set (dest "$ENV{DESTDIR}${file}")
+  message (STATUS "Deinstalling \"${dest}\"")
+  if (EXISTS "${dest}" OR IS_SYMLINK "${dest}")
+    execute_process (
+      COMMAND "@CMAKE_COMMAND@" -E remove "${dest}"  # Quoted: path may have spaces.
+      OUTPUT_VARIABLE rm_out
+      RESULT_VARIABLE rm_retval
+    )
+    if (NOT "${rm_retval}" STREQUAL "0")  # Result may be text, not a number.
+      message (FATAL_ERROR "Problem when removing \"${dest}\"")
+    endif ()
+  else ()
+    message (STATUS "File \"${dest}\" does not exist.")
+  endif ()
+endforeach ()
diff --git a/examples/1-single-block.c b/examples/1-single-block.c
new file mode 100644
index 0000000..ccb5675
--- /dev/null
+++ b/examples/1-single-block.c
@@ -0,0 +1,48 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Example 1: `One-shot' resample a single block of data in memory.
+ *
+ * N.B. See example 2 for how to resample a stream (of blocks).
+ *
+ * Optional arguments are: INPUT-RATE OUTPUT-RATE
+ *
+ * With the default arguments, the output should produce lines similar to the
+ * following:
+ *
+ *  0.00  0.71  1.00  0.71 -0.00 -0.71 -1.00 -0.71
+ *
+ * Gibbs effect may be seen at the ends of the resampled signal; this is because
+ * unlike a `real-world' signal, the synthetic input signal is not band-limited.
+ */
+
+#include <soxr.h>
+#include "examples-common.h"
+
+const float in[] = {  /* Input: 12 cycles (48 samples) of a sine, f = irate/4 */
+  0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1,
+  0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1};
+
+int main(int argc, char const * arg[])
+{
+  double irate = argc > 1? atof(arg[1]) : 1;         /* Default to upsampling */
+  double orate = argc > 2? atof(arg[2]) : 2;             /* by a factor of 2. */
+
+  size_t olen = (size_t)(AL(in) * orate / irate + .5);   /* Assay output len. */
+  float * out = malloc(sizeof(*out) * olen);       /* Allocate output buffer. */
+  size_t odone = 0, i;                /* # of samples output; printing index. */
+  soxr_error_t error;
+
+  if (olen && !out)                       /* Cannot proceed without a buffer. */
+    return fputs("out of memory\n", stderr), 1;
+  error = soxr_oneshot(irate, orate, 1,              /* Rates and # of chans. */
+      in, AL(in), NULL,                              /* Input. */
+      out, olen, &odone,                             /* Output. */
+      NULL, NULL, NULL);                             /* Default configuration.*/
+
+  for (i = 0; i < odone; ++i)      /* Print the resampled data, 8 per line... */
+    printf("%5.2f%c", out[i], " \n"[!((i + 1) & 7) || i + 1 == odone]);
+  puts(soxr_strerror(error));                  /* ...and the reported result. */
+  free(out);                                                      /* Tidy up. */
+  return !!error;
+}
diff --git a/examples/2-stream.c b/examples/2-stream.c
new file mode 100644
index 0000000..bf47d0c
--- /dev/null
+++ b/examples/2-stream.c
@@ -0,0 +1,78 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Example 2: resample a raw, single-channel, floating-point data stream from
+ * stdin to stdout.
+ *
+ * The application uses the single function `soxr_process' for both input and
+ * output to/from the resampler; compared to the `input function' approach
+ * (illustrated in example 3) this requires that the application implements
+ * more logic, but one less function.
+ *
+ * Arguments are: INPUT-RATE OUTPUT-RATE
+ */
+
+#include <soxr.h>
+#include "examples-common.h"
+
+int main(int argc, char const * arg[])
+{
+  double const irate = argc > 1? atof(arg[1]) : 96000.;
+  double const orate = argc > 2? atof(arg[2]) : 44100.;
+
+  /* Allocate resampling input and output buffers in proportion to the input
+   * and output rates: */
+  #define buf_total_len 15000  /* In samples. */
+  size_t const olen = (size_t)(orate * buf_total_len / (irate + orate) + .5);
+  size_t const ilen = buf_total_len - olen;
+  size_t const osize = sizeof(float), isize = osize;
+  void * obuf = malloc(osize * olen);
+  void * ibuf = malloc(isize * ilen);
+  int const no_mem = (olen && !obuf) || (ilen && !ibuf); /* Allocation failed? */
+
+  size_t odone, written, need_input = 1;
+  soxr_error_t error;
+
+  /* Create a stream resampler: */
+  soxr_t soxr = soxr_create(
+      irate, orate, 1,             /* Input rate, output rate, # of channels. */
+      &error,                         /* To report any error during creation. */
+      NULL, NULL, NULL);                        /* Use configuration defaults.*/
+
+  if (!error && !no_mem) {             /* If all is well, run the resampler: */
+    USE_STD_STDIO;
+                                                       /* Resample in blocks: */
+    do {
+      size_t ilen1 = 0;
+
+      if (need_input) {
+        /* Read one block into the buffer, ready to be resampled: */
+        ilen1 = fread(ibuf, isize, ilen, stdin);
+
+        if (!ilen1) {     /* If there is no (more) input data available, */
+          free(ibuf);     /* set ibuf to NULL, to indicate end-of-input */
+          ibuf = NULL;    /* to the resampler. */
+        }
+      }
+
+      /* Copy data from the input buffer into the resampler, and resample
+       * to produce as much output as is possible to the given output buffer: */
+      error = soxr_process(soxr, ibuf, ilen1, NULL, obuf, olen, &odone);
+
+      written = fwrite(obuf, osize, odone, stdout); /* Consume output.*/
+
+      /* If the actual amount of data output is less than that requested, and
+       * we have not already reached the end of the input data, then supply some
+       * more input next time round the loop: */
+      need_input = odone < olen && ibuf;
+
+    } while (!error && (need_input || written));
+  }
+                                                                  /* Tidy up: */
+  soxr_delete(soxr);
+  free(obuf), free(ibuf);
+                                                              /* Diagnostics: */
+  fprintf(stderr, "%-26s %s; I/O: %s\n", arg[0], soxr_strerror(error),
+      no_mem? "out of memory" : errno? strerror(errno) : "no error");
+  return error || errno || no_mem;
+}
diff --git a/examples/3-options-input-fn.c b/examples/3-options-input-fn.c
new file mode 100644
index 0000000..c34859d
--- /dev/null
+++ b/examples/3-options-input-fn.c
@@ -0,0 +1,97 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Example 3: extends example 2 with multiple channels, multiple datatypes,
+ * and other options.
+ *
+ * The application provides an input function, called on demand by libsoxr, in
+ * response to calls to soxr_output(); compared to the `process' approach
+ * (illustrated in example 2) this requires that the application implements
+ * less logic, but one more function.
+ *
+ * The eight arguments (which are optional, from last to first) are:
+ *   INPUT-RATE       As example 2
+ *   OUTPUT-RATE      Ditto
+ *   NUM-CHANNELS     Number of interleaved channels
+ *   IN-DATATYPE#     0:float32 1:float64 2:int32 3:int16
+ *   OUT-DATATYPE#    Ditto
+ *   Q-RECIPE         Quality recipe (in hex) See soxr.h
+ *   Q-FLAGS          Quality flags  (in hex) See soxr.h
+ *   USE-THREADS      1 to use multi-threading (where available)
+ */
+
+#include <soxr.h>
+#include "examples-common.h"
+
+typedef struct {void * ibuf; size_t isize;} input_context_t;
+
+static size_t input_fn(input_context_t * p, soxr_cbuf_t * buf, size_t len)
+{
+  /* Fill the context's buffer from stdin; fread counts items of p->isize bytes
+   * and may deliver fewer than the `len' requested (none at end-of-input): */
+  size_t const got = fread(p->ibuf, p->isize, len, stdin);
+
+  /* Tell the resampler where the data is; NULL reports a read error (i.e.
+   * nothing was read and the stream's error indicator is set): */
+  if (got || !ferror(stdin)) *buf = p->ibuf; else *buf = NULL;
+
+  return got;                     /* # of items read, handed to the resampler. */
+}
+
+int main(int n, char const * arg[])
+{
+  char const *     const arg0 = n? --n, *arg++ : "";
+  double          const irate = n? --n, atof(*arg++) : 96000.;
+  double          const orate = n? --n, atof(*arg++) : 44100.;
+  unsigned        const chans = n? --n, (unsigned)atoi(*arg++) : 1;
+  soxr_datatype_t const itype = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
+  soxr_datatype_t const otype = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
+  unsigned long const q_recipe= n? --n, strtoul(*arg++, 0, 16) : SOXR_HQ;
+  unsigned long const q_flags = n? --n, strtoul(*arg++, 0, 16) : 0;
+  int       const use_threads = n? --n, atoi(*arg++) : 1;
+
+  soxr_quality_spec_t const q_spec = soxr_quality_spec(q_recipe, q_flags);
+  soxr_io_spec_t      const io_spec = soxr_io_spec(itype, otype);
+  soxr_runtime_spec_t const runtime_spec = soxr_runtime_spec(!use_threads);
+
+  /* Allocate resampling input and output buffers in proportion to the input
+   * and output rates: */
+  #define buf_total_len 15000  /* In samples per channel. */
+  size_t const osize = soxr_datatype_size(otype) * chans;
+  size_t const isize = soxr_datatype_size(itype) * chans;
+  size_t const olen = (size_t)(orate * buf_total_len / (irate + orate) + .5);
+  size_t const ilen = buf_total_len - olen;
+  void * const obuf = malloc(osize * olen);
+  void * const ibuf = malloc(isize * ilen);
+
+  input_context_t icontext;
+  size_t odone, clips = 0;
+  soxr_error_t error;
+
+  /* Create a stream resampler: */
+  soxr_t soxr = soxr_create(
+      irate, orate, chans,         /* Input rate, output rate, # of channels. */
+      &error,                         /* To report any error during creation. */
+      &io_spec, &q_spec, &runtime_spec);
+
+  if (!error) {                      /* Register input_fn with the resampler: */
+    icontext.ibuf = ibuf, icontext.isize = isize;
+    error = soxr_set_input_fn(soxr, (soxr_input_fn_t)input_fn, &icontext, ilen);
+  }
+
+  if (!error) {                         /* If all is well, run the resampler: */
+    USE_STD_STDIO;
+                                                       /* Resample in blocks: */
+    do odone = soxr_output(soxr, obuf, olen);
+    while (fwrite(obuf, osize, odone, stdout));            /* Consume output. */
+
+    error = soxr_error(soxr);            /* Check if any soxr error occurred. */
+    clips = *soxr_num_clips(soxr);     /* Can occur only with integer output. */
+  }
+                                                                  /* Tidy up: */
+  soxr_delete(soxr);
+  free(obuf), free(ibuf);
+                                                              /* Diagnostics: */
+  fprintf(stderr, "%-26s %s; %lu clips; I/O: %s\n", arg0, soxr_strerror(error),
+      (long unsigned)clips, errno? strerror(errno) : "no error");
+  return error || errno;
+}
diff --git a/examples/4-split-channels.c b/examples/4-split-channels.c
new file mode 100644
index 0000000..d5e8060
--- /dev/null
+++ b/examples/4-split-channels.c
@@ -0,0 +1,147 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Example 4: variant of examples 2 & 3, demonstrating I/O with split channels.
+ *
+ * Note that, for convenience of the demonstration, split-channel data is
+ * made available by deinterleaving data sourced from and sent to
+ * interleaved file-streams; this adds a lot of code to the example that,
+ * for purposes of understanding how to use split-channels, may safely be
+ * ignored.  In a real application, the channel-data might never be
+ * interleaved; for example, the split-channel data output from the
+ * resampler might be sent directly to digital-to-analogue converters.
+ *
+ * Note also (not shown in the examples) that split/interleaved channels may
+ * be used for input and output independently.
+ */
+
+#include <soxr.h>
+#include "examples-common.h"
+
+
+
+#define DEINTERLEAVE(T) do {       /* Body of deinterleave() for sample type T */ \
+  unsigned i;                                               /* Channel index */ \
+  size_t j;                                   /* Sample index within channel */ \
+  T * const * dest = (T * const *)dest0; \
+  T const * src = src0; \
+  if (ch == 1) memcpy(dest[0], src, n * sizeof(dest[0][0]));  /* Mono: copy */ \
+  else for (j = 0; j < n; ++j) for (i = 0; i < ch; ++i) dest[i][j] = *src++; \
+  return;                /* N.B. returns from the function using this macro */ \
+} while (0)
+
+static void deinterleave(soxr_datatype_t data_type,
+    void * const * dest0,          /* Out: one buffer per channel (ch of them) */
+    void const * src0,              /* In: n * ch samples, channel-interleaved */
+    size_t n, unsigned ch)             /* Samples per channel; # of channels */
+{
+  switch (data_type & 3) {          /* Only the 2 low bits select the datatype */
+    case SOXR_FLOAT32: DEINTERLEAVE(float);     /* Each case returns from the */
+    case SOXR_FLOAT64: DEINTERLEAVE(double);    /* function (see `return' in  */
+    case SOXR_INT32  : DEINTERLEAVE(int32_t);   /* the macro), so there is no */
+    case SOXR_INT16  : DEINTERLEAVE(int16_t);   /* fall-through between them. */
+    default: break;
+  }
+}
+
+#define INTERLEAVE(T) do {           /* Body of interleave() for sample type T */ \
+  unsigned i;                                               /* Channel index */ \
+  size_t j;                                   /* Sample index within channel */ \
+  T * dest = dest0; \
+  T const * const * src = (T const * const *)src0; \
+  if (ch == 1) memcpy(dest, src[0], n * sizeof(dest[0]));     /* Mono: copy */ \
+  else for (j = 0; j < n; ++j) for (i = 0; i < ch; ++i) *dest++ = src[i][j]; \
+  return;                /* N.B. returns from the function using this macro */ \
+} while (0)
+
+static void interleave(soxr_datatype_t data_type, void * dest0,  /* Out: n*ch */
+  void * const * src0, size_t n, unsigned ch) /* In: ch buffers of n samples */
+{
+  switch (data_type & 3) {          /* Only the 2 low bits select the datatype */
+    case SOXR_FLOAT32: INTERLEAVE(float);       /* Each case returns from the */
+    case SOXR_FLOAT64: INTERLEAVE(double);      /* function (see `return' in  */
+    case SOXR_INT32  : INTERLEAVE(int32_t);     /* the macro), so there is no */
+    case SOXR_INT16  : INTERLEAVE(int16_t);     /* fall-through between them. */
+    default: break;
+  }
+}
+
+int main(int n, char const * arg[])
+{
+  char const *     const arg0 = n? --n, *arg++ : "";
+  double          const irate = n? --n, atof(*arg++) : 96000.;
+  double          const orate = n? --n, atof(*arg++) : 44100.;
+  unsigned        const chans = n? --n, (unsigned)atoi(*arg++) : 1;
+  soxr_datatype_t const itype = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
+  soxr_datatype_t const otype = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
+  unsigned long const q_recipe= n? --n, strtoul(*arg++, 0, 16) : SOXR_HQ;
+  unsigned long const q_flags = n? --n, strtoul(*arg++, 0, 16) : 0;
+  int       const use_threads = n? --n, atoi(*arg++) : 1;
+
+  soxr_quality_spec_t const q_spec = soxr_quality_spec(q_recipe, q_flags);
+  soxr_io_spec_t const io_spec=soxr_io_spec(itype|SOXR_SPLIT, otype|SOXR_SPLIT);
+  soxr_runtime_spec_t const runtime_spec = soxr_runtime_spec(!use_threads);
+
+  /* Allocate resampling input and output buffers in proportion to the input
+   * and output rates: */
+  #define buf_total_len 15000  /* In samples per channel. */
+  size_t const osize = soxr_datatype_size(otype) * chans;
+  size_t const isize = soxr_datatype_size(itype) * chans;
+  size_t const olen = (size_t)(orate * buf_total_len / (irate + orate) + .5);
+  size_t const ilen = buf_total_len - olen;
+
+  /* For split channels: */
+  void * * const obuf_ptrs = malloc(sizeof(void *) * chans);
+  void * *       ibuf_ptrs = malloc(sizeof(void *) * chans);
+  char * const obufs = malloc(osize * olen), * optr = obufs;
+  char * const ibufs = malloc(isize * ilen), * iptr = ibufs;
+
+  /* For interleaved channels: */
+  char * const obuf = malloc(osize * olen);
+  char * const ibuf = malloc(isize * ilen);
+
+  size_t odone, written, need_input = 1, clips = 0;
+  soxr_error_t error;
+
+  soxr_t soxr = soxr_create(
+      irate, orate, chans, &error, &io_spec, &q_spec, &runtime_spec);
+
+  unsigned i;
+  for (i = 0; i < chans; ++i) {
+    ibuf_ptrs[i] = iptr;
+    obuf_ptrs[i] = optr;
+    iptr += ilen * soxr_datatype_size(itype);
+    optr += olen * soxr_datatype_size(otype);
+  }
+
+  if (!error) {
+    USE_STD_STDIO;
+
+    do {
+      size_t ilen1 = 0;
+
+      if (need_input) {
+        if (!(ilen1 = fread(ibuf, isize, ilen, stdin)))
+          free(ibuf_ptrs), ibuf_ptrs = 0; /* If none available, don't retry. */
+        else deinterleave(itype, ibuf_ptrs, ibuf, ilen1, chans);
+      }
+
+      error = soxr_process(soxr, ibuf_ptrs, ilen1, NULL, obuf_ptrs, olen, &odone);
+      interleave(otype, obuf, obuf_ptrs, odone, chans);  /* Consume output... */
+      written = fwrite(obuf, osize, odone, stdout);
+
+      need_input = odone < olen && ibuf_ptrs;
+
+    } while (!error && (need_input || written));
+
+    clips = *soxr_num_clips(soxr);     /* Can occur only with integer output. */
+  }
+                                                                  /* Tidy up: */
+  soxr_delete(soxr);
+  free(obuf), free(ibuf), free(obufs), free(ibufs);
+  free(obuf_ptrs), free(ibuf_ptrs);
+                                                              /* Diagnostics: */
+  fprintf(stderr, "%-26s %s; %lu clips; I/O: %s\n", arg0, soxr_strerror(error),
+      (long unsigned)clips, errno? strerror(errno) : "no error");
+  return error || errno;
+}
diff --git a/examples/5-variable-rate.c b/examples/5-variable-rate.c
new file mode 100644
index 0000000..fc5a63c
--- /dev/null
+++ b/examples/5-variable-rate.c
@@ -0,0 +1,94 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Example 5:  Variable-rate resampling (N.B. experimental).  A test signal
+ * (held in a buffer) is resampled over a wide range of octaves.  Resampled
+ * data is sent to stdout as raw, float32 samples.  Choices of 2 test-signals
+ * and of 2 ways of varying the sample-rate are combined in a command-line
+ * option:
+ *
+ * Usage: ./5-variable-rate [0|1|2|3]
+ */
+
+#include <soxr.h>
+#include "examples-common.h"
+
+#define OCTAVES  5       /* Resampling range. ± */
+#define OLEN     16      /* Output length in seconds. */
+#define FS       44100   /* Output sampling rate in Hz. */
+
+/* For output pos in [0,1], returns an ioratio in the 2^±OCTAVES range: */
+static double ioratio(double pos, int fm)
+{    /* fm == 0: slow sweep (pos used as is); non-0: fast-changing (warped). */
+  double const warped = !fm? pos :
+    .5 - cos(pos * 2 * M_PI) * .4 + sin(pos * OLEN * 20 * M_PI) * .05;
+  return pow(2, 2 * OCTAVES * warped - OCTAVES);
+}
+
+int main(int argc, char *arg[])
+{
+  int opt = argc <= 1? 2 : (atoi(arg[1]) & 3), saw = opt & 1, fm = opt & 2;
+  float ibuf[10 << OCTAVES], obuf[AL(ibuf)];
+  int i, wl = 2 << OCTAVES;
+  size_t ilen = AL(ibuf), need_input = 1;
+  size_t odone, total_odone, total_olen = OLEN * FS;
+  size_t olen1 = fm? 10 : AL(obuf); /* Small block-len if fast-changing ratio */
+  soxr_error_t error;
+
+  /* When creating a var-rate resampler, q_spec must be set as follows: */
+  soxr_quality_spec_t q_spec = soxr_quality_spec(SOXR_HQ, SOXR_VR);
+
+  /* The ratio of the given input rate and output rates must equate to the
+   * maximum I/O ratio that will be used: */
+  soxr_t soxr = soxr_create(1 << OCTAVES, 1, 1, &error, NULL, &q_spec, NULL);
+
+  if (!error) {
+    USE_STD_STDIO;
+
+    /* Generate input signal, sine or saw, with wave-length = wl: */
+    for (i = 0; i < (int)ilen; ++i)
+      ibuf[i] = (float)(saw? (i%wl)/(wl-1.)-.5 : .9 * sin(2 * M_PI * i / wl));
+
+    /* Set the initial resampling ratio (N.B. 3rd parameter = 0): */
+    soxr_set_io_ratio(soxr, ioratio(0, fm), 0);
+
+    /* Resample in blocks of size olen1: */
+    for (total_odone = 0; !error && total_odone < total_olen;) {
+
+      /* The last block might be shorter: */
+      size_t block_len = min(olen1, total_olen - total_odone);
+
+      /* Determine the position in [0,1] of the end of the current block: */
+      double pos = (double)(total_odone + block_len) / (double)total_olen;
+
+      /* Calculate an ioratio for this position and instruct the resampler to
+       * move smoothly to the new value, over the course of outputting the next
+       * 'block_len' samples (or give 0 for an instant change instead): */
+      soxr_set_io_ratio(soxr, ioratio(pos, fm), block_len);
+
+      /* Output the block of samples, supplying input samples as needed: */
+      do {
+        size_t len = need_input? ilen : 0;
+        error = soxr_process(soxr, ibuf, len, NULL, obuf, block_len, &odone);
+        fwrite(obuf, sizeof(float), odone, stdout);
+
+        /* Update counters for the current block and for the total length: */
+        block_len -= odone;
+        total_odone += odone;
+
+        /* If soxr_process did not provide the complete block, we must call it
+         * again, supplying more input samples: */
+        need_input = block_len != 0;
+
+      } while (need_input && !error);
+
+      /* Now that the block for the current ioratio is complete, go back
+       * round the main `for' loop in order to process the next block. */
+    }
+    soxr_delete(soxr);
+  }
+                                                              /* Diagnostics: */
+  fprintf(stderr, "%-26s %s; I/O: %s\n", arg[0],
+      soxr_strerror(error), errno? strerror(errno) : "no error");
+  return error || errno;
+}
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 0000000..c3291f8
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,21 @@
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+if (${BUILD_EXAMPLES})
+  project (soxr)
+  file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.[cC])
+  if (NOT BUILD_SHARED_LIBS AND OPENMP_FOUND)
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_C_FLAGS}")
+  endif ()
+else ()
+  file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/3*.c)
+endif ()
+
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${PROJECT_C_FLAGS}")
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PROJECT_CXX_FLAGS}")
+link_libraries (${PROJECT_NAME})
+
+foreach (fe ${SOURCES})
+  get_filename_component (f ${fe} NAME_WE)
+  add_executable (${f} ${fe})
+endforeach ()
diff --git a/examples/README b/examples/README
new file mode 100644
index 0000000..3ceb039
--- /dev/null
+++ b/examples/README
@@ -0,0 +1,18 @@
+SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+
+These simple examples show the different ways that an application may
+interface with libsoxr.  Note that real-world applications may also have to
+deal with file-formats, codecs, (more sophisticated) dithering, etc., which
+are not covered here.
+
+With libsoxr installed, the examples may be built using commands similar to
+the following.  On unix-like systems:
+
+    cc 1-single-block.c -lsoxr
+
+or, on MS-Windows:
+
+    cl 1-single-block.c -I"C:/Program Files/soxr/include" "C:/Program Files/soxr/lib/soxr.lib"
+
+IDEs may hide such commands behind configuration screens and build menus --
+where applicable, consult your IDE's user-manual.
diff --git a/examples/examples-common.h b/examples/examples-common.h
new file mode 100644
index 0000000..cf8401c
--- /dev/null
+++ b/examples/examples-common.h
@@ -0,0 +1,45 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Common includes etc. for the examples.  */
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _WIN32
+  /* Put stdin/stdout into binary mode (MS-Windows defaults to text mode): */
+  #include <io.h>
+  #include <fcntl.h>
+  #define USE_STD_STDIO _setmode(_fileno(stdout), _O_BINARY), \
+                        _setmode(_fileno(stdin ), _O_BINARY) /* Caller adds ; */
+  /* Sometimes missing, so ensure that it is defined: */
+  #undef M_PI
+  #define M_PI 3.14159265358979323846
+#else
+  #define USE_STD_STDIO
+#endif
+
+#undef int16_t
+#define int16_t short
+
+#undef int32_t
+#if LONG_MAX > 2147483647L
+  #define int32_t int
+#elif LONG_MAX < 2147483647L
+  #error this programme requires that 'long int' has at least 32-bits
+#else
+  #define int32_t long
+#endif
+
+#undef min
+#undef max
+#define min(x,y) ((x)<(y)?(x):(y))
+#define max(x,y) ((x)>(y)?(x):(y))
+
+#define AL(a) (sizeof(a)/sizeof((a)[0]))  /* Array Length */
diff --git a/go b/go
new file mode 100755
index 0000000..01cc147
--- /dev/null
+++ b/go
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+build=$1
+test "x$build" = x && build=Release   # Default build type when none is given
+
+rm -f CMakeCache.txt             # Prevent interference from any in-tree build
+
+mkdir -p "$build" &&
+  cd "$build" || exit 1          # Never run cmake from the wrong directory
+
+cmake -DCMAKE_BUILD_TYPE="$build" -DBUILD_TESTS=ON .. &&
+  make &&
+    (make test || echo "FAILURE details in $build/Testing/Temporary/LastTest.log")
diff --git a/go.bat b/go.bat
new file mode 100644
index 0000000..6112dbe
--- /dev/null
+++ b/go.bat
@@ -0,0 +1,24 @@
+@echo off
+rem SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+rem Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+set build=%1
+if x%build% == x set build=Release
+
+mkdir %build%
+cd %build%
+
+cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=%build% -DBUILD_TESTS=ON ..
+if errorlevel 1 goto end
+
+nmake
+if errorlevel 1 goto end
+
+nmake test
+if errorlevel 1 goto error
+goto end
+
+:error
+echo FAILURE details in Testing\Temporary\LastTest.log
+
+:end
diff --git a/lsr-tests/CMakeLists.txt b/lsr-tests/CMakeLists.txt
new file mode 100644
index 0000000..4db2a73
--- /dev/null
+++ b/lsr-tests/CMakeLists.txt
@@ -0,0 +1,49 @@
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+list (APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules)
+
+find_package (FFTW)
+if (FFTW_FOUND)
+  include_directories (${FFTW_INCLUDE_DIRS})
+  link_libraries (${FFTW_LIBRARIES})
+  set (HAVE_FFTW3 1)
+endif ()
+
+find_package (sndfile)
+if (SNDFILE_FOUND)
+  include_directories (${SNDFILE_INCLUDE_DIRS})
+  link_libraries (${SNDFILE_LIBRARIES})
+  set (HAVE_SNDFILE 1)
+endif ()
+
+check_function_exists (lrintf HAVE_LRINTF)
+check_function_exists (alarm HAVE_ALARM)
+check_function_exists (signal HAVE_SIGNAL)
+check_include_files (sys/times.h HAVE_SYS_TIMES_H)
+make_exist (HAVE_LRINTF HAVE_ALARM HAVE_SIGNAL HAVE_SYS_TIMES_H)
+make_exist (HAVE_FFTW HAVE_FFTW3 HAVE_SNDFILE)  # HAVE_FFTW3 is the name set above
+
+configure_file (${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
+include_directories (${CMAKE_CURRENT_BINARY_DIR})
+
+add_library (tests_lib SHARED util calc_snr)
+
+link_libraries (tests_lib ${PROJECT_NAME}-lsr)
+
+enable_testing ()
+
+set (tests
+  callback_hang_test callback_test downsample_test
+  float_short_test misc_test multi_channel_test
+  reset_test simple_test snr_bw_test termination_test varispeed_test)
+
+foreach (test ${tests})
+  add_executable (${test} ${test})
+  add_test (lsr-${test} ${BIN}${test})
+  set_property (TEST lsr-${test} PROPERTY ENVIRONMENT "SOXR_LSR_STRICT=1")
+endforeach ()
+
+add_executable (multichan_throughput_test multichan_throughput_test)
+add_executable (throughput_test throughput_test )
+add_executable (sndfile-resample sndfile-resample)
diff --git a/lsr-tests/COPYING b/lsr-tests/COPYING
new file mode 100644
index 0000000..d60c31a
--- /dev/null
+++ b/lsr-tests/COPYING
@@ -0,0 +1,340 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year  name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/lsr-tests/README b/lsr-tests/README
new file mode 100644
index 0000000..f468446
--- /dev/null
+++ b/lsr-tests/README
@@ -0,0 +1,8 @@
+The C source and header files in this directory have been copied from
+the `libsamplerate' project and are copyrighted by its authors -- see
+the notices within the files and the file `COPYING' for details.
+
+They are used here to test libsoxr's optional libsamplerate-like
+wrapper.  The only modifications made are to the file `snr_bw_test.c' to
+remove reliance on certain frequency response troughs that are specific
+to libsamplerate.
diff --git a/lsr-tests/calc_snr.c b/lsr-tests/calc_snr.c
new file mode 100644
index 0000000..ddfc04c
--- /dev/null
+++ b/lsr-tests/calc_snr.c
@@ -0,0 +1,242 @@
+/*
+** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include "config.h"
+
+#include "util.h"
+
+#if (HAVE_FFTW3 == 1)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include <fftw3.h>
+
+#define	MAX_SPEC_LEN	(1<<18)
+#define	MAX_PEAKS		10
+
+static void log_mag_spectrum (double *input, int len, double *magnitude) ;
+static void smooth_mag_spectrum (double *magnitude, int len) ;
+static double find_snr (const double *magnitude, int len, int expected_peaks) ;
+
+typedef struct
+{	double	peak ;
+	int		index ;
+} PEAK_DATA ;
+
+double
+calculate_snr (float *data, int len, int expected_peaks)
+{	static double magnitude [MAX_SPEC_LEN] ;
+	static double datacopy [MAX_SPEC_LEN] ;
+
+	double snr = 200.0 ;
+	int k ;
+
+	if (len > MAX_SPEC_LEN)
+	{	printf ("%s : line %d : data length too large.\n", __FILE__, __LINE__) ;
+		exit (1) ;
+		} ;
+
+	for (k = 0 ; k < len ; k++)
+		datacopy [k] = data [k] ;
+
+	/* Pad the data just a little to speed up the FFT. */
+	while ((len & 0x1F) && len < MAX_SPEC_LEN)
+	{	datacopy [len] = 0.0 ;
+		len ++ ;
+		} ;
+
+	log_mag_spectrum (datacopy, len, magnitude) ;
+	smooth_mag_spectrum (magnitude, len / 2) ;
+
+	snr = find_snr (magnitude, len, expected_peaks) ;
+
+	return snr ;
+} /* calculate_snr */
+
+/*==============================================================================
+** There is a slight problem with trying to measure SNR with the method used
+** here; the side lobes of the windowed FFT can look like a noise/aliasing peak.
+** The solution is to smooth the magnitude spectrum by wiping out troughs
+** between adjacent peaks as done here.
+** This removes side lobe peaks without affecting noise/aliasing peaks.
+*/
+
+static void linear_smooth (double *mag, PEAK_DATA *larger, PEAK_DATA *smaller) ;
+
+static void
+smooth_mag_spectrum (double *mag, int len)
+{	PEAK_DATA peaks [2] ;
+
+	int k ;
+
+	memset (peaks, 0, sizeof (peaks)) ;
+
+	/* Find first peak. */
+	for (k = 1 ; k < len - 1 ; k++)
+	{	if (mag [k - 1] < mag [k] && mag [k] >= mag [k + 1])
+		{	peaks [0].peak = mag [k] ;
+			peaks [0].index = k ;
+			break ;
+			} ;
+		} ;
+
+	/* Find subsequent peaks and smooth between peaks. */
+	for (k = peaks [0].index + 1 ; k < len - 1 ; k++)
+	{	if (mag [k - 1] < mag [k] && mag [k] >= mag [k + 1])
+		{	peaks [1].peak = mag [k] ;
+			peaks [1].index = k ;
+
+			if (peaks [1].peak > peaks [0].peak)
+				linear_smooth (mag, &peaks [1], &peaks [0]) ;
+			else
+				linear_smooth (mag, &peaks [0], &peaks [1]) ;
+			peaks [0] = peaks [1] ;
+			} ;
+		} ;
+
+} /* smooth_mag_spectrum */
+
+static void
+linear_smooth (double *mag, PEAK_DATA *larger, PEAK_DATA *smaller)
+{	int k ;
+
+	if (smaller->index < larger->index)
+	{	for (k = smaller->index + 1 ; k < larger->index ; k++)
+			mag [k] = (mag [k] < mag [k - 1]) ? 0.999 * mag [k - 1] : mag [k] ;
+		}
+	else
+	{	for (k = smaller->index - 1 ; k >= larger->index ; k--)
+			mag [k] = (mag [k] < mag [k + 1]) ? 0.999 * mag [k + 1] : mag [k] ;
+		} ;
+
+} /* linear_smooth */
+
+/*==============================================================================
+*/
+
+static int
+peak_compare (const void *vp1, const void *vp2)
+{	const PEAK_DATA *peak1, *peak2 ;
+
+	peak1 = (const PEAK_DATA*) vp1 ;
+	peak2 = (const PEAK_DATA*) vp2 ;
+
+	return (peak1->peak < peak2->peak) ? 1 : -1 ;
+} /* peak_compare */
+
+static double
+find_snr (const double *magnitude, int len, int expected_peaks)
+{	PEAK_DATA peaks [MAX_PEAKS] ;
+
+	int		k, peak_count = 0 ;
+	double	snr ;
+
+	memset (peaks, 0, sizeof (peaks)) ;
+
+	/* Find the MAX_PEAKS largest peaks. */
+	for (k = 1 ; k < len - 1 ; k++)
+	{	if (magnitude [k - 1] < magnitude [k] && magnitude [k] >= magnitude [k + 1])
+		{	if (peak_count < MAX_PEAKS)
+			{	peaks [peak_count].peak = magnitude [k] ;
+				peaks [peak_count].index = k ;
+				peak_count ++ ;
+				qsort (peaks, peak_count, sizeof (PEAK_DATA), peak_compare) ;
+				}
+			else if (magnitude [k] > peaks [MAX_PEAKS - 1].peak)
+			{	peaks [MAX_PEAKS - 1].peak = magnitude [k] ;
+				peaks [MAX_PEAKS - 1].index = k ;
+				qsort (peaks, MAX_PEAKS, sizeof (PEAK_DATA), peak_compare) ;
+				} ;
+			} ;
+		} ;
+
+	if (peak_count < expected_peaks)
+	{	printf ("\n%s : line %d : bad peak_count (%d), expected %d.\n\n", __FILE__, __LINE__, peak_count, expected_peaks) ;
+		return -1.0 ;
+		} ;
+
+	/* Sort the peaks. */
+	qsort (peaks, peak_count, sizeof (PEAK_DATA), peak_compare) ;
+
+	snr = peaks [0].peak ;
+	for (k = 1 ; k < peak_count ; k++)
+		if (fabs (snr - peaks [k].peak) > 10.0)
+			return fabs (peaks [k].peak) ;
+
+	return snr ;
+} /* find_snr */
+
+static void
+log_mag_spectrum (double *input, int len, double *magnitude)
+{	fftw_plan plan = NULL ;
+
+	double	maxval ;
+	int		k ;
+
+	if (input == NULL || magnitude == NULL)
+		return ;
+
+	plan = fftw_plan_r2r_1d (len, input, magnitude, FFTW_R2HC, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT) ;
+	if (plan == NULL)
+	{	printf ("%s : line %d : create plan failed.\n", __FILE__, __LINE__) ;
+		exit (1) ;
+		} ;
+
+	fftw_execute (plan) ;
+
+	fftw_destroy_plan (plan) ;
+
+	/* (k < N/2 rounded up) */
+	maxval = 0.0 ;
+	for (k = 1 ; k < len / 2 ; k++)
+	{	magnitude [k] = sqrt (magnitude [k] * magnitude [k] + magnitude [len - k - 1] * magnitude [len - k - 1]) ;
+		maxval = (maxval < magnitude [k]) ? magnitude [k] : maxval ;
+		} ;
+
+	memset (magnitude + len / 2, 0, len / 2 * sizeof (magnitude [0])) ;
+
+	/* Don't care about DC component. Make it zero. */
+	magnitude [0] = 0.0 ;
+
+	/* log magnitude. */
+	for (k = 0 ; k < len ; k++)
+	{	magnitude [k] = magnitude [k] / maxval ;
+		magnitude [k] = (magnitude [k] < 1e-15) ? -200.0 : 20.0 * log10 (magnitude [k]) ;
+		} ;
+
+	return ;
+} /* log_mag_spectrum */
+
+#else /* ! (HAVE_FFTW3 == 1) */
+
+double
+calculate_snr (float *data, int len, int expected_peaks)
+{	double snr = 200.0 ;
+
+	data = data ;
+	len = len ;
+	expected_peaks = expected_peaks ;
+
+	return snr ;
+} /* calculate_snr */
+
+#endif
+
diff --git a/lsr-tests/callback_hang_test.c b/lsr-tests/callback_hang_test.c
new file mode 100644
index 0000000..be89369
--- /dev/null
+++ b/lsr-tests/callback_hang_test.c
@@ -0,0 +1,131 @@
+/*
+** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <math.h>
+
+#if HAVE_ALARM && HAVE_SIGNAL && HAVE_SIGALRM
+
+#include <signal.h>
+
+#include <samplerate.h>
+
+#include "util.h"
+
+#define	SHORT_BUFFER_LEN	512
+#define	LONG_BUFFER_LEN		(1 << 14)
+
+typedef struct
+{	double ratio ;
+	int count ;
+} SRC_PAIR ;
+
+static void callback_hang_test (int converter) ;
+
+static void alarm_handler (int number) ;
+static long input_callback (void *cb_data, float **data) ;
+
+
+int
+main (void)
+{
+	/* Set up SIGALRM handler. */
+	signal (SIGALRM, alarm_handler) ;
+
+	puts ("") ;
+	callback_hang_test (SRC_ZERO_ORDER_HOLD) ;
+	callback_hang_test (SRC_LINEAR) ;
+	callback_hang_test (SRC_SINC_FASTEST) ;
+	puts ("") ;
+
+	return 0 ;
+} /* main */
+
+
+static void
+callback_hang_test (int converter)
+{	static float output [LONG_BUFFER_LEN] ;
+	static SRC_PAIR pairs [] =
+	{
+		{ 1.2, 5 }, { 1.1, 1 }, { 1.0, 1 }, { 3.0, 1 }, { 2.0, 1 }, { 0.3, 1 },
+		{ 1.2, 0 }, { 1.1, 10 }, { 1.0, 1 }
+		} ;
+
+
+	SRC_STATE	*src_state ;
+
+	double src_ratio = 1.0 ;
+	int k, error ;
+
+	printf ("\tcallback_hang_test  (%-28s) ....... ", src_get_name (converter)) ;
+	fflush (stdout) ;
+
+	/* Perform sample rate conversion. */
+	src_state = src_callback_new (input_callback, converter, 1, &error, NULL) ;
+	if (src_state == NULL)
+	{	printf ("\n\nLine %d : src_callback_new () failed : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	for (k = 0 ; k < ARRAY_LEN (pairs) ; k++)
+	{	alarm (1) ;
+		src_ratio = pairs [k].ratio ;
+		src_callback_read (src_state, src_ratio, pairs [k].count, output) ;
+		} ;
+
+	src_state = src_delete (src_state) ;
+
+	alarm (0) ;
+	puts ("ok") ;
+
+	return ;
+} /* callback_hang_test */
+
+static void
+alarm_handler (int number)
+{
+	(void) number ;
+	printf ("\n\n    Error : Hang inside src_callback_read() detected. Exiting!\n\n") ;
+	exit (1) ;
+} /* alarm_handler */
+
+static long
+input_callback (void *cb_data, float **data)
+{
+	static float buffer [20] ;
+
+	(void) cb_data ;
+	*data = buffer ;
+
+	return ARRAY_LEN (buffer) ;
+} /* input_callback */
+
+#else
+
+int
+main (void)
+{
+	puts ("\tCan't run this test on this platform.") ;
+	return 0 ;
+} /* main */
+
+#endif
diff --git a/lsr-tests/callback_test.c b/lsr-tests/callback_test.c
new file mode 100644
index 0000000..0854d64
--- /dev/null
+++ b/lsr-tests/callback_test.c
@@ -0,0 +1,243 @@
+/*
+** Copyright (C) 2003-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include <samplerate.h>
+
+#include "util.h"
+
+#define	BUFFER_LEN		10000
+#define CB_READ_LEN		256
+
+static void callback_test (int converter, double ratio) ;
+static void end_of_stream_test (int converter) ;
+
+int
+main (void)
+{	static double src_ratios [] =
+	{	1.0, 0.099, 0.1, 0.33333333, 0.789, 1.0001, 1.9, 3.1, 9.9
+	} ;
+
+	int k ;
+
+	puts ("") ;
+
+	puts ("    Zero Order Hold interpolator :") ;
+	for (k = 0 ; k < ARRAY_LEN (src_ratios) ; k++)
+		callback_test (SRC_ZERO_ORDER_HOLD, src_ratios [k]) ;
+
+	puts ("    Linear interpolator :") ;
+	for (k = 0 ; k < ARRAY_LEN (src_ratios) ; k++)
+		callback_test (SRC_LINEAR, src_ratios [k]) ;
+
+	puts ("    Sinc interpolator :") ;
+	for (k = 0 ; k < ARRAY_LEN (src_ratios) ; k++)
+		callback_test (SRC_SINC_FASTEST, src_ratios [k]) ;
+
+	puts ("") ;
+
+	puts ("    End of stream test :") ;
+	end_of_stream_test (SRC_ZERO_ORDER_HOLD) ;
+	end_of_stream_test (SRC_LINEAR) ;
+	end_of_stream_test (SRC_SINC_FASTEST) ;
+
+	puts ("") ;
+	return 0 ;
+} /* main */
+
+/*=====================================================================================
+*/
+
+typedef struct
+{	int channels ;
+	long count, total ;
+	int end_of_data ;
+	float data [BUFFER_LEN] ;
+} TEST_CB_DATA ;
+
+static long
+test_callback_func (void *cb_data, float **data)
+{	TEST_CB_DATA *pcb_data ;
+
+	long frames ;
+
+	if ((pcb_data = cb_data) == NULL)
+		return 0 ;
+
+	if (data == NULL)
+		return 0 ;
+
+	if (pcb_data->total - pcb_data->count > CB_READ_LEN)
+		frames = CB_READ_LEN / pcb_data->channels ;
+	else
+		frames = (pcb_data->total - pcb_data->count) / pcb_data->channels ;
+
+	*data = pcb_data->data + pcb_data->count ;
+	pcb_data->count += frames ;
+
+	return frames ;
+} /* test_callback_func */
+
+
+static void
+callback_test (int converter, double src_ratio)
+{	static TEST_CB_DATA test_callback_data ;
+	static float output [BUFFER_LEN] ;
+
+	SRC_STATE	*src_state ;
+
+	long	read_count, read_total ;
+	int 	error ;
+
+	printf ("\tcallback_test    (SRC ratio = %6.4f) ........... ", src_ratio) ;
+	fflush (stdout) ;
+
+	test_callback_data.channels = 2 ;
+	test_callback_data.count = 0 ;
+	test_callback_data.end_of_data = 0 ;
+	test_callback_data.total = ARRAY_LEN (test_callback_data.data) ;
+
+	if ((src_state = src_callback_new (test_callback_func, converter, test_callback_data.channels, &error, &test_callback_data)) == NULL)
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	read_total = 0 ;
+	do
+	{	/* We will be throwing away output data, so just grab as much as possible. */
+		read_count = ARRAY_LEN (output) / test_callback_data.channels ;
+		read_count = src_callback_read (src_state, src_ratio, read_count, output) ;
+		read_total += read_count ;
+		}
+	while (read_count > 0) ;
+
+	if ((error = src_error (src_state)) != 0)
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	src_state = src_delete (src_state) ;
+
+	if (fabs (read_total / src_ratio - ARRAY_LEN (test_callback_data.data)) > 2.0)
+	{	printf ("\n\nLine %d : input / output length mismatch.\n\n", __LINE__) ;
+		printf ("    input len  : %d\n", ARRAY_LEN (test_callback_data.data)) ;
+		printf ("    output len : %ld (should be %g +/- 2)\n\n", read_total,
+					floor (0.5 + src_ratio * ARRAY_LEN (test_callback_data.data))) ;
+		exit (1) ;
+		} ;
+
+	puts ("ok") ;
+
+	return ;
+} /* callback_test */
+
+/*=====================================================================================
+*/
+
+static long
+eos_callback_func (void *cb_data, float **data)
+{
+	TEST_CB_DATA *pcb_data ;
+	long frames ;
+
+	if (data == NULL)
+		return 0 ;
+
+	if ((pcb_data = cb_data) == NULL)
+		return 0 ;
+
+	/*
+	**	Return immediately if there is no more data.
+	**	In this case, the output pointer 'data' will not be set and
+	**	valgrind should not warn about it.
+	*/
+	if (pcb_data->end_of_data)
+		return 0 ;
+
+	if (pcb_data->total - pcb_data->count > CB_READ_LEN)
+		frames = CB_READ_LEN / pcb_data->channels ;
+	else
+		frames = (pcb_data->total - pcb_data->count) / pcb_data->channels ;
+
+	*data = pcb_data->data + pcb_data->count ;
+	pcb_data->count += frames ;
+
+	/*
+	**	Set end_of_data so that the next call to the callback function will
+	**	return zero count without setting the 'data' pointer.
+	*/
+	if (pcb_data->total < 2 * pcb_data->count)
+		pcb_data->end_of_data = 1 ;
+
+	return frames ;
+} /* eos_callback_func */
+
+
+static void
+end_of_stream_test (int converter)
+{	static TEST_CB_DATA test_callback_data ;
+	static float output [BUFFER_LEN] ;
+
+	SRC_STATE	*src_state ;
+
+	double	src_ratio = 0.3 ;
+	long	read_count, read_total ;
+	int 	error ;
+
+	printf ("\t%-30s        ........... ", src_get_name (converter)) ;
+	fflush (stdout) ;
+
+	test_callback_data.channels = 2 ;
+	test_callback_data.count = 0 ;
+	test_callback_data.end_of_data = 0 ;
+	test_callback_data.total = ARRAY_LEN (test_callback_data.data) ;
+
+	if ((src_state = src_callback_new (eos_callback_func, converter, test_callback_data.channels, &error, &test_callback_data)) == NULL)
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	read_total = 0 ;
+	do
+	{	/* We will be throwing away output data, so just grab as much as possible. */
+		read_count = ARRAY_LEN (output) / test_callback_data.channels ;
+		read_count = src_callback_read (src_state, src_ratio, read_count, output) ;
+		read_total += read_count ;
+		}
+	while (read_count > 0) ;
+
+	if ((error = src_error (src_state)) != 0)
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	src_state = src_delete (src_state) ;
+
+	if (test_callback_data.end_of_data == 0)
+	{	printf ("\n\nLine %d : test_callback_data.end_of_data should not be 0."
+				" This is a bug in the test.\n\n", __LINE__) ;
+		exit (1) ;
+		} ;
+
+	puts ("ok") ;
+	return ;
+} /* end_of_stream_test */
diff --git a/lsr-tests/cmake/Modules/FindFFTW.cmake b/lsr-tests/cmake/Modules/FindFFTW.cmake
new file mode 100644
index 0000000..eff7c02
--- /dev/null
+++ b/lsr-tests/cmake/Modules/FindFFTW.cmake
@@ -0,0 +1,23 @@
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# - Find FFTW
+# Find the native installation of this package: includes and libraries.
+#
+#  FFTW_INCLUDES    - where to find headers for this package.
+#  FFTW_LIBRARIES   - List of libraries when using this package.
+#  FFTW_FOUND       - True if this package can be found.
+
+if (FFTW_INCLUDES)
+  set (FFTW_FIND_QUIETLY TRUE)
+endif (FFTW_INCLUDES)
+
+find_path (FFTW_INCLUDES fftw3.h)
+
+find_library (FFTW_LIBRARIES NAMES fftw3)
+
+include (FindPackageHandleStandardArgs)
+find_package_handle_standard_args (
+  FFTW DEFAULT_MSG FFTW_LIBRARIES FFTW_INCLUDES)
+
+mark_as_advanced (FFTW_LIBRARIES FFTW_INCLUDES)
diff --git a/lsr-tests/cmake/Modules/Findsndfile.cmake b/lsr-tests/cmake/Modules/Findsndfile.cmake
new file mode 100644
index 0000000..3d7a107
--- /dev/null
+++ b/lsr-tests/cmake/Modules/Findsndfile.cmake
@@ -0,0 +1,23 @@
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# - Find SNDFILE
+# Find the native installation of this package: includes and libraries.
+#
+#  SNDFILE_INCLUDES    - where to find headers for this package.
+#  SNDFILE_LIBRARIES   - List of libraries when using this package.
+#  SNDFILE_FOUND       - True if this package can be found.
+
+if (SNDFILE_INCLUDES)
+  set (SNDFILE_FIND_QUIETLY TRUE)
+endif (SNDFILE_INCLUDES)
+
+find_path (SNDFILE_INCLUDES sndfile.h)
+
+find_library (SNDFILE_LIBRARIES NAMES sndfile)
+
+include (FindPackageHandleStandardArgs)
+find_package_handle_standard_args (
+  SNDFILE DEFAULT_MSG SNDFILE_LIBRARIES SNDFILE_INCLUDES)
+
+mark_as_advanced (SNDFILE_LIBRARIES SNDFILE_INCLUDES)
diff --git a/lsr-tests/config.h.in b/lsr-tests/config.h.in
new file mode 100644
index 0000000..39c0ca9
--- /dev/null
+++ b/lsr-tests/config.h.in
@@ -0,0 +1,24 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined soxsrc_lsr_tests_config_included
+#define soxsrc_lsr_tests_config_included
+
+#define HAVE_ALARM @HAVE_ALARM@
+#define HAVE_FFTW3 @HAVE_FFTW3@
+#define HAVE_LRINTF @HAVE_LRINTF@
+#define HAVE_LRINT @HAVE_LRINT@
+#define HAVE_SIGNAL @HAVE_SIGNAL@
+#define HAVE_SNDFILE @HAVE_SNDFILE@
+#define HAVE_SYS_TIMES_H @HAVE_SYS_TIMES_H@
+
+#if HAVE_SIGNAL
+  #include <signal.h>
+  #if defined SIGALRM
+    #define HAVE_SIGALRM 1
+  #else
+    #define HAVE_SIGALRM 0
+  #endif
+#endif
+
+#endif
diff --git a/lsr-tests/downsample_test.c b/lsr-tests/downsample_test.c
new file mode 100644
index 0000000..87243e7
--- /dev/null
+++ b/lsr-tests/downsample_test.c
@@ -0,0 +1,61 @@
+/*
+** Copyright (C) 2008-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <samplerate.h>
+
+#include "util.h"
+
+static void
+downsample_test (int converter)
+{	static float in [1000], out [10] ;
+	SRC_DATA data ;
+
+    printf ("        downsample_test     (%-28s) ....... ", src_get_name (converter)) ;
+	fflush (stdout) ;
+
+	data.src_ratio = 1.0 / 255.0 ;
+	data.input_frames = ARRAY_LEN (in) ;
+	data.output_frames = ARRAY_LEN (out) ;
+	data.data_in = in ;
+	data.data_out = out ;
+
+	if (src_simple (&data, converter, 1))
+	{	puts ("src_simple failed.") ;
+		exit (1) ;
+		} ;
+
+	puts ("ok") ;
+} /* downsample_test */
+
+int
+main (void)
+{
+	puts ("") ;
+
+	downsample_test (SRC_ZERO_ORDER_HOLD) ;
+	downsample_test (SRC_LINEAR) ;
+	downsample_test (SRC_SINC_FASTEST) ;
+	downsample_test (SRC_SINC_MEDIUM_QUALITY) ;
+	downsample_test (SRC_SINC_BEST_QUALITY) ;
+
+	puts ("") ;
+
+	return 0 ;
+} /* main */
diff --git a/lsr-tests/float_cast.h b/lsr-tests/float_cast.h
new file mode 100644
index 0000000..77ad5b4
--- /dev/null
+++ b/lsr-tests/float_cast.h
@@ -0,0 +1,281 @@
+/*
+** Copyright (C) 2001-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU Lesser General Public License as published by
+** the Free Software Foundation; either version 2.1 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU Lesser General Public License for more details.
+**
+** You should have received a copy of the GNU Lesser General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+/* Version 1.5 */
+
+#ifndef FLOAT_CAST_HEADER
+#define FLOAT_CAST_HEADER
+
+/*============================================================================
+**	On Intel Pentium processors (especially PIII and probably P4), converting
+**	from float to int is very slow. To meet the C specs, the code produced by
+**	most C compilers targeting Pentium needs to change the FPU rounding mode
+**	before the float to int conversion is performed.
+**
+**	Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
+**	is this flushing of the pipeline which is so slow.
+**
+**	Fortunately the ISO C99 specifications define the functions lrint, lrintf,
+**	llrint and llrintf which fix this problem as a side effect.
+**
+**	On Unix-like systems, the configure process should have detected the
+**	presence of these functions. If they weren't found we have to replace them
+**	here with a standard C cast.
+*/
+
+/*
+**	The C99 prototypes for lrint and lrintf are as follows:
+**
+**		long int lrintf (float x) ;
+**		long int lrint  (double x) ;
+*/
+
+#include "config.h"
+
+/*
+**	The presence of the required functions is detected during the configure
+**	process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
+**	the config.h file.
+*/
+
+#define		HAVE_LRINT_REPLACEMENT	0
+
+#if (HAVE_LRINT && HAVE_LRINTF)
+
+	/*
+	**	These defines enable functionality introduced with the 1999 ISO C
+	**	standard. They must be defined before the inclusion of math.h to
+	**	engage them. If optimisation is enabled, these functions will be
+	**	inlined. With optimisation switched off, you have to link in the
+	**	maths library using -lm.
+	*/
+
+	#define	_ISOC9X_SOURCE	1
+	#define _ISOC99_SOURCE	1
+
+	#define	__USE_ISOC9X	1
+	#define	__USE_ISOC99	1
+
+	#include	<math.h>
+
+#elif (defined (__CYGWIN__))
+
+	#include	<math.h>
+
+	#undef		HAVE_LRINT_REPLACEMENT
+	#define		HAVE_LRINT_REPLACEMENT	1
+
+	#undef	lrint
+	#undef	lrintf
+
+	#define	lrint	double2int
+	#define	lrintf	float2int
+
+	/*
+	**	The native CYGWIN lrint and lrintf functions are buggy:
+	**		http://sourceware.org/ml/cygwin/2005-06/msg00153.html
+	**		http://sourceware.org/ml/cygwin/2005-09/msg00047.html
+	**	and slow.
+	**	These functions (pulled from the Public Domain MinGW math.h header)
+	**	replace the native versions.
+	*/
+
+	static inline long double2int (double in)
+	{	int retval ;	/* "fistpl" writes 32 bits: an int stays fully defined even where long is 64 bits. */
+		/* lrint () stand-in: store the value on top of the x87 stack to memory as an integer and pop it. */
+		__asm__ __volatile__
+		(	"fistpl %0"
+			: "=m" (retval)
+			: "t" (in)
+			: "st"
+			) ;
+
+		return retval ;
+	} /* double2int */
+
+	static inline long float2int (float in)
+	{	int retval ;	/* "fistpl" writes 32 bits: an int stays fully defined even where long is 64 bits. */
+		/* lrintf () stand-in: store the value on top of the x87 stack to memory as an integer and pop it. */
+		__asm__ __volatile__
+		(	"fistpl %0"
+			: "=m" (retval)
+			: "t" (in)
+			: "st"
+			) ;
+
+		return retval ;
+	} /* float2int */
+
+#elif (defined (WIN64) || defined(_WIN64))
+
+	/*	Win64 section should be placed before Win32 one, because
+	**	most likely both WIN32 and WIN64 will be defined in 64-bit case.
+	*/
+
+	#include	<math.h>
+
+	/*	Win64 doesn't seem to have these functions, nor inline assembly.
+	**	Therefore implement inline versions of these functions here.
+	*/
+	#include    <emmintrin.h>
+	#include    <mmintrin.h>
+
+	__inline long int
+	lrint(double flt)
+	{	/* SSE2 intrinsics: load the double into a vector register and convert it to a 32-bit integer. */
+		return _mm_cvtsd_si32(_mm_load_sd(&flt));
+	}
+
+	__inline long int
+	lrintf(float flt)
+	{	/* SSE intrinsics: load the float into a vector register and convert it to a 32-bit integer. */
+		return _mm_cvtss_si32(_mm_load_ss(&flt));
+	}
+
+#elif (defined (WIN32) || defined (_WIN32))
+
+	#undef		HAVE_LRINT_REPLACEMENT
+	#define		HAVE_LRINT_REPLACEMENT	1
+
+	#include	<math.h>
+
+	/*
+	**	Win32 doesn't seem to have these functions.
+	**	Therefore implement inline versions of these functions here.
+	*/
+
+	__inline long int
+	lrint (double flt)
+	{	int intgr ;
+		/* x87 inline asm: push flt on the FPU stack, then store it into intgr as an integer and pop. */
+		_asm
+		{	fld flt
+			fistp intgr
+			} ;
+
+		return intgr ;
+	}
+
+	__inline long int
+	lrintf (float flt)
+	{	int intgr ;
+		/* x87 inline asm: push flt on the FPU stack, then store it into intgr as an integer and pop. */
+		_asm
+		{	fld flt
+			fistp intgr
+			} ;
+
+		return intgr ;
+	}
+
+#elif (defined (__MWERKS__) && defined (macintosh))
+
+	/* This MacOS 9 solution was provided by Stephane Letz */
+
+	#undef		HAVE_LRINT_REPLACEMENT
+	#define		HAVE_LRINT_REPLACEMENT	1
+	#include	<math.h>
+
+	#undef	lrint
+	#undef	lrintf
+
+	#define	lrint	double2int
+	#define	lrintf	float2int
+
+	inline int
+	float2int (register float in)
+	{	long res [2] ;
+		/* NOTE(review): fctiw/stfd look like PowerPC; res [1] presumably holds the converted word on a big-endian target with 4-byte long - confirm. */
+		asm
+		{	fctiw	in, in
+			stfd	 in, res
+		}
+		return res [1] ;
+	} /* float2int */
+
+	inline int
+	double2int (register double in)
+	{	long res [2] ;
+		/* NOTE(review): fctiw/stfd look like PowerPC; res [1] presumably holds the converted word on a big-endian target with 4-byte long - confirm. */
+		asm
+		{	fctiw	in, in
+			stfd	 in, res
+		}
+		return res [1] ;
+	} /* double2int */
+
+#elif (defined (__MACH__) && defined (__APPLE__))
+
+	/* For Apple MacOSX. */
+
+	#undef		HAVE_LRINT_REPLACEMENT
+	#define		HAVE_LRINT_REPLACEMENT	1
+	#include	<math.h>
+
+	#undef lrint
+	#undef lrintf
+
+	#define lrint	double2int
+	#define lrintf	float2int
+
+	inline static long
+	float2int (register float in)
+	{	int res [2] ;
+		/* lrintf () stand-in. NOTE(review): fctiw/stfd look PowerPC-only while the guard tests just __MACH__/__APPLE__ - confirm. */
+		__asm__ __volatile__
+		(	"fctiw	%1, %1\n\t"
+			"stfd	%1, %0"
+			: "=m" (res)	/* Output */
+			: "f" (in)		/* Input */
+			: "memory"
+			) ;
+
+		return res [1] ;
+	} /* float2int */
+
+	inline static long
+	double2int (register double in)
+	{	int res [2] ;
+		/* lrint () stand-in. NOTE(review): fctiw/stfd look PowerPC-only while the guard tests just __MACH__/__APPLE__ - confirm. */
+		__asm__ __volatile__
+		(	"fctiw	%1, %1\n\t"
+			"stfd	%1, %0"
+			: "=m" (res)	/* Output */
+			: "f" (in)		/* Input */
+			: "memory"
+			) ;
+
+		return res [1] ;
+	} /* double2int */
+
+#else
+	#ifndef __sgi
+	#warning "Don't have the functions lrint() and lrintf()."
+	#warning "Replacing these functions with a standard C cast."
+	#endif
+
+	#include	<math.h>
+
+	#define	lrint(dbl)		((long) (dbl))
+	#define	lrintf(flt)		((long) (flt))
+
+#endif
+
+
+#endif /* FLOAT_CAST_HEADER */
+
diff --git a/lsr-tests/float_short_test.c b/lsr-tests/float_short_test.c
new file mode 100644
index 0000000..6664a3b
--- /dev/null
+++ b/lsr-tests/float_short_test.c
@@ -0,0 +1,192 @@
+/*
+** Copyright (C) 2003-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <samplerate.h>
+
+#include "util.h"
+
+#define	BUFFER_LEN		10000
+
+static void float_to_short_test (void) ;
+static void short_to_float_test (void) ;
+
+static void float_to_int_test (void) ;
+static void int_to_float_test (void) ;
+
+int
+main (void)
+{	/* Run the short and int conversion tests in both directions. */
+	puts ("") ;
+
+	float_to_short_test () ;
+	short_to_float_test () ;
+
+	float_to_int_test () ;
+	int_to_float_test () ;
+
+	puts ("") ;
+
+	return 0 ;
+} /* main */
+
+/*=====================================================================================
+*/
+
+static void
+float_to_short_test (void)
+{	/* Verify that src_float_to_short_array () turns every input of magnitude >= 0.95 into a short of magnitude >= 30000. */
+	static float fpos [] =
+	{	0.95, 0.99, 1.0, 1.01, 1.1, 2.0, 11.1, 111.1, 2222.2, 33333.3
+		} ;
+	static float fneg [] =
+	{	-0.95, -0.99, -1.0, -1.01, -1.1, -2.0, -11.1, -111.1, -2222.2, -33333.3
+		} ;
+
+	static short out [MAX (ARRAY_LEN (fpos), ARRAY_LEN (fneg))] ;
+
+	int k ;
+
+	printf ("\tfloat_to_short_test ............................. ") ;
+	/* Positive inputs, several of them above 1.0: no result may fall below 30000. */
+	src_float_to_short_array (fpos, out, ARRAY_LEN (fpos)) ;
+
+	for (k = 0 ; k < ARRAY_LEN (fpos) ; k++)
+		if (out [k] < 30000)
+		{	printf ("\n\n\tLine %d : out [%d] == %d\n", __LINE__, k, out [k]) ;
+			exit (1) ;
+			} ;
+	/* Mirror-image check for the negative inputs: no result may rise above -30000. */
+	src_float_to_short_array (fneg, out, ARRAY_LEN (fneg)) ;
+
+	for (k = 0 ; k < ARRAY_LEN (fneg) ; k++)
+		if (out [k] > -30000)
+		{	printf ("\n\n\tLine %d : out [%d] == %d\n", __LINE__, k, out [k]) ;
+			exit (1) ;
+			} ;
+
+	puts ("ok") ;
+
+	return ;
+} /* float_to_short_test */
+
+/*-------------------------------------------------------------------------------------
+*/
+
+static void
+short_to_float_test (void)
+{	/* Round-trip check: short -> float -> short must reproduce every sample exactly. */
+	static short input	[BUFFER_LEN] ;
+	static short output	[BUFFER_LEN] ;
+	static float temp	[BUFFER_LEN] ;
+
+	int k ;
+
+	printf ("\tshort_to_float_test ............................. ") ;
+	/* Rising ramp: k * 0x8000 / length, so every value lies in [0, 0x8000). */
+	for (k = 0 ; k < ARRAY_LEN (input) ; k++)
+		input [k] = (k * 0x8000) / ARRAY_LEN (input) ;
+
+	src_short_to_float_array (input, temp, ARRAY_LEN (temp)) ;
+	src_float_to_short_array (temp, output, ARRAY_LEN (output)) ;
+	/* Any non-zero difference between input and output is a failure. */
+	for (k = 0 ; k < ARRAY_LEN (input) ; k++)
+		if (ABS (input [k] - output [k]) > 0)
+		{	printf ("\n\n\tLine %d : index %d   %d -> %d\n", __LINE__, k, input [k], output [k]) ;
+			exit (1) ;
+			} ;
+
+	puts ("ok") ;
+
+	return ;
+} /* short_to_float_test */
+
+/*=====================================================================================
+*/
+
+static void
+float_to_int_test (void)
+{	/* Verify that src_float_to_int_array () turns every input of magnitude >= 0.95 into an int of magnitude >= 30000 * 0x10000. */
+	static float fpos [] =
+	{	0.95, 0.99, 1.0, 1.01, 1.1, 2.0, 11.1, 111.1, 2222.2, 33333.3
+		} ;
+	static float fneg [] =
+	{	-0.95, -0.99, -1.0, -1.01, -1.1, -2.0, -11.1, -111.1, -2222.2, -33333.3
+		} ;
+
+	static int out [MAX (ARRAY_LEN (fpos), ARRAY_LEN (fneg))] ;
+
+	int k ;
+
+	printf ("\tfloat_to_int_test ............................... ") ;
+	/* Positive inputs, several of them above 1.0: no result may fall below 30000 * 0x10000. */
+	src_float_to_int_array (fpos, out, ARRAY_LEN (fpos)) ;
+
+	for (k = 0 ; k < ARRAY_LEN (fpos) ; k++)
+		if (out [k] < 30000 * 0x10000)
+		{	printf ("\n\n\tLine %d : out [%d] == %d\n", __LINE__, k, out [k]) ;
+			exit (1) ;
+			} ;
+	/* Mirror-image check; the threshold uses the same 0x10000 scale as the positive case (it was 0x1000, 16x too lax). */
+	src_float_to_int_array (fneg, out, ARRAY_LEN (fneg)) ;
+
+	for (k = 0 ; k < ARRAY_LEN (fneg) ; k++)
+		if (out [k] > -30000 * 0x10000)
+		{	printf ("\n\n\tLine %d : out [%d] == %d\n", __LINE__, k, out [k]) ;
+			exit (1) ;
+			} ;
+
+	puts ("ok") ;
+
+	return ;
+} /* float_to_int_test */
+
+/*-------------------------------------------------------------------------------------
+*/
+
+static void
+int_to_float_test (void)
+{	/* Round-trip check: int -> float -> int must reproduce every generated sample exactly. */
+	static int input	[BUFFER_LEN] ;
+	static int output	[BUFFER_LEN] ;
+	static float temp	[BUFFER_LEN] ;
+
+	int k ;
+
+	printf ("\tint_to_float_test ............................... ") ;
+	/* NOTE(review): with 32-bit int, 0x80000000 is unsigned, so k * 0x80000000 wraps modulo 2^32 - the inputs are presumably not a full-range ramp; confirm. */
+	for (k = 0 ; k < ARRAY_LEN (input) ; k++)
+		input [k] = (k * 0x80000000) / ARRAY_LEN (input) ;
+
+	src_int_to_float_array (input, temp, ARRAY_LEN (temp)) ;
+	src_float_to_int_array (temp, output, ARRAY_LEN (output)) ;
+	/* Any non-zero difference between input and output is a failure. */
+	for (k = 0 ; k < ARRAY_LEN (input) ; k++)
+		if (ABS (input [k] - output [k]) > 0)
+		{	printf ("\n\n\tLine %d : index %d   %d -> %d\n", __LINE__, k, input [k], output [k]) ;
+			exit (1) ;
+			} ;
+
+	puts ("ok") ;
+
+	return ;
+} /* int_to_float_test */
+
diff --git a/lsr-tests/misc_test.c b/lsr-tests/misc_test.c
new file mode 100644
index 0000000..4baa334
--- /dev/null
+++ b/lsr-tests/misc_test.c
@@ -0,0 +1,175 @@
+/*
+** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <samplerate.h>
+
+#include "util.h"
+
+static void name_test (void) ;
+static void error_test (void) ;
+static void src_ratio_test (void) ;
+static void zero_input_test (int converter) ;
+
+int
+main (void)
+{	/* Print the library version, then run the name, error-string, ratio and zero-input tests. */
+	puts ("") ;
+
+	printf ("    version : %s\n\n", src_get_version ()) ;
+
+	/* name_test () walks the converter indices until src_get_name () returns NULL. */
+	name_test () ;
+
+	error_test () ;
+
+	src_ratio_test () ;
+
+	zero_input_test (SRC_ZERO_ORDER_HOLD) ;
+	zero_input_test (SRC_LINEAR) ;
+	zero_input_test (SRC_SINC_FASTEST) ;
+
+	puts ("") ;
+	return 0 ;
+} /* main */
+
+static void
+name_test (void)
+{	const char	*name ;
+	int	k = 0 ;
+	/* Print the name and description of converters 0, 1, 2, ... until src_get_name () returns NULL. */
+	puts ("    name_test :") ;
+
+	while (1)
+	{	name = src_get_name (k) ;
+		if (name == NULL)
+			break ;
+		printf ("\tName %d : %s\n", k, name) ;
+		printf ("\tDesc %d : %s\n", k, src_get_description (k)) ;
+		k ++ ;
+		} ;
+
+	puts ("") ;
+
+	return ;
+} /* name_test */
+
+/*------------------------------------------------------------------------------
+*/
+
+typedef struct
+{	double	ratio ;			/* Value handed to src_is_valid_ratio (). */
+	int		should_pass ;	/* Non-zero when the ratio is expected to be accepted. */
+} RATIO_TEST ;
+/* Cases for src_ratio_test (): just outside, on and inside the [1/256, 256] range, plus a negative ratio. */
+static RATIO_TEST ratio_test [] =
+{	{	1.0 / 256.1,	0 },
+	{	1.0 / 256.0,	1 },
+	{	1.0,			1 },
+	{	256.0, 			1 },
+	{	256.1,			0 },
+	{	-1.0,			0 }
+} ;
+
+static void
+src_ratio_test (void)
+{	int k ;
+
+	puts ("    src_ratio_test (SRC ratio must be in range [1/256, 256]):" ) ;
+
+	/* For every table entry, src_is_valid_ratio () must agree with should_pass; the first mismatch exits with status 1. */
+	for (k = 0 ; k < ARRAY_LEN (ratio_test) ; k++)
+	{	if (ratio_test [k].should_pass && src_is_valid_ratio (ratio_test [k].ratio) == 0)
+		{	printf ("\n\nLine %d : SRC ratio %f should have passed.\n\n", __LINE__, ratio_test [k].ratio) ;
+			exit (1) ;
+			} ;
+		if (! ratio_test [k].should_pass && src_is_valid_ratio (ratio_test [k].ratio) != 0)
+		{	printf ("\n\nLine %d : SRC ratio %f should not have passed.\n\n", __LINE__, ratio_test [k].ratio) ;
+			exit (1) ;
+			} ;
+		printf ("\t SRC ratio (%9.5f) : %s ................... ok\n", ratio_test [k].ratio,
+			(ratio_test [k].should_pass ? "pass" : "fail")) ;
+		} ;
+
+	puts ("") ;
+
+	return ;
+} /* src_ratio_test */
+
+static void
+error_test (void)
+{	const char *errorstr ;
+	int		k, errors = 0 ;
+	/* Print src_strerror (k) for k = 0, 1, 2, ... up to the "Placeholder." entry; any NULL string on the way is an error. */
+	puts ("    error_test :") ;
+
+	for (k = 0 ; 1 ; k++)
+	{	errorstr = src_strerror (k) ;
+		printf ("\t%-2d : %s\n", k, errorstr ? errorstr : "(null)") ;	/* Never hand NULL to %s: that is undefined behaviour. */
+		if (errorstr == NULL)
+		{	errors ++ ;
+			continue ;
+			} ;
+		if (strstr (errorstr, "Placeholder.") == errorstr)
+			break ;
+		} ;
+
+	if (errors != 0)
+	{	printf ("\n\nLine %d : Missing error numbers above.\n\n", __LINE__) ;
+		exit (1) ;
+		} ;
+
+	puts ("") ;
+
+	return ;
+} /* error_test */
+
+static void
+zero_input_test (int converter)
+{	SRC_DATA data ;
+	SRC_STATE *state ;
+	float out [100] ;
+	int error ;
+	/* src_process () must succeed when it is given zero input frames. */
+	printf ("    %s (%-26s) ........ ", __func__, src_get_name (converter)) ;
+	fflush (stdout) ;
+
+	if ((state = src_new (converter, 1, &error)) == NULL)
+	{	printf ("\n\nLine %d : src_new failed : %s.\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+	/* data_in is a deliberately bogus pointer; with input_frames == 0 it presumably must never be dereferenced. */
+	data.data_in = (float *) 0xdeadbeef ;
+	data.input_frames = 0 ;
+	data.data_out = out ;
+	data.output_frames = ARRAY_LEN (out) ;
+	data.end_of_input = 0 ;
+	data.src_ratio = 1.0 ;
+	/* The failing call here is src_process (), so the message names it (it used to say src_new). */
+	if ((error = src_process (state, &data)))
+	{	printf ("\n\nLine %d : src_process failed : %s.\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	state = src_delete (state) ;
+
+	puts ("ok") ;
+} /* zero_input_test */
diff --git a/lsr-tests/multi_channel_test.c b/lsr-tests/multi_channel_test.c
new file mode 100644
index 0000000..1ad9ced
--- /dev/null
+++ b/lsr-tests/multi_channel_test.c
@@ -0,0 +1,364 @@
+/*
+** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+
+#include <samplerate.h>
+
+#include "util.h"
+#define	BUFFER_LEN		50000
+#define	BLOCK_LEN		(12)
+
+#define	MAX_CHANNELS	10
+
+static void simple_test (int converter, int channel_count, double target_snr) ;
+static void process_test (int converter, int channel_count, double target_snr) ;
+static void callback_test (int converter, int channel_count, double target_snr) ;
+
+int
+main (void)
+{	double target ;
+	int k ;
+	/* For each converter, run the three tests over a range of channel counts with a per-converter SNR target. */
+	puts ("\n    Zero Order Hold interpolator :") ;
+	target = 38.0 ;
+	for (k = 1 ; k <= 3 ; k++)
+	{	simple_test		(SRC_ZERO_ORDER_HOLD, k, target) ;
+		process_test	(SRC_ZERO_ORDER_HOLD, k, target) ;
+		callback_test	(SRC_ZERO_ORDER_HOLD, k, target) ;
+		} ;
+
+	puts ("\n    Linear interpolator :") ;
+	target = 79.0 ;
+	for (k = 1 ; k <= 3 ; k++)
+	{	simple_test		(SRC_LINEAR, k, target) ;
+		process_test	(SRC_LINEAR, k, target) ;
+		callback_test	(SRC_LINEAR, k, target) ;
+		} ;
+
+	puts ("\n    Sinc interpolator :") ;
+	target = 100.0 ;
+	for (k = 1 ; k <= MAX_CHANNELS ; k++)
+	{	simple_test		(SRC_SINC_FASTEST, k, target) ;
+		process_test	(SRC_SINC_FASTEST, k, target) ;
+		callback_test	(SRC_SINC_FASTEST, k, target) ;
+		} ;
+
+	puts ("") ;
+
+	return 0 ;
+} /* main */
+
+/*==============================================================================
+*/
+
+static float input_serial		[BUFFER_LEN * MAX_CHANNELS] ;
+static float input_interleaved	[BUFFER_LEN * MAX_CHANNELS] ;
+static float output_interleaved	[BUFFER_LEN * MAX_CHANNELS] ;
+static float output_serial		[BUFFER_LEN * MAX_CHANNELS] ;
+
+static void
+simple_test (int converter, int channel_count, double target_snr)
+{	SRC_DATA	src_data ;
+	/* One-shot src_simple () conversion of channel_count sine channels at ratio 0.95, then output length and per-channel SNR checks. */
+	double	freq, snr ;
+	int		ch, error, frames ;
+
+	printf ("\t%-22s (%2d channel%c) ............ ", "simple_test", channel_count, channel_count > 1 ? 's' : ' ') ;
+	fflush (stdout) ;
+
+	assert (channel_count <= MAX_CHANNELS) ;
+
+	memset (input_serial, 0, sizeof (input_serial)) ;
+	memset (input_interleaved, 0, sizeof (input_interleaved)) ;
+	memset (output_interleaved, 0, sizeof (output_interleaved)) ;
+	memset (output_serial, 0, sizeof (output_serial)) ;
+
+	frames = BUFFER_LEN ;
+
+	/* Calculate channel_count separate windowed sine waves. */
+	for (ch = 0 ; ch < channel_count ; ch++)
+	{	freq = (200.0 + 33.333333333 * ch) / 44100.0 ;
+		gen_windowed_sines (1, &freq, 1.0, input_serial + ch * frames, frames) ;
+		} ;
+
+	/* Interleave the data in preparation for SRC. */
+	interleave_data (input_serial, input_interleaved, frames, channel_count) ;
+
+	/* Choose a conversion ratio <= 1.0. */
+	src_data.src_ratio = 0.95 ;
+
+	src_data.data_in = input_interleaved ;
+	src_data.input_frames = frames ;
+
+	src_data.data_out = output_interleaved ;
+	src_data.output_frames = frames ;
+
+	if ((error = src_simple (&src_data, converter, channel_count)))
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	if (fabs (src_data.output_frames_gen - src_data.src_ratio * src_data.input_frames) > 2)
+	{	printf ("\n\nLine %d : bad output data length %ld should be %d.\n", __LINE__,
+					src_data.output_frames_gen, (int) floor (src_data.src_ratio * src_data.input_frames)) ;
+		printf ("\tsrc_ratio  : %.4f\n", src_data.src_ratio) ;
+		printf ("\tinput_len  : %ld\n", src_data.input_frames) ;
+		printf ("\toutput_len : %ld\n\n", src_data.output_frames_gen) ;
+		exit (1) ;
+		} ;
+
+	/* De-interleave data so SNR can be calculated for each channel. */
+	deinterleave_data (output_interleaved, output_serial, frames, channel_count) ;
+
+	for (ch = 0 ; ch < channel_count ; ch++)
+	{	snr = calculate_snr (output_serial + ch * frames, frames, 1) ;
+		if (snr < target_snr)
+		{	printf ("\n\nLine %d: channel %d snr %f should be %f\n", __LINE__, ch, snr, target_snr) ;
+			save_oct_float ("output.dat", input_serial, channel_count * frames, output_serial, channel_count * frames) ;
+			exit (1) ;
+			} ;
+		} ;
+
+	puts ("ok") ;
+
+	return ;
+} /* simple_test */
+
+/*==============================================================================
+*/
+
+static void
+process_test (int converter, int channel_count, double target_snr)
+{	SRC_STATE	*src_state ;
+	SRC_DATA	src_data ;
+	/* Same checks as a one-shot conversion, but driven through src_process () in blocks of at most BLOCK_LEN frames. */
+	double	freq, snr ;
+	int		ch, error, frames, current_in, current_out ;
+
+	printf ("\t%-22s (%2d channel%c) ............ ", "process_test", channel_count, channel_count > 1 ? 's' : ' ') ;
+	fflush (stdout) ;
+
+	assert (channel_count <= MAX_CHANNELS) ;
+
+	memset (input_serial, 0, sizeof (input_serial)) ;
+	memset (input_interleaved, 0, sizeof (input_interleaved)) ;
+	memset (output_interleaved, 0, sizeof (output_interleaved)) ;
+	memset (output_serial, 0, sizeof (output_serial)) ;
+
+	frames = BUFFER_LEN ;
+
+	/* Calculate channel_count separate windowed sine waves. */
+	for (ch = 0 ; ch < channel_count ; ch++)
+	{	freq = (400.0 + 11.333333333 * ch) / 44100.0 ;
+		gen_windowed_sines (1, &freq, 1.0, input_serial + ch * frames, frames) ;
+		} ;
+
+	/* Interleave the data in preparation for SRC. */
+	interleave_data (input_serial, input_interleaved, frames, channel_count) ;
+
+	/* Perform sample rate conversion. */
+	if ((src_state = src_new (converter, channel_count, &error)) == NULL)
+	{	printf ("\n\nLine %d : src_new() failed : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	src_data.end_of_input = 0 ; /* Set this later. */
+
+	/* Choose a conversion ratio < 1.0. */
+	src_data.src_ratio = 0.95 ;
+
+	src_data.data_in = input_interleaved ;
+	src_data.data_out = output_interleaved ;
+
+	current_in = current_out = 0 ;
+	/* Loop until end_of_input has been flagged and a call produces no further output frames. */
+	while (1)
+	{	src_data.input_frames	= MAX (MIN (BLOCK_LEN, frames - current_in), 0) ;
+		src_data.output_frames	= MAX (MIN (BLOCK_LEN, frames - current_out), 0) ;
+
+		if ((error = src_process (src_state, &src_data)))
+		{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+			exit (1) ;
+			} ;
+
+		if (src_data.end_of_input && src_data.output_frames_gen == 0)
+			break ;
+
+		current_in	+= src_data.input_frames_used ;
+		current_out += src_data.output_frames_gen ;
+
+		src_data.data_in	+= src_data.input_frames_used * channel_count ;
+		src_data.data_out	+= src_data.output_frames_gen * channel_count ;
+
+		src_data.end_of_input = (current_in >= frames) ? 1 : 0 ;
+		} ;
+
+	src_state = src_delete (src_state) ;
+
+	if (fabs (current_out - src_data.src_ratio * current_in) > 2)
+	{	printf ("\n\nLine %d : bad output data length %d should be %d.\n", __LINE__,
+					current_out, (int) floor (src_data.src_ratio * current_in)) ;
+		printf ("\tsrc_ratio  : %.4f\n", src_data.src_ratio) ;
+		printf ("\tinput_len  : %d\n", frames) ;
+		printf ("\toutput_len : %d\n\n", current_out) ;
+		exit (1) ;
+		} ;
+
+	/* De-interleave data so SNR can be calculated for each channel. */
+	deinterleave_data (output_interleaved, output_serial, frames, channel_count) ;
+
+	for (ch = 0 ; ch < channel_count ; ch++)
+	{	snr = calculate_snr (output_serial + ch * frames, frames, 1) ;
+		if (snr < target_snr)
+		{	printf ("\n\nLine %d: channel %d snr %f should be %f\n", __LINE__, ch, snr, target_snr) ;
+			save_oct_float ("output.dat", input_serial, channel_count * frames, output_serial, channel_count * frames) ;
+			exit (1) ;
+			} ;
+		} ;
+
+	puts ("ok") ;
+
+	return ;
+} /* process_test */
+
+/*==============================================================================
+*/
+
+typedef struct
+{	int channels ;			/* Samples per frame; scales current_frame into an offset into data. */
+	long total_frames ;		/* Number of frames the callback may hand out in total. */
+	long current_frame ;	/* Index of the next frame to hand out. */
+	float *data ;			/* Start of the input buffer the callback reads from. */
+} TEST_CB_DATA ;
+
+static long
+test_callback_func (void *cb_data, float **data)
+{	TEST_CB_DATA *pcb_data ;
+	/* Input callback: hands out the buffer in cb_data in chunks of at most BLOCK_LEN frames; returns the chunk size in frames. */
+	long frames ;
+
+	if ((pcb_data = cb_data) == NULL)
+		return 0 ;
+
+	if (data == NULL)
+		return 0 ;
+	/* Point the caller at the first frame that has not been handed out yet. */
+	*data = pcb_data->data + (pcb_data->current_frame * pcb_data->channels) ;
+
+	if (pcb_data->total_frames - pcb_data->current_frame < BLOCK_LEN)
+		frames = pcb_data->total_frames - pcb_data->current_frame ;
+	else
+		frames = BLOCK_LEN ;
+
+	pcb_data->current_frame += frames ;
+
+	return frames ;
+} /* test_callback_func */
+
+static void
+callback_test (int converter, int channel_count, double target_snr)
+{	TEST_CB_DATA test_callback_data ;
+	SRC_STATE	*src_state = NULL ;
+	/* Convert channel_count sine channels at ratio 0.95 through the callback API, then check output length and per-channel SNR. */
+	double	freq, snr, src_ratio ;
+	int		ch, error, frames, read_total, read_count ;
+
+	printf ("\t%-22s (%2d channel%c) ............ ", "callback_test", channel_count, channel_count > 1 ? 's' : ' ') ;
+	fflush (stdout) ;
+
+	assert (channel_count <= MAX_CHANNELS) ;
+
+	memset (input_serial, 0, sizeof (input_serial)) ;
+	memset (input_interleaved, 0, sizeof (input_interleaved)) ;
+	memset (output_interleaved, 0, sizeof (output_interleaved)) ;
+	memset (output_serial, 0, sizeof (output_serial)) ;
+	memset (&test_callback_data, 0, sizeof (test_callback_data)) ;
+
+	frames = BUFFER_LEN ;
+
+	/* Calculate channel_count separate windowed sine waves. */
+	for (ch = 0 ; ch < channel_count ; ch++)
+	{	freq = (200.0 + 33.333333333 * ch) / 44100.0 ;
+		gen_windowed_sines (1, &freq, 1.0, input_serial + ch * frames, frames) ;
+		} ;
+
+	/* Interleave the data in preparation for SRC. */
+	interleave_data (input_serial, input_interleaved, frames, channel_count) ;
+
+	/* Perform sample rate conversion. */
+	src_ratio = 0.95 ;
+	test_callback_data.channels = channel_count ;
+	test_callback_data.total_frames = frames ;
+	test_callback_data.current_frame = 0 ;
+	test_callback_data.data = input_interleaved ;
+
+	if ((src_state = src_callback_new (test_callback_func, converter, channel_count, &error, &test_callback_data)) == NULL)
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+	/* Keep reading until frames output frames have arrived or a read returns a count <= 0. */
+	read_total = 0 ;
+	while (read_total < frames)
+	{	read_count = src_callback_read (src_state, src_ratio, frames - read_total, output_interleaved + read_total * channel_count) ;
+
+		if (read_count <= 0)
+			break ;
+
+		read_total += read_count ;
+		} ;
+
+	if ((error = src_error (src_state)) != 0)
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	src_state = src_delete (src_state) ;
+
+	if (fabs (read_total - src_ratio * frames) > 2)
+	{	printf ("\n\nLine %d : bad output data length %d should be %d.\n", __LINE__,
+					read_total, (int) floor (src_ratio * frames)) ;
+		printf ("\tsrc_ratio  : %.4f\n", src_ratio) ;
+		printf ("\tinput_len  : %d\n", frames) ;
+		printf ("\toutput_len : %d\n\n", read_total) ;
+		exit (1) ;
+		} ;
+
+	/* De-interleave data so SNR can be calculated for each channel. */
+	deinterleave_data (output_interleaved, output_serial, frames, channel_count) ;
+
+	for (ch = 0 ; ch < channel_count ; ch++)
+	{	snr = calculate_snr (output_serial + ch * frames, frames, 1) ;
+		if (snr < target_snr)
+		{	printf ("\n\nLine %d: channel %d snr %f should be %f\n", __LINE__, ch, snr, target_snr) ;
+			save_oct_float ("output.dat", input_serial, channel_count * frames, output_serial, channel_count * frames) ;
+			exit (1) ;
+			} ;
+		} ;
+
+	puts ("ok") ;
+
+	return ;
+} /* callback_test */
+
diff --git a/lsr-tests/multichan_throughput_test.c b/lsr-tests/multichan_throughput_test.c
new file mode 100644
index 0000000..523139e
--- /dev/null
+++ b/lsr-tests/multichan_throughput_test.c
@@ -0,0 +1,216 @@
+/*
+** Copyright (C) 2008-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <samplerate.h>
+
+#include "config.h"
+
+#include "util.h"
+#include "float_cast.h"
+
+#define BUFFER_LEN	(1<<17)
+
+static float input [BUFFER_LEN] ;
+static float output [BUFFER_LEN] ;
+
+static long
+throughput_test (int converter, int channels, long best_throughput)
+{	SRC_DATA src_data ;
+	clock_t start_time, clock_time ;
+	double duration ;
+	long total_frames = 0, throughput ;
+	int error ;
+	/* Measure output frames per second of clock () time for src_simple (); returns MAX (this run, best_throughput). */
+	printf ("    %-30s     %2d         ", src_get_name (converter), channels) ;
+	fflush (stdout) ;
+
+	src_data.data_in = input ;
+	src_data.input_frames = ARRAY_LEN (input) / channels ;
+
+	src_data.data_out = output ;
+	src_data.output_frames = ARRAY_LEN (output) / channels ;
+
+	src_data.src_ratio = 0.99 ;
+
+	sleep (2) ;
+	/* Repeat the same conversion until at least 5.0 seconds have elapsed according to clock (). */
+	start_time = clock () ;
+
+	do
+	{
+		if ((error = src_simple (&src_data, converter, channels)) != 0)
+		{	puts (src_strerror (error)) ;
+			exit (1) ;
+			} ;
+
+		total_frames += src_data.output_frames_gen ;
+
+		clock_time = clock () - start_time ;
+		duration = (1.0 * clock_time) / CLOCKS_PER_SEC ;
+	}
+	while (duration < 5.0) ;
+	/* The sanity checks below look at the counters left behind by the last iteration only. */
+	if (src_data.input_frames_used != src_data.input_frames)
+	{	printf ("\n\nLine %d : input frames used %ld should be %ld\n", __LINE__, src_data.input_frames_used, src_data.input_frames) ;
+		exit (1) ;
+		} ;
+
+	if (fabs (src_data.src_ratio * src_data.input_frames_used - src_data.output_frames_gen) > 2)
+	{	printf ("\n\nLine %d : input / output length mismatch.\n\n", __LINE__) ;
+		printf ("    input len  : %d\n", ARRAY_LEN (input) / channels) ;
+		printf ("    output len : %ld (should be %g +/- 2)\n\n", src_data.output_frames_gen,
+				floor (0.5 + src_data.src_ratio * src_data.input_frames_used)) ;
+		exit (1) ;
+		} ;
+
+	throughput = lrint (floor (total_frames / duration)) ;
+	/* A best_throughput of 0 on entry selects the two-column output without the "best" figure. */
+	if (best_throughput == 0)
+	{	best_throughput = MAX (throughput, best_throughput) ;
+		printf ("%5.2f      %10ld\n", duration, throughput) ;
+		}
+	else
+	{	best_throughput = MAX (throughput, best_throughput) ;
+		printf ("%5.2f      %10ld       %10ld\n", duration, throughput, best_throughput) ;
+		}
+
+	return best_throughput ;
+} /* throughput_test */
+
+static void
+single_run (void)
+{	const int max_channels = 10 ;
+	int k ;
+	/* One pass per sinc converter: 1..max_channels/2 channels for fastest and medium, 1..max_channels for best quality. */
+	printf ("\n    CPU name : %s\n", get_cpu_name ()) ;
+
+	puts (
+		"\n"
+		"    Converter                        Channels    Duration      Throughput\n"
+		"    ---------------------------------------------------------------------"
+		) ;
+
+	for (k = 1 ; k <= max_channels / 2 ; k++)
+		throughput_test (SRC_SINC_FASTEST, k, 0) ;
+
+	puts ("") ;
+	for (k = 1 ; k <= max_channels / 2 ; k++)
+		throughput_test (SRC_SINC_MEDIUM_QUALITY, k, 0) ;
+
+	puts ("") ;
+	for (k = 1 ; k <= max_channels ; k++)
+		throughput_test (SRC_SINC_BEST_QUALITY, k, 0) ;
+
+	puts ("") ;
+	return ;
+} /* single_run */
+
+static void
+multi_run (int run_count)
+{	int k, ch ;
+	/* For 1..5 channels, run each sinc converter run_count times and report the best throughput seen per converter. */
+	printf ("\n    CPU name : %s\n", get_cpu_name ()) ;
+
+	puts (
+		"\n"
+		"    Converter                        Channels    Duration      Throughput    Best Throughput\n"
+		"    ----------------------------------------------------------------------------------------"
+		) ;
+
+	for (ch = 1 ; ch <= 5 ; ch++)
+	{	long sinc_fastest = 0, sinc_medium = 0, sinc_best = 0 ;
+
+		for (k = 0 ; k < run_count ; k++)
+		{	sinc_fastest =		throughput_test (SRC_SINC_FASTEST, ch, sinc_fastest) ;
+			sinc_medium =		throughput_test (SRC_SINC_MEDIUM_QUALITY, ch, sinc_medium) ;
+			sinc_best =			throughput_test (SRC_SINC_BEST_QUALITY, ch, sinc_best) ;
+
+			puts ("") ;
+
+			/* Let the CPU cool down. We might be running on a laptop. */
+			sleep (10) ;
+			} ;
+
+		puts (
+			"\n"
+			"    Converter                        Best Throughput\n"
+			"    ------------------------------------------------"
+			) ;
+
+		printf ("    %-30s    %10ld\n", src_get_name (SRC_SINC_FASTEST), sinc_fastest) ;
+		printf ("    %-30s    %10ld\n", src_get_name (SRC_SINC_MEDIUM_QUALITY), sinc_medium) ;
+		printf ("    %-30s    %10ld\n", src_get_name (SRC_SINC_BEST_QUALITY), sinc_best) ;
+		} ;
+
+	puts ("") ;
+} /* multi_run */
+
+static void
+usage_exit (const char * argv0)
+{	const char * cptr ;
+	/* Print the usage text with the program's base name, then exit with status 0. */
+	if ((cptr = strrchr (argv0, '/')) != NULL)
+		argv0 = cptr + 1 ;	/* strrchr () points at the '/' itself; step past it. */
+
+	printf (
+		"Usage :\n"
+	 	"    %s                 - Single run of the throughput test.\n"
+		"    %s --best-of N     - Do N runs of test a print bext result.\n"
+		"\n",
+		argv0, argv0) ;
+
+	exit (0) ;
+} /* usage_exit */
+
+int
+main (int argc, char ** argv)
+{	double freq ;
+	/* No arguments: single run. "--best-of N" with N in 1..20: N runs. Anything else: usage text. */
+	memset (input, 0, sizeof (input)) ;
+	freq = 0.01 ;
+	gen_windowed_sines (1, &freq, 1.0, input, BUFFER_LEN) ;
+
+	if (argc == 1)
+		single_run () ;
+	else if (argc == 3 && strcmp (argv [1], "--best-of") == 0)
+	{	int run_count = atoi (argv [2]) ;
+
+		if (run_count < 1 || run_count > 20)
+		{	printf ("Please be sensible. Run count should be in range [1, 20].\n") ;	/* Matches the check above. */
+			exit (1) ;
+			} ;
+
+		multi_run (run_count) ;
+		}
+	else
+		usage_exit (argv [0]) ;
+
+	puts (
+		"            Duration is in seconds.\n"
+		"            Throughput is in frames/sec (more is better).\n"
+		) ;
+
+	return 0 ;
+} /* main */
+
diff --git a/lsr-tests/reset_test.c b/lsr-tests/reset_test.c
new file mode 100644
index 0000000..40485c2
--- /dev/null
+++ b/lsr-tests/reset_test.c
@@ -0,0 +1,238 @@
+/*
+** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <samplerate.h>
+
+#include "util.h"
+
+#define	BUFFER_LEN		2048
+#define CB_READ_LEN		256
+
+static void process_reset_test (int converter) ;
+static void callback_reset_test (int converter) ;
+
+static float data_one [BUFFER_LEN] ;
+static float data_zero [BUFFER_LEN] ;
+
+int
+main (void)
+{	static const int converters [] = { SRC_ZERO_ORDER_HOLD, SRC_LINEAR, SRC_SINC_FASTEST } ;
+	int idx ;
+
+	puts ("") ;
+	/* Reset test driven through src_process (), once per converter. */
+	for (idx = 0 ; idx < ARRAY_LEN (converters) ; idx++)
+		process_reset_test (converters [idx]) ;
+
+	/* Reset test driven through the callback API, once per converter. */
+	for (idx = 0 ; idx < ARRAY_LEN (converters) ; idx++)
+		callback_reset_test (converters [idx]) ;
+
+	puts ("") ;
+	return 0 ;
+} /* main */
+
+static void
+process_reset_test (int converter)
+{	static float output [BUFFER_LEN] ;
+	/* Check that src_reset () clears state left behind by earlier src_process () input. */
+	SRC_STATE *src_state ;
+	SRC_DATA src_data ;
+	int k, error ;
+
+	printf ("\tprocess_reset_test  (%-28s) ....... ", src_get_name (converter)) ;
+	fflush (stdout) ;
+
+	for (k = 0 ; k < BUFFER_LEN ; k++)
+	{	data_one [k] = 1.0 ;
+		data_zero [k] = 0.0 ;
+		} ;
+
+	/* Get a converter. */
+	if ((src_state = src_new (converter, 1, &error)) == NULL)
+	{	printf ("\n\nLine %d : src_new() failed : %s.\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	/* Process a bunch of 1.0 valued samples. */
+	src_data.data_in		= data_one ;
+	src_data.data_out		= output ;
+	src_data.input_frames	= BUFFER_LEN ;
+	src_data.output_frames	= BUFFER_LEN ;
+	src_data.src_ratio		= 0.9 ;
+	src_data.end_of_input	= 1 ;
+
+	if ((error = src_process (src_state, &src_data)) != 0)
+	{	printf ("\n\nLine %d : src_process () returned error : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	/* Reset the state of the converter. */
+	src_reset (src_state) ;
+
+	/* Now process some zero data. */
+	src_data.data_in		= data_zero ;
+	src_data.data_out		= output ;
+	src_data.input_frames	= BUFFER_LEN ;
+	src_data.output_frames	= BUFFER_LEN ;
+	src_data.src_ratio		= 0.9 ;
+	src_data.end_of_input	= 1 ;
+
+	if ((error = src_process (src_state, &src_data)) != 0)
+	{	printf ("\n\nLine %d : src_process () returned error : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	/* Finally make sure that the output data is zero, i.e. the reset was successful. */
+	for (k = 0 ; k < BUFFER_LEN / 2 ; k++)
+		if (output [k] != 0.0)
+		{	printf ("\n\nLine %d : output [%d] should be 0.0, is %f.\n\n", __LINE__, k, output [k]) ;
+			exit (1) ;
+			} ;
+
+	/* Make sure that this function has been exported. */
+	src_set_ratio (src_state, 1.0) ;
+
+	/* Delete converter. */
+	src_state = src_delete (src_state) ;
+
+	puts ("ok") ;
+} /* process_reset_test */
+
+/*==============================================================================
+*/
+
+typedef struct	/* State handed to test_callback_func () through its cb_data pointer. */
+{	int channels ;	/* Set to 1 by callback_reset_test (). */
+	long count, total ;	/* Frames already handed out / frames available in data. */
+	float *data ;	/* Start of the sample buffer the callback serves. */
+} TEST_CB_DATA ;
+
+static long
+test_callback_func (void *cb_data, float **data)
+{	TEST_CB_DATA *state = cb_data ;
+	long remaining ;
+
+	/* Nothing can be supplied without both the state and an out pointer. */
+	if (state == NULL || data == NULL)
+		return 0 ;
+
+	/* Hand out every frame not yet delivered, or none once exhausted. */
+	remaining = state->total - state->count ;
+	if (remaining < 0)
+		remaining = 0 ;
+
+	/* Point the caller at the first undelivered sample. */
+	*data = state->data + state->count ;
+
+	/* Record that those frames have now been handed out. */
+	state->count += remaining ;
+
+	return remaining ;
+} /* test_callback_func */
+
+static void
+callback_reset_test (int converter)
+{	static TEST_CB_DATA test_callback_data ;
+	/* Same reset check as process_reset_test (), but driven through the callback API. */
+	static float output [BUFFER_LEN] ;
+
+	SRC_STATE *src_state ;
+
+	double src_ratio = 1.1 ;
+	long read_count, read_total ;
+	int k, error ;
+
+	printf ("\tcallback_reset_test (%-28s) ....... ", src_get_name (converter)) ;
+	fflush (stdout) ;
+
+	for (k = 0 ; k < ARRAY_LEN (data_one) ; k++)
+	{	data_one [k] = 1.0 ;
+		data_zero [k] = 0.0 ;
+		} ;
+
+	if ((src_state = src_callback_new (test_callback_func, converter, 1, &error, &test_callback_data)) == NULL)
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	/* Point the callback at the buffer of 1.0 valued samples. */
+	test_callback_data.channels = 1 ;
+	test_callback_data.count = 0 ;
+	test_callback_data.total = ARRAY_LEN (data_one) ;
+	test_callback_data.data = data_one ;
+	/* Read output in chunks of at most CB_READ_LEN frames until a read returns nothing. */
+	read_total = 0 ;
+	do
+	{	read_count = (ARRAY_LEN (output) - read_total > CB_READ_LEN) ? CB_READ_LEN : ARRAY_LEN (output) - read_total ;
+		read_count = src_callback_read (src_state, src_ratio, read_count, output + read_total) ;
+		read_total += read_count ;
+		}
+	while (read_count > 0) ;
+
+	/* Check for errors. */
+	if ((error = src_error (src_state)) != 0)
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	/* Reset the state of the converter. */
+	src_reset (src_state) ;
+
+	/* Point the callback at the buffer of 0.0 valued samples. */
+	test_callback_data.channels = 1 ;
+	test_callback_data.count = 0 ;
+	test_callback_data.total = ARRAY_LEN (data_zero) ;
+	test_callback_data.data = data_zero ;
+
+	/* Now process some zero data. */
+	read_total = 0 ;
+	do
+	{	read_count = (ARRAY_LEN (output) - read_total > CB_READ_LEN) ? CB_READ_LEN : ARRAY_LEN (output) - read_total ;
+		read_count = src_callback_read (src_state, src_ratio, read_count, output + read_total) ;
+		read_total += read_count ;
+		}
+	while (read_count > 0) ;
+
+	/* Check for errors. */
+	if ((error = src_error (src_state)) != 0)
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	/* Finally make sure that the output data is zero, i.e. the reset was successful. */
+	for (k = 0 ; k < BUFFER_LEN / 2 ; k++)
+		if (output [k] != 0.0)
+		{	printf ("\n\nLine %d : output [%d] should be 0.0, is %f.\n\n", __LINE__, k, output [k]) ;
+			save_oct_float ("output.dat", data_one, ARRAY_LEN (data_one), output, ARRAY_LEN (output)) ;
+			exit (1) ;
+			} ;
+
+	/* Make sure that this function has been exported. */
+	src_set_ratio (src_state, 1.0) ;
+
+	/* Delete converter. */
+	src_state = src_delete (src_state) ;
+
+	puts ("ok") ;
+} /* callback_reset_test */
+
+
diff --git a/lsr-tests/simple_test.c b/lsr-tests/simple_test.c
new file mode 100644
index 0000000..91dcde3
--- /dev/null
+++ b/lsr-tests/simple_test.c
@@ -0,0 +1,117 @@
+/*
+** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include <samplerate.h>
+
+#include "util.h"
+
+#define	BUFFER_LEN		2048
+
+static void simple_test (int converter, double ratio) ;
+
+int
+main (void)
+{	static double src_ratios [] =
+	{	1.0001, 0.099, 0.1, 0.33333333, 0.789, 1.9, 3.1, 9.9
+	} ;
+	static const struct
+	{	const char *title ;
+		int converter ;
+	} suites [] =
+	{	{ "    Zero Order Hold interpolator :", SRC_ZERO_ORDER_HOLD },
+		{ "    Linear interpolator :", SRC_LINEAR },
+		{ "    Sinc interpolator :", SRC_SINC_FASTEST }
+	} ;
+	int s, r ;
+
+	puts ("") ;
+	/* One titled section per converter, sweeping every ratio within it. */
+	for (s = 0 ; s < ARRAY_LEN (suites) ; s++)
+	{	puts (suites [s].title) ;
+		for (r = 0 ; r < ARRAY_LEN (src_ratios) ; r++)
+			simple_test (suites [s].converter, src_ratios [r]) ;
+		} ;
+
+	puts ("") ;
+	return 0 ;
+} /* main */
+
+static void
+simple_test (int converter, double src_ratio)
+{	static float input [BUFFER_LEN], output [BUFFER_LEN] ;
+	/* Run src_simple () at src_ratio and check the number of output frames generated. */
+	SRC_DATA	src_data ;
+
+	int input_len, output_len, error, terminate ;
+
+	printf ("\tsimple_test      (SRC ratio = %6.4f) ........... ", src_ratio) ;
+	fflush (stdout) ;
+
+	/* Calculate maximum input and output lengths. */
+	if (src_ratio >= 1.0)
+	{	output_len = BUFFER_LEN ;
+		input_len = (int) floor (BUFFER_LEN / src_ratio) ;
+		}
+	else
+	{	input_len = BUFFER_LEN ;
+		output_len = (int) floor (BUFFER_LEN * src_ratio) ;
+		} ;
+
+	/* Reduce input_len by 10 so output is longer than necessary. */
+	input_len -= 10 ;
+
+	if (output_len > BUFFER_LEN)
+	{	printf ("\n\nLine %d : output_len > BUFFER_LEN\n\n", __LINE__) ;
+		exit (1) ;
+		} ;
+
+	memset (&src_data, 0, sizeof (src_data)) ;
+
+	src_data.data_in = input ;
+	src_data.input_frames = input_len ;
+
+	src_data.src_ratio = src_ratio ;
+
+	src_data.data_out = output ;
+	src_data.output_frames = BUFFER_LEN ;
+
+	if ((error = src_simple (&src_data, converter, 1)))
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+	/* Tolerance scale: the ratio or its reciprocal, whichever is >= 1, rounded up. */
+	terminate = (int) ceil ((src_ratio >= 1.0) ? src_ratio : 1.0 / src_ratio) ;
+
+	if (fabs (src_data.output_frames_gen - src_ratio * input_len) > 2 * terminate)
+	{	printf ("\n\nLine %d : bad output data length %ld should be %d.\n", __LINE__,
+					src_data.output_frames_gen, (int) floor (src_ratio * input_len)) ;
+		printf ("\tsrc_ratio  : %.4f\n", src_ratio) ;
+		printf ("\tinput_len  : %d\n\toutput_len : %d\n\n", input_len, output_len) ;
+		exit (1) ;
+		} ;
+
+	puts ("ok") ;
+
+	return ;
+} /* simple_test */
+
diff --git a/lsr-tests/sndfile-resample.c b/lsr-tests/sndfile-resample.c
new file mode 100644
index 0000000..63d179c
--- /dev/null
+++ b/lsr-tests/sndfile-resample.c
@@ -0,0 +1,332 @@
+/*
+** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <math.h>
+
+#if (HAVE_SNDFILE)
+
+#include <samplerate.h>
+#include <sndfile.h>
+
+#define DEFAULT_CONVERTER SRC_SINC_MEDIUM_QUALITY
+
+#define	BUFFER_LEN		4096	/*-(1<<16)-*/
+
+static void usage_exit (const char *progname) ;
+static sf_count_t sample_rate_convert (SNDFILE *infile, SNDFILE *outfile, int converter, double src_ratio, int channels, double * gain) ;
+static double apply_gain (float * data, long frames, int channels, double max, double gain) ;
+
+int
+main (int argc, char *argv [])
+{	SNDFILE	*infile, *outfile = NULL ;
+	SF_INFO sfinfo ;
+
+	sf_count_t	count ;
+	double		src_ratio = -1.0, gain = 1.0 ;
+	int			new_sample_rate = -1, k, converter, max_speed = SF_FALSE ;
+
+	if (argc == 2 && strcmp (argv [1], "--version") == 0)
+	{	char buffer [64], *cptr ;
+
+		if ((cptr = strrchr (argv [0], '/')) != NULL)
+			argv [0] = cptr + 1 ;
+		if ((cptr = strrchr (argv [0], '\\')) != NULL)
+			argv [0] = cptr + 1 ;
+
+		sf_command (NULL, SFC_GET_LIB_VERSION, buffer, sizeof (buffer)) ;
+
+		printf ("%s (%s,%s)\n", argv [0], src_get_version (), buffer) ;
+		exit (0) ;
+		} ;
+
+	if (argc != 5 && argc != 7 && argc != 8)
+		usage_exit (argv [0]) ;
+
+	/* Set default converter. */
+	converter = DEFAULT_CONVERTER ;
+	/* Options occupy argv [1 .. argc - 3]; the last two arguments are the file names. */
+	for (k = 1 ; k < argc - 2 ; k++)
+	{	if (strcmp (argv [k], "--max-speed") == 0)
+			max_speed = SF_TRUE ;
+		else if (strcmp (argv [k], "-to") == 0)
+		{	k ++ ;
+			new_sample_rate = atoi (argv [k]) ;
+			}
+		else if (strcmp (argv [k], "-by") == 0)
+		{	k ++ ;
+			src_ratio = atof (argv [k]) ;
+			}
+		else if (strcmp (argv [k], "-c") == 0)
+		{	k ++ ;
+			converter = atoi (argv [k]) ;
+			}
+		else
+			usage_exit (argv [0]) ;
+		} ;
+
+	if (new_sample_rate <= 0 && src_ratio <= 0.0)
+		usage_exit (argv [0]) ;
+
+	if (src_get_name (converter) == NULL)
+	{	printf ("Error : bad converter number.\n") ;
+		usage_exit (argv [0]) ;
+		} ;
+
+	if (strcmp (argv [argc - 2], argv [argc - 1]) == 0)
+	{	printf ("Error : input and output file names are the same.\n") ;
+		exit (1) ;
+		} ;
+
+	if ((infile = sf_open (argv [argc - 2], SFM_READ, &sfinfo)) == NULL)
+	{	printf ("Error : Not able to open input file '%s'\n", argv [argc - 2]) ;
+		exit (1) ;
+		} ;
+
+	printf ("Input File    : %s\n", argv [argc - 2]) ;
+	printf ("Sample Rate   : %d\n", sfinfo.samplerate) ;
+	printf ("Input Frames  : %ld\n\n", (long) sfinfo.frames) ;
+
+	if (new_sample_rate > 0)
+	{	src_ratio = (1.0 * new_sample_rate) / sfinfo.samplerate ;
+		sfinfo.samplerate = new_sample_rate ;
+		}
+	else if (src_is_valid_ratio (src_ratio))
+		sfinfo.samplerate = (int) floor (sfinfo.samplerate * src_ratio) ;
+	else
+	{	printf ("Not able to determine new sample rate. Exiting.\n") ;
+		sf_close (infile) ;
+		exit (1) ;
+		} ;
+
+	if (fabs (src_ratio - 1.0) < 1e-20)
+	{	printf ("Target samplerate and input samplerate are the same. Exiting.\n") ;
+		sf_close (infile) ;
+		exit (0) ;
+		} ;
+
+	printf ("SRC Ratio     : %f\n", src_ratio) ;
+	printf ("Converter     : %s\n\n", src_get_name (converter)) ;
+
+	if (src_is_valid_ratio (src_ratio) == 0)
+	{	printf ("Error : Sample rate change out of valid range.\n") ;
+		sf_close (infile) ;
+		exit (1) ;
+		} ;
+
+	/* Remove the output file if it already exists. */
+	remove (argv [argc - 1]) ;
+
+	printf ("Output file   : %s\n", argv [argc - 1]) ;
+	printf ("Sample Rate   : %d\n", sfinfo.samplerate) ;
+	/* Convert; a negative count means the output clipped, so redo with the reduced gain. */
+	do
+	{	sf_close (outfile) ;	/* outfile is still NULL on the first pass. */
+
+		if ((outfile = sf_open (argv [argc - 1], SFM_WRITE, &sfinfo)) == NULL)
+		{	printf ("Error : Not able to open output file '%s'\n", argv [argc - 1]) ;
+			sf_close (infile) ;
+			exit (1) ;
+			} ;
+
+		if (max_speed)
+		{	/* This is mainly for the comparison program tests/src-evaluate.c */
+			sf_command (outfile, SFC_SET_ADD_PEAK_CHUNK, NULL, SF_FALSE) ;
+			}
+		else
+		{	/* Update the file header after every write. */
+			sf_command (outfile, SFC_SET_UPDATE_HEADER_AUTO, NULL, SF_TRUE) ;
+			} ;
+
+		sf_command (outfile, SFC_SET_CLIPPING, NULL, SF_TRUE) ;
+
+		count = sample_rate_convert (infile, outfile, converter, src_ratio, sfinfo.channels, &gain) ;
+		}
+	while (count < 0) ;
+
+	printf ("Output Frames : %ld\n\n", (long) count) ;
+
+	sf_close (infile) ;
+	sf_close (outfile) ;
+
+	return 0 ;
+} /* main */
+
+/*==============================================================================
+*/
+
+static sf_count_t
+sample_rate_convert (SNDFILE *infile, SNDFILE *outfile, int converter, double src_ratio, int channels, double * gain)
+{	static float input [BUFFER_LEN] ;
+	static float output [BUFFER_LEN] ;
+	/* Convert infile to outfile with *gain applied; returns frames written, or -1 (after lowering *gain) if the peak exceeded 1.0. */
+	SRC_STATE	*src_state ;
+	SRC_DATA	src_data ;
+	int			error ;
+	double		max = 0.0 ;
+	sf_count_t	output_count = 0 ;
+
+	sf_seek (infile, 0, SEEK_SET) ;
+	sf_seek (outfile, 0, SEEK_SET) ;
+
+	/* Initialize the sample rate converter. */
+	if ((src_state = src_new (converter, channels, &error)) == NULL)
+	{	printf ("\n\nError : src_new() failed : %s.\n\n", src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	src_data.end_of_input = 0 ; /* Set this later. */
+
+	/* Start with zero to force load in while loop. */
+	src_data.input_frames = 0 ;
+	src_data.data_in = input ;
+
+	src_data.src_ratio = src_ratio ;
+
+	src_data.data_out = output ;
+	src_data.output_frames = BUFFER_LEN /channels ;
+
+	while (1)
+	{
+		/* If the input buffer is empty, refill it. */
+		if (src_data.input_frames == 0)
+		{	src_data.input_frames = sf_readf_float (infile, input, BUFFER_LEN / channels) ;
+			src_data.data_in = input ;
+
+			/* The last read will not be a full buffer, so set end_of_input. */
+			if (src_data.input_frames < BUFFER_LEN / channels)
+				src_data.end_of_input = SF_TRUE ;
+			} ;
+
+		if ((error = src_process (src_state, &src_data)))
+		{	printf ("\nError : %s\n", src_strerror (error)) ;
+			exit (1) ;
+			} ;
+
+		/* Terminate if done. */
+		if (src_data.end_of_input && src_data.output_frames_gen == 0)
+			break ;
+
+		max = apply_gain (src_data.data_out, src_data.output_frames_gen, channels, max, *gain) ;
+
+		/* Write output. */
+		sf_writef_float (outfile, output, src_data.output_frames_gen) ;
+		output_count += src_data.output_frames_gen ;
+		/* Advance past the input frames the converter consumed. */
+		src_data.data_in += src_data.input_frames_used * channels ;
+		src_data.input_frames -= src_data.input_frames_used ;
+		} ;
+
+	src_state = src_delete (src_state) ;
+
+	if (max > 1.0)
+	{	*gain = 1.0 / max ;
+		printf ("\nOutput has clipped. Restarting conversion to prevent clipping.\n\n") ;
+		return -1 ;
+		} ;
+
+	return output_count ;
+} /* sample_rate_convert */
+
+static double
+apply_gain (float * data, long frames, int channels, double max, double gain)
+{	const long sample_count = frames * channels ;
+	long idx ;
+	double magnitude ;
+	/* Scale each sample in place; max carries the largest magnitude seen so far. */
+	for (idx = 0 ; idx < sample_count ; idx++)
+	{	data [idx] *= gain ;
+		magnitude = fabs (data [idx]) ;
+		if (magnitude > max)
+			max = magnitude ;
+		} ;
+	return max ;
+} /* apply_gain */
+
+static void
+usage_exit (const char *progname)
+{	char lsf_ver [128] ;
+	const char	*cptr ;
+	int		k ;
+	/* Print the help text and the converter list, then exit with status 1. */
+	if ((cptr = strrchr (progname, '/')) != NULL)
+		progname = cptr + 1 ;
+
+	if ((cptr = strrchr (progname, '\\')) != NULL)
+		progname = cptr + 1 ;
+
+	/* Fetch libsndfile's version string for the banner below. */
+	sf_command (NULL, SFC_GET_LIB_VERSION, lsf_ver, sizeof (lsf_ver)) ;
+
+	printf ("\n"
+		"  A Sample Rate Converter using libsndfile for file I/O and Secret \n"
+		"  Rabbit Code (aka libsamplerate) for performing the conversion.\n"
+		"  It works on any file format supported by libsndfile with any \n"
+		"  number of channels (limited only by host memory).\n"
+		"\n"
+		"       %s\n"
+		"       %s\n"
+		"\n"
+		"  Usage : \n"
+		"       %s -to <new sample rate> [-c <number>] <input file> <output file>\n"
+		"       %s -by <amount> [-c <number>] <input file> <output file>\n"
+		"\n", src_get_version (), lsf_ver, progname, progname) ;
+
+	puts (
+		"  The optional -c argument allows the converter type to be chosen from\n"
+		"  the following list :"
+		"\n"
+		) ;
+	/* List converters by index until src_get_name () returns NULL. */
+	for (k = 0 ; (cptr = src_get_name (k)) != NULL ; k++)
+		printf ("       %d : %s%s\n", k, cptr, k == DEFAULT_CONVERTER ? " (default)" : "") ;
+
+	puts ("") ;
+
+	exit (1) ;
+} /* usage_exit */
+
+/*==============================================================================
+*/
+
+#else /* (HAVE_SNDFILE == 0) */
+
+/* Alternative main function when libsndfile is not available. */
+
+int
+main (void)
+{	static const char notice [] =
+		"\n"
+		"****************************************************************\n"
+		"  This example program was compiled without libsndfile \n"
+		"  (http://www.mega-nerd.com/libsndfile/).\n"
+		"  It is therefore completely broken and non-functional.\n"
+		"****************************************************************\n"
+		"\n" ;
+
+	puts (notice) ;	/* Explain why the real program was compiled out. */
+	return 0 ;
+} /* main */
+
+#endif
+
diff --git a/lsr-tests/snr_bw_test.c b/lsr-tests/snr_bw_test.c
new file mode 100644
index 0000000..26fb279
--- /dev/null
+++ b/lsr-tests/snr_bw_test.c
@@ -0,0 +1,401 @@
+/*
+** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+
+#if (HAVE_FFTW3)
+
+#include <samplerate.h>
+
+#include "util.h"
+
+#define	BUFFER_LEN		50000
+#define	MAX_FREQS		4
+#define	MAX_RATIOS		6
+#define	MAX_SPEC_LEN	(1<<15)
+
+#ifndef	M_PI
+#define	M_PI			3.14159265358979323846264338
+#endif
+
+enum	/* Values used for CONVERTER_TEST.do_bandwidth_test. */
+{	BOOLEAN_FALSE	= 0,
+	BOOLEAN_TRUE	= 1
+} ;
+
+typedef struct	/* Parameters and pass criteria for one snr_test () run. */
+{	int		freq_count ;	/* Number of entries of freqs in use. */
+	double	freqs [MAX_FREQS] ;	/* Passed with freq_count to gen_windowed_sines (). */
+
+	double	src_ratio ;	/* Conversion ratio used for this test. */
+	int		pass_band_peaks ;	/* Passed to calculate_snr (). */
+
+	double	snr ;	/* Minimum SNR in dB that snr_test () accepts. */
+	double	peak_value ;	/* Expected output peak; snr_test () allows +/- 0.01. */
+} SINGLE_TEST ;
+
+typedef struct	/* All SNR tests for one converter type. */
+{	int			converter ;	/* Converter type passed to snr_test () and bandwidth_test (). */
+	int			tests ;	/* How many entries of test_data main () runs. */
+	int			do_bandwidth_test ;	/* BOOLEAN_FALSE makes main () skip bandwidth_test (). */
+	SINGLE_TEST	test_data [10] ;
+} CONVERTER_TEST ;
+
+static double snr_test (SINGLE_TEST *snr_test_data, int number, int converter, int verbose) ;
+static double find_peak (float *output, int output_len) ;
+static double bandwidth_test (int converter, int verbose) ;
+
+int
+main (int argc, char *argv [])
+{	CONVERTER_TEST snr_test_data [] =
+	{
+		{	SRC_ZERO_ORDER_HOLD,
+			8,
+			BOOLEAN_FALSE,
+			{	{	1,	{ 0.01111111111 },		3.0,		1,	 28.0,	1.0 },
+				{	1,	{ 0.01111111111 },		0.6,		1,	 36.0,	1.0 },
+				{	1,	{ 0.01111111111 },		0.3,		1,	 36.0,	1.0 },
+				{	1,	{ 0.01111111111 },		1.0,		1,	150.0,	1.0 },
+				{	1,	{ 0.01111111111 },		1.001,		1,	 38.0,	1.0 },
+				{	2,	{ 0.011111, 0.324 },	1.9999,		2,	 14.0,	.96 },
+				{	2,	{ 0.012345, 0.457 },	0.456789,	1,	 12.0,	.96 },
+				{	1,	{ 0.3511111111 },		1.33,		1,	 10.0,	1.0 }
+				}
+			},
+
+		{	SRC_LINEAR,
+			8,
+			BOOLEAN_FALSE,
+			{	{	1,	{ 0.01111111111 },		3.0,		1,	 73.0,	1.0 },
+				{	1,	{ 0.01111111111 },		0.6,		1,	 73.0,	1.0 },
+				{	1,	{ 0.01111111111 },		0.3,		1,	 73.0,	1.0 },
+				{	1,	{ 0.01111111111 },		1.0,		1,	150.0,	1.0 },
+				{	1,	{ 0.01111111111 },		1.001,		1,	 77.0,	1.0 },
+				{	2,	{ 0.011111, 0.324 },	1.9999,		2,	 16.0,	0.96 },
+				{	2,	{ 0.012345, 0.457 },	0.456789,	1,	 26.0,	0.96 },
+				{	1,	{ 0.3511111111 },		1.33,		1,	 14.4,	0.99 }
+				}
+			},
+
+		{	SRC_SINC_FASTEST,
+			9,
+			BOOLEAN_TRUE,
+			{	{	1,	{ 0.01111111111 },		3.0,		1,	100.0,	1.0 },
+				{	1,	{ 0.01111111111 },		0.6,		1,	 99.0,	1.0 },
+				{	1,	{ 0.01111111111 },		0.3,		1,	100.0,	1.0 },
+				{	1,	{ 0.01111111111 },		1.0,		1,	150.0,	1.0 },
+				{	1,	{ 0.01111111111 },		1.001,		1,	100.0,	1.0 },
+				{	2,	{ 0.011111, 0.324 },	1.9999,		2,	 97.0,	1.0 },
+				{	2,	{ 0.012345, 0.457 },	0.456789,	1,	100.0,	0.5 },
+				{	2,	{ 0.011111, 0.45 },		0.6,		1,	 97.0,	0.5 },
+				{	1,	{ 0.3511111111 },		1.33,		1,	 97.0,	1.0 }
+				}
+			},
+
+		{	SRC_SINC_MEDIUM_QUALITY,
+			9,
+			BOOLEAN_TRUE,
+			{	{	1,	{ 0.01111111111 },		3.0,		1,	130.0,	1.0 },
+				{	1,	{ 0.01111111111 },		0.6,		1,	132.0,	1.0 },
+				{	1,	{ 0.01111111111 },		0.3,		1,	138.0,	1.0 },
+				{	1,	{ 0.01111111111 },		1.0,		1,	155.0,	1.0 },
+				{	1,	{ 0.01111111111 },		1.001,		1,	134.0,	1.0 },
+				{	2,	{ 0.011111, 0.324 },	1.9999,		2,	127.0,	1.0 },
+				{	2,	{ 0.012345, 0.457 },	0.456789,	1,	124.0,	0.5 },
+				{	2,	{ 0.011111, 0.45 },		0.6,		1,	126.0,	0.5 },
+				{	1,	{ 0.43111111111 },		1.33,		1,	121.0,	1.0 }
+				}
+			},
+
+		{	SRC_SINC_BEST_QUALITY,
+			9,
+			BOOLEAN_TRUE,
+			{	{	1,	{ 0.01111111111 },		3.0,		1,	147.0,	1.0 },
+				{	1,	{ 0.01111111111 },		0.6,		1,	147.0,	1.0 },
+				{	1,	{ 0.01111111111 },		0.3,		1,	147.0,	1.0 },
+				{	1,	{ 0.01111111111 },		1.0,		1,	155.0,	1.0 },
+				{	1,	{ 0.01111111111 },		1.001,		1,	147.0,	1.0 },
+				{	2,	{ 0.011111, 0.324 },	1.9999,		2,	147.0,	1.0 },
+				{	2,	{ 0.012345, 0.457 },	0.456789,	1,	148.0,	0.5 },
+				{	2,	{ 0.011111, 0.45 },		0.6,		1,	149.0,	0.5 },
+				{	1,	{ 0.43111111111 },		1.33,		1,	145.0,	1.0 }
+				}
+			},
+		} ; /* snr_test_data */
+
+	double	worst_snr, snr, freq3dB ;
+	int 	j, k, converter, verbose = 0 ;
+	/* "--verbose" as the only argument turns on per-test detail output. */
+	if (argc == 2 && strcmp (argv [1], "--verbose") == 0)
+		verbose = 1 ;
+
+	puts ("") ;
+
+	for (j = 0 ; j < ARRAY_LEN (snr_test_data) ; j++)
+	{	worst_snr = 5000.0 ;	/* Starting value above any SNR the tests report. */
+
+		converter = snr_test_data [j].converter ;
+
+		printf ("    Converter %d : %s\n", converter, src_get_name (converter)) ;
+		printf ("    %s\n", src_get_description (converter)) ;
+
+		for (k = 0 ; k < snr_test_data [j].tests ; k++)
+		{	snr = snr_test (&(snr_test_data [j].test_data [k]), k, converter, verbose) ;
+			if (worst_snr > snr)	/* Keep the lowest SNR over all tests. */
+				worst_snr = snr ;
+			} ;
+
+		printf ("    Worst case Signal-to-Noise Ratio : %.2f dB.\n", worst_snr) ;
+
+		if (snr_test_data [j].do_bandwidth_test == BOOLEAN_FALSE)
+		{	puts ("    Bandwidth test not performed on this converter.\n") ;
+			continue ;
+			}
+
+		freq3dB = bandwidth_test (converter, verbose) ;
+
+		printf ("    Measured -3dB rolloff point      : %5.2f %%.\n\n", freq3dB) ;
+		} ;
+
+	return 0 ;
+} /* main */
+
+/*==============================================================================
+*/
+
+static double
+snr_test (SINGLE_TEST *test_data, int number, int converter, int verbose)
+{	static float data [BUFFER_LEN + 1] ;
+	static float output [MAX_SPEC_LEN] ;
+	/* Convert a windowed-sine test signal and check output length, peak and SNR; returns the SNR. */
+	SRC_STATE	*src_state ;
+	SRC_DATA	src_data ;
+
+	double		output_peak, snr ;
+	int 		k, output_len, input_len, error ;
+
+	if (verbose != 0)
+	{	printf ("\tSignal-to-Noise Ratio Test %d.\n"
+				"\t=====================================\n", number) ;
+		printf ("\tFrequencies : [ ") ;
+		for (k = 0 ; k < test_data->freq_count ; k++)
+			printf ("%6.4f ", test_data->freqs [k]) ;
+
+		printf ("]\n\tSRC Ratio   : %8.4f\n", test_data->src_ratio) ;
+		}
+	else
+	{	printf ("\tSignal-to-Noise Ratio Test %d : ", number) ;
+		fflush (stdout) ;
+		} ;
+
+	/* Set up the output array. */
+	if (test_data->src_ratio >= 1.0)
+	{	output_len = MAX_SPEC_LEN ;
+		input_len = (int) ceil (MAX_SPEC_LEN / test_data->src_ratio) ;
+		if (input_len > BUFFER_LEN)
+			input_len = BUFFER_LEN ;
+		}
+	else
+	{	input_len = BUFFER_LEN ;
+		output_len = (int) ceil (BUFFER_LEN * test_data->src_ratio) ;
+		output_len &= ~0xf ;	/* Round down to a multiple of 16; avoids left-shifting a negative value. */
+		if (output_len > MAX_SPEC_LEN)
+			output_len = MAX_SPEC_LEN ;
+		input_len = (int) ceil (output_len / test_data->src_ratio) ;
+		} ;
+
+	memset (output, 0, sizeof (output)) ;
+
+	/* Generate input data array. */
+	gen_windowed_sines (test_data->freq_count, test_data->freqs, 1.0, data, input_len) ;
+
+	/* Perform sample rate conversion. */
+	if ((src_state = src_new (converter, 1, &error)) == NULL)
+	{	printf ("\n\nLine %d : src_new() failed : %s.\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	src_data.end_of_input = 1 ; /* Only one buffer worth of input. */
+
+	src_data.data_in = data ;
+	src_data.input_frames = input_len ;
+
+	src_data.src_ratio = test_data->src_ratio ;
+
+	src_data.data_out = output ;
+	src_data.output_frames = output_len ;
+
+	if ((error = src_process (src_state, &src_data)))
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	src_state = src_delete (src_state) ;
+
+	if (verbose != 0)
+		printf ("\tOutput Len  :   %ld\n", src_data.output_frames_gen) ;
+	/* output_frames_gen is a long, so use labs () rather than abs (). */
+	if (labs (src_data.output_frames_gen - output_len) > 4)
+	{	printf ("\n\nLine %d : output data length should be %d.\n\n", __LINE__, output_len) ;
+		exit (1) ;
+		} ;
+
+	/* Check output peak. */
+	output_peak = find_peak (output, src_data.output_frames_gen) ;
+
+	if (verbose != 0)
+		printf ("\tOutput Peak :   %6.4f\n", output_peak) ;
+
+	if (fabs (output_peak - test_data->peak_value) > 0.01)
+	{	printf ("\n\nLine %d : output peak (%6.4f) should be %6.4f\n\n", __LINE__, output_peak, test_data->peak_value) ;
+		save_oct_float ("snr_test.dat", data, BUFFER_LEN, output, output_len) ;
+		exit (1) ;
+		} ;
+
+	/* Calculate signal-to-noise ratio. */
+	snr = calculate_snr (output, src_data.output_frames_gen, test_data->pass_band_peaks) ;
+
+	if (snr < 0.0)
+	{	/* An error occurred. */
+		save_oct_float ("snr_test.dat", data, BUFFER_LEN, output, src_data.output_frames_gen) ;
+		exit (1) ;
+		} ;
+
+	if (verbose != 0)
+		printf ("\tSNR Ratio   :   %.2f dB\n", snr) ;
+
+	if (snr < test_data->snr)
+	{	printf ("\n\nLine %d : SNR (%5.2f) should be > %6.2f dB\n\n", __LINE__, snr, test_data->snr) ;
+		exit (1) ;
+		} ;
+
+	if (verbose != 0)
+		puts ("\t-------------------------------------\n\tPass\n") ;
+	else
+		puts ("Pass") ;
+
+	return snr ;
+} /* snr_test */
+
+static double
+find_peak (float *data, int len)
+{	double largest = 0.0, magnitude ;
+	int idx ;
+	/* Keep the greatest absolute sample value seen; stays 0.0 when len <= 0. */
+	for (idx = 0 ; idx < len ; idx++)
+	{	magnitude = fabs (data [idx]) ;
+		largest = (magnitude > largest) ? magnitude : largest ;
+		} ;
+	return largest ;
+} /* find_peak */
+
+
+static double
+find_attenuation (double freq, int converter, int verbose)
+{	static float input	[BUFFER_LEN] ;
+	static float output [2 * BUFFER_LEN] ;
+
+	SRC_DATA	params ;
+	double 		peak, atten_db ;
+	int			status ;
+
+	/* Generate the test input for the requested frequency. */
+	gen_windowed_sines (1, &freq, 1.0, input, BUFFER_LEN) ;
+
+	/* Convert the whole input in a single src_simple () call at ratio 1.999. */
+	params.data_in = input ;
+	params.data_out = output ;
+	params.input_frames = BUFFER_LEN ;
+	params.output_frames = ARRAY_LEN (output) ;
+	params.src_ratio = 1.999 ;
+	params.end_of_input = 1 ;
+
+	status = src_simple (&params, converter, 1) ;
+	if (status != 0)
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (status)) ;
+		exit (1) ;
+		} ;
+
+	/* Attenuation in dB of the output peak relative to 1.0. */
+	peak = find_peak (output, ARRAY_LEN (output)) ;
+	atten_db = 20.0 * log10 (1.0 / peak) ;
+
+	if (verbose)
+		printf ("\tFreq : %6f   InPeak : %6f    OutPeak : %6f   Atten : %6.2f dB\n", freq, 1.0, peak, atten_db) ;
+	return atten_db ;
+} /* find_attenuation */
+
+static double
+bandwidth_test (int converter, int verbose)
+{	double	lo_freq = 0.35, hi_freq = 0.495 ;
+	double	lo_atten, hi_atten, mid_freq, mid_atten ;
+
+	/* Attenuation at both ends of the initial search interval. */
+	lo_atten = find_attenuation (lo_freq, converter, verbose) ;
+	hi_atten = find_attenuation (hi_freq, converter, verbose) ;
+
+	/* Give up unless the low end attenuates by at most 3dB and the high end
+	** by at least 3dB, so that the 3dB point lies inside the interval. */
+	if (lo_atten > 3.0 || hi_atten < 3.0)
+	{	printf ("\n\nLine %d : cannot bracket 3dB point.\n\n", __LINE__) ;
+		exit (1) ;
+		} ;
+
+	/* Bisect until the attenuations at the two ends differ by at most 1dB. */
+	while (hi_atten - lo_atten > 1.0)
+	{	mid_freq = lo_freq + 0.5 * (hi_freq - lo_freq) ;
+		mid_atten = find_attenuation (mid_freq, converter, verbose) ;
+
+		if (mid_atten < 3.0)
+		{	lo_freq = mid_freq ; lo_atten = mid_atten ;
+			}
+		else
+		{	hi_freq = mid_freq ; hi_atten = mid_atten ;
+			} ;
+		} ;
+
+	/* Linear interpolation between the two ends for the 3dB crossing. */
+	mid_freq = lo_freq + (3.0 - lo_atten) * (hi_freq - lo_freq) / (hi_atten - lo_atten) ;
+
+	return 200.0 * mid_freq ;
+} /* bandwidth_test */
+
+#else /* (HAVE_FFTW3) == 0 */
+
+/* Alternative main function when FFTW is not available. */
+
+int
+main (void)
+{	/* Explain why the real test was compiled out. */
+	puts ("\n"
+		"****************************************************************\n"
+		" This test cannot be run without FFTW (http://www.fftw.org/).\n"
+		" Both the real and the complex versions of the library are\n"
+		" required.\n"
+		"****************************************************************\n") ;
+	return 0 ;
+} /* main */
+
+#endif
+
diff --git a/lsr-tests/termination_test.c b/lsr-tests/termination_test.c
new file mode 100644
index 0000000..6bb0fc0
--- /dev/null
+++ b/lsr-tests/termination_test.c
@@ -0,0 +1,339 @@
+/*
+** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include <samplerate.h>
+
+#include "util.h"
+
+#define	SHORT_BUFFER_LEN	2048
+#define	LONG_BUFFER_LEN		((1 << 16) - 20)
+
+static void simple_test (int converter) ;
+static void stream_test (int converter, double ratio) ;
+static void init_term_test (int converter, double ratio) ;
+
+static int	next_block_length (int reset) ;
+
+int
+main (void) /* Runs init_term_test and stream_test for every ratio and converter, then simple_test. */
+{	static double src_ratios [] =
+	{	0.999900, 1.000100, 0.789012, 1.200000, 0.333333, 3.100000,
+		0.125000, 8.000000, 0.099900, 9.990000, 0.100000, 10.00000
+	} ;
+
+	int k ;
+
+	puts ("\n    Zero Order Hold interpolator:") ;
+
+	for (k = 0 ; k < ARRAY_LEN (src_ratios) ; k++)
+		init_term_test (SRC_ZERO_ORDER_HOLD, src_ratios [k]) ;
+	puts ("") ;
+	for (k = 0 ; k < ARRAY_LEN (src_ratios) ; k++)
+		stream_test (SRC_ZERO_ORDER_HOLD, src_ratios [k]) ;
+
+
+	puts ("\n    Linear interpolator:") ;
+	for (k = 0 ; k < ARRAY_LEN (src_ratios) ; k++)
+		init_term_test (SRC_LINEAR, src_ratios [k]) ;
+	puts ("") ;
+	for (k = 0 ; k < ARRAY_LEN (src_ratios) ; k++)
+		stream_test (SRC_LINEAR, src_ratios [k]) ;
+
+
+	puts ("\n    Sinc interpolator:") ;
+	for (k = 0 ; k < ARRAY_LEN (src_ratios) ; k++)
+		init_term_test (SRC_SINC_FASTEST, src_ratios [k]) ;
+	puts ("") ;
+	for (k = 0 ; k < ARRAY_LEN (src_ratios) ; k++)
+		stream_test (SRC_SINC_FASTEST, src_ratios [k]) ;
+
+	puts ("") ;
+
+	simple_test (SRC_SINC_FASTEST) ; /* The tests call exit (1) themselves on failure. */
+
+	return 0 ;
+} /* main */
+
+static void
+simple_test (int converter) /* One src_simple call reducing 199030 frames to 1000; only the error code is checked. */
+{
+	int ilen = 199030, olen = 1000, error ;
+
+	{
+		float in [ilen] ; /* Variable length array; its contents are never initialised here. */
+		float out [olen] ;
+		double ratio = (1.0 * olen) / ilen ;
+		SRC_DATA src_data = /* Positional initialiser: depends on the member order of SRC_DATA. */
+		{	in, out,
+			ilen, olen,
+			0, 0, 0,
+			ratio
+		} ;
+
+		error = src_simple (&src_data, converter, 1) ;
+		if (error)
+		{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+			exit (1) ;
+			} ;
+	} ;
+
+    return ;
+} /* simple_test */
+
+static void
+init_term_test (int converter, double src_ratio) /* Checks frame counts and the first output sample of one src_simple call. */
+{	static float input [SHORT_BUFFER_LEN], output [SHORT_BUFFER_LEN] ;
+
+	SRC_DATA	src_data ;
+
+	int k, input_len, output_len, error, terminate ;
+
+	printf ("\tinit_term_test   (SRC ratio = %7.4f) .......... ", src_ratio) ;
+	fflush (stdout) ;
+
+	/* Calculate maximum input and output lengths. */
+	if (src_ratio >= 1.0)
+	{	output_len = SHORT_BUFFER_LEN ;
+		input_len = (int) floor (SHORT_BUFFER_LEN / src_ratio) ;
+		}
+	else
+	{	input_len = SHORT_BUFFER_LEN ;
+		output_len = (int) floor (SHORT_BUFFER_LEN * src_ratio) ;
+		} ;
+
+	/* Reduce input_len by 10 so output is longer than necessary. */
+	input_len -= 10 ;
+
+	for (k = 0 ; k < ARRAY_LEN (input) ; k++)
+		input [k] = 1.0 ; /* Constant input, so the first output sample must not be near zero. */
+
+	if (output_len > SHORT_BUFFER_LEN)
+	{	printf ("\n\nLine %d : output_len > SHORT_BUFFER_LEN\n\n", __LINE__) ;
+		exit (1) ;
+		} ;
+
+	src_data.data_in = input ;
+	src_data.input_frames = input_len ;
+
+	src_data.src_ratio = src_ratio ;
+
+	src_data.data_out = output ;
+	src_data.output_frames = SHORT_BUFFER_LEN ;
+
+	if ((error = src_simple (&src_data, converter, 1)))
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	terminate = (int) ceil ((src_ratio >= 1.0) ? 1 : 1.0 / src_ratio) ; /* Allowed output count error. */
+
+	if (fabs (src_ratio * input_len - src_data.output_frames_gen) > terminate)
+	{	printf ("\n\nLine %d : Bad output frame count.\n\n", __LINE__) ;
+		printf ("\tterminate             : %d\n", terminate) ;
+		printf ("\tsrc_ratio             : %.4f\n", src_ratio) ;
+		printf ("\tinput_len             : %d\n"
+				"\tinput_len * src_ratio : %f\n", input_len, input_len * src_ratio) ;
+		printf ("\toutput_frames_gen     : %ld\n\n", src_data.output_frames_gen) ;
+		exit (1) ;
+		} ;
+
+	if (labs (src_data.input_frames_used - input_len) > 1) /* labs: the difference has type long. */
+	{	printf ("\n\nLine %d : input_frames_used should be %d, is %ld.\n\n",
+					 __LINE__, input_len, src_data.input_frames_used) ;
+		printf ("\tsrc_ratio  : %.4f\n", src_ratio) ;
+		printf ("\tinput_len  : %d\n\tinput_used : %ld\n\n", input_len, src_data.input_frames_used) ;
+		exit (1) ;
+		} ;
+
+	if (fabs (output [0]) < 0.1)
+	{	printf ("\n\nLine %d : First output sample is bad.\n\n", __LINE__) ;
+		printf ("\toutput [0] == %f\n\n", output [0]) ;
+		exit (1) ;
+		}
+
+	puts ("ok") ;
+
+	return ;
+} /* init_term_test */
+
+static void
+stream_test (int converter, double src_ratio) /* Feeds input in varying block sizes via src_process and checks the totals. */
+{	static float input [LONG_BUFFER_LEN], output [LONG_BUFFER_LEN] ;
+
+	SRC_STATE	*src_state ;
+	SRC_DATA	src_data ;
+
+	int input_len, output_len, current_in, current_out ;
+	int k, error, terminate ;
+
+	printf ("\tstream_test      (SRC ratio = %7.4f) .......... ", src_ratio) ;
+	fflush (stdout) ;
+
+/* Fill the input with a ramp (input [k] == k). */
+for (k = 0 ; k < LONG_BUFFER_LEN ; k++) input [k] = k * 1.0 ;
+
+	/* Calculate maximum input and output lengths. */
+	if (src_ratio >= 1.0)
+	{	output_len = LONG_BUFFER_LEN ;
+		input_len = (int) floor (LONG_BUFFER_LEN / src_ratio) ;
+		}
+	else
+	{	input_len = LONG_BUFFER_LEN ;
+		output_len = (int) floor (LONG_BUFFER_LEN * src_ratio) ;
+		} ;
+
+	/* Reduce input_len by 20 so output is longer than necessary. */
+	input_len -= 20 ;
+
+	if (output_len > LONG_BUFFER_LEN)
+	{	printf ("\n\nLine %d : output_len > LONG_BUFFER_LEN\n\n", __LINE__) ;
+		exit (1) ;
+		} ;
+
+	current_in = current_out = 0 ;
+
+	/* Perform sample rate conversion. */
+	if ((src_state = src_new (converter, 1, &error)) == NULL)
+	{	printf ("\n\nLine %d : src_new() failed : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	src_data.end_of_input = 0 ; /* Set this later. */
+
+	src_data.data_in = input ;
+
+	src_data.src_ratio = src_ratio ;
+
+	src_data.data_out = output ;
+	src_data.output_frames = ARRAY_LEN (output) / 10 ; /* Overwritten at the top of every loop pass. */
+
+	terminate = 1 + (int) ceil ((src_ratio >= 1.0) ? src_ratio : 1.0 / src_ratio) ; /* Frame count tolerance. */
+
+	while (1)
+	{
+		src_data.input_frames = next_block_length (0) ; /* Never reset, so the sequence continues across calls. */
+		src_data.input_frames = MIN (src_data.input_frames, input_len - current_in) ;
+
+		src_data.output_frames = ARRAY_LEN (output) - current_out ;
+		/*-Erik MIN (src_data.output_frames, output_len - current_out) ;-*/
+
+		src_data.end_of_input = (current_in >= input_len) ? 1 : 0 ;
+
+		if ((error = src_process (src_state, &src_data)))
+		{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+			printf ("  src_data.input_frames  : %ld\n", src_data.input_frames) ;
+			printf ("  src_data.output_frames : %ld\n\n", src_data.output_frames) ;
+			exit (1) ;
+			} ;
+
+		if (src_data.end_of_input && src_data.output_frames_gen == 0) /* All input given, nothing more produced. */
+			break ;
+
+		if (src_data.input_frames_used > src_data.input_frames)
+		{	printf ("\n\nLine %d : input_frames_used > input_frames\n\n", __LINE__) ;
+			printf ("  src_data.input_frames      : %ld\n", src_data.input_frames) ;
+			printf ("  src_data.input_frames_used : %ld\n", src_data.input_frames_used) ;
+			printf ("  src_data.output_frames     : %ld\n", src_data.output_frames) ;
+			printf ("  src_data.output_frames_gen : %ld\n\n", src_data.output_frames_gen) ;
+			exit (1) ;
+			} ;
+
+		if (src_data.input_frames_used < 0)
+		{	printf ("\n\nLine %d : input_frames_used (%ld) < 0\n\n", __LINE__, src_data.input_frames_used) ;
+			exit (1) ;
+			} ;
+
+		if (src_data.output_frames_gen < 0)
+		{	printf ("\n\nLine %d : output_frames_gen (%ld) < 0\n\n", __LINE__, src_data.output_frames_gen) ;
+			exit (1) ;
+			} ;
+
+		current_in	+= src_data.input_frames_used ;
+		current_out += src_data.output_frames_gen ;
+
+		if (current_in > input_len + terminate)
+		{	printf ("\n\nLine %d : current_in (%d) > input_len (%d + %d)\n\n", __LINE__, current_in, input_len, terminate) ;
+			exit (1) ;
+			} ;
+
+		if (current_out > output_len)
+		{	printf ("\n\nLine %d : current_out (%d) > output_len (%d)\n\n", __LINE__, current_out, output_len) ;
+			exit (1) ;
+			} ;
+
+		if (src_data.input_frames_used > input_len)
+		{	printf ("\n\nLine %d : input_frames_used (%ld) > %d\n\n", __LINE__, src_data.input_frames_used, input_len) ;
+			exit (1) ;
+			} ;
+
+		if (src_data.output_frames_gen > output_len)
+		{	printf ("\n\nLine %d : output_frames_gen (%ld) > %d\n\n", __LINE__, src_data.output_frames_gen, output_len) ;
+			exit (1) ;
+			} ;
+
+		if (src_data.data_in == NULL && src_data.output_frames_gen == 0) /* data_in is never set to NULL in this function. */
+			break ;
+
+
+		src_data.data_in	+= src_data.input_frames_used ; /* Advance both cursors past the frames just handled. */
+		src_data.data_out	+= src_data.output_frames_gen ;
+		} ;
+
+	src_state = src_delete (src_state) ;
+
+	if (fabs (current_out - src_ratio * input_len) > terminate)
+	{	printf ("\n\nLine %d : bad output data length %d should be %2.1f +/- %d.\n", __LINE__,
+					current_out, src_ratio * input_len, terminate) ;
+		printf ("\tsrc_ratio  : %.4f\n", src_ratio) ;
+		printf ("\tinput_len  : %d\n\tinput_used : %d\n", input_len, current_in) ;
+		printf ("\toutput_len : %d\n\toutput_gen : %d\n\n", output_len, current_out) ;
+		exit (1) ;
+		} ;
+
+	if (current_in != input_len)
+	{	printf ("\n\nLine %d : unused input.\n", __LINE__) ;
+		printf ("\tinput_len         : %d\n", input_len) ;
+		printf ("\tinput_frames_used : %d\n\n", current_in) ;
+		exit (1) ;
+		} ;
+
+	puts ("ok") ;
+
+	return ;
+} /* stream_test */
+
+static int
+next_block_length (int reset) /* Returns the next entry of a fixed, cyclic list of block lengths. */
+{	static int block_lengths [] = /* Should be an odd length. */
+	{	/*-2, 500, 5, 400, 10, 300, 20, 200, 50, 100, 70 -*/
+		5, 400, 10, 300, 20, 200, 50, 100, 70
+		} ;
+	static int block_len_index = 0 ; /* Persists between calls. */
+
+	if (reset)
+		block_len_index = 0 ; /* A reset call returns the first entry. */
+	else
+		block_len_index = (block_len_index + 1) % ARRAY_LEN (block_lengths) ; /* Advance before reading. */
+
+	return block_lengths [block_len_index] ;
+} /* next_block_length */
+
diff --git a/lsr-tests/throughput_test.c b/lsr-tests/throughput_test.c
new file mode 100644
index 0000000..28b6fe5
--- /dev/null
+++ b/lsr-tests/throughput_test.c
@@ -0,0 +1,212 @@
+/*
+** Copyright (C) 2004-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <samplerate.h>
+
+#include "config.h"
+
+#include "util.h"
+#include "float_cast.h"
+
+#define BUFFER_LEN	(1<<16)
+
+static float input [BUFFER_LEN] ;
+static float output [BUFFER_LEN] ;
+
+static long
+throughput_test (int converter, long best_throughput) /* Times src_simple for about 3 s; returns the larger of the new and the passed-in throughput. */
+{	SRC_DATA src_data ;
+	clock_t start_time, clock_time ;
+	double duration ;
+	long total_frames = 0, throughput ;
+	int error ;
+
+	printf ("    %-30s    ", src_get_name (converter)) ;
+	fflush (stdout) ;
+
+	src_data.data_in = input ;
+	src_data.input_frames = ARRAY_LEN (input) ;
+
+	src_data.data_out = output ;
+	src_data.output_frames = ARRAY_LEN (output) ;
+
+	src_data.src_ratio = 0.99 ;
+
+	sleep (2) ;
+
+	start_time = clock () ;
+
+	do /* Convert the same buffer repeatedly until 3 s of clock () time have elapsed. */
+	{
+		if ((error = src_simple (&src_data, converter, 1)) != 0)
+		{	puts (src_strerror (error)) ;
+			exit (1) ;
+			} ;
+
+		total_frames += src_data.output_frames_gen ;
+
+		clock_time = clock () - start_time ;
+		duration = (1.0 * clock_time) / CLOCKS_PER_SEC ;
+	}
+	while (duration < 3.0) ;
+
+	if (src_data.input_frames_used != ARRAY_LEN (input)) /* Only the final iteration's counts are checked. */
+	{	printf ("\n\nLine %d : input frames used %ld should be %d\n", __LINE__, src_data.input_frames_used, ARRAY_LEN (input)) ;
+		exit (1) ;
+		} ;
+
+	/* NOTE(review): fabs/floor/lrint are used but this file has no direct <math.h> include - presumably via float_cast.h; confirm. */
+	if (fabs (src_data.src_ratio * src_data.input_frames_used - src_data.output_frames_gen) > 2)
+	{	printf ("\n\nLine %d : input / output length mismatch.\n\n", __LINE__) ;
+		printf ("    input len  : %d\n", ARRAY_LEN (input)) ;
+		printf ("    output len : %ld (should be %g +/- 2)\n\n", src_data.output_frames_gen,
+				floor (0.5 + src_data.src_ratio * src_data.input_frames_used)) ;
+		exit (1) ;
+		} ;
+
+	throughput = lrint (floor (total_frames / duration)) ; /* Output frames per second. */
+
+	if (best_throughput == 0) /* Zero means no previous best: omit the third column. */
+	{	best_throughput = MAX (throughput, best_throughput) ;
+		printf ("%5.2f          %10ld\n", duration, throughput) ;
+		}
+	else
+	{	best_throughput = MAX (throughput, best_throughput) ;
+		printf ("%5.2f          %10ld       %10ld\n", duration, throughput, best_throughput) ;
+		}
+
+
+	return best_throughput ;
+} /* throughput_test */
+
+static void
+single_run (void) /* Prints the CPU name and runs throughput_test once per converter. */
+{
+
+	printf ("\n    CPU name : %s\n", get_cpu_name ()) ;
+
+	puts (
+		"\n"
+		"    Converter                        Duration        Throughput\n"
+		"    -----------------------------------------------------------"
+		) ;
+
+	throughput_test (SRC_ZERO_ORDER_HOLD, 0) ; /* Passing 0 selects the two-column output; the result is discarded. */
+	throughput_test (SRC_LINEAR, 0) ;
+	throughput_test (SRC_SINC_FASTEST, 0) ;
+	throughput_test (SRC_SINC_MEDIUM_QUALITY, 0) ;
+	throughput_test (SRC_SINC_BEST_QUALITY, 0) ;
+
+	puts ("") ;
+	return ;
+} /* single_run */
+
+static void
+multi_run (int run_count) /* Runs every converter run_count times and prints the best throughput of each. */
+{	long zero_order_hold = 0, linear = 0 ;
+	long sinc_fastest = 0, sinc_medium = 0, sinc_best = 0 ;
+	int k ;
+
+	puts (
+		"\n"
+		"    Converter                        Duration        Throughput      Best Throughput\n"
+		"    --------------------------------------------------------------------------------"
+		) ;
+
+	for (k = 0 ; k < run_count ; k++) /* Each variable carries its converter's best result into the next run. */
+	{	zero_order_hold =	throughput_test (SRC_ZERO_ORDER_HOLD, zero_order_hold) ;
+		linear =			throughput_test (SRC_LINEAR, linear) ;
+		sinc_fastest =		throughput_test (SRC_SINC_FASTEST, sinc_fastest) ;
+		sinc_medium =		throughput_test (SRC_SINC_MEDIUM_QUALITY, sinc_medium) ;
+		sinc_best =			throughput_test (SRC_SINC_BEST_QUALITY, sinc_best) ;
+
+		puts ("") ;
+
+		/* Let the CPU cool down. We might be running on a laptop. */
+		sleep (10) ; /* Also executed after the final run. */
+		} ;
+
+	printf ("\n    CPU name : %s\n", get_cpu_name ()) ;
+
+	puts (
+		"\n"
+		"    Converter                        Best Throughput\n"
+		"    ------------------------------------------------"
+		) ;
+	printf ("    %-30s    %10ld\n", src_get_name (SRC_ZERO_ORDER_HOLD), zero_order_hold) ;
+	printf ("    %-30s    %10ld\n", src_get_name (SRC_LINEAR), linear) ;
+	printf ("    %-30s    %10ld\n", src_get_name (SRC_SINC_FASTEST), sinc_fastest) ;
+	printf ("    %-30s    %10ld\n", src_get_name (SRC_SINC_MEDIUM_QUALITY), sinc_medium) ;
+	printf ("    %-30s    %10ld\n", src_get_name (SRC_SINC_BEST_QUALITY), sinc_best) ;
+
+	puts ("") ;
+} /* multi_run */
+
+static void
+usage_exit (const char * argv0) /* Prints the usage text using the basename of argv0, then exits with status 0. */
+{	const char * cptr ;
+
+	if ((cptr = strrchr (argv0, '/')) != NULL)
+		argv0 = cptr + 1 ; /* Skip the '/' itself so only the program name is printed. */
+
+	printf (
+		"Usage :\n"
+		"    %s                 - Single run of the throughput test.\n"
+		"    %s --best-of N     - Do N runs of the test and print the best result.\n"
+		"\n",
+		argv0, argv0) ;
+
+	exit (0) ;
+} /* usage_exit */
+
+int
+main (int argc, char ** argv) /* No arguments: single run.  "--best-of N": N runs.  Anything else: usage. */
+{	double freq ;
+
+	memset (input, 0, sizeof (input)) ;
+	freq = 0.01 ;
+	gen_windowed_sines (1, &freq, 1.0, input, BUFFER_LEN) ; /* One windowed sine is the shared test signal. */
+
+	if (argc == 1)
+		single_run () ;
+	else if (argc == 3 && strcmp (argv [1], "--best-of") == 0)
+	{	int run_count = atoi (argv [2]) ; /* Non-numeric text yields 0 and is rejected below. */
+
+		if (run_count < 1 || run_count > 20)
+		{	printf ("Please be sensible. Run count should be in range [1, 20].\n") ;
+			exit (1) ;
+			} ;
+
+		multi_run (run_count) ;
+		}
+	else
+		usage_exit (argv [0]) ; /* Does not return. */
+
+	puts (
+		"            Duration is in seconds.\n"
+		"            Throughput is in samples/sec (more is better).\n"
+		) ;
+
+	return 0 ;
+} /* main */
+
diff --git a/lsr-tests/util.c b/lsr-tests/util.c
new file mode 100644
index 0000000..fefcaf2
--- /dev/null
+++ b/lsr-tests/util.c
@@ -0,0 +1,230 @@
+/*
+** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+
+#include	"util.h"
+
+#ifndef	M_PI
+#define	M_PI			3.14159265358979323846264338
+#endif
+
+void
+gen_windowed_sines (int freq_count, const double *freqs, double max, float *output, int output_len)
+{	int 	k, freq ; /* Fills output with a sum of freq_count sines, then applies a Hanning window. */
+	double	amplitude, phase ;
+
+	amplitude = max / freq_count ; /* Each sine gets an equal share of max. */
+
+	for (k = 0 ; k < output_len ; k++)
+		output [k] = 0.0 ;
+
+	for (freq = 0 ; freq < freq_count ; freq++)
+	{	phase = 0.9 * M_PI / freq_count ; /* The same phase offset is used for every sine. */
+
+		if (freqs [freq] <= 0.0 || freqs [freq] >= 0.5) /* Exits the program on an out of range frequency. */
+		{	printf ("\n%s : Error : freq [%d] == %g is out of range. Should be < 0.5.\n", __FILE__, freq, freqs [freq]) ;
+			exit (1) ;
+			} ;
+
+		for (k = 0 ; k < output_len ; k++)
+			output [k] += amplitude * sin (freqs [freq] * (2 * k) * M_PI + phase) ; /* freqs are in cycles per sample. */
+		} ;
+
+	/* Apply Hanning Window. */
+	for (k = 0 ; k < output_len ; k++)
+		output [k] *= 0.5 - 0.5 * cos ((2 * k) * M_PI / (output_len - 1)) ; /* Divides by output_len - 1. */
+
+	/*	data [k] *= 0.3635819 - 0.4891775 * cos ((2 * k) * M_PI / (output_len - 1))
+					+ 0.1365995 * cos ((4 * k) * M_PI / (output_len - 1))
+					- 0.0106411 * cos ((6 * k) * M_PI / (output_len - 1)) ;
+		*/
+
+	return ;
+} /* gen_windowed_sines */
+
+void
+save_oct_float (char *filename, float *input, int in_len, float *output, int out_len)
+{	FILE 	*file ; /* Writes input and output as two single-column matrices in a text file. */
+	int		k ;
+
+	printf ("Dumping input and output data to file : %s.\n\n", filename) ;
+
+	if (! (file = fopen (filename, "w"))) /* Returns silently if the file cannot be opened. */
+		return ;
+
+	fprintf (file, "# Not created by Octave\n") ;
+
+	fprintf (file, "# name: input\n") ;
+	fprintf (file, "# type: matrix\n") ;
+	fprintf (file, "# rows: %d\n", in_len) ;
+	fprintf (file, "# columns: 1\n") ;
+
+	for (k = 0 ; k < in_len ; k++)
+		fprintf (file, "% g\n", input [k]) ; /* The space flag reserves a column for the sign. */
+
+	fprintf (file, "# name: output\n") ;
+	fprintf (file, "# type: matrix\n") ;
+	fprintf (file, "# rows: %d\n", out_len) ;
+	fprintf (file, "# columns: 1\n") ;
+
+	for (k = 0 ; k < out_len ; k++)
+		fprintf (file, "% g\n", output [k]) ;
+
+	fclose (file) ;
+	return ;
+} /* save_oct_float */
+
+void
+save_oct_double (char *filename, double *input, int in_len, double *output, int out_len)
+{	FILE 	*file ; /* Same file layout as save_oct_float, for double data. */
+	int		k ;
+
+	printf ("Dumping input and output data to file : %s.\n\n", filename) ;
+
+	if (! (file = fopen (filename, "w"))) /* Returns silently if the file cannot be opened. */
+		return ;
+
+	fprintf (file, "# Not created by Octave\n") ;
+
+	fprintf (file, "# name: input\n") ;
+	fprintf (file, "# type: matrix\n") ;
+	fprintf (file, "# rows: %d\n", in_len) ;
+	fprintf (file, "# columns: 1\n") ;
+
+	for (k = 0 ; k < in_len ; k++)
+		fprintf (file, "% g\n", input [k]) ; /* The space flag reserves a column for the sign. */
+
+	fprintf (file, "# name: output\n") ;
+	fprintf (file, "# type: matrix\n") ;
+	fprintf (file, "# rows: %d\n", out_len) ;
+	fprintf (file, "# columns: 1\n") ;
+
+	for (k = 0 ; k < out_len ; k++)
+		fprintf (file, "% g\n", output [k]) ;
+
+	fclose (file) ;
+	return ;
+} /* save_oct_double */
+
+void
+interleave_data (const float *in, float *out, int frames, int channels)
+{	int fr, ch ; /* Converts channel-contiguous data (in) to frame-interleaved data (out). */
+
+	for (fr = 0 ; fr < frames ; fr++)
+		for (ch = 0 ; ch < channels ; ch++)
+			out [ch + channels * fr] = in [fr + frames * ch] ; /* Both arrays hold frames * channels samples. */
+
+	return ;
+} /* interleave_data */
+
+void
+deinterleave_data (const float *in, float *out, int frames, int channels)
+{	int fr, ch ; /* Converts frame-interleaved data (in) to channel-contiguous data (out). */
+
+	for (ch = 0 ; ch < channels ; ch++)
+		for (fr = 0 ; fr < frames ; fr++)
+			out [fr + frames * ch] = in [ch + channels * fr] ; /* Inverse index mapping of interleave_data. */
+
+	return ;
+} /* deinterleave_data */
+
+void
+reverse_data (float *data, int datalen) /* Reverses the first datalen elements of data in place. */
+{	int left, right ;
+	float temp ;
+
+	left = 0 ;
+	right = datalen - 1 ;
+
+	while (left < right) /* Swap from both ends towards the middle; no swaps when datalen <= 1. */
+	{	temp = data [left] ;
+		data [left] = data [right] ;
+		data [right] = temp ;
+		left ++ ;
+		right -- ;
+		} ;
+
+} /* reverse_data */
+
+const char *
+get_cpu_name (void)
+{	/* Returns the CPU model string, or "Unknown". The result may point into a static buffer. */
+	const char *name = "Unknown", *search = NULL ;
+	static char buffer [512] ;
+	FILE * file = NULL ;
+	int is_pipe = 0 ;
+
+#if defined (__linux__)
+	file = fopen ("/proc/cpuinfo", "r") ;
+	search = "model name" ;
+#elif defined (__APPLE__)
+	file = popen ("/usr/sbin/system_profiler -detailLevel full SPHardwareDataType", "r") ;
+	search = "Processor Name" ;
+	is_pipe = 1 ;
+#elif defined (__FreeBSD__)
+	file = popen ("sysctl -a", "r") ;
+	search = "hw.model" ;
+	is_pipe = 1 ;
+#else
+	file = NULL ;
+#endif
+
+	if (file == NULL)
+		return name ;
+
+	if (search == NULL)
+	{	printf ("Error : search is NULL in function %s.\n", __func__) ;
+		return name ;
+		} ;
+
+	while (fgets (buffer, sizeof (buffer), file) != NULL)
+		if (strstr (buffer, search))
+		{	char *src, *dest ;
+
+			if ((src = strchr (buffer, ':')) != NULL)
+			{	src ++ ;
+				while (isspace ((unsigned char) src [0])) /* <ctype.h> needs an unsigned char value. */
+					src ++ ;
+				name = src ;
+
+				/* Remove consecutive spaces. */
+				if (src [0] != 0) src ++ ; /* Keep the first character, but never step past the terminator. */
+				for (dest = src ; src [0] ; src ++)
+				{	if (isspace ((unsigned char) src [0]) && isspace ((unsigned char) dest [-1]))
+						continue ;
+					dest [0] = src [0] ;
+					dest ++ ;
+					} ;
+				dest [0] = 0 ;
+				break ; /* Only the first matching line is used. */
+				} ;
+			} ;
+
+	if (is_pipe)
+		pclose (file) ;
+	else
+		fclose (file) ;
+
+	return name ;
+} /* get_cpu_name */
+
diff --git a/lsr-tests/util.h b/lsr-tests/util.h
new file mode 100644
index 0000000..80b1b49
--- /dev/null
+++ b/lsr-tests/util.h
@@ -0,0 +1,50 @@
+/*
+** Copyright (C) 2002-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#define	ABS(a)			(((a) < 0) ? - (a) : (a)) /* These three macros evaluate their arguments more than once. */
+#define MIN(a,b)		(((a) < (b)) ? (a) : (b))
+#define	MAX(a,b)		(((a) >= (b)) ? (a) : (b))
+
+#define	ARRAY_LEN(x)	((int) (sizeof (x) / sizeof ((x) [0]))) /* Element count as int; valid for true arrays only, not pointers. */
+
+void gen_windowed_sines (int freq_count, const double *freqs, double max, float *output, int output_len) ;
+
+void save_oct_float (char *filename, float *input, int in_len, float *output, int out_len) ;
+void save_oct_double (char *filename, double *input, int in_len, double *output, int out_len) ;
+
+void interleave_data (const float *in, float *out, int frames, int channels) ;
+
+void deinterleave_data (const float *in, float *out, int frames, int channels) ;
+
+void reverse_data (float *data, int datalen) ;
+
+double calculate_snr (float *data, int len, int expected_peaks) ; /* Not defined in util.c; presumably in calc_snr.c - confirm. */
+
+const char * get_cpu_name (void) ;
+
+#if OS_IS_WIN32
+/*
+**	Extra Win32 hacks.
+**
+**	Despite Microsoft's claim of Windows being POSIX compatible, it has '_sleep'
+**	instead of 'sleep'.
+*/
+
+#define sleep _sleep
+#endif
+
diff --git a/lsr-tests/varispeed_test.c b/lsr-tests/varispeed_test.c
new file mode 100644
index 0000000..52b2f43
--- /dev/null
+++ b/lsr-tests/varispeed_test.c
@@ -0,0 +1,152 @@
+/*
+** Copyright (C) 2006-2011 Erik de Castro Lopo <erikd@mega-nerd.com>
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#include <samplerate.h>
+
+#include "util.h"
+
+#define	BUFFER_LEN		(1 << 16)
+
+static void varispeed_test (int converter, double target_snr) ;
+
+int
+main (void) /* Runs varispeed_test for three converters, each with its own minimum SNR. */
+{
+	puts ("") ;
+	printf ("    Zero Order Hold interpolator    : ") ;
+	varispeed_test (SRC_ZERO_ORDER_HOLD, 10.0) ;
+
+	printf ("    Linear interpolator             : ") ;
+	varispeed_test (SRC_LINEAR, 10.0) ;
+
+	printf ("    Sinc interpolator               : ") ;
+	varispeed_test (SRC_SINC_FASTEST, 115.0) ;
+
+	puts ("") ;
+
+	return 0 ; /* varispeed_test calls exit (1) itself on failure. */
+} /* main */
+
+static void
+varispeed_test (int converter, double target_snr) /* Converts a sine twice (second pass reversed) and checks the SNR. */
+{	static float input [BUFFER_LEN], output [BUFFER_LEN] ;
+	double sine_freq, snr ;
+
+	SRC_STATE	*src_state ;
+	SRC_DATA	src_data ;
+
+	int input_len, error ;
+
+	memset (input, 0, sizeof (input)) ;
+
+	input_len = ARRAY_LEN (input) / 2 ; /* Only the first half of the buffer holds signal. */
+
+	sine_freq = 0.0111 ;
+	gen_windowed_sines (1, &sine_freq, 1.0, input, input_len) ;
+
+	/* Perform sample rate conversion. */
+	if ((src_state = src_new (converter, 1, &error)) == NULL)
+	{	printf ("\n\nLine %d : src_new() failed : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	src_data.end_of_input = 1 ;
+
+	src_data.data_in = input ;
+	src_data.input_frames = input_len ;
+
+	src_data.src_ratio = 3.0 ;
+
+	src_data.data_out = output ;
+	src_data.output_frames = ARRAY_LEN (output) ;
+
+	/* NOTE(review): state ratio is set to 1/3 while src_data asks for 3.0 - presumably to make the ratio vary during the call; confirm. */
+	if ((error = src_set_ratio (src_state, 1.0 / src_data.src_ratio)))
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	if ((error = src_process (src_state, &src_data)))
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		printf ("  src_data.input_frames  : %ld\n", src_data.input_frames) ;
+		printf ("  src_data.output_frames : %ld\n\n", src_data.output_frames) ;
+		exit (1) ;
+		} ;
+
+	if (src_data.input_frames_used != input_len)
+	{	printf ("\n\nLine %d : unused input.\n", __LINE__) ;
+		printf ("\tinput_len         : %d\n", input_len) ;
+		printf ("\tinput_frames_used : %ld\n\n", src_data.input_frames_used) ;
+		exit (1) ;
+		} ;
+
+	/* Copy the last output to the input. */
+	memcpy (input, output, sizeof (input)) ;
+	reverse_data (input, src_data.output_frames_gen) ; /* Second pass processes the first result backwards. */
+
+	if ((error = src_reset (src_state)))
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	src_data.end_of_input = 1 ;
+
+	src_data.data_in = input ;
+	input_len = src_data.input_frames = src_data.output_frames_gen ; /* Second pass consumes what the first produced. */
+
+	src_data.data_out = output ;
+	src_data.output_frames = ARRAY_LEN (output) ;
+
+	if ((error = src_set_ratio (src_state, 1.0 / src_data.src_ratio)))
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		exit (1) ;
+		} ;
+
+	if ((error = src_process (src_state, &src_data)))
+	{	printf ("\n\nLine %d : %s\n\n", __LINE__, src_strerror (error)) ;
+		printf ("  src_data.input_frames  : %ld\n", src_data.input_frames) ;
+		printf ("  src_data.output_frames : %ld\n\n", src_data.output_frames) ;
+		exit (1) ;
+		} ;
+
+	if (src_data.input_frames_used != input_len)
+	{	printf ("\n\nLine %d : unused input.\n", __LINE__) ;
+		printf ("\tinput_len         : %d\n", input_len) ;
+		printf ("\tinput_frames_used : %ld\n\n", src_data.input_frames_used) ;
+		exit (1) ;
+		} ;
+
+	src_state = src_delete (src_state) ;
+
+	snr = calculate_snr (output, src_data.output_frames_gen, 1) ; /* One spectral peak is expected. */
+
+	if (target_snr > snr) /* On failure the data is dumped to varispeed.mat before exiting. */
+	{	printf ("\n\nLine %d : snr (%3.1f) does not meet target (%3.1f)\n\n", __LINE__, snr, target_snr) ;
+		save_oct_float ("varispeed.mat", input, src_data.input_frames, output, src_data.output_frames_gen) ;
+		exit (1) ;
+		} ;
+
+	puts ("ok") ;
+
+	return ;
+} /* varispeed_test */
+
diff --git a/msvc/README b/msvc/README
new file mode 100644
index 0000000..b3af7a7
--- /dev/null
+++ b/msvc/README
@@ -0,0 +1,9 @@
+SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+
+CMake is able to configure, build (as either a DLL or a static library),
+and install libsoxr for general use on MS-Windows as on other OSs.
+However, for projects that prefer to maintain a more monolithic build
+structure using the MSVC compiler, the accompanying files may be useful.
+
+ * libsoxr.vcproj      Builds a static lib for MSVC ver >= 9 (2008).
+ * soxr-config.h       Pre-configured for a typical Win32 system.
diff --git a/msvc/libsoxr.vcproj b/msvc/libsoxr.vcproj
new file mode 100644
index 0000000..b1e1714
--- /dev/null
+++ b/msvc/libsoxr.vcproj
@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="libsoxr"
+	ProjectGUID="{af9ad75c-4785-4432-bac3-adab1e7f1192}"
+	RootNamespace="libsoxr"
+	TargetFrameworkVersion="131072"
+	>
+	<Platforms>
+		<Platform Name="Win32" />
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			CharacterSet="2"
+			WholeProgramOptimization="0"
+			>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="."
+				PreprocessorDefinitions="_DEBUG;_USE_MATH_DEFINES;_CRT_SECURE_NO_WARNINGS;SOXR_LIB"
+				StringPooling="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				EnableFunctionLevelLinking="true"
+				WarningLevel="3"
+				DebugInformationFormat="4"
+				CompileAs="0"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="4"
+			CharacterSet="2"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				AdditionalIncludeDirectories="."
+				PreprocessorDefinitions="NDEBUG;_USE_MATH_DEFINES;_CRT_SECURE_NO_WARNINGS;SOXR_LIB"
+				StringPooling="true"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="true"
+				WarningLevel="3"
+				CompileAs="0"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter Name="Source Files" >
+			<File RelativePath="../src/data-io.c" />
+			<File RelativePath="../src/dbesi0.c" />
+			<File RelativePath="../src/fft4g32.c" />
+			<File RelativePath="../src/fft4g64.c" />
+			<File RelativePath="../src/filter.c" />
+			<File RelativePath="../src/lsr.c" />
+			<File RelativePath="../src/pffft32s.c" />
+			<File RelativePath="../src/rate32.c" />
+			<File RelativePath="../src/rate32s.c" />
+			<File RelativePath="../src/rate64.c" />
+			<File RelativePath="../src/simd.c" />
+			<File RelativePath="../src/soxr.c" />
+			<File RelativePath="../src/vr32.c" />
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/msvc/soxr-config.h b/msvc/soxr-config.h
new file mode 100644
index 0000000..935beed
--- /dev/null
+++ b/msvc/soxr-config.h
@@ -0,0 +1,57 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* N.B. Pre-configured for typical Win32 systems.  Normal procedure is to use
+ * the cmake configuration and build system.  See INSTALL. */
+
+#if !defined soxr_config_included
+#define soxr_config_included
+
+#define SOXR_VERSION_MAJOR 0 /* Numeric version; must agree with the string. */
+#define SOXR_VERSION_MINOR 0
+#define SOXR_VERSION_PATCH 5 /* Was 1, which contradicted "0.0.5" below. */
+#define SOXR_VERSION "0.0.5"
+
+#define HAVE_SINGLE_PRECISION 1 /* Fixed feature switches for this hand-made */
+#define HAVE_DOUBLE_PRECISION 1 /* configuration (no cmake probing is done). */
+#define HAVE_VR         1
+#define HAVE_AVFFT      0
+#define HAVE_SIMD       1
+#define HAVE_FENV_H     0 /* data-io.c then uses its own _MSC_VER substitutes. */
+#define HAVE_LRINT      0
+#define WORDS_BIGENDIAN 0
+
+#include <limits.h>
+
+#undef bool /* Drop any earlier macro definitions so the int-based ones win. */
+#undef false
+#undef true
+#define bool int
+#define false 0
+#define true 1
+
+#undef int16_t /* Fixed-width names are macros over built-in types here. */
+#undef int32_t
+#undef int64_t
+#undef uint32_t
+#undef uint64_t
+#define int16_t short
+#if LONG_MAX > 2147483647L /* long is wider than 32 bits. */
+  #define int32_t int
+  #define int64_t long
+  #define DBL (double) /* A cast here; empty in the 32-bit-long branch. */
+#elif LONG_MAX < 2147483647L
+#error this library requires that 'long int' has at least 32-bits
+#else /* long is exactly 32 bits. */
+  #define int32_t long
+  #if defined(_MSC_VER)
+    #define int64_t __int64 /* Spelling used when _MSC_VER is defined. */
+  #else
+    #define int64_t long long
+  #endif
+  #define DBL
+#endif
+#define uint32_t unsigned int32_t /* Valid because int32_t expands to a type keyword. */
+#define uint64_t unsigned int64_t
+
+#endif
diff --git a/soxr-config.h.in b/soxr-config.h.in
new file mode 100644
index 0000000..8e2a603
--- /dev/null
+++ b/soxr-config.h.in
@@ -0,0 +1,50 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined soxr_config_included
+#define soxr_config_included
+
+#define SOXR_VERSION         "@PROJECT_VERSION@" /* At-sign tokens are template placeholders; presumably filled in by cmake -- confirm. */
+#define HAVE_SINGLE_PRECISION @HAVE_SINGLE_PRECISION@
+#define HAVE_DOUBLE_PRECISION @HAVE_DOUBLE_PRECISION@
+#define HAVE_VR               @HAVE_VR@
+#define HAVE_AVFFT            @HAVE_AVFFT@
+#define HAVE_SIMD             @HAVE_SIMD@
+#define HAVE_FENV_H           @HAVE_FENV_H@ /* Selects <fenv.h> in data-io.c when non-zero. */
+#define HAVE_LRINT            @HAVE_LRINT@
+#define WORDS_BIGENDIAN       @WORDS_BIGENDIAN@
+
+#include <limits.h>
+
+#undef bool /* Drop any earlier macro definitions so the int-based ones win. */
+#undef false
+#undef true
+#define bool int
+#define false 0
+#define true 1
+
+#undef int16_t /* Fixed-width names are macros over built-in types here. */
+#undef int32_t
+#undef int64_t
+#undef uint32_t
+#undef uint64_t
+#define int16_t short
+#if LONG_MAX > 2147483647L /* long is wider than 32 bits. */
+  #define int32_t int
+  #define int64_t long
+  #define DBL (double) /* A cast here; empty in the 32-bit-long branch. */
+#elif LONG_MAX < 2147483647L
+#error this library requires that 'long int' has at least 32-bits
+#else /* long is exactly 32 bits. */
+  #define int32_t long
+  #if defined _MSC_VER
+    #define int64_t __int64 /* Spelling used when _MSC_VER is defined. */
+  #else
+    #define int64_t long long
+  #endif
+  #define DBL
+#endif
+#define uint32_t unsigned int32_t /* Valid because int32_t expands to a type keyword. */
+#define uint64_t unsigned int64_t
+
+#endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..f4d467c
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,112 @@
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+add_definitions (${PROJECT_C_FLAGS} -DSOXR_LIB) # SOXR_LIB turns on the lsx_* renames in aliases.h
+
+
+
+# Libsoxr configuration:
+
+set (RDFT32 fft4g32) # FFT source added to the single-precision build below
+if (WITH_AVFFT AND AVCODEC_FOUND) # FFT source for the SIMD build: first match wins
+  set (RDFT32S avfft32s)
+elseif (WITH_PFFFT)
+  set (RDFT32S pffft32s)
+elseif (WITH_SIMD)
+  set (RDFT32S fft4g32s)
+endif ()
+
+if (WITH_DOUBLE_PRECISION)
+  set (DP_SOURCES rate64)
+endif ()
+
+if (WITH_SINGLE_PRECISION)
+  set (SP_SOURCES rate32 ${RDFT32})
+endif ()
+
+if (HAVE_VR)
+  set (VR_SOURCES vr32)
+endif ()
+
+if (HAVE_SIMD)
+  set (SIMD_SOURCES rate32s ${RDFT32S} simd)
+  foreach (source ${SIMD_SOURCES}) # only these sources are compiled with SIMD_C_FLAGS
+    set_property (SOURCE ${source} PROPERTY COMPILE_FLAGS ${SIMD_C_FLAGS})
+  endforeach ()
+endif ()
+
+
+
+# Libsoxr:
+
+add_library (${PROJECT_NAME} ${LIB_TYPE} ${PROJECT_NAME}.c data-io dbesi0 filter fft4g64 # always-built sources
+  ${SP_SOURCES} ${VR_SOURCES} ${DP_SOURCES} ${SIMD_SOURCES}) # optional sets chosen above; may be empty
+set_target_properties (${PROJECT_NAME} PROPERTIES
+  VERSION "${SO_VERSION}"
+  SOVERSION ${SO_VERSION_MAJOR}
+  INSTALL_NAME_DIR ${LIB_INSTALL_DIR}
+  LINK_INTERFACE_LIBRARIES "" # empty link interface for dependants
+  PUBLIC_HEADER "${PROJECT_NAME}.h")
+if (BUILD_FRAMEWORK)
+  set_target_properties (${PROJECT_NAME} PROPERTIES FRAMEWORK TRUE)
+elseif (NOT WIN32) # otherwise, on non-Windows, generate and install a pkg-config file
+  set (TARGET_PCS ${CMAKE_CURRENT_BINARY_DIR}/lib${PROJECT_NAME}.pc)
+  configure_file (${CMAKE_CURRENT_SOURCE_DIR}/lib${PROJECT_NAME}.pc.in ${TARGET_PCS})
+  install (FILES ${CMAKE_CURRENT_BINARY_DIR}/lib${PROJECT_NAME}.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
+endif ()
+
+
+
+# LSR bindings:
+
+if (WITH_LSR_BINDINGS) # optional second library, built from lsr.c and linked to the main one
+  set (LSR ${PROJECT_NAME}-lsr)
+  set (LSR_SO_VERSION 0.1.8) # versioned separately from the main library
+  set (LSR_SO_VERSION_MAJOR 0)
+  add_library (${LSR} ${LIB_TYPE} lsr)
+  target_link_libraries (${LSR} ${PROJECT_NAME})
+  set_target_properties (${LSR} PROPERTIES
+    VERSION "${LSR_SO_VERSION}"
+    SOVERSION ${LSR_SO_VERSION_MAJOR}
+    INSTALL_NAME_DIR ${LIB_INSTALL_DIR}
+    LINK_INTERFACE_LIBRARIES ""
+    PUBLIC_HEADER "${LSR}.h")
+  if (BUILD_FRAMEWORK)
+    set_target_properties (${LSR} PROPERTIES FRAMEWORK TRUE)
+  elseif (NOT WIN32)
+    set (TARGET_PCS "${TARGET_PCS} ${CMAKE_CURRENT_BINARY_DIR}/lib${LSR}.pc") # space-separated, appended to the main .pc path
+    configure_file (${CMAKE_CURRENT_SOURCE_DIR}/lib${LSR}.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lib${LSR}.pc)
+    install (FILES ${CMAKE_CURRENT_BINARY_DIR}/lib${LSR}.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
+  endif ()
+endif ()
+
+
+
+# Installation (from build from source):
+
+install (TARGETS ${PROJECT_NAME} ${LSR} # LSR is unset, hence omitted, without WITH_LSR_BINDINGS
+  FRAMEWORK DESTINATION ${FRAMEWORK_INSTALL_DIR}
+  LIBRARY DESTINATION ${LIB_INSTALL_DIR}
+  RUNTIME DESTINATION ${BIN_INSTALL_DIR}
+  ARCHIVE DESTINATION ${LIB_INSTALL_DIR}
+  PUBLIC_HEADER DESTINATION ${INCLUDE_INSTALL_DIR}) # headers named by each target's PUBLIC_HEADER
+
+
+
+# Packaging (for unix-like distributions):
+
+get_property (LIB1 TARGET ${PROJECT_NAME} PROPERTY LOCATION) # NOTE(review): LOCATION is restricted by policy CMP0026 in newer CMake -- confirm
+if (BUILD_SHARED_LIBS) # replace the bare path by its two version-suffixed names
+  set (LIB1 ${LIB1}.${SO_VERSION_MAJOR} ${LIB1}.${SO_VERSION})
+endif ()
+list (APPEND TARGET_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_NAME}.h")
+if (WITH_LSR_BINDINGS) # same treatment for the optional LSR library
+  get_property (LIB2 TARGET ${LSR} PROPERTY LOCATION)
+  if (BUILD_SHARED_LIBS)
+    set (LIB2 ${LIB2}.${LSR_SO_VERSION_MAJOR} ${LIB2}.${LSR_SO_VERSION})
+  endif ()
+  list (APPEND TARGET_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/${LSR}.h")
+endif ()
+set (TARGET_LIBS ${LIB1} ${LIB2}) # presumably consumed by the .src.in templates below -- confirm
+configure_file (${CMAKE_CURRENT_SOURCE_DIR}/libsoxr.src.in ${CMAKE_CURRENT_BINARY_DIR}/libsoxr.src)
+configure_file (${CMAKE_CURRENT_SOURCE_DIR}/libsoxr-dev.src.in ${CMAKE_CURRENT_BINARY_DIR}/libsoxr-dev.src)
diff --git a/src/aliases.h b/src/aliases.h
new file mode 100644
index 0000000..035830d
--- /dev/null
+++ b/src/aliases.h
@@ -0,0 +1,37 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if defined SOXR_LIB /* Only when building the library: map each lsx_ name to a _soxr_ one. */
+
+#define lsx_bessel_I_0                 _soxr_bessel_I_0
+#define lsx_cdft_f                     _soxr_cdft_f
+#define lsx_cdft                       _soxr_cdft
+#define lsx_clear_fft_cache_f          _soxr_clear_fft_cache_f
+#define lsx_clear_fft_cache            _soxr_clear_fft_cache
+#define lsx_ddct_f                     _soxr_ddct_f
+#define lsx_ddct                       _soxr_ddct
+#define lsx_ddst_f                     _soxr_ddst_f
+#define lsx_ddst                       _soxr_ddst
+#define lsx_design_lpf                 _soxr_design_lpf
+#define lsx_dfct_f                     _soxr_dfct_f
+#define lsx_dfct                       _soxr_dfct
+#define lsx_dfst_f                     _soxr_dfst_f
+#define lsx_dfst                       _soxr_dfst
+#define lsx_fir_to_phase               _soxr_fir_to_phase
+#define lsx_init_fft_cache_f           _soxr_init_fft_cache_f
+#define lsx_init_fft_cache             _soxr_init_fft_cache
+#define lsx_kaiser_beta                _soxr_kaiser_beta
+#define lsx_kaiser_params              _soxr_kaiser_params
+#define lsx_make_lpf                   _soxr_make_lpf
+#define lsx_ordered_convolve_f         _soxr_ordered_convolve_f
+#define lsx_ordered_convolve           _soxr_ordered_convolve
+#define lsx_ordered_partial_convolve_f _soxr_ordered_partial_convolve_f
+#define lsx_ordered_partial_convolve   _soxr_ordered_partial_convolve
+#define lsx_rdft_f                     _soxr_rdft_f
+#define lsx_rdft                       _soxr_rdft
+#define lsx_safe_cdft_f                _soxr_safe_cdft_f
+#define lsx_safe_cdft                  _soxr_safe_cdft
+#define lsx_safe_rdft_f                _soxr_safe_rdft_f
+#define lsx_safe_rdft                  _soxr_safe_rdft
+
+#endif /* SOXR_LIB */
diff --git a/src/avfft32.c b/src/avfft32.c
new file mode 100644
index 0000000..ab47be5
--- /dev/null
+++ b/src/avfft32.c
@@ -0,0 +1,27 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <math.h>
+#include <libavcodec/avfft.h>
+#include "filter.h"
+
+static void * forward_setup(int len) {int const nbits = (int)(log(len)/log(2)+.5); return av_rdft_init(nbits, DFT_R2C);}
+static void * backward_setup(int len) {int const nbits = (int)(log(len)/log(2)+.5); return av_rdft_init(nbits, IDFT_C2R);}
+static void rdft(int unused_len, void * plan, float * data) {(void)unused_len; av_rdft_calc(plan, data);}
+static int multiplier(void) {return 2;}
+static void nothing(void) {return;}
+
+typedef void (* fn_t)(void);
+fn_t _soxr_rdft32_cb[] = { /* Every entry is stored as a generic function pointer. */
+  (fn_t)forward_setup,                     /* [0] plan for DFT_R2C, log2 size rounded to nearest */
+  (fn_t)backward_setup,                    /* [1] plan for IDFT_C2R */
+  (fn_t)av_rdft_end,                       /* [2] libavcodec's plan destructor */
+  (fn_t)rdft,                              /* [3]..[6] all use the same in-place transform; */
+  (fn_t)rdft,                              /*          the plan decides the direction.      */
+  (fn_t)rdft,
+  (fn_t)rdft,
+  (fn_t)_soxr_ordered_convolve_f,          /* [7] */
+  (fn_t)_soxr_ordered_partial_convolve_f,  /* [8] */
+  (fn_t)multiplier,                        /* [9] constant 2 */
+  (fn_t)nothing,                           /* [10] no-op */
+};
diff --git a/src/avfft32s.c b/src/avfft32s.c
new file mode 100644
index 0000000..9345377
--- /dev/null
+++ b/src/avfft32s.c
@@ -0,0 +1,27 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <math.h>
+#include <libavcodec/avfft.h>
+#include "simd.h"
+
+static void * forward_setup(int len) {int const nbits = (int)(log(len)/log(2)+.5); return av_rdft_init(nbits, DFT_R2C);}
+static void * backward_setup(int len) {int const nbits = (int)(log(len)/log(2)+.5); return av_rdft_init(nbits, IDFT_C2R);}
+static void rdft(int unused_len, void * plan, float * data) {(void)unused_len; av_rdft_calc(plan, data);}
+static int multiplier(void) {return 2;}
+static void nothing(void) {return;}
+
+typedef void (* fn_t)(void);
+fn_t _soxr_rdft32s_cb[] = { /* Every entry is stored as a generic function pointer. */
+  (fn_t)forward_setup,                        /* [0] plan for DFT_R2C, log2 size rounded to nearest */
+  (fn_t)backward_setup,                       /* [1] plan for IDFT_C2R */
+  (fn_t)av_rdft_end,                          /* [2] libavcodec's plan destructor */
+  (fn_t)rdft,                                 /* [3]..[6] all use the same in-place transform; */
+  (fn_t)rdft,                                 /*          the plan decides the direction.      */
+  (fn_t)rdft,
+  (fn_t)rdft,
+  (fn_t)_soxr_ordered_convolve_simd,          /* [7] */
+  (fn_t)_soxr_ordered_partial_convolve_simd,  /* [8] */
+  (fn_t)multiplier,                           /* [9] constant 2 */
+  (fn_t)nothing,                              /* [10] no-op */
+};
diff --git a/src/ccrw2.h b/src/ccrw2.h
new file mode 100644
index 0000000..6195532
--- /dev/null
+++ b/src/ccrw2.h
@@ -0,0 +1,73 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Concurrent Control with "Readers" and "Writers", P.J. Courtois et al, 1971 */
+
+#if !defined ccrw2_included
+#define ccrw2_included
+
+#include "internal.h"
+
+#if defined _OPENMP
+
+#include <omp.h>
+
+typedef struct {
+  int readcount, writecount; /* initial value = 0 (not set by ccrw2_init) */
+  omp_lock_t mutex_1, mutex_2, mutex_3, w, r; /* initial value = 1 (unlocked) */
+} ccrw2_t; /* Problem #2: `writers-preference' */
+
+#define ccrw2_become_reader(p) do { /* p: ccrw2_t lvalue, evaluated repeatedly */\
+  omp_set_lock(&(p).mutex_3);\
+    omp_set_lock(&(p).r);\
+      omp_set_lock(&(p).mutex_1);\
+        if (++(p).readcount == 1) omp_set_lock(&(p).w); /* first reader takes w */\
+      omp_unset_lock(&(p).mutex_1);\
+    omp_unset_lock(&(p).r);\
+  omp_unset_lock(&(p).mutex_3);\
+} while (0)
+#define ccrw2_cease_reading(p) do {\
+  omp_set_lock(&(p).mutex_1);\
+    if (!--(p).readcount) omp_unset_lock(&(p).w); /* last reader releases w */\
+  omp_unset_lock(&(p).mutex_1);\
+} while (0)
+#define ccrw2_become_writer(p) do {\
+  omp_set_lock(&(p).mutex_2);\
+    if (++(p).writecount == 1) omp_set_lock(&(p).r); /* first writer takes r */\
+  omp_unset_lock(&(p).mutex_2);\
+  omp_set_lock(&(p).w); /* w is held for the duration of the write */\
+} while (0)
+#define ccrw2_cease_writing(p) do {\
+  omp_unset_lock(&(p).w);\
+  omp_set_lock(&(p).mutex_2);\
+    if (!--(p).writecount) omp_unset_lock(&(p).r); /* last writer releases r */\
+  omp_unset_lock(&(p).mutex_2);\
+} while (0)
+#define ccrw2_init(p) do { /* creates the five locks; counters are left as-is */\
+  omp_init_lock(&(p).mutex_1);\
+  omp_init_lock(&(p).mutex_2);\
+  omp_init_lock(&(p).mutex_3);\
+  omp_init_lock(&(p).w);\
+  omp_init_lock(&(p).r);\
+} while (0)
+#define ccrw2_clear(p) do { /* destroys the locks in reverse order of creation */\
+  omp_destroy_lock(&(p).r);\
+  omp_destroy_lock(&(p).w);\
+  omp_destroy_lock(&(p).mutex_3);\
+  omp_destroy_lock(&(p).mutex_2);\
+  omp_destroy_lock(&(p).mutex_1);\
+} while (0)
+
+#else
+
+typedef int ccrw2_t; /* Without OpenMP: a dummy type and no locking at all. */
+#define ccrw2_become_reader(x) (void)(x) /* Each operation merely evaluates */
+#define ccrw2_cease_reading(x) (void)(x) /* its argument and discards it.   */
+#define ccrw2_become_writer(x) (void)(x)
+#define ccrw2_cease_writing(x) (void)(x)
+#define ccrw2_init(x) (void)(x)
+#define ccrw2_clear(x) (void)(x)
+
+#endif /* _OPENMP */
+
+#endif
diff --git a/src/data-io.c b/src/data-io.c
new file mode 100644
index 0000000..dbbd30f
--- /dev/null
+++ b/src/data-io.c
@@ -0,0 +1,249 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <limits.h>
+#include <math.h>
+#include <string.h>
+
+#include "data-io.h"
+#include "internal.h"
+
+
+/* Split n frames of ch interleaved T samples at *src0 into dest[ch][n]. */
+#define DEINTERLEAVE_FROM(T,flag) do { /* flag != 0 selects memcpy, so T must then match dest's type */ \
+  unsigned i; \
+  size_t j; \
+  T const * src = *src0; \
+  if (ch > 1) /* frame-major input, one output array per channel */ \
+    for (j = 0; j < n; ++j) for (i = 0; i < ch; ++i) dest[i][j] = (DEINTERLEAVE_TO)*src++; \
+  else if (flag) memcpy(dest[0], src, n * sizeof(T)), src = &src[n]; /* mono, same type: bulk copy */ \
+  else for (j = 0; j < n; dest[0][j++] = (DEINTERLEAVE_TO)*src++); /* mono: convert per sample */ \
+  *src0 = src; /* leave the caller's pointer just past the consumed input */ \
+} while (0)
+
+
+/* De-interleave n frames of ch channels from *src0 into double arrays. */
+#if HAVE_DOUBLE_PRECISION
+void _soxr_deinterleave(double * * dest, /* Round/clipping not needed here */
+    soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch)
+{
+#define DEINTERLEAVE_TO double /* element type written by DEINTERLEAVE_FROM */
+  switch (data_type & 3) { /* only the low two bits select the input format */
+    case SOXR_FLOAT32: DEINTERLEAVE_FROM(float, 0); break;
+    case SOXR_FLOAT64: DEINTERLEAVE_FROM(double, 1); break; /* same type: memcpy when mono */
+    case SOXR_INT32:   DEINTERLEAVE_FROM(int32_t, 0); break;
+    case SOXR_INT16:   DEINTERLEAVE_FROM(int16_t, 0); break;
+    default: break;
+  }
+}
+#endif
+
+
+/* De-interleave n frames of ch channels from *src0 into float arrays. */
+#if HAVE_SINGLE_PRECISION
+void _soxr_deinterleave_f(float * * dest, /* Round/clipping not needed here */
+    soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch)
+{
+#undef DEINTERLEAVE_TO /* may still be set by the double-precision variant */
+#define DEINTERLEAVE_TO float
+  switch (data_type & 3) { /* only the low two bits select the input format */
+    case SOXR_FLOAT32: DEINTERLEAVE_FROM(float, 1); break; /* same type: memcpy when mono */
+    case SOXR_FLOAT64: DEINTERLEAVE_FROM(double, 0); break;
+    case SOXR_INT32:   DEINTERLEAVE_FROM(int32_t, 0); break;
+    case SOXR_INT16:   DEINTERLEAVE_FROM(int16_t, 0); break;
+    default: break;
+  }
+}
+#endif
+
+
+
+#include "rint.h"
+
+#if HAVE_FENV_H
+  #include <fenv.h>
+#elif defined _MSC_VER /* No <fenv.h>: minimal substitutes built on MSVC inline asm. */
+  #define FE_INVALID    1 /* Bit masks applied to the FPU status word below. */
+  #define FE_DIVBYZERO  4
+  #define FE_OVERFLOW   8
+  #define FE_UNDERFLOW  16
+  #define FE_INEXACT    32
+  #define FE_ALL_EXCEPT (FE_INEXACT|FE_DIVBYZERO|FE_UNDERFLOW|FE_OVERFLOW|FE_INVALID)
+  static __inline int fetestexcept(int excepts) /* Returns the raised subset of excepts. */
+  {
+    short status_word; /* Filled in by fnstsw. */
+    __asm fnstsw status_word
+    return status_word & excepts & FE_ALL_EXCEPT;
+  }
+
+  static __inline int feclearexcept(int excepts) /* Clears the given flags; always returns 0. */
+  {
+    int16_t status[14]; /* Buffer for fnstenv/fldenv; presumably the 28-byte 32-bit layout -- confirm. */
+    __asm fnstenv status
+    status[2] &= ~(excepts & FE_ALL_EXCEPT); /* Word 2 is treated as the status word. */
+    __asm fldenv status
+    return 0;
+  }
+#endif
+
+
+/* With FP-exception macros, FPU_RINT32 and C99: enable FENV_ACCESS for the code below. */
+#if defined FE_INVALID && defined FPU_RINT32 && defined __STDC_VERSION__
+  #if __STDC_VERSION__ >= 199901L
+    #pragma STDC FENV_ACCESS ON
+  #endif
+#endif
+/* Each #include below stamps out one converter family from the macros set just before it. */
+#if HAVE_DOUBLE_PRECISION
+#define FLOATX double /* source sample type for all three families */
+
+#define LSX_RINT_CLIP_2 lsx_rint32_clip_2 /* called with (dest0, src, ch, n) below */
+#define LSX_RINT_CLIP lsx_rint32_clip /* called with (dest0, src[0], n) below */
+#define RINT_CLIP rint32_clip
+#define RINT rint32
+#if defined FPU_RINT32
+  #define FPU_RINT
+#endif
+#define RINT_T int32_t
+#define RINT_MAX 2147483647L
+#include "rint-clip.h" /* NOTE(review): presumably #undefs the parameters, as they are re-#defined below -- confirm */
+
+#define LSX_RINT_CLIP_2 lsx_rint16_clip_2
+#define LSX_RINT_CLIP lsx_rint16_clip
+#define RINT_CLIP rint16_clip
+#define RINT rint16
+#if defined FPU_RINT16
+  #define FPU_RINT
+#endif
+#define RINT_T int16_t
+#define RINT_MAX 32767
+#include "rint-clip.h"
+
+#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither /* as above, plus a trailing seed argument */
+#define LSX_RINT_CLIP lsx_rint16_clip_dither
+#define RINT_CLIP rint16_clip_dither
+#define RINT rint16
+#if defined FPU_RINT16
+  #define FPU_RINT
+#endif
+#define RINT_T int16_t
+#define RINT_MAX 32767
+#define DITHER /* the only difference from the previous parameter set besides the names */
+#include "rint-clip.h"
+
+#undef FLOATX
+#endif
+
+
+/* Same three converter families as for double, with float sources and _f names. */
+#if HAVE_SINGLE_PRECISION
+#define FLOATX float /* source sample type for all three families */
+
+#define LSX_RINT_CLIP_2 lsx_rint32_clip_2_f /* called with (dest0, src, ch, n) below */
+#define LSX_RINT_CLIP lsx_rint32_clip_f /* called with (dest0, src[0], n) below */
+#define RINT_CLIP rint32_clip_f
+#define RINT rint32
+#if defined FPU_RINT32
+  #define FPU_RINT
+#endif
+#define RINT_T int32_t
+#define RINT_MAX 2147483647L
+#include "rint-clip.h" /* NOTE(review): presumably #undefs the parameters, as they are re-#defined below -- confirm */
+
+#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_f
+#define LSX_RINT_CLIP lsx_rint16_clip_f
+#define RINT_CLIP rint16_clip_f
+#define RINT rint16
+#if defined FPU_RINT16
+  #define FPU_RINT
+#endif
+#define RINT_T int16_t
+#define RINT_MAX 32767
+#include "rint-clip.h"
+
+#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither_f /* as above, plus a trailing seed argument */
+#define LSX_RINT_CLIP lsx_rint16_clip_dither_f
+#define RINT_CLIP rint16_clip_dither_f
+#define RINT rint16
+#if defined FPU_RINT16
+  #define FPU_RINT
+#endif
+#define RINT_T int16_t
+#define RINT_MAX 32767
+#define DITHER /* the only difference from the previous parameter set besides the names */
+#include "rint-clip.h"
+
+#undef FLOATX
+#endif
+/* Switch FENV_ACCESS back off under the same conditions that switched it on. */
+#if defined FE_INVALID && defined FPU_RINT32 && defined __STDC_VERSION__
+  #if __STDC_VERSION__ >= 199901L
+    #pragma STDC FENV_ACCESS OFF
+  #endif
+#endif
+
+
+/* Merge src[ch][n] into n interleaved frames of T at *dest0, then return 0. */
+#define INTERLEAVE_TO(T,flag) do { /* flag != 0 selects memcpy, so T must then match src's type */ \
+  unsigned i; \
+  size_t j; \
+  T * dest = *dest0; \
+  if (ch > 1) /* emit one sample per channel for each frame */ \
+  for (j = 0; j < n; ++j) for (i = 0; i < ch; ++i) *dest++ = (T)src[i][j]; \
+  else if (flag) memcpy(dest, src[0], n * sizeof(T)), dest = &dest[n]; /* mono, same type: bulk copy */ \
+  else for (j = 0; j < n; *dest++ = (T)src[0][j++]); /* mono: convert per sample */ \
+  *dest0 = dest; /* advance the caller's pointer past what was written */ \
+  return 0; /* returns from the enclosing function with a zero result */ \
+} while (0)
+
+#if HAVE_DOUBLE_PRECISION
+size_t /* clips */ _soxr_interleave(soxr_datatype_t data_type, void * * dest0,
+  double const * const * src, size_t n, unsigned ch, unsigned long * seed)
+{ /* Writes n frames from src[ch][n] to *dest0 in the requested format. */
+  int const mono = ch == 1; /* Mono goes to the single-array workers. */
+  switch (data_type & 3) {
+    /* Float outputs: the macro stores, advances *dest0 and returns 0. */
+    case SOXR_FLOAT32: INTERLEAVE_TO(float, 0);
+    case SOXR_FLOAT64: INTERLEAVE_TO(double, 1);
+
+    case SOXR_INT32: /* Integer outputs return the worker's result. */
+      if (mono) return lsx_rint32_clip(dest0, src[0], n);
+      return lsx_rint32_clip_2(dest0, src, ch, n);
+
+    case SOXR_INT16: /* A non-null seed selects the _dither workers. */
+      if (seed && mono) return lsx_rint16_clip_dither(dest0, src[0], n, seed);
+      if (seed) return lsx_rint16_clip_2_dither(dest0, src, ch, n, seed);
+      if (mono) return lsx_rint16_clip(dest0, src[0], n);
+      return lsx_rint16_clip_2(dest0, src, ch, n);
+
+    default: break;
+  }
+  return 0;
+}
+#endif
+
+#if HAVE_SINGLE_PRECISION
+size_t /* clips */ _soxr_interleave_f(soxr_datatype_t data_type, void * * dest0,
+  float const * const * src, size_t n, unsigned ch, unsigned long * seed)
+{ /* Writes n frames from src[ch][n] to *dest0 in the requested format. */
+  int const mono = ch == 1; /* Mono goes to the single-array workers. */
+  switch (data_type & 3) {
+    /* Float outputs: the macro stores, advances *dest0 and returns 0. */
+    case SOXR_FLOAT32: INTERLEAVE_TO(float, 1);
+    case SOXR_FLOAT64: INTERLEAVE_TO(double, 0);
+
+    case SOXR_INT32: /* Integer outputs return the worker's result. */
+      if (mono) return lsx_rint32_clip_f(dest0, src[0], n);
+      return lsx_rint32_clip_2_f(dest0, src, ch, n);
+
+    case SOXR_INT16: /* A non-null seed selects the _dither workers. */
+      if (seed && mono) return lsx_rint16_clip_dither_f(dest0, src[0], n, seed);
+      if (seed) return lsx_rint16_clip_2_dither_f(dest0, src, ch, n, seed);
+      if (mono) return lsx_rint16_clip_f(dest0, src[0], n);
+      return lsx_rint16_clip_2_f(dest0, src, ch, n);
+
+    default: break;
+  }
+  return 0;
+}
+#endif
diff --git a/src/data-io.h b/src/data-io.h
new file mode 100644
index 0000000..46766d8
--- /dev/null
+++ b/src/data-io.h
@@ -0,0 +1,39 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined soxr_data_io_included
+#define soxr_data_io_included
+
+#include "soxr.h"
+
+void _soxr_deinterleave( /* Interleaved input -> one double array per channel. */
+    double * * dest,            /* dest[ch], each receiving n samples */
+    soxr_datatype_t data_type,  /* input format; only the low 2 bits are used */
+    void const * * src0,        /* in/out: advanced past the consumed input */
+    size_t n,                   /* frames, i.e. samples per channel */
+    unsigned ch);               /* channel count */
+
+void _soxr_deinterleave_f( /* As above, but producing float arrays. */
+    float * * dest,
+    soxr_datatype_t data_type,
+    void const * * src0,
+    size_t n,
+    unsigned ch);
+
+size_t /* clips */ _soxr_interleave( /* Per-channel double arrays -> interleaved output. */
+    soxr_datatype_t data_type,  /* output format; only the low 2 bits are used */
+    void * * dest,              /* in/out for float formats: advanced past the output */
+    double const * const * src, /* src[ch], each holding n samples */
+    size_t n,                   /* frames, i.e. samples per channel */
+    unsigned ch,                /* channel count */
+    unsigned long * seed);      /* 16-bit output only: non-NULL selects the dither path */
+
+size_t /* clips */ _soxr_interleave_f( /* As above, but reading float arrays. */
+    soxr_datatype_t data_type,
+    void * * dest,
+    float const * const * src,
+    size_t n,
+    unsigned ch,
+    unsigned long * seed);
+
+#endif
diff --git a/src/dbesi0.c b/src/dbesi0.c
new file mode 100644
index 0000000..654216e
--- /dev/null
+++ b/src/dbesi0.c
@@ -0,0 +1,149 @@
+/*  Copyright(C) 1996 Takuya OOURA
+
+You may use, copy, modify this code for any purpose and
+without fee.
+
+Package home:  http://www.kurims.kyoto-u.ac.jp/~ooura/bessel.html
+*/
+
+#include "filter.h"
+#define dbesi0 lsx_bessel_I_0
+
+/* Bessel I_0(x) function in double precision */
+
+#include <math.h>
+
+double dbesi0(double x) /* Returns I_0(x); the result depends only on |x|. */
+{
+    int k;
+    double w, t, y;
+    static double a[65] = { /* 5 sets of 13 coefficients, used for |x| < 8.5 */
+        8.5246820682016865877e-11, 2.5966600546497407288e-9,
+        7.9689994568640180274e-8, 1.9906710409667748239e-6,
+        4.0312469446528002532e-5, 6.4499871606224265421e-4,
+        0.0079012345761930579108, 0.071111111109207045212,
+        0.444444444444724909, 1.7777777777777532045,
+        4.0000000000000011182, 3.99999999999999998,
+        1.0000000000000000001,
+        1.1520919130377195927e-10, 2.2287613013610985225e-9,
+        8.1903951930694585113e-8, 1.9821560631611544984e-6,
+        4.0335461940910133184e-5, 6.4495330974432203401e-4,
+        0.0079013012611467520626, 0.071111038160875566622,
+        0.44444450319062699316, 1.7777777439146450067,
+        4.0000000132337935071, 3.9999999968569015366,
+        1.0000000003426703174,
+        1.5476870780515238488e-10, 1.2685004214732975355e-9,
+        9.2776861851114223267e-8, 1.9063070109379044378e-6,
+        4.0698004389917945832e-5, 6.4370447244298070713e-4,
+        0.0079044749458444976958, 0.071105052411749363882,
+        0.44445280640924755082, 1.7777694934432109713,
+        4.0000055808824003386, 3.9999977081165740932,
+        1.0000004333949319118,
+        2.0675200625006793075e-10, -6.1689554705125681442e-10,
+        1.2436765915401571654e-7, 1.5830429403520613423e-6,
+        4.2947227560776583326e-5, 6.3249861665073441312e-4,
+        0.0079454472840953930811, 0.070994327785661860575,
+        0.44467219586283000332, 1.7774588182255374745,
+        4.0003038986252717972, 3.9998233869142057195,
+        1.0000472932961288324,
+        2.7475684794982708655e-10, -3.8991472076521332023e-9,
+        1.9730170483976049388e-7, 5.9651531561967674521e-7,
+        5.1992971474748995357e-5, 5.7327338675433770752e-4,
+        0.0082293143836530412024, 0.069990934858728039037,
+        0.44726764292723985087, 1.7726685170014087784,
+        4.0062907863712704432, 3.9952750700487845355,
+        1.0016354346654179322
+    };
+    static double b[70] = { /* 5 sets of 14 coefficients, used for 8.5 <= |x| < 12.5 */
+        6.7852367144945531383e-8, 4.6266061382821826854e-7,
+        6.9703135812354071774e-6, 7.6637663462953234134e-5,
+        7.9113515222612691636e-4, 0.0073401204731103808981,
+        0.060677114958668837046, 0.43994941411651569622,
+        2.7420017097661750609, 14.289661921740860534,
+        59.820609640320710779, 188.78998681199150629,
+        399.8731367825601118, 427.56411572180478514,
+        1.8042097874891098754e-7, 1.2277164312044637357e-6,
+        1.8484393221474274861e-5, 2.0293995900091309208e-4,
+        0.0020918539850246207459, 0.019375315654033949297,
+        0.15985869016767185908, 1.1565260527420641724,
+        7.1896341224206072113, 37.354773811947484532,
+        155.80993164266268457, 489.5211371158540918,
+        1030.9147225169564806, 1093.5883545113746958,
+        4.8017305613187493564e-7, 3.261317843912380074e-6,
+        4.9073137508166159639e-5, 5.3806506676487583755e-4,
+        0.0055387918291051866561, 0.051223717488786549025,
+        0.42190298621367914765, 3.0463625987357355872,
+        18.895299447327733204, 97.915189029455461554,
+        407.13940115493494659, 1274.3088990480582632,
+        2670.9883037012547506, 2815.7166284662544712,
+        1.2789926338424623394e-6, 8.6718263067604918916e-6,
+        1.3041508821299929489e-4, 0.001428224737372747892,
+        0.014684070635768789378, 0.13561403190404185755,
+        1.1152592585977393953, 8.0387088559465389038,
+        49.761318895895479206, 257.2684232313529138,
+        1066.8543146269566231, 3328.3874581009636362,
+        6948.8586598121634874, 7288.4893398212481055,
+        3.409350368197032893e-6, 2.3079025203103376076e-5,
+        3.4691373283901830239e-4, 0.003794994977222908545,
+        0.038974209677945602145, 0.3594948380414878371,
+        2.9522878893539528226, 21.246564609514287056,
+        131.28727387146173141, 677.38107093296675421,
+        2802.3724744545046518, 8718.5731420798254081,
+        18141.348781638832286, 18948.925349296308859
+    };
+    static double c[45] = { /* 5 sets of 9 coefficients, used for |x| >= 12.5 */
+        2.5568678676452702768e-15, 3.0393953792305924324e-14,
+        6.3343751991094840009e-13, 1.5041298011833009649e-11,
+        4.4569436918556541414e-10, 1.746393051427167951e-8,
+        1.0059224011079852317e-6, 1.0729838945088577089e-4,
+        0.05150322693642527738,
+        5.2527963991711562216e-15, 7.202118481421005641e-15,
+        7.2561421229904797156e-13, 1.482312146673104251e-11,
+        4.4602670450376245434e-10, 1.7463600061788679671e-8,
+        1.005922609132234756e-6, 1.0729838937545111487e-4,
+        0.051503226936437300716,
+        1.3365917359358069908e-14, -1.2932643065888544835e-13,
+        1.7450199447905602915e-12, 1.0419051209056979788e-11,
+        4.58047881980598326e-10, 1.7442405450073548966e-8,
+        1.0059461453281292278e-6, 1.0729837434500161228e-4,
+        0.051503226940658446941,
+        5.3771611477352308649e-14, -1.1396193006413731702e-12,
+        1.2858641335221653409e-11, -5.9802086004570057703e-11,
+        7.3666894305929510222e-10, 1.6731837150730356448e-8,
+        1.0070831435812128922e-6, 1.0729733111203704813e-4,
+        0.051503227360726294675,
+        3.7819492084858931093e-14, -4.8600496888588034879e-13,
+        1.6898350504817224909e-12, 4.5884624327524255865e-11,
+        1.2521615963377513729e-10, 1.8959658437754727957e-8,
+        1.0020716710561353622e-6, 1.073037119856927559e-4,
+        0.05150322383300230775
+    };
+
+    w = fabs(x); /* only the magnitude of x is used from here on */
+    if (w < 8.5) { /* polynomial in t = w*w/16; the integer part of t picks the set */
+        t = w * w * 0.0625;
+        k = 13 * ((int) t);
+        y = (((((((((((a[k] * t + a[k + 1]) * t +
+            a[k + 2]) * t + a[k + 3]) * t + a[k + 4]) * t +
+            a[k + 5]) * t + a[k + 6]) * t + a[k + 7]) * t +
+            a[k + 8]) * t + a[k + 9]) * t + a[k + 10]) * t +
+            a[k + 11]) * t + a[k + 12];
+    } else if (w < 12.5) { /* polynomial in the fractional part of w; (int)w - 8 picks the set */
+        k = (int) w;
+        t = w - k;
+        k = 14 * (k - 8);
+        y = ((((((((((((b[k] * t + b[k + 1]) * t +
+            b[k + 2]) * t + b[k + 3]) * t + b[k + 4]) * t +
+            b[k + 5]) * t + b[k + 6]) * t + b[k + 7]) * t +
+            b[k + 8]) * t + b[k + 9]) * t + b[k + 10]) * t +
+            b[k + 11]) * t + b[k + 12]) * t + b[k + 13];
+    } else { /* polynomial in t = 60/w, scaled by sqrt(t)*exp(w) */
+        t = 60 / w;
+        k = 9 * ((int) t);
+        y = ((((((((c[k] * t + c[k + 1]) * t +
+            c[k + 2]) * t + c[k + 3]) * t + c[k + 4]) * t +
+            c[k + 5]) * t + c[k + 6]) * t + c[k + 7]) * t +
+            c[k + 8]) * sqrt(t) * exp(w);
+    }
+    return y;
+}
diff --git a/src/fft4g.c b/src/fft4g.c
new file mode 100644
index 0000000..5fae8a6
--- /dev/null
+++ b/src/fft4g.c
@@ -0,0 +1,1352 @@
+/* Copyright Takuya OOURA, 1996-2001.
+
+You may use, copy, modify and distribute this code for any
+purpose (include commercial use) and without fee.  Please
+refer to this package when you modify this code.
+
+Package home:  http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+
+Fast Fourier/Cosine/Sine Transform
+    dimension   :one
+    data length :power of 2
+    decimation  :frequency
+    radix       :4, 2
+    data        :inplace
+    table       :use
+functions
+    cdft: Complex Discrete Fourier Transform
+    rdft: Real Discrete Fourier Transform
+    ddct: Discrete Cosine Transform
+    ddst: Discrete Sine Transform
+    dfct: Cosine Transform of RDFT (Real Symmetric DFT)
+    dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
+function prototypes
+    void cdft(int, int, double *, int *, double *);
+    void rdft(int, int, double *, int *, double *);
+    void ddct(int, int, double *, int *, double *);
+    void ddst(int, int, double *, int *, double *);
+    void dfct(int, double *, double *, int *, double *);
+    void dfst(int, double *, double *, int *, double *);
+
+
+-------- Complex DFT (Discrete Fourier Transform) --------
+    [definition]
+        <case1>
+            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
+        <case2>
+            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
+        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            cdft(2*n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            cdft(2*n, -1, a, ip, w);
+    [parameters]
+        2*n            :data length (int)
+                        n >= 1, n = power of 2
+        a[0...2*n-1]   :input/output data (double *)
+                        input data
+                            a[2*j] = Re(x[j]),
+                            a[2*j+1] = Im(x[j]), 0<=j<n
+                        output data
+                            a[2*k] = Re(X[k]),
+                            a[2*k+1] = Im(X[k]), 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            cdft(2*n, -1, a, ip, w);
+        is
+            cdft(2*n, 1, a, ip, w);
+            for (j = 0; j <= 2 * n - 1; j++) {
+                a[j] *= 1.0 / n;
+            }
+        .
+
+
+-------- Real DFT / Inverse of Real DFT --------
+    [definition]
+        <case1> RDFT
+            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
+        <case2> IRDFT (excluding scale)
+            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
+                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
+                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            rdft(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            rdft(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        <case1>
+                            output data
+                                a[2*k] = R[k], 0<=k<n/2
+                                a[2*k+1] = I[k], 0<k<n/2
+                                a[1] = R[n/2]
+                        <case2>
+                            input data
+                                a[2*j] = R[j], 0<=j<n/2
+                                a[2*j+1] = I[j], 0<j<n/2
+                                a[1] = R[n/2]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            rdft(n, 1, a, ip, w);
+        is
+            rdft(n, -1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
+    [definition]
+        <case1> IDCT (excluding scale)
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DCT
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddct(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddct(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        output data
+                            a[k] = C[k], 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            ddct(n, -1, a, ip, w);
+        is
+            a[0] *= 0.5;
+            ddct(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DST (Discrete Sine Transform) / Inverse of DST --------
+    [definition]
+        <case1> IDST (excluding scale)
+            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DST
+            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddst(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddst(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        <case1>
+                            input data
+                                a[j] = A[j], 0<j<n
+                                a[0] = A[n]
+                            output data
+                                a[k] = S[k], 0<=k<n
+                        <case2>
+                            output data
+                                a[k] = S[k], 0<k<n
+                                a[0] = S[n]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            ddst(n, -1, a, ip, w);
+        is
+            a[0] *= 0.5;
+            ddst(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
+    [definition]
+        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
+    [usage]
+        ip[0] = 0; // first time only
+        dfct(n, a, t, ip, w);
+    [parameters]
+        n              :data length - 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n]       :input/output data (double *)
+                        output data
+                            a[k] = C[k], 0<=k<=n
+        t[0...n/2]     :work area (double *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+        is
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+            for (j = 0; j <= n; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
+    [definition]
+        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
+    [usage]
+        ip[0] = 0; // first time only
+        dfst(n, a, t, ip, w);
+    [parameters]
+        n              :data length + 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        output data
+                            a[k] = S[k], 0<k<n
+                        (a[0] is used for work area)
+        t[0...n/2-1]   :work area (double *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            dfst(n, a, t, ip, w);
+        is
+            dfst(n, a, t, ip, w);
+            for (j = 1; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+Appendix :
+    The cos/sin table is recalculated when a larger table is required.
+    w[] and ip[] are compatible with all routines.
+*/
+
+
+#include <math.h>
+#include "fft4g.h"
+/* FFT4G_FLOAT selects a single-precision build: the type, math calls and exported names are remapped. */
+#ifdef FFT4G_FLOAT
+  #define double float
+  #define one_half 0.5f
+  /* With _MSC_VER the double-precision math calls are kept and their results are cast to float. */
+#if defined _MSC_VER
+  #define sin   (float)sin
+  #define cos   (float)cos
+  #define atan  (float)atan
+#else /* !_MSC_VER */
+  #define sin   sinf
+  #define cos   cosf
+  #define atan  atanf
+#endif /* _MSC_VER */
+  /* In this build the public entry points carry an _f suffix. */
+  #define cdft  lsx_cdft_f
+  #define rdft  lsx_rdft_f
+  #define ddct  lsx_ddct_f
+  #define ddst  lsx_ddst_f
+  #define dfct  lsx_dfct_f
+  #define dfst  lsx_dfst_f
+#else /* !FFT4G_FLOAT */
+  #define one_half 0.5
+  #define cdft  lsx_cdft
+  #define rdft  lsx_rdft
+  #define ddct  lsx_ddct
+  #define ddst  lsx_ddst
+  #define dfct  lsx_dfct
+  #define dfst  lsx_dfst
+#endif /* FFT4G_FLOAT */
+/* Prototypes for this file's static helper routines. */
+static void bitrv2conj(int n, int *ip, double *a);
+static void bitrv2(int n, int *ip, double *a);
+static void cft1st(int n, double *a, double const *w);
+static void cftbsub(int n, double *a, double const *w);
+static void cftfsub(int n, double *a, double const *w);
+static void cftmdl(int n, int l, double *a, double const *w);
+static void dctsub(int n, double *a, int nc, double const *c);
+static void dstsub(int n, double *a, int nc, double const *c);
+static void makect(int nc, int *ip, double *c);
+static void makewt(int nw, int *ip, double *w);
+static void rftbsub(int n, double *a, int nc, double const *c);
+static void rftfsub(int n, double *a, int nc, double const *c);
+
+/* In-place complex DFT of a[0...n-1]; isgn >= 0 runs cftfsub, isgn < 0 runs cftbsub. */
+void cdft(int n, int isgn, double *a, int *ip, double *w)
+{
+    if (n > (ip[0] << 2)) { /* table size recorded in ip[0] is too small for n */
+        makewt(n >> 2, ip, w);
+    }
+    if (n > 4) {
+        if (isgn >= 0) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+        } else {
+            bitrv2conj(n, ip + 2, a);
+            cftbsub(n, a, w);
+        }
+    } else if (n == 4) { /* no reordering pass here, and isgn is not consulted */
+        cftfsub(n, a, w);
+    }
+}
+
+/* In-place real DFT (isgn >= 0) or its unscaled inverse (isgn < 0) of a[0...n-1]. */
+void rdft(int n, int isgn, double *a, int *ip, double *w)
+{
+    int nw, nc;
+    double xi;
+
+    nw = ip[0]; /* size of the table currently held in w[], as recorded by makewt */
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1]; /* size of the second table, which is stored at w + nw */
+    if (n > (nc << 2)) {
+        nc = n >> 2;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xi = a[0] - a[1];
+        a[0] += a[1];
+        a[1] = xi; /* a[0] now holds the sum and a[1] the difference of the first two values */
+    } else {
+        a[1] = one_half * (a[0] - a[1]); /* undo the sum/difference packing of a[0], a[1] first */
+        a[0] -= a[1];
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+}
+
+/* In-place cosine transform of a[0...n-1]; the sign of isgn selects which of the two documented cases runs. */
+void ddct(int n, int isgn, double *a, int *ip, double *w)
+{
+    int j, nw, nc;
+    double xr;
+
+    nw = ip[0]; /* size of the table currently held in w[] */
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1]; /* size of the second table, which is stored at w + nw */
+    if (n > nc) { /* the second table is sized to n here */
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) { /* this branch runs before dctsub */
+        xr = a[n - 1];
+        for (j = n - 2; j >= 2; j -= 2) { /* walks downwards: a[j - 1] is read before it is overwritten */
+            a[j + 1] = a[j] - a[j - 1];
+            a[j] += a[j - 1];
+        }
+        a[1] = a[0] - xr;
+        a[0] += xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+    dctsub(n, a, nc, w + nw); /* called for either sign of isgn */
+    if (isgn >= 0) { /* this branch runs after dctsub */
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xr = a[0] - a[1];
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) { /* walks upwards: a[j + 1] is read before it is overwritten */
+            a[j - 1] = a[j] - a[j + 1];
+            a[j] += a[j + 1];
+        }
+        a[n - 1] = xr;
+    }
+}
+
+/* In-place sine transform of a[0...n-1]; the sign of isgn selects which of the two documented cases runs. */
+void ddst(int n, int isgn, double *a, int *ip, double *w)
+{
+    int j, nw, nc;
+    double xr;
+
+    nw = ip[0]; /* size of the table currently held in w[] */
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1]; /* size of the second table, which is stored at w + nw */
+    if (n > nc) { /* the second table is sized to n here */
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) { /* this branch runs before dstsub */
+        xr = a[n - 1];
+        for (j = n - 2; j >= 2; j -= 2) { /* walks downwards: a[j - 1] is read before it is overwritten */
+            a[j + 1] = -a[j] - a[j - 1];
+            a[j] -= a[j - 1];
+        }
+        a[1] = a[0] + xr;
+        a[0] -= xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+    dstsub(n, a, nc, w + nw); /* called for either sign of isgn */
+    if (isgn >= 0) { /* this branch runs after dstsub */
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xr = a[0] - a[1];
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) { /* walks upwards: a[j + 1] is read before it is overwritten */
+            a[j - 1] = -a[j] - a[j + 1];
+            a[j] -= a[j + 1];
+        }
+        a[n - 1] = -xr;
+    }
+}
+
+/* In-place cosine transform of a[0...n] (n + 1 values), using t[0...n/2] as scratch space. */
+void dfct(int n, double *a, double *t, int *ip, double *w)
+{
+    int j, k, l, m, mh, nw, nc;
+    double xr, xi, yr, yi;
+
+    nw = ip[0]; /* size of the table currently held in w[] */
+    if (n > (nw << 3)) { /* the first table is sized n >> 3 for this transform */
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1]; /* size of the second table, which is stored at w + nw */
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    m = n >> 1;
+    yi = a[m];
+    xi = a[0] + a[n];
+    a[0] -= a[n];
+    t[0] = xi - yi;
+    t[m] = xi + yi;
+    if (n > 2) {
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) { /* differences stay in a[], sums are combined into t[] */
+            k = m - j;
+            xr = a[j] - a[n - j];
+            xi = a[j] + a[n - j];
+            yr = a[k] - a[n - k];
+            yi = a[k] + a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi - yi;
+            t[k] = xi + yi;
+        }
+        t[mh] = a[mh] + a[n - mh];
+        a[mh] -= a[n - mh];
+        dctsub(m, a, nc, w + nw);
+        if (m > 4) {
+            bitrv2(m, ip + 2, a);
+            cftfsub(m, a, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, w);
+        }
+        a[n - 1] = a[0] - a[1];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) { /* spread the results onto odd indices of a[] */
+            a[2 * j + 1] = a[j] + a[j + 1];
+            a[2 * j - 1] = a[j] - a[j + 1];
+        }
+        l = 2;
+        m = mh;
+        while (m >= 2) { /* repeat on t[]: m is halved and l is doubled on every pass */
+            dctsub(m, t, nc, w + nw);
+            if (m > 4) {
+                bitrv2(m, ip + 2, t);
+                cftfsub(m, t, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, w);
+            }
+            a[n - l] = t[0] - t[1];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2; /* k advances in steps of 4 * l */
+                a[k - l] = t[j] - t[j + 1];
+                a[k + l] = t[j] + t[j + 1];
+            }
+            l <<= 1;
+            mh = m >> 1;
+            for (j = 0; j < mh; j++) { /* fold t[m...2m] down into t[0...m] for the next pass */
+                k = m - j;
+                t[j] = t[m + k] - t[m + j];
+                t[k] = t[m + k] + t[m + j];
+            }
+            t[mh] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0];
+        a[n] = t[2] - t[1];
+        a[0] = t[2] + t[1];
+    } else { /* n <= 2: no sub-transform is run */
+        a[1] = a[0];
+        a[2] = t[0];
+        a[0] = t[1];
+    }
+}
+
+/* In-place sine transform of a[1...n-1], using t[0...n/2-1] as scratch space; a[0] is set to 0 on return. */
+void dfst(int n, double *a, double *t, int *ip, double *w)
+{
+    int j, k, l, m, mh, nw, nc;
+    double xr, xi, yr, yi;
+
+    nw = ip[0]; /* size of the table currently held in w[] */
+    if (n > (nw << 3)) { /* the first table is sized n >> 3 for this transform */
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1]; /* size of the second table, which is stored at w + nw */
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    if (n > 2) {
+        m = n >> 1;
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) { /* sums stay in a[], differences are combined into t[] */
+            k = m - j;
+            xr = a[j] + a[n - j];
+            xi = a[j] - a[n - j];
+            yr = a[k] + a[n - k];
+            yi = a[k] - a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi + yi;
+            t[k] = xi - yi;
+        }
+        t[0] = a[mh] - a[n - mh];
+        a[mh] += a[n - mh];
+        a[0] = a[m]; /* a[0] serves as a work slot from here on */
+        dstsub(m, a, nc, w + nw);
+        if (m > 4) {
+            bitrv2(m, ip + 2, a);
+            cftfsub(m, a, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, w);
+        }
+        a[n - 1] = a[1] - a[0];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) { /* spread the results onto odd indices of a[] */
+            a[2 * j + 1] = a[j] - a[j + 1];
+            a[2 * j - 1] = -a[j] - a[j + 1];
+        }
+        l = 2;
+        m = mh;
+        while (m >= 2) { /* repeat on t[]: m is halved and l is doubled on every pass */
+            dstsub(m, t, nc, w + nw);
+            if (m > 4) {
+                bitrv2(m, ip + 2, t);
+                cftfsub(m, t, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, w);
+            }
+            a[n - l] = t[1] - t[0];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2; /* k advances in steps of 4 * l */
+                a[k - l] = -t[j] - t[j + 1];
+                a[k + l] = t[j] - t[j + 1];
+            }
+            l <<= 1;
+            mh = m >> 1;
+            for (j = 1; j < mh; j++) { /* fold the upper part of t[] down for the next pass */
+                k = m - j;
+                t[j] = t[m + k] + t[m + j];
+                t[k] = t[m + k] - t[m + j];
+            }
+            t[0] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0];
+    }
+    a[0] = 0; /* executed for every n, including n <= 2 */
+}
+
+
+/* -------- initializing routines -------- */
+
+/* Records nw in ip[0], resets ip[1] to 1 and, when nw > 2, fills w[0...nw-1] with cos/sin values. */
+static void makewt(int nw, int *ip, double *w)
+{
+    int j, nwh;
+    double delta, x, y;
+
+    ip[0] = nw;
+    ip[1] = 1; /* callers re-read ip[1] afterwards to decide whether to call makect */
+    if (nw > 2) {
+        nwh = nw >> 1;
+        delta = atan(1.0) / (double)nwh; /* angle step: (pi/4) / nwh */
+        w[0] = 1;
+        w[1] = 0;
+        w[nwh] = cos(delta * (double)nwh); /* cos(pi/4) */
+        w[nwh + 1] = w[nwh];
+        if (nwh > 2) {
+            for (j = 2; j < nwh; j += 2) { /* each cos/sin pair is also stored swapped at nw - j */
+                x = cos(delta * (double)j);
+                y = sin(delta * (double)j);
+                w[j] = x;
+                w[j + 1] = y;
+                w[nw - j] = y;
+                w[nw - j + 1] = x;
+            }
+            bitrv2(nw, ip + 2, w); /* the table gets the same reordering that bitrv2 applies to data */
+        }
+    }
+}
+
+/* Records nc in ip[1] and, when nc > 1, fills c[0...nc-1] with cos/sin values (all but c[0] scaled by one_half). */
+static void makect(int nc, int *ip, double *c)
+{
+    int j, nch;
+    double delta;
+
+    ip[1] = nc;
+    if (nc > 1) {
+        nch = nc >> 1;
+        delta = atan(1.0) / (double)nch; /* angle step: (pi/4) / nch */
+        c[0] = cos(delta * (double)nch); /* cos(pi/4), stored unscaled */
+        c[nch] = one_half * c[0];
+        for (j = 1; j < nch; j++) { /* cosines fill the lower half, sines the upper half in reverse */
+            c[j] = one_half * cos(delta * (double)j);
+            c[nc - j] = one_half * sin(delta * (double)j);
+        }
+    }
+}
+
+
+/* -------- child routines -------- */
+
+/* Reorders a[0...n-1] by swapping pairs of adjacent values, using an index table built on the stack. */
+static void bitrv2(int n, int *ip0, double *a)
+{
+    int j, j1, k, k1, l, m, m2, ip[1024]; /* local table: the caller's work area is not used */
+    double xr, xi, yr, yi;
+
+    (void)ip0; /* parameter kept for the call signature only */
+    ip[0] = 0;
+    l = n;
+    m = 1;
+    while ((m << 3) < l) { /* builds ip[0...m-1]; for power-of-2 n it ends once 8*m*m >= n */
+        l >>= 1;
+        for (j = 0; j < m; j++) {
+            ip[m + j] = ip[j] + l;
+        }
+        m <<= 1;
+    }
+    m2 = 2 * m;
+    if ((m << 3) == l) { /* this case performs four swaps per (j, k) plus one swap per k */
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 -= m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            j1 = 2 * k + m2 + ip[k]; /* the j == k case: one swap between two neighbouring sub-blocks */
+            k1 = j1 + m2;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+        }
+    } else { /* otherwise two swaps per (j, k) and nothing to do for j == k */
+        for (k = 1; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+        }
+    }
+}
+
+/* Same reordering as bitrv2, but every odd-indexed (imaginary) value it touches is also negated. */
+static void bitrv2conj(int n, int *ip0, double *a)
+{
+    int j, j1, k, k1, l, m, m2, ip[1024]; /* same capacity as bitrv2's table; covers n <= 2^23 */
+    double xr, xi, yr, yi;
+
+    (void)ip0; /* parameter kept for the call signature only */
+    ip[0] = 0;
+    l = n;
+    m = 1;
+    while ((m << 3) < l) { /* builds ip[0...m-1]; for power-of-2 n it ends once 8*m*m >= n */
+        l >>= 1;
+        for (j = 0; j < m; j++) {
+            ip[m + j] = ip[j] + l;
+        }
+        m <<= 1;
+    }
+    m2 = 2 * m;
+    if ((m << 3) == l) { /* this case performs four swaps per (j, k) plus the j == k handling */
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 -= m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 2 * k + ip[k]; /* j == k: this entry stays in place and is only negated */
+            a[k1 + 1] = -a[k1 + 1];
+            j1 = k1 + m2;
+            k1 = j1 + m2;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            k1 += m2;
+            a[k1 + 1] = -a[k1 + 1]; /* the last entry of this group also stays in place */
+        }
+    } else { /* otherwise two swaps per (j, k); entries that stay in place are only negated */
+        a[1] = -a[1];
+        a[m2 + 1] = -a[m2 + 1];
+        for (k = 1; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 2 * k + ip[k];
+            a[k1 + 1] = -a[k1 + 1];
+            a[k1 + m2 + 1] = -a[k1 + m2 + 1];
+        }
+    }
+}
+
+/* Butterfly stages used for isgn >= 0: cft1st, then cftmdl passes, then one closing pass done here. */
+static void cftfsub(int n, double *a, double const *w)
+{
+    int j, j1, j2, j3, l;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) { /* l grows by a factor of 4 per cftmdl pass */
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) { /* closing pass combines four inputs spaced l apart */
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i - x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i + x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i - x3r;
+        }
+    } else { /* otherwise the closing pass combines two inputs spaced l apart */
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = a[j + 1] - a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] += a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
+
+/* Same stage sequence as cftfsub, but the closing pass negates the odd-indexed (imaginary) values. */
+static void cftbsub(int n, double *a, double const *w)
+{
+    int j, j1, j2, j3, l;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) { /* l grows by a factor of 4 per cftmdl pass */
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) { /* closing pass combines four inputs spaced l apart */
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = -a[j + 1] - a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = -a[j + 1] + a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i - x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i + x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i - x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i + x3r;
+        }
+    } else { /* otherwise the closing pass combines two inputs spaced l apart */
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = -a[j + 1] + a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] = -a[j + 1] - a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
+
+/* First butterfly stage: works through a[] in groups of 16 values, applying factors read from w[]. */
+static void cft1st(int n, double *a, double const *w)
+{
+    int j, k1, k2;
+    double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    x0r = a[0] + a[2]; /* a[0...7]: handled without any factor from w[] */
+    x0i = a[1] + a[3];
+    x1r = a[0] - a[2];
+    x1i = a[1] - a[3];
+    x2r = a[4] + a[6];
+    x2i = a[5] + a[7];
+    x3r = a[4] - a[6];
+    x3i = a[5] - a[7];
+    a[0] = x0r + x2r;
+    a[1] = x0i + x2i;
+    a[4] = x0r - x2r;
+    a[5] = x0i - x2i;
+    a[2] = x1r - x3i;
+    a[3] = x1i + x3r;
+    a[6] = x1r + x3i;
+    a[7] = x1i - x3r;
+    wk1r = w[2]; /* a[8...15]: the only factor used is w[2] */
+    x0r = a[8] + a[10];
+    x0i = a[9] + a[11];
+    x1r = a[8] - a[10];
+    x1i = a[9] - a[11];
+    x2r = a[12] + a[14];
+    x2i = a[13] + a[15];
+    x3r = a[12] - a[14];
+    x3i = a[13] - a[15];
+    a[8] = x0r + x2r;
+    a[9] = x0i + x2i;
+    a[12] = x2i - x0i;
+    a[13] = x0r - x2r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[10] = wk1r * (x0r - x0i);
+    a[11] = wk1r * (x0r + x0i);
+    x0r = x3i + x1r;
+    x0i = x3r - x1i;
+    a[14] = wk1r * (x0i - x0r);
+    a[15] = wk1r * (x0i + x0r);
+    k1 = 0;
+    for (j = 16; j < n; j += 16) { /* remaining groups: factors are looked up via k1 and k2 */
+        k1 += 2;
+        k2 = 2 * k1;
+        wk2r = w[k1];
+        wk2i = w[k1 + 1];
+        wk1r = w[k2];
+        wk1i = w[k2 + 1];
+        wk3r = wk1r - 2 * wk2i * wk1i; /* third factor is derived from the two loaded ones */
+        wk3i = 2 * wk2i * wk1r - wk1i;
+        x0r = a[j] + a[j + 2];
+        x0i = a[j + 1] + a[j + 3];
+        x1r = a[j] - a[j + 2];
+        x1i = a[j + 1] - a[j + 3];
+        x2r = a[j + 4] + a[j + 6];
+        x2i = a[j + 5] + a[j + 7];
+        x3r = a[j + 4] - a[j + 6];
+        x3i = a[j + 5] - a[j + 7];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        x0r -= x2r;
+        x0i -= x2i;
+        a[j + 4] = wk2r * x0r - wk2i * x0i;
+        a[j + 5] = wk2r * x0i + wk2i * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j + 2] = wk1r * x0r - wk1i * x0i;
+        a[j + 3] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j + 6] = wk3r * x0r - wk3i * x0i;
+        a[j + 7] = wk3r * x0i + wk3i * x0r;
+        wk1r = w[k2 + 2]; /* second half of the group uses the next pair from w[] */
+        wk1i = w[k2 + 3];
+        wk3r = wk1r - 2 * wk2r * wk1i;
+        wk3i = 2 * wk2r * wk1r - wk1i;
+        x0r = a[j + 8] + a[j + 10];
+        x0i = a[j + 9] + a[j + 11];
+        x1r = a[j + 8] - a[j + 10];
+        x1i = a[j + 9] - a[j + 11];
+        x2r = a[j + 12] + a[j + 14];
+        x2i = a[j + 13] + a[j + 15];
+        x3r = a[j + 12] - a[j + 14];
+        x3i = a[j + 13] - a[j + 15];
+        a[j + 8] = x0r + x2r;
+        a[j + 9] = x0i + x2i;
+        x0r -= x2r;
+        x0i -= x2i;
+        a[j + 12] = -wk2i * x0r - wk2r * x0i;
+        a[j + 13] = -wk2i * x0i + wk2r * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j + 10] = wk1r * x0r - wk1i * x0i;
+        a[j + 11] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j + 14] = wk3r * x0r - wk3i * x0i;
+        a[j + 15] = wk3r * x0i + wk3i * x0r;
+    }
+}
+
+/* In-place butterfly pass over a: each step combines four entries spaced l apart, scaling results by factors read from w. */
+static void cftmdl(int n, int l, double *a, double const *w)
+{
+    int j, j1, j2, j3, k, k1, k2, m, m2;
+    double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; /* r/i suffixes: presumably real/imaginary parts held in adjacent slots */
+
+    m = l << 2; /* m = 4*l: extent of one group of four sub-blocks */
+    for (j = 0; j < l; j += 2) { /* Group at offset 0: sums and differences only, nothing read from w */
+        j1 = j + l;
+        j2 = j1 + l;
+        j3 = j2 + l;
+        x0r = a[j] + a[j1];
+        x0i = a[j + 1] + a[j1 + 1];
+        x1r = a[j] - a[j1];
+        x1i = a[j + 1] - a[j1 + 1];
+        x2r = a[j2] + a[j3];
+        x2i = a[j2 + 1] + a[j3 + 1];
+        x3r = a[j2] - a[j3];
+        x3i = a[j2 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j2] = x0r - x2r;
+        a[j2 + 1] = x0i - x2i;
+        a[j1] = x1r - x3i;
+        a[j1 + 1] = x1i + x3r;
+        a[j3] = x1r + x3i;
+        a[j3 + 1] = x1i - x3r;
+    }
+    wk1r = w[2]; /* The group at offset m needs only this single factor */
+    for (j = m; j < l + m; j += 2) { /* NOTE(review): touches a[m .. 2*m), so n >= 8*l is assumed - confirm with callers */
+        j1 = j + l;
+        j2 = j1 + l;
+        j3 = j2 + l;
+        x0r = a[j] + a[j1];
+        x0i = a[j + 1] + a[j1 + 1];
+        x1r = a[j] - a[j1];
+        x1i = a[j + 1] - a[j1 + 1];
+        x2r = a[j2] + a[j3];
+        x2i = a[j2 + 1] + a[j3 + 1];
+        x3r = a[j2] - a[j3];
+        x3i = a[j2 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j2] = x2i - x0i;
+        a[j2 + 1] = x0r - x2r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j1] = wk1r * (x0r - x0i);
+        a[j1 + 1] = wk1r * (x0r + x0i);
+        x0r = x3i + x1r;
+        x0i = x3r - x1i;
+        a[j3] = wk1r * (x0i - x0r);
+        a[j3 + 1] = wk1r * (x0i + x0r);
+    }
+    k1 = 0;
+    m2 = 2 * m; /* Remaining groups are handled two at a time, 2*m entries per iteration of k */
+    for (k = m2; k < n; k += m2) {
+        k1 += 2; /* k1 and k2 = 2*k1 select the (value, value+1) pairs read from w for this k */
+        k2 = 2 * k1;
+        wk2r = w[k1];
+        wk2i = w[k1 + 1];
+        wk1r = w[k2];
+        wk1i = w[k2 + 1];
+        wk3r = wk1r - 2 * wk2i * wk1i; /* Third factor is derived from the other two rather than read from w */
+        wk3i = 2 * wk2i * wk1r - wk1i;
+        for (j = k; j < l + k; j += 2) { /* First group of the pair, at offset k */
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = wk2r * x0r - wk2i * x0i;
+            a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+        }
+        wk1r = w[k2 + 2]; /* Second group of the pair takes the next entry pair from w */
+        wk1i = w[k2 + 3];
+        wk3r = wk1r - 2 * wk2r * wk1i;
+        wk3i = 2 * wk2r * wk1r - wk1i;
+        for (j = k + m; j < l + (k + m); j += 2) { /* Second group of the pair, at offset k + m */
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = -wk2i * x0r - wk2r * x0i;
+            a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+        }
+    }
+}
+
+/* Adjusts a[j], a[j+1] and the mirrored a[n-j], a[n-j+1] in place for even j in [2, n/2), using table c sampled every ks entries. */
+static void rftfsub(int n, double *a, int nc, double const *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr, xi, yr, yi;
+
+    m = n >> 1;
+    ks = 2 * nc / m; /* NOTE(review): m is 0 when n < 2, so this divides by zero - presumably callers guarantee larger n */
+    kk = 0;
+    for (j = 2; j < m; j += 2) {
+        k = n - j; /* Mirror index paired with j */
+        kk += ks;
+        wkr = one_half - c[nc - kk]; /* one_half is defined outside this view */
+        wki = c[kk];
+        xr = a[j] - a[k];
+        xi = a[j + 1] + a[k + 1];
+        yr = wkr * xr - wki * xi;
+        yi = wkr * xi + wki * xr;
+        a[j] -= yr;
+        a[j + 1] -= yi;
+        a[k] += yr;
+        a[k + 1] -= yi;
+    }
+}
+
+/* Counterpart of rftfsub with opposite signs on the wki terms; also negates a[1] on entry and a[n/2 + 1] on exit. */
+static void rftbsub(int n, double *a, int nc, double const *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr, xi, yr, yi;
+
+    a[1] = -a[1];
+    m = n >> 1;
+    ks = 2 * nc / m; /* NOTE(review): m is 0 when n < 2, so this divides by zero - presumably callers guarantee larger n */
+    kk = 0;
+    for (j = 2; j < m; j += 2) {
+        k = n - j; /* Mirror index paired with j */
+        kk += ks;
+        wkr = one_half - c[nc - kk]; /* one_half is defined outside this view */
+        wki = c[kk];
+        xr = a[j] - a[k];
+        xi = a[j + 1] + a[k + 1];
+        yr = wkr * xr + wki * xi;
+        yi = wkr * xi - wki * xr;
+        a[j] -= yr;
+        a[j + 1] = yi - a[j + 1]; /* Unlike rftfsub, the odd-index entries are replaced by (yi - old value) */
+        a[k] += yr;
+        a[k + 1] = yi - a[k + 1];
+    }
+    a[m + 1] = -a[m + 1];
+}
+
+/* For j in [1, n/2): replaces the pair (a[j], a[n-j]) by two weighted combinations built from table c; finally scales a[n/2] by c[0]. */
+static void dctsub(int n, double *a, int nc, double const *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr;
+
+    m = n >> 1;
+    ks = nc / n; /* Stride through c; integer division, so presumably nc is a multiple of n - confirm */
+    kk = 0;
+    for (j = 1; j < m; j++) {
+        k = n - j; /* Mirror index paired with j */
+        kk += ks;
+        wkr = c[kk] - c[nc - kk];
+        wki = c[kk] + c[nc - kk];
+        xr = wki * a[j] - wkr * a[k]; /* Held in xr so a[j] below still sees the old a[k] */
+        a[j] = wkr * a[j] + wki * a[k];
+        a[k] = xr;
+    }
+    a[m] *= c[0];
+}
+
+/* Same pairing and weights as dctsub, but with the roles of a[j] and a[n-j] exchanged; finally scales a[n/2] by c[0]. */
+static void dstsub(int n, double *a, int nc, double const *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr;
+
+    m = n >> 1;
+    ks = nc / n; /* Stride through c; integer division, so presumably nc is a multiple of n - confirm */
+    kk = 0;
+    for (j = 1; j < m; j++) {
+        k = n - j; /* Mirror index paired with j */
+        kk += ks;
+        wkr = c[kk] - c[nc - kk];
+        wki = c[kk] + c[nc - kk];
+        xr = wki * a[k] - wkr * a[j]; /* Held in xr so a[k] below still sees the old a[j] */
+        a[k] = wkr * a[k] + wki * a[j];
+        a[j] = xr;
+    }
+    a[m] *= c[0];
+}
diff --git a/src/fft4g.h b/src/fft4g.h
new file mode 100644
index 0000000..f204ad9
--- /dev/null
+++ b/src/fft4g.h
@@ -0,0 +1,23 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+/* Transform entry points. For cdft/rdft the call sites in fft4g_cache.h pass (length, direction, data, int work array, table). */
+void lsx_cdft(int, int, double *, int *, double *);
+void lsx_rdft(int, int, double *, int *, double *);
+void lsx_ddct(int, int, double *, int *, double *);
+void lsx_ddst(int, int, double *, int *, double *);
+void lsx_dfct(int, double *, double *, int *, double *);
+void lsx_dfst(int, double *, double *, int *, double *);
+/* Single-precision counterparts: identical parameter lists with float in place of double. */
+void lsx_cdft_f(int, int, float *, int *, float *);
+void lsx_rdft_f(int, int, float *, int *, float *);
+void lsx_ddct_f(int, int, float *, int *, float *);
+void lsx_ddst_f(int, int, float *, int *, float *);
+void lsx_dfct_f(int, float *, float *, int *, float *);
+void lsx_dfst_f(int, float *, float *, int *, float *);
+
+#define dft_br_len(l) (2ul + (1ul << (int)(log((l) / 2 + .5) / log(2.)) / 2)) /* Element count for the int work array at transform length l; needs <math.h> */
+#define dft_sc_len(l) ((unsigned long)(l) / 2) /* Element count for the table at length l; (l) is parenthesised so a whole expression is cast */
+
+/* Over-allocate h by 2 to use these macros. PACK copies h[n] into h[1]; UNPACK moves h[1] to h[n], then zeroes h[1] and h[n + 1] (it keeps its trailing ';'). */
+#define LSX_PACK(h, n)   (h)[1] = (h)[n]
+#define LSX_UNPACK(h, n) (h)[n] = (h)[1], (h)[(n) + 1] = (h)[1] = 0;
diff --git a/src/fft4g32.c b/src/fft4g32.c
new file mode 100644
index 0000000..e71b48d
--- /dev/null
+++ b/src/fft4g32.c
@@ -0,0 +1,27 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "filter.h"
+#define FFT4G_FLOAT
+#include "fft4g.c"
+/* Callbacks for the float build of fft4g.c. NOTE(review): H is spelled `double *` yet is passed to lsx_safe_rdft_f (float * in filter.h); presumably fft4g.c remaps `double` under FFT4G_FLOAT - confirm. */
+static void * null(void) {return 0;} /* Always returns a null pointer. */
+static void forward (int length, void * setup, double * H) {lsx_safe_rdft_f(length,  1, H); (void)setup;}
+static void backward(int length, void * setup, double * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;}
+static int multiplier(void) {return 2;} /* Constant 2; presumably a scale factor expected by the table's consumer - confirm. */
+static void nothing(void) {} /* No-op filler. */
+
+typedef void (* fn_t)(void); /* Generic slot type; every entry below is cast to it. */
+fn_t _soxr_rdft32_cb[] = { /* Slot order is fixed by the code that indexes this table, which is not visible here. */
+  (fn_t)null,
+  (fn_t)null,
+  (fn_t)nothing,
+  (fn_t)forward, /* forward and backward each fill two consecutive slots */
+  (fn_t)forward,
+  (fn_t)backward,
+  (fn_t)backward,
+  (fn_t)_soxr_ordered_convolve_f,
+  (fn_t)_soxr_ordered_partial_convolve_f,
+  (fn_t)multiplier,
+  (fn_t)nothing,
+};
diff --git a/src/fft4g32s.c b/src/fft4g32s.c
new file mode 100644
index 0000000..3435959
--- /dev/null
+++ b/src/fft4g32s.c
@@ -0,0 +1,26 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "filter.h"
+#include "simd.h"
+/* Callback table for the variant built on simd.h: the same lsx_safe_rdft_f wrappers, paired with the *_simd convolve routines. */
+static void * null(void) {return 0;} /* Always returns a null pointer. */
+static void nothing(void) {} /* No-op filler. */
+static void forward (int length, void * setup, float * H) {lsx_safe_rdft_f(length,  1, H); (void)setup;}
+static void backward(int length, void * setup, float * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;}
+static int multiplier(void) {return 2;} /* Constant 2; presumably a scale factor expected by the table's consumer - confirm. */
+
+typedef void (* fn_t)(void); /* Generic slot type; every entry below is cast to it. */
+fn_t _soxr_rdft32s_cb[] = { /* Slot order is fixed by the code that indexes this table, which is not visible here. */
+  (fn_t)null,
+  (fn_t)null,
+  (fn_t)nothing,
+  (fn_t)forward, /* forward and backward each fill two consecutive slots */
+  (fn_t)forward,
+  (fn_t)backward,
+  (fn_t)backward,
+  (fn_t)_soxr_ordered_convolve_simd,
+  (fn_t)_soxr_ordered_partial_convolve_simd,
+  (fn_t)multiplier,
+  (fn_t)nothing,
+};
diff --git a/src/fft4g64.c b/src/fft4g64.c
new file mode 100644
index 0000000..b126f6b
--- /dev/null
+++ b/src/fft4g64.c
@@ -0,0 +1,29 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "filter.h"
+#include "fft4g.c"
+#include "soxr-config.h"
+
+#if HAVE_DOUBLE_PRECISION /* Double-precision callback table; compiled only when this configuration flag is non-zero. */
+static void * null(void) {return 0;} /* Always returns a null pointer. */
+static void nothing(void) {} /* No-op filler. */
+static void forward (int length, void * setup, double * H) {lsx_safe_rdft(length,  1, H); (void)setup;}
+static void backward(int length, void * setup, double * H) {lsx_safe_rdft(length, -1, H); (void)setup;}
+static int multiplier(void) {return 2;} /* Constant 2; presumably a scale factor expected by the table's consumer - confirm. */
+
+typedef void (* fn_t)(void); /* Generic slot type; every entry below is cast to it. */
+fn_t _soxr_rdft64_cb[] = { /* Slot order is fixed by the code that indexes this table, which is not visible here. */
+  (fn_t)null,
+  (fn_t)null,
+  (fn_t)nothing,
+  (fn_t)forward, /* forward and backward each fill two consecutive slots */
+  (fn_t)forward,
+  (fn_t)backward,
+  (fn_t)backward,
+  (fn_t)_soxr_ordered_convolve,
+  (fn_t)_soxr_ordered_partial_convolve,
+  (fn_t)multiplier,
+  (fn_t)nothing,
+};
+#endif
diff --git a/src/fft4g_cache.h b/src/fft4g_cache.h
new file mode 100644
index 0000000..6f536b0
--- /dev/null
+++ b/src/fft4g_cache.h
@@ -0,0 +1,92 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+/* Template state: the including file #defines each upper-case name to a per-precision identifier before including this header. */
+static int * LSX_FFT_BR;          /* int work array passed to LSX_RDFT/LSX_CDFT; element 0 is zeroed on first allocation */
+static DFT_FLOAT * LSX_FFT_SC;    /* DFT_FLOAT table passed to LSX_RDFT/LSX_CDFT */
+static int FFT_LEN = -1;          /* -1: not initialised; 0: initialised, nothing allocated; >0: largest length allocated for */
+static ccrw2_t FFT_CACHE_CCRW;    /* Guard used through the ccrw2_* reader/writer calls in this header */
+/* Idempotent set-up: initialises the guard and marks the cache empty (FFT_LEN = 0); does nothing if already initialised. */
+void LSX_INIT_FFT_CACHE(void)
+{
+  if (FFT_LEN >= 0) /* NOTE(review): tested without holding the guard; concurrent first calls look unprotected - confirm callers serialise */
+    return;
+  assert(LSX_FFT_BR == NULL);
+  assert(LSX_FFT_SC == NULL);
+  assert(FFT_LEN == -1);
+  ccrw2_init(FFT_CACHE_CCRW);
+  FFT_LEN = 0;
+}
+/* Tears down the guard, frees both arrays and returns to the uninitialised state (FFT_LEN = -1); asserts the cache was initialised. */
+void LSX_CLEAR_FFT_CACHE(void)
+{
+  assert(FFT_LEN >= 0);
+  ccrw2_clear(FFT_CACHE_CCRW);
+  free(LSX_FFT_BR);
+  free(LSX_FFT_SC);
+  LSX_FFT_SC = NULL; /* Reset so LSX_INIT_FFT_CACHE's assertions hold on re-initialisation */
+  LSX_FFT_BR = NULL;
+  FFT_LEN = -1;
+}
+/* Ensures the arrays cover length len. Returns true while holding the write lock (arrays just resized), false while holding a read lock. */
+static bool UPDATE_FFT_CACHE(int len)
+{
+  LSX_INIT_FFT_CACHE();
+  assert(lsx_is_power_of_2(len));
+  assert(FFT_LEN >= 0);
+  ccrw2_become_reader(FFT_CACHE_CCRW);
+  if (len > FFT_LEN) {
+    ccrw2_cease_reading(FFT_CACHE_CCRW); /* Give up the read lock before asking for the write lock */
+    ccrw2_become_writer(FFT_CACHE_CCRW);
+    if (len > FFT_LEN) { /* Re-test: FFT_LEN may have been raised by someone else while no lock was held */
+      int old_n = FFT_LEN;
+      FFT_LEN = len;
+      LSX_FFT_BR = realloc(LSX_FFT_BR, dft_br_len(FFT_LEN) * sizeof(*LSX_FFT_BR)); /* NOTE(review): result unchecked; failure loses the old array */
+      LSX_FFT_SC = realloc(LSX_FFT_SC, dft_sc_len(FFT_LEN) * sizeof(*LSX_FFT_SC)); /* NOTE(review): result unchecked; failure loses the old array */
+      if (!old_n) { /* First allocation only */
+        LSX_FFT_BR[0] = 0; /* Presumably tells the transform its tables are not built yet - confirm in fft4g.c */
+#if SOXR_LIB
+        atexit(LSX_CLEAR_FFT_CACHE);
+#endif
+      }
+      return true; /* Write lock is still held; released by DONE_WITH_FFT_CACHE(true) */
+    }
+    ccrw2_cease_writing(FFT_CACHE_CCRW); /* Already big enough after all: drop back to a read lock */
+    ccrw2_become_reader(FFT_CACHE_CCRW);
+  }
+  return false;
+}
+/* Releases whichever lock UPDATE_FFT_CACHE left held: pass its return value as is_writer. */
+static void DONE_WITH_FFT_CACHE(bool is_writer)
+{
+  if (is_writer)
+    ccrw2_cease_writing(FFT_CACHE_CCRW);
+  else ccrw2_cease_reading(FFT_CACHE_CCRW);
+}
+/* Runs LSX_RDFT on d using the shared cache arrays, sizing them for len first and holding the cache lock for the duration of the call. */
+void LSX_SAFE_RDFT(int len, int type, DFT_FLOAT * d)
+{
+  bool is_writer = UPDATE_FFT_CACHE(len); /* len must be a power of 2 (asserted there) */
+  LSX_RDFT(len, type, d, LSX_FFT_BR, LSX_FFT_SC);
+  DONE_WITH_FFT_CACHE(is_writer);
+}
+/* Runs LSX_CDFT on d using the shared cache arrays, sizing them for len first and holding the cache lock for the duration of the call. */
+void LSX_SAFE_CDFT(int len, int type, DFT_FLOAT * d)
+{
+  bool is_writer = UPDATE_FFT_CACHE(len); /* len must be a power of 2 (asserted there) */
+  LSX_CDFT(len, type, d, LSX_FFT_BR, LSX_FFT_SC);
+  DONE_WITH_FFT_CACHE(is_writer);
+}
+
+#undef UPDATE_FFT_CACHE
+#undef LSX_SAFE_RDFT
+#undef LSX_SAFE_CDFT
+#undef LSX_RDFT
+#undef LSX_INIT_FFT_CACHE
+#undef LSX_FFT_SC
+#undef LSX_FFT_BR
+#undef LSX_CLEAR_FFT_CACHE
+#undef LSX_CDFT
+#undef FFT_LEN
+#undef FFT_CACHE_CCRW
+#undef DONE_WITH_FFT_CACHE
+#undef DFT_FLOAT
diff --git a/src/fifo.h b/src/fifo.h
new file mode 100644
index 0000000..86da38c
--- /dev/null
+++ b/src/fifo.h
@@ -0,0 +1,124 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#ifndef fifo_included
+#define fifo_included
+
+#if !defined FIFO_SIZE_T
+#define FIFO_SIZE_T size_t
+#endif
+
+#if !defined FIFO_REALLOC
+  #define FIFO_REALLOC(a,b,c) realloc(a,b)
+  #undef FIFO_FREE
+  #define FIFO_FREE free
+  #undef FIFO_MALLOC
+  #define FIFO_MALLOC malloc
+#endif
+/* Byte-addressed FIFO over one contiguous buffer; readable data occupies data[begin .. end). */
+typedef struct {
+  char * data;
+  size_t allocation;   /* Number of bytes allocated for data. */
+  size_t item_size;    /* Size of each item in data */
+  size_t begin;        /* Offset of the first byte to read. */
+  size_t end;          /* 1 + Offset of the last byte to read. */
+} fifo_t;
+
+#if !defined FIFO_MIN
+  #define FIFO_MIN 0x4000
+#endif
+
+#if !defined UNUSED
+  #define UNUSED
+#endif
+/* Discards all queued data by resetting both offsets to 0; the buffer and its allocation size are left as they are. */
+UNUSED static void fifo_clear(fifo_t * f)
+{
+  f->end = f->begin = 0;
+}
+/* Reserves room for n0 more items at the tail, compacting or growing the buffer as needed; returns the new region, or 0 if growing failed. */
+UNUSED static void * fifo_reserve(fifo_t * f, FIFO_SIZE_T n0)
+{
+  size_t n = (size_t)n0 * f->item_size;  /* Request size in bytes. */
+  char * grown;
+
+  if (f->begin == f->end)                /* Nothing queued: restart at offset 0. */
+    fifo_clear(f);
+
+  while (1) {
+    if (f->end + n <= f->allocation) {   /* Fits after the current tail. */
+      void *p = f->data + f->end;
+      f->end += n;
+      return p;
+    }
+    if (f->begin > FIFO_MIN) {           /* Enough consumed space at the front: slide data down, retry. */
+      memmove(f->data, f->data + f->begin, f->end - f->begin);
+      f->end -= f->begin;
+      f->begin = 0;
+      continue;
+    }
+    grown = FIFO_REALLOC(f->data, f->allocation + n, f->allocation);
+    if (!grown)   /* Leave f untouched: assuming realloc semantics, the old buffer is still valid and can be freed. */
+      return 0;
+    f->data = grown;
+    f->allocation += n;
+  }
+}
+/* Appends n0 items copied from data (only reserves them when data is NULL); returns the region, or 0 if the reservation failed. */
+UNUSED static void * fifo_write(fifo_t * f, FIFO_SIZE_T n0, void const * data)
+{
+  size_t n = (size_t)n0;
+  void * s = fifo_reserve(f, n0);
+  if (s && data)   /* fifo_reserve returns 0 on allocation failure; never copy into a null destination. */
+    memcpy(s, data, n * f->item_size);
+  return s;
+}
+
+UNUSED static void fifo_trim_to(fifo_t * f, FIFO_SIZE_T n0)
+{
+  /* Make the readable span exactly n0 items long, measured from begin. */
+  size_t const keep_bytes = (size_t)n0 * f->item_size;
+  f->end = f->begin + keep_bytes;
+}
+
+UNUSED static void fifo_trim_by(fifo_t * f, FIFO_SIZE_T n0)
+{
+  /* Drop the newest n0 items by pulling the end offset back. */
+  size_t const drop_bytes = (size_t)n0 * f->item_size;
+  f->end -= drop_bytes;
+}
+/* Returns how many whole items are currently queued, i.e. (end - begin) / item_size. */
+UNUSED static FIFO_SIZE_T fifo_occupancy(fifo_t * f)
+{
+  return (FIFO_SIZE_T)((f->end - f->begin) / f->item_size);
+}
+
+UNUSED static void * fifo_read(fifo_t * f, FIFO_SIZE_T n0, void * data)
+{
+  /* Consume n0 items from the head; NULL if fewer are queued. Copies to data when given. */
+  size_t const wanted = (size_t)n0 * f->item_size;
+  char * const head = f->data + f->begin;
+  if (f->end - f->begin < wanted)
+    return NULL;
+  if (data != NULL)
+    memcpy(data, head, wanted);
+  f->begin += wanted;
+  return head;
+}
+
+#define fifo_read_ptr(f) fifo_read(f, (FIFO_SIZE_T)0, NULL)
+/* Releases the data buffer with FIFO_FREE; the fifo_t itself is not freed and f->data is not reset afterwards. */
+UNUSED static void fifo_delete(fifo_t * f)
+{
+  FIFO_FREE(f->data);
+}
+/* Initialises f for items of item_size bytes with a FIFO_MIN-byte buffer; returns 0 on success, non-zero if FIFO_MALLOC failed. */
+UNUSED static int fifo_create(fifo_t * f, FIFO_SIZE_T item_size)
+{
+  f->item_size = (size_t)item_size;
+  f->allocation = FIFO_MIN; /* NOTE(review): stays FIFO_MIN even when the allocation below fails and data is NULL */
+  fifo_clear(f);
+  return !(f->data = FIFO_MALLOC(f->allocation));
+}
+
+#endif
diff --git a/src/filter.c b/src/filter.c
new file mode 100644
index 0000000..a8f7e56
--- /dev/null
+++ b/src/filter.c
@@ -0,0 +1,245 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "filter.h"
+
+#include <math.h>
+#if !defined M_PI
+#define M_PI    3.14159265358979323846
+#endif
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "fft4g.h"
+#include "ccrw2.h"
+/* Instantiate fft4g_cache.h once per precision: the macros name that instance's symbols, and the header #undefs them all at its end. */
+#if 1 || HAVE_DOUBLE_PRECISION /* Always need this, for lsx_fir_to_phase. */
+#define DFT_FLOAT double
+#define DONE_WITH_FFT_CACHE done_with_fft_cache
+#define FFT_CACHE_CCRW fft_cache_ccrw
+#define FFT_LEN fft_len
+#define LSX_CDFT lsx_cdft
+#define LSX_CLEAR_FFT_CACHE lsx_clear_fft_cache
+#define LSX_FFT_BR lsx_fft_br
+#define LSX_FFT_SC lsx_fft_sc
+#define LSX_INIT_FFT_CACHE lsx_init_fft_cache
+#define LSX_RDFT lsx_rdft
+#define LSX_SAFE_CDFT lsx_safe_cdft
+#define LSX_SAFE_RDFT lsx_safe_rdft
+#define UPDATE_FFT_CACHE update_fft_cache
+#include "fft4g_cache.h"
+#endif
+
+#if HAVE_SINGLE_PRECISION && !HAVE_AVFFT /* NOTE(review): float cache is skipped when HAVE_AVFFT is set - presumably another FFT is used then; confirm */
+#define DFT_FLOAT float
+#define DONE_WITH_FFT_CACHE done_with_fft_cache_f
+#define FFT_CACHE_CCRW fft_cache_ccrw_f
+#define FFT_LEN fft_len_f
+#define LSX_CDFT lsx_cdft_f
+#define LSX_CLEAR_FFT_CACHE lsx_clear_fft_cache_f
+#define LSX_FFT_BR lsx_fft_br_f
+#define LSX_FFT_SC lsx_fft_sc_f
+#define LSX_INIT_FFT_CACHE lsx_init_fft_cache_f
+#define LSX_RDFT lsx_rdft_f
+#define LSX_SAFE_CDFT lsx_safe_cdft_f
+#define LSX_SAFE_RDFT lsx_safe_rdft_f
+#define UPDATE_FFT_CACHE update_fft_cache_f
+#include "fft4g_cache.h"
+#endif
+
+#if HAVE_DOUBLE_PRECISION || !SOXR_LIB /* rdft.h is a second template; presumably it defines the two ORDERED_* functions and #undefs these names - not visible here */
+#define DFT_FLOAT double
+#define ORDERED_CONVOLVE lsx_ordered_convolve
+#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve
+#include "rdft.h"
+#endif
+
+#if HAVE_SINGLE_PRECISION /* Same template again for float */
+#define DFT_FLOAT float
+#define ORDERED_CONVOLVE lsx_ordered_convolve_f
+#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve_f
+#include "rdft.h"
+#endif
+/* Returns the window parameter beta for attenuation att and transition bandwidth tr_bw; four regimes are selected by att. */
+double lsx_kaiser_beta(double att, double tr_bw)
+{
+  if (att >= 60) { /* Table regime: a cubic in att per row, with the row chosen from tr_bw */
+    static const double coefs[][4] = {
+      {-6.784957e-10,1.02856e-05,0.1087556,-0.8988365+.001},
+      {-6.897885e-10,1.027433e-05,0.10876,-0.8994658+.002},
+      {-1.000683e-09,1.030092e-05,0.1087677,-0.9007898+.003},
+      {-3.654474e-10,1.040631e-05,0.1087085,-0.8977766+.006},
+      {8.106988e-09,6.983091e-06,0.1091387,-0.9172048+.015},
+      {9.519571e-09,7.272678e-06,0.1090068,-0.9140768+.025},
+      {-5.626821e-09,1.342186e-05,0.1083999,-0.9065452+.05},
+      {-9.965946e-08,5.073548e-05,0.1040967,-0.7672778+.085},
+      {1.604808e-07,-5.856462e-05,0.1185998,-1.34824+.1},
+      {-1.511964e-07,6.363034e-05,0.1064627,-0.9876665+.18},
+    };
+    double realm = log(tr_bw/.0005)/log(2.); /* log2 of tr_bw relative to .0005: fractional row position in coefs */
+    double const * c0 = coefs[range_limit(  (int)realm, 0, (int)array_length(coefs)-1)]; /* Row at or below; index limited to the table */
+    double const * c1 = coefs[range_limit(1+(int)realm, 0, (int)array_length(coefs)-1)]; /* Next row; index limited to the table */
+    double b0 = ((c0[0]*att + c0[1])*att + c0[2])*att + c0[3];
+    double b1 = ((c1[0]*att + c1[1])*att + c1[2])*att + c1[3];
+    return b0 + (b1 - b0) * (realm - (int)realm); /* Linear blend of the two rows by the fractional part of realm */
+  }
+  if (att > 50   ) return .1102 * (att - 8.7);
+  if (att > 20.96) return .58417 * pow(att -20.96, .4) + .07886 * (att - 20.96);
+  return 0; /* att <= 20.96 */
+}
+/* Allocates and returns num_taps symmetric filter coefficients (a sin(Fc*x)/x kernel times a lsx_bessel_I_0 window); NULL if malloc fails. */
+double * lsx_make_lpf(
+    int num_taps, double Fc, double beta, double rho, double scale)
+{
+  int i, m = num_taps - 1;
+  double * h = malloc((size_t)num_taps * sizeof(*h));
+  double mult = scale / lsx_bessel_I_0(beta), mult1 = 1 / (.5 * m + rho); /* mult: overall gain; mult1: maps tap offset to the window argument */
+  assert(Fc >= 0 && Fc <= 1);
+  lsx_debug("make_lpf(n=%i Fc=%.7g β=%g ρ=%g scale=%g)",
+      num_taps, Fc, beta, rho, scale);
+
+  if (h) for (i = 0; i <= m / 2; ++i) { /* Compute the first half only; the rest is mirrored below */
+    double z = i - .5 * m, x = z * M_PI, y = z * mult1; /* z: offset of tap i from the centre of the filter */
+    h[i] = x? sin(Fc * x) / x : Fc; /* Fc is used directly at x == 0 to avoid dividing by zero */
+    h[i] *= lsx_bessel_I_0(beta * sqrt(1 - y * y)) * mult;
+    if (m - i != i) /* Skip the mirror write for the centre tap */
+      h[m - i] = h[i];
+  }
+  return h;
+}
+/* Fills in *beta when it is negative on entry and *num_taps when it is zero on entry; values supplied by the caller are left alone. */
+void lsx_kaiser_params(double att, double Fc, double tr_bw, double * beta, int * num_taps)
+{
+  *beta = *beta < 0? lsx_kaiser_beta(att, tr_bw * .5 / Fc): *beta;
+  att = att < 60? (att - 7.95) / (2.285 * M_PI * 2) : /* att is reused from here on as the numerator of the tap-count estimate */
+    ((.0007528358-1.577737e-05**beta)**beta+.6248022)**beta+.06186902; /* `**beta` reads as `* (*beta)`: a cubic in *beta */
+  *num_taps = !*num_taps? (int)ceil(att/tr_bw + 1) : *num_taps;
+}
+/* Derives Fc, beta and a tap count from the band edges, then returns lsx_make_lpf's result; with Fn < 0 only *num_taps is computed and 0 is returned. */
+double * lsx_design_lpf(
+    double Fp,      /* End of pass-band */
+    double Fs,      /* Start of stop-band */
+    double Fn,      /* Nyquist freq; e.g. 0.5, 1, PI */
+    double att,     /* Stop-band attenuation in dB */
+    int * num_taps, /* 0: value will be estimated */
+    int k,          /* >0: number of phases; <0: num_taps ≡ 1 (mod -k) */
+    double beta)    /* <0: value will be estimated */
+{
+  int n = *num_taps, phases = max(k, 1), modulo = max(-k, 1); /* n keeps the caller's original request (0 = estimate) */
+  double tr_bw, Fc, rho = phases == 1? .5 : att < 120? .63 : .75;
+
+  Fp /= fabs(Fn), Fs /= fabs(Fn);        /* Normalise to Fn = 1 */
+  tr_bw = .5 * (Fs - Fp); /* Transition band-width: 6dB to stop points */
+  tr_bw /= phases, Fs /= phases;
+  tr_bw = min(tr_bw, .5 * Fs);
+  Fc = Fs - tr_bw;
+  assert(Fc - tr_bw >= 0);
+  lsx_kaiser_params(att, Fc, tr_bw, &beta, num_taps);
+  if (!n) /* Only an estimated count is adjusted: to (multiple of phases) - 1, or to 1 more than a multiple of modulo */
+    *num_taps = phases > 1? *num_taps / phases * phases + phases - 1 :
+      (*num_taps + modulo - 2) / modulo * modulo + 1;
+  return Fn < 0? 0 : lsx_make_lpf(*num_taps, Fc, beta, rho, (double)phases);
+}
+/* log(x) for x > 0; for x == 0 logs a debug message and returns the fixed substitute -26. Negative x trips the assertion. */
+static double safe_log(double x)
+{
+  assert(x >= 0);
+  if (x)
+    return log(x);
+  lsx_debug("log(0)");
+  return -26;
+}
+/* Rewrites FIR *h (length *len) for the requested phase setting (0..100); may realloc *h and change *len; stores a tap count in *post_len. */
+void lsx_fir_to_phase(double * * h, int * len, int * post_len, double phase)
+{
+  double * pi_wraps, * work, phase1 = (phase > 50 ? 100 - phase : phase) / 50; /* phase1 folds phase into [0, 1]: 0 at phase 0 or 100, 1 at 50 */
+  int i, work_len, begin, end, imp_peak = 0, peak = 0; /* end is assigned and used only in the final else-branch below */
+  double imp_sum = 0, peak_imp_sum = 0;
+  double prev_angle2 = 0, cum_2pi = 0, prev_angle1 = 0, cum_1pi = 0;
+
+  for (i = *len, work_len = 2 * 2 * 8; i > 1; work_len <<= 1, i >>= 1); /* work_len: 32, doubled once per halving of *len */
+
+  work = calloc((size_t)work_len + 2, sizeof(*work)); /* +2: (UN)PACK */
+  pi_wraps = malloc((((size_t)work_len + 2) / 2) * sizeof(*pi_wraps)); /* NOTE(review): neither allocation is checked before use */
+
+  memcpy(work, *h, (size_t)*len * sizeof(*work));
+  lsx_safe_rdft(work_len, 1, work); /* Cepstral: */
+  LSX_UNPACK(work, work_len);
+
+  for (i = 0; i <= work_len; i += 2) { /* Per pair: record accumulated angle jumps, then replace the pair by (log magnitude, 0) */
+    double angle = atan2(work[i + 1], work[i]);
+    double detect = 2 * M_PI;
+    double delta = angle - prev_angle2;
+    double adjust = detect * ((delta < -detect * .7) - (delta > detect * .7)); /* +/-detect when the jump exceeds 70% of detect, else 0 */
+    prev_angle2 = angle;
+    cum_2pi += adjust;
+    angle += cum_2pi;
+    detect = M_PI;
+    delta = angle - prev_angle1;
+    adjust = detect * ((delta < -detect * .7) - (delta > detect * .7));
+    prev_angle1 = angle;
+    cum_1pi += fabs(adjust); /* fabs for when 2pi and 1pi have combined */
+    pi_wraps[i >> 1] = cum_1pi;
+
+    work[i] = safe_log(sqrt(sqr(work[i]) + sqr(work[i + 1])));
+    work[i + 1] = 0;
+  }
+  LSX_PACK(work, work_len);
+  lsx_safe_rdft(work_len, -1, work);
+  for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len; /* Rescale after the type -1 transform */
+
+  for (i = 1; i < work_len / 2; ++i) { /* Window to reject acausal components */
+    work[i] *= 2;
+    work[i + work_len / 2] = 0;
+  }
+  lsx_safe_rdft(work_len, 1, work);
+
+  for (i = 2; i < work_len; i += 2) /* Interpolate between linear & min phase */
+    work[i + 1] = phase1 * i / work_len * pi_wraps[work_len >> 1] +
+        (1 - phase1) * (work[i + 1] + pi_wraps[i >> 1]) - pi_wraps[i >> 1];
+
+  work[0] = exp(work[0]), work[1] = exp(work[1]);
+  for (i = 2; i < work_len; i += 2) { /* Turn each (log magnitude, angle) pair back into (x*cos, x*sin) */
+    double x = exp(work[i]);
+    work[i    ] = x * cos(work[i + 1]);
+    work[i + 1] = x * sin(work[i + 1]);
+  }
+
+  lsx_safe_rdft(work_len, -1, work);
+  for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len; /* Rescale after the type -1 transform */
+
+  /* Find peak pos. */
+  for (i = 0; i <= (int)(pi_wraps[work_len >> 1] / M_PI + .5); ++i) {
+    imp_sum += work[i];
+    if (fabs(imp_sum) > fabs(peak_imp_sum)) {
+      peak_imp_sum = imp_sum;
+      peak = i;
+    }
+    if (work[i] > work[imp_peak]) /* For debug check only */
+      imp_peak = i;
+  }
+  while (peak && fabs(work[peak-1]) > fabs(work[peak]) && work[peak-1] * work[peak] > 0) /* Step back while the previous same-sign sample is larger */
+    --peak;
+
+  if (!phase1)
+    begin = 0;
+  else if (phase1 == 1)
+    begin = peak - *len / 2;
+  else { /* Intermediate setting: the output window, and hence *len, is recomputed around peak */
+    begin = (int)((.997 - (2 - phase1) * .22) * *len + .5);
+    end   = (int)((.997 + (0 - phase1) * .22) * *len + .5);
+    begin = peak - (begin & ~3);
+    end   = peak + 1 + ((end + 3) & ~3);
+    *len = end - begin;
+    *h = realloc(*h, (size_t)*len * sizeof(**h)); /* NOTE(review): result is not checked; failure loses the old buffer */
+  }
+  for (i = 0; i < *len; ++i) (*h)[i] = /* Copy out with wrap-around indexing into work; order is reversed when phase > 50 */
+    work[(begin + (phase > 50 ? *len - 1 - i : i) + work_len) & (work_len - 1)];
+  *post_len = phase > 50 ? peak - begin : begin + *len - (peak + 1);
+
+  lsx_debug("nPI=%g peak-sum@%i=%g (val@%i=%g); len=%i post=%i (%g%%)",
+      pi_wraps[work_len >> 1] / M_PI, peak, peak_imp_sum, imp_peak,
+      work[imp_peak], *len, *post_len, 100 - 100. * *post_len / (*len - 1));
+  free(pi_wraps), free(work);
+}
diff --git a/src/filter.h b/src/filter.h
new file mode 100644
index 0000000..700dbd2
--- /dev/null
+++ b/src/filter.h
@@ -0,0 +1,39 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined soxr_filter_included
+#define soxr_filter_included
+
+#include "aliases.h"
+
+double lsx_bessel_I_0(double x);
+void lsx_init_fft_cache(void);
+void lsx_clear_fft_cache(void);
+void lsx_init_fft_cache_f(void);
+void lsx_clear_fft_cache_f(void);
+#define lsx_is_power_of_2(x) (!((x) < 2 || ((x) & ((x) - 1)))) /* Non-zero iff x >= 2 with exactly one bit set; x is evaluated more than once. */
+void lsx_safe_rdft(int len, int type, double * d); /* The four lsx_safe_* functions are generated in filter.c from the fft4g_cache.h template */
+void lsx_safe_cdft(int len, int type, double * d);
+void lsx_safe_rdft_f(int len, int type, float * d);
+void lsx_safe_cdft_f(int len, int type, float * d);
+void lsx_ordered_convolve(int n, void * not_used, double * a, const double * b); /* The four lsx_ordered_* names are bound in filter.c before it includes rdft.h */
+void lsx_ordered_convolve_f(int n, void * not_used, float * a, const float * b);
+void lsx_ordered_partial_convolve(int n, double * a, const double * b);
+void lsx_ordered_partial_convolve_f(int n, float * a, const float * b);
+
+double lsx_kaiser_beta(double att, double tr_bw);
+double * lsx_make_lpf(int num_taps, double Fc, double beta, double rho,
+    double scale);
+void lsx_kaiser_params(double att, double Fc, double tr_bw, double * beta, int * num_taps);
+double * lsx_design_lpf(
+    double Fp,      /* End of pass-band */
+    double Fs,      /* Start of stop-band */
+    double Fn,      /* Nyquist freq; e.g. 0.5, 1, PI; < 0: dummy run */
+    double att,     /* Stop-band attenuation in dB */
+    int * num_taps, /* 0: value will be estimated */
+    int k,          /* >0: number of phases; <0: num_taps ≡ 1 (mod -k) */
+    double beta);   /* <0: value will be estimated */
+void lsx_fir_to_phase(double * * h, int * len,
+    int * post_len, double phase0);
+
+#endif
diff --git a/src/filters.h b/src/filters.h
new file mode 100644
index 0000000..39e7062
--- /dev/null
+++ b/src/filters.h
@@ -0,0 +1,151 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "half_coefs.h"
+/* Each group stamps out one fixed-length stage: half-fir.h builds FUNCTION from CONVOLVE and COEFS, then #undefs all three. */
+#define FUNCTION h8
+#define CONVOLVE _ _ _ _ _ _ _ _ /* One `_` per coefficient; `_` itself is defined inside half-fir.h */
+#define h8_l 8
+#define COEFS half_fir_coefs_8
+#include "half-fir.h"
+
+#define FUNCTION h9
+#define CONVOLVE _ _ _ _ _ _ _ _ _
+#define h9_l 9
+#define COEFS half_fir_coefs_9
+#include "half-fir.h"
+
+#define FUNCTION h10
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _
+#define h10_l 10
+#define COEFS half_fir_coefs_10
+#include "half-fir.h"
+
+#define FUNCTION h11
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _
+#define h11_l 11
+#define COEFS half_fir_coefs_11
+#include "half-fir.h"
+
+#define FUNCTION h12
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _
+#define h12_l 12
+#define COEFS half_fir_coefs_12
+#include "half-fir.h"
+
+#define FUNCTION h13
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _ _
+#define h13_l 13
+#define COEFS half_fir_coefs_13
+#include "half-fir.h"
+/* One row per generated hN stage. att is presumably the stop-band attenuation (dB) that length achieves - confirm with the code that reads this table. */
+static struct {int num_coefs; stage_fn_t fn; float att;} const half_firs[] = {
+  { 8, h8 , 136.51f},
+  { 9, h9 , 152.32f},
+  {10, h10, 168.07f},
+  {11, h11, 183.78f},
+  {12, h12, 199.44f},
+  {13, h13, 212.75f},
+};
+/* Variable-length stages vpoly0..vpoly3: tap count and phase bits are read from the stage (p) at run time instead of being constants. */
+#define HI_PREC_CLOCK /* Consumed by the poly-fir headers (not visible here); undefined again after vpoly3 */
+
+#define VAR_LENGTH p->n
+#define VAR_CONVOLVE while (j < FIR_LENGTH) _ /* Repeats the per-tap step `_` in a loop rather than unrolling it */
+#define VAR_POLY_PHASE_BITS p->phase_bits
+
+#define FUNCTION vpoly0
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
+#include "poly-fir0.h"
+
+#define FUNCTION vpoly1 /* Presumably the poly-fir headers #undef these names after use, as each group redefines them - confirm */
+#define COEF_INTERP 1
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
+#include "poly-fir.h"
+
+#define FUNCTION vpoly2
+#define COEF_INTERP 2
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
+#include "poly-fir.h"
+
+#define FUNCTION vpoly3
+#define COEF_INTERP 3
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
+#include "poly-fir.h"
+
+#undef HI_PREC_CLOCK
+/* Fixed-length stages. With RATE_SIMD_POLY set, extra `_` steps are appended: 2 for U100 and 1 for u100; otherwise none. */
+#define U100_l 42
+#if RATE_SIMD_POLY
+  #define U100_l_EXTRA _ _
+  #define u100_l_EXTRA _
+  #define U100_l_EXTRA_LENGTH 2
+  #define u100_l_EXTRA_LENGTH 1
+#else
+  #define U100_l_EXTRA
+  #define u100_l_EXTRA
+  #define U100_l_EXTRA_LENGTH 0
+  #define u100_l_EXTRA_LENGTH 0
+#endif
+#define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ U100_l_EXTRA
+#define FUNCTION U100_0 /* Stage built from poly-fir0.h with U100_l taps plus any extras */
+#define FIR_LENGTH (U100_l + U100_l_EXTRA_LENGTH)
+#define CONVOLVE poly_fir_convolve_U100
+#include "poly-fir0.h"
+
+#define u100_l 11
+#define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _ _ u100_l_EXTRA
+#define FUNCTION u100_0
+#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
+#define CONVOLVE poly_fir_convolve_u100
+#include "poly-fir0.h"
+
+#define FUNCTION u100_1
+#define COEF_INTERP 1
+#define PHASE_BITS 8
+#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
+#define CONVOLVE poly_fir_convolve_u100
+#include "poly-fir.h"
+#define u100_1_b 8 /* Repeats the PHASE_BITS value used for u100_1 above, for use in the poly_firs table */
+
+#define FUNCTION u100_2
+#define COEF_INTERP 2
+#define PHASE_BITS 6
+#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
+#define CONVOLVE poly_fir_convolve_u100
+#include "poly-fir.h"
+#define u100_2_b 6 /* Repeats the PHASE_BITS value used for u100_2 above, for use in the poly_firs table */
+/* Stage-selection table. How the table is indexed, and what scalar means for each row, is decided by the code that reads it - not visible here. */
+typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t;
+typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t; /* Three candidate stages per row */
+
+static poly_fir_t const poly_firs[] = {
+  {-1, {{0, vpoly0}, { 7.2f, vpoly1}, {5.0f, vpoly2}}}, /* beta == -1 on every vpoly row; presumably "estimate beta" - confirm */
+  {-1, {{0, vpoly0}, { 9.4f, vpoly1}, {6.7f, vpoly2}}},
+  {-1, {{0, vpoly0}, {12.4f, vpoly1}, {7.8f, vpoly2}}},
+  {-1, {{0, vpoly0}, {13.6f, vpoly1}, {9.3f, vpoly2}}},
+  {-1, {{0, vpoly0}, {10.5f, vpoly2}, {8.4f, vpoly3}}},
+  {-1, {{0, vpoly0}, {11.85f,vpoly2}, {9.0f, vpoly3}}},
+
+  {-1, {{0, vpoly0}, { 8.0f, vpoly1}, {5.3f, vpoly2}}},
+  {-1, {{0, vpoly0}, { 8.6f, vpoly1}, {5.7f, vpoly2}}},
+  {-1, {{0, vpoly0}, {10.6f, vpoly1}, {6.75f,vpoly2}}},
+  {-1, {{0, vpoly0}, {12.6f, vpoly1}, {8.6f, vpoly2}}},
+  {-1, {{0, vpoly0}, { 9.6f, vpoly2}, {7.6f, vpoly3}}},
+  {-1, {{0, vpoly0}, {11.4f, vpoly2}, {8.65f,vpoly3}}},
+
+  {10.62f, {{U100_l, U100_0}, {0, 0}, {0, 0}}}, /* Fixed-length rows carry an explicit beta; this one has only one stage */
+  {11.28f, {{u100_l, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}}, /* scalar holds the tap count, then each stage's phase bits */
+  {-1, {{0, vpoly0}, {   9, vpoly1}, {  6, vpoly2}}},
+  {-1, {{0, vpoly0}, {  11, vpoly1}, {  7, vpoly2}}},
+  {-1, {{0, vpoly0}, {  13, vpoly1}, {  8, vpoly2}}},
+  {-1, {{0, vpoly0}, {  10, vpoly2}, {  8, vpoly3}}},
+  {-1, {{0, vpoly0}, {  12, vpoly2}, {  9, vpoly3}}},
+};
diff --git a/src/half-fir.h b/src/half-fir.h
new file mode 100644
index 0000000..852cb78
--- /dev/null
+++ b/src/half-fir.h
@@ -0,0 +1,25 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Down-sample by a factor of 2 using a FIR with odd length (LEN).*/
+/* Input must be preceded and followed by LEN >> 1 samples. */
+
+#define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j; /* one symmetric tap pair at offsets -/+(2*j+1), then advance j */
+static void FUNCTION(stage_t * p, fifo_t * output_fifo)
+{
+  sample_t const * input = stage_read_p(p);
+  int i, num_out = (stage_occupancy(p) + 1) / 2; /* half the occupancy, rounded up */
+  sample_t * output = fifo_reserve(output_fifo, num_out);
+  /* `input', `sum' and `j' are referenced by `_' (and hence by CONVOLVE): do not rename them. */
+  for (i = 0; i < num_out; ++i, input += 2) { /* two input samples are stepped over per output sample */
+    int j = 0;
+    sample_t sum = input[0] * .5f; /* centre tap has weight 0.5 */
+    CONVOLVE
+    output[i] = sum;
+  }
+  fifo_read(&p->fifo, 2 * num_out, NULL); /* presumably discards the 2*num_out samples just used -- confirm fifo_read semantics */
+}
+#undef _
+#undef COEFS
+#undef CONVOLVE
+#undef FUNCTION
diff --git a/src/half_coefs.h b/src/half_coefs.h
new file mode 100644
index 0000000..e08b248
--- /dev/null
+++ b/src/half_coefs.h
@@ -0,0 +1,57 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if defined __GNUC__
+  #pragma GCC system_header
+#elif defined __SUNPRO_C
+  #pragma disable_warn
+#elif defined _MSC_VER
+  #pragma warning(push, 1)
+#endif
+/* half_fir_coefs_N holds N coefficients.  Diagnostics are turned down above, presumably because these double literals may narrow to sample_t -- confirm. */
+static const sample_t half_fir_coefs_8[] = {
+  0.3115465451887802, -0.08734497241282892, 0.03681452335604365,
+  -0.01518925831569441, 0.005454118437408876, -0.001564400922162005,
+  0.0003181701445034203, -3.48001341225749e-5,
+};
+
+static const sample_t half_fir_coefs_9[] = {
+  0.3122703613711853, -0.08922155288172305, 0.03913974805854332,
+  -0.01725059723447163, 0.006858970092378141, -0.002304518467568703,
+  0.0006096426006051062, -0.0001132393923815236, 1.119795386287666e-5,
+};
+
+static const sample_t half_fir_coefs_10[] = {
+  0.3128545521327376, -0.09075671986104322, 0.04109637155154835,
+  -0.01906629512749895, 0.008184039342054333, -0.0030766775017262,
+  0.0009639607022414314, -0.0002358552746579827, 4.025184282444155e-5,
+  -3.629779111541012e-6,
+};
+
+static const sample_t half_fir_coefs_11[] = {
+  0.3133358837508807, -0.09203588680609488, 0.04276515428384758,
+  -0.02067356614745591, 0.00942253142371517, -0.003856330993895144,
+  0.001363470684892284, -0.0003987400965541919, 9.058629923971627e-5,
+  -1.428553070915318e-5, 1.183455238783835e-6,
+};
+
+static const sample_t half_fir_coefs_12[] = {
+  0.3137392991811407, -0.0931182192961332, 0.0442050575271454,
+  -0.02210391200618091, 0.01057473015666001, -0.00462766983973885,
+  0.001793630226239453, -0.0005961819959665878, 0.0001631475979359577,
+  -3.45557865639653e-5, 5.06188341942088e-6, -3.877010943315563e-7,
+};
+
+static const sample_t half_fir_coefs_13[] = {
+  0.3140822554324578, -0.0940458550886253, 0.04545990399121566,
+  -0.02338339450796002, 0.01164429409071052, -0.005380686021429845,
+  0.002242915773871009, -0.000822047600000082, 0.0002572510962395222,
+  -6.607320708956279e-5, 1.309926399120154e-5, -1.790719575255006e-6,
+  1.27504961098836e-7,
+};
+
+#if defined __SUNPRO_C
+  #pragma enable_warn
+#elif defined _MSC_VER
+  #pragma warning(pop)
+#endif
diff --git a/src/internal.h b/src/internal.h
new file mode 100644
index 0000000..49167ea
--- /dev/null
+++ b/src/internal.h
@@ -0,0 +1,43 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined soxr_internal_included
+#define soxr_internal_included
+
+#include "soxr-config.h"
+
+#undef min
+#undef max
+#define min(a, b) ((a) <= (b) ? (a) : (b)) /* NB: evaluates its arguments more than once */
+#define max(a, b) ((a) >= (b) ? (a) : (b)) /* NB: evaluates its arguments more than once */
+#define range_limit(x, lower, upper) (min(max(x, lower), upper)) /* clamp x to [lower, upper] */
+#define linear_to_dB(x) (log10(x) * 20) /* the user of this macro must have <math.h> in scope */
+#define array_length(a) (sizeof(a)/sizeof((a)[0])) /* for true arrays only, not pointers */
+#define AL(a) array_length(a)
+#define iAL(a) ((int)AL(a)) /* array length as an int */
+#define sqr(a) ((a) * (a)) /* NB: evaluates its argument twice */
+#ifdef __GNUC__
+#define UNUSED __attribute__ ((unused))
+#elif !defined UNUSED
+#define UNUSED /* empty elsewhere, so that `UNUSED static void lsx_debug' still compiles */
+#endif
+#if !defined NDEBUG /* debug build: lsx_debug really prints */
+  #include <stdarg.h>
+  #include <stdio.h>
+  UNUSED static void lsx_debug(char const * fmt, ...)
+  { /* printf-style message to stderr, followed by a newline */
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    fputc('\n', stderr);
+    va_end(ap);
+  }
+#else /* NDEBUG: a call expands to `if(0) lsx_dummy(...)' and so is never executed */
+  #ifdef __GNUC__
+    void lsx_dummy(char const *, ...);
+  #else
+    static __inline void lsx_dummy(char const * x, ...) {}
+  #endif
+  #define lsx_debug if(0) lsx_dummy
+#endif
+#endif
diff --git a/src/libsoxr-dev.src.in b/src/libsoxr-dev.src.in
new file mode 100644
index 0000000..ce879f9
--- /dev/null
+++ b/src/libsoxr-dev.src.in
@@ -0,0 +1,2 @@
+set(TARGET_HEADERS "@TARGET_HEADERS@")
+set(TARGET_PCS "@TARGET_PCS@")
diff --git a/src/libsoxr-lsr.pc.in b/src/libsoxr-lsr.pc.in
new file mode 100644
index 0000000..0b22cde
--- /dev/null
+++ b/src/libsoxr-lsr.pc.in
@@ -0,0 +1,11 @@
+prefix=${CMAKE_INSTALL_PREFIX}
+exec_prefix=${CMAKE_INSTALL_PREFIX}
+libdir=${LIB_INSTALL_DIR}
+includedir=${INCLUDE_INSTALL_DIR}
+
+Name: lib${LSR}
+Description: ${DESCRIPTION_SUMMARY} (with libsamplerate-like bindings)
+Requires: libsoxr
+Version: ${PROJECT_VERSION}
+Libs: -L${LIB_INSTALL_DIR} -l${LSR}
+Cflags: -I${INCLUDE_INSTALL_DIR}
diff --git a/src/libsoxr.pc.in b/src/libsoxr.pc.in
new file mode 100644
index 0000000..6c31f6b
--- /dev/null
+++ b/src/libsoxr.pc.in
@@ -0,0 +1,11 @@
+prefix=${CMAKE_INSTALL_PREFIX}
+exec_prefix=${CMAKE_INSTALL_PREFIX}
+libdir=${LIB_INSTALL_DIR}
+includedir=${INCLUDE_INSTALL_DIR}
+
+Name: lib${PROJECT_NAME}
+Description: ${DESCRIPTION_SUMMARY}
+Requires:
+Version: ${PROJECT_VERSION}
+Libs: -L${LIB_INSTALL_DIR} -l${PROJECT_NAME}
+Cflags: -I${INCLUDE_INSTALL_DIR}
diff --git a/src/libsoxr.src.in b/src/libsoxr.src.in
new file mode 100644
index 0000000..1c926ff
--- /dev/null
+++ b/src/libsoxr.src.in
@@ -0,0 +1 @@
+set(TARGET_LIBS "@TARGET_LIBS@")
diff --git a/src/lsr.c b/src/lsr.c
new file mode 100644
index 0000000..aeb8959
--- /dev/null
+++ b/src/lsr.c
@@ -0,0 +1,114 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Wrapper mostly compatible with `libsamplerate'. */
+
+#include <assert.h>
+#include <stdlib.h>
+#include "soxr.h"
+
+/* Runtime casts: the names below are mapped onto soxr types before soxr-lsr.h is included. */
+typedef struct io_t {
+  float *in,*out; long ilen,olen,idone,odone; int eoi; double oi_ratio;} io_t; /* idone/odone are filled in by src_simple and src_process */
+#define SRC_DATA io_t
+typedef struct  soxr SRC_STATE;
+#define src_callback_t soxr_input_fn_t
+#define SRC_ERROR soxr_error_t
+#define SRC_SRCTYPE unsigned
+
+#include "soxr-lsr.h"
+#include "rint.h"
+
+
+soxr_error_t src_simple(io_t * p, unsigned id, int channels)
+{ /* One soxr_oneshot call from p->in to p->out at rates 1 : p->oi_ratio; the counts done go back into p. */
+  size_t consumed, produced;
+  soxr_error_t result;
+  soxr_quality_spec_t quality = soxr_quality_spec(SOXR_LSR0Q + id, 0);
+  char const * env = getenv("SOXR_LSR_NUM_THREADS");
+  soxr_runtime_spec_t runtime = soxr_runtime_spec(!env || atoi(env) == 1); /* 0 only if the variable is set to something other than 1 */
+  assert (channels > 0);
+  assert (p->ilen >= 0);
+  assert (p->olen >= 0);
+  result = soxr_oneshot(1, p->oi_ratio, (unsigned)channels,
+      p->in, (size_t)p->ilen, &consumed, p->out, (size_t)p->olen, &produced,
+      0, &quality, &runtime);
+  p->idone = (long)consumed; p->odone = (long)produced;
+  return result;
+}
+
+soxr_t src_callback_new(soxr_input_fn_t fn, unsigned id, int channels, SRC_ERROR * error0, void * p)
+{ /* Creates a resampler and registers `fn' (with user pointer `p') as its input source; error0 may be null. */
+  soxr_quality_spec_t q_spec = soxr_quality_spec(SOXR_LSR0Q + id, 0);
+  char const * e = getenv("SOXR_LSR_NUM_THREADS");
+  soxr_runtime_spec_t r_spec = soxr_runtime_spec(!(e && atoi(e) != 1));
+  soxr_error_t error = 0;
+  soxr_t soxr = 0;
+  assert (channels > 0);
+  /* To minimise latency e.g. for real-time playback:
+  if (id == 2)
+    r_spec.log2_large_dft_size = r_spec.log2_min_dft_size = 8;
+    */
+  soxr = soxr_create(0, 0, (unsigned)channels, &error, 0, &q_spec, &r_spec);
+  if (soxr)
+    error = soxr_set_input_fn(soxr, fn, p, 0);
+  if (error0) *(int *)error0 = (int)(long)error; /* callers may pass no error pointer; the status is stored through an int view of it */
+  return soxr;
+}
+
+soxr_error_t src_process(soxr_t p, io_t * io)
+{ /* Sets the ratio from io->oi_ratio, runs one soxr_process call, and reports the counts done in io. */
+  size_t consumed, produced;
+  if (!p || !io) return "null pointer";
+  soxr_set_error(p, soxr_set_io_ratio(p, 1/io->oi_ratio, (size_t)io->olen));
+  /* hack: when io->eoi is set, the input length is passed bitwise-complemented */
+  soxr_process(p, io->in, (size_t)(io->eoi? ~io->ilen : io->ilen),
+      &consumed, io->out, (size_t)io->olen, &produced);
+  io->idone = (long)consumed; io->odone = (long)produced;
+  return soxr_error(p);
+}
+
+long src_callback_read(soxr_t p, double oi_ratio, long olen, float * obuf)
+{ /* -1 for a null handle or a negative olen; otherwise the value soxr_output returns. */
+  if (!(p && olen >= 0)) return -1;
+  soxr_set_error(p, soxr_set_io_ratio(p, 1/oi_ratio, (size_t)olen));
+  return (long)soxr_output(p, obuf, (size_t)olen);
+}
+
+void src_float_to_short_array(float const * src, short * dest, int len)
+{ /* Scales by 1 + SHRT_MAX, clamps to the short range, rounds with rint16; walks from the last element down. */
+  double const scale = 1. + SHRT_MAX, hi = scale - 1, lo = -scale;
+  assert (src && dest);
+  for (; len--; ) { double v = src[len] * scale; dest[len] = (short)(v > hi? (short)hi : v < lo? (short)lo : rint16(v)); }
+}
+
+void src_short_to_float_array(short const * src, float * dest, int len)
+{ /* Scales each sample by 1 / (1 + SHRT_MAX); walks from the last element down to the first. */
+  assert (src && dest);
+  for (; len--; ) { double const wide = src[len]; dest[len] = (float)(wide * (1 / (1. + SHRT_MAX))); }
+}
+
+void src_float_to_int_array(float const * src, int * dest, int len)
+{ /* Scales by 2^31, clamps to [-2^31, 2^31 - 1], rounds with rint32; walks from the last element down. */
+  double const scale = 32768. * 65536., hi = scale - 1, lo = -scale; /* N.B. int32, not int! (Also next fn.) */
+  assert (src && dest);
+  for (; len--; ) { double v = src[len] * scale; dest[len] = v >= hi? (int)hi : v < lo? (int)lo : rint32(v); }
+}
+
+void src_int_to_float_array(int const * src, float * dest, int len)
+{ /* Scales each sample by 1 / 2^31; walks from the last element down to the first. */
+  assert (src && dest);
+  for (; len--; ) { double const wide = src[len]; dest[len] = (float)(wide * (1 / (32768. * 65536.))); }
+}
+
+static char const * const names[] = {"LSR best sinc", "LSR medium sinc", "LSR fastest sinc", "LSR ZOH", "LSR linear", "SoX VHQ"}; /* index = converter id */
+char const * src_get_name(unsigned n)         {unsigned const count = getenv("SOXR_LSR_STRICT")? 5u : 6u; return n < count? names[n] : 0;} /* last entry is hidden when SOXR_LSR_STRICT is set */
+char const * src_get_description(unsigned id) {return src_get_name(id);} /* same text as the name */
+char const * src_get_version(void)            {return soxr_version();}
+char const * src_strerror(soxr_error_t error) {return error == (soxr_error_t)1? "Placeholder." : soxr_strerror(error);} /* the value 1 is special-cased; everything else goes to soxr_strerror */
+int src_is_valid_ratio(double oi_ratio)       {return getenv("SOXR_LSR_STRICT")? oi_ratio >= 1./256 && oi_ratio <= 256 : oi_ratio > 0;} /* strict: [1/256, 256]; otherwise any positive ratio */
+soxr_error_t src_error(soxr_t p)              {return soxr_error(p);}
+soxr_error_t src_reset(soxr_t p)              {return soxr_clear(p);}
+soxr_t src_delete(soxr_t p)                   {soxr_delete(p); return 0;} /* always returns a null handle */
+soxr_error_t src_set_ratio(soxr_t p, double oi_ratio) {return soxr_set_io_ratio(p, 1/oi_ratio, 0);} /* oi_ratio is inverted before it is handed to soxr_set_io_ratio */
+soxr_t src_new(unsigned id, int channels, SRC_ERROR * error) {return src_callback_new(0, id, channels, error, 0);} /* no input callback and no user pointer */
diff --git a/src/pffft.c b/src/pffft.c
new file mode 100644
index 0000000..0aab690
--- /dev/null
+++ b/src/pffft.c
@@ -0,0 +1,1729 @@
+/* Copyright (c) 2011  Julien Pommier ( pommier@modartt.com )
+
+   Based on original fortran 77 code from FFTPACKv4 from NETLIB
+   (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber
+   of NCAR, in 1985.
+
+   As confirmed by the NCAR fftpack software curators, the following
+   FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
+   released under the same terms.
+
+   FFTPACK license:
+
+   http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
+
+   Copyright (c) 2004 the University Corporation for Atmospheric
+   Research ("UCAR"). All rights reserved. Developed by NCAR's
+   Computational and Information Systems Laboratory, UCAR,
+   www.cisl.ucar.edu.
+
+   Redistribution and use of the Software in source and binary forms,
+   with or without modification, is permitted provided that the
+   following conditions are met:
+
+   - Neither the names of NCAR's Computational and Information Systems
+   Laboratory, the University Corporation for Atmospheric Research,
+   nor the names of its sponsors or contributors may be used to
+   endorse or promote products derived from this Software without
+   specific prior written permission.
+
+   - Redistributions of source code must retain the above copyright
+   notices, this list of conditions, and the disclaimer below.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions, and the disclaimer below in the
+   documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+   SOFTWARE.
+
+
+   PFFFT : a Pretty Fast FFT.
+
+   This file is largely based on the original FFTPACK implementation, modified in
+   order to take advantage of SIMD instructions of modern CPUs.
+*/
+
+/*
+  ChangeLog:
+  - 2011/10/02, version 1: This is the very first release of this file.
+*/
+
+#if !defined PFFT_MACROS_ONLY
+#include "pffft.h"
+#include "simd.h"
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+
+#define pffft_aligned_free    _soxr_simd_aligned_free /* pffft's aligned-memory names are redirected to the _soxr_simd_* routines (presumably declared in simd.h -- confirm) */
+#define pffft_aligned_malloc  _soxr_simd_aligned_malloc
+#define pffft_aligned_calloc  _soxr_simd_aligned_calloc
+#endif
+
+/*
+   vector support macros: the rest of the code is independent of
+   SSE/Altivec/NEON -- adding support for other platforms with 4-element
+   vectors should be limited to these macros
+*/
+
+
+/* define PFFFT_SIMD_DISABLE if you want to use scalar code instead of simd code */
+/*#define PFFFT_SIMD_DISABLE */
+
+/* detect compiler flavour and pick its spelling of the inlining / no-alias hints */
+#if defined(_MSC_VER)
+#  define COMPILER_MSVC
+#  define ALWAYS_INLINE(return_type) __forceinline return_type
+#  define NEVER_INLINE(return_type) __declspec(noinline) return_type
+#  define RESTRICT __restrict
+/*#  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (v4sf*)_alloca(size__ * sizeof(type__)) */
+#elif defined(__GNUC__)
+#  define COMPILER_GCC
+#  define ALWAYS_INLINE(return_type) return_type __attribute__ ((always_inline))
+#  define NEVER_INLINE(return_type) return_type __attribute__ ((noinline))
+#  define RESTRICT __restrict
+/*#  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__]; */
+#else /* any other compiler: the hints are optional, so let them expand to plain C */
+#  define ALWAYS_INLINE(return_type) return_type
+#  define NEVER_INLINE(return_type) return_type
+#  define RESTRICT
+#endif
+
+/*
+   Altivec support macros (used when __ppc__ or __ppc64__ is defined and PFFFT_SIMD_DISABLE is not)
+*/
+#if !defined(PFFFT_SIMD_DISABLE) && (defined(__ppc__) || defined(__ppc64__))
+typedef vector float v4sf;
+#  define SIMD_SZ 4
+#  define VZERO() ((vector float) vec_splat_u8(0))
+#  define VMUL(a,b) vec_madd(a,b, VZERO()) /* a*b, written as the multiply-add a*b + 0 */
+#  define VADD(a,b) vec_add(a,b)
+#  define VMADD(a,b,c) vec_madd(a,b,c)
+#  define VSUB(a,b) vec_sub(a,b)
+inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_perm(v, v, vec_lvsl(0, p)), 0); }
+#  define LD_PS1(p) ld_ps1(&p) /* takes an lvalue: its address is passed on */
+#  define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = vec_mergeh(in1, in2); out2 = vec_mergel(in1, in2); out1 = tmp__; }
+#  define UNINTERLEAVE2(in1, in2, out1, out2) {                           \
+    vector unsigned char vperm1 =  (vector unsigned char)(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); \
+    vector unsigned char vperm2 =  (vector unsigned char)(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); \
+    v4sf tmp__ = vec_perm(in1, in2, vperm1); out2 = vec_perm(in1, in2, vperm2); out1 = tmp__; \
+  }
+#  define VTRANSPOSE4(x0,x1,x2,x3) {              \
+    v4sf y0 = vec_mergeh(x0, x2);               \
+    v4sf y1 = vec_mergel(x0, x2);               \
+    v4sf y2 = vec_mergeh(x1, x3);               \
+    v4sf y3 = vec_mergel(x1, x3);               \
+    x0 = vec_mergeh(y0, y2);                    \
+    x1 = vec_mergel(y0, y2);                    \
+    x2 = vec_mergeh(y1, y3);                    \
+    x3 = vec_mergel(y1, y3);                    \
+  }
+#  define VSWAPHL(a,b) vec_perm(a,b, (vector unsigned char)(16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15))
+#  define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0) /* true when the low four address bits are zero */
+
+/*
+  SSE1 support macros (x86 and x86-64)
+*/
+#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(__i386__) || defined(_M_IX86))
+/* `i386' is not predefined by GCC in strict ISO modes (-ansi, -std=c99); `__i386__' is, so both spellings are tested. */
+#include <xmmintrin.h>
+typedef __m128 v4sf;
+#  define SIMD_SZ 4 /* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */
+#  define VZERO() _mm_setzero_ps()
+#  define VMUL(a,b) _mm_mul_ps(a,b)
+#  define VADD(a,b) _mm_add_ps(a,b)
+#  define VMADD(a,b,c) _mm_add_ps(_mm_mul_ps(a,b), c)
+#  define VSUB(a,b) _mm_sub_ps(a,b)
+#  define LD_PS1(p) _mm_set1_ps(p)
+#  define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_unpacklo_ps(in1, in2); out2 = _mm_unpackhi_ps(in1, in2); out1 = tmp__; }
+#  define UNINTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2,0,2,0)); out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3,1,3,1)); out1 = tmp__; }
+#  define VTRANSPOSE4(x0,x1,x2,x3) _MM_TRANSPOSE4_PS(x0,x1,x2,x3)
+#  define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0))
+#  define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0) /* true when the low four address bits are zero */
+
+/*
+  ARM NEON support macros (used whenever __arm__ is defined and PFFFT_SIMD_DISABLE is not)
+*/
+#elif !defined(PFFFT_SIMD_DISABLE) && defined(__arm__)
+#  include <arm_neon.h>
+typedef float32x4_t v4sf;
+#  define SIMD_SZ 4
+#  define VZERO() vdupq_n_f32(0)
+#  define VMUL(a,b) vmulq_f32(a,b)
+#  define VADD(a,b) vaddq_f32(a,b)
+#  define VMADD(a,b,c) vmlaq_f32(c,a,b) /* note the argument order: the addend c goes first */
+#  define VSUB(a,b) vsubq_f32(a,b)
+#  define LD_PS1(p) vld1q_dup_f32(&(p)) /* takes an lvalue: its address is passed on */
+#  define INTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vzipq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; }
+#  define UNINTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vuzpq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; }
+#  define VTRANSPOSE4_(x0,x1,x2,x3) {                                    \
+    float32x4x2_t t0_ = vzipq_f32(x0, x2);                              \
+    float32x4x2_t t1_ = vzipq_f32(x1, x3);                              \
+    float32x4x2_t u0_ = vzipq_f32(t0_.val[0], t1_.val[0]);              \
+    float32x4x2_t u1_ = vzipq_f32(t0_.val[1], t1_.val[1]);              \
+    x0 = u0_.val[0]; x1 = u0_.val[1]; x2 = u1_.val[0]; x3 = u1_.val[1]; \
+  }
+/* marginally faster version (inline asm; the intrinsics-only variant above carries a trailing underscore) */
+#  define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32 %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2), "+w"(x3)::); }
+#  define VSWAPHL(a,b) vcombine_f32(vget_low_f32(b), vget_high_f32(a))
+#  define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0) /* only the low two address bits are tested here */
+#else
+#  if !defined(PFFFT_SIMD_DISABLE)
+#    warning "building with simd disabled !\n";
+#    define PFFFT_SIMD_DISABLE /* fallback to scalar code */
+#  endif
+#endif
+
+/* scalar fallback: used when no SIMD branch above matched or when PFFFT_SIMD_DISABLE was defined beforehand; a `vector' is then a single float */
+#ifdef PFFFT_SIMD_DISABLE
+typedef float v4sf;
+#  define SIMD_SZ 1
+#  define VZERO() 0.f
+#  define VMUL(a,b) ((a)*(b))
+#  define VADD(a,b) ((a)+(b))
+#  define VMADD(a,b,c) ((a)*(b)+(c))
+#  define VSUB(a,b) ((a)-(b))
+#  define LD_PS1(p) (p)
+#  define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0)
+#endif
+
+/* shortcuts for complex multiplications, done in place: VCPLXMUL sets (ar,ai) to (ar,ai)*(br,bi); VCPLXMULCONJ sets it to (ar,ai)*conj(br,bi) */
+#define VCPLXMUL(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VSUB(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VADD(ai,tmp); }
+#define VCPLXMULCONJ(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VADD(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VSUB(ai,tmp); }
+
+#if !defined(PFFFT_SIMD_DISABLE)
+typedef union v4sf_union { /* gives per-lane float access to one SIMD vector */
+  v4sf  v;
+  float f[4];
+} v4sf_union;
+
+#if 0 /* disabled self-test; it calls printf, so re-enabling it needs <stdio.h> unless another header already provides it */
+#include <string.h>
+
+#define assertv4(v,f0,f1,f2,f3) assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3))
+
+/* detect bugs with the vector support macros */
+void validate_pffft_simd() {
+  float f[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 };
+  v4sf_union a0, a1, a2, a3, t, u;
+  memcpy(a0.f, f, 4*sizeof(float));
+  memcpy(a1.f, f+4, 4*sizeof(float));
+  memcpy(a2.f, f+8, 4*sizeof(float));
+  memcpy(a3.f, f+12, 4*sizeof(float));
+
+  t = a0; u = a1; t.v = VZERO();
+  printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0);
+  t.v = VADD(a1.v, a2.v);
+  printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18);
+  t.v = VMUL(a1.v, a2.v);
+  printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77);
+  t.v = VMADD(a1.v, a2.v,a0.v);
+  printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80);
+
+  INTERLEAVE2(a1.v,a2.v,t.v,u.v);
+  printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+  assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
+  UNINTERLEAVE2(a1.v,a2.v,t.v,u.v);
+  printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+  assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11);
+
+  t.v=LD_PS1(f[15]);
+  printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 15, 15, 15, 15);
+  t.v = VSWAPHL(a1.v, a2.v); /* expected: low half of the second operand, then high half of the first */
+  printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 8, 9, 6, 7);
+  VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
+  printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
+         a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
+         a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
+  assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
+}
+#endif
+#endif /*!PFFFT_SIMD_DISABLE */
+
+#if !defined PFFT_MACROS_ONLY
+
+
+#if !defined (COMPILER_MSVC) /* from here on sin/cos mean the float versions */
+  #define sin   sinf
+  #define cos   cosf
+#else /* MSVC: call the double routine and cast the result to float */
+  #define sin   (float)sin
+  #define cos   (float)cos
+#endif
+
+/*
+int pffft_simd_size() { return SIMD_SZ; }
+*/
+
+/*
+  passf2 and passb2 have been merged here, fsign = -1 for passf2, +1 for passb2
+*/
+static NEVER_INLINE(void) passf2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1, float fsign) {
+  int k, i;
+  int l1ido = l1*ido;
+  if (ido <= 2) { /* this path uses no twiddle factors: plain sums and differences of two (re, im) pairs */
+    for (k=0; k < l1ido; k += ido, ch += ido, cc+= 2*ido) {
+      ch[0]         = VADD(cc[0], cc[ido+0]);
+      ch[l1ido]     = VSUB(cc[0], cc[ido+0]);
+      ch[1]         = VADD(cc[1], cc[ido+1]);
+      ch[l1ido + 1] = VSUB(cc[1], cc[ido+1]);
+    }
+  } else {
+    for (k=0; k < l1ido; k += ido, ch += ido, cc += 2*ido) {
+      for (i=0; i<ido-1; i+=2) { /* element i is used as the real part, i+1 as the imaginary part */
+        v4sf tr2 = VSUB(cc[i+0], cc[i+ido+0]);
+        v4sf ti2 = VSUB(cc[i+1], cc[i+ido+1]);
+        v4sf wr = LD_PS1(wa1[i]), wi = VMUL(LD_PS1(fsign), LD_PS1(wa1[i+1])); /* twiddle; fsign scales its imaginary part */
+        ch[i]   = VADD(cc[i+0], cc[i+ido+0]);
+        ch[i+1] = VADD(cc[i+1], cc[i+ido+1]);
+        VCPLXMUL(tr2, ti2, wr, wi); /* the difference is multiplied by the twiddle before being stored at offset l1ido */
+        ch[i+l1ido]   = tr2;
+        ch[i+l1ido+1] = ti2;
+      }
+    }
+  }
+}
+
+/*
+  passf3 and passb3 have been merged here, fsign = -1 for passf3, +1 for passb3
+*/
+static NEVER_INLINE(void) passf3_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
+                                    const float *wa1, const float *wa2, float fsign) {
+  static const float taur = -0.5f; /* cos(2*pi/3) */
+  float taui = 0.866025403784439f*fsign; /* sin(2*pi/3), with the sign chosen by fsign */
+  int i, k;
+  v4sf tr2, ti2, cr2, ci2, cr3, ci3, dr2, di2, dr3, di3;
+  int l1ido = l1*ido;
+  float wr1, wi1, wr2, wi2;
+  assert(ido > 2); /* there is no small-ido special case in this function */
+  for (k=0; k< l1ido; k += ido, cc+= 3*ido, ch +=ido) {
+    for (i=0; i<ido-1; i+=2) { /* element i is used as the real part, i+1 as the imaginary part */
+      tr2 = VADD(cc[i+ido], cc[i+2*ido]);
+      cr2 = VADD(cc[i], VMUL(LD_PS1(taur),tr2));
+      ch[i]    = VADD(cc[i], tr2);
+      ti2 = VADD(cc[i+ido+1], cc[i+2*ido+1]);
+      ci2 = VADD(cc[i    +1], VMUL(LD_PS1(taur),ti2));
+      ch[i+1]  = VADD(cc[i+1], ti2);
+      cr3 = VMUL(LD_PS1(taui), VSUB(cc[i+ido], cc[i+2*ido]));
+      ci3 = VMUL(LD_PS1(taui), VSUB(cc[i+ido+1], cc[i+2*ido+1]));
+      dr2 = VSUB(cr2, ci3);
+      dr3 = VADD(cr2, ci3);
+      di2 = VADD(ci2, cr3);
+      di3 = VSUB(ci2, cr3);
+      wr1=wa1[i], wi1=fsign*wa1[i+1], wr2=wa2[i], wi2=fsign*wa2[i+1]; /* twiddles; fsign scales their imaginary parts */
+      VCPLXMUL(dr2, di2, LD_PS1(wr1), LD_PS1(wi1));
+      ch[i+l1ido] = dr2;
+      ch[i+l1ido + 1] = di2;
+      VCPLXMUL(dr3, di3, LD_PS1(wr2), LD_PS1(wi2));
+      ch[i+2*l1ido] = dr3;
+      ch[i+2*l1ido+1] = di3;
+    }
+  }
+} /* passf3 */
+
+static NEVER_INLINE(void) passf4_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
+                                    const float *wa1, const float *wa2, const float *wa3, float fsign) {
+  /* fsign == -1 for forward transform and +1 for backward transform */
+
+  int i, k;
+  v4sf ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
+  int l1ido = l1*ido;
+  if (ido == 2) { /* this path uses no twiddle factors */
+    for (k=0; k < l1ido; k += ido, ch += ido, cc += 4*ido) {
+      tr1 = VSUB(cc[0], cc[2*ido + 0]);
+      tr2 = VADD(cc[0], cc[2*ido + 0]);
+      ti1 = VSUB(cc[1], cc[2*ido + 1]);
+      ti2 = VADD(cc[1], cc[2*ido + 1]);
+      ti4 = VMUL(VSUB(cc[1*ido + 0], cc[3*ido + 0]), LD_PS1(fsign));
+      tr4 = VMUL(VSUB(cc[3*ido + 1], cc[1*ido + 1]), LD_PS1(fsign));
+      tr3 = VADD(cc[ido + 0], cc[3*ido + 0]);
+      ti3 = VADD(cc[ido + 1], cc[3*ido + 1]);
+
+      ch[0*l1ido + 0] = VADD(tr2, tr3);
+      ch[0*l1ido + 1] = VADD(ti2, ti3);
+      ch[1*l1ido + 0] = VADD(tr1, tr4);
+      ch[1*l1ido + 1] = VADD(ti1, ti4);
+      ch[2*l1ido + 0] = VSUB(tr2, tr3);
+      ch[2*l1ido + 1] = VSUB(ti2, ti3);
+      ch[3*l1ido + 0] = VSUB(tr1, tr4);
+      ch[3*l1ido + 1] = VSUB(ti1, ti4);
+    }
+  } else {
+    for (k=0; k < l1ido; k += ido, ch+=ido, cc += 4*ido) {
+      for (i=0; i<ido-1; i+=2) { /* element i is used as the real part, i+1 as the imaginary part */
+        float wr1, wi1, wr2, wi2, wr3, wi3;
+        tr1 = VSUB(cc[i + 0], cc[i + 2*ido + 0]);
+        tr2 = VADD(cc[i + 0], cc[i + 2*ido + 0]);
+        ti1 = VSUB(cc[i + 1], cc[i + 2*ido + 1]);
+        ti2 = VADD(cc[i + 1], cc[i + 2*ido + 1]);
+        tr4 = VMUL(VSUB(cc[i + 3*ido + 1], cc[i + 1*ido + 1]), LD_PS1(fsign));
+        ti4 = VMUL(VSUB(cc[i + 1*ido + 0], cc[i + 3*ido + 0]), LD_PS1(fsign));
+        tr3 = VADD(cc[i + ido + 0], cc[i + 3*ido + 0]);
+        ti3 = VADD(cc[i + ido + 1], cc[i + 3*ido + 1]);
+
+        ch[i] = VADD(tr2, tr3);
+        cr3    = VSUB(tr2, tr3);
+        ch[i + 1] = VADD(ti2, ti3);
+        ci3 = VSUB(ti2, ti3);
+
+        cr2 = VADD(tr1, tr4);
+        cr4 = VSUB(tr1, tr4);
+        ci2 = VADD(ti1, ti4);
+        ci4 = VSUB(ti1, ti4);
+        wr1=wa1[i], wi1=fsign*wa1[i+1];
+        VCPLXMUL(cr2, ci2, LD_PS1(wr1), LD_PS1(wi1));
+        wr2=wa2[i], wi2=fsign*wa2[i+1]; /* the next twiddle is fetched before the previous result is stored */
+        ch[i + l1ido] = cr2;
+        ch[i + l1ido + 1] = ci2;
+
+        VCPLXMUL(cr3, ci3, LD_PS1(wr2), LD_PS1(wi2));
+        wr3=wa3[i], wi3=fsign*wa3[i+1];
+        ch[i + 2*l1ido] = cr3;
+        ch[i + 2*l1ido + 1] = ci3;
+
+        VCPLXMUL(cr4, ci4, LD_PS1(wr3), LD_PS1(wi3));
+        ch[i + 3*l1ido] = cr4;
+        ch[i + 3*l1ido + 1] = ci4;
+      }
+    }
+  }
+} /* passf4 */
+
+static NEVER_INLINE(void) radf2_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch, const float *wa1) {
+  static const float minus_one = -1.f;
+  int i, k, l1ido = l1*ido;
+  for (k=0; k < l1ido; k += ido) { /* first element of each length-ido block: sum and difference, no twiddle */
+    v4sf a = cc[k], b = cc[k + l1ido];
+    ch[2*k] = VADD(a, b);
+    ch[2*(k+ido)-1] = VSUB(a, b);
+  }
+  if (ido < 2) return;
+  if (ido != 2) {
+    for (k=0; k < l1ido; k += ido) {
+      for (i=2; i<ido; i+=2) { /* pairs (i-1, i); results go to index i and to the mirrored index 2*(k+ido) - i */
+        v4sf tr2 = cc[i - 1 + k + l1ido], ti2 = cc[i + k + l1ido];
+        v4sf br = cc[i - 1 + k], bi = cc[i + k];
+        VCPLXMULCONJ(tr2, ti2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
+        ch[i + 2*k] = VADD(bi, ti2);
+        ch[2*(k+ido) - i] = VSUB(ti2, bi);
+        ch[i - 1 + 2*k] = VADD(br, tr2);
+        ch[2*(k+ido) - i -1] = VSUB(br, tr2);
+      }
+    }
+    if (ido % 2 == 1) return;
+  }
+  for (k=0; k < l1ido; k += ido) { /* reached only for even ido: handles the last element of each block */
+    ch[2*k + ido] = VMUL(LD_PS1(minus_one), cc[ido-1 + k + l1ido]);
+    ch[2*k + ido-1] = cc[k + ido-1];
+  }
+} /* radf2 */
+
+
+static NEVER_INLINE(void) radb2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1) {
+  static const float minus_two=-2;
+  int i, k, l1ido = l1*ido;
+  v4sf a,b,c,d, tr2, ti2;
+  for (k=0; k < l1ido; k += ido) { /* first element of each block: reads the two positions that radf2_ps writes in its first loop */
+    a = cc[2*k]; b = cc[2*(k+ido) - 1];
+    ch[k] = VADD(a, b);
+    ch[k + l1ido] =VSUB(a, b);
+  }
+  if (ido < 2) return;
+  if (ido != 2) {
+    for (k = 0; k < l1ido; k += ido) {
+      for (i = 2; i < ido; i += 2) { /* pairs (i-1, i) combined with their mirrored counterparts at 2*(k+ido) - i */
+        a = cc[i-1 + 2*k]; b = cc[2*(k + ido) - i - 1];
+        c = cc[i+0 + 2*k]; d = cc[2*(k + ido) - i + 0];
+        ch[i-1 + k] = VADD(a, b);
+        tr2 = VSUB(a, b);
+        ch[i+0 + k] = VSUB(c, d);
+        ti2 = VADD(c, d);
+        VCPLXMUL(tr2, ti2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
+        ch[i-1 + k + l1ido] = tr2;
+        ch[i+0 + k + l1ido] = ti2;
+      }
+    }
+    if (ido % 2 == 1) return;
+  }
+  for (k = 0; k < l1ido; k += ido) { /* reached only for even ido: last element of each block */
+    a = cc[2*k + ido-1]; b = cc[2*k + ido];
+    ch[k + ido-1] = VADD(a,a); /* 2*a */
+    ch[k + ido-1 + l1ido] = VMUL(LD_PS1(minus_two), b);
+  }
+} /* radb2 */
+
+static void radf3_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch,
+                     const float *wa1, const float *wa2) {
+  static const float taur = -0.5f; /* cos(2*pi/3) */
+  static const float taui = 0.866025403784439f; /* sin(2*pi/3) */
+  int i, k, ic;
+  v4sf ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3, wr1, wi1, wr2, wi2;
+  for (k=0; k<l1; k++) { /* first element of each block: no twiddle needed */
+    cr2 = VADD(cc[(k + l1)*ido], cc[(k + 2*l1)*ido]);
+    ch[3*k*ido] = VADD(cc[k*ido], cr2);
+    ch[(3*k+2)*ido] = VMUL(LD_PS1(taui), VSUB(cc[(k + l1*2)*ido], cc[(k + l1)*ido]));
+    ch[ido-1 + (3*k + 1)*ido] = VADD(cc[k*ido], VMUL(LD_PS1(taur), cr2));
+  }
+  if (ido == 1) return;
+  for (k=0; k<l1; k++) {
+    for (i=2; i<ido; i+=2) {
+      ic = ido - i; /* mirrored index used for the second output block */
+      wr1 = LD_PS1(wa1[i - 2]); wi1 = LD_PS1(wa1[i - 1]);
+      dr2 = cc[i - 1 + (k + l1)*ido]; di2 = cc[i + (k + l1)*ido];
+      VCPLXMULCONJ(dr2, di2, wr1, wi1);
+
+      wr2 = LD_PS1(wa2[i - 2]); wi2 = LD_PS1(wa2[i - 1]);
+      dr3 = cc[i - 1 + (k + l1*2)*ido]; di3 = cc[i + (k + l1*2)*ido];
+      VCPLXMULCONJ(dr3, di3, wr2, wi2);
+
+      cr2 = VADD(dr2, dr3);
+      ci2 = VADD(di2, di3);
+      ch[i - 1 + 3*k*ido] = VADD(cc[i - 1 + k*ido], cr2);
+      ch[i + 3*k*ido] = VADD(cc[i + k*ido], ci2);
+      tr2 = VADD(cc[i - 1 + k*ido], VMUL(LD_PS1(taur), cr2));
+      ti2 = VADD(cc[i + k*ido], VMUL(LD_PS1(taur), ci2));
+      tr3 = VMUL(LD_PS1(taui), VSUB(di2, di3));
+      ti3 = VMUL(LD_PS1(taui), VSUB(dr3, dr2));
+      ch[i - 1 + (3*k + 2)*ido] = VADD(tr2, tr3);
+      ch[ic - 1 + (3*k + 1)*ido] = VSUB(tr2, tr3);
+      ch[i + (3*k + 2)*ido] = VADD(ti2, ti3);
+      ch[ic + (3*k + 1)*ido] = VSUB(ti3, ti2);
+    }
+  }
+} /* radf3 */
+
+
+static void radb3_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch,
+                     const float *wa1, const float *wa2)
+{
+  static const float taur = -0.5f; /* cos(2*pi/3) */
+  static const float taui = 0.866025403784439f; /* sin(2*pi/3) */
+  static const float taui_2 = 0.866025403784439f*2; /* twice taui, used only in the first loop */
+  int i, k, ic;
+  v4sf ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
+  for (k=0; k<l1; k++) { /* first element of each block: no twiddle needed */
+    tr2 = cc[ido-1 + (3*k + 1)*ido]; tr2 = VADD(tr2,tr2);
+    cr2 = VMADD(LD_PS1(taur), tr2, cc[3*k*ido]);
+    ch[k*ido] = VADD(cc[3*k*ido], tr2);
+    ci3 = VMUL(LD_PS1(taui_2), cc[(3*k + 2)*ido]);
+    ch[(k + l1)*ido] = VSUB(cr2, ci3);
+    ch[(k + 2*l1)*ido] = VADD(cr2, ci3);
+  }
+  if (ido == 1) return;
+  for (k=0; k<l1; k++) {
+    for (i=2; i<ido; i+=2) {
+      ic = ido - i; /* mirrored index into the middle input block */
+      tr2 = VADD(cc[i - 1 + (3*k + 2)*ido], cc[ic - 1 + (3*k + 1)*ido]);
+      cr2 = VMADD(LD_PS1(taur), tr2, cc[i - 1 + 3*k*ido]);
+      ch[i - 1 + k*ido] = VADD(cc[i - 1 + 3*k*ido], tr2);
+      ti2 = VSUB(cc[i + (3*k + 2)*ido], cc[ic + (3*k + 1)*ido]);
+      ci2 = VMADD(LD_PS1(taur), ti2, cc[i + 3*k*ido]);
+      ch[i + k*ido] = VADD(cc[i + 3*k*ido], ti2);
+      cr3 = VMUL(LD_PS1(taui), VSUB(cc[i - 1 + (3*k + 2)*ido], cc[ic - 1 + (3*k + 1)*ido]));
+      ci3 = VMUL(LD_PS1(taui), VADD(cc[i + (3*k + 2)*ido], cc[ic + (3*k + 1)*ido]));
+      dr2 = VSUB(cr2, ci3);
+      dr3 = VADD(cr2, ci3);
+      di2 = VADD(ci2, cr3);
+      di3 = VSUB(ci2, cr3);
+      VCPLXMUL(dr2, di2, LD_PS1(wa1[i-2]), LD_PS1(wa1[i-1]));
+      ch[i - 1 + (k + l1)*ido] = dr2;
+      ch[i + (k + l1)*ido] = di2;
+      VCPLXMUL(dr3, di3, LD_PS1(wa2[i-2]), LD_PS1(wa2[i-1]));
+      ch[i - 1 + (k + 2*l1)*ido] = dr3;
+      ch[i + (k + 2*l1)*ido] = di3;
+    }
+  }
+} /* radb3 */
+
+
+static NEVER_INLINE(void) radf4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf * RESTRICT ch,
+                                   const float * RESTRICT wa1, const float * RESTRICT wa2, const float * RESTRICT wa3)
+{ /* Radix-4 forward butterfly pass: reads four l1*ido-sized planes of cc, writes ch in groups of 4*ido. */
+  static const float minus_hsqt2 = (float)-0.7071067811865475; /* -sqrt(2)/2 */
+  int i, k, l1ido = l1*ido;
+  { /* pass 1: element 0 of every group, no twiddle factors */
+    const v4sf *RESTRICT cc_ = cc, * RESTRICT cc_end = cc + l1ido; /* cc_/ch_ remember the entry pointers */
+    v4sf * RESTRICT ch_ = ch;
+    while (cc < cc_end) {
+      /* this loop represents between 25% and 40% of total radf4_ps cost ! */
+      v4sf a0 = cc[0], a1 = cc[l1ido];
+      v4sf a2 = cc[2*l1ido], a3 = cc[3*l1ido];
+      v4sf tr1 = VADD(a1, a3);
+      v4sf tr2 = VADD(a0, a2);
+      ch[2*ido-1] = VSUB(a0, a2);
+      ch[2*ido  ] = VSUB(a3, a1);
+      ch[0      ] = VADD(tr1, tr2);
+      ch[4*ido-1] = VSUB(tr2, tr1);
+      cc += ido; ch += 4*ido;
+    }
+    cc = cc_; ch = ch_; /* restore the parameters advanced by the loop above */
+  }
+  if (ido < 2) return;
+  if (ido != 2) { /* pass 2: interior element pairs (i-1, i), each multiplied by its twiddles */
+    for (k = 0; k < l1ido; k += ido) {
+      const v4sf * RESTRICT pc = (v4sf*)(cc + 1 + k);
+      for (i=2; i<ido; i += 2, pc += 2) {
+        int ic = ido - i; /* mirrored output index */
+        v4sf wr, wi, cr2, ci2, cr3, ci3, cr4, ci4;
+        v4sf tr1, ti1, tr2, ti2, tr3, ti3, tr4, ti4;
+        /* inputs from planes 1..3 are each multiplied by a twiddle taken from wa1, wa2, wa3 */
+        cr2 = pc[1*l1ido+0];
+        ci2 = pc[1*l1ido+1];
+        wr=LD_PS1(wa1[i - 2]);
+        wi=LD_PS1(wa1[i - 1]);
+        VCPLXMULCONJ(cr2,ci2,wr,wi);
+
+        cr3 = pc[2*l1ido+0];
+        ci3 = pc[2*l1ido+1];
+        wr = LD_PS1(wa2[i-2]);
+        wi = LD_PS1(wa2[i-1]);
+        VCPLXMULCONJ(cr3, ci3, wr, wi);
+
+        cr4 = pc[3*l1ido];
+        ci4 = pc[3*l1ido+1];
+        wr = LD_PS1(wa3[i-2]);
+        wi = LD_PS1(wa3[i-1]);
+        VCPLXMULCONJ(cr4, ci4, wr, wi);
+
+        /* at this point, on SSE, five of "cr2 cr3 cr4 ci2 ci3 ci4" should be loaded in registers */
+
+        tr1 = VADD(cr2,cr4);
+        tr4 = VSUB(cr4,cr2);
+        tr2 = VADD(pc[0],cr3);
+        tr3 = VSUB(pc[0],cr3);
+        ch[i - 1 + 4*k] = VADD(tr1,tr2);
+        ch[ic - 1 + 4*k + 3*ido] = VSUB(tr2,tr1); /* at this point tr1 and tr2 can be disposed */
+        ti1 = VADD(ci2,ci4);
+        ti4 = VSUB(ci2,ci4);
+        ch[i - 1 + 4*k + 2*ido] = VADD(ti4,tr3);
+        ch[ic - 1 + 4*k + 1*ido] = VSUB(tr3,ti4); /* dispose tr3, ti4 */
+        ti2 = VADD(pc[1],ci3);
+        ti3 = VSUB(pc[1],ci3);
+        ch[i + 4*k] = VADD(ti1, ti2);
+        ch[ic + 4*k + 3*ido] = VSUB(ti1, ti2);
+        ch[i + 4*k + 2*ido] = VADD(tr4, ti3);
+        ch[ic + 4*k + 1*ido] = VSUB(tr4, ti3);
+      }
+    }
+    if (ido % 2 == 1) return; /* odd ido: no unpaired last element remains */
+  }
+  for (k=0; k<l1ido; k += ido) { /* pass 3 (even ido only): last element ido-1 of every group */
+    v4sf a = cc[ido-1 + k + l1ido], b = cc[ido-1 + k + 3*l1ido];
+    v4sf c = cc[ido-1 + k], d = cc[ido-1 + k + 2*l1ido];
+    v4sf ti1 = VMUL(LD_PS1(minus_hsqt2), VADD(a, b));
+    v4sf tr1 = VMUL(LD_PS1(minus_hsqt2), VSUB(b, a));
+    ch[ido-1 + 4*k] = VADD(tr1, c);
+    ch[ido-1 + 4*k + 2*ido] = VSUB(c, tr1);
+    ch[4*k + 1*ido] = VSUB(ti1, d);
+    ch[4*k + 3*ido] = VADD(ti1, d);
+  }
+} /* radf4 */
+
+
+static NEVER_INLINE(void) radb4_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch,
+                                   const float * RESTRICT wa1, const float * RESTRICT wa2, const float *RESTRICT wa3)
+{ /* Radix-4 backward butterfly pass: reads cc in groups of 4*ido, writes four l1*ido-sized planes of ch. */
+  static const float minus_sqrt2 = (float)-1.414213562373095; /* -sqrt(2) */
+  static const float two = 2.f;
+  int i, k, l1ido = l1*ido;
+  v4sf ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
+  { /* pass 1: element 0 of every group, no twiddle factors */
+    const v4sf *RESTRICT cc_ = cc, * RESTRICT ch_end = ch + l1ido; /* cc_/ch_ remember the entry pointers */
+    v4sf *ch_ = ch;
+    while (ch < ch_end) {
+      v4sf a = cc[0], b = cc[4*ido-1];
+      v4sf c = cc[2*ido], d = cc[2*ido-1];
+      tr3 = VMUL(LD_PS1(two),d);
+      tr2 = VADD(a,b);
+      tr1 = VSUB(a,b);
+      tr4 = VMUL(LD_PS1(two),c);
+      ch[0*l1ido] = VADD(tr2, tr3);
+      ch[2*l1ido] = VSUB(tr2, tr3);
+      ch[1*l1ido] = VSUB(tr1, tr4);
+      ch[3*l1ido] = VADD(tr1, tr4);
+
+      cc += 4*ido; ch += ido;
+    }
+    cc = cc_; ch = ch_; /* restore the parameters advanced by the loop above */
+  }
+  if (ido < 2) return;
+  if (ido != 2) { /* pass 2: interior element pairs; outputs for planes 1..3 get the wa1..wa3 twiddles */
+    for (k = 0; k < l1ido; k += ido) {
+      const v4sf * RESTRICT pc = (v4sf*)(cc - 1 + 4*k);
+      v4sf * RESTRICT ph = (v4sf*)(ch + k + 1);
+      for (i = 2; i < ido; i += 2) {
+        /* pc[x + i] and pc[x - i] address an element and its mirror within the input group */
+        tr1 = VSUB(pc[i], pc[4*ido - i]);
+        tr2 = VADD(pc[i], pc[4*ido - i]);
+        ti4 = VSUB(pc[2*ido + i], pc[2*ido - i]);
+        tr3 = VADD(pc[2*ido + i], pc[2*ido - i]);
+        ph[0] = VADD(tr2, tr3);
+        cr3 = VSUB(tr2, tr3);
+
+        ti3 = VSUB(pc[2*ido + i + 1], pc[2*ido - i + 1]);
+        tr4 = VADD(pc[2*ido + i + 1], pc[2*ido - i + 1]);
+        cr2 = VSUB(tr1, tr4);
+        cr4 = VADD(tr1, tr4);
+
+        ti1 = VADD(pc[i + 1], pc[4*ido - i + 1]);
+        ti2 = VSUB(pc[i + 1], pc[4*ido - i + 1]);
+
+        ph[1] = VADD(ti2, ti3); ph += l1ido; /* ph steps to the next output plane */
+        ci3 = VSUB(ti2, ti3);
+        ci2 = VADD(ti1, ti4);
+        ci4 = VSUB(ti1, ti4);
+        VCPLXMUL(cr2, ci2, LD_PS1(wa1[i-2]), LD_PS1(wa1[i-1]));
+        ph[0] = cr2;
+        ph[1] = ci2; ph += l1ido;
+        VCPLXMUL(cr3, ci3, LD_PS1(wa2[i-2]), LD_PS1(wa2[i-1]));
+        ph[0] = cr3;
+        ph[1] = ci3; ph += l1ido;
+        VCPLXMUL(cr4, ci4, LD_PS1(wa3[i-2]), LD_PS1(wa3[i-1]));
+        ph[0] = cr4;
+        ph[1] = ci4; ph = ph - 3*l1ido + 2; /* back to plane 0, advanced to the next element pair */
+      }
+    }
+    if (ido % 2 == 1) return; /* odd ido: no unpaired last element remains */
+  }
+  for (k=0; k < l1ido; k+=ido) { /* pass 3 (even ido only): last element ido-1 of every group */
+    int i0 = 4*k + ido;
+    v4sf c = cc[i0-1], d = cc[i0 + 2*ido-1];
+    v4sf a = cc[i0+0], b = cc[i0 + 2*ido+0];
+    tr1 = VSUB(c,d);
+    tr2 = VADD(c,d);
+    ti1 = VADD(b,a);
+    ti2 = VSUB(b,a);
+    ch[ido-1 + k + 0*l1ido] = VADD(tr2,tr2);
+    ch[ido-1 + k + 1*l1ido] = VMUL(LD_PS1(minus_sqrt2), VSUB(ti1, tr1));
+    ch[ido-1 + k + 2*l1ido] = VADD(ti2, ti2);
+    ch[ido-1 + k + 3*l1ido] = VMUL(LD_PS1(minus_sqrt2), VADD(ti1, tr1));
+  }
+} /* radb4 */
+
+static NEVER_INLINE(v4sf *) rfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
+                                      const float *wa, const int *ifac) {
+  /* Forward real driver: runs one radf{2,3,4}_ps pass per factor in ifac, last factor first, */
+  /* alternating between the two work buffers. Returns the buffer holding the final result.  */
+  v4sf *src = (v4sf*)input_readonly;
+  v4sf *dst = (src == work2 ? work1 : work2);
+  const int nfactors = ifac[1];
+  int span = n;     /* divided by the current radix after every pass */
+  int woff = n-1;   /* offset into wa, walked backwards */
+  int f;
+
+  assert(src != dst && work1 != work2);
+  for (f = nfactors + 1; f >= 2; --f) {
+    const int radix = ifac[f];
+    const int l1 = span / radix;
+    const int ido = n / span;
+
+    woff -= (radix - 1)*ido;
+    switch (radix) {
+      case 2:
+        radf2_ps(ido, l1, src, dst, &wa[woff]);
+        break;
+      case 3:
+        radf3_ps(ido, l1, src, dst, &wa[woff], &wa[woff + ido]);
+        break;
+      case 4:
+        radf4_ps(ido, l1, src, dst, &wa[woff], &wa[woff + ido], &wa[woff + ido + ido]);
+        break;
+      default:
+        assert(0);
+        break;
+    }
+
+    /* the buffer just written feeds the next pass; the other work buffer receives its output */
+    span = l1;
+    src = dst;
+    dst = (src == work2 ? work1 : work2);
+  }
+  return src; /* this is in fact the output .. */
+} /* rfftf1 */
+
+static NEVER_INLINE(v4sf *) rfftb1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
+                                      const float *wa, const int *ifac) {
+  /* Backward real driver: runs one radb{2,3,4}_ps pass per factor in ifac, first factor first, */
+  /* alternating between the two work buffers. Returns the buffer holding the final result.    */
+  v4sf *src = (v4sf*)input_readonly;
+  v4sf *dst = (src == work2 ? work1 : work2);
+  const int nfactors = ifac[1];
+  int l1 = 1;       /* multiplied by the current radix after every pass */
+  int woff = 0;     /* offset into wa, walked forwards */
+  int f;
+
+  assert(src != dst);
+  for (f = 2; f <= nfactors + 1; ++f) {
+    const int radix = ifac[f];
+    const int l2 = radix*l1;
+    const int ido = n / l2;
+
+    switch (radix) {
+      case 2:
+        radb2_ps(ido, l1, src, dst, &wa[woff]);
+        break;
+      case 3:
+        radb3_ps(ido, l1, src, dst, &wa[woff], &wa[woff + ido]);
+        break;
+      case 4:
+        radb4_ps(ido, l1, src, dst, &wa[woff], &wa[woff + ido], &wa[woff + ido + ido]);
+        break;
+      default:
+        assert(0);
+        break;
+    }
+
+    /* the buffer just written feeds the next pass; the other work buffer receives its output */
+    l1 = l2;
+    woff += (radix - 1)*ido;
+    src = dst;
+    dst = (src == work2 ? work1 : work2);
+  }
+  return src; /* this is in fact the output .. */
+}
+
+static int decompose(int n, int *ifac, const int ntryh[3]) {
+  /* Greedily divides n by ntryh[0], ntryh[1], ntryh[2] (in that order, each as often as it fits). */
+  /* Output: ifac[0] = n, ifac[1] = number of factors found, ifac[2..] = the factors themselves.  */
+  /* Returns the factor count; n was fully factored only if the factors' product equals n.       */
+  int remaining = n, count = 0, t, pos;
+  for (t = 0; t < 3; ++t) {
+    const int radix = ntryh[t];
+    while (remaining != 1 && remaining % radix == 0) {
+      remaining /= radix;
+      ifac[2 + count] = radix;
+      ++count;
+      if (radix != 2 || count == 1) continue;
+      /* a factor 2 that is not the very first factor goes to the front of the list: */
+      /* slide the earlier factors up one slot (overwriting the 2 just appended)     */
+      for (pos = count + 1; pos > 2; --pos) {
+        ifac[pos] = ifac[pos - 1];
+      }
+      ifac[2] = 2;
+    }
+  }
+  ifac[0] = n;
+  ifac[1] = count;
+  return count;
+}
+
+
+
+static void rffti1_ps(int n, float *wa, int *ifac)
+{
+  /* Factors n into ifac (trying 4, then 2, then 3) and fills wa with the cos/sin twiddle pairs */
+  /* of every factor except the last one. Each factor ip contributes ip-1 consecutive           */
+  /* sub-tables, each spaced ido floats apart.                                                   */
+  static const int ntryh[3] = { 4,2,3 };
+  const int nf = decompose(n,ifac,ntryh);
+  const float argh = (float)((2*M_PI) / n);
+  int base = 0;   /* index in wa where the current sub-table starts */
+  int l1 = 1;
+  int pass, j, fi;
+
+  if (nf == 1) return; /* a single factor: no table entries are written */
+  for (pass = 1; pass < nf; pass++) {
+    const int ip = ifac[pass + 1];
+    const int l2 = l1*ip;
+    const int ido = n / l2;
+
+    for (j = 1; j < ip; ++j) {
+      /* base angle of this sub-table: j*l1 steps of 2*pi/n */
+      const float argld = (float)(j*l1)*argh;
+
+      /* one (cos, sin) pair per odd index 3, 5, ... <= ido */
+      for (fi = 1; 2*fi + 1 <= ido; ++fi) {
+        wa[base + 2*fi - 2] = cos((float)fi*argld);
+        wa[base + 2*fi - 1] = sin((float)fi*argld);
+      }
+      base += ido;
+    }
+
+    l1 = l2;
+  }
+} /* rffti1 */
+
+static void cffti1_ps(int n, float *wa, int *ifac)
+{
+  /* Factors n into ifac (trying 3, then 4, then 2) and fills wa with cos/sin twiddle pairs */
+  /* for every factor, ip-1 sub-tables per factor ip.                                        */
+  static const int ntryh[3] = { 3,4,2 };
+  const int nf = decompose(n,ifac,ntryh);
+  const float argh = (float)((2*M_PI)/n);
+  int pos = 1;  /* index of the second float of the pair being written */
+  int l1 = 1;
+  int pass, j, fi;
+
+  for (pass = 1; pass <= nf; pass++) {
+    const int ip = ifac[pass+1];
+    const int l2 = l1*ip;
+    const int ido = n / l2;
+
+    for (j = 1; j < ip; j++) {
+      const int first = pos;
+      const float argld = (float)(j*l1)*argh;
+
+      /* every sub-table opens with the pair (1, 0); pos is not advanced between sub-tables, */
+      /* so this pair lands on the last pair written for the previous sub-table              */
+      wa[pos-1] = 1;
+      wa[pos] = 0;
+      for (fi = 1; fi <= ido; ++fi) {
+        pos += 2;
+        wa[pos-1] = cos((float)fi*argld);
+        wa[pos] = sin((float)fi*argld);
+      }
+      if (ip > 5) {
+        wa[first-1] = wa[pos-1];
+        wa[first] = wa[pos];
+      }
+    }
+    l1 = l2;
+  }
+} /* cffti1 */
+
+
+static v4sf *cfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2, const float *wa, const int *ifac, int isign) {
+  /* Complex driver: runs one passf{2,3,4}_ps pass per factor in ifac, first factor first, forwarding */
+  /* isign to each pass and alternating work buffers. Returns the buffer holding the final result.   */
+  v4sf *src = (v4sf*)input_readonly;
+  v4sf *dst = (src == work2 ? work1 : work2);
+  const int nfactors = ifac[1];
+  int l1 = 1;       /* multiplied by the current radix after every pass */
+  int woff = 0;     /* offset into wa, walked forwards */
+  int f;
+
+  assert(src != dst && work1 != work2);
+  for (f = 2; f <= nfactors + 1; ++f) {
+    const int radix = ifac[f];
+    const int l2 = radix*l1;
+    const int ido = n / l2;
+    const int idot = ido + ido;
+    switch (radix) {
+      case 2:
+        passf2_ps(idot, l1, src, dst, &wa[woff], (float)isign);
+        break;
+      case 3:
+        passf3_ps(idot, l1, src, dst, &wa[woff], &wa[woff + idot], (float)isign);
+        break;
+      case 4:
+        passf4_ps(idot, l1, src, dst, &wa[woff], &wa[woff + idot], &wa[woff + idot + idot], (float)isign);
+        break;
+      default:
+        assert(0);
+    }
+
+    /* the buffer just written feeds the next pass; the other work buffer receives its output */
+    l1 = l2;
+    woff += (radix - 1)*idot;
+    src = dst;
+    dst = (src == work2 ? work1 : work2);
+  }
+
+  return src; /* this is in fact the output .. */
+}
+
+
+struct PFFFT_Setup { /* precomputed state for one transform size and kind, built by pffft_new_setup() */
+  int     N; /* transform size as passed to pffft_new_setup() */
+  int     Ncvec; /* nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) */
+  int ifac[15]; /* filled by decompose(): [0] = factored length, [1] = number of factors, [2..] = the factors */
+  pffft_transform_t transform; /* PFFFT_REAL or PFFFT_COMPLEX */
+  v4sf *data; /* allocated room for twiddle coefs */
+  float *e;    /* points into 'data' , N/4*3 elements */
+  float *twiddle; /* points into 'data', N/4 elements */
+};
+
+PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) {
+  /* Returns a new setup for size N, or 0 if allocation fails or N/SIMD_SZ is not a product of 2, 3, 4. */
+  int k, m;
+  PFFFT_Setup *s = (PFFFT_Setup*)malloc(sizeof(PFFFT_Setup));
+  if (!s)
+    return s;
+  if (transform == PFFFT_REAL) { assert(N >= 32); }
+  if (transform == PFFFT_COMPLEX) { assert(N >= 16); }
+  /*assert((N % 32) == 0); */
+  s->N = N;
+  s->transform = transform;
+  /* nb of complex simd vectors */
+  s->Ncvec = (transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ;
+  s->data = (v4sf*)pffft_aligned_malloc(2*(size_t)s->Ncvec * sizeof(v4sf));
+  if (!s->data) {
+    free(s);
+    return 0;
+  }
+  s->e = (float*)s->data;
+  s->twiddle = (float*)(s->data + (2*s->Ncvec*(SIMD_SZ-1))/SIMD_SZ);
+  /* the 'e' table is the same for both transform kinds, so fill it once */
+  for (k=0; k < s->Ncvec; ++k) {
+    int i = k/SIMD_SZ;
+    int j = k%SIMD_SZ;
+    for (m=0; m < SIMD_SZ-1; ++m) {
+      float A = (float)(-2*M_PI*(m+1)*k / N);
+      s->e[(2*(i*3 + m) + 0) * SIMD_SZ + j] = cos(A);
+      s->e[(2*(i*3 + m) + 1) * SIMD_SZ + j] = sin(A);
+    }
+  }
+  /* factor N/SIMD_SZ into ifac and fill the twiddle table used by the radix passes */
+  if (transform == PFFFT_REAL) {
+    rffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac);
+  } else {
+    cffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac);
+  }
+  /* reject sizes the factoring left incomplete instead of handing back an unusable setup */
+  for (k=0, m=1; k < s->ifac[1]; ++k) { m *= s->ifac[2+k]; }
+  if (m != N/SIMD_SZ) {
+    pffft_aligned_free(s->data);
+    free(s);
+    return 0;
+  }
+  return s;
+}
+
+
+static void pffft_destroy_setup(PFFFT_Setup *s) {
+  /* Releases a setup and its coefficient storage; a null pointer is ignored. */
+  if (!s) return;
+  pffft_aligned_free(s->data);
+  free(s);
+}
+
+#if !defined(PFFFT_SIMD_DISABLE)
+
+/* [0 0 1 2 3 4 5 6 7 8] -> [0 8 7 6 5 4 3 2 1] */
+static void reversed_copy(int N, const v4sf *in, int in_stride, v4sf *out) { /* writes 2*N vectors backwards, ending just below 'out' */
+  v4sf g0, g1;
+  int k;
+  INTERLEAVE2(in[0], in[1], g0, g1); in += in_stride; /* each step consumes in[0], in[1], then skips in_stride vectors */
+
+  *--out = VSWAPHL(g0, g1); /* [g0l, g0h], [g1l g1h] -> [g1l, g0h] */
+  for (k=1; k < N; ++k) {
+    v4sf h0, h1;
+    INTERLEAVE2(in[0], in[1], h0, h1); in += in_stride;
+    *--out = VSWAPHL(g1, h0);
+    *--out = VSWAPHL(h0, h1);
+    g1 = h1; /* carried over to the next iteration */
+  }
+  *--out = VSWAPHL(g1, g0); /* the last vector wraps around to the very first one (g0) */
+}
+
+static void unreversed_copy(int N, const v4sf *in, v4sf *out, int out_stride) { /* reads 2*N vectors forwards; writes N pairs, stepping 'out' by out_stride */
+  v4sf g0, g1, h0, h1;
+  int k;
+  g0 = g1 = in[0]; ++in; /* g0 keeps the first input vector for the wrap-around after the loop */
+  for (k=1; k < N; ++k) {
+    h0 = *in++; h1 = *in++;
+    g1 = VSWAPHL(g1, h0);
+    h0 = VSWAPHL(h0, h1);
+    UNINTERLEAVE2(h0, g1, out[0], out[1]); out += out_stride; /* pffft_zreorder() passes a negative stride here */
+    g1 = h1;
+  }
+  h0 = *in++; h1 = g0; /* final pair: the last input vector combined with the saved first one */
+  g1 = VSWAPHL(g1, h0);
+  h0 = VSWAPHL(h0, h1);
+  UNINTERLEAVE2(h0, g1, out[0], out[1]);
+}
+
+static void pffft_zreorder(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) { /* reorders between the internal and the ordered layout; in != out */
+  int k, N = setup->N, Ncvec = setup->Ncvec;
+  const v4sf *vin = (const v4sf*)in;
+  v4sf *vout = (v4sf*)out;
+  assert(in != out);
+  if (setup->transform == PFFFT_REAL) {
+    int dk = N/32; /* number of 8-vector (32-float) blocks; uses the function-scope k rather than shadowing it */
+    if (direction == PFFFT_FORWARD) {
+      for (k=0; k < dk; ++k) {
+        INTERLEAVE2(vin[k*8 + 0], vin[k*8 + 1], vout[2*(0*dk + k) + 0], vout[2*(0*dk + k) + 1]);
+        INTERLEAVE2(vin[k*8 + 4], vin[k*8 + 5], vout[2*(2*dk + k) + 0], vout[2*(2*dk + k) + 1]);
+      }
+      reversed_copy(dk, vin+2, 8, (v4sf*)(out + N/2)); /* vectors 2,3 of each block, written backwards below out + N/2 */
+      reversed_copy(dk, vin+6, 8, (v4sf*)(out + N)); /* vectors 6,7 of each block, written backwards below out + N */
+    } else {
+      for (k=0; k < dk; ++k) {
+        UNINTERLEAVE2(vin[2*(0*dk + k) + 0], vin[2*(0*dk + k) + 1], vout[k*8 + 0], vout[k*8 + 1]);
+        UNINTERLEAVE2(vin[2*(2*dk + k) + 0], vin[2*(2*dk + k) + 1], vout[k*8 + 4], vout[k*8 + 5]);
+      }
+      unreversed_copy(dk, (v4sf*)(in + N/4), (v4sf*)(out + N - 6*SIMD_SZ), -8); /* stride -8: output blocks are filled from the last one backwards */
+      unreversed_copy(dk, (v4sf*)(in + 3*N/4), (v4sf*)(out + N - 2*SIMD_SZ), -8);
+    }
+  } else {
+    if (direction == PFFFT_FORWARD) {
+      for (k=0; k < Ncvec; ++k) {
+        int kk = (k/4) + (k%4)*(Ncvec/4); /* vector pair k in one layout corresponds to pair kk in the other */
+        INTERLEAVE2(vin[k*2], vin[k*2+1], vout[kk*2], vout[kk*2+1]);
+      }
+    } else {
+      for (k=0; k < Ncvec; ++k) {
+        int kk = (k/4) + (k%4)*(Ncvec/4);
+        UNINTERLEAVE2(vin[kk*2], vin[kk*2+1], vout[k*2], vout[k*2+1]);
+      }
+    }
+  }
+}
+
+static void pffft_cplx_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { /* last step of the forward complex transform; in != out */
+  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
+  v4sf r0, i0, r1, i1, r2, i2, r3, i3;
+  v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
+  assert(in != out);
+  for (k=0; k < dk; ++k) { /* each block: 8 input vectors, 6 coefficient vectors from e, 8 output vectors */
+    r0 = in[8*k+0]; i0 = in[8*k+1];
+    r1 = in[8*k+2]; i1 = in[8*k+3];
+    r2 = in[8*k+4]; i2 = in[8*k+5];
+    r3 = in[8*k+6]; i3 = in[8*k+7];
+    VTRANSPOSE4(r0,r1,r2,r3);
+    VTRANSPOSE4(i0,i1,i2,i3);
+    VCPLXMUL(r1,i1,e[k*6+0],e[k*6+1]);
+    VCPLXMUL(r2,i2,e[k*6+2],e[k*6+3]);
+    VCPLXMUL(r3,i3,e[k*6+4],e[k*6+5]);
+
+    sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2);
+    sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3);
+    si0 = VADD(i0,i2); di0 = VSUB(i0, i2);
+    si1 = VADD(i1,i3); di1 = VSUB(i1, i3);
+
+    /*
+      transformation for each column is:
+
+      [1   1   1   1   0   0   0   0]   [r0]
+      [1   0  -1   0   0  -1   0   1]   [r1]
+      [1  -1   1  -1   0   0   0   0]   [r2]
+      [1   0  -1   0   0   1   0  -1]   [r3]
+      [0   0   0   0   1   1   1   1] * [i0]
+      [0   1   0  -1   1   0  -1   0]   [i1]
+      [0   0   0   0   1  -1   1  -1]   [i2]
+      [0  -1   0   1   1   0  -1   0]   [i3]
+    */
+
+    r0 = VADD(sr0, sr1); i0 = VADD(si0, si1);
+    r1 = VADD(dr0, di1); i1 = VSUB(di0, dr1);
+    r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1);
+    r3 = VSUB(dr0, di1); i3 = VADD(di0, dr1);
+
+    *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1;
+    *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3;
+  }
+}
+
+static void pffft_cplx_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { /* first step of the backward complex transform; in != out */
+  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
+  v4sf r0, i0, r1, i1, r2, i2, r3, i3;
+  v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
+  assert(in != out);
+  for (k=0; k < dk; ++k) { /* same steps as pffft_cplx_finalize() in reverse order: butterfly, conjugate multiply, transpose */
+    r0 = in[8*k+0]; i0 = in[8*k+1];
+    r1 = in[8*k+2]; i1 = in[8*k+3];
+    r2 = in[8*k+4]; i2 = in[8*k+5];
+    r3 = in[8*k+6]; i3 = in[8*k+7];
+
+    sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2);
+    sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3);
+    si0 = VADD(i0,i2); di0 = VSUB(i0, i2);
+    si1 = VADD(i1,i3); di1 = VSUB(i1, i3);
+
+    r0 = VADD(sr0, sr1); i0 = VADD(si0, si1);
+    r1 = VSUB(dr0, di1); i1 = VADD(di0, dr1);
+    r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1);
+    r3 = VADD(dr0, di1); i3 = VSUB(di0, dr1);
+
+    VCPLXMULCONJ(r1,i1,e[k*6+0],e[k*6+1]);
+    VCPLXMULCONJ(r2,i2,e[k*6+2],e[k*6+3]);
+    VCPLXMULCONJ(r3,i3,e[k*6+4],e[k*6+5]);
+
+    VTRANSPOSE4(r0,r1,r2,r3);
+    VTRANSPOSE4(i0,i1,i2,i3);
+
+    *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1;
+    *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3;
+  }
+}
+
+
+static ALWAYS_INLINE(void) pffft_real_finalize_4x4(const v4sf *in0, const v4sf *in1, const v4sf *in,
+                            const v4sf *e, v4sf *out) { /* one 4x4 block: r0/i0 come from *in0/*in1, the other six vectors from in[0..5] */
+  v4sf r0, i0, r1, i1, r2, i2, r3, i3;
+  v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
+  r0 = *in0; i0 = *in1;
+  r1 = *in++; i1 = *in++; r2 = *in++; i2 = *in++; r3 = *in++; i3 = *in++;
+  VTRANSPOSE4(r0,r1,r2,r3);
+  VTRANSPOSE4(i0,i1,i2,i3);
+
+  /*
+    transformation for each column is:
+
+    [1   1   1   1   0   0   0   0]   [r0]
+    [1   0  -1   0   0  -1   0   1]   [r1]
+    [1   0  -1   0   0   1   0  -1]   [r2]
+    [1  -1   1  -1   0   0   0   0]   [r3]
+    [0   0   0   0   1   1   1   1] * [i0]
+    [0  -1   0   1  -1   0   1   0]   [i1]
+    [0  -1   0   1   1   0  -1   0]   [i2]
+    [0   0   0   0  -1   1  -1   1]   [i3]
+  */
+
+  /*cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */
+  /*cerr << "matrix initial, before e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */
+
+  VCPLXMUL(r1,i1,e[0],e[1]);
+  VCPLXMUL(r2,i2,e[2],e[3]);
+  VCPLXMUL(r3,i3,e[4],e[5]);
+
+  /*cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */
+  /*cerr << "matrix initial, imag part:\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */
+
+  sr0 = VADD(r0,r2); dr0 = VSUB(r0,r2);
+  sr1 = VADD(r1,r3); dr1 = VSUB(r3,r1); /* note the operand order: r3 - r1 */
+  si0 = VADD(i0,i2); di0 = VSUB(i0,i2);
+  si1 = VADD(i1,i3); di1 = VSUB(i3,i1); /* likewise i3 - i1 */
+
+  r0 = VADD(sr0, sr1);
+  r3 = VSUB(sr0, sr1);
+  i0 = VADD(si0, si1);
+  i3 = VSUB(si1, si0);
+  r1 = VADD(dr0, di1);
+  r2 = VSUB(dr0, di1);
+  i1 = VSUB(dr1, di0);
+  i2 = VADD(dr1, di0);
+
+  *out++ = r0;
+  *out++ = i0;
+  *out++ = r1;
+  *out++ = i1;
+  *out++ = r2;
+  *out++ = i2;
+  *out++ = r3;
+  *out++ = i3;
+  /* eight vectors written per call */
+}
+
+static NEVER_INLINE(void) pffft_real_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { /* last step of the forward real transform; in != out */
+  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
+  /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
+
+  v4sf_union cr, ci, *uout = (v4sf_union*)out;
+  v4sf save = in[7], zero=VZERO(); /* 'save' carries the last vector of one block into the next block as its r0 */
+  float xr0, xi0, xr1, xi1, xr2, xi2, xr3, xi3;
+  static const float s = (float)(M_SQRT2/2);
+
+  cr.v = in[0]; ci.v = in[Ncvec*2-1]; /* first and last input vectors feed the scalar fix-up below */
+  assert(in != out);
+  pffft_real_finalize_4x4(&zero, &zero, in+1, e, out); /* block 0 runs with zeros for r0/i0; lane 0 of its output is overwritten below */
+
+  /*
+    [cr0 cr1 cr2 cr3 ci0 ci1 ci2 ci3]
+
+    [Xr(1)]  ] [1   1   1   1   0   0   0   0]
+    [Xr(N/4) ] [0   0   0   0   1   s   0  -s]
+    [Xr(N/2) ] [1   0  -1   0   0   0   0   0]
+    [Xr(3N/4)] [0   0   0   0   1  -s   0   s]
+    [Xi(1)   ] [1  -1   1  -1   0   0   0   0]
+    [Xi(N/4) ] [0   0   0   0   0  -s  -1  -s]
+    [Xi(N/2) ] [0  -1   0   1   0   0   0   0]
+    [Xi(3N/4)] [0   0   0   0   0  -s   1  -s]
+  */
+
+  xr0=(cr.f[0]+cr.f[2]) + (cr.f[1]+cr.f[3]); uout[0].f[0] = xr0;
+  xi0=(cr.f[0]+cr.f[2]) - (cr.f[1]+cr.f[3]); uout[1].f[0] = xi0;
+  xr2=(cr.f[0]-cr.f[2]);                     uout[4].f[0] = xr2;
+  xi2=(cr.f[3]-cr.f[1]);                     uout[5].f[0] = xi2;
+  xr1= ci.f[0] + s*(ci.f[1]-ci.f[3]);        uout[2].f[0] = xr1;
+  xi1=-ci.f[2] - s*(ci.f[1]+ci.f[3]);        uout[3].f[0] = xi1;
+  xr3= ci.f[0] - s*(ci.f[1]-ci.f[3]);        uout[6].f[0] = xr3;
+  xi3= ci.f[2] - s*(ci.f[1]+ci.f[3]);        uout[7].f[0] = xi3;
+
+  for (k=1; k < dk; ++k) { /* remaining blocks: r0 = last vector of the previous block, i0 = first vector of this one */
+    v4sf save_next = in[8*k+7];
+    pffft_real_finalize_4x4(&save, &in[8*k+0], in + 8*k+1,
+                           e + k*6, out + k*8);
+    save = save_next;
+  }
+
+}
+
+static ALWAYS_INLINE(void) pffft_real_preprocess_4x4(const v4sf *in,
+                                             const v4sf *e, v4sf *out, int first) { /* one 4x4 block; writes 8 vectors, or 6 (r0/i0 skipped) when 'first' is set */
+  v4sf r0=in[0], i0=in[1], r1=in[2], i1=in[3], r2=in[4], i2=in[5], r3=in[6], i3=in[7];
+  /*
+    transformation for each column is:
+
+    [1   1   1   1   0   0   0   0]   [r0]
+    [1   0   0  -1   0  -1  -1   0]   [r1]
+    [1  -1  -1   1   0   0   0   0]   [r2]
+    [1   0   0  -1   0   1   1   0]   [r3]
+    [0   0   0   0   1  -1   1  -1] * [i0]
+    [0  -1   1   0   1   0   0   1]   [i1]
+    [0   0   0   0   1   1  -1  -1]   [i2]
+    [0   1  -1   0   1   0   0   1]   [i3]
+  */
+
+  v4sf sr0 = VADD(r0,r3), dr0 = VSUB(r0,r3);
+  v4sf sr1 = VADD(r1,r2), dr1 = VSUB(r1,r2);
+  v4sf si0 = VADD(i0,i3), di0 = VSUB(i0,i3);
+  v4sf si1 = VADD(i1,i2), di1 = VSUB(i1,i2);
+
+  r0 = VADD(sr0, sr1);
+  r2 = VSUB(sr0, sr1);
+  r1 = VSUB(dr0, si1);
+  r3 = VADD(dr0, si1);
+  i0 = VSUB(di0, di1);
+  i2 = VADD(di0, di1);
+  i1 = VSUB(si0, dr1);
+  i3 = VADD(si0, dr1);
+
+  VCPLXMULCONJ(r1,i1,e[0],e[1]);
+  VCPLXMULCONJ(r2,i2,e[2],e[3]);
+  VCPLXMULCONJ(r3,i3,e[4],e[5]);
+
+  VTRANSPOSE4(r0,r1,r2,r3);
+  VTRANSPOSE4(i0,i1,i2,i3);
+
+  if (!first) { /* the first block's r0/i0 are not stored; the caller fills those slots itself */
+    *out++ = r0;
+    *out++ = i0;
+  }
+  *out++ = r1;
+  *out++ = i1;
+  *out++ = r2;
+  *out++ = i2;
+  *out++ = r3;
+  *out++ = i3;
+}
+
+static NEVER_INLINE(void) pffft_real_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { /* first step of the backward real transform; in != out */
+  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
+  /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
+
+  v4sf_union Xr, Xi, *uout = (v4sf_union*)out;
+  float cr0, ci0, cr1, ci1, cr2, ci2, cr3, ci3;
+  static const float s = (float)M_SQRT2;
+  assert(in != out);
+  for (k=0; k < 4; ++k) { /* gather lane 0 of the first eight input vectors (even ones into Xr, odd ones into Xi) */
+    Xr.f[k] = ((float*)in)[8*k];
+    Xi.f[k] = ((float*)in)[8*k+4];
+  }
+
+  pffft_real_preprocess_4x4(in, e, out+1, 1); /* will write only 6 values */
+
+  /*
+    [Xr0 Xr1 Xr2 Xr3 Xi0 Xi1 Xi2 Xi3]
+
+    [cr0] [1   0   2   0   1   0   0   0]
+    [cr1] [1   0   0   0  -1   0  -2   0]
+    [cr2] [1   0  -2   0   1   0   0   0]
+    [cr3] [1   0   0   0  -1   0   2   0]
+    [ci0] [0   2   0   2   0   0   0   0]
+    [ci1] [0   s   0  -s   0  -s   0  -s]
+    [ci2] [0   0   0   0   0  -2   0   2]
+    [ci3] [0  -s   0   s   0  -s   0  -s]
+  */
+  for (k=1; k < dk; ++k) { /* later blocks write all 8 vectors, starting one vector before out + 8*k */
+    pffft_real_preprocess_4x4(in+8*k, e + k*6, out-1+k*8, 0);
+  }
+  /* scalar fix-up: fills the first and the last output vector from the gathered lane-0 values */
+  cr0=(Xr.f[0]+Xi.f[0]) + 2*Xr.f[2]; uout[0].f[0] = cr0;
+  cr1=(Xr.f[0]-Xi.f[0]) - 2*Xi.f[2]; uout[0].f[1] = cr1;
+  cr2=(Xr.f[0]+Xi.f[0]) - 2*Xr.f[2]; uout[0].f[2] = cr2;
+  cr3=(Xr.f[0]-Xi.f[0]) + 2*Xi.f[2]; uout[0].f[3] = cr3;
+  ci0= 2*(Xr.f[1]+Xr.f[3]);                       uout[2*Ncvec-1].f[0] = ci0;
+  ci1= s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[1] = ci1;
+  ci2= 2*(Xi.f[3]-Xi.f[1]);                       uout[2*Ncvec-1].f[2] = ci2;
+  ci3=-s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[3] = ci3;
+}
+
+
+static void pffft_transform_internal(PFFFT_Setup *setup, const float *finput, float *foutput, v4sf *scratch,
+                             pffft_direction_t direction, int ordered) { /* full transform in either direction; the result always ends up in foutput */
+  int k, Ncvec   = setup->Ncvec;
+  int nf_odd = (setup->ifac[1] & 1); /* parity of the number of radix passes */
+
+  /* temporary buffer is allocated on the stack if the scratch pointer is NULL */
+  /*int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); */
+  /*VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); */
+
+  int ib = (nf_odd ^ ordered ? 1 : 0); /* index into buff[]; chosen from the pass parity and 'ordered' */
+  const v4sf *vinput = (const v4sf*)finput;
+  v4sf *voutput      = (v4sf*)foutput;
+  v4sf *buff[2];
+  buff[0] = voutput, buff[1] = scratch /*? scratch : scratch_on_stack*/; /* the stack fallback is commented out, so scratch is used as passed */
+
+  /*if (scratch == 0) scratch = scratch_on_stack; */
+
+  assert(VALIGNED(finput) && VALIGNED(foutput));
+
+  /*assert(finput != foutput); */
+  if (direction == PFFFT_FORWARD) { /* forward: radix passes, then finalize, then optional reordering */
+    ib = !ib;
+    if (setup->transform == PFFFT_REAL) {
+      ib = (rfftf1_ps(Ncvec*2, vinput, buff[ib], buff[!ib],
+                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); /* ib = index of the buffer the passes returned */
+      pffft_real_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e);
+    } else {
+      v4sf *tmp = buff[ib];
+      for (k=0; k < Ncvec; ++k) {
+        UNINTERLEAVE2(vinput[k*2], vinput[k*2+1], tmp[k*2], tmp[k*2+1]);
+      }
+      ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib],
+                      setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1);
+      pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e);
+    }
+    if (ordered) {
+      pffft_zreorder(setup, (float*)buff[!ib], (float*)buff[ib], PFFFT_FORWARD);
+    } else ib = !ib; /* no reordering: the finalize output buffer is already the result */
+  } else { /* backward: optional reordering, then preprocess, then radix passes */
+    if (vinput == buff[ib]) {
+      ib = !ib; /* may happen when finput == foutput */
+    }
+    if (ordered) {
+      pffft_zreorder(setup, (float*)vinput, (float*)buff[ib], PFFFT_BACKWARD);
+      vinput = buff[ib]; ib = !ib;
+    }
+    if (setup->transform == PFFFT_REAL) {
+      pffft_real_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e);
+      ib = (rfftb1_ps(Ncvec*2, buff[ib], buff[0], buff[1],
+                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
+    } else {
+      pffft_cplx_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e);
+      ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1],
+                      setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1);
+      for (k=0; k < Ncvec; ++k) {
+        INTERLEAVE2(buff[ib][k*2], buff[ib][k*2+1], buff[ib][k*2], buff[ib][k*2+1]); /* in place: source and destination are the same vectors */
+      }
+    }
+  }
+
+  if (buff[ib] != voutput) {
+    /* extra copy required -- this situation should only happen when finput == foutput */
+    assert(finput==foutput);
+    for (k=0; k < Ncvec; ++k) {
+      v4sf a = buff[ib][2*k], b = buff[ib][2*k+1];
+      voutput[2*k] = a; voutput[2*k+1] = b;
+    }
+    ib = !ib;
+  }
+  assert(buff[ib] == voutput);
+}
+
+#if 0 /* compiled out: ab += scaling * (a * b), element-wise on transformed data */
+static void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, float *ab, float scaling) {
+  int i, Ncvec = s->Ncvec;
+  const v4sf * RESTRICT va = (const v4sf*)a;
+  const v4sf * RESTRICT vb = (const v4sf*)b;
+  v4sf * RESTRICT vab = (v4sf*)ab;
+
+#ifdef __arm__
+  __builtin_prefetch(va);
+  __builtin_prefetch(vb);
+  __builtin_prefetch(vab);
+  __builtin_prefetch(va+2);
+  __builtin_prefetch(vb+2);
+  __builtin_prefetch(vab+2);
+  __builtin_prefetch(va+4);
+  __builtin_prefetch(vb+4);
+  __builtin_prefetch(vab+4);
+  __builtin_prefetch(va+6);
+  __builtin_prefetch(vb+6);
+  __builtin_prefetch(vab+6);
+#endif
+
+  float ar, ai, br, bi, abr, abi;
+  v4sf vscal = LD_PS1(scaling);
+
+  assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
+  ar = ((v4sf_union*)va)[0].f[0]; /* lane 0 of the first two vectors is saved for the PFFFT_REAL fix-up at the end */
+  ai = ((v4sf_union*)va)[1].f[0];
+  br = ((v4sf_union*)vb)[0].f[0];
+  bi = ((v4sf_union*)vb)[1].f[0];
+  abr = ((v4sf_union*)vab)[0].f[0];
+  abi = ((v4sf_union*)vab)[1].f[0];
+
+#ifdef __arm__
+#  if 1 /* inline asm version */
+  const float *a_ = a, *b_ = b; float *ab_ = ab;
+  int N = Ncvec;
+  asm volatile("mov         r8, %2                  \n"
+               "vdup.f32    q15, %4                 \n"
+               "1:                                  \n"
+               "pld         [%0,#64]                \n"
+               "pld         [%1,#64]                \n"
+               "pld         [%2,#64]                \n"
+               "pld         [%0,#96]                \n"
+               "pld         [%1,#96]                \n"
+               "pld         [%2,#96]                \n"
+               "vld1.f32    {q0,q1},   [%0,:128]!         \n"
+               "vld1.f32    {q4,q5},   [%1,:128]!         \n"
+               "vld1.f32    {q2,q3},   [%0,:128]!         \n"
+               "vld1.f32    {q6,q7},   [%1,:128]!         \n"
+               "vld1.f32    {q8,q9},   [r8,:128]!          \n"
+
+               "vmul.f32    q10, q0, q4             \n"
+               "vmul.f32    q11, q0, q5             \n"
+               "vmul.f32    q12, q2, q6             \n"
+               "vmul.f32    q13, q2, q7             \n"
+               "vmls.f32    q10, q1, q5             \n"
+               "vmla.f32    q11, q1, q4             \n"
+               "vld1.f32    {q0,q1}, [r8,:128]!     \n"
+               "vmls.f32    q12, q3, q7             \n"
+               "vmla.f32    q13, q3, q6             \n"
+               "vmla.f32    q8, q10, q15            \n"
+               "vmla.f32    q9, q11, q15            \n"
+               "vmla.f32    q0, q12, q15            \n"
+               "vmla.f32    q1, q13, q15            \n"
+               "vst1.f32    {q8,q9},[%2,:128]!    \n"
+               "vst1.f32    {q0,q1},[%2,:128]!    \n"
+               "subs        %3, #2                  \n"
+               "bne         1b                      \n"
+               : "+r"(a_), "+r"(b_), "+r"(ab_), "+r"(N) : "r"(scaling) : "r8", "q0","q1","q2","q3","q4","q5","q6","q7","q8","q9", "q10","q11","q12","q13","q15","memory");
+  /* NOTE(review): 'subs' updates the condition flags but "cc" is not in the clobber list -- fix before re-enabling */
+#  else /* neon intrinsics version, 30% slower than the asm one with gcc 4.6 */
+  v4sf a1r, a1i, b1r, b1i;
+  v4sf a2r, a2i, b2r, b2i;
+  v4sf ab1r, ab1i, ab2r, ab2i;
+  for (i=0; i < Ncvec; i += 2) {
+    __builtin_prefetch(va+8);
+    __builtin_prefetch(va+10);
+
+    a1r = *va++; a1i = *va++;
+    a2r = *va++; a2i = *va++;
+    b1r = *vb++; b1i = *vb++;
+    b2r = *vb++; b2i = *vb++;
+    ab1r = vab[0]; ab1i = vab[1];
+    ab2r = vab[2]; ab2i = vab[3];
+
+    v4sf z1r = VMUL(a1r, b1r);
+    v4sf z2r = VMUL(a2r, b2r);
+    v4sf z1i = VMUL(a1r, b1i);
+    v4sf z2i = VMUL(a2r, b2i);
+
+    __builtin_prefetch(vb+4);
+    __builtin_prefetch(vb+6);
+
+    z1r = vmlsq_f32(z1r, a1i, b1i);
+    z2r = vmlsq_f32(z2r, a2i, b2i);
+    z1i = vmlaq_f32(z1i, a1i, b1r);
+    z2i = vmlaq_f32(z2i, a2i, b2r);
+
+    __builtin_prefetch(vab+4);
+    __builtin_prefetch(vab+6);
+
+    ab1r = vmlaq_f32(ab1r, z1r, vscal);
+    ab2r = vmlaq_f32(ab2r, z2r, vscal);
+    ab1i = vmlaq_f32(ab1i, z1i, vscal);
+    ab2i = vmlaq_f32(ab2i, z2i, vscal);
+
+    *vab++ = ab1r; *vab++ = ab1i;
+    *vab++ = ab2r; *vab++ = ab2i;
+  }
+#  endif
+
+#else /* not ARM, no need to use a special routine */
+  for (i=0; i < Ncvec; i += 2) { /* two complex vector pairs per iteration */
+    v4sf ar, ai, br, bi; /* these shadow the scalar floats of the same names declared above */
+    ar = va[2*i+0]; ai = va[2*i+1];
+    br = vb[2*i+0]; bi = vb[2*i+1];
+    VCPLXMUL(ar, ai, br, bi);
+    vab[2*i+0] = VMADD(ar, vscal, vab[2*i+0]);
+    vab[2*i+1] = VMADD(ai, vscal, vab[2*i+1]);
+    ar = va[2*i+2]; ai = va[2*i+3];
+    br = vb[2*i+2]; bi = vb[2*i+3];
+    VCPLXMUL(ar, ai, br, bi);
+    vab[2*i+2] = VMADD(ar, vscal, vab[2*i+2]);
+    vab[2*i+3] = VMADD(ai, vscal, vab[2*i+3]);
+  }
+#endif
+  if (s->transform == PFFFT_REAL) { /* real transforms: lane 0 of the first two vectors is redone as two plain real products */
+    ((v4sf_union*)vab)[0].f[0] = abr + ar*br*scaling;
+    ((v4sf_union*)vab)[1].f[0] = abi + ai*bi*scaling;
+  }
+}
+#endif
+
+static void pffft_zconvolve(PFFFT_Setup *s, const float *a, const float *b, float *ab) {
+  /* ab = a * b: complex product (VCPLXMUL) of z-domain data, one SIMD vector at a time. */
+  const v4sf * /*RESTRICT*/ va = (const v4sf*)a;
+  const v4sf * RESTRICT vb = (const v4sf*)b;
+  v4sf * /*RESTRICT*/ vab = (v4sf*)ab;
+  const int Ncvec = s->Ncvec;
+  float a0r, a0i, b0r, b0i; /* lane 0 of the first two vectors of a and of b */
+  int i;
+#ifdef __arm__
+#error
+#endif
+  assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
+  /* Fetched before the loop, which overwrites ab (ab may be the same buffer as an input). */
+  a0r = ((v4sf_union*)va)[0].f[0];
+  a0i = ((v4sf_union*)va)[1].f[0];
+  b0r = ((v4sf_union*)vb)[0].f[0];
+  b0i = ((v4sf_union*)vb)[1].f[0];
+  for (i = 0; i < Ncvec; i += 2) { /* two (re, im) vector pairs per pass */
+    v4sf ar = va[2*i+0], ai = va[2*i+1];
+    v4sf br = vb[2*i+0], bi = vb[2*i+1];
+    VCPLXMUL(ar, ai, br, bi);
+    vab[2*i+0] = ar;
+    vab[2*i+1] = ai;
+    ar = va[2*i+2], ai = va[2*i+3];
+    br = vb[2*i+2], bi = vb[2*i+3];
+    VCPLXMUL(ar, ai, br, bi);
+    vab[2*i+2] = ar;
+    vab[2*i+3] = ai;
+  }
+  if (s->transform == PFFFT_REAL) {
+    /* Real transforms: those two lane-0 slots get plain products instead. */
+    ((v4sf_union*)vab)[0].f[0] = a0r*b0r;
+    ((v4sf_union*)vab)[1].f[0] = a0i*b0i;
+  }
+}
+
+
+
+#else /* defined(PFFFT_SIMD_DISABLE) */
+
+/* standard routine using scalar floats, without SIMD stuff. */
+
+#define pffft_zreorder_nosimd pffft_zreorder
+static void pffft_zreorder_nosimd(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) {
+  const int N = setup->N;
+  float moved; /* the one element that jumps between index 1 and index N-1 */
+  int k;
+  if (setup->transform == PFFFT_COMPLEX) { /* no permutation: copy all 2*N floats */
+    for (k = 0; k < 2*N; ++k) out[k] = in[k];
+  } else if (direction == PFFFT_FORWARD) {
+    /* in[N-1] moves to out[1]; in[1..N-2] move up one slot (highest index first) */
+    moved = in[N-1];
+    for (k = N-1; k > 1; --k) out[k] = in[k-1];
+    out[0] = in[0], out[1] = moved;
+  } else {
+    /* the opposite move: in[1] goes to out[N-1]; in[2..N-1] move down one slot */
+    moved = in[1];
+    for (k = 1; k < N-1; ++k) out[k] = in[k+1];
+    out[0] = in[0], out[N-1] = moved;
+  }
+}
+
+#define pffft_transform_internal_nosimd pffft_transform_internal
+static void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input, float *output, float *scratch,
+                                    pffft_direction_t direction, int ordered) {
+  int Ncvec   = setup->Ncvec;
+  int nf_odd = (setup->ifac[1] & 1); /* low bit of ifac[1] */
+  /* Scalar driver: runs the fftpack routines over 'output' (buff[0]) and 'scratch' (buff[1]); 'ib' tracks which one holds the result. */
+  /* NOTE: the stack fallback for a NULL scratch pointer is disabled in this path (code kept below, commented out): */
+  /*int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); */
+  /*VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); */
+  /*if (scratch == 0) scratch = scratch_on_stack; */
+
+  int ib; /* index into buff[] */
+  float *buff[2];
+  buff[0] = output, buff[1] = scratch;
+  if (setup->transform == PFFFT_COMPLEX) ordered = 0; /* it is always ordered. */
+  ib = (nf_odd ^ ordered ? 1 : 0); /* starting buffer; presumably chosen so the final result lands in 'output' */
+
+  if (direction == PFFFT_FORWARD) {
+    if (setup->transform == PFFFT_REAL) {
+      ib = (rfftf1_ps(Ncvec*2, input, buff[ib], buff[!ib],
+                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); /* the routine returns the buffer holding its result */
+    } else {
+      ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib],
+                      setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1); /* last argument: -1 here, +1 in the backward branch */
+    }
+    if (ordered) {
+      pffft_zreorder(setup, buff[ib], buff[!ib], PFFFT_FORWARD); ib = !ib; /* reordered copy lands in the other buffer */
+    }
+  } else {
+    if (input == buff[ib]) {
+      ib = !ib; /* may happen when finput == foutput */
+    }
+    if (ordered) {
+      pffft_zreorder(setup, input, buff[!ib], PFFFT_BACKWARD); /* undo the ordering before the inverse transform */
+      input = buff[!ib];
+    }
+    if (setup->transform == PFFFT_REAL) {
+      ib = (rfftb1_ps(Ncvec*2, input, buff[ib], buff[!ib],
+                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
+    } else {
+      ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib],
+                      setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1);
+    }
+  }
+  if (buff[ib] != output) {
+    int k;
+    /* extra copy required -- this situation should happen only when input == output */
+    assert(input==output);
+    for (k=0; k < Ncvec; ++k) { /* copies 2*Ncvec floats from the scratch buffer to output */
+      float a = buff[ib][2*k], b = buff[ib][2*k+1];
+      output[2*k] = a; output[2*k+1] = b;
+    }
+    ib = !ib;
+  }
+  assert(buff[ib] == output);
+}
+
+#if 0 /* compiled out; the matching prototype in pffft.h is commented out as well */
+#define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate
+static void pffft_zconvolve_accumulate_nosimd(PFFFT_Setup *s, const float *a, const float *b,
+                                       float *ab, float scaling) {
+  int i, Ncvec = s->Ncvec;
+
+  if (s->transform == PFFFT_REAL) {
+    /* take care of the fftpack ordering */
+    ab[0] += a[0]*b[0]*scaling; /* first float: plain real product */
+    ab[2*Ncvec-1] += a[2*Ncvec-1]*b[2*Ncvec-1]*scaling; /* last float: plain real product */
+    ++ab; ++a; ++b; --Ncvec; /* the (re, im) pairs start one float in; one pair fewer remains */
+  }
+  for (i=0; i < Ncvec; ++i) {
+    float ar, ai, br, bi;
+    ar = a[2*i+0]; ai = a[2*i+1];
+    br = b[2*i+0]; bi = b[2*i+1];
+    VCPLXMUL(ar, ai, br, bi); /* leaves the product in ar, ai (they are what gets accumulated below) */
+    ab[2*i+0] += ar*scaling;
+    ab[2*i+1] += ai*scaling;
+  }
+}
+#endif
+
+#define pffft_zconvolve_nosimd pffft_zconvolve
+static void pffft_zconvolve_nosimd(PFFFT_Setup *s, const float *a, const float *b, float *ab) {
+  int k, pairs = s->Ncvec; /* pairs: (re, im) float pairs handled by the loop below */
+  if (s->transform == PFFFT_REAL) {
+    /* fftpack ordering: the first and the last float are multiplied as plain reals; */
+    /* the (re, im) pairs sit in between, so step one float in and drop one pair.    */
+    const int last = 2*pairs - 1;
+    ab[0] = a[0]*b[0];
+    ab[last] = a[last]*b[last];
+    a += 1, b += 1, ab += 1, pairs -= 1;
+  }
+  for (k = 0; k < 2*pairs; k += 2) {
+    float ar = a[k], ai = a[k+1];
+    float br = b[k], bi = b[k+1];
+    VCPLXMUL(ar, ai, br, bi);
+    ab[k] = ar;
+    ab[k+1] = ai;
+  }
+}
+
+#endif /* defined(PFFFT_SIMD_DISABLE) */
+
+static void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) {
+  pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 0); /* last argument: ordered = 0 */
+}
+
+static void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) {
+  pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 1); /* last argument: ordered = 1 */
+}
+
+
+static void pffft_reorder_back(int length, void * setup, float * data, float * work)
+{ /* Applies pffft_zreorder(..., PFFFT_BACKWARD) to 'data', using 'work' as the source copy. */
+  memcpy(work, data, (unsigned)length * sizeof(*work)); /* stage 'length' floats so input and output differ */
+  pffft_zreorder(setup, work, data, PFFFT_BACKWARD);
+}
+#endif
diff --git a/src/pffft.h b/src/pffft.h
new file mode 100644
index 0000000..78d936b
--- /dev/null
+++ b/src/pffft.h
@@ -0,0 +1,177 @@
+/* Copyright (c) 2011  Julien Pommier ( pommier@modartt.com )
+
+   Based on original fortran 77 code from FFTPACKv4 from NETLIB,
+   authored by Dr Paul Swarztrauber of NCAR, in 1985.
+
+   As confirmed by the NCAR fftpack software curators, the following
+   FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
+   released under the same terms.
+
+   FFTPACK license:
+
+   http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
+
+   Copyright (c) 2004 the University Corporation for Atmospheric
+   Research ("UCAR"). All rights reserved. Developed by NCAR's
+   Computational and Information Systems Laboratory, UCAR,
+   www.cisl.ucar.edu.
+
+   Redistribution and use of the Software in source and binary forms,
+   with or without modification, is permitted provided that the
+   following conditions are met:
+
+   - Neither the names of NCAR's Computational and Information Systems
+   Laboratory, the University Corporation for Atmospheric Research,
+   nor the names of its sponsors or contributors may be used to
+   endorse or promote products derived from this Software without
+   specific prior written permission.
+
+   - Redistributions of source code must retain the above copyright
+   notices, this list of conditions, and the disclaimer below.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions, and the disclaimer below in the
+   documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+   SOFTWARE.
+*/
+
+/*
+   PFFFT : a Pretty Fast FFT.
+
+   This is basically an adaptation of the single precision fftpack
+   (v4) as found on netlib taking advantage of SIMD instruction found
+   on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
+
+   For architectures where no SIMD instruction is available, the code
+   falls back to a scalar version.
+
+   Restrictions:
+
+   - 1D transforms only, with 32-bit single precision.
+
+   - supports only transforms for inputs of length N of the form
+   N=(2^a)*(3^b), a >= 5 and b >=0 (32, 48, 64, 96, 128, 144 etc
+   are all acceptable lengths). Performance is best for 128<=N<=8192.
+
+   - all (float*) pointers in the functions below are expected to
+   have a "simd-compatible" alignment, that is 16 bytes on x86 and
+   powerpc CPUs.
+
+   You can allocate such buffers with the functions
+   pffft_aligned_malloc / pffft_aligned_free (or with stuff like
+   posix_memalign..)
+
+*/
+
+#ifndef PFFFT_H
+#define PFFFT_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+  /* opaque struct holding internal stuff (precomputed twiddle factors)
+     this struct can be shared by many threads as it contains only
+     read-only data.
+  */
+  typedef struct PFFFT_Setup PFFFT_Setup;
+
+  /* direction of the transform */
+  typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
+
+  /* type of transform */
+  typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
+
+  /*
+    prepare for performing transforms of size N -- the returned
+    PFFFT_Setup structure is read-only so it can safely be shared by
+    multiple concurrent threads.
+  */
+  static PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
+  static void pffft_destroy_setup(PFFFT_Setup *);
+  /*
+     Perform a Fourier transform , The z-domain data is stored in the
+     most efficient order for transforming it back, or using it for
+     convolution. If you need to have its content sorted in the
+     "usual" way, that is as an array of interleaved complex numbers,
+     either use pffft_transform_ordered , or call pffft_zreorder after
+     the forward fft, and before the backward fft.
+
+     Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
+     Typically you will want to scale the backward transform by 1/N.
+
+     The 'work' pointer should point to an area of N (2*N for complex
+     fft) floats, properly aligned. Passing NULL to have the stack used
+     instead (formerly suggested as the best strategy for small FFTs,
+     say for N < 16384) is no longer supported.
+
+     input and output may alias.
+  */
+  static void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
+
+  /*
+     Similar to pffft_transform, but makes sure that the output is
+     ordered as expected (interleaved complex numbers).  This is
+     similar to calling pffft_transform and then pffft_zreorder.
+
+     input and output may alias.
+  */
+  static void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
+
+  /*
+     call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
+     PFFFT_FORWARD) if you want to have the frequency components in
+     the correct "canonical" order, as interleaved complex numbers.
+
+     (for real transforms, both 0-frequency and half frequency
+     components, which are real, are assembled in the first entry as
+     F(0)+i*F(n/2+1). Note that the original fftpack did place
+     F(n/2+1) at the end of the arrays).
+
+     input and output should not alias.
+  */
+  static void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
+
+  /*
+     Perform a multiplication of the frequency components of dft_a and
+     dft_b and accumulate them into dft_ab. The arrays should have
+     been obtained with pffft_transform(.., PFFFT_FORWARD) and should
+     *not* have been reordered with pffft_zreorder (otherwise just
+     perform the operation yourself as the dft coefs are stored as
+     interleaved complex numbers).
+
+     the operation performed is: dft_ab += (dft_a * dft_b)*scaling
+
+     The dft_a, dft_b and dft_ab pointers may alias.
+  void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
+  */
+
+  /*
+     the operation performed is: dft_ab = (dft_a * dft_b)
+
+     The dft_a, dft_b and dft_ab pointers may alias.
+  */
+  static void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab);
+
+  /* return 4 or 1 depending on whether support for SSE/Altivec instructions was enabled when building pffft.c */
+  int pffft_simd_size(void);
+
+  static void pffft_reorder_back(int length, void * setup, float * data, float * work);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pffft32.c b/src/pffft32.c
new file mode 100644
index 0000000..31d8d17
--- /dev/null
+++ b/src/pffft32.c
@@ -0,0 +1,30 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#define PFFFT_SIMD_DISABLE
+#include "pffft.c"
+#include "filter.h"
+
+static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);}
+static void delete_setup(void * setup) {pffft_destroy_setup(setup);}
+static void forward  (int length, void * setup, float * h, float * scratch) {pffft_transform        (setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
+static void oforward (int length, void * setup, float * h, float * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
+static void backward (int length, void * setup, float * H, float * scratch) {pffft_transform        (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
+static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
+static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H);  (void)length;}
+static int multiplier(void) {return 1;}
+/* The transform and convolve adapters above work in place (same buffer as input and output) and ignore 'length'. */
+typedef void (* fn_t)(void); /* common pointer type used to store callbacks of differing signatures */
+fn_t _soxr_rdft32_cb[] = { /* NOTE(review): order presumably mirrors the callback struct declared in rate.h -- confirm before reordering */
+  (fn_t)setup, /* forward_setup */
+  (fn_t)setup, /* backward_setup */
+  (fn_t)delete_setup, /* delete_setup */
+  (fn_t)forward, /* forward */
+  (fn_t)oforward, /* oforward */
+  (fn_t)backward, /* backward */
+  (fn_t)obackward, /* obackward */
+  (fn_t)convolve, /* convolve */
+  (fn_t)_soxr_ordered_partial_convolve_f, /* convolve_portion */
+  (fn_t)multiplier, /* multiplier */
+  (fn_t)pffft_reorder_back, /* reorder_back */
+};
diff --git a/src/pffft32s.c b/src/pffft32s.c
new file mode 100644
index 0000000..d3196c8
--- /dev/null
+++ b/src/pffft32s.c
@@ -0,0 +1,27 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "pffft.c"
+
+static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);}
+static void delete_setup(void * setup) {pffft_destroy_setup(setup);} /* void* adapter, as in pffft32.c: the callback is invoked with a void* argument */
+static void forward  (int length, void * setup, float * h, float * scratch) {pffft_transform        (setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
+static void oforward (int length, void * setup, float * h, float * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;}
+static void backward (int length, void * setup, float * H, float * scratch) {pffft_transform        (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
+static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
+static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H);                  (void)length;}
+static int multiplier(void) {return 1;}
+typedef void (* fn_t)(void); /* common pointer type used to store callbacks of differing signatures */
+fn_t _soxr_rdft32s_cb[] = { /* NOTE(review): order presumably mirrors the callback struct declared in rate.h -- confirm before reordering */
+  (fn_t)setup, /* forward_setup */
+  (fn_t)setup, /* backward_setup */
+  (fn_t)delete_setup, /* delete_setup */
+  (fn_t)forward, /* forward */
+  (fn_t)oforward, /* oforward */
+  (fn_t)backward, /* backward */
+  (fn_t)obackward, /* obackward */
+  (fn_t)convolve, /* convolve */
+  (fn_t)_soxr_ordered_partial_convolve_simd, /* convolve_portion */
+  (fn_t)multiplier, /* multiplier */
+  (fn_t)pffft_reorder_back, /* reorder_back */
+};
diff --git a/src/poly-fir.h b/src/poly-fir.h
new file mode 100644
index 0000000..51777bb
--- /dev/null
+++ b/src/poly-fir.h
@@ -0,0 +1,98 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Resample using an interpolated poly-phase FIR with length LEN.*/
+/* Input must be followed by LEN-1 samples. */
+
+#define a (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 0,j)) /* interpolation term 0 (constant) for tap j of this phase */
+#define b (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 1,j)) /* term 1: multiplies x */
+#define c (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 2,j)) /* term 2: multiplies x^2 */
+#define d (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 3,j)) /* term 3: multiplies x^3 */
+#if COEF_INTERP == 0
+  #define _ sum += a *in[j], ++j; /* one tap: stored coefficient used as is */
+#elif COEF_INTERP == 1
+  #define _ sum += (b *x + a)*in[j], ++j; /* one tap: coefficient = b*x + a */
+#elif COEF_INTERP == 2
+  #define _ sum += ((c *x + b)*x + a)*in[j], ++j; /* one tap: quadratic in x, Horner form */
+#elif COEF_INTERP == 3
+  #define _ sum += (((d*x + c)*x + b)*x + a)*in[j], ++j; /* one tap: cubic in x, Horner form */
+#else
+  #error COEF_INTERP
+#endif
+
+static void FUNCTION(stage_t * p, fifo_t * output_fifo) /* template: the includer supplies FUNCTION, FIR_LENGTH, COEF_INTERP, PHASE_BITS and CONVOLVE */
+{
+  sample_t const * input = stage_read_p(p);
+  int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio);
+  sample_t * output = fifo_reserve(output_fifo, max_num_out); /* reserve the maximum; the unused part is trimmed at the end */
+
+#if defined HI_PREC_CLOCK
+#if FLOAT_HI_PREC_CLOCK
+  if (p->use_hi_prec_clock) { /* variant 1: position kept as a float_step_t */
+    float_step_t at = p->at.flt;
+    for (i = 0; (int)at < num_in; ++i, at += p->step.flt) {
+      sample_t const * in = input + (int)at; /* integer part selects the input sample */
+      float_step_t frac = at - (int)at;
+      int phase = (int)(frac * (1 << PHASE_BITS)); /* fractional part selects the phase */
+#if COEF_INTERP > 0
+      sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase); /* remainder within the phase, used by the coefficient polynomial */
+#endif
+      sample_t sum = 0;
+      int j = 0;
+      CONVOLVE
+      output[i] = sum;
+    }
+    fifo_read(&p->fifo, (int)at, NULL); /* consume the whole input samples that were stepped over */
+    p->at.flt = at - (int)at; /* keep only the fractional position */
+  } else
+#else
+  if (p->use_hi_prec_clock) { /* variant 2: fixed point with an extra low-order 64-bit word (fix.ls) */
+    for (i = 0; p->at.integer < num_in; ++i,
+        p->at.fix.ls.all += p->step.fix.ls.all,
+        p->at.whole += p->step.whole + (p->at.fix.ls.all < p->step.fix.ls.all)) { /* the comparison yields the carry out of the low word */
+      sample_t const * in = input + p->at.integer;
+      uint32_t frac = p->at.fraction;
+      int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */
+#if COEF_INTERP > 0              /* low-order bits, scaled to [0,1) */
+      sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32));
+#endif
+      sample_t sum = 0;
+      int j = 0;
+      CONVOLVE
+      output[i] = sum;
+    }
+    fifo_read(&p->fifo, p->at.integer, NULL);
+    p->at.integer = 0; /* integer part consumed; the fraction words are kept */
+  } else
+#endif
+#endif
+  { /* default: position advanced through p->at.whole only */
+    for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) {
+      sample_t const * in = input + p->at.integer;
+      uint32_t frac = p->at.fraction;
+      int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */
+#if COEF_INTERP > 0              /* low-order bits, scaled to [0,1) */
+      sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32));
+#endif
+      sample_t sum = 0;
+      int j = 0;
+      CONVOLVE
+      output[i] = sum;
+    }
+    fifo_read(&p->fifo, p->at.integer, NULL);
+    p->at.integer = 0;
+  }
+  assert(max_num_out - i >= 0); /* i = number of output samples actually written */
+  fifo_trim_by(output_fifo, max_num_out - i);
+}
+
+#undef _
+#undef a
+#undef b
+#undef c
+#undef d
+#undef COEF_INTERP
+#undef CONVOLVE
+#undef FIR_LENGTH
+#undef FUNCTION
+#undef PHASE_BITS
diff --git a/src/poly-fir0.h b/src/poly-fir0.h
new file mode 100644
index 0000000..cc5eb67
--- /dev/null
+++ b/src/poly-fir0.h
@@ -0,0 +1,32 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Resample using a non-interpolated poly-phase FIR with length LEN.*/
+/* Input must be followed by LEN-1 samples. */
+
+#define _ sum += (coef(p->shared->poly_fir_coefs, 0, FIR_LENGTH, rem, 0, j)) *at[j], ++j; /* one tap of phase 'rem', no interpolation */
+
+static void FUNCTION(stage_t * p, fifo_t * output_fifo) /* template: the includer supplies FUNCTION, FIR_LENGTH and CONVOLVE */
+{
+  sample_t const * input = stage_read_p(p);
+  int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio);
+  sample_t * output = fifo_reserve(output_fifo, max_num_out); /* reserve the maximum; the unused part is trimmed below */
+
+  for (i = 0; p->at.integer < num_in * p->L; ++i, p->at.integer += p->step.integer) { /* position counted in units of 1/L input sample */
+    int div = p->at.integer / p->L, rem = p->at.integer % p->L; /* div: input sample index; rem: phase */
+    sample_t const * at = input + div;
+    sample_t sum = 0;
+    int j = 0;
+    CONVOLVE
+    output[i] = sum;
+  }
+  assert(max_num_out - i >= 0); /* i = number of output samples actually written */
+  fifo_trim_by(output_fifo, max_num_out - i);
+  fifo_read(&p->fifo, p->at.integer / p->L, NULL); /* consume the whole input samples that were stepped over */
+  p->at.integer = p->at.integer % p->L; /* keep only the phase for the next call */
+}
+
+#undef _
+#undef CONVOLVE
+#undef FIR_LENGTH
+#undef FUNCTION
diff --git a/src/rate.h b/src/rate.h
new file mode 100644
index 0000000..4110b2b
--- /dev/null
+++ b/src/rate.h
@@ -0,0 +1,734 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <math.h>
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "filter.h"
+#include "internal.h"
+
+#if defined SOXR_LIB
+
+extern struct {
+  void * (* forward_setup)(int);
+  void * (* backward_setup)(int);
+  void (* delete_setup)(void *);
+  void (* forward)(int, void *, sample_t *, sample_t *);
+  void (* oforward)(int, void *, sample_t *, sample_t *);
+  void (* backward)(int, void *, sample_t *, sample_t *);
+  void (* obackward)(int, void *, sample_t *, sample_t *);
+  void (* convolve)(int, void *, sample_t *, sample_t const *);
+  void (* convolve_portion)(int, sample_t *, sample_t const *);
+  int (* multiplier)(void);
+  void (* reorder_back)(int, void *, sample_t *, sample_t *);
+} RDFT_CB;
+
+#define rdft_forward_setup (*RDFT_CB.forward_setup)
+#define rdft_backward_setup (*RDFT_CB.backward_setup)
+#define rdft_delete_setup (*RDFT_CB.delete_setup)
+#define rdft_forward (*RDFT_CB.forward)
+#define rdft_oforward (*RDFT_CB.oforward)
+#define rdft_backward (*RDFT_CB.backward)
+#define rdft_obackward (*RDFT_CB.obackward)
+#define rdft_convolve (*RDFT_CB.convolve)
+#define rdft_convolve_portion (*RDFT_CB.convolve_portion)
+#define rdft_multiplier (*RDFT_CB.multiplier)
+#define rdft_reorder_back (*RDFT_CB.reorder_back)
+
+#endif
+
+#if RATE_SIMD /* Align for SIMD: */
+  #include "simd.h"
+#if 0 /* Not using this yet. */
+  #define RATE_SIMD_POLY 1
+  #define num_coefs4 ((num_coefs + 3) & ~3)
+  #define coefs4_check(i) ((i) < num_coefs)
+#else
+  #define RATE_SIMD_POLY 0
+  #define num_coefs4 num_coefs
+  #define coefs4_check(i) 1
+#endif
+
+  #define aligned_free    _soxr_simd_aligned_free
+  #define aligned_malloc  _soxr_simd_aligned_malloc
+  #define aligned_calloc  _soxr_simd_aligned_calloc
+#if 0
+  #define FIFO_REALLOC    aligned_realloc
+  #define FIFO_MALLOC     aligned_malloc
+  #define FIFO_FREE       aligned_free
+
+  static void * aligned_realloc(void * q, size_t nb_bytes, size_t copy_bytes) {
+    void * p = aligned_malloc(nb_bytes);
+    if (p) memcpy(p, q, copy_bytes);
+    aligned_free(q);
+    return p;
+  }
+#endif
+#else
+  #define RATE_SIMD_POLY 0
+  #define num_coefs4 num_coefs
+  #define coefs4_check(i) 1
+
+  #define aligned_free    free
+  #define aligned_malloc  malloc
+  #define aligned_calloc  calloc
+#endif
+
+#define  FIFO_SIZE_T int
+#include "fifo.h"
+
+typedef union { /* Int64 in parts */
+  #if WORDS_BIGENDIAN
+  struct {int32_t ms; uint32_t ls;} parts;
+  #else
+  struct {uint32_t ls; int32_t ms;} parts;
+  #endif
+  int64_t all;
+} int64p_t;
+
+typedef union { /* Uint64 in parts */
+  #if WORDS_BIGENDIAN
+  struct {uint32_t ms, ls;} parts;
+  #else
+  struct {uint32_t ls, ms;} parts;
+  #endif
+  uint64_t all;
+} uint64p_t;
+
+#define FLOAT_HI_PREC_CLOCK 0    /* Non-float hi-prec has ~96 bits. */
+#define float_step_t long double /* __float128 is also a (slow) option */
+
+/* Element of a poly-phase coef table laid out as [phase][fir coef][interpolation term, highest order first]; every argument is parenthesised. */
+#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[(fir_len) * ((interp_order) + 1) * (phase_num) + ((interp_order) + 1) * (fir_coef_num) + ((interp_order) - (coef_interp_num))]
+#define raw_coef_t double
+
+static sample_t * prepare_coefs(raw_coef_t const * coefs, int num_coefs,
+    int num_phases, int interp_order, double multiplier)
+{
+  int i, j, length = num_coefs4 * num_phases;
+  sample_t * result = malloc((size_t)(length * (interp_order + 1)) * sizeof(*result)); /* NOTE(review): not checked for NULL before being written below */
+  double fm1 = coefs[0], f1 = 0, f2 = 0; /* sliding window over the raw coefs: fm1, f0 (set per pass), f1, f2 */
+
+  for (i = num_coefs4 - 1; i >= 0; --i) /* both loops walk from the highest index down */
+    for (j = num_phases - 1; j >= 0; --j) {
+      double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */
+      int pos = i * num_phases + j - 1;
+      fm1 = coefs4_check(i) && pos > 0 ? coefs[pos - 1] * multiplier : 0; /* next window value, scaled; 0 when there is none */
+      switch (interp_order) { /* interpolation terms b, c, d derived from the window values */
+        case 1: b = f1 - f0; break;
+        case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break;
+        case 3: c=.5*(f1+fm1)-f0;d=(1/6.)*(f2-f1+fm1-f0-4*c);b=f1-f0-d-c; break;
+        default: if (interp_order) assert(0); /* only orders 0..3 are handled */
+      }
+      #define coef_coef(x) \
+        coef(result, interp_order, num_coefs4, j, x, num_coefs4 - 1 - i)
+      coef_coef(0) = (sample_t)f0; /* stored for phase j at fir coef index num_coefs4 - 1 - i */
+      if (interp_order > 0) coef_coef(1) = (sample_t)b;
+      if (interp_order > 1) coef_coef(2) = (sample_t)c;
+      if (interp_order > 2) coef_coef(3) = (sample_t)d;
+      #undef coef_coef
+      f2 = f1, f1 = f0; /* slide the window for the next pass */
+    }
+  return result; /* allocated with malloc here; the caller receives ownership */
+}
+
+typedef struct {
+  int        dft_length, num_taps, post_peak;
+  void       * dft_forward_setup, * dft_backward_setup;
+  sample_t   * coefs;
+} dft_filter_t;
+
+typedef struct { /* So generated filter coefs may be shared between channels */
+  sample_t   * poly_fir_coefs;
+  dft_filter_t dft_filter[2];
+} rate_shared_t;
+
+typedef enum {
+  irrational_stage = 1,
+  cubic_stage,
+  dft_stage,
+  half_stage,
+  rational_stage
+} stage_type_t;
+
+struct stage;
+typedef void (* stage_fn_t)(struct stage * input, fifo_t * output);
+#define MULT32 (65536. * 65536.)
+
+typedef union { /* Fixed point arithmetic */
+  struct {uint64p_t ls; int64p_t ms;} fix;
+  float_step_t flt;
+} step_t;
+
+typedef struct stage {
+  /* Common to all stage types: */
+  stage_type_t type;
+  stage_fn_t fn;
+  fifo_t     fifo;
+  int        pre;       /* Number of past samples to store */
+  int        pre_post;  /* pre + number of future samples to store */
+  int        preload;   /* Number of zero samples to pre-load the fifo */
+  double     out_in_ratio; /* For buffer management. */
+
+  /* For a stage with variable (run-time generated) filter coefs: */
+  rate_shared_t * shared;
+  unsigned   dft_filter_num; /* Which, if any, of the 2 DFT filters to use */
+  sample_t   * dft_scratch, * dft_out;
+
+  /* For a stage with variable L/M: */
+  step_t     at, step;
+  bool       use_hi_prec_clock;
+  int        L, remM;
+  int        n, phase_bits, block_len;
+  double     mult, phase0;
+} stage_t;
+
+#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post)
+#define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre)
+
+static void cubic_stage_fn(stage_t * p, fifo_t * output_fifo)
+{
+  int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio);
+  sample_t const * input = stage_read_p(p);
+  sample_t * output = fifo_reserve(output_fifo, max_num_out); /* reserve the maximum; the unused part is trimmed below */
+
+#define integer  fix.ms.parts.ms /* upper 32 bits of the position: input sample index */
+#define fraction fix.ms.parts.ls /* lower 32 bits of the position: fractional part */
+#define whole    fix.ms.all /* the two combined as one 64-bit value (not #undef'd within this function) */
+  for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) {
+    sample_t const * s = input + p->at.integer; /* reads s[-1] .. s[2] around the current position */
+    double x = p->at.fraction * (1 / MULT32); /* fraction scaled by 2^-32 */
+    double b = .5*(s[1]+s[-1])-*s, a = (1/6.)*(s[2]-s[1]+s[-1]-*s-4*b);
+    double c = s[1]-*s-a-b;
+    output[i] = (sample_t)(p->mult * (((a*x + b)*x + c)*x + *s)); /* cubic in x (Horner form), scaled by p->mult */
+  }
+  assert(max_num_out - i >= 0); /* i = number of output samples actually written */
+  fifo_trim_by(output_fifo, max_num_out - i);
+  fifo_read(&p->fifo, p->at.integer, NULL); /* consume the whole input samples that were stepped over */
+  p->at.integer = 0; /* the fraction is kept for the next call */
+}
+
+#if RATE_SIMD
+  #define dft_out p->dft_out
+#else
+  #define dft_out output
+#endif
+
+static void dft_stage_fn(stage_t * p, fifo_t * output_fifo)
+{
+  sample_t * output;
+  int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
+  rate_shared_t const * s = p->shared;
+  dft_filter_t const * f = &s->dft_filter[p->dft_filter_num];
+  int const overlap = f->num_taps - 1; /* each block yields at most dft_length - overlap filtered samples */
+
+  while (p->at.integer + p->L * num_in >= f->dft_length) { /* one DFT block per pass */
+    div_t divd = div(f->dft_length - overlap - p->at.integer + p->L - 1, p->L); /* quot: input samples consumed by this block (rounded up) */
+    sample_t const * input = fifo_read_ptr(&p->fifo); /* taken before fifo_read; still read from below */
+    fifo_read(&p->fifo, divd.quot, NULL);
+    num_in -= divd.quot;
+
+    output = fifo_reserve(output_fifo, f->dft_length); /* reserve a full block; the excess is trimmed at the end of the pass */
+
+    if (lsx_is_power_of_2(p->L)) { /* F-domain */
+      int portion = f->dft_length / p->L;
+      memcpy(dft_out, input, (unsigned)portion * sizeof(*dft_out));
+      rdft_oforward(portion, f->dft_forward_setup, dft_out, p->dft_scratch); /* transform only the first 'portion' samples */
+      for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */
+        dft_out[i] = dft_out[(portion << 1) - i],
+        dft_out[i+1] = -dft_out[(portion << 1) - i + 1];
+      dft_out[portion] = dft_out[1];
+      dft_out[portion + 1] = 0;
+      dft_out[1] = dft_out[0];
+
+      for (portion <<= 1; i < f->dft_length; i += portion, portion <<= 1) { /* keep doubling the filled part until dft_length is covered */
+        memcpy(dft_out + i, dft_out, (size_t)portion * sizeof(*dft_out));
+        dft_out[i + 1] = 0;
+      }
+      if (p->step.integer > 0)
+        rdft_reorder_back(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
+    } else {
+      if (p->L == 1)
+        memcpy(dft_out, input, (size_t)f->dft_length * sizeof(*dft_out));
+      else {
+        memset(dft_out, 0, (size_t)f->dft_length * sizeof(*dft_out));
+        for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L) /* zero-stuffing: one input sample every L slots */
+          dft_out[i] = input[j];
+        p->at.integer = p->L - 1 - divd.rem; /* slot offset to start from in the next block */
+      }
+      if (p->step.integer > 0)
+        rdft_forward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
+      else
+        rdft_oforward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
+    }
+
+    if (p->step.integer > 0) { /* filter at full length, then keep every step.integer-th sample */
+      rdft_convolve(f->dft_length, f->dft_backward_setup, dft_out, f->coefs);
+      rdft_backward(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
+#if RATE_SIMD
+      if (p->step.integer == 1)
+        memcpy(output, dft_out, (size_t)f->dft_length * sizeof(sample_t)); /* with RATE_SIMD, dft_out is p->dft_out rather than output */
+#endif
+      if (p->step.integer != 1) {
+        for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j,
+            i += p->step.integer)
+          output[j] = dft_out[i];
+        p->remM = i - (f->dft_length - overlap); /* starting offset carried into the next block */
+        fifo_trim_by(output_fifo, f->dft_length - j); /* j samples were written */
+      }
+      else fifo_trim_by(output_fifo, overlap);
+    }
+    else { /* F-domain */
+      int m = -p->step.integer; /* non-positive step: work on dft_length >> m points */
+      rdft_convolve_portion(f->dft_length >> m, dft_out, f->coefs);
+      rdft_obackward(f->dft_length >> m, f->dft_backward_setup, dft_out, p->dft_scratch);
+#if RATE_SIMD
+      memcpy(output, dft_out, (size_t)(f->dft_length >> m) * sizeof(sample_t));
+#endif
+      fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >>m);
+    }
+  }
+}
+
+#undef dft_out
+
+/* DFT length: 4 x the nearest power of 2, or half that if cache misses loom. */
+static int set_dft_length(int num_taps, int min, int large)
+{
+  double const log2_taps = log((double)num_taps) / log(2.);
+  int const upper = max((int)(log2_taps + 1.77), large);
+  return 1 << range_limit((int)(log2_taps + 2.77), min, upper);
+}
+
+static void dft_stage_init(
+    unsigned instance, double Fp, double Fs, double Fn, double att,
+    double phase, stage_t * p, int L, int M, double * multiplier,
+    int min_dft_size, int large_dft_size)
+{ /* Configure *p as a DFT-filter stage with output/input ratio L/M. */
+  dft_filter_t * f = &p->shared->dft_filter[instance];   /* Shared filter slot. */
+  int num_taps = 0, dft_length = f->dft_length, i;
+  /* dft_length == 0: this shared filter has not been designed yet; do it now. */
+  if (!dft_length) {
+    int k = phase == 50 && lsx_is_power_of_2(L) && Fn == L? L << 1 : 4;
+    double * h = lsx_design_lpf(Fp, Fs, Fn, att, &num_taps, -k, -1.);
+    /* post_peak: the middle tap at phase 50, else as set by lsx_fir_to_phase. */
+    if (phase != 50)
+      lsx_fir_to_phase(&h, &num_taps, &f->post_peak, phase);
+    else f->post_peak = num_taps / 2;
+    /* Scale the taps (this folds in *multiplier) and store them, wrapped. */
+    dft_length = set_dft_length(num_taps, min_dft_size, large_dft_size);
+    f->coefs = aligned_calloc((size_t)dft_length, sizeof(*f->coefs));
+    for (i = 0; i < num_taps; ++i)
+      f->coefs[(i + dft_length - num_taps + 1) & (dft_length - 1)]
+        = (sample_t)(h[i] * ((1. / dft_length) * rdft_multiplier() * L * *multiplier));
+    free(h);
+  }
+  /* Per-stage work buffers. NOTE(review): allocation results are not checked. */
+#if RATE_SIMD
+  p->dft_out = aligned_malloc(sizeof(sample_t) * (size_t)dft_length);
+#endif
+#if 1 /* In fact, currently, only pffft needs this. */
+  p->dft_scratch = aligned_malloc(2 * sizeof(sample_t) * (size_t)dft_length);
+#endif
+  /* First use only: create the transform setups and transform the coefs. */
+  if (!f->dft_length) {
+    void * coef_setup = rdft_forward_setup(dft_length);  /* Temporary; deleted below. */
+    int Lp = lsx_is_power_of_2(L)? L : 1;
+    int Mp = lsx_is_power_of_2(M)? M : 1;
+    f->dft_forward_setup = rdft_forward_setup(dft_length / Lp);
+    f->dft_backward_setup = rdft_backward_setup(dft_length / Mp);
+    if (Mp == 1)
+      rdft_forward(dft_length, coef_setup, f->coefs, p->dft_scratch);
+    else
+      rdft_oforward(dft_length, coef_setup, f->coefs, p->dft_scratch);
+    rdft_delete_setup(coef_setup);
+    f->num_taps = num_taps;
+    f->dft_length = dft_length;
+    lsx_debug("fir_len=%i dft_length=%i Fp=%g Fs=%g Fn=%g att=%g %i/%i",
+        num_taps, dft_length, Fp, Fs, Fn, att, L, M);
+  }
+  *multiplier = 1;            /* Gain is applied via the coefs (scaled above). */
+  p->out_in_ratio = (double)L / M;
+  p->type = dft_stage;
+  p->fn = dft_stage_fn;
+  p->preload = f->post_peak / L;
+  p->at.integer    = f->post_peak % L;
+  p->L = L;
+  p->step.integer = abs(3-M) == 1 && Fs == 1 && 1? -M/2 : M;  /* -ve: F-domain. */
+  p->dft_filter_num = instance;
+  p->block_len = f->dft_length - (f->num_taps - 1);
+  p->phase0 = p->at.integer / p->L;
+}
+
+#include "filters.h"
+
+typedef struct {              /* Per-channel resampler state. */
+  double     factor;          /* Input rate divided by output rate. */
+  uint64_t   samples_in, samples_out;  /* Totals kept by rate_input/rate_output. */
+  int        num_stages;      /* Number of processing stages in `stages'. */
+  stage_t    * stages;        /* num_stages + 1 entries; the last holds the output FIFO. */
+} rate_t;
+
+#define pre_stage       p->stages[shift]   /* Follows `shift' half-band stages. */
+#define arb_stage       p->stages[shift + have_pre_stage]
+#define post_stage      p->stages[shift + have_pre_stage + have_arb_stage]
+#define have_pre_stage  (preM  * preL  != 1)
+#define have_arb_stage  (arbM  * arbL  != 1)
+#define have_post_stage (postM * postL != 1)
+/* The macros above read locals of rate_init; a stage exists unless its L*M is 1. */
+#define TO_3dB(a)       ((1.6e-6*(a)-7.5e-4)*(a)+.646)
+#define LOW_Q_BW0_PC    (67 + 5 / 8.)
+
+typedef enum {
+  rolloff_none, rolloff_small /* <= 0.01 dB */, rolloff_medium /* <= 0.35 dB */
+} rolloff_t;             /* Pass-band roll-off choice; rate_init's `rolloff'. */
+
+
+static char const * rate_init(
+  /* Private work areas (to be supplied by the client):                       */
+  rate_t * p,                /* Per audio channel.                            */
+  rate_shared_t * shared,    /* Between channels (undergoing same rate change)*/
+
+  /* Public parameters:                                             Typically */
+  double factor,             /* Input rate divided by output rate.            */
+  double bits,               /* Required bit-accuracy (pass + stop)  16|20|28 */
+  double phase,              /* Linear/minimum etc. filter phase.       50    */
+  double bw_pc,              /* Pass-band % (0dB pt.) to preserve.   91.3|98.4*/
+  double anti_aliasing_pc,   /* % bandwidth without aliasing            100   */
+  rolloff_t rolloff,    /* Pass-band roll-off                    small   */
+  bool maintain_3dB_pt,      /*                                        true   */
+  double multiplier,         /* Linear gain to apply during conversion.   1   */
+
+  /* Primarily for test/development purposes:                                 */
+  bool use_hi_prec_clock,    /* Increase irrational ratio accuracy.   false   */
+  int interpolator,          /* Force a particular coef interpolator.   -1    */
+  size_t max_coefs_size,     /* k bytes of coefs to try to keep below.  400   */
+  bool noSmallIntOpt,        /* Disable small integer optimisations.  false   */
+  int log2_min_dft_size,     /* Passed to dft_stage_init as min_dft_size.     */
+  int log2_large_dft_size)   /* Passed to dft_stage_init as large_dft_size.   */
+{ /* Plans the stage chain for one channel and sets it up; returns 0 if OK. */
+  double att = (bits + 1) * linear_to_dB(2.), attArb = att;    /* pass + stop */
+  double tbw0 = 1 - bw_pc / 100, Fs_a = 2 - anti_aliasing_pc / 100;
+  double arbM = factor, tbw_tighten = 1;
+  int n = 0, i, preL = 1, preM = 1, shift = 0, arbL = 1, postL = 1, postM = 1;
+  bool upsample = false, rational = false, iOpt = !noSmallIntOpt;
+  int mode = rolloff > rolloff_small? factor > 1 || bw_pc > LOW_Q_BW0_PC:
+    (int)ceil(2 + (bits - 17) / 4);
+  stage_t * s;
+  /* Argument checks; assert() compiles away when NDEBUG is defined. */
+  assert(factor > 0);
+  assert(!bits || (15 <= bits && bits <= 33));
+  assert(0 <= phase && phase <= 100);
+  assert(53 <= bw_pc && bw_pc <= 100);
+  assert(85 <= anti_aliasing_pc && anti_aliasing_pc <= 100);
+  /* bits == 0 skips the stage search below: only the cubic arb stage is used. */
+  p->factor = factor;
+  if (bits) while (!n++) {                               /* Determine stages: */
+    int try, L, M, x, maxL = interpolator > 0? 1 : mode? 2048 :
+      (int)ceil(DBL max_coefs_size * 1000. / (U100_l * sizeof(sample_t)));
+    double d, epsilon = 0, frac;
+    upsample = arbM < 1;
+    for (i = (int)(arbM * .5), shift = 0; i >>= 1; arbM *= .5, ++shift);
+    preM = upsample || (arbM > 1.5 && arbM < 2);
+    postM = 1 + (arbM > 1 && preM), arbM /= postM;
+    preL = 1 + (!preM && arbM < 2) + (upsample && mode), arbM *= preL;
+    if ((frac = arbM - (int)arbM))
+      epsilon = fabs((uint32_t)(frac * MULT32 + .5) / (frac * MULT32) - 1);
+    for (i = 1, rational = !frac; i <= maxL && !rational; ++i) {
+      d = frac * i, try = (int)(d + .5);
+      if ((rational = fabs(try / d - 1) <= epsilon)) {    /* No long doubles! */
+        if (try == i)
+          arbM = ceil(arbM), shift += arbM > 2, arbM /= 1 + (arbM > 2);
+        else arbM = i * (int)arbM + try, arbL = i;
+      }
+    }
+    L = preL * arbL, M = (int)(arbM * postM), x = (L|M)&1, L >>= !x, M >>= !x;
+    if (iOpt && postL == 1 && (d = preL * arbL / arbM) > 4 && d != 5) {
+      for (postL = 4, i = (int)(d / 16); (i >>= 1) && postL < 256; postL <<= 1);
+      arbM = arbM * postL / arbL / preL, arbL = 1, n = 0;
+    } else if (rational && (max(L, M) < 3 + 2 * iOpt || L * M < 6 * iOpt))
+      preL = L, preM = M, arbM = arbL = postM = 1;
+    if (!mode && (!rational || !n))
+      ++mode, n = 0;
+  }
+  /* A gain-only conversion (no stages, multiplier != 1) still needs one stage. */
+  p->num_stages = shift + have_pre_stage + have_arb_stage + have_post_stage;
+  if (!p->num_stages && multiplier != 1) {
+    bits = arbL = 0;    /* arbL 0 creates an arb stage; bits 0 makes it cubic. */
+    ++p->num_stages;
+  }
+  p->stages = calloc((size_t)p->num_stages + 1, sizeof(*p->stages));
+  if (!p->stages) return "out of memory";     /* Nothing else to undo so far. */
+  for (i = 0; i < p->num_stages; ++i)
+    p->stages[i].shared = shared;
+  if ((n = p->num_stages) > 1) {                              /* Att. budget: */
+    if (have_arb_stage)
+      att += linear_to_dB(2.), attArb = att, --n;
+    att += linear_to_dB((double)n);
+  }
+  /* Half-band stages: use the first FIR with enough attenuation, else the last. */
+  for (n = 0; (size_t)n + 1 < array_length(half_firs) && att > half_firs[n].att; ++n);
+  for (i = 0, s = p->stages; i < shift; ++i, ++s) {
+    s->type = half_stage;
+    s->fn = half_firs[n].fn;
+    s->pre_post = 4 * half_firs[n].num_coefs;
+    s->preload = s->pre = s->pre_post >> 1;
+  }
+  /* Pre stage (DFT filter instance 0), placed after the half-band stages. */
+  if (have_pre_stage) {
+    if (maintain_3dB_pt && have_post_stage) {    /* Trans. bands overlapping. */
+      double tbw3 = tbw0 * TO_3dB(att);                /* FFS: consider Fs_a. */
+      double x = ((2.1429e-4 - 5.2083e-7 * att) * att - .015863) * att + 3.95;
+      x = att * pow((tbw0 - tbw3) / (postM / (factor * postL) - 1 + tbw0), x);
+      if (x > .035) {
+        tbw_tighten = ((4.3074e-3 - 3.9121e-4 * x) * x - .040009) * x + 1.0014;
+        lsx_debug("x=%g tbw_tighten=%g", x, tbw_tighten);
+      }
+    }
+    dft_stage_init(0, 1 - tbw0 * tbw_tighten, Fs_a, preM? max(preL, preM) :
+        arbM / arbL, att, phase, &pre_stage, preL, max(preM, 1), &multiplier,
+        log2_min_dft_size, log2_large_dft_size);
+  }
+
+  if (!bits && have_arb_stage) {                /* Quick and dirty arb stage: */
+    arb_stage.type = cubic_stage;
+    arb_stage.fn = cubic_stage_fn;
+    arb_stage.mult = multiplier, multiplier = 1;
+    arb_stage.step.whole = (int64_t)(arbM * MULT32 + .5);
+    arb_stage.pre_post = max(3, arb_stage.step.integer);
+    arb_stage.preload = arb_stage.pre = 1;
+    arb_stage.out_in_ratio = MULT32 / (double)arb_stage.step.whole;
+  }
+  else if (have_arb_stage) {                     /* Higher quality arb stage: */
+    poly_fir_t const * f = &poly_firs[6*(upsample + !!preM) + mode - !upsample];
+    int order, num_coefs = (int)f->interp[0].scalar, phase_bits, phases;
+    size_t coefs_size;
+    double x = .5, at, Fp, Fs, Fn, mult = upsample? 1 : arbL / arbM;
+    poly_fir1_t const * f1;
+
+    Fn = !upsample && preM? x = arbM / arbL : 1;
+    Fp = !preM? mult : mode? .5 : 1;
+    Fs = 2 - Fp;           /* Ignore Fs_a; it would have little benefit here. */
+    Fp *= 1 - tbw0;
+    if (rolloff > rolloff_small && mode)
+      Fp = !preM? mult * .5 - .125 : mult * .05 + .1;
+    else if (rolloff == rolloff_small)
+      Fp = Fs - (Fs - .148 * x - Fp * .852) * (.00813 * bits + .973);
+    /* Unless one is forced, step to the next interpolator while coefs are too big. */
+    i = (interpolator < 0? !rational : max(interpolator, !rational)) - 1;
+    do {
+      f1 = &f->interp[++i];
+      assert(f1->fn);
+      if (i)
+        arbM /= arbL, arbL = 1, rational = false;
+      phase_bits = (int)ceil(f1->scalar + log(mult)/log(2.));
+      phases = !rational? (1 << phase_bits) : arbL;
+      if (!f->interp[0].scalar) {
+        int phases0 = max(phases, 19), n0 = 0;
+        lsx_design_lpf(Fp, Fs, -Fn, attArb, &n0, phases0, f->beta);
+        num_coefs = n0 / phases0 + 1, num_coefs += num_coefs & !preM;
+      }
+      if ((num_coefs & 1) && rational && (arbL & 1))
+        phases <<= 1, arbL <<= 1, arbM *= 2;
+      at = arbL * (arb_stage.phase0 = .5 * (num_coefs & 1));
+      order = i + (i && mode > 4);
+      coefs_size = (size_t)(num_coefs4 * phases * (order + 1)) * sizeof(sample_t);
+    } while (interpolator < 0 && i < 2 && f->interp[i+1].fn &&
+        coefs_size / 1000 > max_coefs_size);
+    /* The coefficient table is kept in the shared area, so build it only once. */
+    if (!arb_stage.shared->poly_fir_coefs) {
+      int num_taps = num_coefs * phases - 1;
+      raw_coef_t * coefs = lsx_design_lpf(
+          Fp, Fs, Fn, attArb, &num_taps, phases, f->beta);
+      arb_stage.shared->poly_fir_coefs = prepare_coefs(
+          coefs, num_coefs, phases, order, multiplier);
+      lsx_debug("fir_len=%i phases=%i coef_interp=%i size=%.3gk",
+          num_coefs, phases, order, DBL coefs_size / 1000.);
+      free(coefs);
+    }
+    multiplier = 1;
+    arb_stage.type = rational? rational_stage : irrational_stage;
+    arb_stage.fn = f1->fn;
+    arb_stage.pre_post = num_coefs4 - 1;
+    arb_stage.preload = ((num_coefs - 1) >> 1) + (num_coefs4 - num_coefs);
+    arb_stage.n = num_coefs4;
+    arb_stage.phase_bits = phase_bits;
+    arb_stage.L = arbL;
+    arb_stage.use_hi_prec_clock = mode > 1 && use_hi_prec_clock && !rational;
+#if FLOAT_HI_PREC_CLOCK
+    if (arb_stage.use_hi_prec_clock) {
+      arb_stage.at.flt = at;
+      arb_stage.step.flt = arbM;
+      arb_stage.out_in_ratio = (double)(arbL / arb_stage.step.flt);
+    } else
+#endif
+    {
+      arb_stage.at.whole = (int64_t)(at * MULT32 + .5);
+#if !FLOAT_HI_PREC_CLOCK
+      if (arb_stage.use_hi_prec_clock) {
+        arb_stage.at.fix.ls.parts.ms = 0x80000000ul;
+        arbM *= MULT32;
+        arb_stage.step.whole = (int64_t)arbM;
+        arbM -= (double)arb_stage.step.whole;
+        arbM *= MULT32 * MULT32;
+        arb_stage.step.fix.ls.all = (uint64_t)arbM;
+      } else
+#endif
+        arb_stage.step.whole = (int64_t)(arbM * MULT32 + .5);
+      arb_stage.out_in_ratio = MULT32 * arbL / (double)arb_stage.step.whole;
+    }
+  }
+  /* Post stage (DFT filter instance 1), placed after the arb stage. */
+  if (have_post_stage)
+    dft_stage_init(1, 1 - (1 - (1 - tbw0) *
+        (upsample? factor * postL / postM : 1)) * tbw_tighten, Fs_a,
+        (double)max(postL, postM), att, phase, &post_stage, postL, postM,
+        &multiplier, log2_min_dft_size, log2_large_dft_size);
+  /* Each stage gets an input FIFO pre-filled with `preload' zero samples; the */
+  /* extra FIFO created after the loop receives the last stage's output.      */
+  lsx_debug("%g: »%i⋅%i/%i⋅%i/%g⋅%i/%i",
+      1/factor, shift, preL, preM, arbL, arbM, postL, postM);
+  for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) {
+    fifo_create(&s->fifo, (int)sizeof(sample_t));
+    memset(fifo_reserve(&s->fifo, s->preload), 0, sizeof(sample_t) * (size_t)s->preload);
+    lsx_debug("%5i|%-5i preload=%i remL=%i o/i=%g",
+        s->pre, s->pre_post - s->pre, s->preload, s->at.integer, s->out_in_ratio);
+  }
+  fifo_create(&s->fifo, (int)sizeof(sample_t));
+  return 0;
+}
+
+static void rate_process(rate_t * p)
+{
+  int k;
+  /* Run every stage once, in order; stage k is handed stage k+1's FIFO. */
+  for (k = 0; k < p->num_stages; ++k)
+    p->stages[k].fn(&p->stages[k], &p->stages[k + 1].fifo);
+}
+
+static sample_t * rate_input(rate_t * p, sample_t const * samples, size_t n)
+{ /* Count n samples as input and hand them to the first stage's FIFO. */
+  p->samples_in = p->samples_in + n;
+  return fifo_write(&p->stages->fifo, (int)n, samples);
+}
+
+static sample_t const * rate_output(rate_t * p, sample_t * samples, size_t * n)
+{ /* Read at most *n samples from the FIFO after the last stage; *n = count. */
+  fifo_t * const tail = &(p->stages + p->num_stages)->fifo;
+  *n = min(*n, (size_t)fifo_occupancy(tail)), p->samples_out += *n;
+  return fifo_read(tail, (int)*n, samples);
+}
+
+static void rate_flush(rate_t * p)
+{ /* Feed zero samples until all the output owed for samples_in is available. */
+  fifo_t * fifo = &p->stages[p->num_stages].fifo;
+#if defined _MSC_VER && _MSC_VER == 1200
+  uint64_t samples_out = (uint64_t)(int64_t)((double)(int64_t)p->samples_in / p->factor + .5);
+#else
+  uint64_t samples_out = (uint64_t)((double)p->samples_in / p->factor + .5);
+#endif
+  size_t remaining = (size_t)(samples_out - p->samples_out);
+  sample_t * buff = calloc(1024, sizeof(*buff));     /* A block of zero samples. */
+  /* If that allocation failed there is no valid padding to feed, so do nothing. */
+  if (buff && (int)remaining > 0) {
+    while ((size_t)fifo_occupancy(fifo) < remaining) {
+      rate_input(p, buff, 1024);
+      rate_process(p);
+    }
+    fifo_trim_to(fifo, (int)remaining);   /* Presumably drops the excess output. */
+    p->samples_in = 0;
+  }
+  free(buff);
+}
+
+static void rate_close(rate_t * p)
+{ /* Release what rate_init allocated; does nothing if p->stages is null. */
+  rate_shared_t * shared = p->stages? p->stages[0].shared : 0;
+  int i;
+  if (!p->stages) return;          /* rate_init failed, or already closed. */
+  for (i = 0; i <= p->num_stages; ++i) {   /* <=: includes the output entry. */
+    stage_t * s = &p->stages[i];
+    aligned_free(s->dft_scratch);
+    aligned_free(s->dft_out);
+    fifo_delete(&s->fifo);
+  }
+  if (shared) {
+    for (i = 0; i < 2; ++i) {
+      dft_filter_t * f= &shared->dft_filter[i];
+      aligned_free(f->coefs);
+      rdft_delete_setup(f->dft_forward_setup);
+      rdft_delete_setup(f->dft_backward_setup);
+    }
+    free(shared->poly_fir_coefs);
+    memset(shared, 0, sizeof(*shared));  /* A later close then finds nothing to free. */
+  }
+  free(p->stages), p->stages = 0;        /* Makes a repeated close harmless. */
+}
+
+#if defined SOXR_LIB
+static double rate_delay(rate_t * p)
+{ /* Output owed for the input received so far, less the output already read. */
+#if defined _MSC_VER && _MSC_VER == 1200
+  double const owed = (double)(int64_t)p->samples_in / p->factor;
+  return owed - (double)(int64_t)p->samples_out;
+#else
+  double const owed = (double)p->samples_in / p->factor;
+  return owed - (double)p->samples_out;
+#endif
+}
+
+static void rate_sizes(size_t * shared, size_t * channel)
+{ /* Sizes of the two work areas that the client supplies to rate_init. */
+  *shared = sizeof(rate_shared_t);
+  *channel = sizeof(rate_t);
+}
+
+#include "soxr.h"
+
+static char const * rate_create(
+    void * channel,
+    void * shared,
+    double io_ratio,
+    soxr_quality_spec_t * q_spec,
+    soxr_runtime_spec_t * r_spec,
+    double scale)
+{ /* Adapter: unpack the soxr specs into rate_init's argument list. */
+  return rate_init(
+      channel, shared,
+      io_ratio,
+      q_spec->bits,
+      q_spec->phase,
+      q_spec->bw_pc,
+      q_spec->anti_aliasing_pc,
+      "\1\2\0"[q_spec->flags & 3],  /* 0,1,2,3 -> rolloff small,medium,none,none */
+      !!(q_spec->flags & SOXR_MAINTAIN_3DB_PT),
+      scale,                        /* rate_init's `multiplier' (linear gain). */
+      !!(q_spec->flags & SOXR_HI_PREC_CLOCK),
+      (int)(r_spec->flags & 3) - 1, /* interpolator; -1 when both bits are clear */
+      r_spec->coef_size_kbytes,
+      !!(r_spec->flags & SOXR_NOSMALLINTOPT),
+      (int)r_spec->log2_min_dft_size,
+      (int)r_spec->log2_large_dft_size);
+}
+
+static char const * id(void)
+{ /* Returns RATE_ID, the variant name defined by the file that includes this. */
+  return RATE_ID;
+}
+
+typedef void (* fn_t)(void);        /* Generic function pointer for the table. */
+fn_t RATE_CB[] = {  /* Slot order mirrors control_block_t in soxr.c (presumably cast to it). */
+  (fn_t)rate_input,
+  (fn_t)rate_process,   /* NOTE(review): takes (rate_t *); soxr.c's slot is (void *, size_t). */
+  (fn_t)rate_output,
+  (fn_t)rate_flush,
+  (fn_t)rate_close,
+  (fn_t)rate_delay,
+  (fn_t)rate_sizes,
+  (fn_t)rate_create,
+  (fn_t)0,              /* The set_io_ratio slot: not provided here. */
+  (fn_t)id,
+};
+#endif
diff --git a/src/rate32.c b/src/rate32.c
new file mode 100644
index 0000000..146aad6
--- /dev/null
+++ b/src/rate32.c
@@ -0,0 +1,9 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#define sample_t   float                /* Sample type for this instantiation. */
+#define RATE_SIMD  0                    /* Leave out the `#if RATE_SIMD' code. */
+#define RDFT_CB    _soxr_rdft32_cb
+#define RATE_CB    _soxr_rate32_cb      /* Name of the callback table defined. */
+#define RATE_ID    "single-precision"   /* String returned by id(). */
+#include "rate.h"
diff --git a/src/rate32s.c b/src/rate32s.c
new file mode 100644
index 0000000..ddd4a67
--- /dev/null
+++ b/src/rate32s.c
@@ -0,0 +1,9 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#define sample_t   float                /* Sample type for this instantiation. */
+#define RATE_SIMD  1                    /* Compile in the `#if RATE_SIMD' code. */
+#define RDFT_CB    _soxr_rdft32s_cb
+#define RATE_CB    _soxr_rate32s_cb     /* Name of the callback table defined. */
+#define RATE_ID    "single-precision-SIMD"   /* String returned by id(). */
+#include "rate.h"
diff --git a/src/rate64.c b/src/rate64.c
new file mode 100644
index 0000000..748224c
--- /dev/null
+++ b/src/rate64.c
@@ -0,0 +1,9 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#define sample_t   double               /* Sample type for this instantiation. */
+#define RATE_SIMD  0                    /* Leave out the `#if RATE_SIMD' code. */
+#define RDFT_CB    _soxr_rdft64_cb
+#define RATE_CB    _soxr_rate64_cb      /* Name of the callback table defined. */
+#define RATE_ID    "double-precision"   /* String returned by id(). */
+#include "rate.h"
diff --git a/src/rdft.h b/src/rdft.h
new file mode 100644
index 0000000..93120ab
--- /dev/null
+++ b/src/rdft.h
@@ -0,0 +1,31 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+void ORDERED_CONVOLVE(int n, void * not_used, DFT_FLOAT * a, const DFT_FLOAT * b)
+{ /* In place a *= b, treating elements from index 2 on as (re, im) pairs. */
+  int k = 2;
+  (void)not_used;
+  a[0] = a[0] * b[0], a[1] = a[1] * b[1];  /* Slots 0 and 1: plain products. */
+  while (k < n) {
+    DFT_FLOAT const re = a[k], im = a[k + 1];
+    a[k]     = b[k]     * re - b[k + 1] * im;
+    a[k + 1] = b[k + 1] * re + b[k]     * im;
+    k += 2;
+  }
+}
+
+void ORDERED_PARTIAL_CONVOLVE(int n, DFT_FLOAT * a, const DFT_FLOAT * b)
+{ /* Pairs below n are multiplied in place; a[1] gets the next pair's real part. */
+  int k = 2;
+  a[0] = a[0] * b[0];
+  for (; k < n; k += 2) {
+    DFT_FLOAT const re = a[k], im = a[k + 1];
+    a[k]     = b[k]     * re - b[k + 1] * im;
+    a[k + 1] = b[k + 1] * re + b[k]     * im;
+  }
+  a[1] = b[k] * a[k] - b[k + 1] * a[k + 1];
+}
+
+#undef ORDERED_CONVOLVE
+#undef ORDERED_PARTIAL_CONVOLVE
+#undef DFT_FLOAT
diff --git a/src/rint-clip.h b/src/rint-clip.h
new file mode 100644
index 0000000..a156f16
--- /dev/null
+++ b/src/rint-clip.h
@@ -0,0 +1,153 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if defined DITHER
+/* With DITHER: the converters take a seed and add pseudo-random noise. */
+#define DITHERING (1./32)*(int)(((ran1>>=3)&31)-((ran2>>=3)&31))   /* Uses 3 bits of each of ran1, ran2 per call. */
+#define DITHER_RAND (seed = 1664525UL * seed + 1013904223UL) >> 3  /* Advances `seed'. */
+#define DITHER_VARS unsigned long ran1 = DITHER_RAND, ran2 = DITHER_RAND
+#define SEED_ARG , unsigned long * seed0
+#define SAVE_SEED *seed0 = seed
+#define COPY_SEED unsigned long seed = *seed0;
+#define COPY_SEED1 unsigned long seed1 = seed
+#define PASS_SEED1 , &seed1
+#define PASS_SEED0 , seed0
+
+#else
+/* Without DITHER: DITHERING is 0 and the seed hooks expand to nothing. */
+#define DITHERING 0
+#define DITHER_VARS
+#define SEED_ARG
+#define SAVE_SEED
+#define COPY_SEED
+#define COPY_SEED1
+#define PASS_SEED1
+#define PASS_SEED0
+
+#endif
+
+
+
+#if defined FE_INVALID && defined FPU_RINT
+static void RINT_CLIP(RINT_T * const dest, FLOATX const * const src,
+    unsigned stride, size_t i, size_t const n, size_t * const clips SEED_ARG)
+{ /* Careful path: convert src[i..n) one sample at a time, clipping as needed. */
+  COPY_SEED
+  DITHER_VARS;
+  for (; i < n; ++i) {
+    double d = src[i] + DITHERING;
+    dest[stride * i] = RINT(d);
+    if (fetestexcept(FE_INVALID)) {  /* Presumably raised when d is out of range. */
+      feclearexcept(FE_INVALID);
+      dest[stride * i] = d > 0? RINT_MAX : -RINT_MAX - 1;
+      ++*clips;
+    }
+  }
+  SAVE_SEED;
+}
+#endif
+
+
+
+static size_t LSX_RINT_CLIP(void * * const dest0, FLOATX const * const src,
+    size_t const n SEED_ARG)
+{ /* Convert n samples to RINT_T at *dest0 and advance it; returns # clipped. */
+  size_t i, clips = 0;
+  RINT_T * dest = *dest0;
+  COPY_SEED
+#if defined FE_INVALID && defined FPU_RINT
+#define _ dest[i] = RINT(src[i] + DITHERING), ++i,
+  feclearexcept(FE_INVALID);
+  for (i = 0; i < (n & ~(size_t)7);) {  /* Blocks of 8; mask is as wide as n. */
+    COPY_SEED1;
+    DITHER_VARS;
+    _ _ _ _ _ _ _ _ 0;
+    if (fetestexcept(FE_INVALID)) {   /* Redo this block from the saved seed. */
+      feclearexcept(FE_INVALID);
+      RINT_CLIP(dest, src, 1, i - 8, i, &clips PASS_SEED1);
+    }
+  }
+  RINT_CLIP(dest, src, 1, i, n, &clips PASS_SEED0);
+#else
+#define _ d = src[i] + DITHERING, dest[i++] = (RINT_T)(d > N - 1? ++clips, (RINT_T)(N - 1) : d < -N? ++clips, (RINT_T)(-N) : RINT(d)),
+  const double N = 1. + RINT_MAX;
+  double d;
+  for (i = 0; i < (n & ~(size_t)7);) {  /* Blocks of 8; mask is as wide as n. */
+    DITHER_VARS;
+    _ _ _ _ _ _ _ _ 0;
+  }
+  {                                     /* The remaining (fewer than 8) samples. */
+    DITHER_VARS;
+    for (; i < n; _ 0);
+  }
+#endif
+  SAVE_SEED;
+  *dest0 = dest + n;
+  return clips;
+}
+#undef _
+
+
+
+static size_t LSX_RINT_CLIP_2(void * * dest0, FLOATX const * const * srcs,
+    unsigned const stride, size_t const n SEED_ARG)
+{ /* Convert n samples from each of srcs[0..stride), interleaved at *dest0. */
+  unsigned j;
+  size_t i, clips = 0;
+  RINT_T * dest = *dest0;
+  COPY_SEED
+#if defined FE_INVALID && defined FPU_RINT
+#define _ dest[stride * i] = RINT(src[i] + DITHERING), ++i,
+  feclearexcept(FE_INVALID);
+  for (j = 0; j < stride; ++j, ++dest) {
+    FLOATX const * const src = srcs[j];
+    for (i = 0; i < (n & ~(size_t)7);) {  /* Blocks of 8; mask as wide as n. */
+      COPY_SEED1;
+      DITHER_VARS;
+      _ _ _ _ _ _ _ _ 0;
+      if (fetestexcept(FE_INVALID)) {
+        feclearexcept(FE_INVALID);
+        RINT_CLIP(dest, src, stride, i - 8, i, &clips PASS_SEED1);
+      }
+    }
+    RINT_CLIP(dest, src, stride, i, n, &clips PASS_SEED0);
+  }
+#else
+#define _ d = src[i] + DITHERING, dest[stride * i++] = (RINT_T)(d > N - 1? ++clips, (RINT_T)(N - 1) : d < -N? ++clips, (RINT_T)(-N) : RINT(d)),
+  const double N = 1. + RINT_MAX;
+  double d;
+  for (j = 0; j < stride; ++j, ++dest) {
+    FLOATX const * const src = srcs[j];
+    for (i = 0; i < (n & ~(size_t)7);) {  /* Blocks of 8; mask as wide as n. */
+      DITHER_VARS;
+      _ _ _ _ _ _ _ _ 0;
+    }
+    {
+      DITHER_VARS;
+      for (; i < n; _ 0);
+    }
+  }
+#endif
+  SAVE_SEED;
+  *dest0 = dest - stride + stride * n;  /* dest has advanced by stride; no wrap at n == 0. */
+  return clips;
+}
+#undef _
+
+#undef PASS_SEED0
+#undef PASS_SEED1
+#undef COPY_SEED1
+#undef COPY_SEED
+#undef SAVE_SEED
+#undef SEED_ARG
+#undef DITHER_VARS
+#undef DITHERING
+#undef DITHER
+
+#undef RINT_MAX
+#undef RINT_T
+#undef FPU_RINT
+#undef RINT
+#undef RINT_CLIP
+#undef LSX_RINT_CLIP
+#undef LSX_RINT_CLIP_2
diff --git a/src/rint.h b/src/rint.h
new file mode 100644
index 0000000..6a32a32
--- /dev/null
+++ b/src/rint.h
@@ -0,0 +1,68 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined soxr_rint_included
+#define soxr_rint_included
+
+#include "soxr-config.h"
+
+
+
+#if HAVE_LRINT && LONG_MAX == 2147483647L
+  #include <math.h>
+  #define FPU_RINT32
+  #define rint32 lrint
+#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+  #define FPU_RINT32
+  static __inline int32_t rint32(double input) {
+    int32_t result;
+    __asm__ __volatile__("fistpl %0": "=m"(result): "t"(input): "st");
+    return result;
+  }
+#elif defined __GNUC__ && defined __arm__
+  #define FPU_RINT32
+  static __inline int32_t rint32(double input) {
+    register int32_t result;
+    __asm__ __volatile__ ("ftosid %0, %P1": "=w"(result): "w"(input));
+    return result;
+  }
+#elif defined _MSC_VER && defined _M_IX86
+  #define FPU_RINT32
+  static __inline int32_t rint32(double input) {
+    int32_t result;
+    _asm {
+      fld input
+      fistp result
+    }
+    return result;
+  }
+#else /* Portable fallback: rounds halves away from zero; evaluates x twice. */
+  #define rint32(x) ((int32_t)((x) < 0? (x) - .5 : (x) + .5))
+#endif
+
+
+
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+  #define FPU_RINT16       /* Marks rint16 as an FPU conversion on this target. */
+  static __inline int16_t rint16(double input) {
+    int16_t result;
+    __asm__ __volatile__("fistps %0": "=m"(result): "t"(input): "st");
+    return result;
+  }
+#elif defined _MSC_VER && defined _M_IX86
+  #define FPU_RINT16       /* Marks rint16 as an FPU conversion on this target. */
+  static __inline int16_t rint16(double input) {
+    int16_t result;
+    _asm {
+      fld input
+      fistp result
+    }
+    return result;
+  }
+#else /* Fallback. NOTE(review): floor() needs <math.h>, which is not included on this path. */
+  #define rint16(x) (int16_t)floor((x)+.5) /* Is this faster than in rint32? */
+#endif
+
+
+
+#endif
diff --git a/src/samplerate.h b/src/samplerate.h
new file mode 100644
index 0000000..911cc5d
--- /dev/null
+++ b/src/samplerate.h
@@ -0,0 +1 @@
+#include "soxr-lsr.h"
diff --git a/src/simd-dev.h b/src/simd-dev.h
new file mode 100644
index 0000000..fd084c6
--- /dev/null
+++ b/src/simd-dev.h
@@ -0,0 +1,5 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#define PFFT_MACROS_ONLY
+#include "pffft.c"
diff --git a/src/simd.c b/src/simd.c
new file mode 100644
index 0000000..ec90c3e
--- /dev/null
+++ b/src/simd.c
@@ -0,0 +1,84 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include "simd.h"
+#include "simd-dev.h"
+
+#define SIMD_ALIGNMENT (sizeof(float) * 4)
+
+void * _soxr_simd_aligned_malloc(size_t size)
+{ /* Returns memory aligned to SIMD_ALIGNMENT, or 0 if it cannot be had. */
+  char * p1 = 0, * p = 0;
+  if (size <= (size_t)-1 - SIMD_ALIGNMENT)   /* Else size + padding would wrap. */
+    p = malloc(size + SIMD_ALIGNMENT);
+  if (p)   /* Round up past p; stash p just below for _soxr_simd_aligned_free. */
+    p1 = (char *)((size_t)(p + SIMD_ALIGNMENT) & ~(SIMD_ALIGNMENT - 1)), *((void * *)p1 - 1) = p;
+  return p1;
+}
+
+
+
+void * _soxr_simd_aligned_calloc(size_t nmemb, size_t size)
+{ /* Zero-filled aligned array; 0 if nmemb * size overflows or memory runs out. */
+  size_t const bytes = nmemb * size;          /* May wrap: checked on the next line. */
+  void * p = size && bytes / size != nmemb? 0 : _soxr_simd_aligned_malloc(bytes);
+  if (p) memset(p, 0, bytes);
+  return p;
+}
+
+
+
+void _soxr_simd_aligned_free(void * p1)
+{ /* Frees a block obtained from _soxr_simd_aligned_malloc/calloc; 0 is ignored. */
+  if (!p1) return;
+  free(((void * *)p1)[-1]);   /* The pointer malloc() returned sits just below p1. */
+}
+
+
+
+void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float * b)
+{ /* SIMD ordered convolve: a is updated in place from b; both must be aligned. */
+  int i;
+  float ab0, ab1;
+  v4sf       * /*RESTRICT*/ va = (v4sf       *)a;
+  v4sf const *   RESTRICT   vb = (v4sf const *)b;
+  assert(VALIGNED(a) && VALIGNED(b));
+  ab0 = a[0] * b[0], ab1 = a[1] * b[1];  /* Slots 0, 1 are plain products: saved here... */
+  for (i = 0; i < n / 4; i += 2) {       /* Two v4sf per pass (macros come via simd-dev.h). */
+    v4sf a1r = va[i+0], a1i = va[i+1];
+    v4sf b1r = vb[i+0], b1i = vb[i+1];
+    UNINTERLEAVE2(a1r, a1i, a1r, a1i);
+    UNINTERLEAVE2(b1r, b1i, b1r, b1i);
+    VCPLXMUL(a1r, a1i, b1r, b1i);        /* Presumably a complex multiply, a *= b. */
+    INTERLEAVE2(a1r, a1i, a1r, a1i);
+    va[i+0] = a1r, va[i+1] = a1i;
+  }
+  a[0] = ab0, a[1] = ab1;                /* ...and written over what the loop stored. */
+  (void)not_used;
+}
+
+
+
+void _soxr_ordered_partial_convolve_simd(int n, float * a, const float * b)
+{ /* SIMD partial convolve: as the full form below n, then a[1] from slot n. */
+  int i;
+  float ab0;
+  v4sf       * /*RESTRICT*/ va = (v4sf       *)a;
+  v4sf const *   RESTRICT   vb = (v4sf const *)b;
+  assert(VALIGNED(a) && VALIGNED(b));
+  ab0 = a[0] * b[0];                     /* Slot 0 is a plain product: saved here... */
+  for (i = 0; i < n / 4; i += 2) {       /* Two v4sf per pass (macros come via simd-dev.h). */
+    v4sf a1r = va[i+0], a1i = va[i+1];
+    v4sf b1r = vb[i+0], b1i = vb[i+1];
+    UNINTERLEAVE2(a1r, a1i, a1r, a1i);
+    UNINTERLEAVE2(b1r, b1i, b1r, b1i);
+    VCPLXMUL(a1r, a1i, b1r, b1i);        /* Presumably a complex multiply, a *= b. */
+    INTERLEAVE2(a1r, a1i, a1r, a1i);
+    va[i+0] = a1r, va[i+1] = a1i;
+  }
+  a[0] = ab0;                            /* ...and written over what the loop stored. */
+  a[1] = b[n] * a[n] - b[n+1] * a[n+1];  /* Reads a[n], a[n+1]: the buffers must extend that far. */
+}
diff --git a/src/simd.h b/src/simd.h
new file mode 100644
index 0000000..a99e79c
--- /dev/null
+++ b/src/simd.h
@@ -0,0 +1,16 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined simd_included
+#define simd_included
+
+#include <stddef.h>
+
+void * _soxr_simd_aligned_malloc(size_t);
+void * _soxr_simd_aligned_calloc(size_t, size_t);
+void _soxr_simd_aligned_free(void *);
+
+void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float * b);
+void _soxr_ordered_partial_convolve_simd(int n, float * a, const float * b);
+
+#endif
diff --git a/src/soxr-lsr.h b/src/soxr-lsr.h
new file mode 100644
index 0000000..b313d6d
--- /dev/null
+++ b/src/soxr-lsr.h
@@ -0,0 +1,78 @@
+/* SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* Wrapper mostly compatible with `libsamplerate'.
+ * (Libsoxr's native API can be found in soxr.h).  */
+
+#if !defined SAMPLERATE_H
+#define SAMPLERATE_H
+#if defined __cplusplus
+  extern "C" {
+#endif
+
+#if defined SOXR_DLL
+  #if defined soxr_lsr_EXPORTS
+    #define SOXR __declspec(dllexport)
+  #else
+    #define SOXR __declspec(dllimport)
+  #endif
+#else
+  #define SOXR
+#endif
+
+typedef float   SRC_SAMPLE;
+#if !defined SOXR_LIB
+enum SRC_SRCTYPE_e {SRC_SINC_BEST_QUALITY, SRC_SINC_MEDIUM_QUALITY,
+                    SRC_SINC_FASTEST, SRC_ZERO_ORDER_HOLD, SRC_LINEAR};
+typedef int     SRC_SRCTYPE;
+typedef int     SRC_ERROR;
+typedef long    (* src_callback_t)(void *, SRC_SAMPLE * *);
+typedef struct  SRC_STATE SRC_STATE;
+typedef struct  SRC_DATA {   /* Presumably mirrors libsamplerate's SRC_DATA -- confirm. */
+  SRC_SAMPLE    * data_in, * data_out;
+  long          input_frames, output_frames;
+  long          input_frames_used, output_frames_gen;
+  int           end_of_input;
+  double        src_ratio;
+} SRC_DATA;
+#endif
+SOXR SRC_STATE *   src_new(SRC_SRCTYPE, int num_channels, SRC_ERROR *);
+SOXR SRC_ERROR     src_process  (SRC_STATE *, SRC_DATA *);
+SOXR SRC_ERROR     src_set_ratio(SRC_STATE *, double);
+SOXR SRC_ERROR     src_reset    (SRC_STATE *);
+SOXR SRC_ERROR     src_error    (SRC_STATE *);
+SOXR SRC_STATE *   src_delete   (SRC_STATE *);
+SOXR SRC_STATE *   src_callback_new(
+                    src_callback_t, SRC_SRCTYPE, int, SRC_ERROR *, void *);
+SOXR long          src_callback_read(
+                    SRC_STATE *, double src_ratio, long, SRC_SAMPLE *);
+SOXR SRC_ERROR     src_simple(SRC_DATA *, SRC_SRCTYPE, int);
+SOXR char const *  src_get_name(SRC_SRCTYPE);
+SOXR char const *  src_get_description(SRC_SRCTYPE);
+SOXR char const *  src_get_version(void);
+SOXR char const *  src_strerror(SRC_ERROR);
+SOXR int           src_is_valid_ratio(double);
+SOXR void          src_short_to_float_array(short const *, float *, int);
+SOXR void          src_float_to_short_array(float const *, short *, int);
+SOXR void          src_int_to_float_array(int const *, float *, int);
+SOXR void          src_float_to_int_array(float const *, int *, int);
+
+#undef SOXR
+#if defined __cplusplus
+  }
+#endif
+#endif
diff --git a/src/soxr.c b/src/soxr.c
new file mode 100644
index 0000000..f9e105c
--- /dev/null
+++ b/src/soxr.c
@@ -0,0 +1,643 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "soxr.h"
+#include "data-io.h"
+#include "internal.h"
+
+/* Query the library version: the static string "libsoxr-" SOXR_VERSION. */
+char const * soxr_version(void)
+{
+  static char const version[] = "libsoxr-" SOXR_VERSION;
+  return version;
+}
+
+typedef void sample_t; /* float or double */
+
+/* Entry points of one resampling engine; soxr_create copies an engine's table
+ * into each soxr_t.  Unnamed `void *' args are one channel's resampler state. */
+typedef struct {
+  sample_t * (*  input)(void *, sample_t * samples, size_t   n); /* Returns where to write n input samples. */
+  void (* process)(void *, size_t);  /* Called with the wanted output length. */
+  sample_t const * (* output)(void *, sample_t * samples, size_t * n); /* Returns output; sets *n to its length. */
+  void (* flush)(void *);            /* Called before process once flushing. */
+  void (* close)(void *);            /* Called before the state is freed. */
+  double (* delay)(void *);          /* Value reported by soxr_delay. */
+  void (* sizes)(size_t * shared, size_t * channel); /* Allocation sizes of the shared & per-channel state. */
+  char const * (* create)(void * channel, void * shared, double io_ratio,
+      soxr_quality_spec_t * q_spec, soxr_runtime_spec_t * r_spec, double scale); /* 0, or an error message. */
+  void (* set_io_ratio)(void *, double io_ratio, size_t len); /* May be 0 (checked before use). */
+  char const * (* id)(void);         /* Name reported by soxr_engine. */
+} control_block_t;
+
+#define resampler_input (*p->control_block.input)     /* N.B. each of these */
+#define resampler_process (*p->control_block.process) /* needs a soxr_t     */
+#define resampler_output (*p->control_block.output)   /* named `p' to be in */
+#define resampler_flush (*p->control_block.flush)     /* scope wherever it  */
+#define resampler_close (*p->control_block.close)     /* is used.           */
+#define resampler_delay (*p->control_block.delay)
+#define resampler_sizes (*p->control_block.sizes)
+#define resampler_create (*p->control_block.create)
+#define resampler_set_io_ratio (*p->control_block.set_io_ratio)
+#define resampler_id (*p->control_block.id)
+
+/* Per-engine state handles & I/O converter signatures.  N.B. an interleave_t's
+ * result is added to the clip count; its last arg. is a dither seed or 0. */
+typedef void * resampler_t; /* For one channel. */
+typedef void * resampler_shared_t; /* Between channels. */
+typedef void (* deinterleave_t)(sample_t * * dest,
+    soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch);
+typedef size_t (* interleave_t)(soxr_datatype_t data_type, void * * dest,
+    sample_t const * const * src, size_t, unsigned, unsigned long *);
+
+struct soxr {                    /* State of one (multi-channel) resampler: */
+  unsigned num_channels;
+  double io_ratio;               /* Input rate / output rate. */
+  soxr_error_t error;            /* 0 if none; checked on entry by most calls. */
+  soxr_quality_spec_t q_spec;
+  soxr_io_spec_t io_spec;
+  soxr_runtime_spec_t runtime_spec;
+
+  void * input_fn_state;         /* Passed back to input_fn. */
+  soxr_input_fn_t input_fn;      /* If set, used by soxr_output to get input. */
+  size_t max_ilen;               /* Limit on the length asked of input_fn. */
+
+  resampler_shared_t shared;     /* Allocated in initialise. */
+  resampler_t * resamplers;      /* One per channel; allocated in initialise. */
+  control_block_t control_block; /* Copy of the selected engine's table. */
+  deinterleave_t deinterleave;
+  interleave_t interleave;
+
+  void * * channel_ptrs;         /* Per-channel ptrs. used when (de)interleaving. */
+  size_t clips;                  /* Accumulates the interleave fn. results. */
+  unsigned long seed;            /* Passed to interleave unless SOXR_NO_DITHER. */
+  int flushing;                  /* Set once end-of-input has been signalled. */
+};
+
+/* TODO: these should not be here. */
+#define TO_3dB(a)       ((1.6e-6*(a)-7.5e-4)*(a)+.646)
+#define LOW_Q_BW0_PC    (67 + 5 / 8.)
+
+/* Make a quality spec. from `recipe' (bits 0-3: quality level; bits 4-5: phase
+ * response; SOXR_STEEP_FILTER) & `flags'.  If invalid, only spec.e is set. */
+soxr_quality_spec_t soxr_quality_spec(unsigned long recipe, unsigned long flags)
+{
+  soxr_quality_spec_t spec, * p = &spec;
+  unsigned quality = recipe & 0xf;
+  double rej;
+  memset(p, 0, sizeof(*p));
+  if (quality > 13) {
+    p->e = "invalid quality type";
+    return spec;
+  }
+  if (quality == 13)
+    quality = 6;
+  else if (quality > 10)
+    quality = 0;
+  p->phase = "\62\31\144"[(recipe & 0x30) >> 4]; /* I.e. {50, 25, 100, 0}. */
+  p->anti_aliasing_pc = 100;
+  p->bits = !quality? 0: quality < 3? 16 : quality < 8? 4 + quality * 4 : 55 - quality * 4;
+  rej = p->bits * linear_to_dB(2.);
+  p->flags = flags;
+  if (quality < 8) {
+    p->bw_pc = quality == 1? LOW_Q_BW0_PC : 100 - 5 / TO_3dB(rej);
+    if (quality <= 2)
+      p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM;
+  }
+  else {
+    static float const bw[] = {93.1f, 83.2f, 66.3f};
+    p->bw_pc = bw[quality - 8];
+    if (quality - 8 == 2)
+      p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM;
+  }
+  if (recipe & SOXR_STEEP_FILTER)
+    p->bw_pc = 100 - 1 / TO_3dB(rej);
+  return spec;
+}
+
+/* Query the name of the resampling engine, as given by its control block. */
+char const * soxr_engine(soxr_t p)
+{
+  char const * (* const get_id)(void) = p->control_block.id;
+  return get_id();
+}
+
+/* Give read/write access to the clip counter held in the resampler. */
+size_t * soxr_num_clips(soxr_t p)
+{
+  size_t * const counter = &p->clips;
+  return counter;
+}
+
+
+
+soxr_error_t soxr_error(soxr_t p)
+{
+  return p->error;
+}
+
+/* Make a runtime spec. with the given # of threads and default values for
+ * the DFT-size and coefficient-size parameters; all other fields are zero. */
+soxr_runtime_spec_t soxr_runtime_spec(unsigned num_threads)
+{
+  soxr_runtime_spec_t spec;
+  memset(&spec, 0, sizeof(spec));
+  spec.num_threads = num_threads;
+  spec.coef_size_kbytes = 400;
+  spec.log2_large_dft_size = 17;
+  spec.log2_min_dft_size = 10;
+  return spec;
+}
+
+/* Make an I/O spec. for the given datatypes, with unity scale.  If either
+ * datatype is out-of-range, only spec.e is set (to an error message). */
+soxr_io_spec_t soxr_io_spec(
+  soxr_datatype_t itype,
+  soxr_datatype_t otype)
+{
+  soxr_io_spec_t spec;
+  memset(&spec, 0, sizeof(spec));
+  if ((itype | otype) >= SOXR_SPLIT * 2) {
+    spec.e = "invalid io datatype(s)";
+    return spec;
+  }
+  spec.scale = 1;
+  spec.otype = otype;
+  spec.itype = itype;
+  return spec;
+}
+
+/* Run-time check for SIMD support on x86: always true for x86-64; on 32-bit
+ * x86 (GCC or MSVC), tests CPUID function 1's EDX; false on anything else. */
+#if HAVE_SIMD
+static bool cpu_has_simd(void)
+{
+#if defined __x86_64__ || defined _M_X64
+  return true;
+#elif defined __GNUC__ && defined i386
+  uint32_t eax, ebx, ecx, edx;
+  __asm__ __volatile__ (
+      "pushl %%ebx   \n\t"     /* ebx is saved & restored around cpuid; */
+      "cpuid         \n\t"
+      "movl %%ebx, %1\n\t"     /* its cpuid value is copied out to `ebx'. */
+      "popl %%ebx    \n\t"
+      : "=a"(eax), "=r"(ebx), "=c"(ecx), "=d"(edx)
+      : "a"(1)                 /* I.e. CPUID function 1. */
+      : "cc" );
+  return !!(edx & 0x06000000); /* Bits 25 & 26: presumably SSE, SSE2 (confirm). */
+#elif defined _MSC_VER && defined _M_IX86
+  uint32_t d;
+  __asm {
+    xor     eax, eax
+    inc     eax
+    push    ebx
+    cpuid
+    pop     ebx
+    mov     d, edx
+  }
+  return !!(d & 0x06000000);   /* Same test as in the GCC variant. */
+#endif
+  return false;                /* None of the above. */
+}
+#endif
+/* Engine control blocks, defined elsewhere; soxr_create selects & copies one: */
+extern control_block_t _soxr_rate32s_cb, _soxr_rate32_cb, _soxr_rate64_cb, _soxr_vr32_cb;
+
+/* Create a resampler.  Each spec. pointer may be 0 for defaults (unity scale,
+ * SOXR_HQ, 1 thread).  Returns 0 on failure; the error goes to *error0 if given. */
+soxr_t soxr_create(
+  double input_rate, double output_rate,
+  unsigned num_channels,
+  soxr_error_t * error0,
+  soxr_io_spec_t const * io_spec,
+  soxr_quality_spec_t const * q_spec,
+  soxr_runtime_spec_t const * runtime_spec)
+{
+  double io_ratio = output_rate? input_rate? input_rate / output_rate : -1 : input_rate? -1 : 0;
+  static const float datatype_full_scale[] = {1, 1, 65536.*32768, 32768}; /* [datatype & 3] */
+  soxr_t p = 0;
+  soxr_error_t error = 0;
+  /* N.B. io_ratio is now -1 if just one of the rates is 0, or 0 if both are. */
+  if (q_spec && q_spec->e)  error = q_spec->e;
+  else if (io_spec && (io_spec->itype | io_spec->otype) >= SOXR_SPLIT * 2)
+    error = "invalid io datatype(s)";
+
+  if (!error && !(p = calloc(sizeof(*p), 1))) error = "malloc failed";
+
+  if (p) {
+    p->q_spec = q_spec? *q_spec : soxr_quality_spec(SOXR_HQ, 0);
+    p->io_ratio = io_ratio;
+    p->num_channels = num_channels;
+    if (io_spec)
+      p->io_spec = *io_spec;
+    else
+      p->io_spec.scale = 1;      /* Datatypes stay 0 from calloc. */
+
+    p->runtime_spec = runtime_spec? *runtime_spec : soxr_runtime_spec(1);
+    p->io_spec.scale *= datatype_full_scale[p->io_spec.otype & 3] /
+                        datatype_full_scale[p->io_spec.itype & 3];
+    p->seed = (unsigned long)time(0) ^ (unsigned long)p;
+    /* Select the engine & sample converters (single- or double-precision): */
+#if HAVE_SINGLE_PRECISION
+    if (!HAVE_DOUBLE_PRECISION || (p->q_spec.bits <= 20 && !(p->q_spec.flags & SOXR_DOUBLE_PRECISION))
+#if HAVE_VR
+        || (p->q_spec.flags & SOXR_VR)
+#endif
+        ) {
+      p->deinterleave = (deinterleave_t)_soxr_deinterleave_f;
+      p->interleave = (interleave_t)_soxr_interleave_f;
+      memcpy(&p->control_block,
+#if HAVE_VR
+          (p->q_spec.flags & SOXR_VR)? &_soxr_vr32_cb :
+#endif
+#if HAVE_SIMD
+          cpu_has_simd()? &_soxr_rate32s_cb :
+#endif
+          &_soxr_rate32_cb, sizeof(p->control_block));
+    }
+#if HAVE_DOUBLE_PRECISION
+    else
+#endif
+#endif
+#if HAVE_DOUBLE_PRECISION
+    {
+      p->deinterleave = (deinterleave_t)_soxr_deinterleave;
+      p->interleave = (interleave_t)_soxr_interleave;
+      memcpy(&p->control_block, &_soxr_rate64_cb, sizeof(p->control_block));
+    }
+#endif
+    /* Create the resamplers now only if # channels & ratio are both non-0: */
+    if (p->num_channels && io_ratio)
+      error = soxr_set_io_ratio(p, io_ratio, 0);
+  }
+  if (error)
+    soxr_delete(p), p = 0;
+  if (error0)
+    *error0 = error;
+  return p;
+}
+
+/* Register (or reset) the function that soxr_output calls to obtain input. */
+soxr_error_t soxr_set_input_fn(soxr_t p,
+    soxr_input_fn_t input_fn, void * input_fn_state, size_t max_ilen)
+{
+  if (!max_ilen) max_ilen = (size_t)-1; /* 0 selects the largest possible limit. */
+  p->max_ilen = max_ilen;
+  p->input_fn = input_fn;
+  p->input_fn_state = input_fn_state;
+  return 0;
+}
+
+/* Close & free all per-channel and shared state, then zero *p (p is kept). */
+static void soxr_delete0(soxr_t p)
+{
+  unsigned ch = 0;
+
+  for (; p->resamplers && ch < p->num_channels; ++ch) {
+    resampler_t r = p->resamplers[ch];
+    if (r)
+      resampler_close(r);
+    free(r);
+  }
+  free(p->resamplers);
+  free(p->channel_ptrs);
+  free(p->shared);
+
+  memset(p, 0, sizeof(*p));
+}
+
+
+
+double soxr_delay(soxr_t p)
+{
+  return (p && !p->error && p->resamplers)? resampler_delay(p->resamplers[0]) : 0;
+}
+
+
+
+static soxr_error_t fatal_error(soxr_t p, soxr_error_t error)
+{
+  soxr_delete0(p);
+  return p->error = error;
+}
+
+
+
+/* Allocate and create the shared state and one resampler per channel, using
+ * the current io_ratio, specs & scale.  On any failure, everything is released
+ * (via fatal_error) and the error is both recorded in p and returned. */
+static soxr_error_t initialise(soxr_t p)
+{
+  size_t shared_size, channel_size;
+  unsigned ch;
+
+  resampler_sizes(&shared_size, &channel_size);
+  p->channel_ptrs = calloc(sizeof(*p->channel_ptrs), p->num_channels);
+  p->shared = calloc(shared_size, 1);
+  p->resamplers = calloc(sizeof(*p->resamplers), p->num_channels);
+  if (!(p->shared && p->channel_ptrs && p->resamplers))
+    return fatal_error(p, "malloc failed");
+
+  for (ch = 0; ch < p->num_channels; ++ch) {
+    soxr_error_t error;
+    p->resamplers[ch] = calloc(channel_size, 1);
+    if (!p->resamplers[ch])
+      error = "malloc failed";
+    else /* N.B. a non-0 result from create is an error message: */
+      error = resampler_create(p->resamplers[ch], p->shared, p->io_ratio,
+          &p->q_spec, &p->runtime_spec, p->io_spec.scale);
+    if (error)
+      return fatal_error(p, error);
+  }
+  return 0;
+}
+
+/* Set the # of channels (allowed only before the resamplers have been
+ * created), then try to initialise via soxr_set_io_ratio. */
+soxr_error_t soxr_set_num_channels(soxr_t p, unsigned num_channels)
+{
+  if (p == 0)                           return "invalid soxr_t pointer";
+  if (p->num_channels == num_channels)  return p->error;
+  if (num_channels == 0)                return "invalid # of channels";
+  if (p->resamplers != 0)               return "# of channels can't be changed";
+  p->num_channels = num_channels;
+  return soxr_set_io_ratio(p, p->io_ratio, 0);
+}
+
+
+/* Set the input/output rate ratio.  The first valid call creates the
+ * resamplers; later calls change the ratio only if the engine supports it
+ * (otherwise, only a ratio equal to the current one is accepted). */
+soxr_error_t soxr_set_io_ratio(soxr_t p, double io_ratio, size_t slew_len)
+{
+  unsigned ch;
+  if (!p)               return "invalid soxr_t pointer";
+  if (p->error)         return p->error;
+  if (!p->num_channels) return "must set # channels before O/I ratio";
+  if (io_ratio <= 0)    return "I/O ratio out-of-range";
+  if (!p->channel_ptrs) {
+    p->io_ratio = io_ratio;
+    return initialise(p);
+  }
+  if (!p->control_block.set_io_ratio)
+    return fabs(p->io_ratio - io_ratio) < 1e-15? 0 :
+      "Varying O/I ratio is not supported with this quality level";
+  for (ch = 0; ch < p->num_channels; ++ch)
+    resampler_set_io_ratio(p->resamplers[ch], io_ratio, slew_len);
+  return 0;
+}
+
+/* Release a resampler and everything it owns; does nothing if p is 0. */
+void soxr_delete(soxr_t p)
+{
+  if (!p) return;
+  soxr_delete0(p);
+  free(p);
+}
+
+/* Discard all resampler state but keep the configuration (specs, # channels,
+ * input function & engine).  N.B. io_ratio & max_ilen are not retained. */
+soxr_error_t soxr_clear(soxr_t p) /* TODO: this, properly. */
+{
+  struct soxr saved;
+  if (!p)
+    return "invalid soxr_t pointer";
+  saved = *p;
+  soxr_delete0(p);
+  memset(p, 0, sizeof(*p));
+  p->num_channels = saved.num_channels;
+  p->q_spec = saved.q_spec;
+  p->io_spec = saved.io_spec;
+  p->runtime_spec = saved.runtime_spec;
+  p->input_fn = saved.input_fn;
+  p->input_fn_state = saved.input_fn_state;
+  p->control_block = saved.control_block;
+  p->deinterleave = saved.deinterleave;
+  p->interleave = saved.interleave;
+  return 0;
+}
+
+/* Convert `len' samples of one channel's input into that resampler's buffer. */
+static void soxr_input_1ch(soxr_t p, unsigned i, soxr_cbuf_t src, size_t len)
+{
+  resampler_t const channel = p->resamplers[i];
+  sample_t * dest = resampler_input(channel, NULL, len);
+  p->deinterleave(&dest, p->io_spec.itype, &src, len, 1);
+}
+
+/* Feed `len' samples per channel from `in' (split or interleaved, per
+ * io_spec.itype) to the resamplers.  len == 0 marks end-of-input.  Returns
+ * the # of samples consumed; 0 (with p->error set) if `in' is unexpectedly 0. */
+static size_t soxr_input(soxr_t p, void const * in, size_t len)
+{
+  unsigned i;
+  if (!p || p->error) return 0;    /* N.B. must precede any other use of p. */
+  if (!in && len) {p->error = "null input buffer pointer"; return 0;}
+  if (!len) {
+    p->flushing = true;
+    return 0;
+  }
+  if (p->io_spec.itype & SOXR_SPLIT)
+    for (i = 0; i < p->num_channels; ++i)
+      soxr_input_1ch(p, i, ((soxr_cbufs_t)in)[i], len);
+  else {
+    for (i = 0; i < p->num_channels; ++i)
+      p->channel_ptrs[i] = resampler_input(p->resamplers[i], NULL, len);
+    (*p->deinterleave)(
+        (sample_t **)p->channel_ptrs, p->io_spec.itype, &in, len, p->num_channels);
+  }
+  return len;
+}
+
+/* Run one channel's resampler; deliver its output to `dest', or (if !separated) note where it is. */
+static size_t soxr_output_1ch(soxr_t p, unsigned i, soxr_buf_t dest, size_t len, bool separated)
+{
+  resampler_t const channel = p->resamplers[i];
+  sample_t const * src;
+  if (p->flushing)
+    resampler_flush(channel);
+  resampler_process(channel, len);
+  src = resampler_output(channel, NULL, &len);
+  if (!separated)
+    p->channel_ptrs[i] = (void /* const */ *)src;
+  else p->clips += (p->interleave)(p->io_spec.otype, &dest, &src,
+      len, 1, (p->io_spec.flags & SOXR_NO_DITHER)? 0 : &p->seed);
+  return len;
+}
+
+/* Produce up to `len' samples per channel into `out' from already-input data.
+ * `out' is read as an array of channel buffers only if the output is split. */
+static size_t soxr_output_no_callback(soxr_t p, soxr_buf_t out, size_t len)
+{
+  unsigned i;
+  size_t done = 0;
+  bool separated = !!(p->io_spec.otype & SOXR_SPLIT);
+#if defined _OPENMP
+  if (!p->runtime_spec.num_threads && p->num_channels > 1)
+#pragma omp parallel for
+  for (i = 0; i < p->num_channels; ++i) {
+    size_t done1;
+    done1 = soxr_output_1ch(p, i, separated? ((soxr_bufs_t)out)[i] : 0, len, separated);
+    if (!i)
+      done = done1;
+  } else
+#endif
+  for (i = 0; i < p->num_channels; ++i)
+    done = soxr_output_1ch(p, i, separated? ((soxr_bufs_t)out)[i] : 0, len, separated);
+
+  if (!separated)
+    p->clips += (p->interleave)(p->io_spec.otype, &out, (sample_t const * const *)p->channel_ptrs,
+        done, p->num_channels, (p->io_spec.flags & SOXR_NO_DITHER)? 0 : &p->seed);
+  return done;
+}
+
+/* Resample & output up to len0 samples per channel, calling the input function
+ * (if set) for more input as needed.  Returns the # of samples output. */
+size_t soxr_output(soxr_t p, void * out, size_t len0)
+{
+  size_t odone, odone0 = 0, olen = len0, osize, idone, ilen;
+  void const * in = out; /* Set to !=0, so that caller may leave unset. */
+  bool was_flushing;
+
+  if (!p || p->error) return 0;
+  if (!out && len0) {p->error = "null output buffer pointer"; return 0;}
+  ilen = min(p->max_ilen, (size_t)ceil((double)olen *p->io_ratio)); /* p != 0 here. */
+
+  do {
+    odone = soxr_output_no_callback(p, out, olen);
+    odone0 += odone;
+    if (odone0 == len0 || !p->input_fn || p->flushing)
+      break;
+
+    osize = soxr_datatype_size(p->io_spec.otype) * p->num_channels;
+    out = (char *)out + osize * odone;
+    olen -= odone;
+    idone = p->input_fn(p->input_fn_state, &in, ilen);
+    was_flushing = p->flushing;
+    if (!in)
+      p->error = "input function reported failure";
+    else soxr_input(p, in, idone);
+  } while (odone || idone || (!was_flushing && p->flushing));
+  return odone0;
+}
+
+/* Estimate the # of input samples needed to make `olen' output samples (by
+ * scaling with io_ratio and rounding up), limited by `ilen' via min(). */
+static size_t soxr_i_for_o(soxr_t p, size_t olen, size_t ilen)
+{
+  size_t needed;
+#if 0
+  if (p->runtime_spec.flags & SOXR_STRICT_BUFFERING)
+    needed = rate_i_for_o(p->resamplers[0], olen);
+  else
+#endif
+    needed = (size_t)ceil((double)olen * p->io_ratio);
+  return min(needed, ilen);
+}
+
+/* Disabled (#if 0): the inverse of soxr_i_for_o.  Its only visible use is in a
+ * commented-out expression in soxr_process. */
+#if 0
+static size_t soxr_o_for_i(soxr_t p, size_t ilen, size_t olen)
+{
+  size_t result = (size_t)ceil((double)ilen / p->io_ratio);
+  return min(result, olen);
+}
+#endif
+
+/* Input up to ilen0 samples per channel from `in' and output up to `olen' into
+ * `out'; the amounts done are stored via idone0/odone0 (each may be 0). */
+soxr_error_t soxr_process(soxr_t p,
+    void const * in , size_t ilen0, size_t * idone0,
+    void       * out, size_t olen , size_t * odone0)
+{
+  size_t ilen, idone, odone = 0;
+  unsigned i;
+  bool flush_requested = false;
+
+  if (!p) return "null pointer";
+  /* End-of-input is requested by in == 0, or by ilen0 given as ~length: */
+  if (!in)
+    flush_requested = true, ilen = ilen0 = 0;
+  else {
+    if ((ptrdiff_t)ilen0 < 0)
+      flush_requested = true, ilen0 = ~ilen0; /* As passed by soxr_oneshot. */
+    if (idone0 && (1 || flush_requested)) /* N.B. `1 ||': just `if (idone0)'. */
+      ilen = soxr_i_for_o(p, olen, ilen0);
+    else
+      ilen = ilen0/*, olen = soxr_o_for_i(p, ilen, olen)*/;
+  }
+  p->flushing |= ilen == ilen0 && flush_requested; /* Only if all input is taken. */
+  /* Split i & o: handle each channel separately; else use soxr_input/output: */
+  if (!out && !in)
+    idone = ilen;
+  else if (p->io_spec.itype & p->io_spec.otype & SOXR_SPLIT) { /* Both i & o */
+#if defined _OPENMP
+    if (!p->runtime_spec.num_threads && p->num_channels > 1)
+#pragma omp parallel for
+    for (i = 0; i < p->num_channels; ++i) {
+      size_t done;
+      if (in)
+        soxr_input_1ch(p, i, ((soxr_cbufs_t)in)[i], ilen);
+      done = soxr_output_1ch(p, i, ((soxr_bufs_t)out)[i], olen, true);
+      if (!i)                  /* Only channel 0's count is reported. */
+        odone = done;
+    } else
+#endif
+    for (i = 0; i < p->num_channels; ++i) {
+      if (in)
+        soxr_input_1ch(p, i, ((soxr_cbufs_t)in)[i], ilen);
+      odone = soxr_output_1ch(p, i, ((soxr_bufs_t)out)[i], olen, true);
+    }
+    idone = ilen;
+  }
+  else {
+    idone = ilen? soxr_input (p, in , ilen) : 0;
+    odone = soxr_output(p, out, olen);
+  }
+  if (idone0) *idone0 = idone;
+  if (odone0) *odone0 = odone;
+  return p->error;
+}
+
+/* Resample a whole signal with one call: create a resampler (default quality
+ * SOXR_LQ if q_spec is 0), process with end-of-input flagged, then delete. */
+soxr_error_t soxr_oneshot(
+    double irate, double orate,
+    unsigned num_channels,
+    void const * in , size_t ilen, size_t * idone,
+    void * out, size_t olen, size_t * odone,
+    soxr_io_spec_t const * io_spec,
+    soxr_quality_spec_t const * q_spec,
+    soxr_runtime_spec_t const * runtime_spec)
+{
+  soxr_quality_spec_t default_q_spec;
+  soxr_t resampler;
+  soxr_error_t error = q_spec? q_spec->e : 0;
+  if (error) return error;
+  if (!q_spec) {
+    default_q_spec = soxr_quality_spec(SOXR_LQ, 0);
+    q_spec = &default_q_spec;
+  }
+  resampler = soxr_create(irate, orate, num_channels,
+      &error, io_spec, q_spec, runtime_spec);
+  if (error) return error;
+  error = soxr_process(resampler, in, ~ilen, idone, out, olen, odone); /* ~: flush. */
+  soxr_delete(resampler);
+  return error;
+}
+
+/* Internal use.  NOTE(review): as written, a non-0 `error' is not stored when
+ * p->error is 0 (the function then just returns 0) -- confirm this is intended. */
+soxr_error_t soxr_set_error(soxr_t p, soxr_error_t error)
+{
+  if (!p) return "null pointer";
+  if (!p->error && p->error != error) return p->error;
+  p->error = error;
+  return 0;
+}
diff --git a/src/soxr.h b/src/soxr.h
new file mode 100644
index 0000000..c8374b2
--- /dev/null
+++ b/src/soxr.h
@@ -0,0 +1,318 @@
+/* SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+
+
+/* -------------------------------- Gubbins --------------------------------- */
+
+#if !defined soxr_included
+#define soxr_included
+
+
+#if defined __cplusplus
+  #include <cstddef>
+  extern "C" {
+#else
+  #include <stddef.h>
+#endif
+
+#if defined SOXR_DLL
+  #if defined soxr_EXPORTS     /* Presumably set when building the DLL itself. */
+    #define SOXR __declspec(dllexport)
+  #else
+    #define SOXR __declspec(dllimport)
+  #endif
+#else
+  #define SOXR                 /* Not a DLL build: no decoration. */
+#endif
+
+typedef struct soxr_io_spec soxr_io_spec_t;           /* These 3 structs are */
+typedef struct soxr_quality_spec soxr_quality_spec_t; /* defined under `API  */
+typedef struct soxr_runtime_spec soxr_runtime_spec_t; /* type definitions'.  */
+
+
+
+/* ---------------------------- API conventions --------------------------------
+
+Buffer lengths (and occupancies) are expressed as the number of contained
+samples per channel.
+
+Parameter names for buffer lengths have the suffix `len'.
+
+A single-character `i' or 'o' is often used in names to give context as
+input or output (e.g. ilen, olen).                                            */
+
+
+
+/* --------------------------- Type declarations ---------------------------- */
+
+typedef struct soxr * soxr_t;        /* A resampler for 1 or more channels. */
+typedef char const * soxr_error_t;     /* 0:no-error; non-0:error message. */
+
+typedef void       * soxr_buf_t;  /* 1 buffer of channel-interleaved samples. */
+typedef void const * soxr_cbuf_t;                        /* Ditto; read-only. */
+
+typedef soxr_buf_t const  * soxr_bufs_t;/* Or, a separate buffer for each ch. */
+typedef soxr_cbuf_t const * soxr_cbufs_t;                /* Ditto; read-only. */
+
+typedef void const * soxr_in_t;      /* Either a soxr_cbuf_t or soxr_cbufs_t,
+                                        depending on itype in soxr_io_spec_t. */
+typedef void       * soxr_out_t;     /* Either a soxr_buf_t or soxr_bufs_t,
+                                        depending on otype in soxr_io_spec_t. */
+
+
+
+/* --------------------------- API main functions --------------------------- */
+
+SOXR char const    * soxr_version(void);  /* Query version: "libsoxr-x.y.z". */
+
+#define soxr_strerror(e)               /* Soxr counterpart to strerror. */     \
+    ((e)?(e):"no error")
+
+
+/* Create a stream resampler (NULL is returned if creation fails): */
+
+SOXR soxr_t soxr_create(
+    double      input_rate,      /* Input sample-rate. */
+    double      output_rate,     /* Output sample-rate. */
+    unsigned    num_channels,    /* Number of channels to be used. */
+        /* All following arguments are optional (may be set to NULL). */
+    soxr_error_t *,              /* To report any error during creation. */
+    soxr_io_spec_t const *,      /* To specify non-default I/O formats. */
+    soxr_quality_spec_t const *, /* To specify non-default resampling quality.*/
+    soxr_runtime_spec_t const *);/* To specify non-default runtime resources. */
+
+
+
+/* If not using an app-supplied input function, after creating a stream
+ * resampler, repeatedly call: */
+
+SOXR soxr_error_t soxr_process(
+    soxr_t      resampler,      /* As returned by soxr_create. */
+                            /* Input (to be resampled): */
+    soxr_in_t   in,             /* Input buffer(s); may be NULL (see below). */
+    size_t      ilen,           /* Input buf. length (samples per channel). */
+    size_t      * idone,        /* To return actual # samples used (<= ilen). */
+                            /* Output (resampled): */
+    soxr_out_t  out,            /* Output buffer(s).*/
+    size_t      olen,           /* Output buf. length (samples per channel). */
+    size_t      * odone);       /* To return actual # samples out (<= olen).
+
+    Note that no special meaning is associated with ilen or olen equal to
+    zero.  End-of-input (i.e. no data is available nor shall be available)
+    may be indicated by setting `in' to NULL.                                 */
+
+
+
+/* If using an app-supplied input function, it must look and behave like this:*/
+
+typedef size_t /* data_len */
+  (* soxr_input_fn_t)(         /* Supply data to be resampled. */
+    void * input_fn_state,     /* As given to soxr_set_input_fn (below). */
+    soxr_in_t * data,          /* Returned data; see below. N.B. ptr to ptr(s)*/
+    size_t requested_len);     /* Samples per channel, >= returned data_len.
+
+  data_len  *data     Indicates    Meaning
+   ------- -------   ------------  -------------------------
+     !=0     !=0       Success     *data contains data to be
+                                   input to the resampler.
+      0    !=0 (or   End-of-input  No data is available nor
+           not set)                shall be available.
+      0       0        Failure     An error occurred whilst trying to
+                                   source data to be input to the resampler.  */
+
+/* and be registered with a previously created stream resampler using: */
+
+SOXR soxr_error_t soxr_set_input_fn(/* Set (or reset) an input function.*/
+    soxr_t resampler,            /* As returned by soxr_create. */
+    soxr_input_fn_t,             /* Function to supply data to be resampled.*/
+    void * input_fn_state,       /* If needed by the input function. */
+    size_t max_ilen);            /* Max. input fn. requested_len; 0: no limit.*/
+
+/* then repeatedly call: */
+
+SOXR size_t /*odone*/ soxr_output(/* Resample and output a block of data.*/
+    soxr_t resampler,            /* As returned by soxr_create. */
+    soxr_out_t data,             /* App-supplied buffer(s) for resampled data.*/
+    size_t olen);                /* Amount of data to output; >= odone. */
+
+
+
+/* Common stream resampler operations: */
+
+SOXR soxr_error_t soxr_error(soxr_t);   /* Query error status (0: none). */
+SOXR size_t     * soxr_num_clips(soxr_t); /* Query int. clip counter (for R/W). */
+SOXR double       soxr_delay(soxr_t);   /* Query current delay in output samples.*/
+SOXR char const * soxr_engine(soxr_t p);/* Query resampling engine name. */
+
+SOXR soxr_error_t soxr_clear(soxr_t);   /* Ready for fresh signal, same config. */
+SOXR void         soxr_delete(soxr_t);  /* Free resources; NULL is allowed. */
+
+
+/* `Short-cut', single call to resample a (probably short) signal held entirely
+ * in memory.  See soxr_create and soxr_process above for parameter details.
+ * N.B. if no quality spec. is given, SOXR_LQ is used (c.f. soxr_create). */
+
+SOXR soxr_error_t soxr_oneshot(
+    double         input_rate,
+    double         output_rate,
+    unsigned       num_channels,
+    soxr_in_t    in , size_t ilen, size_t * idone,
+    soxr_out_t   out, size_t olen, size_t * odone,
+    soxr_io_spec_t const *,
+    soxr_quality_spec_t const *,
+    soxr_runtime_spec_t const *);
+
+
+/* For variable-rate resampling (experimental). See example # 5 for how to
+ * create a variable-rate resampler and how to use this function.  io_ratio
+ * is input rate / output rate and must be > 0. */
+
+SOXR soxr_error_t soxr_set_io_ratio(soxr_t, double io_ratio, size_t slew_len);
+
+
+
+/* -------------------------- API type definitions -------------------------- */
+
+typedef enum {          /* Datatypes supported for I/O to/from the resampler: */
+  /* Internal; do not use: */
+  SOXR_FLOAT32, SOXR_FLOAT64, SOXR_INT32, SOXR_INT16, SOXR_SPLIT = 4,
+
+  /* Use for interleaved channels: */
+  SOXR_FLOAT32_I = SOXR_FLOAT32, SOXR_FLOAT64_I, SOXR_INT32_I, SOXR_INT16_I,
+
+  /* Use for split channels: */
+  SOXR_FLOAT32_S = SOXR_SPLIT  , SOXR_FLOAT64_S, SOXR_INT32_S, SOXR_INT16_S
+
+} soxr_datatype_t;
+
+#define soxr_datatype_size(x)  /* Size, in bytes, of 1 sample of datatype x. */\
+  ((unsigned char *)"\4\10\4\2")[(x)&3]
+
+
+
+struct soxr_io_spec {                                            /* Typically */
+  soxr_datatype_t itype;     /* Input datatype.                SOXR_FLOAT32_I */
+  soxr_datatype_t otype;     /* Output datatype.               SOXR_FLOAT32_I */
+  double scale;              /* Linear gain to apply during resampling.  1    */
+  void * e;                  /* Internal use: error message, if any.     0    */
+  unsigned long flags;       /* Per the following #defines.              0    */
+};
+
+#define SOXR_TPDF              0     /* Applicable only if otype is INT16. */
+#define SOXR_NO_DITHER         8u    /* Disable the above. */
+
+
+
+struct soxr_quality_spec {                                       /* Typically */
+  double bits;               /* Required bit-accuracy (pass + stop).    20    */
+  double phase;              /* Linear/minimum etc. phase. [0,100]      50    */
+  double bw_pc;              /* Pass-band % (0dB pt.) to preserve.     91.3   */
+  double anti_aliasing_pc;   /* % bandwidth without aliasing.           100   */
+  void * e;                  /* Internal use: error message, if any.     0    */
+  unsigned long flags;       /* Per the following #defines.              0    */
+};
+
+#define SOXR_ROLLOFF_SMALL     0u    /* <= 0.01 dB */
+#define SOXR_ROLLOFF_MEDIUM    1u    /* <= 0.35 dB */
+#define SOXR_ROLLOFF_NONE      2u    /* For Chebyshev bandwidth. */
+
+#define SOXR_MAINTAIN_3DB_PT   4u  /* Reserved for internal use. */
+#define SOXR_HI_PREC_CLOCK     8u  /* Increase `irrational' ratio accuracy. */
+#define SOXR_DOUBLE_PRECISION 16u  /* Use double prec. even @ bitdepths <= 20.*/
+#define SOXR_VR               32u  /* Experimental, variable-rate resampling. */
+
+
+
+struct soxr_runtime_spec {                                       /* Typically */
+  unsigned log2_min_dft_size;/* For DFT efficiency. [8,15]              10    */
+  unsigned log2_large_dft_size;/* For DFT efficiency. [16,20]           17    */
+  unsigned coef_size_kbytes; /* For SOXR_COEF_INTERP_AUTO (below).      400   */
+  unsigned num_threads;      /* 0: `automatic' (if built with OpenMP).   1    */
+  void * e;                  /* Reserved for internal use.               0    */
+  unsigned long flags;       /* Per the following #defines.              0    */
+};
+                                   /* For `irrational' ratios only: */
+#define SOXR_COEF_INTERP_AUTO  0u    /* Auto select coef. interpolation. */
+#define SOXR_COEF_INTERP_LOW   1u    /* Man. select: less CPU, more memory. */
+#define SOXR_COEF_INTERP_HIGH  2u    /* Man. select: more CPU, less memory. */
+
+#define SOXR_STRICT_BUFFERING  4u  /* Reserved for future use. */
+#define SOXR_NOSMALLINTOPT     8u  /* For test purposes only. */
+
+
+
+/* -------------------------- API type constructors ------------------------- */
+
+/* These functions allow setting of the most commonly-used structure
+ * parameters, with other parameters being given default values.  The default
+ * values may then be overridden, directly in the structure, if needed.  */
+
+SOXR soxr_quality_spec_t soxr_quality_spec(
+    unsigned long recipe,       /* A quality, OR'd with any 0x.. options below. */
+    unsigned long flags);       /* As soxr_quality_spec_t.flags. */
+
+                                  /* The 5 standard qualities found in SoX: */
+#define SOXR_QQ                 0   /* 'Quick' cubic interpolation. */
+#define SOXR_LQ                 1   /* 'Low' 16-bit with larger rolloff. */
+#define SOXR_MQ                 2   /* 'Medium' 16-bit with medium rolloff. */
+#define SOXR_HQ                 SOXR_20_BITQ /* 'High quality'. */
+#define SOXR_VHQ                SOXR_28_BITQ /* 'Very high quality'. */
+
+#define SOXR_16_BITQ            3
+#define SOXR_20_BITQ            4
+#define SOXR_24_BITQ            5
+#define SOXR_28_BITQ            6
+#define SOXR_32_BITQ            7
+                                    /* Libsamplerate equivalent qualities: */
+#define SOXR_LSR0Q              8     /* 'Best sinc'. */
+#define SOXR_LSR1Q              9     /* 'Medium sinc'. */
+#define SOXR_LSR2Q              10    /* 'Fast sinc'. */
+
+#define SOXR_LINEAR_PHASE       0x00  /* Phase options use recipe bits 4-5. */
+#define SOXR_INTERMEDIATE_PHASE 0x10
+#define SOXR_MINIMUM_PHASE      0x30
+#define SOXR_STEEP_FILTER       0x40
+#define SOXR_ALLOW_ALIASING     0x80  /* Reserved for future use. */
+
+
+
+SOXR soxr_runtime_spec_t soxr_runtime_spec(
+    unsigned num_threads);      /* As soxr_runtime_spec_t.num_threads. */
+
+
+
+SOXR soxr_io_spec_t soxr_io_spec(
+    soxr_datatype_t itype,      /* Invalid datatypes give a spec. that makes */
+    soxr_datatype_t otype);     /* soxr_create fail: "invalid io datatype(s)". */
+
+
+
+/* --------------------------- Internal use only ---------------------------- */
+
+SOXR soxr_error_t soxr_set_error(soxr_t, soxr_error_t);
+SOXR soxr_error_t soxr_set_num_channels(soxr_t, unsigned); /* Before init. only. */
+
+
+
+#undef SOXR
+
+#if defined __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/vr32.c b/src/vr32.c
new file mode 100644
index 0000000..e490ab3
--- /dev/null
+++ b/src/vr32.c
@@ -0,0 +1,771 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Experimental variable-rate resampling. */
+
+#include <assert.h>
+#include <math.h>
+#if !defined M_PI
+#define M_PI    3.14159265358979323846
+#endif
+#if !defined M_LN2
+#define M_LN2   0.69314718055994530942
+#endif
+#include <string.h>
+#include <stdlib.h>
+#include "internal.h"
+#define FIFO_SIZE_T int
+#define FIFO_MIN 0x8000
+#include "fifo.h"
+
+#define FADE_LEN_BITS     9
+#define PHASE_BITS_D      (8 + PHASE_MORE)
+#define PHASE_BITS_U      (7 + PHASE_MORE)
+#define PHASE_MORE        0 /* 2 improves small int, and large u, ratios. */
+
+#define PHASES0_D         12
+#define POLY_FIR_LEN_D    20
+#define PHASES0_U         6
+#define POLY_FIR_LEN_U    12
+
+#define MULT32            (65536. * 65536.)
+#define PHASES_D          (1 << PHASE_BITS_D)
+#define PHASES_U          (1 << PHASE_BITS_U)
+
+static float const half_fir_coefs[] = { /* [0]: centre tap; [i] weights input[-i] & input[i]. */
+  4.7111692735253413e-1f, 3.1690797657656167e-1f, 2.8691667164678896e-2f,
+  -1.0192825848403946e-1f, -2.8122856237424654e-2f, 5.6804928137780292e-2f,
+  2.7192768359197508e-2f, -3.6082309197154230e-2f, -2.5927789156038026e-2f,
+  2.3644444384060669e-2f, 2.4363075319345607e-2f, -1.5127630198606428e-2f,
+  -2.2541790286342567e-2f, 8.8733836742880233e-3f, 2.0513077413933017e-2f,
+  -4.1186431656279818e-3f, -1.8330444480421631e-2f, 4.6288071358217028e-4f,
+  1.6049769308921290e-2f, 2.3282106680446069e-3f, -1.3727327353082214e-2f,
+  -4.4066375505196096e-3f, 1.1417847550661287e-2f, 5.8817724081355978e-3f,
+  -9.1727580349157123e-3f, -6.8404638339394346e-3f, 7.0385357033205332e-3f,
+  7.3574525331962567e-3f, -5.0554197628506353e-3f, -7.5008330890673153e-3f,
+  3.2563575907277676e-3f, 7.3346538206330259e-3f, -1.6663208501478607e-3f,
+  -6.9199171108861694e-3f, 3.0196567996023190e-4f, 6.3146436955438768e-3f,
+  8.2835711466756098e-4f, -5.5734271982033918e-3f, -1.7242765658561860e-3f,
+  4.7467223803576682e-3f, 2.3927523666941205e-3f, -3.8801054688632139e-3f,
+  -2.8472115748114728e-3f, 3.0135659731132642e-3f, 3.1064651802365259e-3f,
+  -2.1809660142807748e-3f, -3.1935061143485862e-3f, 1.4096923923208671e-3f,
+  3.1342382222281609e-3f, -7.2053095076414931e-4f, -2.9561940489039682e-3f,
+  1.2777585046118889e-4f, 2.6873033434313882e-3f, 3.6043554054680685e-4f,
+  -2.3547716396561816e-3f, -7.4160208709749312e-4f, 1.9840894915230177e-3f,
+  1.0181606831615856e-3f, -1.5982325266851590e-3f, -1.1966774804490967e-3f,
+  1.2170528733224913e-3f, 1.2869618709883193e-3f, -8.5687504489877664e-4f,
+  -1.3011452950496001e-3f, 5.3030588389885972e-4f, 1.2527854026453923e-3f,
+  -2.4622758430821288e-4f, -1.1560181289625195e-3f, 9.9661643910782316e-6f,
+  1.0247989665318426e-3f, 1.7639297561664703e-4f, -8.7226452073196350e-4f,
+  -3.1358436147401782e-4f, 7.1022054657665971e-4f, 4.0466151692224986e-4f,
+  -5.4877022848030636e-4f, -4.5444807961399138e-4f, 3.9609542800868769e-4f,
+  4.6899779918507020e-4f, -2.5835154936239735e-4f, -4.5505391611721792e-4f,
+  1.3970512544147175e-4f, 4.1957352577882777e-4f, -4.2458993694471047e-5f,
+  -3.6930861782460262e-4f, -3.2738549063278822e-5f, 3.1046609224355927e-4f,
+  8.6624679037202785e-5f, -2.4845427128026068e-4f, -1.2101300074995281e-4f,
+  1.8773208187021294e-4f, 1.3849844077872591e-4f, -1.3170611080827864e-4f,
+  -1.4212373327156217e-4f, 8.2758595879431528e-5f, 1.3513059684140468e-4f,
+  -4.2284127775471251e-5f, -1.2070298779675768e-4f, 1.0811692847491609e-5f,
+  1.0178008299781669e-4f, 1.1852545451857104e-5f, -8.0914539313342186e-5f,
+  -2.6454558961220653e-5f, 6.0208388858339534e-5f, 3.4169979203255580e-5f,
+  -4.1203296686185329e-5f, -3.6353143441156863e-5f, 2.4999186627094098e-5f,
+  3.4542829080466582e-5f, -1.2148053427488782e-5f, -3.0260855999161159e-5f,
+  2.7687092952335852e-6f, 2.5095689880235108e-5f, 3.6223160417538916e-6f,
+  -2.0960977068565079e-5f, -9.3312292092513232e-6f, 2.0711288605113663e-5f,
+  3.1992093654438569e-5f, 1.9772538588596925e-5f, 4.8667740603532560e-6f,
+  -5.3495033191567977e-7f,
+};
+
+#define CONVOLVE /* 60 repetitions of whatever `_' is currently defined as. */ \
+    _ _ _ _ _ _ _ _ _ _  _ _ _ _ _ _ _ _ _ _ \
+    _ _ _ _ _ _ _ _ _ _  _ _ _ _ _ _ _ _ _ _ \
+    _ _ _ _ _ _ _ _ _ _  _ _ _ _ _ _ _ _ _ _
+
+#define HALF_FIR_LEN_2 (iAL(half_fir_coefs) - 1) /* Taps each side of centre. */
+#define HALF_FIR_LEN_4 (HALF_FIR_LEN_2 / 2) /* Tap pairs used by double_fir0/1. */
+
+/* Symmetric FIR centred on input[0], using every entry of half_fir_coefs. */
+static float half_fir(float const * input)
+{
+  float acc = input[0] * half_fir_coefs[0];
+  long k;
+  /* Tap k weights the samples k places before and after the centre. */
+  for (k = 1; k <= HALF_FIR_LEN_2; ++k)
+    acc += (input[-k] + input[k]) * half_fir_coefs[k];
+  return (float)acc;
+}
+
+/* Even-indexed entries of half_fir_coefs, centred on input[0]; gain of 2. */
+static float double_fir0(float const * input)
+{
+  float acc = input[0] * half_fir_coefs[0];
+  int k;
+  /* Coefficient 2k weights the samples k places either side of the centre. */
+  for (k = 1; k <= HALF_FIR_LEN_4; ++k)
+    acc += (input[-k] + input[k]) * half_fir_coefs[2*k];
+  return (float)(acc * 2);
+}
+
+/* Odd-indexed entries of half_fir_coefs, between input[0] & input[1]; x2. */
+static float double_fir1(float const * input)
+{
+  float acc = 0;
+  int k;
+  /* Coefficient 2k+1 weights input[-k] together with input[1+k]. */
+  for (k = 0; k < HALF_FIR_LEN_4; ++k)
+    acc += (input[-k] + input[1+k]) * half_fir_coefs[2*k+1];
+  return (float)(acc * 2);
+}
+
+static float fast_half_fir(float const * input)
+{
+  /* Weights for the odd offsets +-1, +-3, ... +-11; the centre weighs .5. */
+  static const float coefs[] = {
+    .3094188462713818f, -.08198144615199748f, .03055232105456833f,
+    -.01015890277986387f, .002513237297525149f, -.0003469672050347395f,
+  };
+  float acc = input[0] * .5f;
+  int k;
+  for (k = 0; k < (int)(sizeof(coefs) / sizeof(coefs[0])); ++k)
+    acc += (input[-(2*k+1)] + input[2*k+1]) * coefs[k];
+  return (float)acc;
+}
+
+static const float iir_coefs[] = { /* One per filter state in half_iir_t.y. */
+  .0262852045255816f, .0998310478296204f, .2068650611060755f,
+  .3302241336172489f, .4544203620946318f, .5685783569471244f,
+  .6669444657994612f, .7478697711807407f, .8123244036799226f,
+  .8626000999654434f, .9014277444199280f, .9314860567781748f,
+  .9551915287878752f, .9746617828910630f, .9917763050166036f,
+  };
+#define IIR_FILTER _ _ _ _ _ _ _ /* 7 repetitions of the current `_'. */
+#define _ in1=(in1-p->y[i])*iir_coefs[i]+tmp1;tmp1=p->y[i],p->y[i]=in1;++i;\
+          in0=(in0-p->y[i])*iir_coefs[i]+tmp0;tmp0=p->y[i],p->y[i]=in0;++i;
+
+typedef struct {float x[2], y[AL(iir_coefs)];} half_iir_t; /* x: last inputs. */
+
+static float half_iir1(half_iir_t * p, float in0, float in1)
+{ /* Passes in0 & in1 through interleaved filter sections; returns their sum. */
+  int i = 0;
+  float tmp0, tmp1;
+  tmp0 = p->x[0], p->x[0] = in0; /* tmpN: the input saved by the last call. */
+  tmp1 = p->x[1], p->x[1] = in1;
+  IIR_FILTER /* Each `_' updates y[i] using in1, then y[i+1] using in0. */
+  p->y[i] = in1 = (in1 - p->y[i]) * iir_coefs[i] + tmp1; /* in1 gets 1 more. */
+  return in1 + in0;
+}
+#undef _
+
+static void half_iir(half_iir_t * p, float * obuf, float const * ibuf, int olen)
+{
+  int k = 0; /* Output k is made from input samples 2k and 2k+1. */
+  for (; k < olen; ++k) obuf[k] = (float)half_iir1(p, ibuf[k*2], ibuf[k*2+1]);
+}
+
+static void half_phase(half_iir_t * p, float * buf, int len)
+{ /* Filters buf in place using half_iir1() with 0 as every second input. */
+  float const small_normal = 1/MULT32/MULT32; /* To quash denormals on path 0.*/
+  int i;
+  for (i = 0; i < len; buf[i] = (float)half_iir1(p, buf[i], 0), ++i);
+#define _ p->y[i] += small_normal, i += 2;
+  i = 0, _ IIR_FILTER /* Add to (then, below, subtract from) y[0], y[2]..y[14]. */
+#undef _
+#define _ p->y[i] -= small_normal, i += 2;
+  i = 0, _ IIR_FILTER
+#undef _
+}
+
+#define raw_coef_t float
+static const raw_coef_t coefs0_d[POLY_FIR_LEN_D / 2 * PHASES0_D + 1] = { /* 1st half + centre; COEF() mirrors. */
+  0.f, 1.4057457935754080e-5f, 2.3302768424632188e-5f, 4.0084897378442095e-5f,
+  6.1916773126231636e-5f, 8.7973434034929016e-5f, 1.1634847507082481e-4f,
+  1.4391931654629385e-4f, 1.6635470822160746e-4f, 1.7830838562749493e-4f,
+  1.7382737311735053e-4f, 1.4698011689178234e-4f, 9.2677933545427018e-5f,
+  7.6288745483685147e-6f, -1.0867156553965507e-4f, -2.5303924530322309e-4f,
+  -4.1793463959360433e-4f, -5.9118012513731508e-4f, -7.5619603440508576e-4f,
+  -8.9285245696990080e-4f, -9.7897684238178358e-4f, -9.9248131798952959e-4f,
+  -9.1398576537725926e-4f, -7.2972364732199553e-4f, -4.3443557115962946e-4f,
+  -3.3895523979487613e-5f, 4.5331297364457429e-4f, 9.9513966802111057e-4f,
+  1.5468348913161652e-3f, 2.0533350794358640e-3f, 2.4533031436958950e-3f,
+  2.6846707315385087e-3f, 2.6913237051575155e-3f, 2.4303724507982708e-3f,
+  1.8792817173578587e-3f, 1.0420231121204950e-3f, -4.6617252898486750e-5f,
+  -1.3193786988492551e-3f, -2.6781478874181100e-3f, -3.9992272197487003e-3f,
+  -5.1422613336274056e-3f, -5.9624224517967755e-3f, -6.3250283969908542e-3f,
+  -6.1213677360236101e-3f, -5.2841872043022185e-3f, -3.8011036067186429e-3f,
+  -1.7241752288145494e-3f, 8.2596463599396213e-4f, 3.6626436307478369e-3f,
+  6.5430316636724021e-3f, 9.1853404499045010e-3f, 1.1292516396583619e-2f,
+  1.2580791345879052e-2f, 1.2810714562937180e-2f, 1.1817712330677889e-2f,
+  9.5388893881204976e-3f, 6.0327678128662696e-3f, 1.4889921444742027e-3f,
+  -3.7742770128030593e-3f, -9.3265389310393538e-3f, -1.4654680466977541e-2f,
+  -1.9204813565928323e-2f, -2.2433342812570076e-2f, -2.3863084249865732e-2f,
+  -2.3139248817097825e-2f, -2.0079526147977360e-2f, -1.4712465100990968e-2f,
+  -7.2989072959128900e-3f, 1.6676055337427264e-3f, 1.1483818597217116e-2f,
+  2.1283378291010333e-2f, 3.0104924254589629e-2f, 3.6977102234817580e-2f,
+  4.1013752396638667e-2f, 4.1510805491867378e-2f, 3.8035383354576423e-2f,
+  3.0497421566956902e-2f, 1.9194910514469185e-2f, 4.8255960959712636e-3f,
+  -1.1539393212932630e-2f, -2.8521204184392364e-2f, -4.4535662544571142e-2f,
+  -5.7926040870466614e-2f, -6.7116245375785713e-2f, -7.0771566186484461e-2f,
+  -6.7952220045636696e-2f, -5.8244261062898019e-2f, -4.1853211028450271e-2f,
+  -1.9648003905967236e-2f, 6.8535507014343263e-3f, 3.5561844452076982e-2f,
+  6.3953651316164553e-2f, 8.9264185854578418e-2f, 1.0872025112127688e-1f,
+  1.1979689474056175e-1f, 1.2047646491371326e-1f, 1.0948710929592399e-1f,
+  8.6497869185231543e-2f, 5.2249701648862154e-2f, 8.6059406690018377e-3f,
+  -4.1488376792262582e-2f, -9.4141677945723271e-2f, -1.4474093381170536e-1f,
+  -1.8825408052888104e-1f, -2.1958987927558168e-1f, -2.3398931875783419e-1f,
+  -2.2741860176576378e-1f, -1.9693206642095332e-1f, -1.4097432039328661e-1f,
+  -5.9594435654526039e-2f, 4.5448949025739843e-2f, 1.7070477403312445e-1f,
+  3.1117273816011837e-1f, 4.6056631075658744e-1f, 6.1167961235662682e-1f,
+  7.5683349228721264e-1f, 8.8836924234920911e-1f, 9.9915393319190682e-1f,
+  1.0830597619389459e+0f, 1.1353812335460003e+0f, 1.1531583819295732e+0f,
+};
+
+static const raw_coef_t coefs0_u[POLY_FIR_LEN_U / 2 * PHASES0_U + 1] = { /* 1st half + centre; COEF() mirrors. */
+  0.f, 2.4376543962047211e-5f, 9.7074354091545404e-5f, 2.5656573977863553e-4f,
+  5.2734092391248152e-4f, 8.9078135146855391e-4f, 1.2494786883827907e-3f,
+  1.4060353542261659e-3f, 1.0794576035695273e-3f, -2.1547711862939183e-5f,
+ -2.0658693124381805e-3f, -4.9333908355966233e-3f, -8.0713165910440213e-3f,
+ -1.0451560117817383e-2f, -1.0703998868319438e-2f, -7.4626412699536097e-3f,
+  1.0898921033926621e-4f, 1.1734475997741493e-2f, 2.5579413661660957e-2f,
+  3.8168952738129619e-2f, 4.4846162998312754e-2f, 4.0821915377309274e-2f,
+  2.2679961923658700e-2f, -9.9957152600624218e-3f, -5.3343924460223908e-2f,
+ -9.8792607573741240e-2f, -1.3382736970823086e-1f, -1.4404307655147228e-1f,
+ -1.1619851747063137e-1f, -4.1649695271274462e-2f, 8.0680482815468343e-2f,
+  2.4264355486537642e-1f, 4.2712782955601925e-1f, 6.1041328492424185e-1f,
+  7.6625948559498691e-1f, 8.7088876549652772e-1f, 9.0774244518772884e-1f,
+};
+
+#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, \
+    fir_coef_num) coef_p[(fir_len) * ((interp_order) + 1) * (phase_num) + \
+    ((interp_order) + 1) * (fir_coef_num) + (interp_order - coef_interp_num)] /* [phase][tap][term], highest-order term first. */
+
+#define COEF(h,l,i) ((i)<0||(i)>=(l)?0:(h)[(i)>(l)/2?(l)-(i):(i)]) /* h, mirrored about l/2; 0 outside [0,l). */
+static void prepare_coefs(float * coefs, int n, int phases0, int phases,
+    raw_coef_t const * coefs0, double multiplier)
+{ /* Expands coefs0 (phases0 points per tap) into coefs (phases per tap). */
+  double k[6]; /* Window of 6 consecutive raw points; k[K0] is the current. */
+  int length0 = n * phases0, length = n * phases, K0 = iAL(k)/2 - 1, i, j, pos;
+  raw_coef_t * coefs1 = malloc(((size_t)length / 2  + 1) * sizeof(*coefs1));
+  raw_coef_t * p = coefs1, f0, f1 = 0; /* NOTE(review): malloc is unchecked. */
+
+  for (j = 0; j < iAL(k); k[j] = COEF(coefs0, length0, j - K0), ++j);
+  for (pos = i = 0; i < length0 / 2; ++i) { /* a..e: 5th-order polynomial in x. */
+    double b=(1/24.)*(k[0]+k[4]+6*k[2]-4*(k[1]+k[3])),d=.5*(k[1]+k[3])-k[2]-b;
+    double a=(1/120.)*(k[5]-k[2]-9*(9*b+d)+2.5*(k[3]-k[1])-2*(k[4]-k[0]));
+    double c=(1/12.)*(k[4]-k[0]-2*(k[3]-k[1])-60*a),e=.5*(k[3]-k[1])-a-c;
+    for (; pos / phases == i; pos += phases0) { /* New points within raw step i. */
+      double x = (double)(pos % phases) / phases;
+      *p++ = (raw_coef_t)(k[K0] + ((((a*x + b)*x + c)*x + d)*x + e)*x);
+    }
+    for (j = 0; j < iAL(k) - 1; k[j] = k[j + 1], ++j); /* Slide the window on. */
+    k[j] = COEF(coefs0, length0, i + iAL(k) / 2 + 1);
+  }
+  if (!(length & 1))
+    *p++ = (raw_coef_t)k[K0];
+  assert(p - coefs1 == length / 2  + 1);
+
+  for (i = 0; i < n; ++i) for (j = phases - 1; j >= 0; --j, f1 = f0) {
+    pos = (n - 1 - i) * phases + j;
+    f0 = COEF(coefs1, length, pos) * (raw_coef_t)multiplier;
+    coef(coefs, 1, n, j, 0, i) = (float)f0; /* The scaled value, and... */
+    coef(coefs, 1, n, j, 1, i) = (float)(f1 - f0); /* ...step to the prior f0. */
+  }
+  free(coefs1);
+}
+
+#define _ sum += (b *x + a)*input[i], ++i; /* One tap, with coef = a + b*x. */
+#define a (coef(poly_fir_coefs_d, 1, POLY_FIR_LEN_D, phase, 0,i))
+#define b (coef(poly_fir_coefs_d, 1, POLY_FIR_LEN_D, phase, 1,i))
+static float poly_fir_coefs_d[POLY_FIR_LEN_D * PHASES_D * 2]; /* Set by prepare_coefs(). */
+
+static float poly_fir1_d(float const * input, uint32_t frac)
+{ /* One output sample: top bits of frac select the phase; the rest give x. */
+  int i = 0, phase = (int)(frac >> (32 - PHASE_BITS_D));
+  float sum = 0, x = (float)(frac << PHASE_BITS_D) * (float)(1 / MULT32);
+  _ _ _ _ _  _ _ _ _ _  _ _ _ _ _  _ _ _ _ _
+  assert(i == POLY_FIR_LEN_D);
+  return (float)sum;
+}
+#undef a
+#undef b
+#define a (coef(poly_fir_coefs_u, 1, POLY_FIR_LEN_U, phase, 0,i))
+#define b (coef(poly_fir_coefs_u, 1, POLY_FIR_LEN_U, phase, 1,i))
+static float poly_fir_coefs_u[POLY_FIR_LEN_U * PHASES_U * 2]; /* Set by prepare_coefs(). */
+
+static float poly_fir1_u(float const * input, uint32_t frac)
+{ /* One output sample: top bits of frac select the phase; the rest give x. */
+  int i = 0, phase = (int)(frac >> (32 - PHASE_BITS_U));
+  float sum = 0, x = (float)(frac << PHASE_BITS_U) * (float)(1 / MULT32);
+  _ _ _ _ _  _ _ _ _ _  _ _
+  assert(i == POLY_FIR_LEN_U);
+  return (float)sum;
+}
+#undef a
+#undef b
+#undef _
+
+#define ADD_TO(x,y)           x.all += y.all
+#define SUBTRACT_FROM(x,y)    x.all -= y.all
+#define FRAC(x)               x.part.frac
+#define INT(x)                x.part.integer
+
+typedef struct {
+  union {
+    int64_t all; /* Whole 64-bit value; `part' gives its two 32-bit halves. */
+#if WORDS_BIGENDIAN
+    struct {int32_t integer; uint32_t frac;} part;
+#else
+    struct {uint32_t frac; int32_t integer;} part;
+#endif
+  } at, step, step_step; /* Position; added to at; added to step. */
+  float const * input;
+  int len, stage_num; /* Output stops once INT(at) reaches len. */
+  bool is_d; /* true: downsampling at x2 rate; false: upsampling at 1x rate. */
+  double step_mult; /* Converts an io_ratio to a step; see set_step(). */
+} stream_t;
+
+static int poly_fir_d(stream_t * s, float * output, int olen)
+{ /* Makes output samples in pairs; returns the (even) number made. */
+  float const * input = s->input - POLY_FIR_LEN_D / 2 + 1;
+  int n;
+  for (n = 0; n < olen && INT(s->at) < s->len; n += 2) {
+    output[n] = poly_fir1_d(input + INT(s->at), FRAC(s->at));
+    ADD_TO(s->at, s->step);
+    if (INT(s->at) >= s->len) { /* No input for the 2nd sample: step back. */
+      SUBTRACT_FROM(s->at, s->step);
+      break;
+    }
+    output[n + 1] = poly_fir1_d(input + INT(s->at), FRAC(s->at));
+    ADD_TO(s->at, s->step);
+    ADD_TO(s->step, s->step_step);
+  }
+  return n;
+}
+
+static int poly_fir_fade_d(
+    stream_t * s, float const * vol, int step, float * output, int olen)
+{ /* As poly_fir_d, but adds each sample, scaled by the next vol, to output. */
+  float const * input = s->input - POLY_FIR_LEN_D / 2 + 1;
+  int n;
+  for (n = 0; n < olen && INT(s->at) < s->len; n += 2, vol += 2 * step) {
+    output[n] += vol[0] * poly_fir1_d(input + INT(s->at), FRAC(s->at));
+    ADD_TO(s->at, s->step);
+    if (INT(s->at) >= s->len) { /* No input for the 2nd sample: step back. */
+      SUBTRACT_FROM(s->at, s->step);
+      break;
+    }
+    output[n + 1] += vol[step] * poly_fir1_d(input + INT(s->at),FRAC(s->at));
+    ADD_TO(s->at, s->step);
+    ADD_TO(s->step, s->step_step);
+  }
+  return n;
+}
+
+static int poly_fir_u(stream_t * s, float * output, int olen)
+{ /* Makes one output sample per step; returns the number made. */
+  float const * input = s->input - POLY_FIR_LEN_U / 2 + 1;
+  int n = 0;
+  while (n < olen && INT(s->at) < s->len) {
+    output[n++] = poly_fir1_u(input + INT(s->at), FRAC(s->at));
+    ADD_TO(s->at, s->step);
+    ADD_TO(s->step, s->step_step);
+  }
+  return n;
+}
+
+static int poly_fir_fade_u(
+    stream_t * s, float const * vol, int step, float * output, int olen)
+{ /* Adds a scaled sample to every 2nd slot of output; returns slots passed. */
+  float const * input = s->input - POLY_FIR_LEN_U / 2 + 1;
+  int n = 0;
+  for (; n < olen && INT(s->at) < s->len; vol += step, n += 2) {
+    output[n] += vol[0] * poly_fir1_u(input + INT(s->at), FRAC(s->at));
+    ADD_TO(s->at, s->step);
+    ADD_TO(s->step, s->step_step);
+  }
+  return n;
+}
+
+#define shiftr(x,by) ((by) < 0? (x) << (-(by)) : (x) >> (by)) /* by may be < 0. */
+#define shiftl(x,by) shiftr(x,-(by))
+#define stage_occupancy(s) (fifo_occupancy(&(s)->fifo) - 4*HALF_FIR_LEN_2)
+#define stage_read_p(s) ((float *)fifo_read_ptr(&(s)->fifo) + 2*HALF_FIR_LEN_2)
+#define stage_preload(s) memset(fifo_reserve(&(s)->fifo, (s)->preload), \
+    0, sizeof(float) * (size_t)(s)->preload); /* Adds (s)->preload zeros. */
+
+typedef struct {
+  fifo_t fifo; /* Holds this stage's float samples. */
+  double step_mult; /* Copied into a stream by enter_new_stage(). */
+  int is_fast, x_fade_len, preload; /* preload: zeros added by stage_preload. */
+} stage_t;
+
+typedef struct {
+  int num_stages0, num_stages, flushing; /* num_stages = max(num_stages0, 1). */
+  int fade_len, slew_len, xfade, stage_inc, switch_stage_num;
+  double new_io_ratio, default_io_ratio; /* Each is 0 when none is pending. */
+  stage_t * stages; /* Indexed from -1; see vr_init(). */
+  fifo_t output_fifo;
+  half_iir_t halfer;
+  stream_t current, fadeout; /* Current/fade-in, fadeout streams. */
+} rate_t;
+
+static float fade_coefs[(2 << FADE_LEN_BITS) + 1]; /* Half-cosine, from 1 down to 0. */
+
+static void vr_init(rate_t * p, double default_io_ratio, int num_stages, double mult)
+{ /* Zeroes *p, creates its fifos and, on the first call, the shared tables. */
+  int i;
+  assert(num_stages >= 0);
+  memset(p, 0, sizeof(*p));
+
+  p->num_stages0 = num_stages;
+  p->num_stages = num_stages = max(num_stages, 1);
+  p->stages = (stage_t *)calloc((unsigned)num_stages + 1, sizeof(*p->stages)) + 1;
+  for (i = -1; i < p->num_stages; ++i) { /* NOTE(review): calloc above is unchecked. */
+    stage_t * s = &p->stages[i];
+    fifo_create(&s->fifo, sizeof(float));
+    s->step_mult = 2 * MULT32 / shiftl(2, i);
+    s->preload = i < 0? 0 : i == 0? 2 * HALF_FIR_LEN_2 : 3 * HALF_FIR_LEN_2 / 2;
+    stage_preload(s);
+    s->is_fast = true;
+    lsx_debug("%-3i preload=%i", i, s->preload);
+  }
+  fifo_create(&p->output_fifo, sizeof(float));
+  p->default_io_ratio = default_io_ratio;
+  if (!fade_coefs[0]) { /* fade_coefs[0] becomes 1 once the tables are built. */
+    for (i = 0; i < iAL(fade_coefs); ++i)
+      fade_coefs[i] = (float)(.5 * (1 + cos(M_PI * i / (AL(fade_coefs) - 1))));
+    prepare_coefs(poly_fir_coefs_u, POLY_FIR_LEN_U, PHASES0_U, PHASES_U, coefs0_u, mult);
+    prepare_coefs(poly_fir_coefs_d, POLY_FIR_LEN_D, PHASES0_D, PHASES_D, coefs0_d, mult *.5);
+  }
+  assert(fade_coefs[0]); /* NOTE(review): the tables keep the first call's mult. */
+}
+
+static void enter_new_stage(rate_t * p, int occupancy0)
+{ /* Points the current stream at the stage given by p->current.stage_num. */
+  stream_t * cur = &p->current;
+  stage_t * stage = &p->stages[cur->stage_num];
+
+  cur->len = shiftr(occupancy0, cur->stage_num);
+  cur->input = stage_read_p(stage);
+  cur->is_d = cur->stage_num >= 0;
+  cur->step_mult = cur->is_d? stage->step_mult * .5 : stage->step_mult;
+}
+
+static void set_step(stream_t * p, double io_ratio)
+{ /* step = io_ratio scaled by the stream's step_mult; the .5 is for rounding. */
+  p->step.all = (int64_t)(io_ratio * p->step_mult + .5);
+}
+
+static bool set_step_step(stream_t * p, double io_ratio, int slew_len)
+{ /* step_step = (step needed for io_ratio - step) / slew_len; true if != 0. */
+  stream_t target = *p;
+  int64_t delta;
+  int const half = slew_len >> 1;          /* To round to nearest. */
+  set_step(&target, io_ratio);
+  delta = target.step.all - p->step.all;
+  delta += delta < 0? -half : half;
+  if ((int)delta == delta) p->step_step.all = (int)delta / slew_len;
+  else p->step_step.all = delta / slew_len;    /* Needs an int64_t div. */
+  return p->step_step.all != 0;
+}
+
+static void vr_set_io_ratio(rate_t * p, double io_ratio, size_t slew_len)
+{ /* slew_len != 0: approach io_ratio in steps; otherwise change at once. */
+  assert(io_ratio > 0);
+  if (slew_len) {
+    if (!set_step_step(&p->current, io_ratio, p->slew_len = (int)slew_len))
+      p->slew_len = 0, p->new_io_ratio = 0, p->fadeout.step_step.all = 0;
+    else {
+      p->new_io_ratio = io_ratio; /* vr_process() sets this after the slew. */
+      if (p->fade_len)
+        set_step_step(&p->fadeout, io_ratio, p->slew_len);
+    }
+  }
+  else {
+    if (p->default_io_ratio) { /* Then this is the first call to this fn. */
+      int octave = (int)floor(log(io_ratio) / M_LN2);
+      p->current.stage_num = octave < 0? -1 : min(octave, p->num_stages0-1);
+      enter_new_stage(p, 0);
+    }
+    else if (p->fade_len)
+      set_step(&p->fadeout, io_ratio);
+    set_step(&p->current, io_ratio);
+    if (p->default_io_ratio) FRAC(p->current.at) = FRAC(p->current.step) >> 1;
+    p->default_io_ratio = 0; /* Marks the initial ratio as having been applied. */
+  }
+}
+
+static bool do_input_stage(rate_t * p, int stage_num, int sign, int min_stage_num)
+{ /* Feeds stage stage_num from stage stage_num - sign; false if nothing new. */
+  int i = 0;
+  float * dest;
+  stage_t * s = &p->stages[stage_num];
+  stage_t * s1 = &p->stages[stage_num - sign];
+  float const * src = (float *)fifo_read_ptr(&s1->fifo) + HALF_FIR_LEN_2;
+  int len = shiftr(fifo_occupancy(&s1->fifo) - HALF_FIR_LEN_2 * 2, sign);
+  int already_done = fifo_occupancy(&s->fifo) - s->preload;
+  if ((len -= already_done) <= 0)
+    return false;
+  src += shiftl(already_done, sign);
+
+  dest = fifo_reserve(&s->fifo, len);
+  if (stage_num < 0) for (; i < len; ++src) /* x2: 2 outputs per input sample. */
+    dest[i++] = double_fir0(src), dest[i++] = double_fir1(src);
+  else { /* Halve: each output sample advances src by 2. */
+    bool should_be_fast = p->stage_inc;
+    if (!s->x_fade_len && stage_num == p->switch_stage_num) {
+      p->switch_stage_num = 0;
+      if (s->is_fast != should_be_fast) {
+        s->x_fade_len = 1 << FADE_LEN_BITS, s->is_fast = should_be_fast, ++p->xfade;
+        lsx_debug("xfade level %i, inc?=%i", stage_num, p->stage_inc);
+      }
+    }
+    if (s->x_fade_len) { /* Cross-fade between fast_half_fir and half_fir. */
+      float const * vol1 = fade_coefs + (s->x_fade_len << 1);
+      float const * vol2 = fade_coefs + (((1 << FADE_LEN_BITS) - s->x_fade_len) << 1);
+      int n = min(len, s->x_fade_len);
+      /*lsx_debug("xfade level %i, inc?=%i len=%i n=%i", stage_num, p->stage_inc, s->x_fade_len, n);*/
+      if (should_be_fast)
+        for (; i < n; vol2 += 2, vol1 -= 2, src += 2)
+          dest[i++] = *vol1 * fast_half_fir(src) + *vol2 * half_fir(src);
+      else for (; i < n; vol2 += 2, vol1 -= 2, src += 2)
+        dest[i++] = *vol2 * fast_half_fir(src) + *vol1 * half_fir(src);
+      s->x_fade_len -= n;
+      p->xfade -= !s->x_fade_len;
+    }
+    if (stage_num < min_stage_num) /* The rest is made with a single filter. */
+      for (; i < len; dest[i++] = fast_half_fir(src), src += 2);
+    else for (; i < len; dest[i++] = half_fir(src), src += 2);
+  }
+  if (p->flushing > 0)
+    stage_preload(s);
+  return true;
+}
+
+static int vr_process(rate_t * p, int olen0)
+{ /* Makes up to olen0 samples in p->output_fifo; returns the number made. */
+  assert(p->num_stages > 0);
+  if (p->default_io_ratio)
+    vr_set_io_ratio(p, p->default_io_ratio, 0);
+  {
+    float * output = fifo_reserve(&p->output_fifo, olen0);
+    int j, odone0 = 0, min_stage_num = p->current.stage_num;
+    int occupancy0, max_stage_num = min_stage_num;
+    if (p->fade_len) {
+      min_stage_num = min(min_stage_num, p->fadeout.stage_num);
+      max_stage_num = max(max_stage_num, p->fadeout.stage_num);
+    }
+
+    for (j = min(min_stage_num, 0); j <= max_stage_num; ++j) /* Feed stages in use. */
+      if (j && !do_input_stage(p, j, j < 0? -1 : 1, min_stage_num))
+        break;
+    if (p->flushing > 0)
+      p->flushing = -1;
+
+    occupancy0 = shiftl(max(0,stage_occupancy(&p->stages[max_stage_num])), max_stage_num);
+    p->current.len = shiftr(occupancy0, p->current.stage_num);
+    p->current.input = stage_read_p(&p->stages[p->current.stage_num]);
+    if (p->fade_len) {
+      p->fadeout.len = shiftr(occupancy0, p->fadeout.stage_num);
+      p->fadeout.input = stage_read_p(&p->stages[p->fadeout.stage_num]);
+    }
+
+    while (odone0 < olen0) { /* Each pass makes at most 64 output samples. */
+      int odone, odone2, olen = olen0 - odone0, stage_dif = 0, shift;
+      float buf[64 << 1];
+
+      olen = min(olen, (int)(AL(buf) >> 1));
+      if (p->slew_len)
+        olen = min(olen, p->slew_len);
+      else if (p->new_io_ratio) { /* Slew is over: set the exact final step. */
+        set_step(&p->current, p->new_io_ratio);
+        set_step(&p->fadeout, p->new_io_ratio);
+        p->fadeout.step_step.all = p->current.step_step.all = 0;
+        p->new_io_ratio = 0;
+      }
+      if (!p->flushing && !p->fade_len && !p->xfade) { /* May change stage? */
+        if (p->current.is_d) {
+          if (INT(p->current.step) && FRAC(p->current.step))
+            stage_dif = 1, ++max_stage_num;
+          else if (!INT(p->current.step) && FRAC(p->current.step) < (1u << 31))
+            stage_dif = -1, --min_stage_num;
+        } else if (INT(p->current.step) > 1 && FRAC(p->current.step))
+          stage_dif = 1, ++max_stage_num;
+      }
+      if (stage_dif) {
+        int n = p->current.stage_num + stage_dif;
+        if (n >= p->num_stages)
+          --max_stage_num;
+        else {
+          p->stage_inc = stage_dif > 0;
+          p->fadeout = p->current; /* The old stream is now the one faded out. */
+          p->current.stage_num += stage_dif;
+          if (!p->stage_inc)
+          p->switch_stage_num = p->current.stage_num;
+          if ((p->current.stage_num < 0 && stage_dif < 0) ||
+              (p->current.stage_num > 0 && stage_dif > 0)) {
+            stage_t * s = &p->stages[p->current.stage_num];
+            fifo_clear(&s->fifo);
+            stage_preload(s);
+            s->is_fast = false;
+            do_input_stage(p, p->current.stage_num, stage_dif, p->current.stage_num);
+          }
+          if (p->current.stage_num > 0 && stage_dif < 0) {
+            int idone = INT(p->current.at);
+            stage_t * s = &p->stages[p->current.stage_num];
+            fifo_trim_to(&s->fifo, 2 * HALF_FIR_LEN_2 + idone + (POLY_FIR_LEN_D >> 1));
+            do_input_stage(p, p->current.stage_num, 1, p->current.stage_num);
+          }
+          enter_new_stage(p, occupancy0);
+          shift = -stage_dif;
+#define lshift(x,by) (x)=(by)>0?(x)<<(by):(x)>>-(by)
+          lshift(p->current.at.all, shift);
+          shift += p->fadeout.is_d - p->current.is_d;
+          lshift(p->current.step.all, shift);
+          lshift(p->current.step_step.all, shift);
+          p->fade_len = AL(fade_coefs) - 1;
+          lsx_debug("switch from stage %i to %i, x2 from %i to %i", p->fadeout.stage_num, p->current.stage_num, p->fadeout.is_d, p->current.is_d);
+        }
+      }
+
+      if (p->fade_len) { /* Cross-fade from the fadeout to the current stream. */
+        float const * vol1 = fade_coefs + p->fade_len;
+        float const * vol2 = fade_coefs + (iAL(fade_coefs) - 1 - p->fade_len);
+        int olen2 = (olen = min(olen, p->fade_len >> 1)) << 1;
+
+        /* x2 is more fine-grained so may fail to produce a pair of samples
+         * where x1 would not (the x1 second sample is a zero so is always
+         * available).  So do x2 first, then feed odone to the second one. */
+        memset(buf, 0, sizeof(*buf) * (size_t)olen2);
+        if (p->current.is_d && p->fadeout.is_d) {
+          odone  = poly_fir_fade_d(&p->current, vol1,-1, buf, olen2);
+          odone2 = poly_fir_fade_d(&p->fadeout, vol2, 1, buf, odone);
+        } else if (p->current.is_d) {
+          odone  = poly_fir_fade_d(&p->current, vol1,-1, buf, olen2);
+          odone2 = poly_fir_fade_u(&p->fadeout, vol2, 2, buf, odone);
+        } else {
+          assert(p->fadeout.is_d);
+          odone  = poly_fir_fade_d(&p->fadeout, vol2, 1, buf, olen2);
+          odone2 = poly_fir_fade_u(&p->current, vol1,-2, buf, odone);
+        }
+        assert(odone == odone2);
+        (void)odone2;
+        p->fade_len -= odone;
+        if (!p->fade_len) {
+          if (p->stage_inc)
+            p->switch_stage_num = min_stage_num++;
+          else
+            --max_stage_num;
+        }
+        half_iir(&p->halfer, &output[odone0], buf, odone >>= 1);
+      }
+      else if (p->current.is_d) { /* x2-rate samples in buf are then halved. */
+        odone = poly_fir_d(&p->current, buf, olen << 1) >> 1;
+        half_iir(&p->halfer, &output[odone0], buf, odone);
+      }
+      else {
+        odone = poly_fir_u(&p->current, &output[odone0], olen);
+        if (p->num_stages0)
+          half_phase(&p->halfer, &output[odone0], odone);
+      }
+      odone0 += odone;
+      if (p->slew_len)
+        p->slew_len -= odone;
+      if (odone != olen)
+        break; /* Need more input. */
+    } { /* Drop consumed input from the stage fifos; rebase the positions. */
+      int from = max(0, max_stage_num), to = min(0, min_stage_num);
+      int i, idone = shiftr(INT(p->current.at), from - p->current.stage_num);
+      INT(p->current.at) -= shiftl(idone, from - p->current.stage_num);
+      if (p->fade_len)
+        INT(p->fadeout.at) -= shiftl(idone, from - p->fadeout.stage_num);
+      for (i = from; i >= to; --i, idone <<= 1)
+        fifo_read(&p->stages[i].fifo, idone, NULL);
+    }
+    fifo_trim_by(&p->output_fifo, olen0 - odone0); /* Give back unused space. */
+    return odone0;
+  }
+}
+
+static float * vr_input(rate_t * p, float const * input, size_t n)
+{ /* Passes n and input to fifo_write() on stage 0's fifo; returns its result. */
+  return fifo_write(&p->stages[0].fifo, (int)n, input);
+}
+
+static float const * vr_output(rate_t * p, float * output, size_t * n)
+{ /* Reads up to *n samples from the output fifo; *n is set to the number read. */
+  fifo_t * fifo = &p->output_fifo;
+  if (1 || !p->num_stages0) /* `1 ||': the else part below is never run. */
+    return fifo_read(fifo, (int)(*n = min(*n, (size_t)fifo_occupancy(fifo))), output);
+  else { /* Ignore this complication for now. */
+    int const IIR_DELAY = 2;
+    float * ptr = fifo_read_ptr(fifo);
+    int olen = min((int)*n, max(0, fifo_occupancy(fifo) - IIR_DELAY));
+    *n = (size_t)olen;
+    if (output)
+      memcpy(output, ptr + IIR_DELAY, *n * sizeof(*output));
+    fifo_read(fifo, olen, NULL);
+    return ptr + IIR_DELAY;
+  }
+}
+
+static void vr_flush(rate_t * p)
+{ /* On the first call only: adds zeros to stage 0 and sets p->flushing. */
+  if (p->flushing)
+    return;
+  stage_preload(&p->stages[0]);
+  ++p->flushing;
+}
+
+static void vr_close(rate_t * p)
+{ /* Deletes every fifo, then frees the stage array (its base is stages[-1]). */
+  stage_t * s = p->stages - 1;
+  stage_t * const end = p->stages + p->num_stages;
+
+  fifo_delete(&p->output_fifo);
+  for (; s < end; ++s)
+    fifo_delete(&s->fifo);
+
+  free(p->stages - 1);
+}
+
+static double vr_delay(rate_t * p)
+{
+  (void)p;    /* Not used as yet. */
+  return 100; /* TODO */
+}
+
+static void vr_sizes(size_t * shared, size_t * channel)
+{ /* Reports sizes: 0 via `shared'; sizeof(rate_t) via `channel'. */
+  *shared = 0;
+  *channel = sizeof(rate_t);
+}
+
+static char const * vr_create(void * channel, void * shared,double max_io_ratio,
+    void * q_spec, void * r_spec, double scale)
+{ /* Stage count = the number of halvings that take max_io_ratio to <= 1. */
+  double ratio = max_io_ratio;
+  int halvings = 0;
+  (void)shared, (void)q_spec, (void)r_spec;
+  for (; ratio > 1; ratio *= .5) ++halvings;
+  vr_init(channel, max_io_ratio, halvings, scale);
+  return 0;
+}
+
+static char const * vr_id(void)
+{ /* Returns a fixed string naming this resampler. */
+  return "single-precision variable-rate";
+}
+
+typedef void (* fn_t)(void); /* Each entry is cast to this common type. */
+fn_t _soxr_vr32_cb[] = { /* NOTE(review): order presumably fixed by the user of this table; confirm. */
+  (fn_t)vr_input,
+  (fn_t)vr_process,
+  (fn_t)vr_output,
+  (fn_t)vr_flush,
+  (fn_t)vr_close,
+  (fn_t)vr_delay,
+  (fn_t)vr_sizes,
+  (fn_t)vr_create,
+  (fn_t)vr_set_io_ratio,
+  (fn_t)vr_id,
+};
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 0000000..c6b40fb
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,50 @@
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+add_definitions (${PROJECT_C_FLAGS})
+link_libraries (${PROJECT_NAME})
+
+file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.c)
+foreach (fe ${SOURCES}) # One executable per .c file in this directory.
+  get_filename_component (f ${fe} NAME_WE)
+  add_executable (${f} ${fe})
+endforeach ()
+
+enable_testing ()
+
+set (sweep_to_freq 22050)
+set (leader 1)
+set (len 16)
+math (EXPR base_rate "${sweep_to_freq} + ${sweep_to_freq}") # = 2 * sweep_to_freq
+
+macro (add_vector r) # Adds a vector-gen rule making ref-<r>.s32; appends it to `vectors'.
+  set (output ${CMAKE_CURRENT_BINARY_DIR}/ref-${r}.s32)
+  add_custom_command (OUTPUT ${output} DEPENDS vector-gen ${CMAKE_CURRENT_LIST_FILE}
+    COMMAND vector-gen ${r} ${leader} ${len} ${sweep_to_freq} 1 ${output})
+  set (vectors ${output} ${vectors})
+endmacro ()
+
+macro (add_cmp_test from to bits) # Adds a test that runs cmp-test.cmake, plus both vectors.
+  set (name ${bits}-bit-perfect-${from}-${to})
+  add_test (NAME ${name} COMMAND ${CMAKE_COMMAND} -Dbits=${bits} -DBIN=${BIN} -DEXAMPLES_BIN=${EXAMPLES_BIN} -Dleader=${leader} -Dto=${to}
+    -Dfrom=${from} -Dlen=${len} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmp-test.cmake)
+  add_vector (${from})
+  add_vector (${to})
+endmacro ()
+
+unset (test_bits) # Then add 20 and/or 24 according to the enabled precisions.
+if (WITH_SINGLE_PRECISION)
+  set (test_bits 20)
+endif ()
+if (WITH_DOUBLE_PRECISION)
+  set (test_bits ${test_bits} 24)
+endif ()
+
+foreach (b ${test_bits})
+  foreach (r 96000 65537) # Each rate is tested both from and to base_rate.
+    add_cmp_test (${base_rate} ${r} ${b})
+    add_cmp_test (${r} ${base_rate} ${b})
+  endforeach ()
+endforeach ()
+
+add_custom_target (test-vectors ALL DEPENDS ${vectors}) # Vectors are made by the default build.
diff --git a/tests/README b/tests/README
new file mode 100644
index 0000000..44460d6
--- /dev/null
+++ b/tests/README
@@ -0,0 +1 @@
+A few tests on the pass-band performance; not a comprehensive test suite.
diff --git a/tests/cmp-test.cmake b/tests/cmp-test.cmake
new file mode 100644
index 0000000..1b1ae4b
--- /dev/null
+++ b/tests/cmp-test.cmake
@@ -0,0 +1,28 @@
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+# Run as: cmake -Dbits= -Dfrom= -Dto= -Dleader= -Dlen= -DBIN= -DEXAMPLES_BIN= -P cmp-test.cmake
+if ("${bits}" STREQUAL "24")
+  set (quality 45)
+else ()
+  set (quality 44)
+endif ()
+# Resample ref-${from}.s32 to rate ${to}. NOTE(review): the other arguments are defined by 3-options-input-fn.
+execute_process(COMMAND ${EXAMPLES_BIN}3-options-input-fn ${from} ${to} 1 2 2 ${quality} a
+  INPUT_FILE ref-${from}.s32
+  OUTPUT_FILE ${from}-${to}.s32
+  ERROR_VARIABLE test_error
+  RESULT_VARIABLE test_result)
+
+if (test_result)
+  message (FATAL_ERROR "Resampling failure: ${test_error}")
+endif ()
+# Compare with the reference vector at rate ${to}; 98 is vector-cmp's minimum acceptable bandwidth (%).
+execute_process(COMMAND ${BIN}vector-cmp ref-${to}.s32 ${from}-${to}.s32 ${to} ${leader} ${len} ${bits} 98
+  OUTPUT_VARIABLE test_output ERROR_VARIABLE test_error
+  RESULT_VARIABLE test_result)
+# Report the result code and stderr too, so a crashed or missing vector-cmp does not fail with an empty message.
+if (test_result)
+  message (FATAL_ERROR "Comparison failure (${test_result}): ${test_output}${test_error}")
+else ()
+  message (STATUS "${test_output}")
+endif ()
diff --git a/tests/eg-test b/tests/eg-test
new file mode 100755
index 0000000..de11abd
--- /dev/null
+++ b/tests/eg-test
@@ -0,0 +1,29 @@
+#!/bin/bash
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+# Exercises the example programs; expects them (and sox) to be runnable from the current directory.
+len=8
+#vg="valgrind --leak-check=full --show-reachable=yes"
+
+$vg ./1-single-block
+
+# Every executable matching [2-4]* is fed the same input and its output is
+# compared (cmp) with the previous one's: first 96000 -> 44100, then 44100 -> 96000.
+for rates in 96000:44100 44100:96000; do
+  ir=${rates%:*} or=${rates#*:}
+  prev=""
+  sox -r $ir -n 0.f32 synth $len sin 0+$((ir / 2))
+  for exe in $(find . -type f -executable -name "[2-4]*"); do
+    $vg $exe $ir $or < 0.f32 > $exe.f32
+    if [ -n "$prev" ]; then cmp $exe.f32 $prev; fi
+    prev=$exe.f32
+  done
+done
+rm *.f32
+
+rm ?.png
+# One spectrogram per 5-variable-rate mode; only the first run goes through $vg.
+for n in 0 1 2 3; do
+  $vg ./5-variable-rate $n | sox -tf32 -r44100 -c1 - -n spectrogram -hwk -o $n.png -X 50
+  vg=""
+done
diff --git a/tests/io-test b/tests/io-test
new file mode 100755
index 0000000..98fd351
--- /dev/null
+++ b/tests/io-test
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+# Usage: io-test [max-channels]      (default: 2)
+ir=96000
+or=44100
+len=16
+f=0+48k
+g=48k+0
+ex=./3-options-input-fn
+
+types=(f32 f64 s32 s16)
+# do_one <in-type-index> <out-type-index> <arg>: resample $c.<in-type>, plot the result to io$n$c.png.
+do_one() {
+  local in=$1 out=$2      # indices into ${types[@]}
+  $ex $ir $or $c $in $out $3 < $c.${types[$in]} |
+  sox -t ${types[$out]} -r $or -c $c - -n spectrogram -X50 -hwk -z180 -o io$n$c.png; n=$((n + 1))
+}
+
+rm io??.png
+
+j=${1:-2}
+
+for c in $(seq 1 $j); do
+  for n in 0 1 2 3; do
+    sox -r $ir -n $c.${types[$n]} synth $len sin $f gain -.1
+  done
+
+  n=0
+  for m in 0 1 2 3; do do_one $m $m 4; done
+  do_one 1 2 5
+  do_one 2 0 5
+  do_one 3 2 4
+  do_one 0 3 4
+  # The next channel count gets one more synthesised sweep.
+  f="$f sin $g"
+  g=48k:0
+done
+
+rm ?.[sf][0-9][0-9]
diff --git a/tests/large-ratio b/tests/large-ratio
new file mode 100755
index 0000000..8ad645e
--- /dev/null
+++ b/tests/large-ratio
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SoX Resampler Library       Copyright (c) 2007-12 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# Warning: the intermediate signal (piped) is 3.2 Gbytes so may slug the
+# system somewhat.
+
+resample=./3-options-input-fn
+q=6
+ratio=1e5
+rm lr.png
+# Source signal: rate 1000, no lead-in, 8 s sweep up to 500, multiplier .9375.
+./vector-gen 1000 0 8 500 .9375 1.s32
+# Resample up by $ratio and straight back down again, through a pipe.
+$resample 1 $ratio 1 2 1 $q < 1.s32 | $resample $ratio 1 1 1 2 $q > 2.s32
+# Plot the original and the round-tripped signal together (-M merges them as channels).
+/usr/bin/sox -M -r 1k 1.s32 -r 1k 2.s32 -n spectrogram -hwk -z180 -o lr.png
+
+display lr.png &
+
+rm [12].s32
diff --git a/tests/vector-cmp.c b/tests/vector-cmp.c
new file mode 100644
index 0000000..108afb6
--- /dev/null
+++ b/tests/vector-cmp.c
@@ -0,0 +1,79 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Utility used to help test the library; not for general consumption.
+ *
+ * Compare two swept-sine files.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "../src/rint.h"
+
+/* Usage: vector-cmp file1 file2 rate leader-len len expect-bits expect-bw
+ *
+ * Reads both files as raw 32-bit integer samples and records, for each bit of
+ * the absolute sample difference, the first sample at which that bit is set.
+ * From that it reports how many bits match and up to what point of the sweep
+ * (as a percentage of len).  All messages go to stdout.
+ *
+ * Returns 0 if the result meets expect-bits and expect-bw; otherwise (or on
+ * bad usage, an unreadable file, or an unexpected file length) returns 1.  */
+int main(int bit, char const * arg[])
+{
+  FILE    * f1, * f2;
+  double  rate, leader_len, len, expect_bits, expect_bw;
+  int32_t s1, s2;
+  long count = 0;
+  static long thresh[32];   /* thresh[b]: 1 + index of first sample differing in bit b; 0 if none */
+  double bw, prev = 0;
+
+  if (bit < 8) {            /* On entry, bit holds the argument count */
+    printf("usage: vector-cmp file1 file2 rate leader-len len expect-bits expect-bw\n");
+    return 1;
+  }
+  f1 = fopen(arg[1], "rb");
+  f2 = fopen(arg[2], "rb");
+  if (!f1 || !f2) {
+    printf("cannot open %s\n", arg[f1? 2 : 1]);
+    return 1;
+  }
+  rate        = atof(arg[3]); /* Rate for this vector */
+  leader_len  = atof(arg[4]); /* Leader length in seconds */
+  len         = atof(arg[5]); /* Sweep length (excl. leader_len) */
+  expect_bits = atof(arg[6]); /* Minimum acceptable number of matching bits */
+  expect_bw   = atof(arg[7]); /* Minimum acceptable bandwidth, in % */
+
+  for (; fread(&s1, sizeof(s1), 1, f1) == 1 &&
+         fread(&s2, sizeof(s2), 1, f2) == 1; ++count) {
+    /* |s1 - s2| in unsigned arithmetic: the signed difference of two 32-bit
+     * samples can overflow an int.  */
+    unsigned long diff = s1 > s2? (unsigned long)s1 - (unsigned long)s2
+                                : (unsigned long)s2 - (unsigned long)s1;
+    for (bit = 0; diff && bit < 32; bit++, diff >>= 1)
+      if ((diff & 1) && !thresh[bit])
+        thresh[bit] = count + 1;
+  }
+  fclose(f1);
+  fclose(f2);
+
+  if (count != (long)((leader_len + len) * rate + .5)) {
+    printf("incorrect file length\n");
+    return 1;
+  }
+
+  /* Walk up from the LSB until a bit's first difference falls within the
+   * sweep and advances by less than .08% of len over that of the bit below.  */
+  for (bit = 0; bit < 32; ++bit) {
+    bw = ((double)thresh[bit] - 1) / rate - leader_len;
+    if (bit && bw >= 0 && (bw - prev) * 100 / len < .08) {
+      --bit;
+      break;
+    }
+    prev = bw;
+  }
+  bit = 32 - bit;
+  bw = bw * 100 / len;
+  printf("Bit perfect to %i bits, from DC to %.2f%% nyquist.\n", bit, bw);
+  return !(bit >= expect_bits && bw >= expect_bw);
+}
diff --git a/tests/vector-gen.c b/tests/vector-gen.c
new file mode 100644
index 0000000..83971e2
--- /dev/null
+++ b/tests/vector-gen.c
@@ -0,0 +1,77 @@
+/* SoX Resampler Library      Copyright (c) 2007-12 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Utility used to help test the library; not for general consumption.
+ *
+ * Generate a swept sine to a file, with faded `lead-in' section.  */
+
+#define QUAD 0
+
+#if QUAD
+  #include <quadmath.h>
+#endif
+
+#include "../examples/examples-common.h"
+
+#if QUAD
+  #define modf modfq
+  #define cos cosq
+  #define sin sinq
+  #undef M_PI
+  #define M_PI M_PIq
+  #define real __float128
+  #define atof(x) strtoflt128(x, 0)
+#else
+  #define real double
+  #include "rint.h"
+#endif
+
+/* Usage: vector-gen rate lead-in-len len sweep-to-freq multiplier file
+ *
+ * Writes the samples numbered -lead-in-len * rate (which must be integral)
+ * up to len * rate to file; those before sample 0 are faded in with a
+ * raised cosine.  Unless QUAD is set, samples are written as 32-bit integers.
+ *
+ * Returns 0 on success, 1 on bad arguments or any file error.  */
+int main(int i, char const * argv[])
+{
+  real rate, lead_in_len, len, sweep_to_freq, multiplier;
+  real f1, f2, n1, n2, m, dummy;
+  FILE * file;
+
+  if (i < 7) {              /* On entry, i holds the argument count */
+    fprintf(stderr, "usage: vector-gen rate lead-in-len len sweep-to-freq multiplier file\n");
+    return 1;
+  }
+  rate          = atof(argv[1]); /* Rate for this vector */
+  lead_in_len   = atof(argv[2]); /* Lead-in length in seconds */
+  len           = atof(argv[3]); /* Sweep length (excl. lead_in_len) */
+  sweep_to_freq = atof(argv[4]); /* Sweep from DC to this freq. */
+  multiplier    = atof(argv[5]); /* For headroom */
+  f1 = -sweep_to_freq / len * lead_in_len; f2 = sweep_to_freq;
+  n1 = rate * -lead_in_len; n2 = rate * len;
+  m = (f2 - f1) / (n2 - n1) / 2;
+
+  i = (int)n1;
+  if (i != n1)              /* Validate before creating the output file */
+    return 1;
+  file = fopen(argv[6], "wb");
+  if (!file)
+    return 1;
+
+  for (; i < (int)(n2 + .5); ++i) {
+    double d1 = multiplier * sin(2 * M_PI * modf(i * m * i / rate, &dummy));
+    double d = i < 0? d1 * (1 - cos(M_PI * (i + n1) / n1)) * .5 : d1;
+#if QUAD
+    size_t actual = fwrite(&d, sizeof(d), 1, file);
+#else
+    int32_t out = rint32(d * (32768. * 65536 - 1));
+    size_t actual = fwrite(&out, sizeof(out), 1, file);
+#endif
+    if (actual != 1) {
+      fclose(file);
+      return 1;
+    }
+  }
+  return fclose(file) != 0; /* A failed flush of buffered data must not report success */
+}