Import libsoxr_0.1.1.orig.tar.xz

author Benjamin Drung <bdrung@debian.org>

Fri, 5 Apr 2013 11:06:38 +0000 (12:06 +0100)

committer Benjamin Drung <bdrung@debian.org>

Fri, 5 Apr 2013 11:06:38 +0000 (12:06 +0100)
author Benjamin Drung <bdrung@debian.org>
Fri, 5 Apr 2013 11:06:38 +0000 (12:06 +0100)
committer Benjamin Drung <bdrung@debian.org>
Fri, 5 Apr 2013 11:06:38 +0000 (12:06 +0100)
diff --git a/AUTHORS b/AUTHORS

new file mode 100644 (file)

index 0000000..2ba76d3
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1 @@
+Rob Sykes <robs@users.sourceforge.net>
diff --git a/CMakeLists.txt b/CMakeLists.txt

new file mode 100644 (file)

index 0000000..8a24952
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,297 @@
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+cmake_minimum_required (VERSION 2.8 FATAL_ERROR)
+# Only the C language is enabled for this project:
+project (soxr C)
+set (DESCRIPTION_SUMMARY "High quality, one-dimensional sample-rate conversion library")
+
+
+
+# Release versioning:
+# (the three components below are later joined into PROJECT_VERSION)
+set (PROJECT_VERSION_MAJOR 0)
+set (PROJECT_VERSION_MINOR 1)
+set (PROJECT_VERSION_PATCH 1)
+
+# For shared-object; if, since the last public release:
+#  * library code changed at all: ++revision
+#  * interfaces changed at all:   ++current, revision = 0
+#  * interfaces added:            ++age
+#  * interfaces removed:          age = 0
+# (SO_VERSION is later computed as <current - age>.<age>.<revision>)
+set (SO_VERSION_CURRENT  1)
+set (SO_VERSION_REVISION 0)
+set (SO_VERSION_AGE      1)
+
+
+
+# Main options:
+# (cmake_dependent_option offers an option only while its dependency list holds; otherwise the option is pinned to the final argument)
+include (CMakeDependentOption)
+# Fall back to a Release build when no build type has been chosen:
+if (NOT CMAKE_BUILD_TYPE)
+  set (CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
+endif ()
+# Feature switches:
+option (BUILD_TESTS "Build sanity-tests."  ON)
+option (BUILD_SHARED_LIBS "Build shared libraries." ON)
+option (BUILD_EXAMPLES "Build examples." OFF)
+option (WITH_OPENMP "Include OpenMP threading." ON)
+option (WITH_LSR_BINDINGS "Include a `libsamplerate'-like interface." ON)
+cmake_dependent_option (WITH_SINGLE_PRECISION "Build with single precision (for up to 20-bit accuracy)." ON
+  "WITH_DOUBLE_PRECISION" ON) # pinned ON whenever double precision is off
+cmake_dependent_option (WITH_DOUBLE_PRECISION "Build with double precision (for up to 32-bit accuracy)." ON
+  "WITH_SINGLE_PRECISION" ON) # pinned ON whenever single precision is off
+cmake_dependent_option (WITH_SIMD "Use SIMD (for faster single precision)." ON
+  "WITH_SINGLE_PRECISION" OFF)
+cmake_dependent_option (WITH_AVFFT "Use libavcodec (LGPL) for SIMD DFT." OFF
+  "WITH_SIMD;NOT WITH_PFFFT" OFF) # unavailable while WITH_PFFFT is on
+cmake_dependent_option (WITH_PFFFT "Use PFFFT (BSD-like licence) for SIMD DFT." ON
+  "WITH_SIMD;NOT WITH_AVFFT" OFF) # unavailable while WITH_AVFFT is on
+if (UNIX)
+  if (EXISTS ${PROJECT_SOURCE_DIR}/lsr-tests) # only offered when the lsr-tests directory is present
+    cmake_dependent_option (BUILD_LSR_TESTS "Build LSR tests." OFF
+      "WITH_LSR_BINDINGS" OFF)
+  endif ()
+endif ()
+
+
+
+# Introspection:
+
+# Project-local modules; PROJECT_SOURCE_DIR keeps this working when soxr is built as a subproject:
+list (APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules")
+include (CheckFunctionExists)
+include (CheckIncludeFiles)
+include (CheckLibraryExists)
+include (TestBigEndian)
+
+check_library_exists (m pow "" NEED_LIBM) # is there a linkable libm that provides pow ()?
+if (NEED_LIBM)
+  set (CMAKE_REQUIRED_LIBRARIES "m;${CMAKE_REQUIRED_LIBRARIES}")
+  link_libraries (m)
+endif ()
+# OpenMP (optional): when found, its flags are appended to the global C and exe-linker flags.
+if (WITH_OPENMP)
+  find_package (OpenMP)
+endif ()
+if (OPENMP_FOUND)
+  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+  set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+endif ()
+# SIMD (optional): detection is delegated to find_package (SIMD).
+if (WITH_SIMD)
+  find_package (SIMD)
+  if (SIMD_FOUND)
+    set (HAVE_SIMD 1)
+  endif ()
+endif ()
+# Mirror the WITH_* precision options into HAVE_* variables:
+if (WITH_SINGLE_PRECISION)
+  set (HAVE_SINGLE_PRECISION 1)
+endif ()
+
+if (WITH_DOUBLE_PRECISION)
+  set (HAVE_DOUBLE_PRECISION 1)
+endif ()
+# libavcodec DFT (optional): HAVE_AVFFT is set only if the package is actually found.
+if (WITH_AVFFT)
+  find_package (LibAVCodec)
+  if (AVCODEC_FOUND)
+    include_directories (${AVCODEC_INCLUDE_DIRS})
+    link_libraries (${AVCODEC_LIBRARIES})
+    set (HAVE_AVFFT 1)
+  endif ()
+endif ()
+# Probes: lrint (), <fenv.h>, and target byte order.
+check_function_exists (lrint HAVE_LRINT)
+check_include_files (fenv.h HAVE_FENV_H)
+test_big_endian (WORDS_BIGENDIAN)
+
+macro (make_exist) # give each named variable the value 0 if it is unset or evaluates to false
+  foreach (x ${ARGN})
+    if (NOT ${x})
+      set (${x} 0)
+    endif ()
+  endforeach ()
+endmacro ()
+# Normalise every feature flag probed or derived above:
+make_exist (HAVE_LRINT HAVE_FENV_H WORDS_BIGENDIAN HAVE_SIMD)
+make_exist (HAVE_SINGLE_PRECISION HAVE_DOUBLE_PRECISION HAVE_AVFFT)
+
+
+
+# Compiler configuration:
+
+if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) # GCC: warning sets, stripping, symbol visibility
+  set (PROJECT_CXX_FLAGS "-Wconversion -Wall -W -pedantic -Wundef -Wcast-align -Wpointer-arith -Wno-long-long")
+  set (PROJECT_C_FLAGS "${PROJECT_CXX_FLAGS} -Wnested-externs -Wmissing-prototypes -Wstrict-prototypes") # NOTE(review): not consumed in this file; presumably applied by the subdirectories
+  if (CMAKE_BUILD_TYPE STREQUAL "Release")
+    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s") # strip
+  endif ()
+  cmake_dependent_option (VISIBILITY_HIDDEN "Build with -fvisibility=hidden." ON
+    "BUILD_SHARED_LIBS" OFF) # offered only for shared builds, otherwise pinned OFF
+  if (VISIBILITY_HIDDEN)
+    add_definitions (-fvisibility=hidden -DSOXR_VISIBILITY) # directory-scoped: affects this directory and those added below
+  endif ()
+endif ()
+
+if (MSVC)
+  add_definitions (-D_USE_MATH_DEFINES -D_CRT_SECURE_NO_WARNINGS)
+  option (ENABLE_STATIC_RUNTIME "Visual Studio, link with runtime statically."  OFF)
+  if (ENABLE_STATIC_RUNTIME) # project (soxr C) enables C only, so the C flag sets must be rewritten (CXX names kept for compatibility):
+    foreach (flag_var CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+      string (REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") # swap the DLL runtime for the static one
+    endforeach ()
+  endif ()
+  # By default, do not warn when built on machines using only VS Express:
+  if (NOT DEFINED CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS)
+    set (CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
+  endif ()
+endif ()
+
+
+
+# Build configuration:
+
+if (BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL "Windows") # Allow exes to find dlls (operands are variable names, so an empty value cannot break the if):
+  set (BIN ${PROJECT_BINARY_DIR}/bin/)
+  set (EXAMPLES_BIN ${BIN})
+  set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${BIN}) # libraries and executables share one output directory
+  set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BIN})
+else ()
+  set (BIN ./)
+  set (EXAMPLES_BIN ../examples/)
+endif ()
+# LIB_TYPE follows BUILD_SHARED_LIBS; MSVC shared builds additionally define SOXR_DLL:
+set (LIB_TYPE STATIC)
+if (BUILD_SHARED_LIBS)
+  set (LIB_TYPE SHARED)
+  if (MSVC)
+    add_definitions (-DSOXR_DLL)
+  endif ()
+endif ()
+
+
+
+# Installation configuration:
+
+# Install locations: a value already defined by the caller always wins.
+if (NOT DEFINED BIN_INSTALL_DIR)
+  set (BIN_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/bin")
+endif ()
+if (NOT DEFINED LIB_INSTALL_DIR)
+  set (LIB_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}")
+endif ()
+if (NOT DEFINED INCLUDE_INSTALL_DIR)
+  set (INCLUDE_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/include")
+endif ()
+# Documentation goes under share/doc/lib<project> on UNIX and under doc elsewhere:
+if (NOT DEFINED DOC_INSTALL_DIR AND UNIX)
+  set (DOC_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/share/doc/lib${PROJECT_NAME}")
+elseif (NOT DEFINED DOC_INSTALL_DIR)
+  set (DOC_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/doc")
+endif ()
+
+if (APPLE)
+  option (BUILD_FRAMEWORK "Build an OS X framework." OFF)
+  set (FRAMEWORK_INSTALL_DIR "/Library/Frameworks" CACHE STRING "Directory to install frameworks to.")
+endif ()
+
+
+
+# Top-level:
+
+set (PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH})
+math (EXPR SO_VERSION_MAJOR "${SO_VERSION_CURRENT} - ${SO_VERSION_AGE}") # SO major = current - age
+math (EXPR SO_VERSION_MINOR "${SO_VERSION_AGE}")
+math (EXPR SO_VERSION_PATCH "${SO_VERSION_REVISION}")
+set (SO_VERSION ${SO_VERSION_MAJOR}.${SO_VERSION_MINOR}.${SO_VERSION_PATCH})
+# Generate <project>-config.h in the build tree and put that directory on the include path:
+configure_file (
+  ${PROJECT_SOURCE_DIR}/${PROJECT_NAME}-config.h.in
+  ${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.h)
+include_directories (${PROJECT_BINARY_DIR})
+# Testing support is switched on if either test suite is requested:
+if (BUILD_TESTS OR BUILD_LSR_TESTS)
+  enable_testing ()
+endif ()
+# Top-level documentation files are installed to DOC_INSTALL_DIR:
+install (FILES
+  ${CMAKE_CURRENT_SOURCE_DIR}/README
+  ${CMAKE_CURRENT_SOURCE_DIR}/LICENCE
+  ${CMAKE_CURRENT_SOURCE_DIR}/NEWS
+  DESTINATION ${DOC_INSTALL_DIR})
+
+
+
+# Subdirectories:
+
+include_directories (${PROJECT_SOURCE_DIR}/src)
+# src is always built; the other directories are opt-in (examples are also pulled in by BUILD_TESTS):
+add_subdirectory (src)
+if (BUILD_TESTS)
+  add_subdirectory (tests)
+endif ()
+if (BUILD_LSR_TESTS)
+  add_subdirectory (lsr-tests)
+endif ()
+if (BUILD_EXAMPLES OR BUILD_TESTS)
+  add_subdirectory (examples)
+endif ()
+
+
+
+# Rough-and-ready distclean for anyone still doing in-tree builds:
+
+if (UNIX) # the command below shells out to make and rm
+  add_custom_target (distclean # runs "make clean", then deletes the generated files listed below
+    COMMAND make clean && rm -rf
+      CMakeCache.txt
+      CMakeFiles
+      cmake_install.cmake
+      CPackConfig.cmake
+      CPackSourceConfig.cmake
+      deinstall.cmake
+      Makefile
+      soxr-config.h
+      src/CMakeFiles
+      src/cmake_install.cmake
+      src/libsoxr-dev.src
+      src/libsoxr-lsr.pc
+      src/libsoxr.pc
+      src/libsoxr.src
+      src/Makefile)
+endif ()
+
+
+
+# Deinstallation:
+
+configure_file ( # instantiate the deinstall script; @ONLY restricts substitution to @VAR@ references
+  "${CMAKE_CURRENT_SOURCE_DIR}/deinstall.cmake.in"
+  "${CMAKE_CURRENT_BINARY_DIR}/deinstall.cmake"
+  IMMEDIATE @ONLY)
+# "deinstall" target: runs the generated script in CMake script mode (-P):
+add_custom_target (deinstall
+  COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/deinstall.cmake")
+
+
+
+# Packaging:
+
+if (UNIX)
+  set (CPACK_PACKAGE_VERSION_MAJOR "${PROJECT_VERSION_MAJOR}")
+  set (CPACK_PACKAGE_VERSION_MINOR "${PROJECT_VERSION_MINOR}")
+  set (CPACK_PACKAGE_VERSION_PATCH "${PROJECT_VERSION_PATCH}")
+  # Source packages are gzipped tarballs; the list below holds patterns for paths to leave out:
+  set (CPACK_SOURCE_GENERATOR "TGZ")
+  set (CPACK_SOURCE_IGNORE_FILES "dist;/lsr-tests/;/Debug/;/Release/;/cpack/;\\\\.swp$;\\\\.gitignore;/\\\\.git/")
+  # The CPack module reads the CPACK_* variables set above, so it is included after them:
+  include (CPack)
+  # Optional extra packaging rules, used only if the source tree has a cpack directory:
+  if (IS_DIRECTORY ${PROJECT_SOURCE_DIR}/cpack)
+    add_subdirectory (cpack)
+  endif ()
+endif ()
diff --git a/COPYING.LGPL b/COPYING.LGPL

new file mode 100644 (file)

index 0000000..551cb4a
--- /dev/null
+++ b/COPYING.LGPL
@@ -0,0 +1,502 @@
+                 GNU LESSER GENERAL PUBLIC LICENSE
+                      Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+                           Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+\f
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard.  To achieve this, non-free programs must be
+allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+\f
+                 GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+\f
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+\f
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+\f
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at
+    least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more
+    than the cost of performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+\f
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+\f
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+\f
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+                           NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+                    END OF TERMS AND CONDITIONS
+\f
+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.  It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/INSTALL b/INSTALL

new file mode 100644 (file)

index 0000000..c2c7675
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,123 @@
+SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+
+INSTALLATION GUIDE CONTENTS
+
+* Standard build
+* Build customisation
+* Cross-compiling with mingw (linux host)
+* Integration with other build systems
+
+
+
+STANDARD BUILD
+
+1. Prerequisites:
+
+    Before you can build this library, you need to have available on your
+    system:
+
+    * A C-compiler with 64-bit integer support and, optionally, OpenMP, SIMD.
+
+    * A 'make' utility (most compiler installations already have one of these).
+
+    * CMake: http://www.cmake.org/cmake/resources/software.html
+
+
+2. Build:
+
+    At a command prompt, change directory (`cd') to the one containing this
+    file, then enter:
+
+        go                          (on MS-Windows with nmake)
+    or
+        ./go                        (on unix-like systems)
+
+    This should build the library and run a few sanity tests.
+
+
+3. Installation:
+
+    Note that this step may need to be performed by a system
+    administrator.  Enter:
+
+        nmake install               (on MS-Windows)
+    or
+        cd Release; make install    (on unix)
+
+
+4. Configuration:
+
+    To use the library you may need to set up appropriate paths to the
+    library and its header file in your development environment.
+
+
+5. Installation test
+
+    To test the installation, build and run some of the example programmes
+    (see examples/README).
+
+
+
+BUILD CUSTOMISATION
+
+If it is necessary to customise the build, then steps 2 and 3 above may be
+substituted as follows.  Change directory to the one containing this file,
+then enter commands along the lines of:
+
+    mkdir build
+    cd build
+    cmake [OPTIONS] ..
+    make
+    make test
+    sudo make install
+
+To list help on the available options, enter:
+
+    cmake -LH ..
+
+Options, if given, should be preceded with '-D', e.g.
+
+    cmake -DWITH_SIMD:BOOL=OFF ..
+
+
+
+CROSS-COMPILING WITH MINGW (LINUX HOST)
+
+For example:
+
+    mkdir build
+    cd build
+    cmake -DCMAKE_TOOLCHAIN_FILE=~/Toolchain-x86_64-mingw-w64-mingw32.cmake \
+          -DCMAKE_INSTALL_PREFIX=install \
+          -DHAVE_WORDS_BIGENDIAN_EXITCODE=1 \
+          -DBUILD_TESTS=0 \
+          -DBUILD_EXAMPLES=1 \
+          ..
+    make
+
+where ~/Toolchain-x86_64-mingw-w64-mingw32.cmake might contain:
+
+    SET(CMAKE_SYSTEM_NAME Windows)
+    SET(CMAKE_C_COMPILER /usr/bin/x86_64-w64-mingw32-gcc)
+    SET(CMAKE_CXX_COMPILER /usr/bin/x86_64-w64-mingw32-g++)
+    SET(CMAKE_RC_COMPILER /usr/bin/x86_64-w64-mingw32-windres)
+    SET(CMAKE_Fortran_COMPILER /usr/bin/x86_64-w64-mingw32-gfortran)
+    SET(CMAKE_AR /usr/bin/x86_64-w64-mingw32-ar CACHE FILEPATH "Archiver")
+    SET(CMAKE_RANLIB /usr/bin/x86_64-w64-mingw32-ranlib CACHE FILEPATH "Ranlib")
+    SET(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32)
+    SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+    SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+    SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+    SET(QT_BINARY_DIR /usr/x86_64-w64-mingw32/bin /usr/bin)
+    SET(Boost_COMPILER -gcc47)
+
+
+
+INTEGRATION WITH OTHER BUILD SYSTEMS
+
+Autotools-based systems might find it useful to create a file called
+`configure' in the directory containing this file, consisting of the line:
+  cmake -DBUILD_SHARED_LIBS=OFF .
+(or with other build options as required).
+
+For MS visual studio, see msvc/README
diff --git a/LICENCE b/LICENCE

new file mode 100644 (file)

index 0000000..1c61878
--- /dev/null
+++ b/LICENCE
@@ -0,0 +1,24 @@
+SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+
+This library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at
+your option) any later version.
+
+This library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+
+
+Notes
+
+1. Re software in the `examples' directory: works that are not resampling
+examples but are based on the given examples -- for example, applications using
+the library -- shall not be considered to be derivative works of the examples.
+
+2. If building with pffft.c, see the licence embedded in that file.
diff --git a/NEWS b/NEWS

new file mode 100644 (file)

index 0000000..64f8ffa
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,24 @@
+Version 0.1.1 (2013-03-03)
+  * Minor fixes/improvements to build/tests.
+  * Fix crash (e.g. with k3b) when null error pointer passed to src_create (lsr
+    bindings only).
+  * Fix broken resampling in many cases with SIMD and anti_aliasing_pc < 100.
+  * For clarity, renamed and slightly changed usage of three parameters in
+    soxr_quality_spec_t (ABI compatible, API incompatible).  An application not
+    setting these parameters directly need make no change; otherwise, changes
+    should be made per the following example (as shown, compatibility with both
+    old/new APIs is maintained).  See also the comments on these parameters in
+    soxr.h.  N.B. ABI compatibility with the 0.1.0 API may be removed in a
+    future release.
+      #if !defined SOXR_VERSION /* Deprecated, 0.1.0 API */
+        q_spec.phase = minimum_phase? 0 : 50;
+        q_spec.bw_pc = cutoff * 100;
+        q_spec.anti_aliasing_pc = anti_aliasing * 100;
+      #else /* 0.1.1 API */                            Explanation:
+        q_spec.phase_response = minimum_phase? 0 : 50;  Renamed.
+        q_spec.passband_end = cutoff;                   Renamed, no longer %.
+        q_spec.stopband_begin = 2 - anti_aliasing;      Renamed, no longer %, no
+      #endif                                            longer mirrored in Fs.
+
+Version 0.1.0 (2013-01-19)
+  * First public release.
diff --git a/README b/README

new file mode 100644 (file)

index 0000000..1f9921d
--- /dev/null
+++ b/README
@@ -0,0 +1,53 @@
+SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+
+The SoX Resampler library `libsoxr' performs one-dimensional sample-rate
+conversion -- it may be used, for example, to resample PCM-encoded audio.
+For higher-dimensional resampling, such as for visual-image processing, you
+should look elsewhere.
+
+It aims to give fast¹ and very high quality² results for any constant
+(rational or irrational) resampling ratio.  Phase-response, preserved
+bandwidth, aliasing, and rejection level parameters are all configurable;
+alternatively, simple `preset' configurations may be selected.  An
+experimental, variable-rate resampling mode of operation is also included.
+
+The resampler is currently available either as part of `libsox' (the audio
+file-format and effect library), or stand-alone as `libsoxr' (this package).
+The interfaces to libsox and libsoxr are slightly different, with that of
+libsoxr designed specifically for resampling.  An application requiring
+support for other effects, or for reading-from or writing-to audio files or
+devices, should use libsox (or other libraries such as libsndfile or
+libavformat).
+
+Libsoxr provides a simple API that allows interfacing using the most
+commonly-used sample formats and buffering schemes: sample-formats may be
+either floating-point or integer, and multiple channels either interleaved
+or split in separate buffers.  The API is documented in the header file
+`soxr.h', together with sample code found in the 'examples' directory.
+
+For compatibility with the popular `libsamplerate' library, the header file
+`soxr-lsr.h' is provided and may be used as an alternative API.³  Note
+however, that libsoxr does not provide a full emulation of libsamplerate
+and that using this approach, only a sub-set of libsoxr's features are
+available.
+
+The design was inspired by Laurent De Soras' paper `The Quest For The
+Perfect Resampler', http://ldesoras.free.fr/doc/articles/resampler-en.pdf;
+in essence, it combines Julius O. Smith's `Bandlimited Interpolation'
+technique (https://ccrma.stanford.edu/~jos/resample/resample.pdf) with FFT-
+based over-sampling.
+
+Note that for real-time resampling, libsoxr may have a higher latency
+than non-FFT based resamplers.  For example, when using the `High Quality'
+configuration to resample between 44100Hz and 48000Hz, the latency is
+around 1000 output samples, i.e. roughly 20ms (though passband and FFT-
+size configuration parameters may be used to reduce this figure).
+
+For build and installation instructions, see the file `INSTALL'; for
+copyright and licensing information, see the file `LICENCE'.
+
+For support and new versions, see http://soxr.sourceforge.net
+________
+¹ For example, multi-channel resampling can utilise multiple CPU-cores.
+² Bit-perfect within practical occupied-bandwidth limits.
+³ For details of that API, see http://www.mega-nerd.com/SRC/api.html.
diff --git a/TODO b/TODO

new file mode 100644 (file)

index 0000000..1c4a31b
--- /dev/null
+++ b/TODO
@@ -0,0 +1,3 @@
+* SOXR_ALLOW_ALIASING
+* Explicit flush API fn, perhaps.
+* More SIMD.
diff --git a/cmake/Modules/FindLibAVCodec.cmake b/cmake/Modules/FindLibAVCodec.cmake

new file mode 100644 (file)

index 0000000..add33c3
--- /dev/null
+++ b/cmake/Modules/FindLibAVCodec.cmake
@@ -0,0 +1,23 @@
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# - Find AVCODEC
+# Find the native installation of this package: includes and libraries.
+#
+#  AVCODEC_INCLUDES    - where to find headers for this package.
+#  AVCODEC_LIBRARIES   - List of libraries when using this package.
+#  AVCODEC_FOUND       - True if this package can be found.
+
+include (FindPackageHandleStandardArgs)
+
+# AVCODEC_INCLUDES is already set (for example cached by an earlier run):
+# ask for a quiet report this time.
+if (AVCODEC_INCLUDES)
+  set (AVCODEC_FIND_QUIETLY TRUE)
+endif ()
+
+# Locate the directory holding the headers, then the library itself.
+find_path (AVCODEC_INCLUDES NAMES libavcodec/avcodec.h)
+find_library (AVCODEC_LIBRARIES NAMES avcodec)
+
+# Sets AVCODEC_FOUND from the two results and prints the standard message.
+find_package_handle_standard_args (AVCODEC
+  DEFAULT_MSG
+  AVCODEC_LIBRARIES
+  AVCODEC_INCLUDES)
+
+# Keep the two cache entries out of the default GUI view.
+mark_as_advanced (AVCODEC_LIBRARIES AVCODEC_INCLUDES)
diff --git a/cmake/Modules/FindOpenMP.cmake b/cmake/Modules/FindOpenMP.cmake

new file mode 100644 (file)

index 0000000..654dc30
--- /dev/null
+++ b/cmake/Modules/FindOpenMP.cmake
@@ -0,0 +1,108 @@
+# - Finds OpenMP support
+# This module can be used to detect OpenMP support in a compiler.
+# If the compiler supports OpenMP, the flags required to compile with
+# openmp support are set.
+#
+# The following variables are set:
+#   OpenMP_C_FLAGS - flags to add to the C compiler for OpenMP support
+#   OPENMP_FOUND - true if openmp is detected
+#
+# Supported compilers can be found at http://openmp.org/wp/openmp-compilers/
+#
+# Modified for libsoxr not to rely on presence of C++ compiler.
+#
+#=============================================================================
+# Copyright 2009 Kitware, Inc.
+# Copyright 2008-2009 André Rigland Brodtkorb <Andre.Brodtkorb@ifi.uio.no>
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#  * Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+#  * The names of Kitware, Inc., the Insight Consortium, or the names of
+#    any consortium members, or of any contributors, may not be used to
+#    endorse or promote products derived from this software without
+#    specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include (CheckCSourceCompiles)
+include (FindPackageHandleStandardArgs)
+
+# Candidate flags, tried in the order listed until one of them lets the probe
+# program below compile.
+set (OpenMP_C_FLAG_CANDIDATES
+  #Gnu
+  "-fopenmp"
+  #Microsoft Visual Studio
+  "/openmp"
+  #Intel windows
+  "-Qopenmp"
+  #Intel
+  "-openmp"
+  #Empty, if compiler automatically accepts openmp
+  " "
+  #Sun
+  "-xopenmp"
+  #HP
+  "+Oopenmp"
+  #IBM XL C/c++
+  "-qsmp"
+  #Portland Group
+  "-mp"
+)
+
+# Probe program: it compiles only if <omp.h> can be included and the compiler
+# defines _OPENMP while the flag under test is in effect.
+set (OpenMP_C_TEST_SOURCE
+"
+#include <omp.h>
+int main() {
+#ifdef _OPENMP
+  return 0;
+#else
+  breaks_on_purpose
+#endif
+}
+")
+
+# If OpenMP_C_FLAGS is already defined, do not search again: emptying the
+# candidate list means that no try-compile is performed below.
+if (DEFINED OpenMP_C_FLAGS)
+  set (OpenMP_C_FLAG_CANDIDATES)
+endif ()
+
+# Check the C compiler with each candidate in turn.
+foreach (FLAG ${OpenMP_C_FLAG_CANDIDATES})
+  # Temporarily replace CMAKE_REQUIRED_FLAGS with the flag under test.
+  set (SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+  set (CMAKE_REQUIRED_FLAGS "${FLAG}")
+  # Drop the cached result of the previous candidate so the check runs again.
+  unset (OpenMP_FLAG_DETECTED CACHE)
+  message (STATUS "Try OpenMP C flag = [${FLAG}]")
+  check_c_source_compiles ("${OpenMP_C_TEST_SOURCE}" OpenMP_FLAG_DETECTED)
+  set (CMAKE_REQUIRED_FLAGS "${SAFE_CMAKE_REQUIRED_FLAGS}")
+  if (OpenMP_FLAG_DETECTED)
+    # First working candidate wins.
+    set (OpenMP_C_FLAGS_INTERNAL "${FLAG}")
+    break ()
+  endif ()
+endforeach ()
+
+# Without FORCE, a value already in the cache is left untouched.
+set (OpenMP_C_FLAGS "${OpenMP_C_FLAGS_INTERNAL}"
+  CACHE STRING "C compiler flags for OpenMP parallelization")
+
+# Handle the standard arguments for find_package; the single required
+# variable is listed once.
+find_package_handle_standard_args (OpenMP DEFAULT_MSG OpenMP_C_FLAGS)
+
+mark_as_advanced (OpenMP_C_FLAGS)
diff --git a/cmake/Modules/FindSIMD.cmake b/cmake/Modules/FindSIMD.cmake

new file mode 100644 (file)

index 0000000..6484bbd
--- /dev/null
+++ b/cmake/Modules/FindSIMD.cmake
@@ -0,0 +1,88 @@
+# - Finds SIMD support
+#
+# The following variables are set:
+#   SIMD_C_FLAGS - flags to add to the C compiler for this package.
+#   SIMD_FOUND - true if support for this package is found.
+#
+#=============================================================================
+# Based on FindOpenMP.cmake, which is:
+#
+# Copyright 2009 Kitware, Inc.
+# Copyright 2008-2009 André Rigland Brodtkorb <Andre.Brodtkorb@ifi.uio.no>
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#  * Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+#  * The names of Kitware, Inc., the Insight Consortium, or the names of
+#    any consortium members, or of any contributors, may not be used to
+#    endorse or promote products derived from this software without
+#    specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include (CheckCSourceCompiles)
+include (FindPackageHandleStandardArgs)
+
+# Candidate flag sets, tried in the order listed until the probe program
+# below compiles.
+set (SIMD_C_FLAG_CANDIDATES
+  # Microsoft Visual Studio x64
+  " "
+  # Microsoft Visual Studio x86
+  "/arch:SSE /fp:fast -D__SSE__"
+  # Gnu
+  "-msse -mfpmath=sse"
+)
+
+# Probe program: exercises a few SSE intrinsics declared in <xmmintrin.h>.
+set (SIMD_C_TEST_SOURCE
+"
+#include <xmmintrin.h>
+int main()
+{
+  __m128 a, b;
+  float vals[4] = {0};
+  a = _mm_loadu_ps (vals);
+  b = a;
+  b = _mm_add_ps (a,b);
+  _mm_storeu_ps (vals,b);
+  return 0;
+}
+")
+
+# If SIMD_C_FLAGS is already defined, do not search again: emptying the
+# candidate list means that no try-compile is performed below.
+if (DEFINED SIMD_C_FLAGS)
+  set (SIMD_C_FLAG_CANDIDATES)
+endif ()
+
+foreach (FLAG ${SIMD_C_FLAG_CANDIDATES})
+  # Temporarily replace CMAKE_REQUIRED_FLAGS with the flags under test.
+  set (SAFE_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+  set (CMAKE_REQUIRED_FLAGS "${FLAG}")
+  # Drop the cached result of the previous candidate so the check runs again.
+  unset (SIMD_FLAG_DETECTED CACHE)
+  message (STATUS "Try SIMD C flag = [${FLAG}]")
+  check_c_source_compiles ("${SIMD_C_TEST_SOURCE}" SIMD_FLAG_DETECTED)
+  set (CMAKE_REQUIRED_FLAGS "${SAFE_CMAKE_REQUIRED_FLAGS}")
+  if (SIMD_FLAG_DETECTED)
+    # First working candidate wins.
+    set (SIMD_C_FLAGS_INTERNAL "${FLAG}")
+    break ()
+  endif ()
+endforeach ()
+
+# Without FORCE, a value already in the cache is left untouched.
+set (SIMD_C_FLAGS "${SIMD_C_FLAGS_INTERNAL}"
+  CACHE STRING "C compiler flags for SIMD vectorization")
+
+# The single required variable is listed once.
+find_package_handle_standard_args (SIMD DEFAULT_MSG SIMD_C_FLAGS)
+mark_as_advanced (SIMD_C_FLAGS)
diff --git a/cmake/Modules/TestBigEndian.cmake b/cmake/Modules/TestBigEndian.cmake

new file mode 100644 (file)

index 0000000..7f65cc0
--- /dev/null
+++ b/cmake/Modules/TestBigEndian.cmake
@@ -0,0 +1,15 @@
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# - Macro to determine endian type
+#  test_big_endian (VARIABLE)
+#  VARIABLE - variable to store the result to
+
+macro (test_big_endian VARIABLE)
+  # Probe only once: skip everything when HAVE_<VARIABLE> already exists.
+  # An explicit DEFINED test is used so that the result does not depend on
+  # if() implicitly dereferencing a quoted argument (see policy CMP0054).
+  if (NOT DEFINED HAVE_${VARIABLE})
+    include (CheckCSourceRuns)
+    # The program stores 1 in a long and returns non-zero when the first byte
+    # is set; the check therefore succeeds (exit status 0) only when that
+    # first byte is zero, i.e. on a big-endian machine.
+    check_c_source_runs ("int main() {union {long i; char c[sizeof(long)];}
+      const u = {1}; return !!u.c[0];}" HAVE_${VARIABLE})
+    # Publish the outcome under the caller's variable name.
+    set (${VARIABLE} "${HAVE_${VARIABLE}}" CACHE INTERNAL "1 if system is big endian" FORCE)
+  endif ()
+endmacro ()
diff --git a/deinstall.cmake.in b/deinstall.cmake.in

new file mode 100644 (file)

index 0000000..307be50
--- /dev/null
+++ b/deinstall.cmake.in
@@ -0,0 +1,25 @@
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# Removes every file listed in install_manifest.txt in the build directory,
+# prefixing each path with the DESTDIR environment variable (if set).
+
+if (NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
+  message (FATAL_ERROR "Cannot find install manifest")
+endif ()
+
+# The manifest is read as one path per line and turned into a CMake list.
+file (READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
+string (REGEX REPLACE "\n" ";" files "${files}")
+foreach (file ${files})
+  set (dest "$ENV{DESTDIR}${file}")
+  message (STATUS "Deinstalling \"${dest}\"")
+  # IS_SYMLINK is tested as well so that a link whose target has gone is
+  # still removed.
+  if (EXISTS "${dest}" OR IS_SYMLINK "${dest}")
+    execute_process (
+      COMMAND "@CMAKE_COMMAND@" -E remove "${dest}"
+      OUTPUT_VARIABLE rm_out
+      RESULT_VARIABLE rm_retval
+    )
+    # The result may be an error string rather than a number, so it is
+    # compared as a quoted string; an unquoted multi-word value would make
+    # the if() itself malformed.
+    if (NOT "${rm_retval}" STREQUAL "0")
+      message (FATAL_ERROR "Problem when removing \"${dest}\"")
+    endif ()
+  else ()
+    message (STATUS "File \"${dest}\" does not exist.")
+  endif ()
+endforeach ()
diff --git a/examples/1-single-block.c b/examples/1-single-block.c

new file mode 100644 (file)

index 0000000..3fb9201
--- /dev/null
+++ b/examples/1-single-block.c
@@ -0,0 +1,50 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Example 1: `One-shot' resample a single block of data in memory.
+ *
+ * N.B. See example 2 for how to resample a stream (of blocks).
+ *
+ * Optional arguments are: INPUT-RATE OUTPUT-RATE
+ *
+ * With the default arguments, the output should produce lines similar to the
+ * following:
+ *
+ *  0.00  0.71  1.00  0.71 -0.00 -0.71 -1.00 -0.71
+ *
+ * Gibbs effect may be seen at the ends of the resampled signal; this is because
+ * unlike a `real-world' signal, the synthetic input signal is not band-limited.
+ */
+
+#include <soxr.h>
+#include "examples-common.h"
+
+const float in[] = {  /* Input: 12 cycles of a sine wave with freq. = irate/4 */
+  0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1,
+  0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1};
+
+/* Resamples `in' with a single library call and prints the result, 8 values
+ * per line.  arg[1] and arg[2], when given, override the input and output
+ * rates; a third argument (any value) also prints the library version.
+ * Returns 0 on success, 1 if the library reported an error or the output
+ * buffer could not be allocated. */
+int main(int argc, char const * arg[])
+{
+  double irate = argc > 1? atof(arg[1]) : 1;         /* Default to upsampling */
+  double orate = argc > 2? atof(arg[2]) : 2;             /* by a factor of 2. */
+
+  size_t olen = (size_t)(AL(in) * orate / irate + .5);   /* Assay output len. */
+  float * out = malloc(sizeof(*out) * olen);       /* Allocate output buffer. */
+  size_t odone = 0;    /* Stays 0 should the library fail before setting it. */
+  soxr_error_t error;
+  unsigned i = 0;
+
+  if (!out && olen) {          /* Never resample into a missing output buffer. */
+    fprintf(stderr, "%s: out of memory\n", arg[0]);
+    return 1;
+  }
+
+  error = soxr_oneshot(irate, orate, 1,              /* Rates and # of chans. */
+      in, AL(in), NULL,                              /* Input. */
+      out, olen, &odone,                             /* Output. */
+      NULL, NULL, NULL);                             /* Default configuration.*/
+
+  while (i++ < odone)                        /* Print out the resampled data, */
+    printf("%5.2f%c", out[i-1], " \n"[!(i&7) || i == odone]);
+  printf("%-26s %s\n", arg[0], soxr_strerror(error)); /* and reported result. */
+
+  if (argc > 3)                                     /* Library version check: */
+    printf("runtime=%s API="SOXR_THIS_VERSION_STR"\n", soxr_version());
+
+  free(out);                                                      /* Tidy up. */
+  return !!error;
+}
diff --git a/examples/1a-lsr.c b/examples/1a-lsr.c

new file mode 100644 (file)

index 0000000..e42e530
--- /dev/null
+++ b/examples/1a-lsr.c
@@ -0,0 +1,40 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Example 1a: Variant of example 1 using libsamplerate-like bindings. */
+
+#include <soxr-lsr.h>
+#include "examples-common.h"
+
+float in[] = {  /* Input: 12 cycles of a sine wave with freq. = irate/4 */
+  0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1,
+  0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1, 0,1,0,-1};
+
+/* Resamples `in' with a single src_simple call and prints the result, 8
+ * values per line.  arg[1] and arg[2], when given, override the input and
+ * output rates; a third argument (any value) also prints the library version.
+ * Returns 0 on success, 1 if the library reported an error or the output
+ * buffer could not be allocated. */
+int main(int argc, char const * arg[])
+{
+  double irate = argc > 1? atof(arg[1]) : 1;         /* Default to upsampling */
+  double orate = argc > 2? atof(arg[2]) : 2;             /* by a factor of 2. */
+
+  size_t olen = (size_t)(AL(in) * orate / irate + .5);   /* Assay output len. */
+  float * out = (float *)malloc(sizeof(*out) * olen); /* Allocate output buf. */
+
+  int error, i = 0;
+  SRC_DATA data;
+
+  if (!out && olen) {          /* Never resample into a missing output buffer. */
+    fprintf(stderr, "%s: out of memory\n", arg[0]);
+    return 1;
+  }
+
+  data.data_in = in;
+  data.data_out = out;
+  data.input_frames = AL(in);
+  data.output_frames = (int)olen;
+  data.output_frames_gen = 0;  /* Stays 0 should the call fail before setting it. */
+  data.src_ratio = orate / irate;
+  error = src_simple(&data, SRC_SINC_FASTEST, 1);
+
+  while (i++ < data.output_frames_gen)       /* Print out the resampled data, */
+    printf("%5.2f%c", out[i-1], " \n"[!(i&7) || i == data.output_frames_gen]);
+  printf("%-26s %s\n", arg[0], src_strerror(error));  /* and reported result. */
+
+  if (argc > 3)                                     /* Library version check: */
+    printf("runtime=%s\n", src_get_version());
+
+  free(out);                                                      /* Tidy up. */
+  return !!error;
+}
diff --git a/examples/2-stream.C b/examples/2-stream.C

new file mode 100644 (file)

index 0000000..9d703f6
--- /dev/null
+++ b/examples/2-stream.C
@@ -0,0 +1,78 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Example 2: resample a raw, single-channel, floating-point data stream from
+ * stdin to stdout.
+ *
+ * The application uses the single function `soxr_process' for both input and
+ * output to/from the resampler; compared to the `input function' approach
+ * (illustrated in example 3) this requires that the application implements
+ * more logic, but one less function.
+ *
+ * Arguments are: INPUT-RATE OUTPUT-RATE
+ */
+
+#include <soxr.h>
+#include "examples-common.h"
+
+/* Entry point: resamples a stream of float samples from stdin to stdout,
+ * block by block.  arg[1] and arg[2], when given, override the default input
+ * and output rates.  Returns non-zero if the resampler reported an error or
+ * if errno is set when the program finishes. */
+int main(int argc, char const * arg[])
+{
+  double const irate = argc > 1? atof(arg[1]) : 96000.;
+  double const orate = argc > 2? atof(arg[2]) : 44100.;
+
+  /* Allocate resampling input and output buffers in proportion to the input
+   * and output rates: */
+  #define buf_total_len 15000  /* In samples. */
+  size_t const olen = (size_t)(orate * buf_total_len / (irate + orate) + .5);
+  size_t const ilen = buf_total_len - olen;
+  size_t const osize = sizeof(float), isize = osize;
+  void * obuf = malloc(osize * olen);
+  void * ibuf = malloc(isize * ilen);
+
+  /* need_input starts at 1 so that the first pass round the loop reads: */
+  size_t odone, written, need_input = 1;
+  soxr_error_t error;
+
+  /* Create a stream resampler: */
+  soxr_t soxr = soxr_create(
+      irate, orate, 1,             /* Input rate, output rate, # of channels. */
+      &error,                         /* To report any error during creation. */
+      NULL, NULL, NULL);                        /* Use configuration defaults.*/
+
+  if (!error) {                         /* If all is well, run the resampler: */
+    /* Macro defined outside this file; presumably it prepares stdin/stdout
+     * for raw sample data -- confirm at its definition. */
+    USE_STD_STDIO;
+                                                       /* Resample in blocks: */
+    do {
+      size_t ilen1 = 0;       /* # of samples offered to the resampler now. */
+
+      if (need_input) {
+
+        /* Read one block into the buffer, ready to be resampled: */
+        ilen1 = fread(ibuf, isize, ilen, stdin);
+
+        if (!ilen1) {     /* If there is no (more) input data available, */
+          free(ibuf);     /* set ibuf to NULL, to indicate end-of-input */
+          ibuf = NULL;    /* to the resampler. */
+        }
+      }
+
+      /* Copy data from the input buffer into the resampler, and resample
+       * to produce as much output as is possible to the given output buffer: */
+      error = soxr_process(soxr, ibuf, ilen1, NULL, obuf, olen, &odone);
+
+      written = fwrite(obuf, osize, odone, stdout); /* Consume output.*/
+
+      /* If the actual amount of data output is less than that requested, and
+       * we have not already reached the end of the input data, then supply some
+       * more input next time round the loop: */
+      need_input = odone < olen && ibuf;
+
+      /* Carry on while there is no error and either more input is wanted or
+       * the last pass still wrote some output: */
+    } while (!error && (need_input || written));
+  }
+                                                                  /* Tidy up: */
+  soxr_delete(soxr);
+  free(obuf), free(ibuf);   /* ibuf is NULL here if end-of-input was reached. */
+                                                              /* Diagnostics: */
+  fprintf(stderr, "%-26s %s; I/O: %s\n", arg[0],
+      soxr_strerror(error), errno? strerror(errno) : "no error");
+  return error || errno;
+}
diff --git a/examples/3-options-input-fn.c b/examples/3-options-input-fn.c

new file mode 100644 (file)

index 0000000..bb0bf2d
--- /dev/null
+++ b/examples/3-options-input-fn.c
@@ -0,0 +1,110 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Example 3: extends example 2 with multiple channels, multiple datatypes,
+ * and other options.
+ *
+ * The application provides an input function, called on demand by libsoxr, in
+ * response to calls to soxr_output(); compared to the `process' approach
+ * (illustrated in example 2) this requires that the application implements
+ * less logic, but one more function.
+ *
+ * The 11 arguments (which are optional, from last to first) are:
+ *   INPUT-RATE       As example 2
+ *   OUTPUT-RATE      Ditto
+ *   NUM-CHANNELS     Number of interleaved channels
+ *   IN-DATATYPE#     0:float32 1:float64 2:int32 3:int16
+ *   OUT-DATATYPE#    Ditto
+ *   Q-RECIPE         Quality recipe (in hex) See soxr.h
+ *   Q-FLAGS          Quality flags  (in hex) See soxr.h
+ *   PASSBAND-END     %
+ *   STOPBAND-BEGIN   %
+ *   PHASE-RESPONSE   [0,100]
+ *   USE-THREADS      1 to use multi-threading (where available)
+ */
+
+#include <soxr.h>
+#include "examples-common.h"
+
+/* State shared with input_fn: the read buffer, and the size in bytes of one
+ * sample-frame (i.e. of one sample for every channel): */
+typedef struct {void * ibuf; size_t isize;} input_context_t;
+
+/* Input callback, called on demand by the resampler: reads up to `len' frames
+ * from stdin into the context's buffer, points *buf at that data (or sets it
+ * to NULL if nothing was read and the stream is in error), and returns the
+ * number of frames actually read. */
+static size_t input_fn(input_context_t * p, soxr_cbuf_t * buf, size_t len)
+{
+  size_t const got = fread(p->ibuf, p->isize, len, stdin);  /* May be < len. */
+
+  /* Inform the resampler of the data's whereabouts: */
+  if (got == 0 && ferror(stdin))
+    *buf = NULL;                                    /* An error occurred. */
+  else
+    *buf = p->ibuf;
+
+  return got;                           /* # of samples per channel to input. */
+}
+
+/* Resamples stdin to stdout using the `input function' interface; see the
+ * comment at the top of this file for the (optional) command-line arguments.
+ * Returns 0 on success, or 1 if a resampler or I/O error was detected. */
+int main(int n, char const * arg[])
+{
+  /* Positional arguments: each initialiser consumes one argument if any
+   * remain (decrementing n and advancing arg); otherwise it takes the default
+   * given after the colon.  The order of these declarations is significant. */
+  char const *     const arg0 = n? --n, *arg++ : "";
+  double          const irate = n? --n, atof(*arg++) : 96000.;
+  double          const orate = n? --n, atof(*arg++) : 44100.;
+  unsigned        const chans = n? --n, (unsigned)atoi(*arg++) : 1;
+  soxr_datatype_t const itype = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
+  soxr_datatype_t const otype = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
+  unsigned long const q_recipe= n? --n, strtoul(*arg++, 0, 16) : SOXR_HQ;
+  unsigned long const q_flags = n? --n, strtoul(*arg++, 0, 16) : 0;
+  double   const passband_end = n? --n, atof(*arg++) : 0;
+  double const stopband_begin = n? --n, atof(*arg++) : 0;
+  double const phase_response = n? --n, atof(*arg++) : -1;
+  int       const use_threads = n? --n, atoi(*arg++) : 1;
+
+  /* N.B. the runtime spec. is given 0 when threads are wanted and 1 when not;
+   * presumably this is a thread count where 0 means `choose automatically' --
+   * TODO confirm against soxr.h. */
+  soxr_quality_spec_t       q_spec = soxr_quality_spec(q_recipe, q_flags);
+  soxr_io_spec_t      const io_spec = soxr_io_spec(itype, otype);
+  soxr_runtime_spec_t const runtime_spec = soxr_runtime_spec(!use_threads);
+
+  /* Allocate resampling input and output buffers in proportion to the input
+   * and output rates: */
+  #define buf_total_len 15000  /* In samples per channel. */
+  size_t const osize = soxr_datatype_size(otype) * chans;
+  size_t const isize = soxr_datatype_size(itype) * chans;
+  size_t const olen0= (size_t)(orate * buf_total_len / (irate + orate) + .5);
+  /* Clamp olen to [1, buf_total_len - 1] so that, however extreme the ratio
+   * of the rates, both olen and ilen are at least 1: */
+  size_t const olen = min(max(olen0, 1), buf_total_len - 1);
+  size_t const ilen = buf_total_len - olen;
+  void * const obuf = malloc(osize * olen);
+  void * const ibuf = malloc(isize * ilen);
+
+  input_context_t icontext;
+  size_t odone, clips = 0;
+  soxr_error_t error;
+  soxr_t soxr;
+
+  /* Overrides (if given): */
+  /* The band-edges are given as percentages, so are scaled down by 100; the
+   * phase-response is passed through unscaled.  The defaults above (0, 0, -1)
+   * fail these tests, so leave the recipe's own values in place. */
+  if (passband_end   > 0) q_spec.passband_end   = passband_end / 100;
+  if (stopband_begin > 0) q_spec.stopband_begin = stopband_begin / 100;
+  if (phase_response >=0) q_spec.phase_response = phase_response;
+
+  /* Create a stream resampler: */
+  soxr = soxr_create(
+      irate, orate, chans,         /* Input rate, output rate, # of channels. */
+      &error,                         /* To report any error during creation. */
+      &io_spec, &q_spec, &runtime_spec);
+
+  if (!error) {                      /* Register input_fn with the resampler: */
+    /* icontext is handed to input_fn as its first parameter; ilen is the
+     * capacity (in samples per channel) of the buffer that it fills: */
+    icontext.ibuf = ibuf, icontext.isize = isize;
+    error = soxr_set_input_fn(soxr, (soxr_input_fn_t)input_fn, &icontext, ilen);
+  }
+
+  if (!error) {                         /* If all is well, run the resampler: */
+    USE_STD_STDIO;
+                                                       /* Resample in blocks: */
+    /* The loop ends when fwrite reports that nothing was written: i.e. when
+     * soxr_output produced no samples (odone is 0) or a write error occurred. */
+    do odone = soxr_output(soxr, obuf, olen);
+    while (fwrite(obuf, osize, odone, stdout));            /* Consume output. */
+
+    error = soxr_error(soxr);            /* Check if any soxr error occurred. */
+    clips = *soxr_num_clips(soxr);     /* Can occur only with integer output. */
+  }
+                                                                  /* Tidy up: */
+  soxr_delete(soxr);
+  free(obuf), free(ibuf);
+                                                              /* Diagnostics: */
+  /* The exit status is non-0 if either the resampler or the C library (via
+   * errno) has reported an error: */
+  fprintf(stderr, "%-26s %s; %lu clips; I/O: %s\n", arg0, soxr_strerror(error),
+      (long unsigned)clips, errno? strerror(errno) : "no error");
+  return error || errno;
+}
diff --git a/examples/4-split-channels.c b/examples/4-split-channels.c

new file mode 100644 (file)

index 0000000..a441598
--- /dev/null
+++ b/examples/4-split-channels.c
@@ -0,0 +1,147 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Example 4: variant of examples 2 & 3, demonstrating I/O with split channels.
+ *
+ * Note that, for convenience of the demonstration, split-channel data is
+ * made available by deinterleaving data sourced from and sent to
+ * interleaved file-streams; this adds a lot of code to the example that,
+ * for purposes of understanding how to use split-channels, may safely be
+ * ignored.  In a real application, the channel-data might never be
+ * interleaved; for example, the split-channel data output from the
+ * resampler might be sent directly to digital-to-analogue converters.
+ *
+ * Note also (not shown in the examples) that split/interleaved channels may
+ * be used for input and output independently.
+ */
+
+#include <soxr.h>
+#include "examples-common.h"
+
+
+
+#define DEINTERLEAVE(T) do { \
+  unsigned i; \
+  size_t j; \
+  T * const * dest = (T * const *)dest0; \
+  T const * src = src0; \
+  if (ch == 1) memcpy(dest[0], src, n * sizeof(dest[0][0])); \
+  else for (j = 0; j < n; ++j) for (i = 0; i < ch; ++i) dest[i][j] = *src++; \
+  return; \
+} while (0)
+
+static void deinterleave(soxr_datatype_t data_type,
+    void * const * dest0,
+    void const * src0,
+    size_t n, unsigned ch)
+{
+  switch (data_type & 3) {
+    case SOXR_FLOAT32: DEINTERLEAVE(float);
+    case SOXR_FLOAT64: DEINTERLEAVE(double);
+    case SOXR_INT32  : DEINTERLEAVE(int32_t);
+    case SOXR_INT16  : DEINTERLEAVE(int16_t);
+    default: break;
+  }
+}
+
+#define INTERLEAVE(T) do { \
+  unsigned i; \
+  size_t j; \
+  T * dest = dest0; \
+  T const * const * src = (T const * const *)src0; \
+  if (ch == 1) memcpy(dest, src[0], n * sizeof(dest[0])); \
+  else for (j = 0; j < n; ++j) for (i = 0; i < ch; ++i) *dest++ = src[i][j]; \
+  return; \
+} while (0)
+
+static void interleave(soxr_datatype_t data_type, void * dest0,
+  void * const * src0, size_t n, unsigned ch)
+{
+  switch (data_type & 3) {
+    case SOXR_FLOAT32: INTERLEAVE(float);
+    case SOXR_FLOAT64: INTERLEAVE(double);
+    case SOXR_INT32  : INTERLEAVE(int32_t);
+    case SOXR_INT16  : INTERLEAVE(int16_t);
+    default: break;
+  }
+}
+
+int main(int n, char const * arg[])
+{
+  char const *     const arg0 = n? --n, *arg++ : "";
+  double          const irate = n? --n, atof(*arg++) : 96000.;
+  double          const orate = n? --n, atof(*arg++) : 44100.;
+  unsigned        const chans = n? --n, (unsigned)atoi(*arg++) : 1;
+  soxr_datatype_t const itype = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
+  soxr_datatype_t const otype = n? --n, (soxr_datatype_t)atoi(*arg++) : 0;
+  unsigned long const q_recipe= n? --n, strtoul(*arg++, 0, 16) : SOXR_HQ;
+  unsigned long const q_flags = n? --n, strtoul(*arg++, 0, 16) : 0;
+  int       const use_threads = n? --n, atoi(*arg++) : 1;
+
+  soxr_quality_spec_t const q_spec = soxr_quality_spec(q_recipe, q_flags);
+  soxr_io_spec_t const io_spec=soxr_io_spec(itype|SOXR_SPLIT, otype|SOXR_SPLIT);
+  soxr_runtime_spec_t const runtime_spec = soxr_runtime_spec(!use_threads);
+
+  /* Allocate resampling input and output buffers in proportion to the input
+   * and output rates: */
+  #define buf_total_len 15000  /* In samples per channel. */
+  size_t const osize = soxr_datatype_size(otype) * chans;
+  size_t const isize = soxr_datatype_size(itype) * chans;
+  size_t const olen = (size_t)(orate * buf_total_len / (irate + orate) + .5);
+  size_t const ilen = buf_total_len - olen;
+
+  /* For split channels: */
+  void * * const obuf_ptrs = malloc(sizeof(void *) * chans);
+  void * *       ibuf_ptrs = malloc(sizeof(void *) * chans);
+  char * const obufs = malloc(osize * olen), * optr = obufs;
+  char * const ibufs = malloc(isize * ilen), * iptr = ibufs;
+
+  /* For interleaved channels: */
+  char * const obuf = malloc(osize * olen);
+  char * const ibuf = malloc(isize * ilen);
+
+  size_t odone, written, need_input = 1, clips = 0;
+  soxr_error_t error;
+
+  soxr_t soxr = soxr_create(
+      irate, orate, chans, &error, &io_spec, &q_spec, &runtime_spec);
+
+  unsigned i;
+  for (i = 0; i < chans; ++i) {
+    ibuf_ptrs[i] = iptr;
+    obuf_ptrs[i] = optr;
+    iptr += ilen * soxr_datatype_size(itype);
+    optr += olen * soxr_datatype_size(otype);
+  }
+
+  if (!error) {
+    USE_STD_STDIO;
+
+    do {
+      size_t ilen1 = 0;
+
+      if (need_input) {
+        if (!(ilen1 = fread(ibuf, isize, ilen, stdin)))
+          free(ibuf_ptrs), ibuf_ptrs = 0; /* If none available, don't retry. */
+        else deinterleave(itype, ibuf_ptrs, ibuf, ilen1, chans);
+      }
+
+      error = soxr_process(soxr, ibuf_ptrs, ilen1, NULL, obuf_ptrs, olen, &odone);
+      interleave(otype, obuf, obuf_ptrs, odone, chans);  /* Consume output... */
+      written = fwrite(obuf, osize, odone, stdout);
+
+      need_input = odone < olen && ibuf_ptrs;
+
+    } while (!error && (need_input || written));
+
+    clips = *soxr_num_clips(soxr);     /* Can occur only with integer output. */
+  }
+                                                                  /* Tidy up: */
+  soxr_delete(soxr);
+  free(obuf), free(ibuf), free(obufs), free(ibufs);
+  free(obuf_ptrs), free(ibuf_ptrs);
+                                                              /* Diagnostics: */
+  fprintf(stderr, "%-26s %s; %lu clips; I/O: %s\n", arg0, soxr_strerror(error),
+      (long unsigned)clips, errno? strerror(errno) : "no error");
+  return error || errno;
+}
diff --git a/examples/5-variable-rate.c b/examples/5-variable-rate.c

new file mode 100644 (file)

index 0000000..a2496c9
--- /dev/null
+++ b/examples/5-variable-rate.c
@@ -0,0 +1,94 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Example 5:  Variable-rate resampling (N.B. experimental).  A test signal
+ * (held in a buffer) is resampled over a wide range of octaves.  Resampled
+ * data is sent to stdout as raw, float32 samples.  Choices of 2 test-signals
+ * and of 2 ways of varying the sample-rate are combined in a command-line
+ * option:
+ *
+ * Usage: ./5-variable-rate [0|1|2|3]
+ */
+
+#include <soxr.h>
+#include "examples-common.h"
+
+#define OCTAVES  5       /* Resampling range. ± */
+#define OLEN     16      /* Output length in seconds. */
+#define FS       44100   /* Output sampling rate in Hz. */
+
+/* Maps an output position, pos, in [0,1] to an I/O ratio within the range
+ * 2^-OCTAVES to 2^+OCTAVES.  If fm is 0, the ratio sweeps slowly, from one end
+ * of the range (at pos = 0) to the other (at pos = 1); if fm is non-0, pos is
+ * first replaced by a modulated value, giving a fast-changing ratio. */
+static double ioratio(double pos, int fm)
+{
+  double octave;             /* Exponent, relative to the centre of the range. */
+
+  if (fm != 0)
+    pos = .5 - cos(pos * 2 * M_PI) * .4 + sin(pos * OLEN * 20 * M_PI) * .05;
+
+  octave = 2 * OCTAVES * pos - OCTAVES;
+  return pow(2, octave);
+}
+
+/* Generates OLEN seconds (at FS Hz) of variable-rate resampled output on
+ * stdout.  The optional argument is masked to 2 bits: bit 0 selects the saw
+ * (rather than sine) test-signal; bit 1 selects the fast-changing (rather
+ * than slowly sweeping) ratio.  With no argument, 2 is used.  Returns 0 on
+ * success, or 1 if a resampler or I/O error was detected. */
+int main(int argc, char *arg[])
+{
+  int opt = argc <= 1? 2 : (atoi(arg[1]) & 3), saw = opt & 1, fm = opt & 2;
+  float ibuf[10 << OCTAVES], obuf[AL(ibuf)];
+  int i, wl = 2 << OCTAVES;
+  size_t ilen = AL(ibuf), need_input = 1;
+  size_t odone, total_odone, total_olen = OLEN * FS;
+  size_t olen1 = fm? 10 : AL(obuf); /* Small block-len if fast-changing ratio */
+  soxr_error_t error;
+
+  /* When creating a var-rate resampler, q_spec must be set as follows: */
+  soxr_quality_spec_t q_spec = soxr_quality_spec(SOXR_HQ, SOXR_VR);
+
+  /* The ratio of the given input rate and output rates must equate to the
+   * maximum I/O ratio that will be used: */
+  soxr_t soxr = soxr_create(1 << OCTAVES, 1, 1, &error, NULL, &q_spec, NULL);
+
+  if (!error) {
+    USE_STD_STDIO;
+
+    /* Generate input signal, sine or saw, with wave-length = wl: */
+    /* (ibuf holds a whole number of wave-lengths, and the same buffer is
+     * supplied to the resampler each time that more input is needed.) */
+    for (i = 0; i < (int)ilen; ++i)
+      ibuf[i] = (float)(saw? (i%wl)/(wl-1.)-.5 : .9 * sin(2 * M_PI * i / wl));
+
+    /* Set the initial resampling ratio (N.B. 3rd parameter = 0): */
+    soxr_set_io_ratio(soxr, ioratio(0, fm), 0);
+
+    /* Resample in blocks of size olen1: */
+    for (total_odone = 0; !error && total_odone < total_olen;) {
+
+      /* The last block might be shorter: */
+      size_t block_len = min(olen1, total_olen - total_odone);
+
+      /* Determine the position in [0,1] of the end of the current block: */
+      double pos = (double)(total_odone + block_len) / (double)total_olen;
+
+      /* Calculate an ioratio for this position and instruct the resampler to
+       * move smoothly to the new value, over the course of outputting the next
+       * 'block_len' samples (or give 0 for an instant change instead): */
+      soxr_set_io_ratio(soxr, ioratio(pos, fm), block_len);
+
+      /* Output the block of samples, supplying input samples as needed: */
+      /* (need_input carries over from the previous block: it is 0 here if
+       * that block was completed, so the first attempt at this block is made
+       * without supplying any new input.) */
+      do {
+        size_t len = need_input? ilen : 0;
+        error = soxr_process(soxr, ibuf, len, NULL, obuf, block_len, &odone);
+        fwrite(obuf, sizeof(float), odone, stdout);
+
+        /* Update counters for the current block and for the total length: */
+        block_len -= odone;
+        total_odone += odone;
+
+        /* If soxr_process did not provide the complete block, we must call it
+         * again, supplying more input samples: */
+        need_input = block_len != 0;
+
+      } while (need_input && !error);
+
+      /* Now that the block for the current ioratio is complete, go back
+       * round the main `for' loop in order to process the next block. */
+    }
+    /* NOTE(review): the resampler is deleted only if it was created without
+     * error -- confirm that nothing needs releasing when soxr_create fails. */
+    soxr_delete(soxr);
+  }
+                                                              /* Diagnostics: */
+  fprintf(stderr, "%-26s %s; I/O: %s\n", arg[0],
+      soxr_strerror(error), errno? strerror(errno) : "no error");
+  return error || errno;
+}
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt

new file mode 100644 (file)

index 0000000..862718a
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,37 @@
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# Choose which example sources to build: every numbered example when examples
+# are enabled; otherwise, when only the tests are enabled, just those whose
+# names match 3*.c.
+#
+# N.B. the options are named directly (`if (VAR)'), not expanded first
+# (`if (${VAR})'): the expanded form evaluates the variable's *value* as if it
+# were itself a variable name, and is a syntax error if the value is empty.
+if (BUILD_EXAMPLES)
+  project (soxr) # Adds c++ compiler
+  file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/[1-9]-*.[cC])
+elseif (BUILD_TESTS)
+  file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/3*.c)
+endif ()
+
+# The libsamplerate-bindings example is built for either purpose:
+if ((BUILD_EXAMPLES OR BUILD_TESTS) AND WITH_LSR_BINDINGS)
+  set (LSR_SOURCES 1a-lsr.c)
+endif ()
+
+# NOTE(review): only the C++ flags receive the OpenMP flags here (and it is
+# the C variant of the flags that is used); presumably the C flags are dealt
+# with by the parent CMakeLists.txt -- confirm.
+if (NOT BUILD_SHARED_LIBS AND OPENMP_FOUND)
+  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_C_FLAGS}")
+endif ()
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${PROJECT_C_FLAGS}")
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PROJECT_CXX_FLAGS}")
+link_libraries (${PROJECT_NAME})
+
+# One executable per source file, named after the file (less its extension);
+# the lsr example additionally needs the lsr bindings library:
+foreach (fe ${SOURCES} ${LSR_SOURCES})
+  get_filename_component (f ${fe} NAME_WE)
+  add_executable (${f} ${fe})
+  if ("${f}" STREQUAL "1a-lsr")
+    target_link_libraries (${f} soxr-lsr)
+  endif ()
+endforeach ()
+
+if (BUILD_TESTS AND WITH_LSR_BINDINGS)
+  add_test (lsr-bindings ${BIN}1a-lsr)
+endif ()
+
+# The example sources (and README) are installed as documentation:
+file (GLOB INSTALL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.[cCh])
+install (FILES ${INSTALL_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/README DESTINATION ${DOC_INSTALL_DIR}/examples)
diff --git a/examples/README b/examples/README

new file mode 100644 (file)

index 0000000..a58939b
--- /dev/null
+++ b/examples/README
@@ -0,0 +1,20 @@
+SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+
+These simple examples show the different ways that an application may
+interface with soxr.  Note that real-world applications may also have to
+deal with file-formats, codecs, (more sophisticated) dithering, etc., which
+are not covered here.
+
+With the library installed, the examples may be built using commands similar
+to the following.  On unix-like systems:
+
+    cc 1-single-block.c -lsoxr
+    cc 1a-lsr.c -lsoxr-lsr
+
+or, with MSVC on MS-Windows:
+
+    cl 1-single-block.c -I"C:/Program Files/soxr/include" "C:/Program Files/soxr/lib/soxr.lib"
+    cl 1a-lsr.c -I"C:/Program Files/soxr/include" "C:/Program Files/soxr/lib/soxr-lsr.lib"
+
+IDEs may hide such commands behind configuration screens and build menus --
+where applicable, consult your IDE's user-manual.
diff --git a/examples/examples-common.h b/examples/examples-common.h

new file mode 100644 (file)

index 0000000..585fac3
--- /dev/null
+++ b/examples/examples-common.h
@@ -0,0 +1,45 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Common includes etc. for the examples.  */
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _WIN32
+  /* Work-around for broken file-I/O on MS-Windows: */
+  #include <io.h>
+  #include <fcntl.h>
+  #define USE_STD_STDIO _setmode(_fileno(stdout), _O_BINARY), \
+                        _setmode(_fileno(stdin ), _O_BINARY);
+  /* Sometimes missing, so ensure that it is defined: */
+  #undef M_PI
+  #define M_PI 3.14159265358979323846
+#else
+  #define USE_STD_STDIO
+#endif
+
+#undef int16_t
+#define int16_t short
+
+#undef int32_t
+#if LONG_MAX > 2147483647L
+  #define int32_t int
+#elif LONG_MAX < 2147483647L
+  #error this programme requires that 'long int' has at least 32-bits
+#else
+  #define int32_t long
+#endif
+
+#undef min
+#undef max
+#define min(x,y) ((x)<(y)?(x):(y))
+#define max(x,y) ((x)>(y)?(x):(y))
+
+#define AL(a) (sizeof(a)/sizeof((a)[0]))  /* Array Length */
diff --git a/go b/go

new file mode 100755 (executable)

index 0000000..30adb94
--- /dev/null
+++ b/go
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# Usage: ./go [-jN] [BUILD-TYPE]
+# Configures, builds and tests in a directory named after the build type.
+
+j=                                    # Don't inherit j from the environment
+case $1 in -j*) j=$1; shift;; esac    # Support -jX for parallel build/test
+
+build=${1:-Release}                   # Build type (and directory); default
+
+rm -f CMakeCache.txt             # Prevent interference from any in-tree build
+
+# Quoted, so that a name containing white-space is handled correctly; and if
+# the directory can't be created or entered, stop rather than run cmake (with
+# `..' as its source directory) from the wrong place:
+mkdir -p "$build" && cd "$build" || exit 1
+
+# ($j is deliberately unquoted: it expands to nothing if -jX was not given.)
+cmake -DCMAKE_BUILD_TYPE="$build" .. &&
+  make $j &&
+    (ctest $j || echo "FAILURE details in $build/Testing/Temporary/LastTest.log")
diff --git a/go.bat b/go.bat

new file mode 100644 (file)

index 0000000..7d63de3
--- /dev/null
+++ b/go.bat
@@ -0,0 +1,27 @@
+@echo off
+rem SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+rem Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+rem Build type (also used as the name of the build directory); Release is
+rem used if no argument is given:
+set build=%1
+if x%build% == x set build=Release
+
+rem Prevent interference from any in-tree build
+del/f CMakeCache.txt
+
+mkdir %build%
+cd %build%
+
+rem Configure, build, then test; a failure at any stage skips the later ones:
+cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=%build% ..
+if errorlevel 1 goto end
+
+nmake
+if errorlevel 1 goto end
+
+nmake test
+if errorlevel 1 goto error
+goto end
+
+rem Reached only if the tests failed; the path is relative to the build dir:
+:error
+echo FAILURE details in Testing\Temporary\LastTest.log
+
+:end
diff --git a/inst-check b/inst-check

new file mode 100755 (executable)

index 0000000..8cf64b7
--- /dev/null
+++ b/inst-check
@@ -0,0 +1,25 @@
+#!/bin/sh
+set -e
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# Sanity-check of library installed on unix-like system
+
+# This script checks the installation of the entire library (including lsr).
+#
+# Distros using three separate packages can do the following (in order):
+#
+# * Install soxr pkg (i.e. basically, just the shared object)
+# * ./inst-check-soxr
+# * Install soxr-lsr pkg (i.e. basically, just the shared object)
+# * ./inst-check-soxr-lsr
+# * Install the -dev pkg (i.e. examples, headers, & pkg-config)
+# * ./inst-check PATH-OF-INSTALLED-EXAMPLES-DIR (e.g. /usr/share/doc/libsoxr/examples)
+
+# Where are the example source files:
+src=$1
+test x$src = x && src=/usr/local/share/doc/libsoxr/examples
+
+dir="$(dirname $(readlink -f $0))"
+$dir/inst-check-soxr $src
+$dir/inst-check-soxr-lsr $src
diff --git a/inst-check-soxr b/inst-check-soxr

new file mode 100755 (executable)

index 0000000..418f65b
--- /dev/null
+++ b/inst-check-soxr
@@ -0,0 +1,52 @@
+#!/bin/sh
+set -e
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# Sanity-check of sub-library installed on unix-like system
+
+arg="$1" # path to installed examples (if dev pkg installed); otherwise omitted
+dir="$(dirname $(readlink -f $0))"
+
+# Find the examples:
+src="$arg"
+test x"$src" = x && src="$dir/examples"
+cd $src
+
+# Somewhere to put the binaries:
+tmp=`mktemp -d`
+
+build_examples() {
+  if [ x"$arg" = x ]; then
+    echo "Examples in `pwd`; using local headers:" # for when dev pkg not installed
+    libs=-l$1
+    cflags=-I$dir/src 
+  else
+    echo "Examples in `pwd`; using pkg-config:"
+    libs=$(pkg-config --libs $1)
+    cflags=$(pkg-config --cflags $1)
+  fi
+  for f in ?$2-*.[cC]; do
+    cc=cc; echo $f | grep -q C$ && cc=c++
+    out=$tmp/`echo $f | sed "s/.[cC]$//"`
+    cmd="$cc $cflags -o $out $f $libs"
+    echo $cmd; $cmd
+  done
+}
+
+# Determine library:
+if [ `basename $0` = inst-check-soxr ]; then
+  build_examples soxr
+  gen="dd if=/dev/urandom count=1000"
+  $tmp/1-single-block 1 2 .
+  $gen 2> /dev/null | $tmp/2-stream                     2>&1 >$tmp/stdout
+  $gen 2> /dev/null | $tmp/3-options-input-fn 6 7 2 2 0 2>&1 >$tmp/stdout
+  $gen 2> /dev/null | $tmp/4-split-channels   7 6 2 2 3 2>&1 >$tmp/stdout  # Clipping expected here
+  $gen 2> /dev/null | $tmp/5-variable-rate              2>&1 >$tmp/stdout
+else
+  build_examples soxr-lsr a # lsr has 'a' suffix on example number.
+  $tmp/1a-lsr 1 2 .
+fi
+
+# Tidy up:
+rm -rf $tmp
diff --git a/inst-check-soxr-lsr b/inst-check-soxr-lsr

new file mode 120000 (symlink)

index 0000000..ec971fb
--- /dev/null
+++ b/inst-check-soxr-lsr
@@ -0,0 +1 @@
+inst-check-soxr
+\ No newline at end of file
diff --git a/msvc/README b/msvc/README

new file mode 100644 (file)

index 0000000..5a34eba
--- /dev/null
+++ b/msvc/README
@@ -0,0 +1,9 @@
+SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+
+CMake is able to configure, build (as either a DLL or a static library),
+and install libsoxr for general use on MS-Windows as on other OSs.
+However, for projects that prefer to maintain a more monolithic build
+structure using the MSVC compiler, the accompanying files may be useful.
+
+ * libsoxr.vcproj      Builds a static lib for MSVC ver >= 9 (2008).
+ * soxr-config.h       Pre-configured for a typical Win32 system.
diff --git a/msvc/libsoxr.vcproj b/msvc/libsoxr.vcproj

new file mode 100644 (file)

index 0000000..b1e1714
--- /dev/null
+++ b/msvc/libsoxr.vcproj
@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="Windows-1252"?>\r
+<VisualStudioProject\r
+       ProjectType="Visual C++"\r
+       Version="9.00"\r
+       Name="libsoxr"\r
+       ProjectGUID="{af9ad75c-4785-4432-bac3-adab1e7f1192}"\r
+       RootNamespace="libsoxr"\r
+       TargetFrameworkVersion="131072"\r
+       >\r
+       <Platforms>\r
+               <Platform Name="Win32" />\r
+       </Platforms>\r
+       <ToolFiles>\r
+       </ToolFiles>\r
+       <Configurations>\r
+               <Configuration\r
+                       Name="Debug|Win32"\r
+                       OutputDirectory="$(SolutionDir)$(ConfigurationName)"\r
+                       IntermediateDirectory="$(ConfigurationName)"\r
+                       ConfigurationType="4"\r
+                       CharacterSet="2"\r
+                       WholeProgramOptimization="0"\r
+                       >\r
+                       <Tool\r
+                               Name="VCCLCompilerTool"\r
+                               Optimization="0"\r
+                               AdditionalIncludeDirectories="."\r
+                               PreprocessorDefinitions="_DEBUG;_USE_MATH_DEFINES;_CRT_SECURE_NO_WARNINGS;SOXR_LIB"\r
+                               StringPooling="true"\r
+                               BasicRuntimeChecks="3"\r
+                               RuntimeLibrary="3"\r
+                               EnableFunctionLevelLinking="true"\r
+                               WarningLevel="3"\r
+                               DebugInformationFormat="4"\r
+                               CompileAs="0"\r
+                       />\r
+               </Configuration>\r
+               <Configuration\r
+                       Name="Release|Win32"\r
+                       OutputDirectory="$(SolutionDir)$(ConfigurationName)"\r
+                       IntermediateDirectory="$(ConfigurationName)"\r
+                       ConfigurationType="4"\r
+                       CharacterSet="2"\r
+                       WholeProgramOptimization="1"\r
+                       >\r
+                       <Tool\r
+                               Name="VCCLCompilerTool"\r
+                               Optimization="2"\r
+                               AdditionalIncludeDirectories="."\r
+                               PreprocessorDefinitions="NDEBUG;_USE_MATH_DEFINES;_CRT_SECURE_NO_WARNINGS;SOXR_LIB"\r
+                               StringPooling="true"\r
+                               RuntimeLibrary="2"\r
+                               EnableFunctionLevelLinking="true"\r
+                               WarningLevel="3"\r
+                               CompileAs="0"\r
+                       />\r
+               </Configuration>\r
+       </Configurations>\r
+       <References>\r
+       </References>\r
+       <Files>\r
+               <Filter Name="Source Files" >\r
+                       <File RelativePath="../src/data-io.c" />\r
+                       <File RelativePath="../src/dbesi0.c" />\r
+                       <File RelativePath="../src/fft4g32.c" />\r
+                       <File RelativePath="../src/fft4g64.c" />\r
+                       <File RelativePath="../src/filter.c" />\r
+                       <File RelativePath="../src/lsr.c" />\r
+                       <File RelativePath="../src/pffft32s.c" />\r
+                       <File RelativePath="../src/rate32.c" />\r
+                       <File RelativePath="../src/rate32s.c" />\r
+                       <File RelativePath="../src/rate64.c" />\r
+                       <File RelativePath="../src/simd.c" />\r
+                       <File RelativePath="../src/soxr.c" />\r
+                       <File RelativePath="../src/vr32.c" />\r
+               </Filter>\r
+       </Files>\r
+       <Globals>\r
+       </Globals>\r
+</VisualStudioProject>\r
diff --git a/msvc/soxr-config.h b/msvc/soxr-config.h

new file mode 100644 (file)

index 0000000..a158de4
--- /dev/null
+++ b/msvc/soxr-config.h
@@ -0,0 +1,49 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net\r
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */\r
+\r
+/* N.B. Pre-configured for typical MS-Windows systems.  However, the normal\r
+ * procedure is to use the cmake configuration and build system. See INSTALL. */\r
+\r
+#if !defined soxr_config_included\r
+#define soxr_config_included\r
+\r
+/* Build-configuration switches (1: enabled/available; 0: not), with values\r
+ * fixed here rather than being detected by a configuration step: */\r
+#define HAVE_SINGLE_PRECISION 1\r
+#define HAVE_DOUBLE_PRECISION 1\r
+#define HAVE_AVFFT            0\r
+#define HAVE_SIMD             1\r
+#define HAVE_FENV_H           0\r
+#define HAVE_LRINT            0\r
+#define WORDS_BIGENDIAN       0\r
+\r
+#include <limits.h>\r
+\r
+/* Minimal boolean type, replacing any existing macro definitions: */\r
+#undef bool\r
+#undef false\r
+#undef true\r
+#define bool int\r
+#define false 0\r
+#define true 1\r
+\r
+/* Fixed-width integer types, defined as macros (replacing any existing macro\r
+ * definitions) and chosen according to the width of `long', as given by\r
+ * LONG_MAX: */\r
+#undef int16_t\r
+#undef int32_t\r
+#undef int64_t\r
+#undef uint32_t\r
+#undef uint64_t\r
+#define int16_t short\r
+#if LONG_MAX > 2147483647L\r
+  /* `long' is wider than 32 bits: use it for int64_t, and `int' for int32_t: */\r
+  #define int32_t int\r
+  #define int64_t long\r
+#elif LONG_MAX < 2147483647L\r
+#error this library requires that 'long int' has at least 32-bits\r
+#else\r
+  /* `long' is 32 bits wide; the 64-bit type depends on the compiler: */\r
+  #define int32_t long\r
+  #if defined _MSC_VER\r
+    #define int64_t __int64\r
+  #else\r
+    #define int64_t long long\r
+  #endif\r
+#endif\r
+/* N.B. these rely on the macros above expanding to plain type keywords, so\r
+ * giving e.g. `unsigned int' or `unsigned long': */\r
+#define uint32_t unsigned int32_t\r
+#define uint64_t unsigned int64_t\r
+\r
+#endif\r
diff --git a/soxr-config.h.in b/soxr-config.h.in

new file mode 100644 (file)

index 0000000..227bcfd
--- /dev/null
+++ b/soxr-config.h.in
@@ -0,0 +1,46 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined soxr_config_included
+#define soxr_config_included
+
+/* Build-configuration switches.  Each @NAME@ is a placeholder to be replaced
+ * when this template is processed (presumably by cmake's configure_file --
+ * confirm in the top-level CMakeLists.txt): */
+#define HAVE_SINGLE_PRECISION @HAVE_SINGLE_PRECISION@
+#define HAVE_DOUBLE_PRECISION @HAVE_DOUBLE_PRECISION@
+#define HAVE_AVFFT            @HAVE_AVFFT@
+#define HAVE_SIMD             @HAVE_SIMD@
+#define HAVE_FENV_H           @HAVE_FENV_H@
+#define HAVE_LRINT            @HAVE_LRINT@
+#define WORDS_BIGENDIAN       @WORDS_BIGENDIAN@
+
+#include <limits.h>
+
+/* Minimal boolean type, replacing any existing macro definitions: */
+#undef bool
+#undef false
+#undef true
+#define bool int
+#define false 0
+#define true 1
+
+/* Fixed-width integer types, defined as macros (replacing any existing macro
+ * definitions) and chosen according to the width of `long', as given by
+ * LONG_MAX: */
+#undef int16_t
+#undef int32_t
+#undef int64_t
+#undef uint32_t
+#undef uint64_t
+#define int16_t short
+#if LONG_MAX > 2147483647L
+  /* `long' is wider than 32 bits: use it for int64_t, and `int' for int32_t: */
+  #define int32_t int
+  #define int64_t long
+#elif LONG_MAX < 2147483647L
+#error this library requires that 'long int' has at least 32-bits
+#else
+  /* `long' is 32 bits wide; the 64-bit type depends on the compiler: */
+  #define int32_t long
+  #if defined _MSC_VER
+    #define int64_t __int64
+  #else
+    #define int64_t long long
+  #endif
+#endif
+/* N.B. these rely on the macros above expanding to plain type keywords, so
+ * giving e.g. `unsigned int' or `unsigned long': */
+#define uint32_t unsigned int32_t
+#define uint64_t unsigned int64_t
+
+#endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt

new file mode 100644 (file)

index 0000000..2a21156
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,110 @@
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+add_definitions (${PROJECT_C_FLAGS} -DSOXR_LIB)  # Directory-wide; PROJECT_C_FLAGS is not set in this file.
+
+
+
+# Libsoxr configuration: choose the FFT sources and build the per-precision source lists.
+
+set (RDFT32 fft4g32)  # Default; RDFT32 feeds SP_SOURCES, RDFT32S feeds SIMD_SOURCES (both below).
+if (WITH_AVFFT AND AVCODEC_FOUND)  # AVCODEC_FOUND is presumably set by a find step outside this file.
+  set (RDFT32 avfft32)
+  set (RDFT32S avfft32s)
+elseif (WITH_PFFFT)
+  #set (RDFT32 pffft32)  # Disabled: RDFT32 keeps its fft4g32 default in this branch.
+  set (RDFT32S pffft32s)
+elseif (WITH_SIMD)
+  set (RDFT32S fft4g32s)
+endif ()
+
+if (WITH_DOUBLE_PRECISION)
+  set (DP_SOURCES rate64)
+endif ()
+
+if (WITH_SINGLE_PRECISION)
+  set (SP_SOURCES rate32 ${RDFT32})
+endif ()
+
+if (HAVE_SIMD)  # Tests HAVE_SIMD, whereas RDFT32S above is selected under the WITH_* options.
+  set (SIMD_SOURCES rate32s ${RDFT32S} simd)
+  foreach (source ${SIMD_SOURCES})  # Only these sources receive SIMD_C_FLAGS.
+    set_property (SOURCE ${source} PROPERTY COMPILE_FLAGS ${SIMD_C_FLAGS})
+  endforeach ()
+endif ()
+
+
+
+# Libsoxr: the main library target, named after the project.
+
+add_library (${PROJECT_NAME} ${LIB_TYPE} ${PROJECT_NAME}.c data-io dbesi0 filter fft4g64
+  ${SP_SOURCES} vr32 ${DP_SOURCES} ${SIMD_SOURCES})  # fft4g64 and vr32 are listed regardless of the precision options.
+set_target_properties (${PROJECT_NAME} PROPERTIES
+  VERSION "${SO_VERSION}"
+  SOVERSION ${SO_VERSION_MAJOR}
+  INSTALL_NAME_DIR ${LIB_INSTALL_DIR}
+  LINK_INTERFACE_LIBRARIES ""
+  PUBLIC_HEADER "${PROJECT_NAME}.h")  # SO_VERSION* and LIB_INSTALL_DIR are not set in this file.
+if (BUILD_FRAMEWORK)
+  set_target_properties (${PROJECT_NAME} PROPERTIES FRAMEWORK TRUE)
+elseif (NOT WIN32)  # Otherwise, except on Windows, generate and install a pkg-config file from the .pc.in template.
+  set (TARGET_PCS ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc)
+  configure_file (${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_NAME}.pc.in ${TARGET_PCS})
+  install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
+endif ()
+
+
+
+# LSR bindings: optional second library built from lsr, linked against the main library, with its own version numbers.
+
+if (WITH_LSR_BINDINGS)
+  set (LSR ${PROJECT_NAME}-lsr)  # Target name; also the stem of its header and .pc file below.
+  set (LSR_SO_VERSION 0.1.9)
+  set (LSR_SO_VERSION_MAJOR 0)
+  add_library (${LSR} ${LIB_TYPE} lsr)
+  target_link_libraries (${LSR} ${PROJECT_NAME})
+  set_target_properties (${LSR} PROPERTIES
+    VERSION "${LSR_SO_VERSION}"
+    SOVERSION ${LSR_SO_VERSION_MAJOR}
+    INSTALL_NAME_DIR ${LIB_INSTALL_DIR}
+    LINK_INTERFACE_LIBRARIES ""
+    PUBLIC_HEADER "${LSR}.h")
+  if (BUILD_FRAMEWORK)
+    set_target_properties (${LSR} PROPERTIES FRAMEWORK TRUE)
+  elseif (NOT WIN32)  # Same pkg-config handling as for the main library.
+    set (TARGET_PCS "${TARGET_PCS} ${CMAKE_CURRENT_BINARY_DIR}/${LSR}.pc")  # Quoted: TARGET_PCS stays one space-separated string, not a list.
+    configure_file (${CMAKE_CURRENT_SOURCE_DIR}/${LSR}.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LSR}.pc)
+    install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${LSR}.pc DESTINATION ${LIB_INSTALL_DIR}/pkgconfig)
+  endif ()
+endif ()
+
+
+
+# Installation (from build from source): installs the library targets together with their PUBLIC_HEADER files.
+
+install (TARGETS ${PROJECT_NAME} ${LSR}
+  FRAMEWORK DESTINATION ${FRAMEWORK_INSTALL_DIR}
+  LIBRARY DESTINATION ${LIB_INSTALL_DIR}
+  RUNTIME DESTINATION ${BIN_INSTALL_DIR}
+  ARCHIVE DESTINATION ${LIB_INSTALL_DIR}
+  PUBLIC_HEADER DESTINATION ${INCLUDE_INSTALL_DIR})  # LSR is only set in this file when WITH_LSR_BINDINGS is on; the *_INSTALL_DIR values come from elsewhere.
+
+
+
+# Packaging (for unix-like distributions): collect library and header paths, then generate the two .src files.
+
+get_property (LIB1 TARGET ${PROJECT_NAME} PROPERTY LOCATION)
+if (BUILD_SHARED_LIBS)  # Replace the bare location by two names: with the major version and with the full version appended.
+  set (LIB1 ${LIB1}.${SO_VERSION_MAJOR} ${LIB1}.${SO_VERSION})
+endif ()
+list (APPEND TARGET_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_NAME}.h")
+if (WITH_LSR_BINDINGS)  # Same treatment for the LSR library and its header.
+  get_property (LIB2 TARGET ${LSR} PROPERTY LOCATION)
+  if (BUILD_SHARED_LIBS)
+    set (LIB2 ${LIB2}.${LSR_SO_VERSION_MAJOR} ${LIB2}.${LSR_SO_VERSION})
+  endif ()
+  list (APPEND TARGET_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/${LSR}.h")
+endif ()
+set (TARGET_LIBS ${LIB1} ${LIB2})  # LIB2 is not set in this file when the LSR bindings are off.
+configure_file (${CMAKE_CURRENT_SOURCE_DIR}/libsoxr.src.in ${CMAKE_CURRENT_BINARY_DIR}/libsoxr.src)  # Templates not shown; presumably they consume the TARGET_* variables.
+configure_file (${CMAKE_CURRENT_SOURCE_DIR}/libsoxr-dev.src.in ${CMAKE_CURRENT_BINARY_DIR}/libsoxr-dev.src)
diff --git a/src/aliases.h b/src/aliases.h

new file mode 100644 (file)

index 0000000..eb42bdc
--- /dev/null
+++ b/src/aliases.h
@@ -0,0 +1,37 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if defined SOXR_LIB
+/* When SOXR_LIB is defined, every internal lsx_* identifier below is renamed to its _soxr_*-prefixed counterpart. */
+#define lsx_bessel_I_0                 _soxr_bessel_I_0
+#define lsx_cdft_f                     _soxr_cdft_f
+#define lsx_cdft                       _soxr_cdft
+#define lsx_clear_fft_cache_f          _soxr_clear_fft_cache_f
+#define lsx_clear_fft_cache            _soxr_clear_fft_cache
+#define lsx_ddct_f                     _soxr_ddct_f
+#define lsx_ddct                       _soxr_ddct
+#define lsx_ddst_f                     _soxr_ddst_f
+#define lsx_ddst                       _soxr_ddst
+#define lsx_design_lpf                 _soxr_design_lpf
+#define lsx_dfct_f                     _soxr_dfct_f
+#define lsx_dfct                       _soxr_dfct
+#define lsx_dfst_f                     _soxr_dfst_f
+#define lsx_dfst                       _soxr_dfst
+#define lsx_fir_to_phase               _soxr_fir_to_phase
+#define lsx_init_fft_cache_f           _soxr_init_fft_cache_f
+#define lsx_init_fft_cache             _soxr_init_fft_cache
+#define lsx_kaiser_beta                _soxr_kaiser_beta
+#define lsx_kaiser_params              _soxr_kaiser_params
+#define lsx_make_lpf                   _soxr_make_lpf
+#define lsx_ordered_convolve_f         _soxr_ordered_convolve_f
+#define lsx_ordered_convolve           _soxr_ordered_convolve
+#define lsx_ordered_partial_convolve_f _soxr_ordered_partial_convolve_f
+#define lsx_ordered_partial_convolve   _soxr_ordered_partial_convolve
+#define lsx_rdft_f                     _soxr_rdft_f
+#define lsx_rdft                       _soxr_rdft
+#define lsx_safe_cdft_f                _soxr_safe_cdft_f
+#define lsx_safe_cdft                  _soxr_safe_cdft
+#define lsx_safe_rdft_f                _soxr_safe_rdft_f
+#define lsx_safe_rdft                  _soxr_safe_rdft
+
+#endif /* SOXR_LIB */
diff --git a/src/avfft32.c b/src/avfft32.c

new file mode 100644 (file)

index 0000000..5be13d2
--- /dev/null
+++ b/src/avfft32.c
@@ -0,0 +1,27 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <math.h>
+#include <libavcodec/avfft.h>
+#include "filter.h"
+
+static int log2_rounded(int len) {return (int)(log(len)/log(2)+.5);} /* log2(len) rounded to the nearest integer */
+static void * forward_setup(int len) {return av_rdft_init(log2_rounded(len), DFT_R2C);}
+static void * backward_setup(int len) {return av_rdft_init(log2_rounded(len), IDFT_C2R);}
+static void rdft(int length, void * setup, float * h) {(void)length; av_rdft_calc(setup, h);} /* length is unused */
+static int multiplier(void) {return 2;}
+static void nothing(void) {}
+typedef void (* fn_t)(void); /* common pointer type so differently-typed callbacks can share one table */
+fn_t _soxr_rdft32_cb[] = {
+  (fn_t)forward_setup,
+  (fn_t)backward_setup,
+  (fn_t)av_rdft_end,
+  (fn_t)rdft, /* the same transform routine fills this slot and the next three */
+  (fn_t)rdft,
+  (fn_t)rdft,
+  (fn_t)rdft,
+  (fn_t)_soxr_ordered_convolve_f,
+  (fn_t)_soxr_ordered_partial_convolve_f,
+  (fn_t)multiplier,
+  (fn_t)nothing,
+};
diff --git a/src/avfft32s.c b/src/avfft32s.c

new file mode 100644 (file)

index 0000000..75e485e
--- /dev/null
+++ b/src/avfft32s.c
@@ -0,0 +1,27 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <math.h>
+#include <libavcodec/avfft.h>
+#include "simd.h"
+
+static int log2_rounded(int len) {return (int)(log(len)/log(2)+.5);} /* log2(len) rounded to the nearest integer */
+static void * forward_setup(int len) {return av_rdft_init(log2_rounded(len), DFT_R2C);}
+static void * backward_setup(int len) {return av_rdft_init(log2_rounded(len), IDFT_C2R);}
+static void rdft(int length, void * setup, float * h) {(void)length; av_rdft_calc(setup, h);} /* length is unused */
+static int multiplier(void) {return 2;}
+static void nothing(void) {}
+typedef void (* fn_t)(void); /* common pointer type so differently-typed callbacks can share one table */
+fn_t _soxr_rdft32s_cb[] = {
+  (fn_t)forward_setup,
+  (fn_t)backward_setup,
+  (fn_t)av_rdft_end,
+  (fn_t)rdft, /* the same transform routine fills this slot and the next three */
+  (fn_t)rdft,
+  (fn_t)rdft,
+  (fn_t)rdft,
+  (fn_t)_soxr_ordered_convolve_simd,
+  (fn_t)_soxr_ordered_partial_convolve_simd,
+  (fn_t)multiplier,
+  (fn_t)nothing,
+};
diff --git a/src/ccrw2.h b/src/ccrw2.h

new file mode 100644 (file)

index 0000000..b42185b
--- /dev/null
+++ b/src/ccrw2.h
@@ -0,0 +1,75 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Concurrent Control with "Readers" and "Writers", P.J. Courtois et al, 1971 */
+
+#if !defined ccrw2_included
+#define ccrw2_included
+
+#if defined SOXR_LIB
+#include "internal.h"
+#endif
+
+#if defined _OPENMP
+/* Every macro takes a ccrw2_t lvalue (not a pointer); the argument is parenthesised, so expressions such as *ptr work. */
+#include <omp.h>
+
+typedef struct {
+  int readcount, writecount; /* initial value = 0 */
+  omp_lock_t mutex_1, mutex_2, mutex_3, w, r; /* initial value = 1 */
+} ccrw2_t; /* Problem #2: `writers-preference' */
+/* NOTE(review): w and r may be released by a different thread than the one that set them - confirm the OpenMP runtime permits this. */
+#define ccrw2_become_reader(p) do { /* the first reader in also takes w, which writers need */\
+  omp_set_lock(&(p).mutex_3);\
+    omp_set_lock(&(p).r); /* held by the first writer while any writer is registered */\
+      omp_set_lock(&(p).mutex_1); /* guards readcount */\
+        if (++(p).readcount == 1) omp_set_lock(&(p).w);\
+      omp_unset_lock(&(p).mutex_1);\
+    omp_unset_lock(&(p).r);\
+  omp_unset_lock(&(p).mutex_3);\
+} while (0)
+#define ccrw2_cease_reading(p) do { /* the last reader out releases w */\
+  omp_set_lock(&(p).mutex_1);\
+    if (!--(p).readcount) omp_unset_lock(&(p).w);\
+  omp_unset_lock(&(p).mutex_1);\
+} while (0)
+#define ccrw2_become_writer(p) do { /* the first writer takes r; each writer then takes w */\
+  omp_set_lock(&(p).mutex_2); /* guards writecount */\
+    if (++(p).writecount == 1) omp_set_lock(&(p).r);\
+  omp_unset_lock(&(p).mutex_2);\
+  omp_set_lock(&(p).w);\
+} while (0)
+#define ccrw2_cease_writing(p) do { /* releases w; the last writer out also releases r */\
+  omp_unset_lock(&(p).w);\
+  omp_set_lock(&(p).mutex_2);\
+    if (!--(p).writecount) omp_unset_lock(&(p).r);\
+  omp_unset_lock(&(p).mutex_2);\
+} while (0)
+#define ccrw2_init(p) do { /* creates the five locks only: readcount and writecount must already be zero */\
+  omp_init_lock(&(p).mutex_1);\
+  omp_init_lock(&(p).mutex_2);\
+  omp_init_lock(&(p).mutex_3);\
+  omp_init_lock(&(p).w);\
+  omp_init_lock(&(p).r);\
+} while (0)
+#define ccrw2_clear(p) do { /* destroys the locks in the reverse of their creation order */\
+  omp_destroy_lock(&(p).r);\
+  omp_destroy_lock(&(p).w);\
+  omp_destroy_lock(&(p).mutex_3);\
+  omp_destroy_lock(&(p).mutex_2);\
+  omp_destroy_lock(&(p).mutex_1);\
+} while (0)
+
+#else
+/* Without OpenMP every operation is a no-op that merely evaluates its argument. */
+typedef int ccrw2_t;
+#define ccrw2_become_reader(x) (void)(x)
+#define ccrw2_cease_reading(x) (void)(x)
+#define ccrw2_become_writer(x) (void)(x)
+#define ccrw2_cease_writing(x) (void)(x)
+#define ccrw2_init(x) (void)(x)
+#define ccrw2_clear(x) (void)(x)
+
+#endif /* _OPENMP */
+
+#endif
diff --git a/src/data-io.c b/src/data-io.c

new file mode 100644 (file)

index 0000000..1cd8e7f
--- /dev/null
+++ b/src/data-io.c
@@ -0,0 +1,252 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <limits.h>
+#include <math.h>
+#include <string.h>
+
+#include "data-io.h"
+#include "internal.h"
+
+
+
+#define DEINTERLEAVE_FROM(T,flag) do { /* Reads T samples at *src0; uses dest, src0, n and ch from the enclosing function. */ \
+  size_t k; unsigned c; \
+  T const * in = *src0; \
+  if (ch > 1) /* several channels: split each frame across the per-channel buffers */ \
+    for (k = 0; k < n; ++k) for (c = 0; c < ch; ++c) dest[c][k] = (DEINTERLEAVE_TO)*in++; \
+  else if (flag) /* one channel and the caller says no conversion is needed: block copy */ \
+    memcpy(dest[0], in, n * sizeof(T)), in += n; \
+  else for (k = 0; k < n; ++k) dest[0][k] = (DEINTERLEAVE_TO)*in++; /* one channel, converting */ \
+  *src0 = in; /* leave the caller's cursor just past the consumed samples */ \
+} while (0)
+
+
+
+#if HAVE_DOUBLE_PRECISION
+#define DEINTERLEAVE_TO double
+/* Splits interleaved samples of format data_type into per-channel double buffers; no rounding or clipping is needed here. */
+void _soxr_deinterleave(double * * dest, soxr_datatype_t data_type,
+    void const * * src0, size_t n, unsigned ch)
+{
+  switch (data_type & 3) { /* only the two low bits choose the sample format */
+    case SOXR_INT16:   DEINTERLEAVE_FROM(int16_t, 0); return;
+    case SOXR_INT32:   DEINTERLEAVE_FROM(int32_t, 0); return;
+    case SOXR_FLOAT64: DEINTERLEAVE_FROM(double, 1); return; /* same type as the output: flag permits a plain copy */
+    case SOXR_FLOAT32: DEINTERLEAVE_FROM(float, 0); return;
+  }
+}
+#endif
+
+
+
+#if HAVE_SINGLE_PRECISION
+#undef DEINTERLEAVE_TO
+#define DEINTERLEAVE_TO float
+/* Splits interleaved samples of format data_type into per-channel float buffers; no rounding or clipping is needed here. */
+void _soxr_deinterleave_f(float * * dest, soxr_datatype_t data_type,
+    void const * * src0, size_t n, unsigned ch)
+{
+  switch (data_type & 3) { /* only the two low bits choose the sample format */
+    case SOXR_INT16:   DEINTERLEAVE_FROM(int16_t, 0); return;
+    case SOXR_INT32:   DEINTERLEAVE_FROM(int32_t, 0); return;
+    case SOXR_FLOAT64: DEINTERLEAVE_FROM(double, 0); return;
+    case SOXR_FLOAT32: DEINTERLEAVE_FROM(float, 1); return; /* same type as the output: flag permits a plain copy */
+  }
+}
+#endif
+
+
+
+#include "rint.h"
+
+#if HAVE_FENV_H /* <fenv.h> is available: use the standard exception-flag calls. */
+  #include <fenv.h>
+  #define fe_test_invalid() fetestexcept(FE_INVALID)
+  #define fe_clear_invalid() feclearexcept(FE_INVALID)
+#elif defined _MSC_VER /* MSVC fallback when HAVE_FENV_H is 0. */
+  #define FE_INVALID 1 /* Used below as the mask applied to the stored FPU status word. */
+  #if defined _WIN64
+    #include <float.h>
+    #define fe_test_invalid() (_statusfp() & _SW_INVALID)
+    #define fe_clear_invalid _clearfp /* FIXME clears all */
+  #else
+  static __inline int fe_test_invalid() /* Non-zero when the stored status word has the FE_INVALID bit set. */
+  {
+    short status_word;
+    __asm fnstsw status_word
+    return status_word & FE_INVALID;
+  }
+
+  static __inline int fe_clear_invalid() /* Saves the FPU environment, clears the FE_INVALID bit, reloads it; always returns 0. */
+  {
+    int16_t status[14]; /* 28-byte buffer for fnstenv/fldenv */
+    __asm fnstenv status
+    status[2] &= ~FE_INVALID; /* presumably 16-bit index 2 is the status word in the saved image - confirm against the x87 layout */
+    __asm fldenv status
+    return 0;
+  }
+  #endif
+#endif /* If neither branch is taken, this block defines no helpers. */
+
+
+
+#if defined FE_INVALID && defined FPU_RINT32 && defined __STDC_VERSION__
+  #if __STDC_VERSION__ >= 199901L
+    #pragma STDC FENV_ACCESS ON
+  #endif
+#endif
+/* Converters from double: rint-clip.h is included three times, each time after (re)defining the macros it presumably consumes. */
+#if HAVE_DOUBLE_PRECISION
+#define FLOATX double
+/* 1) 32-bit integer output. */
+#define LSX_RINT_CLIP_2 lsx_rint32_clip_2
+#define LSX_RINT_CLIP lsx_rint32_clip
+#define RINT_CLIP rint32_clip
+#define RINT rint32
+#if defined FPU_RINT32
+  #define FPU_RINT
+#endif
+#define RINT_T int32_t
+#define RINT_MAX 2147483647L
+#include "rint-clip.h"
+/* 2) 16-bit integer output. The macros are redefined without #undef here, so rint-clip.h presumably undefines them itself. */
+#define LSX_RINT_CLIP_2 lsx_rint16_clip_2
+#define LSX_RINT_CLIP lsx_rint16_clip
+#define RINT_CLIP rint16_clip
+#define RINT rint16
+#if defined FPU_RINT16
+  #define FPU_RINT
+#endif
+#define RINT_T int16_t
+#define RINT_MAX 32767
+#include "rint-clip.h"
+/* 3) 16-bit integer output again, this time with DITHER defined. */
+#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither
+#define LSX_RINT_CLIP lsx_rint16_clip_dither
+#define RINT_CLIP rint16_clip_dither
+#define RINT rint16
+#if defined FPU_RINT16
+  #define FPU_RINT
+#endif
+#define RINT_T int16_t
+#define RINT_MAX 32767
+#define DITHER
+#include "rint-clip.h"
+
+#undef FLOATX
+#endif
+
+
+
+#if HAVE_SINGLE_PRECISION
+#define FLOATX float
+/* Converters from float: same three rint-clip.h inclusions as for double, with _f-suffixed names. 1) 32-bit integer output. */
+#define LSX_RINT_CLIP_2 lsx_rint32_clip_2_f
+#define LSX_RINT_CLIP lsx_rint32_clip_f
+#define RINT_CLIP rint32_clip_f
+#define RINT rint32
+#if defined FPU_RINT32
+  #define FPU_RINT
+#endif
+#define RINT_T int32_t
+#define RINT_MAX 2147483647L
+#include "rint-clip.h"
+/* 2) 16-bit integer output. */
+#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_f
+#define LSX_RINT_CLIP lsx_rint16_clip_f
+#define RINT_CLIP rint16_clip_f
+#define RINT rint16
+#if defined FPU_RINT16
+  #define FPU_RINT
+#endif
+#define RINT_T int16_t
+#define RINT_MAX 32767
+#include "rint-clip.h"
+/* 3) 16-bit integer output with DITHER defined. */
+#define LSX_RINT_CLIP_2 lsx_rint16_clip_2_dither_f
+#define LSX_RINT_CLIP lsx_rint16_clip_dither_f
+#define RINT_CLIP rint16_clip_dither_f
+#define RINT rint16
+#if defined FPU_RINT16
+  #define FPU_RINT
+#endif
+#define RINT_T int16_t
+#define RINT_MAX 32767
+#define DITHER
+#include "rint-clip.h"
+
+#undef FLOATX
+#endif
+/* Switch FENV_ACCESS back off, under the same conditions that are used in this file to switch it on. */
+#if defined FE_INVALID && defined FPU_RINT32 && defined __STDC_VERSION__
+  #if __STDC_VERSION__ >= 199901L
+    #pragma STDC FENV_ACCESS OFF
+  #endif
+#endif
+
+
+
+#define INTERLEAVE_TO(T,flag) do { /* Writes T samples at *dest0; uses dest0, src, n and ch from the enclosing function and returns from it. */ \
+  size_t k; unsigned c; \
+  T * out = *dest0; \
+  if (ch > 1) /* several channels: emit one frame per sample index */ \
+    for (k = 0; k < n; ++k) for (c = 0; c < ch; ++c) *out++ = (T)src[c][k]; \
+  else if (flag) /* one channel and the caller says no conversion is needed: block copy */ \
+    memcpy(out, src[0], n * sizeof(T)), out += n; \
+  else for (k = 0; k < n; ++k) *out++ = (T)src[0][k]; /* one channel, converting */ \
+  *dest0 = out; /* leave the caller's cursor just past the written samples */ \
+  return 0; /* this path never reports clipping */ \
+} while (0)
+
+#if HAVE_DOUBLE_PRECISION
+/* Writes n samples from each of ch double channel buffers to *dest0 as data_type; returns the converters' clip count (0 for float output). */
+size_t _soxr_interleave(soxr_datatype_t data_type, void * * dest0,
+  double const * const * src, size_t n, unsigned ch, unsigned long * seed)
+{
+  int const mono = ch == 1; /* single-channel input uses the simpler converters */
+  switch (data_type & 3) {
+    case SOXR_FLOAT32: INTERLEAVE_TO(float, 0);  /* the macro returns 0 itself */
+    case SOXR_FLOAT64: INTERLEAVE_TO(double, 1); /* the macro returns 0 itself */
+
+    case SOXR_INT32:
+      if (mono) return lsx_rint32_clip(dest0, src[0], n);
+      return lsx_rint32_clip_2(dest0, src, ch, n);
+
+    case SOXR_INT16:
+      if (seed) { /* a non-null seed selects the dithering converters */
+        if (mono) return lsx_rint16_clip_dither(dest0, src[0], n, seed);
+        return lsx_rint16_clip_2_dither(dest0, src, ch, n, seed);
+      }
+      if (mono) return lsx_rint16_clip(dest0, src[0], n);
+      return lsx_rint16_clip_2(dest0, src, ch, n);
+  }
+  return 0;
+}
+#endif
+
+#if HAVE_SINGLE_PRECISION
+/* Writes n samples from each of ch float channel buffers to *dest0 as data_type; returns the converters' clip count (0 for float output). */
+size_t _soxr_interleave_f(soxr_datatype_t data_type, void * * dest0,
+  float const * const * src, size_t n, unsigned ch, unsigned long * seed)
+{
+  int const mono = ch == 1; /* single-channel input uses the simpler converters */
+  switch (data_type & 3) {
+    case SOXR_FLOAT32: INTERLEAVE_TO(float, 1);  /* the macro returns 0 itself */
+    case SOXR_FLOAT64: INTERLEAVE_TO(double, 0); /* the macro returns 0 itself */
+
+    case SOXR_INT32:
+      if (mono) return lsx_rint32_clip_f(dest0, src[0], n);
+      return lsx_rint32_clip_2_f(dest0, src, ch, n);
+
+    case SOXR_INT16:
+      if (seed) { /* a non-null seed selects the dithering converters */
+        if (mono) return lsx_rint16_clip_dither_f(dest0, src[0], n, seed);
+        return lsx_rint16_clip_2_dither_f(dest0, src, ch, n, seed);
+      }
+      if (mono) return lsx_rint16_clip_f(dest0, src[0], n);
+      return lsx_rint16_clip_2_f(dest0, src, ch, n);
+  }
+  return 0;
+}
+#endif
diff --git a/src/data-io.h b/src/data-io.h

new file mode 100644 (file)

index 0000000..83a0a13
--- /dev/null
+++ b/src/data-io.h
@@ -0,0 +1,39 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined soxr_data_io_included
+#define soxr_data_io_included
+
+#include "soxr.h"
+/* Splits n interleaved frames of ch channels, read at *src0 in format data_type, into dest[0..ch-1]; advances *src0. */
+void _soxr_deinterleave(
+    double * * dest,
+    soxr_datatype_t data_type,
+    void const * * src0,
+    size_t n,
+    unsigned ch);
+/* Same operation, producing float channel buffers. data-io.c defines each variant only when its HAVE_*_PRECISION is non-zero. */
+void _soxr_deinterleave_f(
+    float * * dest,
+    soxr_datatype_t data_type,
+    void const * * src0,
+    size_t n,
+    unsigned ch);
+/* Writes n samples from each of ch double channel buffers to *dest in format data_type; a non-null seed selects dithering for 16-bit output. */
+size_t /* clips */ _soxr_interleave(
+    soxr_datatype_t data_type,
+    void * * dest,
+    double const * const * src,
+    size_t n,
+    unsigned ch,
+    unsigned long * seed);
+/* Same operation, reading float channel buffers. */
+size_t /* clips */ _soxr_interleave_f(
+    soxr_datatype_t data_type,
+    void * * dest,
+    float const * const * src,
+    size_t n,
+    unsigned ch,
+    unsigned long * seed);
+
+#endif
diff --git a/src/dbesi0.c b/src/dbesi0.c

new file mode 100644 (file)

index 0000000..654216e
--- /dev/null
+++ b/src/dbesi0.c
@@ -0,0 +1,149 @@
+/*  Copyright(C) 1996 Takuya OOURA
+
+You may use, copy, modify this code for any purpose and
+without fee.
+
+Package home:  http://www.kurims.kyoto-u.ac.jp/~ooura/bessel.html
+*/
+
+#include "filter.h"
+#define dbesi0 lsx_bessel_I_0
+
+/* Bessel I_0(x) function in double precision */
+
+#include <math.h>
+
+double dbesi0(double x)  /* Returns I_0(x) from piecewise polynomial fits in |x|; NaN or infinite x yields fabs(x). */
+{
+    int k;
+    double w, t, y;
+    static const double a[65] = {  /* |x| < 8.5: five 13-coefficient polynomials in t = x*x/16 */
+        8.5246820682016865877e-11, 2.5966600546497407288e-9,
+        7.9689994568640180274e-8, 1.9906710409667748239e-6,
+        4.0312469446528002532e-5, 6.4499871606224265421e-4,
+        0.0079012345761930579108, 0.071111111109207045212,
+        0.444444444444724909, 1.7777777777777532045,
+        4.0000000000000011182, 3.99999999999999998,
+        1.0000000000000000001,
+        1.1520919130377195927e-10, 2.2287613013610985225e-9,
+        8.1903951930694585113e-8, 1.9821560631611544984e-6,
+        4.0335461940910133184e-5, 6.4495330974432203401e-4,
+        0.0079013012611467520626, 0.071111038160875566622,
+        0.44444450319062699316, 1.7777777439146450067,
+        4.0000000132337935071, 3.9999999968569015366,
+        1.0000000003426703174,
+        1.5476870780515238488e-10, 1.2685004214732975355e-9,
+        9.2776861851114223267e-8, 1.9063070109379044378e-6,
+        4.0698004389917945832e-5, 6.4370447244298070713e-4,
+        0.0079044749458444976958, 0.071105052411749363882,
+        0.44445280640924755082, 1.7777694934432109713,
+        4.0000055808824003386, 3.9999977081165740932,
+        1.0000004333949319118,
+        2.0675200625006793075e-10, -6.1689554705125681442e-10,
+        1.2436765915401571654e-7, 1.5830429403520613423e-6,
+        4.2947227560776583326e-5, 6.3249861665073441312e-4,
+        0.0079454472840953930811, 0.070994327785661860575,
+        0.44467219586283000332, 1.7774588182255374745,
+        4.0003038986252717972, 3.9998233869142057195,
+        1.0000472932961288324,
+        2.7475684794982708655e-10, -3.8991472076521332023e-9,
+        1.9730170483976049388e-7, 5.9651531561967674521e-7,
+        5.1992971474748995357e-5, 5.7327338675433770752e-4,
+        0.0082293143836530412024, 0.069990934858728039037,
+        0.44726764292723985087, 1.7726685170014087784,
+        4.0062907863712704432, 3.9952750700487845355,
+        1.0016354346654179322
+    };
+    static const double b[70] = {  /* 8.5 <= |x| < 12.5: five 14-coefficient polynomials in t = |x| - (int)|x| */
+        6.7852367144945531383e-8, 4.6266061382821826854e-7,
+        6.9703135812354071774e-6, 7.6637663462953234134e-5,
+        7.9113515222612691636e-4, 0.0073401204731103808981,
+        0.060677114958668837046, 0.43994941411651569622,
+        2.7420017097661750609, 14.289661921740860534,
+        59.820609640320710779, 188.78998681199150629,
+        399.8731367825601118, 427.56411572180478514,
+        1.8042097874891098754e-7, 1.2277164312044637357e-6,
+        1.8484393221474274861e-5, 2.0293995900091309208e-4,
+        0.0020918539850246207459, 0.019375315654033949297,
+        0.15985869016767185908, 1.1565260527420641724,
+        7.1896341224206072113, 37.354773811947484532,
+        155.80993164266268457, 489.5211371158540918,
+        1030.9147225169564806, 1093.5883545113746958,
+        4.8017305613187493564e-7, 3.261317843912380074e-6,
+        4.9073137508166159639e-5, 5.3806506676487583755e-4,
+        0.0055387918291051866561, 0.051223717488786549025,
+        0.42190298621367914765, 3.0463625987357355872,
+        18.895299447327733204, 97.915189029455461554,
+        407.13940115493494659, 1274.3088990480582632,
+        2670.9883037012547506, 2815.7166284662544712,
+        1.2789926338424623394e-6, 8.6718263067604918916e-6,
+        1.3041508821299929489e-4, 0.001428224737372747892,
+        0.014684070635768789378, 0.13561403190404185755,
+        1.1152592585977393953, 8.0387088559465389038,
+        49.761318895895479206, 257.2684232313529138,
+        1066.8543146269566231, 3328.3874581009636362,
+        6948.8586598121634874, 7288.4893398212481055,
+        3.409350368197032893e-6, 2.3079025203103376076e-5,
+        3.4691373283901830239e-4, 0.003794994977222908545,
+        0.038974209677945602145, 0.3594948380414878371,
+        2.9522878893539528226, 21.246564609514287056,
+        131.28727387146173141, 677.38107093296675421,
+        2802.3724744545046518, 8718.5731420798254081,
+        18141.348781638832286, 18948.925349296308859
+    };
+    static const double c[45] = {  /* |x| >= 12.5: five 9-coefficient polynomials in t = 60/|x| */
+        2.5568678676452702768e-15, 3.0393953792305924324e-14,
+        6.3343751991094840009e-13, 1.5041298011833009649e-11,
+        4.4569436918556541414e-10, 1.746393051427167951e-8,
+        1.0059224011079852317e-6, 1.0729838945088577089e-4,
+        0.05150322693642527738,
+        5.2527963991711562216e-15, 7.202118481421005641e-15,
+        7.2561421229904797156e-13, 1.482312146673104251e-11,
+        4.4602670450376245434e-10, 1.7463600061788679671e-8,
+        1.005922609132234756e-6, 1.0729838937545111487e-4,
+        0.051503226936437300716,
+        1.3365917359358069908e-14, -1.2932643065888544835e-13,
+        1.7450199447905602915e-12, 1.0419051209056979788e-11,
+        4.58047881980598326e-10, 1.7442405450073548966e-8,
+        1.0059461453281292278e-6, 1.0729837434500161228e-4,
+        0.051503226940658446941,
+        5.3771611477352308649e-14, -1.1396193006413731702e-12,
+        1.2858641335221653409e-11, -5.9802086004570057703e-11,
+        7.3666894305929510222e-10, 1.6731837150730356448e-8,
+        1.0070831435812128922e-6, 1.0729733111203704813e-4,
+        0.051503227360726294675,
+        3.7819492084858931093e-14, -4.8600496888588034879e-13,
+        1.6898350504817224909e-12, 4.5884624327524255865e-11,
+        1.2521615963377513729e-10, 1.8959658437754727957e-8,
+        1.0020716710561353622e-6, 1.073037119856927559e-4,
+        0.05150322383300230775
+    };
+    w = fabs(x);
+    if (!(w < HUGE_VAL)) return w;  /* NaN or infinity: (int) t below would be undefined for NaN, and 0 * exp(inf) gives NaN */
+    if (w < 8.5) {
+        t = w * w * 0.0625;
+        k = 13 * ((int) t);  /* (int) t is 0..4 here: selects one 13-entry row of a[] */
+        y = (((((((((((a[k] * t + a[k + 1]) * t +
+            a[k + 2]) * t + a[k + 3]) * t + a[k + 4]) * t +
+            a[k + 5]) * t + a[k + 6]) * t + a[k + 7]) * t +
+            a[k + 8]) * t + a[k + 9]) * t + a[k + 10]) * t +
+            a[k + 11]) * t + a[k + 12];
+    } else if (w < 12.5) {
+        k = (int) w;
+        t = w - k;  /* fractional part of w */
+        k = 14 * (k - 8);  /* (int) w is 8..12 here: selects one 14-entry row of b[] */
+        y = ((((((((((((b[k] * t + b[k + 1]) * t +
+            b[k + 2]) * t + b[k + 3]) * t + b[k + 4]) * t +
+            b[k + 5]) * t + b[k + 6]) * t + b[k + 7]) * t +
+            b[k + 8]) * t + b[k + 9]) * t + b[k + 10]) * t +
+            b[k + 11]) * t + b[k + 12]) * t + b[k + 13];
+    } else {
+        t = 60 / w;
+        k = 9 * ((int) t);  /* t is in (0, 4.8] here: selects one 9-entry row of c[] */
+        y = ((((((((c[k] * t + c[k + 1]) * t +
+            c[k + 2]) * t + c[k + 3]) * t + c[k + 4]) * t +
+            c[k + 5]) * t + c[k + 6]) * t + c[k + 7]) * t +
+            c[k + 8]) * sqrt(t) * exp(w);
+    }
+    return y;
+}
diff --git a/src/fft4g.c b/src/fft4g.c

new file mode 100644 (file)

index 0000000..5fae8a6
--- /dev/null
+++ b/src/fft4g.c
@@ -0,0 +1,1352 @@
+/* Copyright Takuya OOURA, 1996-2001.
+
+You may use, copy, modify and distribute this code for any
+purpose (include commercial use) and without fee.  Please
+refer to this package when you modify this code.
+
+Package home:  http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+
+Fast Fourier/Cosine/Sine Transform
+    dimension   :one
+    data length :power of 2
+    decimation  :frequency
+    radix       :4, 2
+    data        :inplace
+    table       :use
+functions
+    cdft: Complex Discrete Fourier Transform
+    rdft: Real Discrete Fourier Transform
+    ddct: Discrete Cosine Transform
+    ddst: Discrete Sine Transform
+    dfct: Cosine Transform of RDFT (Real Symmetric DFT)
+    dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
+function prototypes
+    void cdft(int, int, double *, int *, double *);
+    void rdft(int, int, double *, int *, double *);
+    void ddct(int, int, double *, int *, double *);
+    void ddst(int, int, double *, int *, double *);
+    void dfct(int, double *, double *, int *, double *);
+    void dfst(int, double *, double *, int *, double *);
+
+
+-------- Complex DFT (Discrete Fourier Transform) --------
+    [definition]
+        <case1>
+            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
+        <case2>
+            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
+        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            cdft(2*n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            cdft(2*n, -1, a, ip, w);
+    [parameters]
+        2*n            :data length (int)
+                        n >= 1, n = power of 2
+        a[0...2*n-1]   :input/output data (double *)
+                        input data
+                            a[2*j] = Re(x[j]),
+                            a[2*j+1] = Im(x[j]), 0<=j<n
+                        output data
+                            a[2*k] = Re(X[k]),
+                            a[2*k+1] = Im(X[k]), 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            cdft(2*n, -1, a, ip, w);
+        is
+            cdft(2*n, 1, a, ip, w);
+            for (j = 0; j <= 2 * n - 1; j++) {
+                a[j] *= 1.0 / n;
+            }
+        .
+
+
+-------- Real DFT / Inverse of Real DFT --------
+    [definition]
+        <case1> RDFT
+            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
+        <case2> IRDFT (excluding scale)
+            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
+                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
+                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            rdft(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            rdft(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        <case1>
+                            output data
+                                a[2*k] = R[k], 0<=k<n/2
+                                a[2*k+1] = I[k], 0<k<n/2
+                                a[1] = R[n/2]
+                        <case2>
+                            input data
+                                a[2*j] = R[j], 0<=j<n/2
+                                a[2*j+1] = I[j], 0<j<n/2
+                                a[1] = R[n/2]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            rdft(n, 1, a, ip, w);
+        is
+            rdft(n, -1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
+    [definition]
+        <case1> IDCT (excluding scale)
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DCT
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddct(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddct(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        output data
+                            a[k] = C[k], 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            ddct(n, -1, a, ip, w);
+        is
+            a[0] *= 0.5;
+            ddct(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DST (Discrete Sine Transform) / Inverse of DST --------
+    [definition]
+        <case1> IDST (excluding scale)
+            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DST
+            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddst(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddst(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        <case1>
+                            input data
+                                a[j] = A[j], 0<j<n
+                                a[0] = A[n]
+                            output data
+                                a[k] = S[k], 0<=k<n
+                        <case2>
+                            output data
+                                a[k] = S[k], 0<k<n
+                                a[0] = S[n]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            ddst(n, -1, a, ip, w);
+        is
+            a[0] *= 0.5;
+            ddst(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
+    [definition]
+        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
+    [usage]
+        ip[0] = 0; // first time only
+        dfct(n, a, t, ip, w);
+    [parameters]
+        n              :data length - 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n]       :input/output data (double *)
+                        output data
+                            a[k] = C[k], 0<=k<=n
+        t[0...n/2]     :work area (double *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+        is
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+            for (j = 0; j <= n; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
+    [definition]
+        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
+    [usage]
+        ip[0] = 0; // first time only
+        dfst(n, a, t, ip, w);
+    [parameters]
+        n              :data length + 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        output data
+                            a[k] = S[k], 0<k<n
+                        (a[0] is used for work area)
+        t[0...n/2-1]   :work area (double *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            dfst(n, a, t, ip, w);
+        is
+            dfst(n, a, t, ip, w);
+            for (j = 1; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+Appendix :
+    The cos/sin table is recalculated when a larger table is required.
+    w[] and ip[] are compatible with all routines.
+*/
+
+
+#include <math.h>
+#include "fft4g.h"
+
+/* Precision selection.
+ * With FFT4G_FLOAT defined, every `double' that follows in this file is
+ * textually replaced by `float', one_half becomes a float constant, the
+ * math calls are redirected to single precision, and the public entry
+ * points are renamed to lsx_*_f.  Without it the code stays in double
+ * precision and the entry points are renamed to lsx_*. */
+#ifdef FFT4G_FLOAT
+  #define double float
+  #define one_half 0.5f
+
+/* When _MSC_VER is defined the double-precision function is called and its
+ * result cast to float; otherwise the `f'-suffixed functions are used. */
+#if defined _MSC_VER
+  #define sin   (float)sin
+  #define cos   (float)cos
+  #define atan  (float)atan
+#else
+  #define sin   sinf
+  #define cos   cosf
+  #define atan  atanf
+#endif
+
+  /* Single-precision names of the public transforms. */
+  #define cdft  lsx_cdft_f
+  #define rdft  lsx_rdft_f
+  #define ddct  lsx_ddct_f
+  #define ddst  lsx_ddst_f
+  #define dfct  lsx_dfct_f
+  #define dfst  lsx_dfst_f
+#else
+  #define one_half 0.5
+  /* Double-precision names of the public transforms. */
+  #define cdft  lsx_cdft
+  #define rdft  lsx_rdft
+  #define ddct  lsx_ddct
+  #define ddst  lsx_ddst
+  #define dfct  lsx_dfct
+  #define dfst  lsx_dfst
+#endif
+
+/* Forward declarations of the file-local helpers used by the public
+ * transforms.  Note that `double' here is subject to the FFT4G_FLOAT
+ * redefinition above. */
+static void bitrv2conj(int n, int *ip, double *a);
+static void bitrv2(int n, int *ip, double *a);
+static void cft1st(int n, double *a, double const *w);
+static void cftbsub(int n, double *a, double const *w);
+static void cftfsub(int n, double *a, double const *w);
+static void cftmdl(int n, int l, double *a, double const *w);
+static void dctsub(int n, double *a, int nc, double const *c);
+static void dstsub(int n, double *a, int nc, double const *c);
+static void makect(int nc, int *ip, double *c);
+static void makewt(int nw, int *ip, double *w);
+static void rftbsub(int n, double *a, int nc, double const *c);
+static void rftfsub(int n, double *a, int nc, double const *c);
+
+
+/* Complex DFT, in place, on the n values of a[] (interleaved re/im).
+ *   n    : number of values in a[]
+ *   isgn : >= 0 selects the bitrv2/cftfsub path, < 0 the
+ *          bitrv2conj/cftbsub path
+ *   ip   : ip[0] caches the size of the table currently held in w[]
+ *   w    : cos/sin table, rebuilt here when n needs a larger one
+ * Nothing is transformed when n < 4. */
+void cdft(int n, int isgn, double *a, int *ip, double *w)
+{
+    /* Grow the cos/sin table if the cached one is too small for n. */
+    if ((ip[0] << 2) < n)
+        makewt(n >> 2, ip, w);
+
+    if (n == 4) {
+        /* Smallest case: the same routine is used for either sign. */
+        cftfsub(n, a, w);
+        return;
+    }
+    if (n < 4)
+        return;
+
+    if (isgn < 0) {
+        bitrv2conj(n, ip + 2, a);
+        cftbsub(n, a, w);
+    } else {
+        bitrv2(n, ip + 2, a);
+        cftfsub(n, a, w);
+    }
+}
+
+
+/* Real DFT (isgn >= 0) or its inverse without scaling (isgn < 0), in place
+ * on a[0..n-1].
+ *   ip : ip[0] and ip[1] cache the sizes of the two tables kept in w[]
+ *   w  : first table at w[0], second table at w + (first table size);
+ *        either one is rebuilt here when n needs it larger */
+void rdft(int n, int isgn, double *a, int *ip, double *w)
+{
+    int nw = ip[0], nc;
+    double diff;
+
+    if ((nw << 2) < n) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    /* Read ip[1] only now: makewt() above resets it. */
+    nc = ip[1];
+    if ((nc << 2) < n) {
+        nc = n >> 2;
+        makect(nc, ip, w + nw);
+    }
+
+    if (isgn < 0) {
+        /* Inverse: recombine a[0], a[1] first, then run the backward passes. */
+        a[1] = one_half * (a[0] - a[1]);
+        a[0] -= a[1];
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        return;
+    }
+
+    /* Forward: run the forward passes, then recombine a[0], a[1]. */
+    if (n > 4) {
+        bitrv2(n, ip + 2, a);
+        cftfsub(n, a, w);
+        rftfsub(n, a, nc, w + nw);
+    } else if (n == 4) {
+        cftfsub(n, a, w);
+    }
+    diff = a[0] - a[1];
+    a[0] += a[1];
+    a[1] = diff;
+}
+
+
+/* DCT / inverse DCT, in place on a[0..n-1]; the definitions are given in
+ * the "DCT" section of the comment at the top of this file.
+ *   n    : data length
+ *   isgn : < 0 recombines the input and runs the backward passes before
+ *          dctsub(); >= 0 runs the forward passes and recombines the
+ *          output after dctsub()
+ *   ip   : ip[0], ip[1] cache the sizes of the two tables kept in w[]
+ *   w    : cos/sin tables, rebuilt here when n needs larger ones */
+void ddct(int n, int isgn, double *a, int *ip, double *w)
+{
+    int j, nw, nc;
+    double xr;
+
+    /* Make sure both tables are large enough for n. */
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > nc) {
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) {
+        /* Walk downwards so that a[j - 1] is still unmodified when read. */
+        xr = a[n - 1];
+        for (j = n - 2; j >= 2; j -= 2) {
+            a[j + 1] = a[j] - a[j - 1];
+            a[j] += a[j - 1];
+        }
+        a[1] = a[0] - xr;
+        a[0] += xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+    /* Common step for both directions. */
+    dctsub(n, a, nc, w + nw);
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        /* Walk upwards so that a[j + 1] is still unmodified when read;
+         * the saved difference xr ends up in the last slot. */
+        xr = a[0] - a[1];
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) {
+            a[j - 1] = a[j] - a[j + 1];
+            a[j] += a[j + 1];
+        }
+        a[n - 1] = xr;
+    }
+}
+
+
+/* DST / inverse DST, in place on a[0..n-1]; the definitions are given in
+ * the "DST" section of the comment at the top of this file.  The structure
+ * mirrors ddct(), with dstsub() as the common step and different signs in
+ * the recombination loops.
+ *   n    : data length
+ *   isgn : < 0 recombines the input and runs the backward passes before
+ *          dstsub(); >= 0 runs the forward passes and recombines the
+ *          output after dstsub()
+ *   ip   : ip[0], ip[1] cache the sizes of the two tables kept in w[]
+ *   w    : cos/sin tables, rebuilt here when n needs larger ones */
+void ddst(int n, int isgn, double *a, int *ip, double *w)
+{
+    int j, nw, nc;
+    double xr;
+
+    /* Make sure both tables are large enough for n. */
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > nc) {
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) {
+        /* Walk downwards so that a[j - 1] is still unmodified when read. */
+        xr = a[n - 1];
+        for (j = n - 2; j >= 2; j -= 2) {
+            a[j + 1] = -a[j] - a[j - 1];
+            a[j] -= a[j - 1];
+        }
+        a[1] = a[0] + xr;
+        a[0] -= xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+    /* Common step for both directions. */
+    dstsub(n, a, nc, w + nw);
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        /* Walk upwards so that a[j + 1] is still unmodified when read;
+         * the negated saved difference ends up in the last slot. */
+        xr = a[0] - a[1];
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) {
+            a[j - 1] = -a[j] - a[j + 1];
+            a[j] -= a[j + 1];
+        }
+        a[n - 1] = -xr;
+    }
+}
+
+
+/* Cosine transform of RDFT (real symmetric DFT), in place on a[0..n]; see
+ * the matching section of the comment at the top of this file.
+ *   n  : data length - 1
+ *   a  : input/output, n + 1 values
+ *   t  : work area (indices 0..n/2 are used)
+ *   ip : ip[0], ip[1] cache the sizes of the two tables kept in w[]
+ *   w  : cos/sin tables, rebuilt here when n needs larger ones */
+void dfct(int n, double *a, double *t, int *ip, double *w)
+{
+    int j, k, l, m, mh, nw, nc;
+    double xr, xi, yr, yi;
+
+    /* Make sure both tables are large enough for n. */
+    nw = ip[0];
+    if (n > (nw << 3)) {
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    /* Combine the end points a[0], a[n] and the mid point a[m]. */
+    m = n >> 1;
+    yi = a[m];
+    xi = a[0] + a[n];
+    a[0] -= a[n];
+    t[0] = xi - yi;
+    t[m] = xi + yi;
+    if (n > 2) {
+        /* Fold a[j] against a[n - j]: the differences stay in a[], while
+         * sums and differences of the pairwise sums go to t[]. */
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) {
+            k = m - j;
+            xr = a[j] - a[n - j];
+            xi = a[j] + a[n - j];
+            yr = a[k] - a[n - k];
+            yi = a[k] + a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi - yi;
+            t[k] = xi + yi;
+        }
+        t[mh] = a[mh] + a[n - mh];
+        a[mh] -= a[n - mh];
+        /* Transform the length-m part held in a[0..m-1]. */
+        dctsub(m, a, nc, w + nw);
+        if (m > 4) {
+            bitrv2(m, ip + 2, a);
+            cftfsub(m, a, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, w);
+        }
+        /* Spread the results to the odd indices of a[]; walking downwards
+         * keeps a[j], a[j + 1] intact until they have been read. */
+        a[n - 1] = a[0] - a[1];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) {
+            a[2 * j + 1] = a[j] + a[j + 1];
+            a[2 * j - 1] = a[j] - a[j + 1];
+        }
+        /* Repeat on t[] with m halving and the output stride l doubling
+         * on every pass. */
+        l = 2;
+        m = mh;
+        while (m >= 2) {
+            dctsub(m, t, nc, w + nw);
+            if (m > 4) {
+                bitrv2(m, ip + 2, t);
+                cftfsub(m, t, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, w);
+            }
+            a[n - l] = t[0] - t[1];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2;
+                a[k - l] = t[j] - t[j + 1];
+                a[k + l] = t[j] + t[j + 1];
+            }
+            l <<= 1;
+            /* Fold the upper part t[m..2m] down into t[0..m] for the
+             * next, half-sized pass. */
+            mh = m >> 1;
+            for (j = 0; j < mh; j++) {
+                k = m - j;
+                t[j] = t[m + k] - t[m + j];
+                t[k] = t[m + k] + t[m + j];
+            }
+            t[mh] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0];
+        a[n] = t[2] - t[1];
+        a[0] = t[2] + t[1];
+    } else {
+        /* n <= 2: finish directly from the values computed above. */
+        a[1] = a[0];
+        a[2] = t[0];
+        a[0] = t[1];
+    }
+}
+
+
+/* Sine transform of RDFT (real anti-symmetric DFT), in place on a[1..n-1];
+ * see the matching section of the comment at the top of this file.
+ *   n  : data length + 1
+ *   a  : input/output; a[0] is used as work space and is set to 0 on exit
+ *   t  : work area
+ *   ip : ip[0], ip[1] cache the sizes of the two tables kept in w[]
+ *   w  : cos/sin tables, rebuilt here when n needs larger ones */
+void dfst(int n, double *a, double *t, int *ip, double *w)
+{
+    int j, k, l, m, mh, nw, nc;
+    double xr, xi, yr, yi;
+
+    /* Make sure both tables are large enough for n. */
+    nw = ip[0];
+    if (n > (nw << 3)) {
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    if (n > 2) {
+        /* Fold a[j] against a[n - j]: the sums stay in a[], while sums
+         * and differences of the pairwise differences go to t[]. */
+        m = n >> 1;
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) {
+            k = m - j;
+            xr = a[j] + a[n - j];
+            xi = a[j] - a[n - j];
+            yr = a[k] + a[n - k];
+            yi = a[k] - a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi + yi;
+            t[k] = xi - yi;
+        }
+        t[0] = a[mh] - a[n - mh];
+        a[mh] += a[n - mh];
+        a[0] = a[m];
+        /* Transform the length-m part held in a[0..m-1]. */
+        dstsub(m, a, nc, w + nw);
+        if (m > 4) {
+            bitrv2(m, ip + 2, a);
+            cftfsub(m, a, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, w);
+        }
+        /* Spread the results to the odd indices of a[]; walking downwards
+         * keeps a[j], a[j + 1] intact until they have been read. */
+        a[n - 1] = a[1] - a[0];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) {
+            a[2 * j + 1] = a[j] - a[j + 1];
+            a[2 * j - 1] = -a[j] - a[j + 1];
+        }
+        /* Repeat on t[] with m halving and the output stride l doubling
+         * on every pass. */
+        l = 2;
+        m = mh;
+        while (m >= 2) {
+            dstsub(m, t, nc, w + nw);
+            if (m > 4) {
+                bitrv2(m, ip + 2, t);
+                cftfsub(m, t, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, w);
+            }
+            a[n - l] = t[1] - t[0];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2;
+                a[k - l] = -t[j] - t[j + 1];
+                a[k + l] = t[j] - t[j + 1];
+            }
+            l <<= 1;
+            /* Fold the upper part of t[] down for the next, half-sized
+             * pass. */
+            mh = m >> 1;
+            for (j = 1; j < mh; j++) {
+                k = m - j;
+                t[j] = t[m + k] + t[m + j];
+                t[k] = t[m + k] - t[m + j];
+            }
+            t[0] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0];
+    }
+    /* a[0] only served as work space. */
+    a[0] = 0;
+}
+
+
+/* -------- initializing routines -------- */
+
+
+/* Build the cos/sin table of nw entries in w[] and record its size.
+ *   nw : table size; recorded in ip[0]
+ *   ip : ip[0] is set to nw and ip[1] is reset to 1
+ *   w  : receives the table; left untouched when nw <= 2 */
+static void makewt(int nw, int *ip, double *w)
+{
+    int i, half;
+    double step, c, s;
+
+    ip[0] = nw;
+    ip[1] = 1;
+    if (nw <= 2)
+        return;
+
+    half = nw >> 1;
+    /* atan(1) is pi/4, so the angle runs from 0 to pi/4 over `half' steps. */
+    step = atan(1.0) / (double)half;
+    w[0] = 1;
+    w[1] = 0;
+    w[half] = cos(step * (double)half);
+    w[half + 1] = w[half];
+    if (half <= 2)
+        return;
+
+    /* Fill (cos, sin) pairs from the front and the mirrored (sin, cos)
+     * pairs from the back, then reorder the table with bitrv2(). */
+    for (i = 2; i < half; i += 2) {
+        c = cos(step * (double)i);
+        s = sin(step * (double)i);
+        w[i] = c;
+        w[i + 1] = s;
+        w[nw - i] = s;
+        w[nw - i + 1] = c;
+    }
+    bitrv2(nw, ip + 2, w);
+}
+
+
+/* Build the second table of nc entries in c[] and record its size.
+ *   nc : table size; recorded in ip[1]
+ *   ip : ip[1] is set to nc
+ *   c  : receives the table; left untouched when nc <= 1 */
+static void makect(int nc, int *ip, double *c)
+{
+    int i, half;
+    double step;
+
+    ip[1] = nc;
+    if (nc <= 1)
+        return;
+
+    half = nc >> 1;
+    /* atan(1) is pi/4, so the angle runs from 0 to pi/4 over `half' steps. */
+    step = atan(1.0) / (double)half;
+    c[0] = cos(step * (double)half);
+    c[half] = one_half * c[0];
+    /* Halved cosines fill the front, halved sines the mirrored back. */
+    for (i = 1; i < half; i++) {
+        c[i] = one_half * cos(step * (double)i);
+        c[nc - i] = one_half * sin(step * (double)i);
+    }
+}
+
+
+/* -------- child routines -------- */
+
+
+/* Bit-reversal reordering of the n values of a[], treated as pairs
+ * (a[i], a[i + 1]) that are swapped as a unit.
+ *   n   : number of values in a[]
+ *   ip0 : ignored; a local index table is used instead
+ *   a   : data, reordered in place
+ * NOTE(review): the local table has 1024 entries and the loop that fills
+ * it is not bounds-checked, so n must be small enough for the final m to
+ * stay <= 1024. */
+static void bitrv2(int n, int *ip0, double *a)
+{
+    int j, j1, k, k1, l, m, m2, ip[1024];
+    double xr, xi, yr, yi;
+
+    (void)ip0;
+    /* Build the index table: each pass doubles the number of entries by
+     * adding the current (halved) l to the existing ones. */
+    ip[0] = 0;
+    l = n;
+    m = 1;
+    while ((m << 3) < l) {
+        l >>= 1;
+        for (j = 0; j < m; j++) {
+            ip[m + j] = ip[j] + l;
+        }
+        m <<= 1;
+    }
+    m2 = 2 * m;
+    if ((m << 3) == l) {
+        /* Case l == 8 * m: four swaps per (j, k) pair plus one extra swap
+         * per k. */
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 -= m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            /* Extra swap for the j == k position. */
+            j1 = 2 * k + m2 + ip[k];
+            k1 = j1 + m2;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+        }
+    } else {
+        /* Other case: two swaps per (j, k) pair, nothing extra per k. */
+        for (k = 1; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+        }
+    }
+}
+
+
+/* Bit-reversal reordering of the n values of a[] combined with complex
+ * conjugation: pairs (a[i], a[i + 1]) are swapped as a unit and the second
+ * value of every pair is negated, including pairs that stay in place.
+ *   n   : number of values in a[]
+ *   ip0 : ignored; a local index table is used instead
+ *   a   : data, reordered and conjugated in place
+ * The local table is sized like the one in bitrv2() so that both routines
+ * accept the same range of n; the fill loop itself is not bounds-checked,
+ * so n must be small enough for the final m to stay <= 1024. */
+static void bitrv2conj(int n, int *ip0, double *a)
+{
+    int j, j1, k, k1, l, m, m2, ip[1024];
+    double xr, xi, yr, yi;
+
+    (void)ip0;
+    /* Build the index table: each pass doubles the number of entries by
+     * adding the current (halved) l to the existing ones. */
+    ip[0] = 0;
+    l = n;
+    m = 1;
+    while ((m << 3) < l) {
+        l >>= 1;
+        for (j = 0; j < m; j++) {
+            ip[m + j] = ip[j] + l;
+        }
+        m <<= 1;
+    }
+    m2 = 2 * m;
+    if ((m << 3) == l) {
+        /* Case l == 8 * m: four conjugating swaps per (j, k) pair, then the
+         * four j == k positions (two negated in place, two swapped). */
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 -= m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 2 * k + ip[k];
+            a[k1 + 1] = -a[k1 + 1];
+            j1 = k1 + m2;
+            k1 = j1 + m2;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            k1 += m2;
+            a[k1 + 1] = -a[k1 + 1];
+        }
+    } else {
+        /* Other case: the k == 0 positions are only negated in place. */
+        a[1] = -a[1];
+        a[m2 + 1] = -a[m2 + 1];
+        for (k = 1; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            /* Positions that map to themselves: negate in place. */
+            k1 = 2 * k + ip[k];
+            a[k1 + 1] = -a[k1 + 1];
+            a[k1 + m2 + 1] = -a[k1 + m2 + 1];
+        }
+    }
+}
+
+
+/* Forward butterfly passes over the n values of a[] (interleaved re/im),
+ * using the cos/sin table w[].  For n > 8 it runs cft1st() and then
+ * cftmdl() with l growing by a factor of 4 per pass; the last pass is done
+ * here, on four points when (l << 2) == n and on two points otherwise. */
+static void cftfsub(int n, double *a, double const *w)
+{
+    int j, j1, j2, j3, l;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) {
+        /* Final four-point pass over elements j, j + l, j + 2l, j + 3l. */
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i - x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i + x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i - x3r;
+        }
+    } else {
+        /* Final two-point pass over elements j and j + l; the differences
+         * are saved first because a[j] is updated in place. */
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = a[j + 1] - a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] += a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
+
+
+/* Backward counterpart of cftfsub(): the same cft1st()/cftmdl() passes,
+ * but the final pass negates the imaginary (odd-index) values as it
+ * combines them. */
+static void cftbsub(int n, double *a, double const *w)
+{
+    int j, j1, j2, j3, l;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) {
+        /* Final four-point pass over elements j, j + l, j + 2l, j + 3l. */
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = -a[j + 1] - a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = -a[j + 1] + a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i - x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i + x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i - x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i + x3r;
+        }
+    } else {
+        /* Final two-point pass over elements j and j + l; the differences
+         * are saved first because a[j] is updated in place. */
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = -a[j + 1] + a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] = -a[j + 1] - a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
+
+
+/* First butterfly pass: works through a[] in groups of 16 values (two
+ * four-point butterflies per group).  The first group is handled ahead of
+ * the loop because it needs no table entry other than w[2]; every later
+ * group reads its factors from w[] and derives the third one (wk3) from
+ * the other two.
+ *   n : number of values in a[]; callers in this file use n > 8
+ *   a : data, updated in place
+ *   w : cos/sin table (read only) */
+static void cft1st(int n, double *a, double const *w)
+{
+    int j, k1, k2;
+    double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    /* a[0..7]: plain butterfly, no multiplications. */
+    x0r = a[0] + a[2];
+    x0i = a[1] + a[3];
+    x1r = a[0] - a[2];
+    x1i = a[1] - a[3];
+    x2r = a[4] + a[6];
+    x2i = a[5] + a[7];
+    x3r = a[4] - a[6];
+    x3i = a[5] - a[7];
+    a[0] = x0r + x2r;
+    a[1] = x0i + x2i;
+    a[4] = x0r - x2r;
+    a[5] = x0i - x2i;
+    a[2] = x1r - x3i;
+    a[3] = x1i + x3r;
+    a[6] = x1r + x3i;
+    a[7] = x1i - x3r;
+    /* a[8..15]: only the single factor w[2] is needed. */
+    wk1r = w[2];
+    x0r = a[8] + a[10];
+    x0i = a[9] + a[11];
+    x1r = a[8] - a[10];
+    x1i = a[9] - a[11];
+    x2r = a[12] + a[14];
+    x2i = a[13] + a[15];
+    x3r = a[12] - a[14];
+    x3i = a[13] - a[15];
+    a[8] = x0r + x2r;
+    a[9] = x0i + x2i;
+    a[12] = x2i - x0i;
+    a[13] = x0r - x2r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[10] = wk1r * (x0r - x0i);
+    a[11] = wk1r * (x0r + x0i);
+    x0r = x3i + x1r;
+    x0i = x3r - x1i;
+    a[14] = wk1r * (x0i - x0r);
+    a[15] = wk1r * (x0i + x0r);
+    /* Remaining groups: k1 advances by 2 table entries per group. */
+    k1 = 0;
+    for (j = 16; j < n; j += 16) {
+        k1 += 2;
+        k2 = 2 * k1;
+        wk2r = w[k1];
+        wk2i = w[k1 + 1];
+        wk1r = w[k2];
+        wk1i = w[k2 + 1];
+        wk3r = wk1r - 2 * wk2i * wk1i;
+        wk3i = 2 * wk2i * wk1r - wk1i;
+        /* First half of the group: a[j..j+7]. */
+        x0r = a[j] + a[j + 2];
+        x0i = a[j + 1] + a[j + 3];
+        x1r = a[j] - a[j + 2];
+        x1i = a[j + 1] - a[j + 3];
+        x2r = a[j + 4] + a[j + 6];
+        x2i = a[j + 5] + a[j + 7];
+        x3r = a[j + 4] - a[j + 6];
+        x3i = a[j + 5] - a[j + 7];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        x0r -= x2r;
+        x0i -= x2i;
+        a[j + 4] = wk2r * x0r - wk2i * x0i;
+        a[j + 5] = wk2r * x0i + wk2i * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j + 2] = wk1r * x0r - wk1i * x0i;
+        a[j + 3] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j + 6] = wk3r * x0r - wk3i * x0i;
+        a[j + 7] = wk3r * x0i + wk3i * x0r;
+        /* Second half of the group: a[j+8..j+15], with the next pair of
+         * table entries and wk2r/wk2i exchanging roles. */
+        wk1r = w[k2 + 2];
+        wk1i = w[k2 + 3];
+        wk3r = wk1r - 2 * wk2r * wk1i;
+        wk3i = 2 * wk2r * wk1r - wk1i;
+        x0r = a[j + 8] + a[j + 10];
+        x0i = a[j + 9] + a[j + 11];
+        x1r = a[j + 8] - a[j + 10];
+        x1i = a[j + 9] - a[j + 11];
+        x2r = a[j + 12] + a[j + 14];
+        x2i = a[j + 13] + a[j + 15];
+        x3r = a[j + 12] - a[j + 14];
+        x3i = a[j + 13] - a[j + 15];
+        a[j + 8] = x0r + x2r;
+        a[j + 9] = x0i + x2i;
+        x0r -= x2r;
+        x0i -= x2i;
+        a[j + 12] = -wk2i * x0r - wk2r * x0i;
+        a[j + 13] = -wk2i * x0i + wk2r * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j + 10] = wk1r * x0r - wk1i * x0i;
+        a[j + 11] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j + 14] = wk3r * x0r - wk3i * x0i;
+        a[j + 15] = wk3r * x0i + wk3i * x0r;
+    }
+}
+/* cftmdl: one in-place pass of 4-point butterflies over a, combining the (re, im) pairs at j, j+l, j+2l and j+3l. */
+/* Elements are processed in groups of m = 4*l values up to index n; the complex multipliers come from table w. */
+static void cftmdl(int n, int l, double *a, double const *w)
+{
+    int j, j1, j2, j3, k, k1, k2, m, m2;
+    double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    m = l << 2; /* size of one group: four sub-blocks of l values */
+    for (j = 0; j < l; j += 2) { /* first group (offset 0): sums and differences only, no multiplier */
+        j1 = j + l;
+        j2 = j1 + l;
+        j3 = j2 + l;
+        x0r = a[j] + a[j1];
+        x0i = a[j + 1] + a[j1 + 1];
+        x1r = a[j] - a[j1];
+        x1i = a[j + 1] - a[j1 + 1];
+        x2r = a[j2] + a[j3];
+        x2i = a[j2 + 1] + a[j3 + 1];
+        x3r = a[j2] - a[j3];
+        x3i = a[j2 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j2] = x0r - x2r;
+        a[j2 + 1] = x0i - x2i;
+        a[j1] = x1r - x3i;
+        a[j1 + 1] = x1i + x3r;
+        a[j3] = x1r + x3i;
+        a[j3 + 1] = x1i - x3r;
+    }
+    wk1r = w[2]; /* the second group needs only this one real factor */
+    for (j = m; j < l + m; j += 2) { /* second group (offset m) */
+        j1 = j + l;
+        j2 = j1 + l;
+        j3 = j2 + l;
+        x0r = a[j] + a[j1];
+        x0i = a[j + 1] + a[j1 + 1];
+        x1r = a[j] - a[j1];
+        x1i = a[j + 1] - a[j1 + 1];
+        x2r = a[j2] + a[j3];
+        x2i = a[j2 + 1] + a[j3 + 1];
+        x3r = a[j2] - a[j3];
+        x3i = a[j2 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j2] = x2i - x0i;
+        a[j2 + 1] = x0r - x2r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j1] = wk1r * (x0r - x0i);
+        a[j1 + 1] = wk1r * (x0r + x0i);
+        x0r = x3i + x1r;
+        x0i = x3r - x1i;
+        a[j3] = wk1r * (x0i - x0r);
+        a[j3 + 1] = wk1r * (x0i + x0r);
+    }
+    k1 = 0;
+    m2 = 2 * m;
+    for (k = m2; k < n; k += m2) { /* remaining groups, handled two at a time: offset k and offset k + m */
+        k1 += 2;
+        k2 = 2 * k1;
+        wk2r = w[k1];
+        wk2i = w[k1 + 1];
+        wk1r = w[k2];
+        wk1i = w[k2 + 1];
+        wk3r = wk1r - 2 * wk2i * wk1i; /* third multiplier is derived from the other two, not read from w */
+        wk3i = 2 * wk2i * wk1r - wk1i;
+        for (j = k; j < l + k; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = wk2r * x0r - wk2i * x0i; /* complex product (wk2r, wk2i) * (x0r, x0i) */
+            a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+        }
+        wk1r = w[k2 + 2]; /* next table entry, used for the group at offset k + m */
+        wk1i = w[k2 + 3];
+        wk3r = wk1r - 2 * wk2r * wk1i;
+        wk3i = 2 * wk2r * wk1r - wk1i;
+        for (j = k + m; j < l + (k + m); j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = -wk2i * x0r - wk2r * x0i; /* complex product (-wk2i, wk2r) * (x0r, x0i) */
+            a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+        }
+    }
+}
+
+
+static void rftfsub(int n, double *a, int nc, double const *c)
+{
+    /* For lo = 2, 4, ... below n/2, adjusts the pair at lo and its mirror at
+     * hi = n - lo in place.  The factors come from table c (nc entries),
+     * stepping through it with a stride of 2 * nc / (n / 2). */
+    int const half = n >> 1;
+    int const stride = 2 * nc / half;
+    int lo, pos = 0;
+
+    for (lo = 2; lo < half; lo += 2) {
+        int const hi = n - lo;
+        double cr, ci, dr, di, tr, ti;
+
+        pos += stride;
+        cr = one_half - c[nc - pos];
+        ci = c[pos];
+        dr = a[lo] - a[hi];
+        di = a[lo + 1] + a[hi + 1];
+        tr = cr * dr - ci * di;
+        ti = cr * di + ci * dr;
+        a[lo] = a[lo] - tr;
+        a[lo + 1] = a[lo + 1] - ti;
+        a[hi] = a[hi] + tr;
+        a[hi + 1] = a[hi + 1] - ti;
+    }
+}
+
+
+static void rftbsub(int n, double *a, int nc, double const *c)
+{
+    /* Counterpart of rftfsub: same walk over the pairs (lo, n - lo) and the
+     * same table stride, but with the opposite sign on the cross terms and
+     * with every odd-index element it touches (including a[1] and
+     * a[n/2 + 1]) negated. */
+    int half, stride, lo, pos = 0;
+
+    a[1] = -a[1];
+    half = n >> 1;
+    stride = 2 * nc / half;
+    for (lo = 2; lo < half; lo += 2) {
+        int const hi = n - lo;
+        double cr, ci, dr, di, tr, ti;
+
+        pos += stride;
+        cr = one_half - c[nc - pos];
+        ci = c[pos];
+        dr = a[lo] - a[hi];
+        di = a[lo + 1] + a[hi + 1];
+        tr = cr * dr + ci * di;
+        ti = cr * di - ci * dr;
+        a[lo] = a[lo] - tr;
+        a[lo + 1] = ti - a[lo + 1];
+        a[hi] = a[hi] + tr;
+        a[hi + 1] = ti - a[hi + 1];
+    }
+    a[half + 1] = -a[half + 1];
+}
+
+
+static void dctsub(int n, double *a, int nc, double const *c)
+{
+    /* Mixes a[lo] with its mirror a[n - lo] for lo = 1 .. n/2 - 1, using the
+     * difference and the sum of two entries of table c (stride nc / n), then
+     * scales the middle element a[n/2] by c[0]. */
+    int const half = n >> 1;
+    int const stride = nc / n;
+    int lo, pos = 0;
+
+    for (lo = 1; lo < half; ++lo) {
+        int const hi = n - lo;
+        double diff, sum, new_hi;
+
+        pos += stride;
+        diff = c[pos] - c[nc - pos];
+        sum = c[pos] + c[nc - pos];
+        new_hi = sum * a[lo] - diff * a[hi];
+        a[lo] = diff * a[lo] + sum * a[hi];
+        a[hi] = new_hi;
+    }
+    a[half] = a[half] * c[0];
+}
+
+
+static void dstsub(int n, double *a, int nc, double const *c)
+{
+    /* Same walk and table look-ups as dctsub, with the roles of a[lo] and
+     * its mirror a[n - lo] exchanged; the middle element a[n/2] is again
+     * scaled by c[0]. */
+    int const half = n >> 1;
+    int const stride = nc / n;
+    int lo, pos = 0;
+
+    for (lo = 1; lo < half; ++lo) {
+        int const hi = n - lo;
+        double diff, sum, new_lo;
+
+        pos += stride;
+        diff = c[pos] - c[nc - pos];
+        sum = c[pos] + c[nc - pos];
+        new_lo = sum * a[hi] - diff * a[lo];
+        a[hi] = diff * a[hi] + sum * a[lo];
+        a[lo] = new_lo;
+    }
+    a[half] = a[half] * c[0];
+}
diff --git a/src/fft4g.h b/src/fft4g.h

new file mode 100644 (file)

index 0000000..0f906ab
--- /dev/null
+++ b/src/fft4g.h
@@ -0,0 +1,23 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+void lsx_cdft(int, int, double *, int *, double *);
+void lsx_rdft(int, int, double *, int *, double *);
+void lsx_ddct(int, int, double *, int *, double *);
+void lsx_ddst(int, int, double *, int *, double *);
+void lsx_dfct(int, double *, double *, int *, double *);
+void lsx_dfst(int, double *, double *, int *, double *);
+/* The _f declarations below mirror the ones above, with float in place of double in every parameter. */
+void lsx_cdft_f(int, int, float *, int *, float *);
+void lsx_rdft_f(int, int, float *, int *, float *);
+void lsx_ddct_f(int, int, float *, int *, float *);
+void lsx_ddst_f(int, int, float *, int *, float *);
+void lsx_dfct_f(int, float *, float *, int *, float *);
+void lsx_dfst_f(int, float *, float *, int *, float *);
+
+/* Element counts for the two work tables of a transform of length l (they size
+ * the LSX_FFT_BR and LSX_FFT_SC caches in fft4g_cache.h).  The argument is
+ * parenthesised so that expressions such as `a + b' expand correctly.
+ * dft_br_len() calls log(), so <math.h> must be included by the user. */
+#define dft_br_len(l) (2ul + (1ul << (int)(log((l) / 2 + .5) / log(2.)) / 2))
+#define dft_sc_len(l) ((unsigned long)(l) / 2)
+
+/* Over-allocate h by 2 to use these macros */
+#define LSX_PACK(h, n)   ((h)[1] = (h)[n])
+/* NOTE: the trailing ';' is deliberately kept so existing call sites expand unchanged. */
+#define LSX_UNPACK(h, n) (h)[n] = (h)[1], (h)[(n) + 1] = (h)[1] = 0;
diff --git a/src/fft4g32.c b/src/fft4g32.c

new file mode 100644 (file)

index 0000000..8741394
--- /dev/null
+++ b/src/fft4g32.c
@@ -0,0 +1,27 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "filter.h"
+#define FFT4G_FLOAT
+#include "fft4g.c"
+
+/* Callback table for the single-precision fft4g real DFT. */
+static void * null(void) {return 0;}
+/* H is single-precision data: filter.h declares lsx_safe_rdft_f() as taking
+ * float *, and the sibling table in fft4g32s.c spells the parameter float *.
+ * NOTE(review): the earlier `double * H' spelling presumably relied on fft4g.c
+ * remapping `double' when FFT4G_FLOAT is defined - confirm; `float' is right
+ * either way. */
+static void forward (int length, void * setup, float * H) {lsx_safe_rdft_f(length,  1, H); (void)setup;}
+static void backward(int length, void * setup, float * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;}
+static int multiplier(void) {return 2;}
+static void nothing(void) {}
+
+/* Entries are stored through a generic function-pointer type.
+ * NOTE(review): the slot order presumably has to match the indices used by the
+ * code that consumes this table - confirm before reordering. */
+typedef void (* fn_t)(void);
+fn_t _soxr_rdft32_cb[] = {
+  (fn_t)null,
+  (fn_t)null,
+  (fn_t)nothing,
+  (fn_t)forward,
+  (fn_t)forward,
+  (fn_t)backward,
+  (fn_t)backward,
+  (fn_t)_soxr_ordered_convolve_f,
+  (fn_t)_soxr_ordered_partial_convolve_f,
+  (fn_t)multiplier,
+  (fn_t)nothing,
+};
diff --git a/src/fft4g32s.c b/src/fft4g32s.c

new file mode 100644 (file)

index 0000000..4a95a7d
--- /dev/null
+++ b/src/fft4g32s.c
@@ -0,0 +1,26 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "filter.h"
+#include "simd.h"
+
+static void * null(void) {return 0;} /* table filler: always returns a null pointer */
+static void nothing(void) {} /* table filler: does nothing */
+static void forward (int length, void * setup, float * H) {lsx_safe_rdft_f(length,  1, H); (void)setup;} /* type +1 transform of H; setup is ignored */
+static void backward(int length, void * setup, float * H) {lsx_safe_rdft_f(length, -1, H); (void)setup;} /* type -1 transform of H; setup is ignored */
+static int multiplier(void) {return 2;}
+
+typedef void (* fn_t)(void); /* generic type the differently-typed entries are cast to */
+fn_t _soxr_rdft32s_cb[] = { /* NOTE(review): slot order presumably must match the indices used by the consumer - confirm */
+  (fn_t)null,
+  (fn_t)null,
+  (fn_t)nothing,
+  (fn_t)forward,
+  (fn_t)forward,
+  (fn_t)backward,
+  (fn_t)backward,
+  (fn_t)_soxr_ordered_convolve_simd,
+  (fn_t)_soxr_ordered_partial_convolve_simd,
+  (fn_t)multiplier,
+  (fn_t)nothing,
+};
diff --git a/src/fft4g64.c b/src/fft4g64.c

new file mode 100644 (file)

index 0000000..48eaddd
--- /dev/null
+++ b/src/fft4g64.c
@@ -0,0 +1,29 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "filter.h"
+#include "fft4g.c"
+#include "soxr-config.h"
+
+#if HAVE_DOUBLE_PRECISION /* the whole table is compiled only for double-precision builds */
+static void * null(void) {return 0;} /* table filler: always returns a null pointer */
+static void nothing(void) {} /* table filler: does nothing */
+static void forward (int length, void * setup, double * H) {lsx_safe_rdft(length,  1, H); (void)setup;} /* type +1 transform of H; setup is ignored */
+static void backward(int length, void * setup, double * H) {lsx_safe_rdft(length, -1, H); (void)setup;} /* type -1 transform of H; setup is ignored */
+static int multiplier(void) {return 2;}
+
+typedef void (* fn_t)(void); /* generic type the differently-typed entries are cast to */
+fn_t _soxr_rdft64_cb[] = { /* NOTE(review): slot order presumably must match the indices used by the consumer - confirm */
+  (fn_t)null,
+  (fn_t)null,
+  (fn_t)nothing,
+  (fn_t)forward,
+  (fn_t)forward,
+  (fn_t)backward,
+  (fn_t)backward,
+  (fn_t)_soxr_ordered_convolve,
+  (fn_t)_soxr_ordered_partial_convolve,
+  (fn_t)multiplier,
+  (fn_t)nothing,
+};
+#endif
diff --git a/src/fft4g_cache.h b/src/fft4g_cache.h

new file mode 100644 (file)

index 0000000..d776c16
--- /dev/null
+++ b/src/fft4g_cache.h
@@ -0,0 +1,92 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+static int * LSX_FFT_BR; /* int work table passed to LSX_RDFT / LSX_CDFT; sized with dft_br_len() */
+static DFT_FLOAT * LSX_FFT_SC; /* DFT_FLOAT work table passed to LSX_RDFT / LSX_CDFT; sized with dft_sc_len() */
+static int FFT_LEN = -1; /* transform length the tables are sized for; -1 = not initialised, 0 = initialised but empty */
+static ccrw2_t FFT_CACHE_CCRW; /* reader/writer lock taken around every use of the three variables above */
+
+void LSX_INIT_FFT_CACHE(void)
+{
+  /* One-time set-up of the lock; FFT_LEN < 0 marks "not yet initialised".
+   * Calling this again once initialised is a no-op. */
+  if (FFT_LEN < 0) {
+    assert(LSX_FFT_BR == NULL);
+    assert(LSX_FFT_SC == NULL);
+    assert(FFT_LEN == -1);
+    ccrw2_init(FFT_CACHE_CCRW);
+    FFT_LEN = 0;
+  }
+}
+
+void LSX_CLEAR_FFT_CACHE(void)
+{
+  /* Tear down the lock, release both tables and return to the
+   * "not initialised" state (FFT_LEN == -1). */
+  assert(FFT_LEN >= 0);
+  ccrw2_clear(FFT_CACHE_CCRW);
+  free(LSX_FFT_BR), LSX_FFT_BR = NULL;
+  free(LSX_FFT_SC), LSX_FFT_SC = NULL;
+  FFT_LEN = -1;
+}
+/* Make the shared tables large enough for length len. Returns true while holding the write lock (tables were grown), false while holding a read lock. */
+static bool UPDATE_FFT_CACHE(int len)
+{
+  LSX_INIT_FFT_CACHE();
+  assert(lsx_is_power_of_2(len));
+  assert(FFT_LEN >= 0);
+  ccrw2_become_reader(FFT_CACHE_CCRW);
+  if (len > FFT_LEN) {
+    ccrw2_cease_reading(FFT_CACHE_CCRW); /* give up the read lock before asking for the write lock */
+    ccrw2_become_writer(FFT_CACHE_CCRW);
+    if (len > FFT_LEN) { /* re-check: FFT_LEN may have changed while no lock was held */
+      int old_n = FFT_LEN;
+      FFT_LEN = len;
+      LSX_FFT_BR = realloc(LSX_FFT_BR, dft_br_len(FFT_LEN) * sizeof(*LSX_FFT_BR)); /* NOTE(review): realloc results are not checked */
+      LSX_FFT_SC = realloc(LSX_FFT_SC, dft_sc_len(FFT_LEN) * sizeof(*LSX_FFT_SC));
+      if (!old_n) { /* first allocation since initialisation */
+        LSX_FFT_BR[0] = 0;
+#if SOXR_LIB
+        atexit(LSX_CLEAR_FFT_CACHE);
+#endif
+      }
+      return true; /* still the writer; the caller releases via DONE_WITH_FFT_CACHE(true) */
+    }
+    ccrw2_cease_writing(FFT_CACHE_CCRW); /* tables already big enough: fall back to a read lock */
+    ccrw2_become_reader(FFT_CACHE_CCRW);
+  }
+  return false;
+}
+
+static void DONE_WITH_FFT_CACHE(bool is_writer)
+{
+  /* Release whichever side of the lock UPDATE_FFT_CACHE left held;
+   * is_writer is the value that call returned. */
+  if (!is_writer) {
+    ccrw2_cease_reading(FFT_CACHE_CCRW);
+  }
+  else {
+    ccrw2_cease_writing(FFT_CACHE_CCRW);
+  }
+}
+
+void LSX_SAFE_RDFT(int len, int type, DFT_FLOAT * d)
+{
+  /* Size the shared tables for len, run LSX_RDFT on d with them while the
+   * lock is held, then release the lock. */
+  bool const holds_write_lock = UPDATE_FFT_CACHE(len);
+
+  LSX_RDFT(len, type, d, LSX_FFT_BR, LSX_FFT_SC);
+  DONE_WITH_FFT_CACHE(holds_write_lock);
+}
+
+void LSX_SAFE_CDFT(int len, int type, DFT_FLOAT * d)
+{
+  /* Size the shared tables for len, run LSX_CDFT on d with them while the
+   * lock is held, then release the lock. */
+  bool const holds_write_lock = UPDATE_FFT_CACHE(len);
+
+  LSX_CDFT(len, type, d, LSX_FFT_BR, LSX_FFT_SC);
+  DONE_WITH_FFT_CACHE(holds_write_lock);
+}
+
+#undef UPDATE_FFT_CACHE
+#undef LSX_SAFE_RDFT
+#undef LSX_SAFE_CDFT
+#undef LSX_RDFT
+#undef LSX_INIT_FFT_CACHE
+#undef LSX_FFT_SC
+#undef LSX_FFT_BR
+#undef LSX_CLEAR_FFT_CACHE
+#undef LSX_CDFT
+#undef FFT_LEN
+#undef FFT_CACHE_CCRW
+#undef DONE_WITH_FFT_CACHE
+#undef DFT_FLOAT
diff --git a/src/fifo.h b/src/fifo.h

new file mode 100644 (file)

index 0000000..b2bda43
--- /dev/null
+++ b/src/fifo.h
@@ -0,0 +1,124 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#ifndef fifo_included
+#define fifo_included
+
+#if !defined FIFO_SIZE_T
+#define FIFO_SIZE_T size_t
+#endif
+
+#if !defined FIFO_REALLOC
+  #define FIFO_REALLOC(a,b,c) realloc(a,b)
+  #undef FIFO_FREE
+  #define FIFO_FREE free
+  #undef FIFO_MALLOC
+  #define FIFO_MALLOC malloc
+#endif
+
+typedef struct {
+  char * data;         /* Buffer obtained from FIFO_MALLOC / FIFO_REALLOC. */
+  size_t allocation;   /* Number of bytes allocated for data. */
+  size_t item_size;    /* Size of each item in data */
+  size_t begin;        /* Offset of the first byte to read. */
+  size_t end;          /* 1 + Offset of the last byte to read. */
+} fifo_t;
+
+#if !defined FIFO_MIN
+  #define FIFO_MIN 0x4000
+#endif
+
+#if !defined UNUSED
+  #define UNUSED
+#endif
+
+UNUSED static void fifo_clear(fifo_t * f)
+{
+  /* Forget everything queued; the buffer and its allocation are kept. */
+  f->begin = 0;
+  f->end = 0;
+}
+
+/* Reserve room for n0 more items at the end of the fifo and return a pointer
+ * to the start of that region, which the caller then fills in.  Returns 0 if
+ * the buffer could not be grown; in that case the existing buffer, its
+ * recorded allocation and the queued data all remain valid. */
+UNUSED static void * fifo_reserve(fifo_t * f, FIFO_SIZE_T n0)
+{
+  size_t n = (size_t)n0;
+  n *= f->item_size;
+
+  if (f->begin == f->end)   /* Empty: restart from the front of the buffer. */
+    fifo_clear(f);
+
+  while (1) {
+    char * grown;
+
+    if (f->end + n <= f->allocation) {
+      void *p = f->data + f->end;
+
+      f->end += n;
+      return p;
+    }
+    if (f->begin > FIFO_MIN) {   /* Reclaim the space already read first. */
+      memmove(f->data, f->data + f->begin, f->end - f->begin);
+      f->end -= f->begin;
+      f->begin = 0;
+      continue;
+    }
+    /* Hold the result in a temporary until it is known to be good: storing
+     * it straight into f->data would lose (leak) the old buffer on failure,
+     * and bumping `allocation' first would record memory that was never
+     * obtained. */
+    grown = FIFO_REALLOC(f->data, f->allocation + n, f->allocation);
+    if (!grown)
+      return 0;
+    f->data = grown;
+    f->allocation += n;
+  }
+}
+
+/* Append n0 items to the fifo.  If data is non-null the items are copied from
+ * it; otherwise the space is only reserved.  Returns a pointer to the start of
+ * the appended region, or 0 if fifo_reserve() could not make room (nothing is
+ * copied in that case). */
+UNUSED static void * fifo_write(fifo_t * f, FIFO_SIZE_T n0, void const * data)
+{
+  size_t n = (size_t)n0;
+  void * s = fifo_reserve(f, n0);
+  if (s && data)   /* s is 0 when the buffer could not be grown */
+    memcpy(s, data, n * f->item_size);
+  return s;
+}
+
+UNUSED static void fifo_trim_to(fifo_t * f, FIFO_SIZE_T n0)
+{
+  /* Make the readable region exactly n0 items long, measured from `begin'. */
+  f->end = f->begin + (size_t)n0 * f->item_size;
+}
+
+UNUSED static void fifo_trim_by(fifo_t * f, FIFO_SIZE_T n0)
+{
+  /* Drop the n0 most recently written items by pulling `end' back. */
+  f->end -= (size_t)n0 * f->item_size;
+}
+
+UNUSED static FIFO_SIZE_T fifo_occupancy(fifo_t * f)
+{
+  /* Number of whole items currently available to read. */
+  size_t const bytes = f->end - f->begin;
+
+  return (FIFO_SIZE_T)(bytes / f->item_size);
+}
+
+UNUSED static void * fifo_read(fifo_t * f, FIFO_SIZE_T n0, void * data)
+{
+  /* Consume n0 items from the front.  They are copied to data when it is
+   * non-null.  Returns a pointer to where the items sit in the buffer, or
+   * NULL (consuming nothing) if fewer than n0 items are queued. */
+  size_t const wanted = (size_t)n0 * f->item_size;
+  size_t const available = f->end - f->begin;
+  char * const head = f->data + f->begin;
+
+  if (wanted > available)
+    return NULL;
+  if (data)
+    memcpy(data, head, wanted);
+  f->begin += wanted;
+  return head;
+}
+
+#define fifo_read_ptr(f) fifo_read(f, (FIFO_SIZE_T)0, NULL)
+
+UNUSED static void fifo_delete(fifo_t * f)
+{
+  /* Release the buffer; the fifo_t itself belongs to the caller. */
+  FIFO_FREE(f->data);
+}
+
+UNUSED static int fifo_create(fifo_t * f, FIFO_SIZE_T item_size)
+{
+  /* Set up an empty fifo of items item_size bytes each, with an initial
+   * buffer of FIFO_MIN bytes.  Returns 0 on success and 1 if the buffer
+   * could not be allocated. */
+  f->item_size = (size_t)item_size;
+  f->allocation = FIFO_MIN;
+  f->end = f->begin = 0;
+  f->data = FIFO_MALLOC(f->allocation);
+  return !f->data;
+}
+
+#endif
diff --git a/src/filter.c b/src/filter.c

new file mode 100644 (file)

index 0000000..ca146d2
--- /dev/null
+++ b/src/filter.c
@@ -0,0 +1,245 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "filter.h"
+
+#include <math.h>
+#if !defined M_PI
+#define M_PI    3.14159265358979323846
+#endif
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "fft4g.h"
+#include "ccrw2.h"
+
+#if 1 || HAVE_DOUBLE_PRECISION /* Always need this, for lsx_fir_to_phase. */
+#define DFT_FLOAT double
+#define DONE_WITH_FFT_CACHE done_with_fft_cache
+#define FFT_CACHE_CCRW fft_cache_ccrw
+#define FFT_LEN fft_len
+#define LSX_CDFT lsx_cdft
+#define LSX_CLEAR_FFT_CACHE lsx_clear_fft_cache
+#define LSX_FFT_BR lsx_fft_br
+#define LSX_FFT_SC lsx_fft_sc
+#define LSX_INIT_FFT_CACHE lsx_init_fft_cache
+#define LSX_RDFT lsx_rdft
+#define LSX_SAFE_CDFT lsx_safe_cdft
+#define LSX_SAFE_RDFT lsx_safe_rdft
+#define UPDATE_FFT_CACHE update_fft_cache
+#include "fft4g_cache.h"
+#endif
+
+#if HAVE_SINGLE_PRECISION && !HAVE_AVFFT
+#define DFT_FLOAT float
+#define DONE_WITH_FFT_CACHE done_with_fft_cache_f
+#define FFT_CACHE_CCRW fft_cache_ccrw_f
+#define FFT_LEN fft_len_f
+#define LSX_CDFT lsx_cdft_f
+#define LSX_CLEAR_FFT_CACHE lsx_clear_fft_cache_f
+#define LSX_FFT_BR lsx_fft_br_f
+#define LSX_FFT_SC lsx_fft_sc_f
+#define LSX_INIT_FFT_CACHE lsx_init_fft_cache_f
+#define LSX_RDFT lsx_rdft_f
+#define LSX_SAFE_CDFT lsx_safe_cdft_f
+#define LSX_SAFE_RDFT lsx_safe_rdft_f
+#define UPDATE_FFT_CACHE update_fft_cache_f
+#include "fft4g_cache.h"
+#endif
+
+#if HAVE_DOUBLE_PRECISION || !SOXR_LIB
+#define DFT_FLOAT double
+#define ORDERED_CONVOLVE lsx_ordered_convolve
+#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve
+#include "rdft.h"
+#endif
+
+#if HAVE_SINGLE_PRECISION
+#define DFT_FLOAT float
+#define ORDERED_CONVOLVE lsx_ordered_convolve_f
+#define ORDERED_PARTIAL_CONVOLVE lsx_ordered_partial_convolve_f
+#include "rdft.h"
+#endif
+
+double lsx_kaiser_beta(double att, double tr_bw)
+{
+  /* Kaiser beta for an attenuation of att.  From 60 upwards beta is a cubic
+   * in att whose coefficients depend on tr_bw: the table has one row per
+   * doubling of tr_bw starting from .0005, and the results of the two
+   * neighbouring rows are linearly interpolated.  Below 60, closed-form
+   * expressions are used, and 0 is returned at or below 20.96. */
+  if (att >= 60) {
+    static const double coefs[][4] = {
+      {-6.784957e-10,1.02856e-05,0.1087556,-0.8988365+.001},
+      {-6.897885e-10,1.027433e-05,0.10876,-0.8994658+.002},
+      {-1.000683e-09,1.030092e-05,0.1087677,-0.9007898+.003},
+      {-3.654474e-10,1.040631e-05,0.1087085,-0.8977766+.006},
+      {8.106988e-09,6.983091e-06,0.1091387,-0.9172048+.015},
+      {9.519571e-09,7.272678e-06,0.1090068,-0.9140768+.025},
+      {-5.626821e-09,1.342186e-05,0.1083999,-0.9065452+.05},
+      {-9.965946e-08,5.073548e-05,0.1040967,-0.7672778+.085},
+      {1.604808e-07,-5.856462e-05,0.1185998,-1.34824+.1},
+      {-1.511964e-07,6.363034e-05,0.1064627,-0.9876665+.18},
+    };
+    double const realm = log(tr_bw/.0005)/log(2.);
+    int const whole = (int)realm;
+    int const last_row = (int)array_length(coefs)-1;
+    double const * const row0 = coefs[range_limit(whole, 0, last_row)];
+    double const * const row1 = coefs[range_limit(1+whole, 0, last_row)];
+    double const beta0 = ((row0[0]*att + row0[1])*att + row0[2])*att + row0[3];
+    double const beta1 = ((row1[0]*att + row1[1])*att + row1[2])*att + row1[3];
+
+    return beta0 + (beta1 - beta0) * (realm - whole);
+  }
+  if (att > 50)
+    return .1102 * (att - 8.7);
+  if (att > 20.96)
+    return .58417 * pow(att -20.96, .4) + .07886 * (att - 20.96);
+  return 0;
+}
+
+double * lsx_make_lpf(
+    int num_taps, double Fc, double beta, double rho, double scale)
+{
+  /* Builds a symmetric num_taps-long low-pass FIR: a sin(Fc*x)/x kernel
+   * shaped by a window computed with lsx_bessel_I_0 (parameters beta, rho)
+   * and scaled by `scale'.  The caller owns (and frees) the result, which is
+   * a null pointer if the allocation failed. */
+  int const last = num_taps - 1;
+  double * const taps = malloc((size_t)num_taps * sizeof(*taps));
+  double const gain = scale / lsx_bessel_I_0(beta);
+  double const y_scale = 1 / (.5 * last + rho);
+  int i;
+
+  assert(Fc >= 0 && Fc <= 1);
+  lsx_debug("make_lpf(n=%i Fc=%.7g β=%g ρ=%g scale=%g)",
+      num_taps, Fc, beta, rho, scale);
+
+  if (!taps)
+    return taps;
+
+  /* Compute the first half (centre included) and mirror it. */
+  for (i = 0; i <= last / 2; ++i) {
+    double const z = i - .5 * last, x = z * M_PI, y = z * y_scale;
+
+    taps[i] = x? sin(Fc * x) / x : Fc;
+    taps[i] *= lsx_bessel_I_0(beta * sqrt(1 - y * y)) * gain;
+    if (last - i != i)
+      taps[last - i] = taps[i];
+  }
+  return taps;
+}
+
+void lsx_kaiser_params(double att, double Fc, double tr_bw, double * beta, int * num_taps)
+{
+  /* Fills in whichever of *beta (when negative on entry) and *num_taps (when
+   * zero on entry) the caller left to be estimated; values supplied by the
+   * caller are kept as they are. */
+  double b;
+
+  if (*beta < 0)
+    *beta = lsx_kaiser_beta(att, tr_bw * .5 / Fc);
+  b = *beta;
+
+  /* From here on att is reused as the numerator of the tap-count estimate. */
+  if (att < 60)
+    att = (att - 7.95) / (2.285 * M_PI * 2);
+  else
+    att = ((.0007528358 - 1.577737e-05 * b) * b + .6248022) * b + .06186902;
+
+  if (!*num_taps)
+    *num_taps = (int)ceil(att/tr_bw + 1);
+}
+
+double * lsx_design_lpf(
+    double Fp,      /* End of pass-band */
+    double Fs,      /* Start of stop-band */
+    double Fn,      /* Nyquist freq; e.g. 0.5, 1, PI; < 0: dummy run */
+    double att,     /* Stop-band attenuation in dB */
+    int * num_taps, /* 0: value will be estimated */
+    int k,          /* >0: number of phases; <0: num_taps ≡ 1 (mod -k) */
+    double beta)    /* <0: value will be estimated */
+{
+  int const requested_taps = *num_taps;
+  int const phases = max(k, 1), modulo = max(-k, 1);
+  double tr_bw, Fc, rho;
+
+  if (phases == 1)
+    rho = .5;
+  else if (att < 120)
+    rho = .63;
+  else
+    rho = .75;
+
+  /* Normalise to Fn = 1 */
+  Fp /= fabs(Fn);
+  Fs /= fabs(Fn);
+
+  /* Transition band-width: 6dB to stop points */
+  tr_bw = .5 * (Fs - Fp);
+  tr_bw /= phases;
+  Fs /= phases;
+  tr_bw = min(tr_bw, .5 * Fs);
+  Fc = Fs - tr_bw;
+  assert(Fc - tr_bw >= 0);
+
+  lsx_kaiser_params(att, Fc, tr_bw, &beta, num_taps);
+
+  /* An estimated tap count is adjusted to fit the phase/modulo constraint. */
+  if (!requested_taps) {
+    if (phases > 1)
+      *num_taps = *num_taps / phases * phases + phases - 1;
+    else
+      *num_taps = (*num_taps + modulo - 2) / modulo * modulo + 1;
+  }
+
+  if (Fn < 0)   /* Dummy run: no filter is built. */
+    return 0;
+  return lsx_make_lpf(*num_taps, Fc, beta, rho, (double)phases);
+}
+
+static double safe_log(double x)
+{
+  /* log(x) for x > 0; for x == 0 a debug message is emitted and the fixed
+   * value -26 is returned instead of -infinity. */
+  assert(x >= 0);
+  if (x == 0) {
+    lsx_debug("log(0)");
+    return -26;
+  }
+  return log(x);
+}
+/* Rework the FIR in *h (length *len) to the phase response chosen by `phase`; may reallocate *h and change *len, and sets *post_len. */
+void lsx_fir_to_phase(double * * h, int * len, int * post_len, double phase)
+{
+  double * pi_wraps, * work, phase1 = (phase > 50 ? 100 - phase : phase) / 50; /* phase1: phase folded about 50 and scaled by 1/50 */
+  int i, work_len, begin, end, imp_peak = 0, peak = 0;
+  double imp_sum = 0, peak_imp_sum = 0;
+  double prev_angle2 = 0, cum_2pi = 0, prev_angle1 = 0, cum_1pi = 0;
+
+  for (i = *len, work_len = 2 * 2 * 8; i > 1; work_len <<= 1, i >>= 1); /* work_len = 32, doubled once per halving of *len down to 1 */
+
+  work = calloc((size_t)work_len + 2, sizeof(*work)); /* +2: (UN)PACK */
+  pi_wraps = malloc((((size_t)work_len + 2) / 2) * sizeof(*pi_wraps)); /* NOTE(review): neither allocation is checked before use */
+
+  memcpy(work, *h, (size_t)*len * sizeof(*work));
+  lsx_safe_rdft(work_len, 1, work); /* Cepstral: */
+  LSX_UNPACK(work, work_len);
+
+  for (i = 0; i <= work_len; i += 2) { /* per (re, im) pair: record accumulated phase wraps, replace the pair by (log magnitude, 0) */
+    double angle = atan2(work[i + 1], work[i]);
+    double detect = 2 * M_PI;
+    double delta = angle - prev_angle2;
+    double adjust = detect * ((delta < -detect * .7) - (delta > detect * .7)); /* +detect, -detect or 0 depending on the size of the jump */
+    prev_angle2 = angle;
+    cum_2pi += adjust;
+    angle += cum_2pi;
+    detect = M_PI;
+    delta = angle - prev_angle1;
+    adjust = detect * ((delta < -detect * .7) - (delta > detect * .7));
+    prev_angle1 = angle;
+    cum_1pi += fabs(adjust); /* fabs for when 2pi and 1pi have combined */
+    pi_wraps[i >> 1] = cum_1pi;
+
+    work[i] = safe_log(sqrt(sqr(work[i]) + sqr(work[i + 1])));
+    work[i + 1] = 0;
+  }
+  LSX_PACK(work, work_len);
+  lsx_safe_rdft(work_len, -1, work);
+  for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len; /* scale the result of the type -1 transform */
+
+  for (i = 1; i < work_len / 2; ++i) { /* Window to reject acausal components */
+    work[i] *= 2;
+    work[i + work_len / 2] = 0;
+  }
+  lsx_safe_rdft(work_len, 1, work);
+
+  for (i = 2; i < work_len; i += 2) /* Interpolate between linear & min phase */
+    work[i + 1] = phase1 * i / work_len * pi_wraps[work_len >> 1] +
+        (1 - phase1) * (work[i + 1] + pi_wraps[i >> 1]) - pi_wraps[i >> 1];
+
+  work[0] = exp(work[0]), work[1] = exp(work[1]);
+  for (i = 2; i < work_len; i += 2) { /* turn (log magnitude, angle) back into (re, im) */
+    double x = exp(work[i]);
+    work[i    ] = x * cos(work[i + 1]);
+    work[i + 1] = x * sin(work[i + 1]);
+  }
+
+  lsx_safe_rdft(work_len, -1, work);
+  for (i = 0; i < work_len; ++i) work[i] *= 2. / work_len;
+
+  /* Find peak pos. */
+  for (i = 0; i <= (int)(pi_wraps[work_len >> 1] / M_PI + .5); ++i) {
+    imp_sum += work[i];
+    if (fabs(imp_sum) > fabs(peak_imp_sum)) { /* peak = index where the running sum is largest in magnitude */
+      peak_imp_sum = imp_sum;
+      peak = i;
+    }
+    if (work[i] > work[imp_peak]) /* For debug check only */
+      imp_peak = i;
+  }
+  while (peak && fabs(work[peak-1]) > fabs(work[peak]) && work[peak-1] * work[peak] > 0) /* step back while the previous sample is larger and has the same sign */
+    --peak;
+
+  if (!phase1)
+    begin = 0;
+  else if (phase1 == 1)
+    begin = peak - *len / 2;
+  else { /* intermediate setting: the output length changes, so *h is resized */
+    begin = (int)((.997 - (2 - phase1) * .22) * *len + .5);
+    end   = (int)((.997 + (0 - phase1) * .22) * *len + .5);
+    begin = peak - (begin & ~3);
+    end   = peak + 1 + ((end + 3) & ~3);
+    *len = end - begin;
+    *h = realloc(*h, (size_t)*len * sizeof(**h)); /* NOTE(review): realloc result is not checked */
+  }
+  for (i = 0; i < *len; ++i) (*h)[i] = /* copy out, in reverse order when phase > 50; the index wraps modulo work_len */
+    work[(begin + (phase > 50 ? *len - 1 - i : i) + work_len) & (work_len - 1)];
+  *post_len = phase > 50 ? peak - begin : begin + *len - (peak + 1);
+
+  lsx_debug("nPI=%g peak-sum@%i=%g (val@%i=%g); len=%i post=%i (%g%%)",
+      pi_wraps[work_len >> 1] / M_PI, peak, peak_imp_sum, imp_peak,
+      work[imp_peak], *len, *post_len, 100 - 100. * *post_len / (*len - 1));
+  free(pi_wraps), free(work);
+}
diff --git a/src/filter.h b/src/filter.h

new file mode 100644 (file)

index 0000000..435303b
--- /dev/null
+++ b/src/filter.h
@@ -0,0 +1,39 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined soxr_filter_included
+#define soxr_filter_included
+
+#include "aliases.h"
+
+double lsx_bessel_I_0(double x);
+void lsx_init_fft_cache(void);
+void lsx_clear_fft_cache(void);
+void lsx_init_fft_cache_f(void);
+void lsx_clear_fft_cache_f(void);
+/* Non-zero iff x is a power of two and at least 2.  The argument and the
+ * result are fully parenthesised so that expression arguments (e.g. `a | b')
+ * expand correctly; note that x is still evaluated more than once. */
+#define lsx_is_power_of_2(x) (!((x) < 2 || ((x) & ((x) - 1))))
+void lsx_safe_rdft(int len, int type, double * d);
+void lsx_safe_cdft(int len, int type, double * d);
+void lsx_safe_rdft_f(int len, int type, float * d);
+void lsx_safe_cdft_f(int len, int type, float * d);
+void lsx_ordered_convolve(int n, void * not_used, double * a, const double * b);
+void lsx_ordered_convolve_f(int n, void * not_used, float * a, const float * b);
+void lsx_ordered_partial_convolve(int n, double * a, const double * b);
+void lsx_ordered_partial_convolve_f(int n, float * a, const float * b);
+
+double lsx_kaiser_beta(double att, double tr_bw);
+double * lsx_make_lpf(int num_taps, double Fc, double beta, double rho,
+    double scale);
+void lsx_kaiser_params(double att, double Fc, double tr_bw, double * beta, int * num_taps);
+double * lsx_design_lpf(
+    double Fp,      /* End of pass-band */
+    double Fs,      /* Start of stop-band */
+    double Fn,      /* Nyquist freq; e.g. 0.5, 1, PI; < 0: dummy run */
+    double att,     /* Stop-band attenuation in dB */
+    int * num_taps, /* 0: value will be estimated */
+    int k,          /* >0: number of phases; <0: num_taps ≡ 1 (mod -k) */
+    double beta);   /* <0: value will be estimated */
+void lsx_fir_to_phase(double * * h, int * len,
+    int * post_len, double phase0);
+
+#endif
diff --git a/src/filters.h b/src/filters.h

new file mode 100644 (file)

index 0000000..e9a8011
--- /dev/null
+++ b/src/filters.h
@@ -0,0 +1,151 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "half_coefs.h"
+
+#define FUNCTION h8
+#define CONVOLVE _ _ _ _ _ _ _ _
+#define h8_l 8
+#define COEFS half_fir_coefs_8
+#include "half-fir.h"
+
+#define FUNCTION h9
+#define CONVOLVE _ _ _ _ _ _ _ _ _
+#define h9_l 9
+#define COEFS half_fir_coefs_9
+#include "half-fir.h"
+
+#define FUNCTION h10
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _
+#define h10_l 10
+#define COEFS half_fir_coefs_10
+#include "half-fir.h"
+
+#define FUNCTION h11
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _
+#define h11_l 11
+#define COEFS half_fir_coefs_11
+#include "half-fir.h"
+
+#define FUNCTION h12
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _
+#define h12_l 12
+#define COEFS half_fir_coefs_12
+#include "half-fir.h"
+
+#define FUNCTION h13
+#define CONVOLVE _ _ _ _ _ _ _ _ _ _ _ _ _
+#define h13_l 13
+#define COEFS half_fir_coefs_13
+#include "half-fir.h"
+/* One row per hN stage instantiated above: its coefficient count, the stage function, and an `att` figure (presumably attenuation in dB - confirm). */
+static struct {int num_coefs; stage_fn_t fn; float att;} const half_firs[] = {
+  { 8, h8 , 136.51f},
+  { 9, h9 , 152.32f},
+  {10, h10, 168.07f},
+  {11, h11, 183.78f},
+  {12, h12, 199.44f},
+  {13, h13, 212.75f},
+};
+
+#define HI_PREC_CLOCK
+
+#define VAR_LENGTH p->n
+#define VAR_CONVOLVE while (j < FIR_LENGTH) _
+#define VAR_POLY_PHASE_BITS p->phase_bits
+
+#define FUNCTION vpoly0
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
+#include "poly-fir0.h"
+
+#define FUNCTION vpoly1
+#define COEF_INTERP 1
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
+#include "poly-fir.h"
+
+#define FUNCTION vpoly2
+#define COEF_INTERP 2
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
+#include "poly-fir.h"
+
+#define FUNCTION vpoly3
+#define COEF_INTERP 3
+#define PHASE_BITS VAR_POLY_PHASE_BITS
+#define FIR_LENGTH VAR_LENGTH
+#define CONVOLVE VAR_CONVOLVE
+#include "poly-fir.h"
+
+#undef HI_PREC_CLOCK
+
+#define U100_l 42
+#if RATE_SIMD_POLY
+  #define U100_l_EXTRA _ _
+  #define u100_l_EXTRA _
+  #define U100_l_EXTRA_LENGTH 2
+  #define u100_l_EXTRA_LENGTH 1
+#else
+  #define U100_l_EXTRA
+  #define u100_l_EXTRA
+  #define U100_l_EXTRA_LENGTH 0
+  #define u100_l_EXTRA_LENGTH 0
+#endif
+#define poly_fir_convolve_U100 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ U100_l_EXTRA
+#define FUNCTION U100_0
+#define FIR_LENGTH (U100_l + U100_l_EXTRA_LENGTH)
+#define CONVOLVE poly_fir_convolve_U100
+#include "poly-fir0.h"
+
+#define u100_l 11
+#define poly_fir_convolve_u100 _ _ _ _ _ _ _ _ _ _ _ u100_l_EXTRA
+#define FUNCTION u100_0
+#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
+#define CONVOLVE poly_fir_convolve_u100
+#include "poly-fir0.h"
+
+#define FUNCTION u100_1
+#define COEF_INTERP 1
+#define PHASE_BITS 8
+#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
+#define CONVOLVE poly_fir_convolve_u100
+#include "poly-fir.h"
+#define u100_1_b 8
+
+#define FUNCTION u100_2
+#define COEF_INTERP 2
+#define PHASE_BITS 6
+#define FIR_LENGTH (u100_l + u100_l_EXTRA_LENGTH)
+#define CONVOLVE poly_fir_convolve_u100
+#include "poly-fir.h"
+#define u100_2_b 6
+/* Table of poly-phase FIR variants; each row gives a beta and three {scalar, function} choices. */
+typedef struct {float scalar; stage_fn_t fn;} poly_fir1_t; /* scalar's meaning depends on the row, see the table below */
+typedef struct {float beta; poly_fir1_t interp[3];} poly_fir_t;
+
+static poly_fir_t const poly_firs[] = { /* rows with beta -1 use the variable-length vpolyN functions */
+  {-1, {{0, vpoly0}, { 7.2f, vpoly1}, {5.0f, vpoly2}}},
+  {-1, {{0, vpoly0}, { 9.4f, vpoly1}, {6.7f, vpoly2}}},
+  {-1, {{0, vpoly0}, {12.4f, vpoly1}, {7.8f, vpoly2}}},
+  {-1, {{0, vpoly0}, {13.6f, vpoly1}, {9.3f, vpoly2}}},
+  {-1, {{0, vpoly0}, {10.5f, vpoly2}, {8.4f, vpoly3}}},
+  {-1, {{0, vpoly0}, {11.85f,vpoly2}, {9.0f, vpoly3}}},
+
+  {-1, {{0, vpoly0}, { 8.0f, vpoly1}, {5.3f, vpoly2}}},
+  {-1, {{0, vpoly0}, { 8.6f, vpoly1}, {5.7f, vpoly2}}},
+  {-1, {{0, vpoly0}, {10.6f, vpoly1}, {6.75f,vpoly2}}},
+  {-1, {{0, vpoly0}, {12.6f, vpoly1}, {8.6f, vpoly2}}},
+  {-1, {{0, vpoly0}, { 9.6f, vpoly2}, {7.6f, vpoly3}}},
+  {-1, {{0, vpoly0}, {11.4f, vpoly2}, {8.65f,vpoly3}}},
+
+  {10.62f, {{U100_l, U100_0}, {0, 0}, {0, 0}}}, /* fixed-length row: scalar holds the length U100_l; no interpolated variants */
+  {11.28f, {{u100_l, u100_0}, {u100_1_b, u100_1}, {u100_2_b, u100_2}}}, /* fixed-length row: scalar holds u100_l, then the PHASE_BITS values of u100_1 / u100_2 */
+  {-1, {{0, vpoly0}, {   9, vpoly1}, {  6, vpoly2}}},
+  {-1, {{0, vpoly0}, {  11, vpoly1}, {  7, vpoly2}}},
+  {-1, {{0, vpoly0}, {  13, vpoly1}, {  8, vpoly2}}},
+  {-1, {{0, vpoly0}, {  10, vpoly2}, {  8, vpoly3}}},
+  {-1, {{0, vpoly0}, {  12, vpoly2}, {  9, vpoly3}}},
+};
diff --git a/src/half-fir.h b/src/half-fir.h

new file mode 100644 (file)

index 0000000..0a8ee97
--- /dev/null
+++ b/src/half-fir.h
@@ -0,0 +1,25 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Down-sample by a factor of 2 using a FIR with odd length (LEN).*/
+/* Input must be preceded and followed by LEN >> 1 samples. */
+
+#define _ sum += (input[-(2*j +1)] + input[(2*j +1)]) * COEFS[j], ++j;
+/* Template body (FUNCTION, CONVOLVE and COEFS are supplied by the includer).
+ * Produces (occupancy + 1) / 2 outputs, stepping the input by two samples per
+ * output.  Each output starts from the centre sample weighted by .5 and is
+ * completed by CONVOLVE, which is expected to expand to a run of `_' steps
+ * operating on the locals `input', `sum' and `j'. */
+static void FUNCTION(stage_t * p, fifo_t * output_fifo)
+{
+  sample_t const * input = stage_read_p(p);
+  int const num_out = (stage_occupancy(p) + 1) / 2;
+  sample_t * const output = fifo_reserve(output_fifo, num_out);
+  int i = 0;
+
+  while (i < num_out) {
+    int j = 0;                      /* coefficient index, advanced by `_' */
+    sample_t sum = input[0] * .5f;  /* centre tap */
+    CONVOLVE
+    output[i++] = sum;
+    input += 2;                     /* decimate by two */
+  }
+  /* Two input samples are consumed for every output produced. */
+  fifo_read(&p->fifo, 2 * num_out, NULL);
+}
+#undef _
+#undef COEFS
+#undef CONVOLVE
+#undef FUNCTION
diff --git a/src/half_coefs.h b/src/half_coefs.h

new file mode 100644 (file)

index 0000000..aac7769
--- /dev/null
+++ b/src/half_coefs.h
@@ -0,0 +1,57 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if defined __GNUC__
+  #pragma GCC system_header
+#elif defined __SUNPRO_C
+  #pragma disable_warn
+#elif defined _MSC_VER
+  #pragma warning(push, 1)
+#endif
+
+/* Coefficient tables half_fir_coefs_N, each holding exactly N values.
+ * NOTE(review): these are presumably the COEFS arrays consumed by the
+ * half-fir.h template (one value per symmetric tap pair); the literals are
+ * written at double precision, which is presumably why warnings are
+ * suppressed around this header -- confirm against the including file. */
+
+/* 8 coefficients. */
+static const sample_t half_fir_coefs_8[] = {
+  0.3115465451887802, -0.08734497241282892, 0.03681452335604365,
+  -0.01518925831569441, 0.005454118437408876, -0.001564400922162005,
+  0.0003181701445034203, -3.48001341225749e-5,
+};
+
+/* 9 coefficients. */
+static const sample_t half_fir_coefs_9[] = {
+  0.3122703613711853, -0.08922155288172305, 0.03913974805854332,
+  -0.01725059723447163, 0.006858970092378141, -0.002304518467568703,
+  0.0006096426006051062, -0.0001132393923815236, 1.119795386287666e-5,
+};
+
+/* 10 coefficients. */
+static const sample_t half_fir_coefs_10[] = {
+  0.3128545521327376, -0.09075671986104322, 0.04109637155154835,
+  -0.01906629512749895, 0.008184039342054333, -0.0030766775017262,
+  0.0009639607022414314, -0.0002358552746579827, 4.025184282444155e-5,
+  -3.629779111541012e-6,
+};
+
+/* 11 coefficients. */
+static const sample_t half_fir_coefs_11[] = {
+  0.3133358837508807, -0.09203588680609488, 0.04276515428384758,
+  -0.02067356614745591, 0.00942253142371517, -0.003856330993895144,
+  0.001363470684892284, -0.0003987400965541919, 9.058629923971627e-5,
+  -1.428553070915318e-5, 1.183455238783835e-6,
+};
+
+/* 12 coefficients. */
+static const sample_t half_fir_coefs_12[] = {
+  0.3137392991811407, -0.0931182192961332, 0.0442050575271454,
+  -0.02210391200618091, 0.01057473015666001, -0.00462766983973885,
+  0.001793630226239453, -0.0005961819959665878, 0.0001631475979359577,
+  -3.45557865639653e-5, 5.06188341942088e-6, -3.877010943315563e-7,
+};
+
+/* 13 coefficients. */
+static const sample_t half_fir_coefs_13[] = {
+  0.3140822554324578, -0.0940458550886253, 0.04545990399121566,
+  -0.02338339450796002, 0.01164429409071052, -0.005380686021429845,
+  0.002242915773871009, -0.000822047600000082, 0.0002572510962395222,
+  -6.607320708956279e-5, 1.309926399120154e-5, -1.790719575255006e-6,
+  1.27504961098836e-7,
+};
+
+#if defined __SUNPRO_C
+  #pragma enable_warn
+#elif defined _MSC_VER
+  #pragma warning(pop)
+#endif
diff --git a/src/internal.h b/src/internal.h

new file mode 100644 (file)

index 0000000..f29e29f
--- /dev/null
+++ b/src/internal.h
@@ -0,0 +1,46 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined soxr_internal_included
+#define soxr_internal_included
+
+#include "soxr-config.h"
+
+/* Generic helper macros.  Any previous min/max (e.g. from platform headers)
+ * is discarded first.  N.B. min, max, range_limit and sqr evaluate their
+ * arguments more than once, so never pass expressions with side effects. */
+#undef min
+#undef max
+#define min(a, b) ((a) <= (b) ? (a) : (b))
+#define max(a, b) ((a) >= (b) ? (a) : (b))
+
+/* Clamp x to the closed interval [lower, upper]. */
+#define range_limit(x, lower, upper) (min(max(x, lower), upper))
+/* 20 * log10(x); the includer must provide <math.h>. */
+#define linear_to_dB(x) (log10(x) * 20)
+/* Element count of a true array (not a pointer); the argument is
+ * parenthesised so that non-trivial array expressions index correctly. */
+#define array_length(a) (sizeof(a)/sizeof((a)[0]))
+#define AL(a) array_length(a)
+/* array_length as an int; fully parenthesised so that the cast cannot be
+ * captured by an adjacent postfix operator. */
+#define iAL(a) ((int)AL(a))
+#define sqr(a) ((a) * (a))
+
+/* UNUSED: marks a definition as possibly unused (GCC attribute); expands to
+ * nothing on other compilers. */
+#ifdef __GNUC__
+  #define UNUSED __attribute__ ((unused))
+#else
+  #define UNUSED
+#endif
+
+/* lsx_debug(fmt, ...): printf-style diagnostic.
+ * With NDEBUG defined, every call is rewritten to `if(0) lsx_dummy(...)', so
+ * the arguments are still compiled but the call can never execute.
+ * NOTE(review): because the macro expands to a bare `if(0)', an `else' placed
+ * after an unbraced `lsx_debug(...);' statement would bind to it. */
+#if defined NDEBUG
+  #ifdef __GNUC__
+    /* Declaration only; no definition is provided in this header. */
+    void lsx_dummy(char const *, ...);
+  #else
+    static __inline void lsx_dummy(char const * x, ...) {}
+  #endif
+  #define lsx_debug if(0) lsx_dummy
+#else
+  #include <stdarg.h>
+  #include <stdio.h>
+  /* Debug build: writes the formatted message and a newline to stderr. */
+  UNUSED static void lsx_debug(char const * fmt, ...)
+  {
+    va_list args;
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    fputc('\n', stderr);
+    va_end(args);
+  }
+#endif
+#endif
diff --git a/src/libsoxr-dev.src.in b/src/libsoxr-dev.src.in

new file mode 100644 (file)

index 0000000..ce879f9
--- /dev/null
+++ b/src/libsoxr-dev.src.in
@@ -0,0 +1,2 @@
+set(TARGET_HEADERS "@TARGET_HEADERS@")
+set(TARGET_PCS "@TARGET_PCS@")
diff --git a/src/libsoxr.src.in b/src/libsoxr.src.in

new file mode 100644 (file)

index 0000000..1c926ff
--- /dev/null
+++ b/src/libsoxr.src.in
@@ -0,0 +1 @@
+set(TARGET_LIBS "@TARGET_LIBS@")
diff --git a/src/lsr.c b/src/lsr.c

new file mode 100644 (file)

index 0000000..64b5798
--- /dev/null
+++ b/src/lsr.c
@@ -0,0 +1,114 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Wrapper mostly compatible with `libsamplerate'. */
+
+#include <assert.h>
+#include <stdlib.h>
+#include "soxr.h"
+
+/* Runtime casts: the SRC_* names are mapped onto local/soxr types before
+ * "soxr-lsr.h" is included, so that its prototypes are compiled against
+ * these types within this file. */
+/* I/O descriptor: input/output buffers, their lengths, the amounts actually
+ * consumed/produced, an end-of-input flag and the output/input rate ratio.
+ * NOTE(review): presumably laid out to match libsamplerate's SRC_DATA field
+ * for field -- confirm against soxr-lsr.h before changing member order. */
+typedef struct io_t {
+  float *in,*out; long ilen,olen,idone,odone; int eoi; double oi_ratio;} io_t;
+#define SRC_DATA io_t
+typedef struct  soxr SRC_STATE;
+#define src_callback_t soxr_input_fn_t
+#define SRC_ERROR soxr_error_t
+#define SRC_SRCTYPE unsigned
+
+#include "soxr-lsr.h"
+#include "rint.h"
+
+/* One-shot conversion: resamples p->ilen input frames from p->in into at most
+ * p->olen frames at p->out with a single soxr_oneshot() call, using an input
+ * rate of 1 and an output rate of p->oi_ratio.
+ *   p        I/O descriptor; idone/odone receive the amounts consumed/produced.
+ *   id       converter type, added to SOXR_LSR0Q to select the quality recipe.
+ *   channels number of interleaved channels; must be > 0.
+ * One thread is used unless the environment variable SOXR_LSR_NUM_THREADS is
+ * set to a value other than 1, in which case 0 is passed to
+ * soxr_runtime_spec().
+ * Returns 0 on success, otherwise an error string ("null pointer" if p is
+ * null, matching src_process()). */
+soxr_error_t src_simple(io_t * p, unsigned id, int channels)
+{
+  size_t idone, odone;
+  soxr_error_t error;
+  soxr_quality_spec_t q_spec = soxr_quality_spec(SOXR_LSR0Q + id, 0);
+  char const * e = getenv("SOXR_LSR_NUM_THREADS");
+  soxr_runtime_spec_t r_spec = soxr_runtime_spec(!(e && atoi(e) != 1));
+  if (!p) return "null pointer"; /* Don't dereference a missing descriptor. */
+  assert (channels > 0);
+  assert (p->ilen >= 0);
+  assert (p->olen >= 0);
+  error = soxr_oneshot(1, p->oi_ratio, (unsigned)channels,
+      p->in, (size_t)p->ilen, &idone, p->out, (size_t)p->olen, &odone,
+      0, &q_spec, &r_spec);
+  p->idone = (long)idone, p->odone = (long)odone;
+  return error;
+}
+
+/* Creates a resampler whose input is pulled through callback `fn'.
+ *   fn       input callback, registered together with the user pointer `p'.
+ *   id       converter type, added to SOXR_LSR0Q to select the quality recipe.
+ *   channels number of channels; must be > 0.
+ *   error0   if non-null, receives the error code narrowed to an int.
+ * Returns whatever soxr_create() returned (null on failure); the handle is
+ * returned even when registering the callback reported an error. */
+soxr_t src_callback_new(soxr_input_fn_t fn, unsigned id, int channels, SRC_ERROR * error0, void * p)
+{
+  soxr_quality_spec_t q_spec = soxr_quality_spec(SOXR_LSR0Q + id, 0);
+  char const * const threads_env = getenv("SOXR_LSR_NUM_THREADS");
+  /* 1 unless SOXR_LSR_NUM_THREADS is set to something other than 1. */
+  unsigned const num_threads = !(threads_env && atoi(threads_env) != 1);
+  soxr_runtime_spec_t r_spec = soxr_runtime_spec(num_threads);
+  soxr_error_t status;
+  soxr_t resampler;
+
+  assert (channels > 0);
+  /* To minimise latency e.g. for real-time playback:
+  if (id == 2)
+    r_spec.log2_large_dft_size = r_spec.log2_min_dft_size = 8;
+    */
+  resampler = soxr_create(0, 0, (unsigned)channels, &status, 0, &q_spec, &r_spec);
+  if (resampler)
+    status = soxr_set_input_fn(resampler, fn, p, 0);
+  if (error0)
+    *(int *)error0 = (int)(ptrdiff_t)status; /* pointer squeezed into an int */
+  return resampler;
+}
+
+/* Streaming conversion step: applies io->oi_ratio, then runs soxr_process()
+ * from io->in to io->out and stores the amounts consumed/produced in
+ * io->idone / io->odone.
+ * Returns "null pointer" if either argument is null, otherwise the
+ * resampler's current error state. */
+soxr_error_t src_process(soxr_t p, io_t * io)
+{
+  size_t consumed, produced, in_len;
+
+  if (!(p && io)) return "null pointer";
+  soxr_set_error(p, soxr_set_io_ratio(p, 1/io->oi_ratio, (size_t)io->olen));
+
+  /* hack: when end-of-input is flagged, the bitwise complement of the input
+   * length is passed instead of the length itself. */
+  in_len = (size_t)(io->eoi? ~io->ilen : io->ilen);
+  soxr_process(p, io->in, in_len, &consumed,
+      io->out, (size_t)io->olen, &produced);
+  io->idone = (long)consumed;
+  io->odone = (long)produced;
+  return soxr_error(p);
+}
+
+/* Callback-mode read: applies oi_ratio and then requests up to olen frames
+ * into obuf via soxr_output().
+ * Returns the value reported by soxr_output(), or -1 if p is null or olen is
+ * negative. */
+long src_callback_read(soxr_t p, double oi_ratio, long olen, float * obuf)
+{
+  if (p && olen >= 0) {
+    size_t const wanted = (size_t)olen;
+    soxr_set_error(p, soxr_set_io_ratio(p, 1/oi_ratio, wanted));
+    return (long)soxr_output(p, obuf, wanted);
+  }
+  return -1;
+}
+
+/* Converts len floats to shorts: each sample is scaled by SHRT_MAX + 1,
+ * clipped to the range of short, and rounded with rint16().
+ * Elements are processed from the last to the first.  A len <= 0 converts
+ * nothing (previously a negative len indexed outside both arrays). */
+void src_float_to_short_array(float const * src, short * dest, int len)
+{
+  double d, N = 1. + SHRT_MAX;
+  assert (src && dest);
+  while (len-- > 0) {
+    d = src[len] * N;
+    dest[len] = (short)(d > N - 1? (short)(N - 1) : d < -N? (short)-N : rint16(d));
+  }
+}
+
+/* Converts len shorts to floats by scaling with 1 / (SHRT_MAX + 1), so that
+ * SHRT_MIN maps to -1.  Elements are processed from the last to the first.
+ * A len <= 0 converts nothing (previously a negative len indexed outside
+ * both arrays). */
+void src_short_to_float_array(short const * src, float * dest, int len)
+{
+  assert (src && dest);
+  while (len-- > 0) dest[len] = (float)(src[len] * (1 / (1. + SHRT_MAX)));
+}
+
+/* Converts len floats to 32-bit integers: each sample is scaled by 2^31,
+ * clipped to [-2^31, 2^31 - 1], and rounded with rint32().
+ * Elements are processed from the last to the first.  A len <= 0 converts
+ * nothing (previously a negative len indexed outside both arrays). */
+void src_float_to_int_array(float const * src, int * dest, int len)
+{
+  double d, N = 32768. * 65536.; /* N.B. int32, not int! (Also next fn.) */
+  assert (src && dest);
+  while (len-- > 0) {
+    d = src[len] * N;
+    dest[len] = d >= N - 1? (int)(N - 1) : d < -N? (int)(-N) : rint32(d);
+  }
+}
+
+/* Converts len integers to floats by scaling with 1 / 2^31 (the samples are
+ * treated as 32-bit values whatever the width of int).  Elements are
+ * processed from the last to the first.  A len <= 0 converts nothing
+ * (previously a negative len indexed outside both arrays). */
+void src_int_to_float_array(int const * src, float * dest, int len)
+{
+  assert (src && dest);
+  while (len-- > 0) dest[len] = (float)(src[len] * (1 / (32768. * 65536.)));
+}
+
+/* Converter names, indexed by converter id. */
+static char const * const names[] = {
+  "LSR best sinc",
+  "LSR medium sinc",
+  "LSR fastest sinc",
+  "LSR ZOH",
+  "LSR linear",
+  "SoX VHQ"
+};
+
+/* Returns the name of converter n, or 0 if n is out of range.  The last
+ * entry is only reachable when SOXR_LSR_STRICT is absent from the
+ * environment. */
+char const * src_get_name(unsigned n)
+{
+  unsigned const count = getenv("SOXR_LSR_STRICT")? 5u : 6u;
+  if (n >= count)
+    return 0;
+  return names[n];
+}
+
+/* The description of a converter is simply its name. */
+char const * src_get_description(unsigned id)
+{
+  return src_get_name(id);
+}
+/* Reports the underlying soxr version string. */
+char const * src_get_version(void)
+{
+  return soxr_version();
+}
+
+/* Maps an error code to a message.  The value 1 yields "Placeholder.".
+ * Any other value is passed to soxr_strerror() when it is null or when an
+ * int is at least as wide as a pointer; otherwise a generic message is
+ * returned (elsewhere in this file codes are narrowed through an int, so a
+ * non-null code is then presumably not a usable string pointer). */
+char const * src_strerror(soxr_error_t error)
+{
+  if (error == (soxr_error_t)1)
+    return "Placeholder.";
+  if (sizeof(int) >= sizeof(char *) || !error)
+    return soxr_strerror(error);
+  return "soxr error";
+}
+/* Non-zero if oi_ratio is acceptable: with SOXR_LSR_STRICT present in the
+ * environment it must lie within [1/256, 256]; otherwise any positive ratio
+ * is accepted. */
+int src_is_valid_ratio(double oi_ratio)
+{
+  if (getenv("SOXR_LSR_STRICT"))
+    return oi_ratio >= 1./256 && oi_ratio <= 256;
+  return oi_ratio > 0;
+}
+soxr_error_t src_error(soxr_t p)              {return soxr_error(p);}
+soxr_error_t src_reset(soxr_t p)              {return soxr_clear(p);}
+soxr_t src_delete(soxr_t p)                   {soxr_delete(p); return 0;}
+soxr_error_t src_set_ratio(soxr_t p, double oi_ratio) {return soxr_set_io_ratio(p, 1/oi_ratio, 0);}
+soxr_t src_new(unsigned id, int channels, SRC_ERROR * error) {return src_callback_new(0, id, channels, error, 0);}
diff --git a/src/pffft.c b/src/pffft.c

new file mode 100644 (file)

index 0000000..9b4f59d
--- /dev/null
+++ b/src/pffft.c
@@ -0,0 +1,1729 @@
+/* Copyright (c) 2011  Julien Pommier ( pommier@modartt.com )
+
+   Based on original fortran 77 code from FFTPACKv4 from NETLIB
+   (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber
+   of NCAR, in 1985.
+
+   As confirmed by the NCAR fftpack software curators, the following
+   FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
+   released under the same terms.
+
+   FFTPACK license:
+
+   http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
+
+   Copyright (c) 2004 the University Corporation for Atmospheric
+   Research ("UCAR"). All rights reserved. Developed by NCAR's
+   Computational and Information Systems Laboratory, UCAR,
+   www.cisl.ucar.edu.
+
+   Redistribution and use of the Software in source and binary forms,
+   with or without modification, is permitted provided that the
+   following conditions are met:
+
+   - Neither the names of NCAR's Computational and Information Systems
+   Laboratory, the University Corporation for Atmospheric Research,
+   nor the names of its sponsors or contributors may be used to
+   endorse or promote products derived from this Software without
+   specific prior written permission.
+
+   - Redistributions of source code must retain the above copyright
+   notices, this list of conditions, and the disclaimer below.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions, and the disclaimer below in the
+   documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+   SOFTWARE.
+
+
+   PFFFT : a Pretty Fast FFT.
+
+   This file is largely based on the original FFTPACK implementation, modified in
+   order to take advantage of SIMD instructions of modern CPUs.
+*/
+
+/*
+  ChangeLog:
+  - 2011/10/02, version 1: This is the very first release of this file.
+*/
+
+#if !defined PFFT_MACROS_ONLY
+#include "pffft.h"
+#include "simd.h"
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+
+#define pffft_aligned_free    _soxr_simd_aligned_free
+#define pffft_aligned_malloc  _soxr_simd_aligned_malloc
+#define pffft_aligned_calloc  _soxr_simd_aligned_calloc
+#endif
+
+/*
+   vector support macros: the rest of the code is independent of
+   SSE/Altivec/NEON -- adding support for other platforms with 4-element
+   vectors should be limited to these macros
+*/
+
+
+/* define PFFFT_SIMD_DISABLE if you want to use scalar code instead of simd code */
+/*#define PFFFT_SIMD_DISABLE */
+
+/* detect compiler flavour */
+#if defined(_MSC_VER)
+#  define COMPILER_MSVC
+#elif defined(__GNUC__)
+#  define COMPILER_GCC
+#endif
+
+/* Function-attribute helpers, used as e.g. `static NEVER_INLINE(void) f(...)':
+ *   ALWAYS_INLINE(type)  force inlining of a function returning `type'
+ *   NEVER_INLINE(type)   forbid inlining of a function returning `type'
+ *   RESTRICT             no-alias pointer qualifier
+ * Compilers that are neither GCC-compatible nor MSVC get plain definitions,
+ * so that the code still compiles (without the inlining/aliasing hints)
+ * instead of failing on undefined macros. */
+#if defined(COMPILER_GCC)
+#  define ALWAYS_INLINE(return_type) inline return_type __attribute__ ((always_inline))
+#  define NEVER_INLINE(return_type) return_type __attribute__ ((noinline))
+#  define RESTRICT __restrict
+/*#  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__]; */
+#elif defined(COMPILER_MSVC)
+#  define ALWAYS_INLINE(return_type) __forceinline return_type
+#  define NEVER_INLINE(return_type) __declspec(noinline) return_type
+#  define RESTRICT __restrict
+/*#  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (v4sf*)_alloca(size__ * sizeof(type__)) */
+#else
+#  define ALWAYS_INLINE(return_type) return_type
+#  define NEVER_INLINE(return_type) return_type
+#  define RESTRICT
+#endif
+
+/*
+   Altivec support macros
+
+   v4sf is a vector of four floats.  VMUL is implemented as a multiply-add
+   with a zero addend.  LD_PS1(p) broadcasts the scalar lvalue p to all four
+   lanes (it takes the address of p).
+*/
+#if !defined(PFFFT_SIMD_DISABLE) && (defined(__ppc__) || defined(__ppc64__))
+typedef vector float v4sf;
+#  define SIMD_SZ 4
+#  define VZERO() ((vector float) vec_splat_u8(0))
+#  define VMUL(a,b) vec_madd(a,b, VZERO())
+#  define VADD(a,b) vec_add(a,b)
+#  define VMADD(a,b,c) vec_madd(a,b,c)
+#  define VSUB(a,b) vec_sub(a,b)
+/* `static': a plain `inline' definition provides no external definition under
+   C99 inline semantics, so a call that is not inlined would fail to link. */
+static inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_perm(v, v, vec_lvsl(0, p)), 0); }
+#  define LD_PS1(p) ld_ps1(&p)
+#  define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = vec_mergeh(in1, in2); out2 = vec_mergel(in1, in2); out1 = tmp__; }
+#  define UNINTERLEAVE2(in1, in2, out1, out2) {                           \
+    vector unsigned char vperm1 =  (vector unsigned char)(0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); \
+    vector unsigned char vperm2 =  (vector unsigned char)(4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); \
+    v4sf tmp__ = vec_perm(in1, in2, vperm1); out2 = vec_perm(in1, in2, vperm2); out1 = tmp__; \
+  }
+#  define VTRANSPOSE4(x0,x1,x2,x3) {              \
+    v4sf y0 = vec_mergeh(x0, x2);               \
+    v4sf y1 = vec_mergel(x0, x2);               \
+    v4sf y2 = vec_mergeh(x1, x3);               \
+    v4sf y3 = vec_mergel(x1, x3);               \
+    x0 = vec_mergeh(y0, y2);                    \
+    x1 = vec_mergel(y0, y2);                    \
+    x2 = vec_mergeh(y1, y3);                    \
+    x3 = vec_mergel(y1, y3);                    \
+  }
+#  define VSWAPHL(a,b) vec_perm(a,b, (vector unsigned char)(16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15))
+/* True if ptr is 16-byte aligned. */
+#  define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0)
+
+/*
+  SSE1 support macros
+
+  Selected for x86 / x86-64 targets unless PFFFT_SIMD_DISABLE is defined.
+  v4sf is __m128.  VMADD(a,b,c) is a separate multiply and add (a*b + c).
+  INTERLEAVE2 / UNINTERLEAVE2 compute into a temporary before assigning
+  out1, so out1 may be the same variable as one of the inputs.
+  VSWAPHL(a,b) yields the low half of b followed by the high half of a.
+*/
+#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86))
+
+#include <xmmintrin.h>
+typedef __m128 v4sf;
+#  define SIMD_SZ 4 /* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */
+#  define VZERO() _mm_setzero_ps()
+#  define VMUL(a,b) _mm_mul_ps(a,b)
+#  define VADD(a,b) _mm_add_ps(a,b)
+#  define VMADD(a,b,c) _mm_add_ps(_mm_mul_ps(a,b), c)
+#  define VSUB(a,b) _mm_sub_ps(a,b)
+#  define LD_PS1(p) _mm_set1_ps(p)
+#  define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_unpacklo_ps(in1, in2); out2 = _mm_unpackhi_ps(in1, in2); out1 = tmp__; }
+#  define UNINTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2,0,2,0)); out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3,1,3,1)); out1 = tmp__; }
+#  define VTRANSPOSE4(x0,x1,x2,x3) _MM_TRANSPOSE4_PS(x0,x1,x2,x3)
+#  define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0))
+/* True if ptr is 16-byte aligned. */
+#  define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0)
+
+/*
+  ARM NEON support macros
+
+  Selected whenever __arm__ is defined and PFFFT_SIMD_DISABLE is not.
+  NOTE(review): no NEON-specific feature macro is tested here, so this
+  presumably relies on the build only compiling this code with NEON enabled.
+  VMADD(a,b,c) computes c + a*b.  LD_PS1(p) takes the address of p, so p must
+  be an lvalue.  Unlike the other back-ends, VALIGNED only requires 4-byte
+  alignment.
+*/
+#elif !defined(PFFFT_SIMD_DISABLE) && defined(__arm__)
+#  include <arm_neon.h>
+typedef float32x4_t v4sf;
+#  define SIMD_SZ 4
+#  define VZERO() vdupq_n_f32(0)
+#  define VMUL(a,b) vmulq_f32(a,b)
+#  define VADD(a,b) vaddq_f32(a,b)
+#  define VMADD(a,b,c) vmlaq_f32(c,a,b)
+#  define VSUB(a,b) vsubq_f32(a,b)
+#  define LD_PS1(p) vld1q_dup_f32(&(p))
+#  define INTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vzipq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; }
+#  define UNINTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vuzpq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; }
+/* Intrinsics-only 4x4 transpose, kept as an alternative to the asm version. */
+#  define VTRANSPOSE4_(x0,x1,x2,x3) {                                    \
+    float32x4x2_t t0_ = vzipq_f32(x0, x2);                              \
+    float32x4x2_t t1_ = vzipq_f32(x1, x3);                              \
+    float32x4x2_t u0_ = vzipq_f32(t0_.val[0], t1_.val[0]);              \
+    float32x4x2_t u1_ = vzipq_f32(t0_.val[1], t1_.val[1]);              \
+    x0 = u0_.val[0]; x1 = u0_.val[1]; x2 = u1_.val[0]; x3 = u1_.val[1]; \
+  }
+/* marginally faster version */
+#  define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32 %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2), "+w"(x3)::); }
+#  define VSWAPHL(a,b) vcombine_f32(vget_low_f32(b), vget_high_f32(a))
+#  define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0)
+#else
+/* No supported SIMD target detected: warn and switch to the scalar macros.
+   NOTE(review): #warning is a compiler extension in C89/C99. */
+#  if !defined(PFFFT_SIMD_DISABLE)
+#    warning "building with simd disabled !\n";
+#    define PFFFT_SIMD_DISABLE /* fallback to scalar code */
+#  endif
+#endif
+
+/* fallback mode for situations where SSE/Altivec are not available, use scalar mode instead */
+/* Here v4sf is a single float and SIMD_SZ is 1, so every V* macro is the
+   ordinary scalar operation.  The lane-shuffling macros (INTERLEAVE2,
+   UNINTERLEAVE2, VTRANSPOSE4, VSWAPHL) are not defined in this mode. */
+#ifdef PFFFT_SIMD_DISABLE
+typedef float v4sf;
+#  define SIMD_SZ 1
+#  define VZERO() 0.f
+#  define VMUL(a,b) ((a)*(b))
+#  define VADD(a,b) ((a)+(b))
+#  define VMADD(a,b,c) ((a)*(b)+(c))
+#  define VSUB(a,b) ((a)-(b))
+#  define LD_PS1(p) (p)
+/* True if ptr is 4-byte aligned. */
+#  define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0)
+#endif
+
+/* shortcuts for complex multiplications */
+/* VCPLXMUL:     (ar + i*ai) *= (br + i*bi)
+   VCPLXMULCONJ: (ar + i*ai) *= (br - i*bi)
+   ar and ai are overwritten in place and must be lvalues; br and bi are each
+   evaluated twice.  Both macros declare a local named `tmp', so no argument
+   may be an expression involving a variable called tmp. */
+#define VCPLXMUL(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VSUB(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VADD(ai,tmp); }
+#define VCPLXMULCONJ(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VADD(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VSUB(ai,tmp); }
+
+#if !defined(PFFFT_SIMD_DISABLE)
+/* Overlays a SIMD vector with an array of four floats, giving access to the
+   individual lanes of a v4sf (only defined when SIMD is enabled). */
+typedef union v4sf_union {
+  v4sf  v;
+  float f[4];
+} v4sf_union;
+
+#if 0
+#include <string.h>
+
+#define assertv4(v,f0,f1,f2,f3) assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3))
+
+/* detect bugs with the vector support macros */
+void validate_pffft_simd() {
+  float f[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 };
+  v4sf_union a0, a1, a2, a3, t, u;
+  memcpy(a0.f, f, 4*sizeof(float));
+  memcpy(a1.f, f+4, 4*sizeof(float));
+  memcpy(a2.f, f+8, 4*sizeof(float));
+  memcpy(a3.f, f+12, 4*sizeof(float));
+
+  t = a0; u = a1; t.v = VZERO();
+  printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0);
+  t.v = VADD(a1.v, a2.v);
+  printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18);
+  t.v = VMUL(a1.v, a2.v);
+  printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77);
+  t.v = VMADD(a1.v, a2.v,a0.v);
+  printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80);
+
+  INTERLEAVE2(a1.v,a2.v,t.v,u.v);
+  printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+  assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
+  UNINTERLEAVE2(a1.v,a2.v,t.v,u.v);
+  printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
+  assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11);
+
+  t.v=LD_PS1(f[15]);
+  printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 15, 15, 15, 15);
+  t.v = VSWAPHL(a1.v, a2.v);
+  printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
+  assertv4(t, 8, 9, 6, 7);
+  VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
+  printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
+         a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
+         a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
+  assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
+}
+#endif
+#endif /*!PFFFT_SIMD_DISABLE */
+
+#if !defined PFFT_MACROS_ONLY
+
+
+/* From here on, sin and cos produce float results: on MSVC the result of the
+   double-precision function is cast to float; elsewhere sinf/cosf are used.
+   N.B. this redefines standard library names for the rest of the file. */
+#if defined (COMPILER_MSVC)
+  #define sin   (float)sin
+  #define cos   (float)cos
+#else
+  #define sin   sinf
+  #define cos   cosf
+#endif
+
+/*
+int pffft_simd_size() { return SIMD_SZ; }
+*/
+
+/*
+  Radix-2 complex butterfly pass.  passf2 and passb2 have been merged here:
+  fsign = -1 gives passf2, fsign = +1 gives passb2.
+
+  cc is read, ch is written.  For each pair of inputs ido apart, the sum goes
+  to ch and the difference goes l1*ido further on.  When ido > 2 the
+  difference is first multiplied by the complex factor
+  (wa1[i], fsign*wa1[i+1]).
+*/
+static NEVER_INLINE(void) passf2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1, float fsign) {
+  const int span = l1*ido;  /* offset in ch from sum to difference output */
+  int blk, n;
+  if (ido > 2) {
+    for (blk = 0; blk < span; blk += ido) {
+      for (n = 0; n < ido-1; n += 2) {
+        v4sf dr = VSUB(cc[n+0], cc[n+ido+0]);
+        v4sf di = VSUB(cc[n+1], cc[n+ido+1]);
+        v4sf wr = LD_PS1(wa1[n]), wi = VMUL(LD_PS1(fsign), LD_PS1(wa1[n+1]));
+        ch[n]   = VADD(cc[n+0], cc[n+ido+0]);
+        ch[n+1] = VADD(cc[n+1], cc[n+ido+1]);
+        VCPLXMUL(dr, di, wr, wi);
+        ch[n+span]   = dr;
+        ch[n+span+1] = di;
+      }
+      ch += ido;
+      cc += 2*ido;
+    }
+    return;
+  }
+  /* ido <= 2: a single complex value per block, no twiddle multiplication. */
+  for (blk = 0; blk < span; blk += ido) {
+    ch[0]        = VADD(cc[0], cc[ido+0]);
+    ch[span]     = VSUB(cc[0], cc[ido+0]);
+    ch[1]        = VADD(cc[1], cc[ido+1]);
+    ch[span + 1] = VSUB(cc[1], cc[ido+1]);
+    ch += ido;
+    cc += 2*ido;
+  }
+}
+
+/*
+  Radix-3 complex butterfly pass.  passf3 and passb3 have been merged here:
+  fsign = -1 gives passf3, fsign = +1 gives passb3.
+
+  cc is read, ch is written; the three outputs of each butterfly are l1*ido
+  apart in ch.  wa1 and wa2 hold (real, imaginary) twiddle pairs whose
+  imaginary parts are multiplied by fsign.  Requires ido > 2.
+*/
+static NEVER_INLINE(void) passf3_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
+                                    const float *wa1, const float *wa2, float fsign) {
+  /* taur = -1/2 and |taui| = sqrt(3)/2; taui takes the sign of fsign. */
+  static const float taur = -0.5f;
+  float taui = 0.866025403784439f*fsign;
+  int i, k;
+  v4sf tr2, ti2, cr2, ci2, cr3, ci3, dr2, di2, dr3, di3;
+  int l1ido = l1*ido;
+  float wr1, wi1, wr2, wi2;
+  assert(ido > 2);
+  for (k=0; k< l1ido; k += ido, cc+= 3*ido, ch +=ido) {
+    for (i=0; i<ido-1; i+=2) {
+      tr2 = VADD(cc[i+ido], cc[i+2*ido]);
+      cr2 = VADD(cc[i], VMUL(LD_PS1(taur),tr2));
+      ch[i]    = VADD(cc[i], tr2);
+      ti2 = VADD(cc[i+ido+1], cc[i+2*ido+1]);
+      ci2 = VADD(cc[i    +1], VMUL(LD_PS1(taur),ti2));
+      ch[i+1]  = VADD(cc[i+1], ti2);
+      cr3 = VMUL(LD_PS1(taui), VSUB(cc[i+ido], cc[i+2*ido]));
+      ci3 = VMUL(LD_PS1(taui), VSUB(cc[i+ido+1], cc[i+2*ido+1]));
+      dr2 = VSUB(cr2, ci3);
+      dr3 = VADD(cr2, ci3);
+      di2 = VADD(ci2, cr3);
+      di3 = VSUB(ci2, cr3);
+      /* Copy the twiddles to locals so that LD_PS1 is given lvalues. */
+      wr1=wa1[i], wi1=fsign*wa1[i+1], wr2=wa2[i], wi2=fsign*wa2[i+1];
+      VCPLXMUL(dr2, di2, LD_PS1(wr1), LD_PS1(wi1));
+      ch[i+l1ido] = dr2;
+      ch[i+l1ido + 1] = di2;
+      VCPLXMUL(dr3, di3, LD_PS1(wr2), LD_PS1(wi2));
+      ch[i+2*l1ido] = dr3;
+      ch[i+2*l1ido+1] = di3;
+    }
+  }
+} /* passf3 */
+
+/*
+  Radix-4 complex butterfly pass, merged forward/backward version.
+
+  cc is read, ch is written; the four outputs of each butterfly are l1*ido
+  apart in ch.  When ido == 2 no twiddle factors are applied; otherwise
+  outputs 1..3 are multiplied by the (real, fsign*imaginary) pairs taken from
+  wa1, wa2 and wa3 respectively.
+*/
+static NEVER_INLINE(void) passf4_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
+                                    const float *wa1, const float *wa2, const float *wa3, float fsign) {
+  /* fsign == -1 for forward transform and +1 for backward transform */
+
+  int i, k;
+  v4sf ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
+  int l1ido = l1*ido;
+  if (ido == 2) {
+    for (k=0; k < l1ido; k += ido, ch += ido, cc += 4*ido) {
+      tr1 = VSUB(cc[0], cc[2*ido + 0]);
+      tr2 = VADD(cc[0], cc[2*ido + 0]);
+      ti1 = VSUB(cc[1], cc[2*ido + 1]);
+      ti2 = VADD(cc[1], cc[2*ido + 1]);
+      ti4 = VMUL(VSUB(cc[1*ido + 0], cc[3*ido + 0]), LD_PS1(fsign));
+      tr4 = VMUL(VSUB(cc[3*ido + 1], cc[1*ido + 1]), LD_PS1(fsign));
+      tr3 = VADD(cc[ido + 0], cc[3*ido + 0]);
+      ti3 = VADD(cc[ido + 1], cc[3*ido + 1]);
+
+      ch[0*l1ido + 0] = VADD(tr2, tr3);
+      ch[0*l1ido + 1] = VADD(ti2, ti3);
+      ch[1*l1ido + 0] = VADD(tr1, tr4);
+      ch[1*l1ido + 1] = VADD(ti1, ti4);
+      ch[2*l1ido + 0] = VSUB(tr2, tr3);
+      ch[2*l1ido + 1] = VSUB(ti2, ti3);
+      ch[3*l1ido + 0] = VSUB(tr1, tr4);
+      ch[3*l1ido + 1] = VSUB(ti1, ti4);
+    }
+  } else {
+    for (k=0; k < l1ido; k += ido, ch+=ido, cc += 4*ido) {
+      for (i=0; i<ido-1; i+=2) {
+        /* Twiddles are copied to locals so that LD_PS1 is given lvalues. */
+        float wr1, wi1, wr2, wi2, wr3, wi3;
+        tr1 = VSUB(cc[i + 0], cc[i + 2*ido + 0]);
+        tr2 = VADD(cc[i + 0], cc[i + 2*ido + 0]);
+        ti1 = VSUB(cc[i + 1], cc[i + 2*ido + 1]);
+        ti2 = VADD(cc[i + 1], cc[i + 2*ido + 1]);
+        tr4 = VMUL(VSUB(cc[i + 3*ido + 1], cc[i + 1*ido + 1]), LD_PS1(fsign));
+        ti4 = VMUL(VSUB(cc[i + 1*ido + 0], cc[i + 3*ido + 0]), LD_PS1(fsign));
+        tr3 = VADD(cc[i + ido + 0], cc[i + 3*ido + 0]);
+        ti3 = VADD(cc[i + ido + 1], cc[i + 3*ido + 1]);
+
+        ch[i] = VADD(tr2, tr3);
+        cr3    = VSUB(tr2, tr3);
+        ch[i + 1] = VADD(ti2, ti3);
+        ci3 = VSUB(ti2, ti3);
+
+        cr2 = VADD(tr1, tr4);
+        cr4 = VSUB(tr1, tr4);
+        ci2 = VADD(ti1, ti4);
+        ci4 = VSUB(ti1, ti4);
+        wr1=wa1[i], wi1=fsign*wa1[i+1];
+        VCPLXMUL(cr2, ci2, LD_PS1(wr1), LD_PS1(wi1));
+        wr2=wa2[i], wi2=fsign*wa2[i+1];
+        ch[i + l1ido] = cr2;
+        ch[i + l1ido + 1] = ci2;
+
+        VCPLXMUL(cr3, ci3, LD_PS1(wr2), LD_PS1(wi2));
+        wr3=wa3[i], wi3=fsign*wa3[i+1];
+        ch[i + 2*l1ido] = cr3;
+        ch[i + 2*l1ido + 1] = ci3;
+
+        VCPLXMUL(cr4, ci4, LD_PS1(wr3), LD_PS1(wi3));
+        ch[i + 3*l1ido] = cr4;
+        ch[i + 3*l1ido + 1] = ci4;
+      }
+    }
+  }
+} /* passf4 */
+
+/*
+  Radix-2 pass reading cc and writing ch (presumably the vectorised
+  counterpart of FFTPACK's real forward `radf2' -- confirm).
+
+  Three phases: (1) the first element of every block; (2) for ido > 2, the
+  interior pairs, each multiplied by the conjugate of the twiddle
+  (wa1[i-2], wa1[i-1]); (3) for even ido, the last element of every block.
+  cc and ch are declared RESTRICT, so they must not overlap.
+*/
+static NEVER_INLINE(void) radf2_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch, const float *wa1) {
+  static const float minus_one = -1.f;
+  int i, k, l1ido = l1*ido;
+  for (k=0; k < l1ido; k += ido) {
+    v4sf a = cc[k], b = cc[k + l1ido];
+    ch[2*k] = VADD(a, b);
+    ch[2*(k+ido)-1] = VSUB(a, b);
+  }
+  if (ido < 2) return;
+  if (ido != 2) {
+    for (k=0; k < l1ido; k += ido) {
+      for (i=2; i<ido; i+=2) {
+        v4sf tr2 = cc[i - 1 + k + l1ido], ti2 = cc[i + k + l1ido];
+        v4sf br = cc[i - 1 + k], bi = cc[i + k];
+        VCPLXMULCONJ(tr2, ti2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
+        ch[i + 2*k] = VADD(bi, ti2);
+        ch[2*(k+ido) - i] = VSUB(ti2, bi);
+        ch[i - 1 + 2*k] = VADD(br, tr2);
+        ch[2*(k+ido) - i -1] = VSUB(br, tr2);
+      }
+    }
+    /* Odd ido: there is no trailing element left to handle. */
+    if (ido % 2 == 1) return;
+  }
+  for (k=0; k < l1ido; k += ido) {
+    ch[2*k + ido] = VMUL(LD_PS1(minus_one), cc[ido-1 + k + l1ido]);
+    ch[2*k + ido-1] = cc[k + ido-1];
+  }
+} /* radf2 */
+
+
+/*
+  Radix-2 pass reading cc and writing ch (presumably the vectorised
+  counterpart of FFTPACK's real backward `radb2' -- confirm).
+
+  Three phases: (1) the first element of every block; (2) for ido > 2, the
+  interior pairs, whose difference terms are multiplied by the twiddle
+  (wa1[i-2], wa1[i-1]); (3) for even ido, the last element of every block,
+  which is written as 2*a and -2*b.
+*/
+static NEVER_INLINE(void) radb2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1) {
+  static const float minus_two=-2;
+  int i, k, l1ido = l1*ido;
+  v4sf a,b,c,d, tr2, ti2;
+  for (k=0; k < l1ido; k += ido) {
+    a = cc[2*k]; b = cc[2*(k+ido) - 1];
+    ch[k] = VADD(a, b);
+    ch[k + l1ido] =VSUB(a, b);
+  }
+  if (ido < 2) return;
+  if (ido != 2) {
+    for (k = 0; k < l1ido; k += ido) {
+      for (i = 2; i < ido; i += 2) {
+        a = cc[i-1 + 2*k]; b = cc[2*(k + ido) - i - 1];
+        c = cc[i+0 + 2*k]; d = cc[2*(k + ido) - i + 0];
+        ch[i-1 + k] = VADD(a, b);
+        tr2 = VSUB(a, b);
+        ch[i+0 + k] = VSUB(c, d);
+        ti2 = VADD(c, d);
+        VCPLXMUL(tr2, ti2, LD_PS1(wa1[i - 2]), LD_PS1(wa1[i - 1]));
+        ch[i-1 + k + l1ido] = tr2;
+        ch[i+0 + k + l1ido] = ti2;
+      }
+    }
+    /* Odd ido: there is no trailing element left to handle. */
+    if (ido % 2 == 1) return;
+  }
+  for (k = 0; k < l1ido; k += ido) {
+    a = cc[2*k + ido-1]; b = cc[2*k + ido];
+    ch[k + ido-1] = VADD(a,a);
+    ch[k + ido-1 + l1ido] = VMUL(LD_PS1(minus_two), b);
+  }
+} /* radb2 */
+
+/*
+  Radix-3 pass reading cc and writing ch (presumably the vectorised
+  counterpart of FFTPACK's real forward `radf3' -- confirm).
+
+  The first loop handles element 0 of each of the l1 blocks; the second
+  (skipped when ido == 1) handles the interior pairs, multiplying the second
+  and third inputs by the conjugates of the twiddles from wa1 and wa2.
+  cc and ch are declared RESTRICT, so they must not overlap.  Unlike most of
+  the other passes, this function is not marked NEVER_INLINE.
+*/
+static void radf3_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch,
+                     const float *wa1, const float *wa2) {
+  /* taur = -1/2, taui = sqrt(3)/2 */
+  static const float taur = -0.5f;
+  static const float taui = 0.866025403784439f;
+  int i, k, ic;
+  v4sf ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3, wr1, wi1, wr2, wi2;
+  for (k=0; k<l1; k++) {
+    cr2 = VADD(cc[(k + l1)*ido], cc[(k + 2*l1)*ido]);
+    ch[3*k*ido] = VADD(cc[k*ido], cr2);
+    ch[(3*k+2)*ido] = VMUL(LD_PS1(taui), VSUB(cc[(k + l1*2)*ido], cc[(k + l1)*ido]));
+    ch[ido-1 + (3*k + 1)*ido] = VADD(cc[k*ido], VMUL(LD_PS1(taur), cr2));
+  }
+  if (ido == 1) return;
+  for (k=0; k<l1; k++) {
+    for (i=2; i<ido; i+=2) {
+      ic = ido - i; /* mirrored index within the block */
+      wr1 = LD_PS1(wa1[i - 2]); wi1 = LD_PS1(wa1[i - 1]);
+      dr2 = cc[i - 1 + (k + l1)*ido]; di2 = cc[i + (k + l1)*ido];
+      VCPLXMULCONJ(dr2, di2, wr1, wi1);
+
+      wr2 = LD_PS1(wa2[i - 2]); wi2 = LD_PS1(wa2[i - 1]);
+      dr3 = cc[i - 1 + (k + l1*2)*ido]; di3 = cc[i + (k + l1*2)*ido];
+      VCPLXMULCONJ(dr3, di3, wr2, wi2);
+
+      cr2 = VADD(dr2, dr3);
+      ci2 = VADD(di2, di3);
+      ch[i - 1 + 3*k*ido] = VADD(cc[i - 1 + k*ido], cr2);
+      ch[i + 3*k*ido] = VADD(cc[i + k*ido], ci2);
+      tr2 = VADD(cc[i - 1 + k*ido], VMUL(LD_PS1(taur), cr2));
+      ti2 = VADD(cc[i + k*ido], VMUL(LD_PS1(taur), ci2));
+      tr3 = VMUL(LD_PS1(taui), VSUB(di2, di3));
+      ti3 = VMUL(LD_PS1(taui), VSUB(dr3, dr2));
+      ch[i - 1 + (3*k + 2)*ido] = VADD(tr2, tr3);
+      ch[ic - 1 + (3*k + 1)*ido] = VSUB(tr2, tr3);
+      ch[i + (3*k + 2)*ido] = VADD(ti2, ti3);
+      ch[ic + (3*k + 1)*ido] = VSUB(ti3, ti2);
+    }
+  }
+} /* radf3 */
+
+
+/*
+  Radix-3 pass reading cc and writing ch (presumably the vectorised
+  counterpart of FFTPACK's real backward `radb3' -- confirm).
+
+  The first loop handles element 0 of each of the l1 blocks; the second
+  (skipped when ido == 1) handles the interior pairs, multiplying the second
+  and third outputs by the twiddles from wa1 and wa2.
+  cc and ch are declared RESTRICT, so they must not overlap.  Unlike most of
+  the other passes, this function is not marked NEVER_INLINE.
+*/
+static void radb3_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch,
+                     const float *wa1, const float *wa2)
+{
+  /* taur = -1/2, taui = sqrt(3)/2, taui_2 = 2*taui */
+  static const float taur = -0.5f;
+  static const float taui = 0.866025403784439f;
+  static const float taui_2 = 0.866025403784439f*2;
+  int i, k, ic;
+  v4sf ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
+  for (k=0; k<l1; k++) {
+    tr2 = cc[ido-1 + (3*k + 1)*ido]; tr2 = VADD(tr2,tr2);
+    cr2 = VMADD(LD_PS1(taur), tr2, cc[3*k*ido]);
+    ch[k*ido] = VADD(cc[3*k*ido], tr2);
+    ci3 = VMUL(LD_PS1(taui_2), cc[(3*k + 2)*ido]);
+    ch[(k + l1)*ido] = VSUB(cr2, ci3);
+    ch[(k + 2*l1)*ido] = VADD(cr2, ci3);
+  }
+  if (ido == 1) return;
+  for (k=0; k<l1; k++) {
+    for (i=2; i<ido; i+=2) {
+      ic = ido - i; /* mirrored index within the block */
+      tr2 = VADD(cc[i - 1 + (3*k + 2)*ido], cc[ic - 1 + (3*k + 1)*ido]);
+      cr2 = VMADD(LD_PS1(taur), tr2, cc[i - 1 + 3*k*ido]);
+      ch[i - 1 + k*ido] = VADD(cc[i - 1 + 3*k*ido], tr2);
+      ti2 = VSUB(cc[i + (3*k + 2)*ido], cc[ic + (3*k + 1)*ido]);
+      ci2 = VMADD(LD_PS1(taur), ti2, cc[i + 3*k*ido]);
+      ch[i + k*ido] = VADD(cc[i + 3*k*ido], ti2);
+      cr3 = VMUL(LD_PS1(taui), VSUB(cc[i - 1 + (3*k + 2)*ido], cc[ic - 1 + (3*k + 1)*ido]));
+      ci3 = VMUL(LD_PS1(taui), VADD(cc[i + (3*k + 2)*ido], cc[ic + (3*k + 1)*ido]));
+      dr2 = VSUB(cr2, ci3);
+      dr3 = VADD(cr2, ci3);
+      di2 = VADD(ci2, cr3);
+      di3 = VSUB(ci2, cr3);
+      VCPLXMUL(dr2, di2, LD_PS1(wa1[i-2]), LD_PS1(wa1[i-1]));
+      ch[i - 1 + (k + l1)*ido] = dr2;
+      ch[i + (k + l1)*ido] = di2;
+      VCPLXMUL(dr3, di3, LD_PS1(wa2[i-2]), LD_PS1(wa2[i-1]));
+      ch[i - 1 + (k + 2*l1)*ido] = dr3;
+      ch[i + (k + 2*l1)*ido] = di3;
+    }
+  }
+} /* radb3 */
+
+
+static NEVER_INLINE(void) radf4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf * RESTRICT ch,
+                                   const float * RESTRICT wa1, const float * RESTRICT wa2, const float * RESTRICT wa3)
+{ /* Forward radix-4 butterfly pass (invoked by rfftf1_ps for factor 4).
+   * Reads cc, whose four inputs per butterfly lie l1*ido apart, and writes
+   * ch in blocks of 4*ido. The work is done in three steps:
+   *   1. the first element of every block (no twiddle factor);
+   *   2. for ido > 2, the pairs i = 2, 4, ... < ido, using wa1/wa2/wa3 read
+   *      at [i-2], [i-1];
+   *   3. for even ido, the last element (index ido-1) of every block. */
+  static const float minus_hsqt2 = (float)-0.7071067811865475; /* -sqrt(2)/2 */
+  int i, k, l1ido = l1*ido;
+  {
+    const v4sf *RESTRICT cc_ = cc, * RESTRICT cc_end = cc + l1ido;
+    v4sf * RESTRICT ch_ = ch;
+    while (cc < cc_end) { /* step 1: walks the cc/ch parameters themselves */
+      /* this loop represents between 25% and 40% of total radf4_ps cost ! */
+      v4sf a0 = cc[0], a1 = cc[l1ido];
+      v4sf a2 = cc[2*l1ido], a3 = cc[3*l1ido];
+      v4sf tr1 = VADD(a1, a3);
+      v4sf tr2 = VADD(a0, a2);
+      ch[2*ido-1] = VSUB(a0, a2);
+      ch[2*ido  ] = VSUB(a3, a1);
+      ch[0      ] = VADD(tr1, tr2);
+      ch[4*ido-1] = VSUB(tr2, tr1);
+      cc += ido; ch += 4*ido;
+    }
+    cc = cc_; ch = ch_; /* restore the parameters for the steps below */
+  }
+  if (ido < 2) return;
+  if (ido != 2) {
+    for (k = 0; k < l1ido; k += ido) { /* step 2; note k advances in units of ido */
+      const v4sf * RESTRICT pc = (v4sf*)(cc + 1 + k);
+      for (i=2; i<ido; i += 2, pc += 2) {
+        int ic = ido - i; /* mirrored index within the block */
+        v4sf wr, wi, cr2, ci2, cr3, ci3, cr4, ci4;
+        v4sf tr1, ti1, tr2, ti2, tr3, ti3, tr4, ti4;
+
+        cr2 = pc[1*l1ido+0];
+        ci2 = pc[1*l1ido+1];
+        wr=LD_PS1(wa1[i - 2]);
+        wi=LD_PS1(wa1[i - 1]);
+        VCPLXMULCONJ(cr2,ci2,wr,wi);
+
+        cr3 = pc[2*l1ido+0];
+        ci3 = pc[2*l1ido+1];
+        wr = LD_PS1(wa2[i-2]);
+        wi = LD_PS1(wa2[i-1]);
+        VCPLXMULCONJ(cr3, ci3, wr, wi);
+
+        cr4 = pc[3*l1ido];
+        ci4 = pc[3*l1ido+1];
+        wr = LD_PS1(wa3[i-2]);
+        wi = LD_PS1(wa3[i-1]);
+        VCPLXMULCONJ(cr4, ci4, wr, wi);
+
+        /* at this point, on SSE, five of "cr2 cr3 cr4 ci2 ci3 ci4" should be loaded in registers */
+
+        tr1 = VADD(cr2,cr4);
+        tr4 = VSUB(cr4,cr2);
+        tr2 = VADD(pc[0],cr3);
+        tr3 = VSUB(pc[0],cr3);
+        ch[i - 1 + 4*k] = VADD(tr1,tr2);
+        ch[ic - 1 + 4*k + 3*ido] = VSUB(tr2,tr1); /* at this point tr1 and tr2 can be disposed */
+        ti1 = VADD(ci2,ci4);
+        ti4 = VSUB(ci2,ci4);
+        ch[i - 1 + 4*k + 2*ido] = VADD(ti4,tr3);
+        ch[ic - 1 + 4*k + 1*ido] = VSUB(tr3,ti4); /* dispose tr3, ti4 */
+        ti2 = VADD(pc[1],ci3);
+        ti3 = VSUB(pc[1],ci3);
+        ch[i + 4*k] = VADD(ti1, ti2);
+        ch[ic + 4*k + 3*ido] = VSUB(ti1, ti2);
+        ch[i + 4*k + 2*ido] = VADD(tr4, ti3);
+        ch[ic + 4*k + 1*ido] = VSUB(tr4, ti3);
+      }
+    }
+    if (ido % 2 == 1) return; /* odd ido: step 3 does not apply */
+  }
+  for (k=0; k<l1ido; k += ido) { /* step 3 */
+    v4sf a = cc[ido-1 + k + l1ido], b = cc[ido-1 + k + 3*l1ido];
+    v4sf c = cc[ido-1 + k], d = cc[ido-1 + k + 2*l1ido];
+    v4sf ti1 = VMUL(LD_PS1(minus_hsqt2), VADD(a, b));
+    v4sf tr1 = VMUL(LD_PS1(minus_hsqt2), VSUB(b, a));
+    ch[ido-1 + 4*k] = VADD(tr1, c);
+    ch[ido-1 + 4*k + 2*ido] = VSUB(c, tr1);
+    ch[4*k + 1*ido] = VSUB(ti1, d);
+    ch[4*k + 3*ido] = VADD(ti1, d);
+  }
+} /* radf4 */
+
+
+static NEVER_INLINE(void) radb4_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch,
+                                   const float * RESTRICT wa1, const float * RESTRICT wa2, const float *RESTRICT wa3)
+{ /* Backward radix-4 butterfly pass (invoked by rfftb1_ps for factor 4).
+   * Reads cc in blocks of 4*ido and writes ch, whose four outputs per
+   * butterfly lie l1*ido apart. The work is done in three steps:
+   *   1. the first element of every block (no twiddle factor);
+   *   2. for ido > 2, the pairs i = 2, 4, ... < ido, using wa1/wa2/wa3 read
+   *      at [i-2], [i-1];
+   *   3. for even ido, the last element (index ido-1) of every block. */
+  static const float minus_sqrt2 = (float)-1.414213562373095; /* -sqrt(2) */
+  static const float two = 2.f;
+  int i, k, l1ido = l1*ido;
+  v4sf ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
+  {
+    const v4sf *RESTRICT cc_ = cc, * RESTRICT ch_end = ch + l1ido;
+    v4sf *ch_ = ch;
+    while (ch < ch_end) { /* step 1: walks the cc/ch parameters themselves */
+      v4sf a = cc[0], b = cc[4*ido-1];
+      v4sf c = cc[2*ido], d = cc[2*ido-1];
+      tr3 = VMUL(LD_PS1(two),d);
+      tr2 = VADD(a,b);
+      tr1 = VSUB(a,b);
+      tr4 = VMUL(LD_PS1(two),c);
+      ch[0*l1ido] = VADD(tr2, tr3);
+      ch[2*l1ido] = VSUB(tr2, tr3);
+      ch[1*l1ido] = VSUB(tr1, tr4);
+      ch[3*l1ido] = VADD(tr1, tr4);
+
+      cc += 4*ido; ch += ido;
+    }
+    cc = cc_; ch = ch_; /* restore the parameters for the steps below */
+  }
+  if (ido < 2) return;
+  if (ido != 2) {
+    for (k = 0; k < l1ido; k += ido) { /* step 2; note k advances in units of ido */
+      const v4sf * RESTRICT pc = (v4sf*)(cc - 1 + 4*k);
+      v4sf * RESTRICT ph = (v4sf*)(ch + k + 1);
+      for (i = 2; i < ido; i += 2) {
+
+        tr1 = VSUB(pc[i], pc[4*ido - i]);
+        tr2 = VADD(pc[i], pc[4*ido - i]);
+        ti4 = VSUB(pc[2*ido + i], pc[2*ido - i]);
+        tr3 = VADD(pc[2*ido + i], pc[2*ido - i]);
+        ph[0] = VADD(tr2, tr3);
+        cr3 = VSUB(tr2, tr3);
+
+        ti3 = VSUB(pc[2*ido + i + 1], pc[2*ido - i + 1]);
+        tr4 = VADD(pc[2*ido + i + 1], pc[2*ido - i + 1]);
+        cr2 = VSUB(tr1, tr4);
+        cr4 = VADD(tr1, tr4);
+
+        ti1 = VADD(pc[i + 1], pc[4*ido - i + 1]);
+        ti2 = VSUB(pc[i + 1], pc[4*ido - i + 1]);
+
+        ph[1] = VADD(ti2, ti3); ph += l1ido; /* ph steps through the four output planes */
+        ci3 = VSUB(ti2, ti3);
+        ci2 = VADD(ti1, ti4);
+        ci4 = VSUB(ti1, ti4);
+        VCPLXMUL(cr2, ci2, LD_PS1(wa1[i-2]), LD_PS1(wa1[i-1]));
+        ph[0] = cr2;
+        ph[1] = ci2; ph += l1ido;
+        VCPLXMUL(cr3, ci3, LD_PS1(wa2[i-2]), LD_PS1(wa2[i-1]));
+        ph[0] = cr3;
+        ph[1] = ci3; ph += l1ido;
+        VCPLXMUL(cr4, ci4, LD_PS1(wa3[i-2]), LD_PS1(wa3[i-1]));
+        ph[0] = cr4;
+        ph[1] = ci4; ph = ph - 3*l1ido + 2; /* back to the first plane, next pair */
+      }
+    }
+    if (ido % 2 == 1) return; /* odd ido: step 3 does not apply */
+  }
+  for (k=0; k < l1ido; k+=ido) { /* step 3 */
+    int i0 = 4*k + ido;
+    v4sf c = cc[i0-1], d = cc[i0 + 2*ido-1];
+    v4sf a = cc[i0+0], b = cc[i0 + 2*ido+0];
+    tr1 = VSUB(c,d);
+    tr2 = VADD(c,d);
+    ti1 = VADD(b,a);
+    ti2 = VSUB(b,a);
+    ch[ido-1 + k + 0*l1ido] = VADD(tr2,tr2);
+    ch[ido-1 + k + 1*l1ido] = VMUL(LD_PS1(minus_sqrt2), VSUB(ti1, tr1));
+    ch[ido-1 + k + 2*l1ido] = VADD(ti2, ti2);
+    ch[ido-1 + k + 3*l1ido] = VMUL(LD_PS1(minus_sqrt2), VADD(ti1, tr1));
+  }
+} /* radb4 */
+
+/* Forward real-FFT driver: applies one radix-2/3/4 pass per entry of the
+ * factor list, visiting the factors from last to first.
+ *
+ *   n               length handed to the passes
+ *   input_readonly  source data for the first pass
+ *   work1, work2    two distinct buffers the passes alternate between
+ *   wa              twiddle table; each pass is given its own slice
+ *   ifac            ifac[1] = number of factors, ifac[2..] = the factors
+ *
+ * Returns whichever buffer received the output of the final pass (or the
+ * input pointer itself when the factor list is empty).
+ */
+static NEVER_INLINE(v4sf *) rfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
+                                      const float *wa, const int *ifac) {
+  v4sf *src = (v4sf*)input_readonly;
+  v4sf *dst = (src == work2) ? work1 : work2;
+  const int nfactors = ifac[1];
+  int span = n;        /* length still to be split by the remaining passes */
+  int woff = n - 1;    /* twiddle offset; walks backwards through wa */
+  int stage;
+  assert(src != dst && work1 != work2);
+  for (stage = nfactors - 1; stage >= 0; --stage) {
+    const int radix = ifac[stage + 2];
+    const int l1 = span / radix;
+    const int ido = n / span;
+    woff -= (radix - 1)*ido;
+    if (radix == 4) {
+      radf4_ps(ido, l1, src, dst, &wa[woff], &wa[woff + ido], &wa[woff + ido + ido]);
+    } else if (radix == 3) {
+      radf3_ps(ido, l1, src, dst, &wa[woff], &wa[woff + ido]);
+    } else if (radix == 2) {
+      radf2_ps(ido, l1, src, dst, &wa[woff]);
+    } else {
+      assert(0);
+    }
+    span = l1;
+    /* what was just written becomes the next input; write to the other buffer */
+    src = dst;
+    dst = (src == work2) ? work1 : work2;
+  }
+  return src; /* this is in fact the output .. */
+} /* rfftf1 */
+
+/* Backward real-FFT driver: applies one radix-2/3/4 pass per entry of the
+ * factor list, visiting the factors from first to last.
+ *
+ *   n               length handed to the passes
+ *   input_readonly  source data for the first pass
+ *   work1, work2    buffers the passes alternate between
+ *   wa              twiddle table; each pass is given its own slice
+ *   ifac            ifac[1] = number of factors, ifac[2..] = the factors
+ *
+ * Returns whichever buffer received the output of the final pass (or the
+ * input pointer itself when the factor list is empty).
+ */
+static NEVER_INLINE(v4sf *) rfftb1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
+                                      const float *wa, const int *ifac) {
+  v4sf *src = (v4sf*)input_readonly;
+  v4sf *dst = (src == work2) ? work1 : work2;
+  const int nfactors = ifac[1];
+  int l1 = 1;
+  int woff = 0;        /* twiddle offset; walks forwards through wa */
+  int stage;
+  assert(src != dst);
+  for (stage = 0; stage < nfactors; ++stage) {
+    const int radix = ifac[stage + 2];
+    const int l2 = radix*l1;
+    const int ido = n / l2;
+    if (radix == 4) {
+      radb4_ps(ido, l1, src, dst, &wa[woff], &wa[woff + ido], &wa[woff + ido + ido]);
+    } else if (radix == 3) {
+      radb3_ps(ido, l1, src, dst, &wa[woff], &wa[woff + ido]);
+    } else if (radix == 2) {
+      radb2_ps(ido, l1, src, dst, &wa[woff]);
+    } else {
+      assert(0);
+    }
+    l1 = l2;
+    woff += (radix - 1)*ido;
+
+    /* what was just written becomes the next input; write to the other buffer */
+    src = dst;
+    dst = (src == work2) ? work1 : work2;
+  }
+  return src; /* this is in fact the output .. */
+}
+
+/* Factor n using the three trial divisors in ntryh, tried in that order.
+ *
+ * On return ifac[0] = n, ifac[1] = number of factors found, and the factors
+ * themselves start at ifac[2]. Each factor is appended as it is found, with
+ * one exception: a factor of 2 found after other factors is moved to the
+ * front of the list. Whatever part of n is not divisible by any trial
+ * divisor is silently left out.
+ *
+ * Returns the number of factors stored.
+ */
+static int decompose(int n, int *ifac, const int ntryh[3]) {
+  int remaining = n;
+  int count = 0;
+  int t;
+  for (t = 0; t < 3; ++t) {
+    const int divisor = ntryh[t];
+    while (remaining != 1 && remaining % divisor == 0) {
+      ifac[2 + count] = divisor;
+      ++count;
+      remaining /= divisor;
+      if (divisor == 2 && count != 1) {
+        /* shift the earlier factors up one slot and put the 2 first */
+        int slot;
+        for (slot = count + 1; slot > 2; --slot) {
+          ifac[slot] = ifac[slot - 1];
+        }
+        ifac[2] = 2;
+      }
+    }
+  }
+  ifac[0] = n;
+  ifac[1] = count;
+  return count;
+}
+
+
+
+/* Initialise the real-FFT tables for length n: factor n into ifac (trial
+ * divisors 4, 2, 3) and fill wa with cos/sin pairs for every factor except
+ * the last one. Nothing is written to wa when there is a single factor.
+ */
+static void rffti1_ps(int n, float *wa, int *ifac)
+{
+  static const int ntryh[3] = { 4,2,3 };
+  const int nf = decompose(n,ifac,ntryh);
+  const float argh = (float)((2*M_PI) / n);
+  int base = 0;   /* start of the current run of pairs in wa */
+  int l1 = 1;
+  int stage, j, fi;
+
+  if (nf == 1) return;
+  for (stage = 0; stage < nf - 1; ++stage) {
+    const int ip = ifac[stage + 2];
+    const int l2 = l1*ip;
+    const int ido = n / l2;
+    for (j = 1; j < ip; ++j) {
+      const int ld = j*l1;
+      const float argld = (float)ld*argh;
+      /* one (cos, sin) pair for each fi with 2*fi + 1 <= ido */
+      for (fi = 1; 2*fi + 1 <= ido; ++fi) {
+        wa[base + 2*fi - 2] = cos((float)fi*argld);
+        wa[base + 2*fi - 1] = sin((float)fi*argld);
+      }
+      base += ido;
+    }
+    l1 = l2;
+  }
+} /* rffti1 */
+
+/* Initialise the complex-FFT tables for length n: factor n into ifac (trial
+ * divisors 3, 4, 2) and fill wa with (cos, sin) pairs for every factor.
+ *
+ * Each run starts with the pair (1, 0) written at the current position,
+ * which is where the previous run stored its last pair, so that pair is
+ * overwritten.
+ */
+static void cffti1_ps(int n, float *wa, int *ifac)
+{
+  static const int ntryh[3] = { 3,4,2 };
+  const int nf = decompose(n,ifac,ntryh);
+  const float argh = (float)((2*M_PI)/n);
+  int pos = 0;    /* index of the real part of the current pair */
+  int l1 = 1;
+  int stage, j, fi;
+
+  for (stage = 0; stage < nf; ++stage) {
+    const int ip = ifac[stage + 2];
+    const int l2 = l1*ip;
+    const int ido = n / l2;
+    for (j = 1; j < ip; ++j) {
+      const int first = pos;
+      const int ld = j*l1;
+      const float argld = (float)ld*argh;
+      wa[pos] = 1;
+      wa[pos + 1] = 0;
+      for (fi = 1; fi <= ido; ++fi) {
+        pos += 2;
+        wa[pos] = cos((float)fi*argld);
+        wa[pos + 1] = sin((float)fi*argld);
+      }
+      if (ip > 5) {
+        wa[first] = wa[pos];
+        wa[first + 1] = wa[pos + 1];
+      }
+    }
+    l1 = l2;
+  }
+} /* cffti1 */
+
+
+/* Complex-FFT driver: applies one radix-2/3/4 pass per entry of the factor
+ * list, visiting the factors from first to last. isign is forwarded to every
+ * pass as a float.
+ *
+ *   n               length handed to the passes
+ *   input_readonly  source data for the first pass
+ *   work1, work2    two distinct buffers the passes alternate between
+ *   wa              twiddle table; each pass is given its own slice
+ *   ifac            ifac[1] = number of factors, ifac[2..] = the factors
+ *
+ * Returns whichever buffer received the output of the final pass (or the
+ * input pointer itself when the factor list is empty).
+ */
+static v4sf *cfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2, const float *wa, const int *ifac, int isign) {
+  v4sf *src = (v4sf*)input_readonly;
+  v4sf *dst = (src == work2) ? work1 : work2;
+  const int nfactors = ifac[1];
+  int l1 = 1;
+  int woff = 0;        /* twiddle offset; walks forwards through wa */
+  int stage;
+  assert(src != dst && work1 != work2);
+  for (stage = 0; stage < nfactors; ++stage) {
+    const int radix = ifac[stage + 2];
+    const int l2 = radix*l1;
+    const int ido = n / l2;
+    const int idot = ido + ido;
+    if (radix == 4) {
+      passf4_ps(idot, l1, src, dst, &wa[woff], &wa[woff + idot], &wa[woff + idot + idot], (float)isign);
+    } else if (radix == 2) {
+      passf2_ps(idot, l1, src, dst, &wa[woff], (float)isign);
+    } else if (radix == 3) {
+      passf3_ps(idot, l1, src, dst, &wa[woff], &wa[woff + idot], (float)isign);
+    } else {
+      assert(0);
+    }
+    l1 = l2;
+    woff += (radix - 1)*idot;
+    /* what was just written becomes the next input; write to the other buffer */
+    src = dst;
+    dst = (src == work2) ? work1 : work2;
+  }
+
+  return src; /* this is in fact the output .. */
+}
+
+
+struct PFFFT_Setup { /* precomputed state for one transform length and kind; filled in by pffft_new_setup */
+  int     N; /* transform length, as passed to pffft_new_setup */
+  int     Ncvec; /* nb of complex simd vectors: N/SIMD_SZ if PFFFT_COMPLEX, (N/2)/SIMD_SZ if PFFFT_REAL */
+  int ifac[15]; /* written by decompose(): [0] = decomposed length, [1] = number of factors, [2..] = the factors */
+  pffft_transform_t transform; /* transform kind given to pffft_new_setup */
+  v4sf *data; /* allocated room for the coefficient tables: 2*Ncvec v4sf */
+  float *e;    /* points at the start of 'data'; cos/sin table (N/4*3 elements for a real transform with SIMD_SZ == 4) */
+  float *twiddle; /* points (2*Ncvec*(SIMD_SZ-1))/SIMD_SZ v4sf into 'data'; filled by rffti1_ps or cffti1_ps */
+};
+
+/* Allocate and initialise the tables for a transform of N points.
+ *
+ *   N          transform length; asserted to be >= 32 for PFFFT_REAL and
+ *              >= 16 for PFFFT_COMPLEX. N/SIMD_SZ must factor completely
+ *              into 2s, 3s and 4s.
+ *   transform  PFFFT_REAL selects the real-FFT tables; any other value
+ *              selects the complex-FFT tables.
+ *
+ * Returns the new setup (release it with pffft_destroy_setup), or NULL when
+ * an allocation fails or N/SIMD_SZ contains a factor that no butterfly pass
+ * handles.
+ */
+PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) {
+  int k, m;
+  PFFFT_Setup *s = (PFFFT_Setup*)malloc(sizeof(PFFFT_Setup));
+  if (!s)
+    return s;
+  if (transform == PFFFT_REAL) { assert(N >= 32); }
+  if (transform == PFFFT_COMPLEX) { assert(N >= 16); }
+  s->N = N;
+  s->transform = transform;
+  /* nb of complex simd vectors */
+  s->Ncvec = (transform == PFFFT_REAL ? N/2 : N)/SIMD_SZ;
+  s->data = (v4sf*)pffft_aligned_malloc(2*(size_t)s->Ncvec * sizeof(v4sf));
+  if (!s->data) {
+    free(s);
+    return 0;
+  }
+  /* 'data' is split in two: e first, twiddle in the remainder */
+  s->e = (float*)s->data;
+  s->twiddle = (float*)(s->data + (2*s->Ncvec*(SIMD_SZ-1))/SIMD_SZ);
+
+  /* The e table is built the same way for both transform kinds: for each k,
+   * SIMD_SZ-1 (cos, sin) pairs of the angle -2*pi*(m+1)*k/N, stored in
+   * groups of SIMD_SZ floats. */
+  for (k=0; k < s->Ncvec; ++k) {
+    int i = k/SIMD_SZ;
+    int j = k%SIMD_SZ;
+    for (m=0; m < SIMD_SZ-1; ++m) {
+      float A = (float)(-2*M_PI*(m+1)*k / N);
+      s->e[(2*(i*3 + m) + 0) * SIMD_SZ + j] = cos(A);
+      s->e[(2*(i*3 + m) + 1) * SIMD_SZ + j] = sin(A);
+    }
+  }
+  if (transform == PFFFT_REAL) {
+    rffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac);
+  } else {
+    cffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac);
+  }
+
+  /* decompose() silently drops any part of the length it cannot factor, so
+   * confirm that the factors found really multiply back to N/SIMD_SZ; if
+   * they do not, the transform would not cover the whole length. */
+  for (k = 0, m = 1; k < s->ifac[1]; ++k) {
+    m *= s->ifac[2 + k];
+  }
+  if (m != N/SIMD_SZ) {
+    pffft_aligned_free(s->data);
+    free(s);
+    return 0;
+  }
+  return s;
+}
+
+
+/* Release a setup and the table storage it owns. A NULL pointer is ignored. */
+static void pffft_destroy_setup(PFFFT_Setup *s) {
+  if (!s)
+    return;
+  pffft_aligned_free(s->data);
+  free(s);
+}
+
+#if !defined(PFFFT_SIMD_DISABLE)
+
+/* [0 0 1 2 3 4 5 6 7 8] -> [0 8 7 6 5 4 3 2 1] */
+static void reversed_copy(int N, const v4sf *in, int in_stride, v4sf *out) { /* Reads the vector pair
+   * in[0], in[1] at N positions in_stride apart and stores 2*N vectors
+   * backwards, starting just below 'out' (every store is *--out), so 'out'
+   * must point one past the end of the destination. */
+  v4sf g0, g1;
+  int k;
+  INTERLEAVE2(in[0], in[1], g0, g1); in += in_stride;
+
+  *--out = VSWAPHL(g0, g1); /* [g0l, g0h], [g1l g1h] -> [g1l, g0h] */
+  for (k=1; k < N; ++k) {
+    v4sf h0, h1;
+    INTERLEAVE2(in[0], in[1], h0, h1); in += in_stride;
+    *--out = VSWAPHL(g1, h0);
+    *--out = VSWAPHL(h0, h1);
+    g1 = h1; /* carried into the next iteration */
+  }
+  *--out = VSWAPHL(g1, g0); /* last store pairs the final vector with the very first one */
+}
+
+static void unreversed_copy(int N, const v4sf *in, v4sf *out, int out_stride) { /* Reads 2*N consecutive
+   * vectors from 'in' and stores the pair out[0], out[1] at N positions
+   * out_stride apart (the callers in pffft_zreorder pass a negative stride).
+   * Presumably the inverse of reversed_copy -- confirm against the macro
+   * definitions, which are outside this function. */
+  v4sf g0, g1, h0, h1;
+  int k;
+  g0 = g1 = in[0]; ++in;
+  for (k=1; k < N; ++k) {
+    h0 = *in++; h1 = *in++;
+    g1 = VSWAPHL(g1, h0);
+    h0 = VSWAPHL(h0, h1);
+    UNINTERLEAVE2(h0, g1, out[0], out[1]); out += out_stride;
+    g1 = h1; /* carried into the next iteration */
+  }
+  h0 = *in++; h1 = g0; /* the final pair reuses the very first vector */
+  g1 = VSWAPHL(g1, h0);
+  h0 = VSWAPHL(h0, h1);
+  UNINTERLEAVE2(h0, g1, out[0], out[1]);
+}
+
+static void pffft_zreorder(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) { /*
+   * Copies 'in' to 'out' in a permuted order; PFFFT_FORWARD applies the
+   * permutation and any other direction applies its counterpart built from
+   * the UNINTERLEAVE2/unreversed_copy forms. The two buffers must be
+   * distinct. pffft_transform_internal calls this only when its 'ordered'
+   * flag is set, so this presumably converts between the transform's
+   * internal frequency-domain layout and the ordered one -- confirm. */
+  int k, N = setup->N, Ncvec = setup->Ncvec;
+  const v4sf *vin = (const v4sf*)in;
+  v4sf *vout = (v4sf*)out;
+  assert(in != out);
+  if (setup->transform == PFFFT_REAL) {
+    int k, dk = N/32; /* this k shadows the outer one */
+    if (direction == PFFFT_FORWARD) {
+      for (k=0; k < dk; ++k) {
+        INTERLEAVE2(vin[k*8 + 0], vin[k*8 + 1], vout[2*(0*dk + k) + 0], vout[2*(0*dk + k) + 1]);
+        INTERLEAVE2(vin[k*8 + 4], vin[k*8 + 5], vout[2*(2*dk + k) + 0], vout[2*(2*dk + k) + 1]);
+      }
+      reversed_copy(dk, vin+2, 8, (v4sf*)(out + N/2)); /* stored backwards, ending just below out + N/2 */
+      reversed_copy(dk, vin+6, 8, (v4sf*)(out + N)); /* stored backwards, ending just below out + N */
+    } else {
+      for (k=0; k < dk; ++k) {
+        UNINTERLEAVE2(vin[2*(0*dk + k) + 0], vin[2*(0*dk + k) + 1], vout[k*8 + 0], vout[k*8 + 1]);
+        UNINTERLEAVE2(vin[2*(2*dk + k) + 0], vin[2*(2*dk + k) + 1], vout[k*8 + 4], vout[k*8 + 5]);
+      }
+      unreversed_copy(dk, (v4sf*)(in + N/4), (v4sf*)(out + N - 6*SIMD_SZ), -8); /* negative stride: walks 'out' downwards */
+      unreversed_copy(dk, (v4sf*)(in + 3*N/4), (v4sf*)(out + N - 2*SIMD_SZ), -8);
+    }
+  } else {
+    if (direction == PFFFT_FORWARD) {
+      for (k=0; k < Ncvec; ++k) {
+        int kk = (k/4) + (k%4)*(Ncvec/4); /* destination pair index */
+        INTERLEAVE2(vin[k*2], vin[k*2+1], vout[kk*2], vout[kk*2+1]);
+      }
+    } else {
+      for (k=0; k < Ncvec; ++k) {
+        int kk = (k/4) + (k%4)*(Ncvec/4); /* source pair index */
+        UNINTERLEAVE2(vin[kk*2], vin[kk*2+1], vout[k*2], vout[k*2+1]);
+      }
+    }
+  }
+}
+
+static void pffft_cplx_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { /* Last stage of the
+   * forward complex transform (called after cfftf1_ps in
+   * pffft_transform_internal). Works on blocks of eight vectors: transposes
+   * the four real and four imaginary vectors, multiplies rows 1..3 by the
+   * coefficients in e (six vectors per block), then applies the butterfly
+   * described by the matrix below. 'in' and 'out' must be distinct. */
+  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
+  v4sf r0, i0, r1, i1, r2, i2, r3, i3;
+  v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
+  assert(in != out);
+  for (k=0; k < dk; ++k) {
+    r0 = in[8*k+0]; i0 = in[8*k+1];
+    r1 = in[8*k+2]; i1 = in[8*k+3];
+    r2 = in[8*k+4]; i2 = in[8*k+5];
+    r3 = in[8*k+6]; i3 = in[8*k+7];
+    VTRANSPOSE4(r0,r1,r2,r3);
+    VTRANSPOSE4(i0,i1,i2,i3);
+    VCPLXMUL(r1,i1,e[k*6+0],e[k*6+1]);
+    VCPLXMUL(r2,i2,e[k*6+2],e[k*6+3]);
+    VCPLXMUL(r3,i3,e[k*6+4],e[k*6+5]);
+
+    sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2);
+    sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3);
+    si0 = VADD(i0,i2); di0 = VSUB(i0, i2);
+    si1 = VADD(i1,i3); di1 = VSUB(i1, i3);
+
+    /*
+      transformation for each column is:
+
+      [1   1   1   1   0   0   0   0]   [r0]
+      [1   0  -1   0   0  -1   0   1]   [r1]
+      [1  -1   1  -1   0   0   0   0]   [r2]
+      [1   0  -1   0   0   1   0  -1]   [r3]
+      [0   0   0   0   1   1   1   1] * [i0]
+      [0   1   0  -1   1   0  -1   0]   [i1]
+      [0   0   0   0   1  -1   1  -1]   [i2]
+      [0  -1   0   1   1   0  -1   0]   [i3]
+    */
+
+    r0 = VADD(sr0, sr1); i0 = VADD(si0, si1);
+    r1 = VADD(dr0, di1); i1 = VSUB(di0, dr1);
+    r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1);
+    r3 = VSUB(dr0, di1); i3 = VADD(di0, dr1);
+
+    *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1;
+    *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3;
+  }
+}
+
+static void pffft_cplx_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { /* First stage of the
+   * backward complex transform (called before cfftf1_ps in
+   * pffft_transform_internal). Works on blocks of eight vectors: applies the
+   * butterfly, multiplies rows 1..3 by the coefficients in e via
+   * VCPLXMULCONJ (six vectors per block), then transposes the real and the
+   * imaginary vectors. 'in' and 'out' must be distinct. */
+  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
+  v4sf r0, i0, r1, i1, r2, i2, r3, i3;
+  v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
+  assert(in != out);
+  for (k=0; k < dk; ++k) {
+    r0 = in[8*k+0]; i0 = in[8*k+1];
+    r1 = in[8*k+2]; i1 = in[8*k+3];
+    r2 = in[8*k+4]; i2 = in[8*k+5];
+    r3 = in[8*k+6]; i3 = in[8*k+7];
+
+    sr0 = VADD(r0,r2); dr0 = VSUB(r0, r2);
+    sr1 = VADD(r1,r3); dr1 = VSUB(r1, r3);
+    si0 = VADD(i0,i2); di0 = VSUB(i0, i2);
+    si1 = VADD(i1,i3); di1 = VSUB(i1, i3);
+
+    r0 = VADD(sr0, sr1); i0 = VADD(si0, si1);
+    r1 = VSUB(dr0, di1); i1 = VADD(di0, dr1);
+    r2 = VSUB(sr0, sr1); i2 = VSUB(si0, si1);
+    r3 = VADD(dr0, di1); i3 = VSUB(di0, dr1);
+
+    VCPLXMULCONJ(r1,i1,e[k*6+0],e[k*6+1]);
+    VCPLXMULCONJ(r2,i2,e[k*6+2],e[k*6+3]);
+    VCPLXMULCONJ(r3,i3,e[k*6+4],e[k*6+5]);
+
+    VTRANSPOSE4(r0,r1,r2,r3);
+    VTRANSPOSE4(i0,i1,i2,i3);
+
+    *out++ = r0; *out++ = i0; *out++ = r1; *out++ = i1;
+    *out++ = r2; *out++ = i2; *out++ = r3; *out++ = i3;
+  }
+}
+
+
+static ALWAYS_INLINE(void) pffft_real_finalize_4x4(const v4sf *in0, const v4sf *in1, const v4sf *in,
+                            const v4sf *e, v4sf *out) { /* Processes one block for pffft_real_finalize.
+   * r0 and i0 come from *in0 and *in1; the other six vectors are read
+   * consecutively from 'in'. After transposing, rows 1..3 are multiplied by
+   * the six coefficient vectors e[0..5], the butterfly described by the
+   * matrix below is applied, and eight vectors are written to 'out'. */
+  v4sf r0, i0, r1, i1, r2, i2, r3, i3;
+  v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
+  r0 = *in0; i0 = *in1;
+  r1 = *in++; i1 = *in++; r2 = *in++; i2 = *in++; r3 = *in++; i3 = *in++;
+  VTRANSPOSE4(r0,r1,r2,r3);
+  VTRANSPOSE4(i0,i1,i2,i3);
+
+  /*
+    transformation for each column is:
+
+    [1   1   1   1   0   0   0   0]   [r0]
+    [1   0  -1   0   0  -1   0   1]   [r1]
+    [1   0  -1   0   0   1   0  -1]   [r2]
+    [1  -1   1  -1   0   0   0   0]   [r3]
+    [0   0   0   0   1   1   1   1] * [i0]
+    [0  -1   0   1  -1   0   1   0]   [i1]
+    [0  -1   0   1   1   0  -1   0]   [i2]
+    [0   0   0   0  -1   1  -1   1]   [i3]
+  */
+
+  /*cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */
+  /*cerr << "matrix initial, before e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */
+
+  VCPLXMUL(r1,i1,e[0],e[1]);
+  VCPLXMUL(r2,i2,e[2],e[3]);
+  VCPLXMUL(r3,i3,e[4],e[5]);
+
+  /*cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */
+  /*cerr << "matrix initial, imag part:\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */
+
+  sr0 = VADD(r0,r2); dr0 = VSUB(r0,r2);
+  sr1 = VADD(r1,r3); dr1 = VSUB(r3,r1); /* note the operand order: r3 - r1 */
+  si0 = VADD(i0,i2); di0 = VSUB(i0,i2);
+  si1 = VADD(i1,i3); di1 = VSUB(i3,i1); /* note the operand order: i3 - i1 */
+
+  r0 = VADD(sr0, sr1);
+  r3 = VSUB(sr0, sr1);
+  i0 = VADD(si0, si1);
+  i3 = VSUB(si1, si0);
+  r1 = VADD(dr0, di1);
+  r2 = VSUB(dr0, di1);
+  i1 = VSUB(dr1, di0);
+  i2 = VADD(dr1, di0);
+
+  *out++ = r0;
+  *out++ = i0;
+  *out++ = r1;
+  *out++ = i1;
+  *out++ = r2;
+  *out++ = i2;
+  *out++ = r3;
+  *out++ = i3;
+
+}
+
+static NEVER_INLINE(void) pffft_real_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { /* Last stage
+   * of the forward real transform (called after rfftf1_ps in
+   * pffft_transform_internal). Block 0 is run through
+   * pffft_real_finalize_4x4 with zero vectors standing in for r0/i0, and
+   * lane 0 of its eight output vectors is then overwritten with scalars
+   * computed from in[0] and in[Ncvec*2-1]. Every later block k takes the
+   * last vector of block k-1 and in[8*k] as its r0/i0. 'in' and 'out' must
+   * be distinct. */
+  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
+  /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
+
+  v4sf_union cr, ci, *uout = (v4sf_union*)out;
+  v4sf save = in[7], zero=VZERO();
+  float xr0, xi0, xr1, xi1, xr2, xi2, xr3, xi3;
+  static const float s = (float)(M_SQRT2/2);
+
+  cr.v = in[0]; ci.v = in[Ncvec*2-1];
+  assert(in != out);
+  pffft_real_finalize_4x4(&zero, &zero, in+1, e, out);
+
+  /*
+    [cr0 cr1 cr2 cr3 ci0 ci1 ci2 ci3]
+
+    [Xr(1)   ] [1   1   1   1   0   0   0   0]
+    [Xr(N/4) ] [0   0   0   0   1   s   0  -s]
+    [Xr(N/2) ] [1   0  -1   0   0   0   0   0]
+    [Xr(3N/4)] [0   0   0   0   1  -s   0   s]
+    [Xi(1)   ] [1  -1   1  -1   0   0   0   0]
+    [Xi(N/4) ] [0   0   0   0   0  -s  -1  -s]
+    [Xi(N/2) ] [0  -1   0   1   0   0   0   0]
+    [Xi(3N/4)] [0   0   0   0   0  -s   1  -s]
+  */
+
+  xr0=(cr.f[0]+cr.f[2]) + (cr.f[1]+cr.f[3]); uout[0].f[0] = xr0;
+  xi0=(cr.f[0]+cr.f[2]) - (cr.f[1]+cr.f[3]); uout[1].f[0] = xi0;
+  xr2=(cr.f[0]-cr.f[2]);                     uout[4].f[0] = xr2;
+  xi2=(cr.f[3]-cr.f[1]);                     uout[5].f[0] = xi2;
+  xr1= ci.f[0] + s*(ci.f[1]-ci.f[3]);        uout[2].f[0] = xr1;
+  xi1=-ci.f[2] - s*(ci.f[1]+ci.f[3]);        uout[3].f[0] = xi1;
+  xr3= ci.f[0] - s*(ci.f[1]-ci.f[3]);        uout[6].f[0] = xr3;
+  xi3= ci.f[2] - s*(ci.f[1]+ci.f[3]);        uout[7].f[0] = xi3;
+
+  for (k=1; k < dk; ++k) {
+    v4sf save_next = in[8*k+7]; /* fetched before this block is processed */
+    pffft_real_finalize_4x4(&save, &in[8*k+0], in + 8*k+1,
+                           e + k*6, out + k*8);
+    save = save_next;
+  }
+
+}
+
+static ALWAYS_INLINE(void) pffft_real_preprocess_4x4(const v4sf *in,
+                                             const v4sf *e, v4sf *out, int first) { /* Processes one block for
+   * pffft_real_preprocess: applies the butterfly described by the matrix
+   * below to in[0..7], multiplies rows 1..3 by e[0..5] via VCPLXMULCONJ and
+   * transposes. When 'first' is non-zero the r0/i0 results are not stored,
+   * so only six vectors are written to 'out' instead of eight. */
+  v4sf r0=in[0], i0=in[1], r1=in[2], i1=in[3], r2=in[4], i2=in[5], r3=in[6], i3=in[7];
+  /*
+    transformation for each column is:
+
+    [1   1   1   1   0   0   0   0]   [r0]
+    [1   0   0  -1   0  -1  -1   0]   [r1]
+    [1  -1  -1   1   0   0   0   0]   [r2]
+    [1   0   0  -1   0   1   1   0]   [r3]
+    [0   0   0   0   1  -1   1  -1] * [i0]
+    [0  -1   1   0   1   0   0   1]   [i1]
+    [0   0   0   0   1   1  -1  -1]   [i2]
+    [0   1  -1   0   1   0   0   1]   [i3]
+  */
+
+  v4sf sr0 = VADD(r0,r3), dr0 = VSUB(r0,r3);
+  v4sf sr1 = VADD(r1,r2), dr1 = VSUB(r1,r2);
+  v4sf si0 = VADD(i0,i3), di0 = VSUB(i0,i3);
+  v4sf si1 = VADD(i1,i2), di1 = VSUB(i1,i2);
+
+  r0 = VADD(sr0, sr1);
+  r2 = VSUB(sr0, sr1);
+  r1 = VSUB(dr0, si1);
+  r3 = VADD(dr0, si1);
+  i0 = VSUB(di0, di1);
+  i2 = VADD(di0, di1);
+  i1 = VSUB(si0, dr1);
+  i3 = VADD(si0, dr1);
+
+  VCPLXMULCONJ(r1,i1,e[0],e[1]);
+  VCPLXMULCONJ(r2,i2,e[2],e[3]);
+  VCPLXMULCONJ(r3,i3,e[4],e[5]);
+
+  VTRANSPOSE4(r0,r1,r2,r3);
+  VTRANSPOSE4(i0,i1,i2,i3);
+
+  if (!first) {
+    *out++ = r0;
+    *out++ = i0;
+  }
+  *out++ = r1;
+  *out++ = i1;
+  *out++ = r2;
+  *out++ = i2;
+  *out++ = r3;
+  *out++ = i3;
+}
+
+static NEVER_INLINE(void) pffft_real_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { /* First
+   * stage of the backward real transform (called before rfftb1_ps in
+   * pffft_transform_internal). Lane 0 of in[0..7] is captured first; block 0
+   * is processed with first = 1, writing out[1..6]; every later block k
+   * writes eight vectors starting at out[8*k - 1]; finally out[0] and
+   * out[2*Ncvec-1] are filled lane by lane from the captured scalars.
+   * 'in' and 'out' must be distinct. */
+  int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
+  /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
+
+  v4sf_union Xr, Xi, *uout = (v4sf_union*)out;
+  float cr0, ci0, cr1, ci1, cr2, ci2, cr3, ci3;
+  static const float s = (float)M_SQRT2;
+  assert(in != out);
+  for (k=0; k < 4; ++k) { /* float offsets 8*k and 8*k+4: lane 0 of in[2*k] and in[2*k+1] when a v4sf holds four floats */
+    Xr.f[k] = ((float*)in)[8*k];
+    Xi.f[k] = ((float*)in)[8*k+4];
+  }
+
+  pffft_real_preprocess_4x4(in, e, out+1, 1); /* will write only 6 values */
+
+  /*
+    [Xr0 Xr1 Xr2 Xr3 Xi0 Xi1 Xi2 Xi3]
+
+    [cr0] [1   0   2   0   1   0   0   0]
+    [cr1] [1   0   0   0  -1   0  -2   0]
+    [cr2] [1   0  -2   0   1   0   0   0]
+    [cr3] [1   0   0   0  -1   0   2   0]
+    [ci0] [0   2   0   2   0   0   0   0]
+    [ci1] [0   s   0  -s   0  -s   0  -s]
+    [ci2] [0   0   0   0   0  -2   0   2]
+    [ci3] [0  -s   0   s   0  -s   0  -s]
+  */
+  for (k=1; k < dk; ++k) {
+    pffft_real_preprocess_4x4(in+8*k, e + k*6, out-1+k*8, 0);
+  }
+
+  cr0=(Xr.f[0]+Xi.f[0]) + 2*Xr.f[2]; uout[0].f[0] = cr0;
+  cr1=(Xr.f[0]-Xi.f[0]) - 2*Xi.f[2]; uout[0].f[1] = cr1;
+  cr2=(Xr.f[0]+Xi.f[0]) - 2*Xr.f[2]; uout[0].f[2] = cr2;
+  cr3=(Xr.f[0]-Xi.f[0]) + 2*Xi.f[2]; uout[0].f[3] = cr3;
+  ci0= 2*(Xr.f[1]+Xr.f[3]);                       uout[2*Ncvec-1].f[0] = ci0;
+  ci1= s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[1] = ci1;
+  ci2= 2*(Xi.f[3]-Xi.f[1]);                       uout[2*Ncvec-1].f[2] = ci2;
+  ci3=-s*(Xr.f[1]-Xr.f[3]) - s*(Xi.f[1]+Xi.f[3]); uout[2*Ncvec-1].f[3] = ci3;
+}
+
+
+static void pffft_transform_internal(PFFFT_Setup *setup, const float *finput, float *foutput, v4sf *scratch,
+                             pffft_direction_t direction, int ordered) { /* Runs one complete transform.
+   *
+   *   setup      tables built by pffft_new_setup
+   *   finput     source data; asserted to satisfy VALIGNED
+   *   foutput    destination; asserted to satisfy VALIGNED
+   *   scratch    second work buffer, used unconditionally as buff[1]; the
+   *              stack fallback is commented out, so this presumably must
+   *              not be NULL -- confirm against the callers
+   *   direction  PFFFT_FORWARD selects the forward path, anything else the
+   *              backward path
+   *   ordered    non-zero adds a pffft_zreorder step (after the forward
+   *              transform, before the backward one)
+   *
+   * The stages ping-pong between foutput (buff[0]) and scratch (buff[1]);
+   * 'ib' tracks which of the two currently holds the data, and the initial
+   * choice is made from the parity of the factor count and 'ordered' with
+   * the aim of finishing in foutput. If the result nevertheless ends up in
+   * scratch, it is copied into foutput at the end. */
+  int k, Ncvec   = setup->Ncvec;
+  int nf_odd = (setup->ifac[1] & 1);
+
+  /* temporary buffer is allocated on the stack if the scratch pointer is NULL */
+  /*int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); */
+  /*VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); */
+
+  int ib = (nf_odd ^ ordered ? 1 : 0);
+  const v4sf *vinput = (const v4sf*)finput;
+  v4sf *voutput      = (v4sf*)foutput;
+  v4sf *buff[2];
+  buff[0] = voutput, buff[1] = scratch /*? scratch : scratch_on_stack*/;
+
+  /*if (scratch == 0) scratch = scratch_on_stack; */
+
+  assert(VALIGNED(finput) && VALIGNED(foutput));
+
+  /*assert(finput != foutput); */
+  if (direction == PFFFT_FORWARD) {
+    ib = !ib;
+    if (setup->transform == PFFFT_REAL) {
+      ib = (rfftf1_ps(Ncvec*2, vinput, buff[ib], buff[!ib],
+                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); /* ib = index of the buffer holding the result */
+      pffft_real_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e);
+    } else {
+      v4sf *tmp = buff[ib];
+      for (k=0; k < Ncvec; ++k) {
+        UNINTERLEAVE2(vinput[k*2], vinput[k*2+1], tmp[k*2], tmp[k*2+1]);
+      }
+      ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib],
+                      setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1);
+      pffft_cplx_finalize(Ncvec, buff[ib], buff[!ib], (v4sf*)setup->e);
+    }
+    if (ordered) {
+      pffft_zreorder(setup, (float*)buff[!ib], (float*)buff[ib], PFFFT_FORWARD);
+    } else ib = !ib; /* the finalize step left its output in buff[!ib] */
+  } else {
+    if (vinput == buff[ib]) {
+      ib = !ib; /* may happen when finput == foutput */
+    }
+    if (ordered) {
+      pffft_zreorder(setup, (float*)vinput, (float*)buff[ib], PFFFT_BACKWARD);
+      vinput = buff[ib]; ib = !ib;
+    }
+    if (setup->transform == PFFFT_REAL) {
+      pffft_real_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e);
+      ib = (rfftb1_ps(Ncvec*2, buff[ib], buff[0], buff[1],
+                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
+    } else {
+      pffft_cplx_preprocess(Ncvec, vinput, buff[ib], (v4sf*)setup->e);
+      ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1],
+                      setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1);
+      for (k=0; k < Ncvec; ++k) { /* interleaves each vector pair in place */
+        INTERLEAVE2(buff[ib][k*2], buff[ib][k*2+1], buff[ib][k*2], buff[ib][k*2+1]);
+      }
+    }
+  }
+
+  if (buff[ib] != voutput) {
+    /* extra copy required -- this situation should only happen when finput == foutput */
+    assert(finput==foutput);
+    for (k=0; k < Ncvec; ++k) {
+      v4sf a = buff[ib][2*k], b = buff[ib][2*k+1];
+      voutput[2*k] = a; voutput[2*k+1] = b;
+    }
+    ib = !ib;
+  }
+  assert(buff[ib] == voutput);
+}
+
+#if 0 /* compiled out: accumulating variant of pffft_zconvolve, ab += (a * b) * scaling */
+static void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, float *ab, float scaling) {
+  int i, Ncvec = s->Ncvec;
+  const v4sf * RESTRICT va = (const v4sf*)a;
+  const v4sf * RESTRICT vb = (const v4sf*)b;
+  v4sf * RESTRICT vab = (v4sf*)ab;
+
+#ifdef __arm__
+  __builtin_prefetch(va);
+  __builtin_prefetch(vb);
+  __builtin_prefetch(vab);
+  __builtin_prefetch(va+2);
+  __builtin_prefetch(vb+2);
+  __builtin_prefetch(vab+2);
+  __builtin_prefetch(va+4);
+  __builtin_prefetch(vb+4);
+  __builtin_prefetch(vab+4);
+  __builtin_prefetch(va+6);
+  __builtin_prefetch(vb+6);
+  __builtin_prefetch(vab+6);
+#endif
+
+  float ar, ai, br, bi, abr, abi;
+  v4sf vscal = LD_PS1(scaling);
+
+  assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
+  ar = ((v4sf_union*)va)[0].f[0]; /* lane 0 of the first two vectors is saved for the PFFFT_REAL fix-up at the end */
+  ai = ((v4sf_union*)va)[1].f[0];
+  br = ((v4sf_union*)vb)[0].f[0];
+  bi = ((v4sf_union*)vb)[1].f[0];
+  abr = ((v4sf_union*)vab)[0].f[0];
+  abi = ((v4sf_union*)vab)[1].f[0];
+
+#ifdef __arm__
+#  if 1 /* inline asm version */
+  const float *a_ = a, *b_ = b; float *ab_ = ab;
+  int N = Ncvec;
+  asm volatile("mov         r8, %2                  \n"
+               "vdup.f32    q15, %4                 \n"
+               "1:                                  \n"
+               "pld         [%0,#64]                \n"
+               "pld         [%1,#64]                \n"
+               "pld         [%2,#64]                \n"
+               "pld         [%0,#96]                \n"
+               "pld         [%1,#96]                \n"
+               "pld         [%2,#96]                \n"
+               "vld1.f32    {q0,q1},   [%0,:128]!         \n"
+               "vld1.f32    {q4,q5},   [%1,:128]!         \n"
+               "vld1.f32    {q2,q3},   [%0,:128]!         \n"
+               "vld1.f32    {q6,q7},   [%1,:128]!         \n"
+               "vld1.f32    {q8,q9},   [r8,:128]!          \n"
+
+               "vmul.f32    q10, q0, q4             \n"
+               "vmul.f32    q11, q0, q5             \n"
+               "vmul.f32    q12, q2, q6             \n"
+               "vmul.f32    q13, q2, q7             \n"
+               "vmls.f32    q10, q1, q5             \n"
+               "vmla.f32    q11, q1, q4             \n"
+               "vld1.f32    {q0,q1}, [r8,:128]!     \n"
+               "vmls.f32    q12, q3, q7             \n"
+               "vmla.f32    q13, q3, q6             \n"
+               "vmla.f32    q8, q10, q15            \n"
+               "vmla.f32    q9, q11, q15            \n"
+               "vmla.f32    q0, q12, q15            \n"
+               "vmla.f32    q1, q13, q15            \n"
+               "vst1.f32    {q8,q9},[%2,:128]!    \n"
+               "vst1.f32    {q0,q1},[%2,:128]!    \n"
+               "subs        %3, #2                  \n"
+               "bne         1b                      \n"
+               : "+r"(a_), "+r"(b_), "+r"(ab_), "+r"(N) : "r"(scaling) : "r8", "q0","q1","q2","q3","q4","q5","q6","q7","q8","q9", "q10","q11","q12","q13","q15","memory");
+
+#  else /* NEON intrinsics version, 30% slower than the asm one with gcc 4.6 */
+  v4sf a1r, a1i, b1r, b1i;
+  v4sf a2r, a2i, b2r, b2i;
+  v4sf ab1r, ab1i, ab2r, ab2i;
+  for (i=0; i < Ncvec; i += 2) {
+    __builtin_prefetch(va+8);
+    __builtin_prefetch(va+10);
+
+    a1r = *va++; a1i = *va++;
+    a2r = *va++; a2i = *va++;
+    b1r = *vb++; b1i = *vb++;
+    b2r = *vb++; b2i = *vb++;
+    ab1r = vab[0]; ab1i = vab[1];
+    ab2r = vab[2]; ab2i = vab[3];
+
+    v4sf z1r = VMUL(a1r, b1r);
+    v4sf z2r = VMUL(a2r, b2r);
+    v4sf z1i = VMUL(a1r, b1i);
+    v4sf z2i = VMUL(a2r, b2i);
+
+    __builtin_prefetch(vb+4);
+    __builtin_prefetch(vb+6);
+
+    z1r = vmlsq_f32(z1r, a1i, b1i);
+    z2r = vmlsq_f32(z2r, a2i, b2i);
+    z1i = vmlaq_f32(z1i, a1i, b1r);
+    z2i = vmlaq_f32(z2i, a2i, b2r);
+
+    __builtin_prefetch(vab+4);
+    __builtin_prefetch(vab+6);
+
+    ab1r = vmlaq_f32(ab1r, z1r, vscal);
+    ab2r = vmlaq_f32(ab2r, z2r, vscal);
+    ab1i = vmlaq_f32(ab1i, z1i, vscal);
+    ab2i = vmlaq_f32(ab2i, z2i, vscal);
+
+    *vab++ = ab1r; *vab++ = ab1i;
+    *vab++ = ab2r; *vab++ = ab2i;
+  }
+#  endif
+
+#else /* not ARM, no need to use a special routine */
+  for (i=0; i < Ncvec; i += 2) { /* two complex vectors per iteration */
+    v4sf ar, ai, br, bi; /* these shadow the scalar floats declared above */
+    ar = va[2*i+0]; ai = va[2*i+1];
+    br = vb[2*i+0]; bi = vb[2*i+1];
+    VCPLXMUL(ar, ai, br, bi);
+    vab[2*i+0] = VMADD(ar, vscal, vab[2*i+0]);
+    vab[2*i+1] = VMADD(ai, vscal, vab[2*i+1]);
+    ar = va[2*i+2]; ai = va[2*i+3];
+    br = vb[2*i+2]; bi = vb[2*i+3];
+    VCPLXMUL(ar, ai, br, bi);
+    vab[2*i+2] = VMADD(ar, vscal, vab[2*i+2]);
+    vab[2*i+3] = VMADD(ai, vscal, vab[2*i+3]);
+  }
+#endif
+  if (s->transform == PFFFT_REAL) { /* lane 0 of the first two vectors is recomputed from the saved scalars */
+    ((v4sf_union*)vab)[0].f[0] = abr + ar*br*scaling;
+    ((v4sf_union*)vab)[1].f[0] = abi + ai*bi*scaling;
+  }
+}
+#endif
+
+static void pffft_zconvolve(PFFFT_Setup *s, const float *a, const float *b, float *ab) {
+  int k, nvec = s->Ncvec;
+  const v4sf * /*RESTRICT*/ va = (const v4sf*)a;
+  const v4sf * RESTRICT vb = (const v4sf*)b;
+  v4sf * /*RESTRICT*/ vab = (v4sf*)ab;
+  float a0, a1, b0, b1; /* element f[0] of the first two vectors of a and of b */
+
+#ifdef __arm__
+#error
+#endif
+  assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab));
+  /* Read these before the loop: ab may alias a or b, and the loop overwrites it. */
+  a0 = ((v4sf_union*)va)[0].f[0];
+  a1 = ((v4sf_union*)va)[1].f[0];
+  b0 = ((v4sf_union*)vb)[0].f[0];
+  b1 = ((v4sf_union*)vb)[1].f[0];
+
+  /* Two complex vector products per iteration: ab = a * b. */
+  for (k = 0; k < nvec; k += 2) {
+    v4sf xr = va[2*k+0], xi = va[2*k+1];
+    v4sf yr = vb[2*k+0], yi = vb[2*k+1];
+    VCPLXMUL(xr, xi, yr, yi);
+    vab[2*k+0] = xr;
+    vab[2*k+1] = xi;
+    xr = va[2*k+2]; xi = va[2*k+3];
+    yr = vb[2*k+2]; yi = vb[2*k+3];
+    VCPLXMUL(xr, xi, yr, yi);
+    vab[2*k+2] = xr;
+    vab[2*k+3] = xi;
+  }
+  if (s->transform == PFFFT_REAL) { /* those two slots are plain real products */
+    ((v4sf_union*)vab)[0].f[0] = a0*b0;
+    ((v4sf_union*)vab)[1].f[0] = a1*b1;
+  }
+}
+
+
+
+#else /* defined(PFFFT_SIMD_DISABLE) */
+
+/* standard routine using scalar floats, without SIMD stuff. */
+
+#define pffft_zreorder_nosimd pffft_zreorder
+static void pffft_zreorder_nosimd(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) {
+  int const len = setup->N;
+  int pos;
+  float moved; /* the one value that changes ends; read before any store */
+
+  if (setup->transform == PFFFT_COMPLEX) { /* nothing to re-order: plain copy of 2*N floats */
+    for (pos = 0; pos < 2*len; ++pos) out[pos] = in[pos];
+  } else if (direction == PFFFT_FORWARD) { /* last element moves to slot 1, the rest shift up */
+    moved = in[len-1];
+    for (pos = len-1; pos >= 2; --pos) out[pos] = in[pos-1];
+    out[0] = in[0];
+    out[1] = moved;
+  } else { /* backward: slot 1 moves to the end, the rest shift down */
+    moved = in[1];
+    for (pos = 1; pos < len-1; ++pos) out[pos] = in[pos+1];
+    out[0] = in[0];
+    out[len-1] = moved;
+  }
+}
+
+#define pffft_transform_internal_nosimd pffft_transform_internal
+static void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input, float *output, float *scratch,
+                                    pffft_direction_t direction, int ordered) { /* scalar (non-SIMD) transform driver */
+  int Ncvec   = setup->Ncvec;
+  int nf_odd = (setup->ifac[1] & 1); /* parity of ifac[1] -- presumably the factor count; TODO confirm */
+
+  /* (disabled) the stack fallback for a NULL scratch pointer is commented out, so scratch is used as given */
+  /*int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); */
+  /*VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); */
+  /*if (scratch == 0) scratch = scratch_on_stack; */
+
+  int ib; /* index into buff[] of the buffer currently of interest */
+  float *buff[2];
+  buff[0] = output, buff[1] = scratch;
+  if (setup->transform == PFFFT_COMPLEX) ordered = 0; /* it is always ordered. */
+  ib = (nf_odd ^ ordered ? 1 : 0);
+
+  if (direction == PFFFT_FORWARD) {
+    if (setup->transform == PFFFT_REAL) {
+      ib = (rfftf1_ps(Ncvec*2, input, buff[ib], buff[!ib],
+                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1); /* helper returns the buffer holding its result */
+    } else {
+      ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib],
+                      setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1);
+    }
+    if (ordered) {
+      pffft_zreorder(setup, buff[ib], buff[!ib], PFFFT_FORWARD); ib = !ib; /* re-ordered copy lands in the other buffer */
+    }
+  } else {
+    if (input == buff[ib]) {
+      ib = !ib; /* may happen when input == output */
+    }
+    if (ordered) {
+      pffft_zreorder(setup, input, buff[!ib], PFFFT_BACKWARD); /* undo the canonical ordering before transforming */
+      input = buff[!ib];
+    }
+    if (setup->transform == PFFFT_REAL) {
+      ib = (rfftb1_ps(Ncvec*2, input, buff[ib], buff[!ib],
+                      setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
+    } else {
+      ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib],
+                      setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1);
+    }
+  }
+  if (buff[ib] != output) { /* result ended up in scratch */
+    int k;
+    /* extra copy required -- this situation should happen only when input == output */
+    assert(input==output);
+    for (k=0; k < Ncvec; ++k) { /* copies 2*Ncvec floats, two per iteration */
+      float a = buff[ib][2*k], b = buff[ib][2*k+1];
+      output[2*k] = a; output[2*k+1] = b;
+    }
+    ib = !ib;
+  }
+  assert(buff[ib] == output);
+}
+
+#if 0 /* compiled out; the matching prototype in pffft.h is commented out as well */
+#define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate
+static void pffft_zconvolve_accumulate_nosimd(PFFFT_Setup *s, const float *a, const float *b,
+                                       float *ab, float scaling) { /* ab += (a * b) * scaling */
+  int i, Ncvec = s->Ncvec;
+
+  if (s->transform == PFFFT_REAL) {
+    /* take care of the fftpack ordering: first and last slots are accumulated as plain real products */
+    ab[0] += a[0]*b[0]*scaling;
+    ab[2*Ncvec-1] += a[2*Ncvec-1]*b[2*Ncvec-1]*scaling;
+    ++ab; ++a; ++b; --Ncvec;
+  }
+  for (i=0; i < Ncvec; ++i) { /* remaining slots: interleaved (re, im) pairs */
+    float ar, ai, br, bi;
+    ar = a[2*i+0]; ai = a[2*i+1];
+    br = b[2*i+0]; bi = b[2*i+1];
+    VCPLXMUL(ar, ai, br, bi);
+    ab[2*i+0] += ar*scaling;
+    ab[2*i+1] += ai*scaling;
+  }
+}
+#endif
+
+#define pffft_zconvolve_nosimd pffft_zconvolve
+static void pffft_zconvolve_nosimd(PFFFT_Setup *s, const float *a, const float *b, float *ab) {
+  int remaining = s->Ncvec; /* complex products still to be written */
+
+  if (s->transform == PFFFT_REAL) {
+    /* fftpack ordering: the first and last slots are multiplied as plain reals */
+    ab[0] = a[0]*b[0];
+    ab[2*remaining-1] = a[2*remaining-1]*b[2*remaining-1];
+    ++ab; ++a; ++b; --remaining;
+  }
+  /* a, b and ab advance together, one interleaved (re, im) pair per step */
+  for (; remaining > 0; --remaining, a += 2, b += 2, ab += 2) {
+    float xr = a[0], xi = a[1];
+    float yr = b[0], yi = b[1];
+    VCPLXMUL(xr, xi, yr, yi);
+    ab[0] = xr;
+    ab[1] = xi;
+  }
+}
+
+#endif /* defined(PFFFT_SIMD_DISABLE) */
+
+static void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) {
+  pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 0); /* ordered = 0: no pffft_zreorder pass is applied */
+}
+
+static void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) {
+  pffft_transform_internal(setup, input, output, (v4sf*)work, direction, 1); /* ordered = 1: z-domain data is re-ordered (after a forward, before a backward transform) */
+}
+
+
+static void pffft_reorder_back(int length, void * setup, float * data, float * work)
+{ /* Applies pffft_zreorder(PFFFT_BACKWARD) to `data`, leaving the result in `data`; `work` needs room for `length` floats. */
+  memcpy(work, data, (unsigned)length * sizeof(*work)); /* zreorder's input and output should not alias, so stage a copy */
+  pffft_zreorder(setup, work, data, PFFFT_BACKWARD);
+}
+#endif
diff --git a/src/pffft.h b/src/pffft.h

new file mode 100644 (file)

index 0000000..78d936b
--- /dev/null
+++ b/src/pffft.h
@@ -0,0 +1,177 @@
+/* Copyright (c) 2011  Julien Pommier ( pommier@modartt.com )
+
+   Based on original fortran 77 code from FFTPACKv4 from NETLIB,
+   authored by Dr Paul Swarztrauber of NCAR, in 1985.
+
+   As confirmed by the NCAR fftpack software curators, the following
+   FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
+   released under the same terms.
+
+   FFTPACK license:
+
+   http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
+
+   Copyright (c) 2004 the University Corporation for Atmospheric
+   Research ("UCAR"). All rights reserved. Developed by NCAR's
+   Computational and Information Systems Laboratory, UCAR,
+   www.cisl.ucar.edu.
+
+   Redistribution and use of the Software in source and binary forms,
+   with or without modification, is permitted provided that the
+   following conditions are met:
+
+   - Neither the names of NCAR's Computational and Information Systems
+   Laboratory, the University Corporation for Atmospheric Research,
+   nor the names of its sponsors or contributors may be used to
+   endorse or promote products derived from this Software without
+   specific prior written permission.
+
+   - Redistributions of source code must retain the above copyright
+   notices, this list of conditions, and the disclaimer below.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions, and the disclaimer below in the
+   documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+   SOFTWARE.
+*/
+
+/*
+   PFFFT : a Pretty Fast FFT.
+
+   This is basically an adaptation of the single precision fftpack
+   (v4) as found on netlib taking advantage of SIMD instruction found
+   on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
+
+   For architectures where no SIMD instruction is available, the code
+   falls back to a scalar version.
+
+   Restrictions:
+
+   - 1D transforms only, with 32-bit single precision.
+
+   - supports only transforms for inputs of length N of the form
+   N=(2^a)*(3^b), a >= 5 and b >=0 (32, 48, 64, 96, 128, 144 etc
+   are all acceptable lengths). Performance is best for 128<=N<=8192.
+
+   - all (float*) pointers in the functions below are expected to
+   have a "simd-compatible" alignment, that is 16 bytes on x86 and
+   powerpc CPUs.
+
+   You can allocate such buffers with the functions
+   pffft_aligned_malloc / pffft_aligned_free (or with stuff like
+   posix_memalign..)
+
+*/
+
+#ifndef PFFFT_H
+#define PFFFT_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+  /* opaque struct holding internal stuff (precomputed twiddle factors)
+     this struct can be shared by many threads as it contains only
+     read-only data.
+  */
+  typedef struct PFFFT_Setup PFFFT_Setup;
+
+  /* direction of the transform */
+  typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
+
+  /* type of transform */
+  typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
+
+  /*
+    prepare for performing transforms of size N -- the returned
+    PFFFT_Setup structure is read-only so it can safely be shared by
+    multiple concurrent threads.
+  */
+  static PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
+  static void pffft_destroy_setup(PFFFT_Setup *);
+  /*
+     Perform a Fourier transform. The z-domain data is stored in the
+     most efficient order for transforming it back, or using it for
+     convolution. If you need to have its content sorted in the
+     "usual" way, that is as an array of interleaved complex numbers,
+     either use pffft_transform_ordered , or call pffft_zreorder after
+     the forward fft, and before the backward fft.
+
+     Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
+     Typically you will want to scale the backward transform by 1/N.
+
+     The 'work' pointer should point to an area of N (2*N for complex
+     fft) floats, properly aligned. [del]If 'work' is NULL, then stack will
+     be used instead (this is probably the best strategy for small
+     FFTs, say for N < 16384).[/del]
+
+     input and output may alias.
+  */
+  static void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
+
+  /*
+     Similar to pffft_transform, but makes sure that the output is
+     ordered as expected (interleaved complex numbers).  This is
+     similar to calling pffft_transform and then pffft_zreorder.
+
+     input and output may alias.
+  */
+  static void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
+
+  /*
+     call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
+     PFFFT_FORWARD) if you want to have the frequency components in
+     the correct "canonical" order, as interleaved complex numbers.
+
+     (for real transforms, both 0-frequency and half frequency
+     components, which are real, are assembled in the first entry as
+     F(0)+i*F(n/2+1). Note that the original fftpack did place
+     F(n/2+1) at the end of the arrays).
+
+     input and output should not alias.
+  */
+  static void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
+
+  /*
+     Perform a multiplication of the frequency components of dft_a and
+     dft_b and accumulate them into dft_ab. The arrays should have
+     been obtained with pffft_transform(.., PFFFT_FORWARD) and should
+     *not* have been reordered with pffft_zreorder (otherwise just
+     perform the operation yourself as the dft coefs are stored as
+     interleaved complex numbers).
+
+     the operation performed is: dft_ab += (dft_a * dft_b)*scaling
+
+     The dft_a, dft_b and dft_ab pointers may alias.
+  void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
+  */
+
+  /*
+     the operation performed is: dft_ab = (dft_a * dft_b)
+
+     The dft_a, dft_b and dft_ab pointers may alias.
+  */
+  static void pffft_zconvolve(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab);
+
+  /* return 4 or 1 depending on whether support for SSE/Altivec instructions was enabled when building pffft.c */
+  int pffft_simd_size(void);
+
+  static void pffft_reorder_back(int length, void * setup, float * data, float * work);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pffft32.c b/src/pffft32.c

new file mode 100644 (file)

index 0000000..21bd845
--- /dev/null
+++ b/src/pffft32.c
@@ -0,0 +1,32 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#define _soxr_simd_aligned_free free
+#define _soxr_simd_aligned_malloc malloc
+#define PFFFT_SIMD_DISABLE
+#include "pffft.c"
+#include "filter.h"
+
+static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);} /* always a real-input (PFFFT_REAL) setup */
+static void delete_setup(void * setup) {pffft_destroy_setup(setup);} /* void*-taking adapter around pffft_destroy_setup */
+static void forward  (int length, void * setup, float * h, float * scratch) {pffft_transform        (setup, h, h, scratch, PFFFT_FORWARD); (void)length;} /* in place; length is unused */
+static void oforward (int length, void * setup, float * h, float * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;} /* in place, ordered variant */
+static void backward (int length, void * setup, float * H, float * scratch) {pffft_transform        (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
+static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
+static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H);  (void)length;} /* H = H * with, written back into H */
+static int multiplier(void) {return 1;} /* presumably what rate.h reads via rdft_multiplier() when scaling filter coefs -- confirm */
+
+typedef void (* fn_t)(void); /* generic function-pointer type; each entry is cast back before being called */
+fn_t _soxr_rdft32_cb[] = { /* 11 slots; order presumably matches the rdft_* accessor macros in rate.h (RDFT_CB[0..10]) -- confirm */
+  (fn_t)setup,        /* [0] rdft_forward_setup */
+  (fn_t)setup,        /* [1] rdft_backward_setup (same function as [0]) */
+  (fn_t)delete_setup, /* [2] rdft_delete_setup */
+  (fn_t)forward,      /* [3] rdft_forward */
+  (fn_t)oforward,     /* [4] rdft_oforward */
+  (fn_t)backward,     /* [5] rdft_backward */
+  (fn_t)obackward,    /* [6] rdft_obackward */
+  (fn_t)convolve,     /* [7] rdft_convolve */
+  (fn_t)_soxr_ordered_partial_convolve_f, /* [8] rdft_convolve_portion */
+  (fn_t)multiplier,   /* [9] rdft_multiplier */
+  (fn_t)pffft_reorder_back, /* [10] rdft_reorder_back */
+};
diff --git a/src/pffft32s.c b/src/pffft32s.c

new file mode 100644 (file)

index 0000000..d049990
--- /dev/null
+++ b/src/pffft32s.c
@@ -0,0 +1,27 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include "pffft.c"
+
+static void * setup(int len) {return pffft_new_setup(len, PFFFT_REAL);} /* always a real-input (PFFFT_REAL) setup */
+static void delete_setup(void * setup) {pffft_destroy_setup(setup);} /* void*-taking adapter: rate.h calls this slot as void (*)(void *) */
+static void forward  (int length, void * setup, float * h, float * scratch) {pffft_transform        (setup, h, h, scratch, PFFFT_FORWARD); (void)length;} /* in place; length is unused */
+static void oforward (int length, void * setup, float * h, float * scratch) {pffft_transform_ordered(setup, h, h, scratch, PFFFT_FORWARD); (void)length;} /* in place, ordered variant */
+static void backward (int length, void * setup, float * H, float * scratch) {pffft_transform        (setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
+static void obackward(int length, void * setup, float * H, float * scratch) {pffft_transform_ordered(setup, H, H, scratch, PFFFT_BACKWARD);(void)length;}
+static void convolve(int length, void * setup, float * H, float const * with) { pffft_zconvolve(setup, H, with, H);                  (void)length;} /* H = H * with, written back into H */
+static int multiplier(void) {return 1;}
+
+typedef void (* fn_t)(void); /* generic function-pointer type; each entry is cast back before being called */
+fn_t _soxr_rdft32s_cb[] = { /* 11 slots; order presumably matches the rdft_* accessor macros in rate.h (RDFT_CB[0..10]) -- confirm */
+  (fn_t)setup, (fn_t)setup, /* [0] forward setup, [1] backward setup: one function serves both */
+  (fn_t)delete_setup, /* [2] adapter, so the callee's real signature matches the type it is called through */
+  (fn_t)forward,      /* [3] */
+  (fn_t)oforward,     /* [4] */
+  (fn_t)backward,     /* [5] */
+  (fn_t)obackward,    /* [6] */
+  (fn_t)convolve,     /* [7] */
+  (fn_t)_soxr_ordered_partial_convolve_simd, /* [8] */
+  (fn_t)multiplier,   /* [9] */
+  (fn_t)pffft_reorder_back, /* [10] */
+};
diff --git a/src/poly-fir.h b/src/poly-fir.h

new file mode 100644 (file)

index 0000000..f7b4261
--- /dev/null
+++ b/src/poly-fir.h
@@ -0,0 +1,98 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Resample using an interpolated poly-phase FIR with length LEN.*/
+/* Input must be followed by LEN-1 samples. */
+
+#define a (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 0,j))
+#define b (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 1,j))
+#define c (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 2,j))
+#define d (coef(p->shared->poly_fir_coefs, COEF_INTERP, FIR_LENGTH, phase, 3,j))
+#if COEF_INTERP == 0
+  #define _ sum += a *in[j], ++j;
+#elif COEF_INTERP == 1
+  #define _ sum += (b *x + a)*in[j], ++j;
+#elif COEF_INTERP == 2
+  #define _ sum += ((c *x + b)*x + a)*in[j], ++j;
+#elif COEF_INTERP == 3
+  #define _ sum += (((d*x + c)*x + b)*x + a)*in[j], ++j;
+#else
+  #error COEF_INTERP
+#endif
+
+static void FUNCTION(stage_t * p, fifo_t * output_fifo) /* interpolated poly-phase FIR stage: reads p's fifo, writes into output_fifo */
+{
+  sample_t const * input = stage_read_p(p);
+  int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio); /* max_num_out: upper bound used to reserve output space */
+  sample_t * output = fifo_reserve(output_fifo, max_num_out);
+
+#if defined HI_PREC_CLOCK
+#if FLOAT_HI_PREC_CLOCK
+  if (p->use_hi_prec_clock) { /* position kept as a float_step_t */
+    float_step_t at = p->at.flt;
+    for (i = 0; (int)at < num_in; ++i, at += p->step.flt) {
+      sample_t const * in = input + (int)at;
+      float_step_t frac = at - (int)at;
+      int phase = (int)(frac * (1 << PHASE_BITS)); /* integer part of frac scaled by 2^PHASE_BITS */
+#if COEF_INTERP > 0
+      sample_t x = (sample_t)(frac * (1 << PHASE_BITS) - phase); /* remainder within the phase */
+#endif
+      sample_t sum = 0;
+      int j = 0;
+      CONVOLVE
+      output[i] = sum;
+    }
+    fifo_read(&p->fifo, (int)at, NULL);
+    p->at.flt = at - (int)at; /* keep only the fractional position for the next call */
+  } else
+#else
+  if (p->use_hi_prec_clock) { /* fixed-point position with an extra low-order 64-bit word */
+    for (i = 0; p->at.integer < num_in; ++i,
+        p->at.fix.ls.all += p->step.fix.ls.all,
+        p->at.whole += p->step.whole + (p->at.fix.ls.all < p->step.fix.ls.all)) { /* comparison = carry out of the low word */
+      sample_t const * in = input + p->at.integer;
+      uint32_t frac = p->at.fraction;
+      int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */
+#if COEF_INTERP > 0              /* low-order bits, scaled to [0,1) */
+      sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32));
+#endif
+      sample_t sum = 0;
+      int j = 0;
+      CONVOLVE
+      output[i] = sum;
+    }
+    fifo_read(&p->fifo, p->at.integer, NULL);
+    p->at.integer = 0; /* integer part reset; the fraction carries over */
+  } else
+#endif
+#endif
+  { /* default clock: at.whole advanced by step.whole each output */
+    for (i = 0; p->at.integer < num_in; ++i, p->at.whole += p->step.whole) {
+      sample_t const * in = input + p->at.integer;
+      uint32_t frac = p->at.fraction;
+      int phase = (int)(frac >> (32 - PHASE_BITS)); /* high-order bits */
+#if COEF_INTERP > 0              /* low-order bits, scaled to [0,1) */
+      sample_t x = (sample_t)((frac << PHASE_BITS) * (1 / MULT32));
+#endif
+      sample_t sum = 0;
+      int j = 0;
+      CONVOLVE
+      output[i] = sum;
+    }
+    fifo_read(&p->fifo, p->at.integer, NULL);
+    p->at.integer = 0; /* integer part reset; the fraction carries over */
+  }
+  assert(max_num_out - i >= 0);
+  fifo_trim_by(output_fifo, max_num_out - i); /* i outputs were written out of max_num_out reserved */
+}
+
+#undef _
+#undef a
+#undef b
+#undef c
+#undef d
+#undef COEF_INTERP
+#undef CONVOLVE
+#undef FIR_LENGTH
+#undef FUNCTION
+#undef PHASE_BITS
diff --git a/src/poly-fir0.h b/src/poly-fir0.h

new file mode 100644 (file)

index 0000000..52d85b3
--- /dev/null
+++ b/src/poly-fir0.h
@@ -0,0 +1,32 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Resample using a non-interpolated poly-phase FIR with length LEN.*/
+/* Input must be followed by LEN-1 samples. */
+
+#define _ sum += (coef(p->shared->poly_fir_coefs, 0, FIR_LENGTH, rem, 0, j)) *at[j], ++j;
+
+static void FUNCTION(stage_t * p, fifo_t * output_fifo) /* non-interpolated poly-phase FIR stage: reads p's fifo, writes into output_fifo */
+{
+  sample_t const * input = stage_read_p(p);
+  int i, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio); /* max_num_out: upper bound used to reserve output space */
+  sample_t * output = fifo_reserve(output_fifo, max_num_out);
+
+  for (i = 0; p->at.integer < num_in * p->L; ++i, p->at.integer += p->step.integer) { /* at.integer counts in units of 1/L input sample */
+    int div = p->at.integer / p->L, rem = p->at.integer % p->L; /* div: input sample index; rem: phase passed to coef() */
+    sample_t const * at = input + div;
+    sample_t sum = 0;
+    int j = 0;
+    CONVOLVE
+    output[i] = sum;
+  }
+  assert(max_num_out - i >= 0);
+  fifo_trim_by(output_fifo, max_num_out - i); /* i outputs were written out of max_num_out reserved */
+  fifo_read(&p->fifo, p->at.integer / p->L, NULL);
+  p->at.integer = p->at.integer % p->L; /* keep only the phase for the next call */
+}
+
+#undef _
+#undef CONVOLVE
+#undef FIR_LENGTH
+#undef FUNCTION
diff --git a/src/rate.h b/src/rate.h

new file mode 100644 (file)

index 0000000..81cabff
--- /dev/null
+++ b/src/rate.h
@@ -0,0 +1,724 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <math.h>
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "filter.h"
+
+#if defined SOXR_LIB
+#include "internal.h"
+
+typedef void (* fn_t)(void);
+extern fn_t RDFT_CB[11];
+
+#define rdft_forward_setup    (*(void * (*)(int))RDFT_CB[0])
+#define rdft_backward_setup   (*(void * (*)(int))RDFT_CB[1])
+#define rdft_delete_setup     (*(void (*)(void *))RDFT_CB[2])
+#define rdft_forward          (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[3])
+#define rdft_oforward         (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[4])
+#define rdft_backward         (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[5])
+#define rdft_obackward        (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[6])
+#define rdft_convolve         (*(void (*)(int, void *, sample_t *, sample_t const *))RDFT_CB[7])
+#define rdft_convolve_portion (*(void (*)(int, sample_t *, sample_t const *))RDFT_CB[8])
+#define rdft_multiplier       (*(int (*)(void))RDFT_CB[9])
+#define rdft_reorder_back     (*(void (*)(int, void *, sample_t *, sample_t *))RDFT_CB[10])
+
+#endif
+
+#if RATE_SIMD /* Align for SIMD: */
+  #include "simd.h"
+#if 0 /* Not using this yet. */
+  #define RATE_SIMD_POLY 1
+  #define num_coefs4 ((num_coefs + 3) & ~3)
+  #define coefs4_check(i) ((i) < num_coefs)
+#else
+  #define RATE_SIMD_POLY 0
+  #define num_coefs4 num_coefs
+  #define coefs4_check(i) 1
+#endif
+
+  #define aligned_free    _soxr_simd_aligned_free
+  #define aligned_malloc  _soxr_simd_aligned_malloc
+  #define aligned_calloc  _soxr_simd_aligned_calloc
+#if 0
+  #define FIFO_REALLOC    aligned_realloc
+  #define FIFO_MALLOC     aligned_malloc
+  #define FIFO_FREE       aligned_free
+
+  static void * aligned_realloc(void * q, size_t nb_bytes, size_t copy_bytes) {
+    void * p = aligned_malloc(nb_bytes);
+    if (p) memcpy(p, q, copy_bytes);
+    aligned_free(q);
+    return p;
+  }
+#endif
+#else
+  #define RATE_SIMD_POLY 0
+  #define num_coefs4 num_coefs
+  #define coefs4_check(i) 1
+
+  #define aligned_free    free
+  #define aligned_malloc  malloc
+  #define aligned_calloc  calloc
+#endif
+
+#define  FIFO_SIZE_T int
+#include "fifo.h"
+
+typedef union { /* Int64 in parts */
+  #if WORDS_BIGENDIAN
+  struct {int32_t ms; uint32_t ls;} parts;
+  #else
+  struct {uint32_t ls; int32_t ms;} parts;
+  #endif
+  int64_t all;
+} int64p_t;
+
+typedef union { /* Uint64 in parts */
+  #if WORDS_BIGENDIAN
+  struct {uint32_t ms, ls;} parts;
+  #else
+  struct {uint32_t ls, ms;} parts;
+  #endif
+  uint64_t all;
+} uint64p_t;
+
+#define FLOAT_HI_PREC_CLOCK 0    /* Non-float hi-prec has ~96 bits. */
+#define float_step_t long double /* __float128 is also a (slow) option */
+/* coef(): element of a table indexed [phase_num][fir_coef_num][interp term], with the interp terms stored highest coef_interp_num first; every argument is parenthesised so expression arguments index correctly. */
+#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, fir_coef_num) (coef_p)[(fir_len) * ((interp_order) + 1) * (phase_num) + ((interp_order) + 1) * (fir_coef_num) + ((interp_order) - (coef_interp_num))]
+
+#define raw_coef_t double
+
+static sample_t * prepare_coefs(raw_coef_t const * coefs, int num_coefs,
+    int num_phases, int interp_order, double multiplier) /* builds the table read through coef(); returns a malloc'd block */
+{
+  int i, j, length = num_coefs4 * num_phases;
+  sample_t * result = malloc((size_t)(length * (interp_order + 1)) * sizeof(*result)); /* NOTE(review): not checked for NULL before the loop writes to it */
+  double fm1 = coefs[0], f1 = 0, f2 = 0; /* fm1, f0, f1, f2: sliding window over neighbouring raw coefs */
+
+  for (i = num_coefs4 - 1; i >= 0; --i) /* raw coefs are visited from the highest index down */
+    for (j = num_phases - 1; j >= 0; --j) {
+      double f0 = fm1, b = 0, c = 0, d = 0; /* = 0 to kill compiler warning */
+      int pos = i * num_phases + j - 1;
+      fm1 = coefs4_check(i) && pos > 0 ? coefs[pos - 1] * multiplier : 0; /* next window entry; 0 once pos <= 0 */
+      switch (interp_order) { /* b, c, d: extra terms stored per coef for interpolation orders 1..3 */
+        case 1: b = f1 - f0; break;
+        case 2: b = f1 - (.5 * (f2+f0) - f1) - f0; c = .5 * (f2+f0) - f1; break;
+        case 3: c=.5*(f1+fm1)-f0;d=(1/6.)*(f2-f1+fm1-f0-4*c);b=f1-f0-d-c; break;
+        default: if (interp_order) assert(0); /* only orders 0..3 are supported */
+      }
+      #define coef_coef(x) \
+        coef(result, interp_order, num_coefs4, j, x, num_coefs4 - 1 - i)
+      coef_coef(0) = (sample_t)f0; /* stored at FIR position num_coefs4 - 1 - i, i.e. in reverse order of i */
+      if (interp_order > 0) coef_coef(1) = (sample_t)b;
+      if (interp_order > 1) coef_coef(2) = (sample_t)c;
+      if (interp_order > 2) coef_coef(3) = (sample_t)d;
+      #undef coef_coef
+      f2 = f1, f1 = f0; /* shift the window */
+    }
+  return result;
+}
+
+typedef struct {
+  int        dft_length, num_taps, post_peak;
+  void       * dft_forward_setup, * dft_backward_setup;
+  sample_t   * coefs;
+} dft_filter_t;
+
+typedef struct { /* So generated filter coefs may be shared between channels */
+  sample_t   * poly_fir_coefs;
+  dft_filter_t dft_filter[2];
+} rate_shared_t;
+
+typedef enum {
+  irrational_stage = 1,
+  cubic_stage,
+  dft_stage,
+  half_stage,
+  rational_stage
+} stage_type_t;
+
+struct stage;
+typedef void (* stage_fn_t)(struct stage * input, fifo_t * output);
+#define MULT32 (65536. * 65536.)
+
+typedef union { /* Fixed point arithmetic */
+  struct {uint64p_t ls; int64p_t ms;} fix;
+  float_step_t flt;
+} step_t;
+
+typedef struct stage {
+  /* Common to all stage types: */
+  stage_type_t type;
+  stage_fn_t fn;
+  fifo_t     fifo;
+  int        pre;       /* Number of past samples to store */
+  int        pre_post;  /* pre + number of future samples to store */
+  int        preload;   /* Number of zero samples to pre-load the fifo */
+  double     out_in_ratio; /* For buffer management. */
+
+  /* For a stage with variable (run-time generated) filter coefs: */
+  rate_shared_t * shared;
+  unsigned   dft_filter_num; /* Which, if any, of the 2 DFT filters to use */
+  sample_t   * dft_scratch, * dft_out;
+
+  /* For a stage with variable L/M: */
+  step_t     at, step;
+  bool       use_hi_prec_clock;
+  int        L, remM;
+  int        n, phase_bits, block_len;
+  double     mult, phase0;
+} stage_t;
+
+#define stage_occupancy(s) max(0, fifo_occupancy(&(s)->fifo) - (s)->pre_post)
+#define stage_read_p(s) ((sample_t *)fifo_read_ptr(&(s)->fifo) + (s)->pre)
+
+static void cubic_stage_fn(stage_t * p, fifo_t * output_fifo)
+{
+  int produced = 0, num_in = stage_occupancy(p), max_num_out = 1 + (int)(num_in*p->out_in_ratio);
+  sample_t const * const input = stage_read_p(p);
+  sample_t * const output = fifo_reserve(output_fifo, max_num_out);
+
+#define integer  fix.ms.parts.ms
+#define fraction fix.ms.parts.ls
+#define whole    fix.ms.all
+  /* One output per step: a cubic through cur[-1..2], evaluated at the fractional position x. */
+  for (; p->at.integer < num_in; p->at.whole += p->step.whole) {
+    sample_t const * cur = input + p->at.integer;
+    double x = p->at.fraction * (1 / MULT32);
+    double b = .5*(cur[1]+cur[-1])-cur[0], a = (1/6.)*(cur[2]-cur[1]+cur[-1]-cur[0]-4*b);
+    double c = cur[1]-cur[0]-a-b;
+    output[produced++] = (sample_t)(p->mult * (((a*x + b)*x + c)*x + cur[0]));
+  }
+  assert(max_num_out - produced >= 0);
+  fifo_trim_by(output_fifo, max_num_out - produced); /* hand back the unused part of the reservation */
+  fifo_read(&p->fifo, p->at.integer, NULL); p->at.integer = 0;
+}
+
+#if RATE_SIMD
+  #define dft_out p->dft_out
+#else
+  #define dft_out output
+#endif
+
+static void dft_stage_fn(stage_t * p, fifo_t * output_fifo) /* block-wise DFT filter stage: reads p's fifo, writes into output_fifo */
+{
+  sample_t * output;
+  int i, j, num_in = max(0, fifo_occupancy(&p->fifo));
+  rate_shared_t const * s = p->shared;
+  dft_filter_t const * f = &s->dft_filter[p->dft_filter_num];
+  int const overlap = f->num_taps - 1; /* each block yields at most dft_length - overlap output samples */
+
+  while (p->at.integer + p->L * num_in >= f->dft_length) { /* one DFT block per iteration */
+    div_t divd = div(f->dft_length - overlap - p->at.integer + p->L - 1, p->L); /* quot: input samples consumed (rounded up); rem feeds the next offset */
+    sample_t const * input = fifo_read_ptr(&p->fifo);
+    fifo_read(&p->fifo, divd.quot, NULL);
+    num_in -= divd.quot;
+
+    output = fifo_reserve(output_fifo, f->dft_length);
+
+    if (lsx_is_power_of_2(p->L)) { /* F-domain: transform only dft_length / L samples, then fill the rest of the spectrum by copying */
+      int portion = f->dft_length / p->L;
+      memcpy(dft_out, input, (unsigned)portion * sizeof(*dft_out));
+      rdft_oforward(portion, f->dft_forward_setup, dft_out, p->dft_scratch);
+      for (i = portion + 2; i < (portion << 1); i += 2) /* Mirror image. */
+        dft_out[i] = dft_out[(portion << 1) - i],
+        dft_out[i+1] = -dft_out[(portion << 1) - i + 1]; /* imaginary part negated */
+      dft_out[portion] = dft_out[1];
+      dft_out[portion + 1] = 0;
+      dft_out[1] = dft_out[0];
+
+      for (portion <<= 1; i < f->dft_length; i += portion, portion <<= 1) { /* double the filled part on each pass */
+        memcpy(dft_out + i, dft_out, (size_t)portion * sizeof(*dft_out));
+        dft_out[i + 1] = 0;
+      }
+      if (p->step.integer > 0)
+        rdft_reorder_back(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
+    } else {
+      if (p->L == 1)
+        memcpy(dft_out, input, (size_t)f->dft_length * sizeof(*dft_out));
+      else { /* zero-stuff: one input sample every L slots */
+        memset(dft_out, 0, (size_t)f->dft_length * sizeof(*dft_out));
+        for (j = 0, i = p->at.integer; i < f->dft_length; ++j, i += p->L)
+          dft_out[i] = input[j];
+        p->at.integer = p->L - 1 - divd.rem; /* starting slot for the next block */
+      }
+      if (p->step.integer > 0)
+        rdft_forward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
+      else
+        rdft_oforward(f->dft_length, f->dft_forward_setup, dft_out, p->dft_scratch);
+    }
+
+    if (p->step.integer > 0) { /* filter, transform back, then keep every step.integer-th sample */
+      rdft_convolve(f->dft_length, f->dft_backward_setup, dft_out, f->coefs);
+      rdft_backward(f->dft_length, f->dft_backward_setup, dft_out, p->dft_scratch);
+#if RATE_SIMD
+      if (p->step.integer == 1)
+        memcpy(output, dft_out, (size_t)f->dft_length * sizeof(sample_t));
+#endif
+      if (p->step.integer != 1) {
+        for (j = 0, i = p->remM; i < f->dft_length - overlap; ++j,
+            i += p->step.integer)
+          output[j] = dft_out[i];
+        p->remM = i - (f->dft_length - overlap); /* offset of the first kept sample in the next block */
+        fifo_trim_by(output_fifo, f->dft_length - j);
+      }
+      else fifo_trim_by(output_fifo, overlap);
+    }
+    else { /* F-domain: step.integer <= 0 here and -step.integer is used as a shift count */
+      int m = -p->step.integer;
+      rdft_convolve_portion(f->dft_length >> m, dft_out, f->coefs);
+      rdft_obackward(f->dft_length >> m, f->dft_backward_setup, dft_out, p->dft_scratch);
+#if RATE_SIMD
+      memcpy(output, dft_out, (size_t)(f->dft_length >> m) * sizeof(sample_t));
+#endif
+      fifo_trim_by(output_fifo, (((1 << m) - 1) * f->dft_length + overlap) >>m);
+    }
+  }
+}
+
+#undef dft_out
+
+/* DFT length: 4 x the nearest power of 2, or half of that if cache misses are a danger. */
+static int set_dft_length(int num_taps, int min, int large)
+{
+  double const log2_taps = log((double)num_taps) / log(2.);
+  int const ceiling = max((int)(log2_taps + 1.77), large);
+  return 1 << range_limit((int)(log2_taps + 2.77), min, ceiling);
+}
+
+/* Set up stage `p' as a DFT filter stage that resamples by the ratio L/M.
+ *
+ * The filter lives in p->shared->dft_filter[instance].  It is designed and
+ * transformed only if that slot has not been set up yet (its dft_length is
+ * 0); otherwise the existing filter is reused and only the per-stage buffers
+ * and fields are initialised.
+ *
+ *   Fp, Fs, Fn, att  Passed on to lsx_design_lpf().
+ *   phase            50 keeps the designed filter as it is; any other value
+ *                    is applied with lsx_fir_to_phase().
+ *   multiplier       Gain folded into the coefficients when they are
+ *                    generated here; *multiplier is always reset to 1.
+ *   min_dft_size, large_dft_size
+ *                    Passed on to set_dft_length().
+ *
+ * NOTE(review): the allocation results are not checked in this function. */
+static void dft_stage_init(
+    unsigned instance, double Fp, double Fs, double Fn, double att,
+    double phase, stage_t * p, int L, int M, double * multiplier,
+    int min_dft_size, int large_dft_size)
+{
+  dft_filter_t * f = &p->shared->dft_filter[instance];
+  int num_taps = 0, dft_length = f->dft_length, i;
+  /* True only for M == 2 or M == 4 (and Fs <= 1); selects the negative
+   * step.integer set at the end of this function. */
+  bool f_domain_m = abs(3-M) == 1 && Fs <= 1;
+
+  if (!dft_length) {                 /* Filter not yet designed: do it now. */
+    int k = phase == 50 && lsx_is_power_of_2(L) && Fn == L? L << 1 : 4;
+    double * h = lsx_design_lpf(Fp, Fs, Fn, att, &num_taps, -k, -1.);
+
+    if (phase != 50)
+      lsx_fir_to_phase(&h, &num_taps, &f->post_peak, phase);
+    else f->post_peak = num_taps / 2;
+
+    dft_length = set_dft_length(num_taps, min_dft_size, large_dft_size);
+    f->coefs = aligned_calloc((size_t)dft_length, sizeof(*f->coefs));
+    /* Copy the scaled taps into the zeroed buffer; the index is wrapped
+     * with a mask, which works because dft_length is a power of 2. */
+    for (i = 0; i < num_taps; ++i)
+      f->coefs[(i + dft_length - num_taps + 1) & (dft_length - 1)]
+        = (sample_t)(h[i] * ((1. / dft_length) * rdft_multiplier() * L * *multiplier));
+    free(h);
+  }
+
+  /* Per-stage work buffers (allocated for every stage, shared filter or not). */
+#if RATE_SIMD
+  p->dft_out = aligned_malloc(sizeof(sample_t) * (size_t)dft_length);
+#endif
+#if 1 /* In fact, currently, only pffft needs this. */
+  p->dft_scratch = aligned_malloc(2 * sizeof(sample_t) * (size_t)dft_length);
+#endif
+
+  if (!f->dft_length) {  /* First use: create setups, transform the coefs. */
+    void * coef_setup = rdft_forward_setup(dft_length);
+    int Lp = lsx_is_power_of_2(L)? L : 1;
+    int Mp = f_domain_m? M : 1;
+    f->dft_forward_setup = rdft_forward_setup(dft_length / Lp);
+    f->dft_backward_setup = rdft_backward_setup(dft_length / Mp);
+    if (Mp == 1)
+      rdft_forward(dft_length, coef_setup, f->coefs, p->dft_scratch);
+    else
+      rdft_oforward(dft_length, coef_setup, f->coefs, p->dft_scratch);
+    rdft_delete_setup(coef_setup);
+    f->num_taps = num_taps;
+    f->dft_length = dft_length;      /* Marks the shared filter as set up. */
+    lsx_debug("fir_len=%i dft_length=%i Fp=%g Fs=%g Fn=%g att=%g %i/%i",
+        num_taps, dft_length, Fp, Fs, Fn, att, L, M);
+  }
+  *multiplier = 1;
+  p->out_in_ratio = (double)L / M;
+  p->type = dft_stage;
+  p->fn = dft_stage_fn;
+  /* post_peak is split into a quotient and a remainder with respect to L. */
+  p->preload = f->post_peak / L;
+  p->at.integer = f->post_peak % L;
+  p->L = L;
+  p->step.integer = f_domain_m? -M/2 : M;
+  p->dft_filter_num = instance;
+  p->block_len = f->dft_length - (f->num_taps - 1);
+  /* NOTE(review): at.integer was just set to post_peak % L, so if both
+   * operands are integers this quotient is always 0 -- confirm intent. */
+  p->phase0 = p->at.integer / p->L;
+}
+
+#include "filters.h"
+
+/* Resampler state for one audio channel. */
+typedef struct {
+  double     factor;       /* Input rate divided by output rate.             */
+  uint64_t   samples_in, samples_out; /* Running totals kept by rate_input()
+                              and rate_output(); rate_flush() may reset
+                              samples_in to 0.                               */
+  int        num_stages;   /* Number of processing stages.                   */
+  stage_t    * stages;     /* num_stages + 1 entries; the fifo of the extra
+                              (last) entry holds the final output.           */
+} rate_t;
+
+/* Shorthands for use inside rate_init(); they rely on its locals (p, shift,
+ * preL, preM, arbL, arbM, postL, postM).  The order in p->stages is: the
+ * `shift' halving stages, then the pre, arb and post stages (each only if
+ * it is present). */
+#define pre_stage       p->stages[shift]
+#define arb_stage       p->stages[shift + have_pre_stage]
+#define post_stage      p->stages[shift + have_pre_stage + have_arb_stage]
+/* A stage is present when the product of its L and M is not 1: */
+#define have_pre_stage  (preM  * preL  != 1)
+#define have_arb_stage  (arbM  * arbL  != 1)
+#define have_post_stage (postM * postL != 1)
+
+/* Quadratic in `a'; rate_init() passes the attenuation and multiplies tbw0
+ * by the result.  NOTE(review): `a' is not parenthesised in the expansion,
+ * so only a simple operand should be passed. */
+#define TO_3dB(a)       ((1.6e-6*a-7.5e-4)*a+.646)
+#define LOW_Q_BW0       (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */
+
+/* Pass-band roll-off choices, in increasing order of permitted roll-off
+ * (rolloff_none == 0); rate_init() compares these values with `>'. */
+typedef enum {
+  rolloff_none, rolloff_small /* <= 0.01 dB */, rolloff_medium /* <= 0.35 dB */
+} rolloff_t;
+
+
+/* Plan and create the chain of resampling stages for one channel.
+ *
+ * The overall ratio `factor' is decomposed into `shift' halving stages, an
+ * optional pre stage (preL/preM), an optional arbitrary-ratio stage
+ * (arbL/arbM) and an optional post stage (postL/postM); see the
+ * have_*_stage macros.  p->stages is allocated with one entry more than the
+ * number of stages; the fifo of that extra entry receives the final output.
+ *
+ * Returns 0; no other value is returned by the code below.
+ * NOTE(review): the result of calloc() is used without being checked. */
+static char const * rate_init(
+  /* Private work areas (to be supplied by the client):                       */
+  rate_t * p,                /* Per audio channel.                            */
+  rate_shared_t * shared,    /* Between channels (undergoing same rate change)*/
+
+  /* Public parameters:                                             Typically */
+  double factor,             /* Input rate divided by output rate.            */
+  double bits,               /* Required bit-accuracy (pass + stop)  16|20|28 */
+  double phase,              /* Linear/minimum etc. filter phase.       50    */
+  double passband_end,       /* 0dB pt. bandwidth to preserve; nyquist=1 0.913*/
+  double stopband_begin,     /* Aliasing/imaging control; > passband_end  1   */
+  rolloff_t rolloff,         /* Pass-band roll-off                    small   */
+  bool maintain_3dB_pt,      /*                                        true   */
+  double multiplier,         /* Linear gain to apply during conversion.   1   */
+
+  /* Primarily for test/development purposes:                                 */
+  bool use_hi_prec_clock,    /* Increase irrational ratio accuracy.   false   */
+  int interpolator,          /* Force a particular coef interpolator.   -1    */
+  size_t max_coefs_size,     /* k bytes of coefs to try to keep below.  400   */
+  bool noSmallIntOpt,        /* Disable small integer optimisations.  false   */
+  int log2_min_dft_size,     /* Passed to dft_stage_init() as min_dft_size.   */
+  int log2_large_dft_size)   /* Passed to dft_stage_init() as large_dft_size. */
+{
+  double att = (bits + 1) * linear_to_dB(2.), attArb = att;    /* pass + stop */
+  double tbw0 = 1 - passband_end, Fs_a = stopband_begin;
+  double arbM = factor, tbw_tighten = 1;
+  int n = 0, i, preL = 1, preM = 1, shift = 0, arbL = 1, postL = 1, postM = 1;
+  bool upsample = false, rational = false, iOpt = !noSmallIntOpt;
+  /* With more than `small' roll-off, mode is 0 or 1; otherwise it is
+   * derived from the requested bit-accuracy. */
+  int mode = rolloff > rolloff_small? factor > 1 || passband_end > LOW_Q_BW0:
+    (int)ceil(2 + (bits - 17) / 4);
+  stage_t * s;
+
+  assert(factor > 0);
+  assert(!bits || (15 <= bits && bits <= 33));
+  assert(0 <= phase && phase <= 100);
+  assert(.53 <= passband_end);
+  assert(stopband_begin <= 1.2);
+  assert(passband_end + .005 < stopband_begin);
+
+  p->factor = factor;
+  /* The loop body runs once, and again each time it resets n to 0; with
+   * bits == 0 the planning is skipped altogether. */
+  if (bits) while (!n++) {                               /* Determine stages: */
+    int try, L, M, x, maxL = interpolator > 0? 1 : mode? 2048 :
+      (int)ceil((double)max_coefs_size * 1000. / (U100_l * sizeof(sample_t)));
+    double d, epsilon = 0, frac;
+    upsample = arbM < 1;
+    /* Each pass halves arbM and counts one more halving stage in `shift'. */
+    for (i = (int)(arbM * .5), shift = 0; i >>= 1; arbM *= .5, ++shift);
+    preM = upsample || (arbM > 1.5 && arbM < 2);
+    postM = 1 + (arbM > 1 && preM), arbM /= postM;
+    preL = 1 + (!preM && arbM < 2) + (upsample && mode), arbM *= preL;
+    if ((frac = arbM - (int)arbM))
+      epsilon = fabs((uint32_t)(frac * MULT32 + .5) / (frac * MULT32) - 1);
+    /* Look for a denominator i <= maxL that represents the fractional part
+     * of arbM to within epsilon. */
+    for (i = 1, rational = !frac; i <= maxL && !rational; ++i) {
+      d = frac * i, try = (int)(d + .5);
+      if ((rational = fabs(try / d - 1) <= epsilon)) {    /* No long doubles! */
+        if (try == i)
+          arbM = ceil(arbM), shift += arbM > 2, arbM /= 1 + (arbM > 2);
+        else arbM = i * (int)arbM + try, arbL = i;
+      }
+    }
+    L = preL * arbL, M = (int)(arbM * postM), x = (L|M)&1, L >>= !x, M >>= !x;
+    if (iOpt && postL == 1 && (d = preL * arbL / arbM) > 4 && d != 5) {
+      for (postL = 4, i = (int)(d / 16); (i >>= 1) && postL < 256; postL <<= 1);
+      arbM = arbM * postL / arbL / preL, arbL = 1, n = 0;
+    } else if (rational && (max(L, M) < 3 + 2 * iOpt || L * M < 6 * iOpt))
+      preL = L, preM = M, arbM = arbL = postM = 1;
+    if (!mode && (!rational || !n))
+      ++mode, n = 0;
+  }
+
+  p->num_stages = shift + have_pre_stage + have_arb_stage + have_post_stage;
+  /* No stage is needed for the rate change but a gain must be applied:
+   * arbL = 0 makes have_arb_stage true so that one stage is created. */
+  if (!p->num_stages && multiplier != 1) {
+    arbL = 0;
+    ++p->num_stages;
+  }
+  /* One extra entry: its fifo is created at the end of this function. */
+  p->stages = calloc((size_t)p->num_stages + 1, sizeof(*p->stages));
+  for (i = 0; i < p->num_stages; ++i)
+    p->stages[i].shared = shared;
+
+  if ((n = p->num_stages) > 1) {                              /* Att. budget: */
+    if (have_arb_stage)
+      att += linear_to_dB(2.), attArb = att, --n;
+    att += linear_to_dB((double)n);
+  }
+
+  /* Select the first half_firs entry whose att is not below `att' (or the
+   * last entry if there is none); all halving stages use that entry. */
+  for (n = 0; (size_t)n + 1 < array_length(half_firs) && att > half_firs[n].att; ++n);
+  for (i = 0, s = p->stages; i < shift; ++i, ++s) {
+    s->type = half_stage;
+    s->fn = half_firs[n].fn;
+    s->pre_post = 4 * half_firs[n].num_coefs;
+    s->preload = s->pre = s->pre_post >> 1;
+  }
+
+  if (have_pre_stage) {
+    if (maintain_3dB_pt && have_post_stage) {    /* Trans. bands overlapping. */
+      double tbw3 = tbw0 * TO_3dB(att);                /* FFS: consider Fs_a. */
+      double x = ((2.1429e-4 - 5.2083e-7 * att) * att - .015863) * att + 3.95;
+      x = att * pow((tbw0 - tbw3) / (postM / (factor * postL) - 1 + tbw0), x);
+      if (x > .035) {
+        tbw_tighten = ((4.3074e-3 - 3.9121e-4 * x) * x - .040009) * x + 1.0014;
+        lsx_debug("x=%g tbw_tighten=%g", x, tbw_tighten);
+      }
+    }
+    dft_stage_init(0, 1 - tbw0 * tbw_tighten, Fs_a, preM? max(preL, preM) :
+        arbM / arbL, att, phase, &pre_stage, preL, max(preM, 1), &multiplier,
+        log2_min_dft_size, log2_large_dft_size);
+  }
+
+  if (!bits && have_arb_stage) {                /* Quick and dirty arb stage: */
+    arb_stage.type = cubic_stage;
+    arb_stage.fn = cubic_stage_fn;
+    arb_stage.mult = multiplier, multiplier = 1;
+    arb_stage.step.whole = (int64_t)(arbM * MULT32 + .5);
+    arb_stage.pre_post = max(3, arb_stage.step.integer);
+    arb_stage.preload = arb_stage.pre = 1;
+    arb_stage.out_in_ratio = MULT32 / (double)arb_stage.step.whole;
+  }
+  else if (have_arb_stage) {                     /* Higher quality arb stage: */
+    poly_fir_t const * f = &poly_firs[6*(upsample + !!preM) + mode - !upsample];
+    int order, num_coefs = (int)f->interp[0].scalar, phase_bits, phases;
+    size_t coefs_size;
+    double x = .5, at, Fp, Fs, Fn, mult = upsample? 1 : arbL / arbM;
+    poly_fir1_t const * f1;
+
+    Fn = !upsample && preM? x = arbM / arbL : 1;
+    Fp = !preM? mult : mode? .5 : 1;
+    Fs = 2 - Fp;           /* Ignore Fs_a; it would have little benefit here. */
+    Fp *= 1 - tbw0;
+    if (rolloff > rolloff_small && mode)
+      Fp = !preM? mult * .5 - .125 : mult * .05 + .1;
+    else if (rolloff == rolloff_small)
+      Fp = Fs - (Fs - .148 * x - Fp * .852) * (.00813 * bits + .973);
+
+    /* Step through the available coefficient interpolators.  When none was
+     * forced (interpolator < 0), keep moving to the next one while the
+     * coefficient table would exceed max_coefs_size k bytes. */
+    i = (interpolator < 0? !rational : max(interpolator, !rational)) - 1;
+    do {
+      f1 = &f->interp[++i];
+      assert(f1->fn);
+      if (i)
+        arbM /= arbL, arbL = 1, rational = false;
+      phase_bits = (int)ceil(f1->scalar + log(mult)/log(2.));
+      phases = !rational? (1 << phase_bits) : arbL;
+      if (!f->interp[0].scalar) {
+        int phases0 = max(phases, 19), n0 = 0;
+        lsx_design_lpf(Fp, Fs, -Fn, attArb, &n0, phases0, f->beta);
+        num_coefs = n0 / phases0 + 1, num_coefs += num_coefs & !preM;
+      }
+      if ((num_coefs & 1) && rational && (arbL & 1))
+        phases <<= 1, arbL <<= 1, arbM *= 2;
+      at = arbL * (arb_stage.phase0 = .5 * (num_coefs & 1));
+      order = i + (i && mode > 4);
+      coefs_size = (size_t)(num_coefs4 * phases * (order + 1)) * sizeof(sample_t);
+    } while (interpolator < 0 && i < 2 && f->interp[i+1].fn &&
+        coefs_size / 1000 > max_coefs_size);
+
+    /* The coefficient table lives in the shared area; build it only if it
+     * is not there yet. */
+    if (!arb_stage.shared->poly_fir_coefs) {
+      int num_taps = num_coefs * phases - 1;
+      raw_coef_t * coefs = lsx_design_lpf(
+          Fp, Fs, Fn, attArb, &num_taps, phases, f->beta);
+      arb_stage.shared->poly_fir_coefs = prepare_coefs(
+          coefs, num_coefs, phases, order, multiplier);
+      lsx_debug("fir_len=%i phases=%i coef_interp=%i size=%.3gk",
+          num_coefs, phases, order, (double)coefs_size / 1000.);
+      free(coefs);
+    }
+    multiplier = 1;
+    arb_stage.type = rational? rational_stage : irrational_stage;
+    arb_stage.fn = f1->fn;
+    arb_stage.pre_post = num_coefs4 - 1;
+    arb_stage.preload = ((num_coefs - 1) >> 1) + (num_coefs4 - num_coefs);
+    arb_stage.n = num_coefs4;
+    arb_stage.phase_bits = phase_bits;
+    arb_stage.L = arbL;
+    arb_stage.use_hi_prec_clock = mode > 1 && use_hi_prec_clock && !rational;
+#if FLOAT_HI_PREC_CLOCK
+    if (arb_stage.use_hi_prec_clock) {
+      arb_stage.at.flt = at;
+      arb_stage.step.flt = arbM;
+      arb_stage.out_in_ratio = (double)(arbL / arb_stage.step.flt);
+    } else
+#endif
+    {
+      arb_stage.at.whole = (int64_t)(at * MULT32 + .5);
+#if !FLOAT_HI_PREC_CLOCK
+      if (arb_stage.use_hi_prec_clock) {
+        /* Store the step in two parts: the scaled whole part first, then
+         * what remains of arbM scaled up again. */
+        arb_stage.at.fix.ls.parts.ms = 0x80000000ul;
+        arbM *= MULT32;
+        arb_stage.step.whole = (int64_t)arbM;
+        arbM -= (double)arb_stage.step.whole;
+        arbM *= MULT32 * MULT32;
+        arb_stage.step.fix.ls.all = (uint64_t)arbM;
+      } else
+#endif
+        arb_stage.step.whole = (int64_t)(arbM * MULT32 + .5);
+      arb_stage.out_in_ratio = MULT32 * arbL / (double)arb_stage.step.whole;
+    }
+  }
+
+  if (have_post_stage)
+    dft_stage_init(1, 1 - (1 - (1 - tbw0) *
+        (upsample? factor * postL / postM : 1)) * tbw_tighten, Fs_a,
+        (double)max(postL, postM), att, phase, &post_stage, postL, postM,
+        &multiplier, log2_min_dft_size, log2_large_dft_size);
+
+
+  lsx_debug("%g: »%i⋅%i/%i⋅%i/%g⋅%i/%i",
+      1/factor, shift, preL, preM, arbL, arbM, postL, postM);
+  /* Create each stage's input fifo and pre-fill it with `preload' zeros. */
+  for (i = 0, s = p->stages; i < p->num_stages; ++i, ++s) {
+    fifo_create(&s->fifo, (int)sizeof(sample_t));
+    memset(fifo_reserve(&s->fifo, s->preload), 0, sizeof(sample_t) * (size_t)s->preload);
+    lsx_debug("%5i|%-5i preload=%i remL=%i o/i=%g",
+        s->pre, s->pre_post - s->pre, s->preload, s->at.integer, s->out_in_ratio);
+  }
+  /* s now points at the extra entry: this fifo collects the final output. */
+  fifo_create(&s->fifo, (int)sizeof(sample_t));
+  return 0;
+}
+
+/* Run every stage once, in order.  Each stage is handed the fifo of the
+ * entry that follows it as its output; for the last stage that is the extra
+ * entry at the end of p->stages. */
+static void rate_process(rate_t * p)
+{
+  int k = 0;
+  while (k < p->num_stages) {
+    stage_t * const current = &p->stages[k];
+    current->fn(current, &p->stages[k + 1].fifo);
+    ++k;
+  }
+}
+
+/* Queue `n' input samples on the first stage's fifo and add them to the
+ * running input count.  Returns whatever fifo_write() returns. */
+static sample_t * rate_input(rate_t * p, sample_t const * samples, size_t n)
+{
+  fifo_t * const in_fifo = &p->stages->fifo;
+  p->samples_in += n;
+  return fifo_write(in_fifo, (int)n, samples);
+}
+
+/* Take up to *n samples from the output fifo (the fifo of the extra entry
+ * at the end of p->stages).  On return *n holds the number actually taken,
+ * which has also been added to the running output count.  Returns whatever
+ * fifo_read() returns. */
+static sample_t const * rate_output(rate_t * p, sample_t * samples, size_t * n)
+{
+  fifo_t * const out_fifo = &p->stages[p->num_stages].fifo;
+  size_t const available = (size_t)fifo_occupancy(out_fifo);
+
+  if (available < *n)
+    *n = available;
+  p->samples_out += *n;
+  return fifo_read(out_fifo, (int)*n, samples);
+}
+
+/* Flush the resampler at end of input.
+ *
+ * The number of output samples owed is the total input count divided by
+ * `factor' (rounded).  If fewer than that have been read so far, silence is
+ * pushed through the stages until the output fifo holds the shortfall; the
+ * fifo is then trimmed to exactly that many samples and the input count is
+ * reset to 0.  Nothing is done if no output is owed. */
+static void rate_flush(rate_t * p)
+{
+  /* The zeros are only ever read, so one constant block serves every call.
+   * This replaces a per-call calloc() whose result was handed to
+   * rate_input() without being checked, and which was made (and freed) even
+   * when there was nothing to flush. */
+  static sample_t const silence[1024] = {0};
+  fifo_t * fifo = &p->stages[p->num_stages].fifo;
+#if defined _MSC_VER && _MSC_VER == 1200
+  uint64_t samples_out = (uint64_t)(int64_t)((double)(int64_t)p->samples_in / p->factor + .5);
+#else
+  uint64_t samples_out = (uint64_t)((double)p->samples_in / p->factor + .5);
+#endif
+
+  if (samples_out > p->samples_out) {
+    size_t remaining = (size_t)(samples_out - p->samples_out);
+    while ((size_t)fifo_occupancy(fifo) < remaining) {
+      rate_input(p, silence, sizeof(silence) / sizeof(silence[0]));
+      rate_process(p);
+    }
+    fifo_trim_to(fifo, (int)remaining);
+    p->samples_in = 0;
+  }
+}
+
+/* Release everything owned by one channel's resampler: the work buffers and
+ * fifo of every entry in p->stages (including the extra output entry), then,
+ * if the first stage refers to a shared area, the two shared DFT filters and
+ * the shared coefficient table, and finally the stage array itself.
+ * The shared area is zeroed after its contents are freed (presumably so that
+ * closing the other channels does not free them again). */
+static void rate_close(rate_t * p)
+{
+  rate_shared_t * const common = p->stages->shared;
+  stage_t * stage = p->stages;
+  stage_t * const stages_end = p->stages + p->num_stages + 1;
+
+  while (stage != stages_end) {
+    aligned_free(stage->dft_scratch);
+    aligned_free(stage->dft_out);
+    fifo_delete(&stage->fifo);
+    ++stage;
+  }
+  if (common) {
+    int k = 0;
+    do {
+      dft_filter_t * const filter = &common->dft_filter[k];
+      aligned_free(filter->coefs);
+      rdft_delete_setup(filter->dft_forward_setup);
+      rdft_delete_setup(filter->dft_backward_setup);
+    } while (++k < 2);
+    free(common->poly_fir_coefs);
+    memset(common, 0, sizeof(*common));
+  }
+  free(p->stages);
+}
+
+#if defined SOXR_LIB
+/* Return the number of output samples still owed: the input count divided
+ * by `factor', less the output count.  (The extra casts for _MSC_VER == 1200
+ * route the 64-bit unsigned counts through int64_t before converting them
+ * to double.) */
+static double rate_delay(rate_t * p)
+{
+#if defined _MSC_VER && _MSC_VER == 1200
+  double const consumed = (double)(int64_t)p->samples_in;
+  double const produced = (double)(int64_t)p->samples_out;
+#else
+  double const consumed = (double)p->samples_in;
+  double const produced = (double)p->samples_out;
+#endif
+  double const expected = consumed / p->factor;
+  return expected - produced;
+}
+
+/* Report the sizes of the work areas that rate_init() expects the client to
+ * supply: *shared receives sizeof(rate_shared_t), *channel sizeof(rate_t). */
+static void rate_sizes(size_t * shared, size_t * channel)
+{
+  *shared = sizeof(rate_shared_t);
+  *channel = sizeof(rate_t);
+}
+
+#include "soxr.h"
+
+/* Adapter between the soxr quality/runtime specifications and rate_init():
+ * unpacks the spec fields and flag bits into rate_init()'s individual
+ * parameters and returns its result. */
+static char const * rate_create(
+    void * channel,
+    void * shared,
+    double io_ratio,
+    soxr_quality_spec_t * q_spec,
+    soxr_runtime_spec_t * r_spec,
+    double scale)
+{
+  /* The two low quality-flag bits index a small table: 0 -> 1, 1 -> 2,
+   * 2 -> 0, and 3 -> 0 (the string's terminating NUL). */
+  rolloff_t const rolloff = "\1\2\0"[q_spec->flags & 3];
+  bool const maintain_3dB_pt = !!(q_spec->flags & SOXR_MAINTAIN_3DB_PT);
+  bool const use_hi_prec_clock = !!(q_spec->flags & SOXR_HI_PREC_CLOCK);
+  /* The two low runtime-flag bits, less 1, give the interpolator (-1..2). */
+  int const interpolator = (int)(r_spec->flags & 3) - 1;
+  bool const noSmallIntOpt = !!(r_spec->flags & SOXR_NOSMALLINTOPT);
+
+  return rate_init(
+      channel, shared,
+      io_ratio,
+      q_spec->precision,
+      q_spec->phase_response,
+      q_spec->passband_end,
+      q_spec->stopband_begin,
+      rolloff,
+      maintain_3dB_pt,
+      scale,
+      use_hi_prec_clock,
+      interpolator,
+      r_spec->coef_size_kbytes,
+      noSmallIntOpt,
+      (int)r_spec->log2_min_dft_size,
+      (int)r_spec->log2_large_dft_size);
+}
+
+/* Return the identification string (RATE_ID) chosen by the file that
+ * includes this header. */
+static char const * id(void)
+{
+  return RATE_ID;
+}
+
+/* Table of entry points exported under the name chosen by RATE_CB.  The
+ * slot order must match the resampler_* accessor macros in soxr.c, which
+ * index the table by position. */
+fn_t RATE_CB[] = {
+  (fn_t)rate_input,    /* [0] */
+  (fn_t)rate_process,  /* [1] */
+  (fn_t)rate_output,   /* [2] */
+  (fn_t)rate_flush,    /* [3] */
+  (fn_t)rate_close,    /* [4] */
+  (fn_t)rate_delay,    /* [5] */
+  (fn_t)rate_sizes,    /* [6] */
+  (fn_t)rate_create,   /* [7] */
+  (fn_t)0,             /* [8] set_io_ratio: not provided here. */
+  (fn_t)id,            /* [9] */
+};
+#endif
diff --git a/src/rate32.c b/src/rate32.c

new file mode 100644 (file)

index 0000000..d6dd3b9
--- /dev/null
+++ b/src/rate32.c
@@ -0,0 +1,9 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#define sample_t   float                   /* Sample type used by rate.h.    */
+#define RATE_SIMD  0                       /* `#if RATE_SIMD' code is off.   */
+#define RDFT_CB    _soxr_rdft32_cb
+#define RATE_CB    _soxr_rate32_cb         /* Name of the exported table.    */
+#define RATE_ID    "single-precision"      /* String returned by id().       */
+#include "rate.h"
diff --git a/src/rate32s.c b/src/rate32s.c

new file mode 100644 (file)

index 0000000..26a371a
--- /dev/null
+++ b/src/rate32s.c
@@ -0,0 +1,9 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#define sample_t   float                   /* Sample type used by rate.h.    */
+#define RATE_SIMD  1                       /* `#if RATE_SIMD' code is on.    */
+#define RDFT_CB    _soxr_rdft32s_cb
+#define RATE_CB    _soxr_rate32s_cb        /* Name of the exported table.    */
+#define RATE_ID    "single-precision-SIMD" /* String returned by id().       */
+#include "rate.h"
diff --git a/src/rate64.c b/src/rate64.c

new file mode 100644 (file)

index 0000000..6289911
--- /dev/null
+++ b/src/rate64.c
@@ -0,0 +1,9 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#define sample_t   double                  /* Sample type used by rate.h.    */
+#define RATE_SIMD  0                       /* `#if RATE_SIMD' code is off.   */
+#define RDFT_CB    _soxr_rdft64_cb
+#define RATE_CB    _soxr_rate64_cb         /* Name of the exported table.    */
+#define RATE_ID    "double-precision"      /* String returned by id().       */
+#include "rate.h"
diff --git a/src/rdft.h b/src/rdft.h

new file mode 100644 (file)

index 0000000..59ba174
--- /dev/null
+++ b/src/rdft.h
@@ -0,0 +1,31 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Multiply, in place, the n-element array `a' by `b'.  Elements 0 and 1 are
+ * each multiplied as plain real numbers; every following pair (k, k+1) is
+ * multiplied as a complex number (real part first).  `not_used' is ignored. */
+void ORDERED_CONVOLVE(int n, void * not_used, DFT_FLOAT * a, const DFT_FLOAT * b)
+{
+  int k = 2;
+  (void)not_used;
+  a[0] *= b[0];
+  a[1] *= b[1];
+  while (k < n) {
+    DFT_FLOAT const re = a[k], im = a[k+1];
+    a[k  ] = b[k  ] * re - b[k+1] * im;
+    a[k+1] = b[k+1] * re + b[k  ] * im;
+    k += 2;
+  }
+}
+
+/* As the full convolve, but for a partial array: element 0 is multiplied as
+ * a plain real number, the pairs (k, k+1) for k = 2, 4, ... below n are
+ * multiplied as complex numbers, and element 1 is then replaced by the real
+ * part of the complex product of the first pair NOT processed by the loop
+ * (so both arrays must extend at least two elements beyond the loop). */
+void ORDERED_PARTIAL_CONVOLVE(int n, DFT_FLOAT * a, const DFT_FLOAT * b)
+{
+  int k = 2;
+  a[0] *= b[0];
+  while (k < n) {
+    DFT_FLOAT const re = a[k], im = a[k+1];
+    a[k  ] = b[k  ] * re - b[k+1] * im;
+    a[k+1] = b[k+1] * re + b[k  ] * im;
+    k += 2;
+  }
+  a[1] = b[k] * a[k] - b[k+1] * a[k+1];
+}
+
+#undef ORDERED_CONVOLVE
+#undef ORDERED_PARTIAL_CONVOLVE
+#undef DFT_FLOAT
diff --git a/src/rint-clip.h b/src/rint-clip.h

new file mode 100644 (file)

index 0000000..a501cec
--- /dev/null
+++ b/src/rint-clip.h
@@ -0,0 +1,153 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Helper macros for the functions below.  With DITHER defined they add a
+ * pseudo-random offset to each sample and thread a seed through the calls;
+ * without it they all expand to nothing (DITHERING to 0). */
+#if defined DITHER
+
+/* Offset in units of 1/32: the difference of two 5-bit fields, taken after
+ * shifting ran1 and ran2 right by 3 bits (so each use consumes fresh bits). */
+#define DITHERING (1./32)*(int)(((ran1>>=3)&31)-((ran2>>=3)&31))
+/* Advance `seed' by one linear-congruential step; yield it shifted by 3. */
+#define DITHER_RAND (seed = 1664525UL * seed + 1013904223UL) >> 3
+#define DITHER_VARS unsigned long ran1 = DITHER_RAND, ran2 = DITHER_RAND
+#define SEED_ARG , unsigned long * seed0    /* Extra trailing parameter.     */
+#define SAVE_SEED *seed0 = seed             /* Write the seed back on exit.  */
+#define COPY_SEED unsigned long seed = *seed0;  /* N.B. includes its own `;' */
+#define COPY_SEED1 unsigned long seed1 = seed   /* Snapshot for a re-run.    */
+#define PASS_SEED1 , &seed1
+#define PASS_SEED  , &seed
+
+#else
+
+#define DITHERING 0
+#define DITHER_VARS
+#define SEED_ARG
+#define SAVE_SEED
+#define COPY_SEED
+#define COPY_SEED1
+#define PASS_SEED1
+#define PASS_SEED
+
+#endif
+
+
+
+/* Slow, per-sample conversion used by the FPU-rounding code paths.
+ * Converts src[i] .. src[n-1] with RINT(), storing sample k at
+ * dest[stride * k].  After each conversion the floating-point `invalid'
+ * flag is tested; if it is set, the flag is cleared, the sample is replaced
+ * by RINT_MAX (for positive input) or -RINT_MAX - 1 (otherwise) and *clips
+ * is incremented. */
+#if defined FE_INVALID && defined FPU_RINT
+static void RINT_CLIP(RINT_T * const dest, FLOATX const * const src,
+    unsigned stride, size_t i, size_t const n, size_t * const clips SEED_ARG)
+{
+  COPY_SEED
+  DITHER_VARS;
+  for (; i < n; ++i) {
+    double d = src[i] + DITHERING;
+    dest[stride * i] = RINT(d);
+    if (fe_test_invalid()) {
+      fe_clear_invalid();
+      dest[stride * i] = d > 0? RINT_MAX : -RINT_MAX - 1;
+      ++*clips;
+    }
+  }
+  SAVE_SEED;
+}
+#endif
+
+
+
+/* Convert n contiguous samples from src to integers at *dest0, clipping
+ * values that are out of range.  On return *dest0 has been advanced by n
+ * elements.  Returns the number of samples that were clipped. */
+static size_t LSX_RINT_CLIP(void * * const dest0, FLOATX const * const src,
+    size_t const n SEED_ARG)
+{
+  size_t i, clips = 0;
+  RINT_T * dest = *dest0;
+  COPY_SEED
+#if defined FE_INVALID && defined FPU_RINT
+  /* FPU path: convert in unrolled groups of 8 and test the `invalid' flag
+   * once per group.  If it is set, the group is converted again one sample
+   * at a time by RINT_CLIP(), starting from the seed saved by COPY_SEED1.
+   * The samples left over after the last full group also go to RINT_CLIP(). */
+#define _ dest[i] = RINT(src[i] + DITHERING), ++i,
+  fe_clear_invalid();
+  for (i = 0; i < (n & ~7u);) {
+    COPY_SEED1;
+    DITHER_VARS;
+    _ _ _ _ _ _ _ _ 0;
+    if (fe_test_invalid()) {
+      fe_clear_invalid();
+      RINT_CLIP(dest, src, 1, i - 8, i, &clips PASS_SEED1);
+    }
+  }
+  RINT_CLIP(dest, src, 1, i, n, &clips PASS_SEED);
+#else
+  /* Portable path: add or subtract .5 and let the cast truncate; results
+   * that would reach N = 1 + RINT_MAX (or pass -N) are replaced by N-1
+   * (or -N) and counted as clips. */
+#define _ d = src[i] + DITHERING, dest[i++] = (RINT_T)(d > 0? d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5),
+  const double N = 1. + RINT_MAX;
+  double d;
+  for (i = 0; i < (n & ~7u);) {      /* Unrolled groups of 8. */
+    DITHER_VARS;
+    _ _ _ _ _ _ _ _ 0;
+  }
+  {                                  /* Remaining samples, one at a time. */
+    DITHER_VARS;
+    for (; i < n; _ 0);
+  }
+#endif
+  SAVE_SEED;
+  *dest0 = dest + n;
+  return clips;
+}
+#undef _
+
+
+
+/* Convert n samples from each of `stride' separate source arrays and store
+ * them interleaved at *dest0: sample i of srcs[j] goes to element
+ * stride * i + j.  Out-of-range values are clipped.  Returns the number of
+ * samples that were clipped. */
+static size_t LSX_RINT_CLIP_2(void * * dest0, FLOATX const * const * srcs,
+    unsigned const stride, size_t const n SEED_ARG)
+{
+  unsigned j;
+  size_t i, clips = 0;
+  RINT_T * dest = *dest0;
+  COPY_SEED
+#if defined FE_INVALID && defined FPU_RINT
+  /* FPU path: per source array, convert in unrolled groups of 8, testing
+   * the `invalid' flag once per group; a flagged group is converted again
+   * one sample at a time by RINT_CLIP(), as are the left-over samples. */
+#define _ dest[stride * i] = RINT(src[i] + DITHERING), ++i,
+  fe_clear_invalid();
+  for (j = 0; j < stride; ++j, ++dest) {
+    FLOATX const * const src = srcs[j];
+    for (i = 0; i < (n & ~7u);) {
+      COPY_SEED1;
+      DITHER_VARS;
+      _ _ _ _ _ _ _ _ 0;
+      if (fe_test_invalid()) {
+        fe_clear_invalid();
+        RINT_CLIP(dest, src, stride, i - 8, i, &clips PASS_SEED1);
+      }
+    }
+    RINT_CLIP(dest, src, stride, i, n, &clips PASS_SEED);
+  }
+#else
+  /* Portable path: add or subtract .5 and let the cast truncate; results
+   * that would reach N = 1 + RINT_MAX (or pass -N) are replaced by N-1
+   * (or -N) and counted as clips. */
+#define _ d = src[i] + DITHERING, dest[stride * i++] = (RINT_T)(d > 0? d+.5 >= N? ++clips, N-1 : d+.5 : d-.5 <= -N-1? ++clips, -N:d-.5),
+  const double N = 1. + RINT_MAX;
+  double d;
+  for (j = 0; j < stride; ++j, ++dest) {
+    FLOATX const * const src = srcs[j];
+    for (i = 0; i < (n & ~7u);) {    /* Unrolled groups of 8. */
+      DITHER_VARS;
+      _ _ _ _ _ _ _ _ 0;
+    }
+    {                                /* Remaining samples, one at a time. */
+      DITHER_VARS;
+      for (; i < n; _ 0);
+    }
+  }
+#endif
+  SAVE_SEED;
+  /* dest has already advanced by `stride' elements in the loops above, so
+   * adding stride * (n - 1) leaves *dest0 advanced by stride * n in total.
+   * NOTE(review): n is a size_t, so for n == 0 the n - 1 wraps around. */
+  *dest0 = dest + stride * (n - 1);
+  return clips;
+}
+#undef _
+
+#undef PASS_SEED
+#undef PASS_SEED1
+#undef COPY_SEED1
+#undef COPY_SEED
+#undef SAVE_SEED
+#undef SEED_ARG
+#undef DITHER_VARS
+#undef DITHERING
+#undef DITHER
+
+#undef RINT_MAX
+#undef RINT_T
+#undef FPU_RINT
+#undef RINT
+#undef RINT_CLIP
+#undef LSX_RINT_CLIP
+#undef LSX_RINT_CLIP_2
diff --git a/src/rint.h b/src/rint.h

new file mode 100644 (file)

index 0000000..529e4bb
--- /dev/null
+++ b/src/rint.h
@@ -0,0 +1,68 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined soxr_rint_included
+#define soxr_rint_included
+
+#include "soxr-config.h"
+
+
+
+/* rint32(x): convert the double x to a 32-bit integer.
+ * The first four alternatives use the C library or an FPU instruction and
+ * define FPU_RINT32; the last is a portable fallback that adds or subtracts
+ * .5 and truncates (i.e. rounds halves away from zero). */
+#if HAVE_LRINT && LONG_MAX == 2147483647L
+  #include <math.h>
+  #define FPU_RINT32
+  #define rint32 lrint
+#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+  #define FPU_RINT32
+  static __inline int32_t rint32(double input) {
+    int32_t result;
+    __asm__ __volatile__("fistpl %0": "=m"(result): "t"(input): "st");
+    return result;
+  }
+#elif defined __GNUC__ && defined __arm__
+  #define FPU_RINT32
+  static __inline int32_t rint32(double input) {
+    register int32_t result;
+    __asm__ __volatile__ ("ftosid %0, %P1": "=w"(result): "w"(input));
+    return result;
+  }
+#elif defined _MSC_VER && defined _M_IX86 /* FIXME need solution for MSVC x64 */
+  #define FPU_RINT32
+  static __inline int32_t rint32(double input) {
+    int32_t result;
+    _asm {
+      fld input
+      fistp result
+    }
+    return result;
+  }
+#else
+  /* Every use of the argument, and the expansion as a whole, is
+   * parenthesised so that any expression may be passed (e.g. one using
+   * `?:') and the result may appear in any context.  N.B. the argument is
+   * evaluated twice. */
+  #define rint32(x) ((int32_t)((x) < 0? (x) - .5 : (x) + .5))
+#endif
+
+
+
+/* rint16(x): convert the double x to a 16-bit integer.
+ * The first two alternatives use an FPU instruction and define FPU_RINT16;
+ * the last is a portable fallback that adds or subtracts .5 and truncates
+ * (i.e. rounds halves away from zero). */
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+  #define FPU_RINT16
+  static __inline int16_t rint16(double input) {
+    int16_t result;
+    __asm__ __volatile__("fistps %0": "=m"(result): "t"(input): "st");
+    return result;
+  }
+#elif defined _MSC_VER && defined _M_IX86 /* FIXME need solution for MSVC x64 */
+  #define FPU_RINT16
+  static __inline int16_t rint16(double input) {
+    int16_t result;
+    _asm {
+      fld input
+      fistp result
+    }
+    return result;
+  }
+#else
+  /* Every use of the argument, and the expansion as a whole, is
+   * parenthesised so that any expression may be passed (e.g. one using
+   * `?:') and the result may appear in any context.  N.B. the argument is
+   * evaluated twice. */
+  #define rint16(x) ((int16_t)((x) < 0? (x) - .5 : (x) + .5))
+#endif
+
+
+
+#endif
diff --git a/src/samplerate.h b/src/samplerate.h

new file mode 100644 (file)

index 0000000..911cc5d
--- /dev/null
+++ b/src/samplerate.h
@@ -0,0 +1 @@
+#include "soxr-lsr.h"
diff --git a/src/simd-dev.h b/src/simd-dev.h

new file mode 100644 (file)

index 0000000..019325c
--- /dev/null
+++ b/src/simd-dev.h
@@ -0,0 +1,5 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#define PFFT_MACROS_ONLY
+#include "pffft.c"
diff --git a/src/simd.c b/src/simd.c

new file mode 100644 (file)

index 0000000..7659ab9
--- /dev/null
+++ b/src/simd.c
@@ -0,0 +1,84 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include "simd.h"
+#include "simd-dev.h"
+
+#define SIMD_ALIGNMENT (sizeof(float) * 4)
+
+/* Allocate `size' bytes aligned to SIMD_ALIGNMENT.
+ *
+ * SIMD_ALIGNMENT extra bytes are requested from malloc(); the returned
+ * address is the next multiple of SIMD_ALIGNMENT above malloc()'s result,
+ * and malloc()'s own pointer is stored in the pointer-sized slot just below
+ * it, where _soxr_simd_aligned_free() finds it.
+ *
+ * Returns 0 if the allocation fails or if `size' is so large that adding
+ * the padding would wrap around.  Release only with
+ * _soxr_simd_aligned_free(). */
+void * _soxr_simd_aligned_malloc(size_t size)
+{
+  char * p1 = 0, * p;
+
+  /* Without this check, size + SIMD_ALIGNMENT could wrap to a small value
+   * and malloc() would return a block far smaller than requested. */
+  if (size > (size_t)-1 - SIMD_ALIGNMENT)
+    return 0;
+
+  p = malloc(size + SIMD_ALIGNMENT);
+  if (p) {
+    p1 = (char *)((size_t)(p + SIMD_ALIGNMENT) & ~(SIMD_ALIGNMENT - 1));
+    *((void * *)p1 - 1) = p;
+  }
+  return p1;
+}
+
+
+
+/* Allocate a zero-filled, SIMD-aligned array of `nmemb' elements of `size'
+ * bytes each.  Returns 0 if the allocation fails or if nmemb * size cannot
+ * be represented in a size_t (as calloc() itself would).  Release only with
+ * _soxr_simd_aligned_free(). */
+void * _soxr_simd_aligned_calloc(size_t nmemb, size_t size)
+{
+  size_t total;
+  void * p;
+
+  /* Reject products that would overflow rather than silently allocating
+   * (and zeroing) a block smaller than the caller asked for. */
+  if (size && nmemb > (size_t)-1 / size)
+    return 0;
+
+  total = nmemb * size;
+  p = _soxr_simd_aligned_malloc(total);
+  if (p)
+    memset(p, 0, total);
+  return p;
+}
+
+
+
+/* Release a block obtained from _soxr_simd_aligned_malloc() or
+ * _soxr_simd_aligned_calloc().  The pointer originally returned by malloc()
+ * is read back from the slot just below `p1'.  A null `p1' is ignored. */
+void _soxr_simd_aligned_free(void * p1)
+{
+  void * * slot;
+
+  if (!p1)
+    return;
+  slot = (void * *)p1 - 1;
+  free(*slot);
+}
+
+
+
+/* Vectorised in-place multiply of the n-float array `a' by `b': elements 0
+ * and 1 end up as the plain products a[0]*b[0] and a[1]*b[1]; all other
+ * elements are multiplied as (real, imaginary) pairs.  Both arrays must
+ * satisfy VALIGNED (asserted).  `not_used' is ignored. */
+void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float * b)
+{
+  int i;
+  float ab0, ab1;
+  v4sf       * /*RESTRICT*/ va = (v4sf       *)a;
+  v4sf const *   RESTRICT   vb = (v4sf const *)b;
+  assert(VALIGNED(a) && VALIGNED(b));
+  /* Compute these now: the loop below overwrites a[0] and a[1] with a
+   * complex product, which is replaced again after the loop. */
+  ab0 = a[0] * b[0], ab1 = a[1] * b[1];
+  /* Two vectors per pass: split them into real and imaginary parts,
+   * multiply, then interleave the result again. */
+  for (i = 0; i < n / 4; i += 2) {
+    v4sf a1r = va[i+0], a1i = va[i+1];
+    v4sf b1r = vb[i+0], b1i = vb[i+1];
+    UNINTERLEAVE2(a1r, a1i, a1r, a1i);
+    UNINTERLEAVE2(b1r, b1i, b1r, b1i);
+    VCPLXMUL(a1r, a1i, b1r, b1i);
+    INTERLEAVE2(a1r, a1i, a1r, a1i);
+    va[i+0] = a1r, va[i+1] = a1i;
+  }
+  a[0] = ab0, a[1] = ab1;
+  (void)not_used;
+}
+
+
+
+/* Vectorised in-place multiply for a partial array: a[0] ends up as the
+ * plain product a[0]*b[0], the other elements below n are multiplied as
+ * (real, imaginary) pairs, and a[1] is finally set to the real part of the
+ * complex product of the pair at index n (so both arrays must extend to at
+ * least index n+1).  Both arrays must satisfy VALIGNED (asserted). */
+void _soxr_ordered_partial_convolve_simd(int n, float * a, const float * b)
+{
+  int i;
+  float ab0;
+  v4sf       * /*RESTRICT*/ va = (v4sf       *)a;
+  v4sf const *   RESTRICT   vb = (v4sf const *)b;
+  assert(VALIGNED(a) && VALIGNED(b));
+  /* Compute this now: the loop below overwrites a[0] and a[1]. */
+  ab0 = a[0] * b[0];
+  /* Two vectors per pass: split them into real and imaginary parts,
+   * multiply, then interleave the result again. */
+  for (i = 0; i < n / 4; i += 2) {
+    v4sf a1r = va[i+0], a1i = va[i+1];
+    v4sf b1r = vb[i+0], b1i = vb[i+1];
+    UNINTERLEAVE2(a1r, a1i, a1r, a1i);
+    UNINTERLEAVE2(b1r, b1i, b1r, b1i);
+    VCPLXMUL(a1r, a1i, b1r, b1i);
+    INTERLEAVE2(a1r, a1i, a1r, a1i);
+    va[i+0] = a1r, va[i+1] = a1i;
+  }
+  a[0] = ab0;
+  a[1] = b[n] * a[n] - b[n+1] * a[n+1];
+}
diff --git a/src/simd.h b/src/simd.h

new file mode 100644 (file)

index 0000000..71eefc6
--- /dev/null
+++ b/src/simd.h
@@ -0,0 +1,16 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#if !defined simd_included
+#define simd_included
+
+#include <stddef.h>
+
+/* Aligned allocation: blocks obtained from the malloc/calloc variants must
+ * be released with _soxr_simd_aligned_free(), never with free(). */
+void * _soxr_simd_aligned_malloc(size_t);
+void * _soxr_simd_aligned_calloc(size_t, size_t);
+void _soxr_simd_aligned_free(void *);
+
+/* In-place multiplication of `a' by `b'; both must be suitably aligned. */
+void _soxr_ordered_convolve_simd(int n, void * not_used, float * a, const float * b);
+void _soxr_ordered_partial_convolve_simd(int n, float * a, const float * b);
+
+#endif
diff --git a/src/soxr-lsr.h b/src/soxr-lsr.h

new file mode 100644 (file)

index 0000000..c0923aa
--- /dev/null
+++ b/src/soxr-lsr.h
@@ -0,0 +1,80 @@
+/* SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* Wrapper compatible with `libsamplerate' (constant-rate).
+ * (Libsoxr's native API can be found in soxr.h).  */
+
+#if !defined SAMPLERATE_H
+#define SAMPLERATE_H
+#if defined __cplusplus
+  extern "C" {
+#endif
+
+/* SOXR decorates each function declared below (and is #undef'd afterwards):
+ *  - with SOXR_DLL defined: dllexport if soxr_lsr_EXPORTS is also defined,
+ *    otherwise dllimport;
+ *  - with SOXR_VISIBILITY defined and GCC 4.1 or later: default visibility;
+ *  - otherwise: nothing. */
+#if defined SOXR_DLL
+  #if defined soxr_lsr_EXPORTS
+    #define SOXR __declspec(dllexport)
+  #else
+    #define SOXR __declspec(dllimport)
+  #endif
+#elif defined SOXR_VISIBILITY && defined __GNUC__ && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+  #define SOXR __attribute__ ((visibility("default")))
+#else
+  #define SOXR
+#endif
+
+typedef float   SRC_SAMPLE;     /* Sample type used by this interface. */
+/* The remaining types are declared only when SOXR_LIB is not defined
+ * (presumably the library supplies its own definitions when building
+ * itself -- TODO confirm).  The names follow libsamplerate's API. */
+#if !defined SOXR_LIB
+enum SRC_SRCTYPE_e {SRC_SINC_BEST_QUALITY, SRC_SINC_MEDIUM_QUALITY,
+                    SRC_SINC_FASTEST, SRC_ZERO_ORDER_HOLD, SRC_LINEAR};
+typedef int     SRC_SRCTYPE;    /* Holds one of the SRC_SRCTYPE_e values. */
+typedef int     SRC_ERROR;
+typedef long    (* src_callback_t)(void *, SRC_SAMPLE * *);
+typedef struct  SRC_STATE SRC_STATE;   /* Opaque to clients. */
+/* Parameter block passed to src_process() and src_simple(). */
+typedef struct  SRC_DATA {
+  SRC_SAMPLE    * data_in, * data_out;
+  long          input_frames, output_frames;
+  long          input_frames_used, output_frames_gen;
+  int           end_of_input;
+  double        src_ratio;
+} SRC_DATA;
+#endif
+/* Functions of the libsamplerate-like interface.  Only the signatures are
+ * given here; for what each function does, see the implementation or
+ * libsamplerate's own documentation. */
+SOXR SRC_STATE *   src_new(SRC_SRCTYPE, int num_channels, SRC_ERROR *);
+SOXR SRC_ERROR     src_process  (SRC_STATE *, SRC_DATA *);
+SOXR SRC_ERROR     src_set_ratio(SRC_STATE *, double);
+SOXR SRC_ERROR     src_reset    (SRC_STATE *);
+SOXR SRC_ERROR     src_error    (SRC_STATE *);
+SOXR SRC_STATE *   src_delete   (SRC_STATE *);
+SOXR SRC_STATE *   src_callback_new(
+                    src_callback_t, SRC_SRCTYPE, int, SRC_ERROR *, void *);
+SOXR long          src_callback_read(
+                    SRC_STATE *, double src_ratio, long, SRC_SAMPLE *);
+SOXR SRC_ERROR     src_simple(SRC_DATA *, SRC_SRCTYPE, int);
+SOXR char const *  src_get_name(SRC_SRCTYPE);
+SOXR char const *  src_get_description(SRC_SRCTYPE);
+SOXR char const *  src_get_version(void);
+SOXR char const *  src_strerror(SRC_ERROR);
+SOXR int           src_is_valid_ratio(double);
+SOXR void          src_short_to_float_array(short const *, float *, int);
+SOXR void          src_float_to_short_array(float const *, short *, int);
+SOXR void          src_int_to_float_array(int const *, float *, int);
+SOXR void          src_float_to_int_array(float const *, int *, int);
+
+#undef SOXR
+#if defined __cplusplus
+  }
+#endif
+#endif
diff --git a/src/soxr-lsr.pc.in b/src/soxr-lsr.pc.in

new file mode 100644 (file)

index 0000000..7b75757
--- /dev/null
+++ b/src/soxr-lsr.pc.in
@@ -0,0 +1,5 @@
+Name: ${LSR}
+Description: ${DESCRIPTION_SUMMARY} (with libsamplerate-like bindings)
+Version: ${PROJECT_VERSION}
+Libs: -L${LIB_INSTALL_DIR} -l${LSR}
+Cflags: -I${INCLUDE_INSTALL_DIR}
diff --git a/src/soxr.c b/src/soxr.c

new file mode 100644 (file)

index 0000000..93dd9c1
--- /dev/null
+++ b/src/soxr.c
@@ -0,0 +1,634 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "soxr.h"
+#include "data-io.h"
+#include "internal.h"
+
+
+
+/* Report the library name and release number as one static string. */
+char const * soxr_version(void)
+{
+  /* Adjacent literals are joined by the compiler: "libsoxr-" + "x.y.z". */
+  static char const version_string[] = "libsoxr-" SOXR_THIS_VERSION_STR;
+  return version_string;
+}
+
+
+
+/* Type-erased plumbing between this front end and a resampling engine.
+ * An engine is described by a `control block': an array of 10 generic
+ * function pointers.  Each resampler_xxx macro below casts one slot back to
+ * its real signature; the macros expand to an expression that uses a
+ * variable named `p' (a soxr_t), so they may only be used where such a
+ * variable is in scope. */
+typedef void sample_t; /* float or double */
+typedef void (* fn_t)(void);
+typedef fn_t control_block_t[10];
+
+#define resampler_input        (*(sample_t * (*)(void *, sample_t * samples, size_t   n))p->control_block[0])
+#define resampler_process      (*(void (*)(void *, size_t))p->control_block[1])
+#define resampler_output       (*(sample_t const * (*)(void *, sample_t * samples, size_t * n))p->control_block[2])
+#define resampler_flush        (*(void (*)(void *))p->control_block[3])
+#define resampler_close        (*(void (*)(void *))p->control_block[4])
+#define resampler_delay        (*(double (*)(void *))p->control_block[5])
+#define resampler_sizes        (*(void (*)(size_t * shared, size_t * channel))p->control_block[6])
+#define resampler_create       (*(char const * (*)(void * channel, void * shared, double io_ratio, soxr_quality_spec_t * q_spec, soxr_runtime_spec_t * r_spec, double scale))p->control_block[7])
+#define resampler_set_io_ratio (*(void (*)(void *, double io_ratio, size_t len))p->control_block[8])
+#define resampler_id           (*(char const * (*)(void))p->control_block[9])
+
+/* Opaque engine state, and the signatures of the routines that convert
+ * between the caller's sample format and the engine's per-channel buffers. */
+typedef void * resampler_t; /* For one channel. */
+typedef void * resampler_shared_t; /* Between channels. */
+typedef void (* deinterleave_t)(sample_t * * dest,
+    soxr_datatype_t data_type, void const * * src0, size_t n, unsigned ch);
+typedef size_t (* interleave_t)(soxr_datatype_t data_type, void * * dest,
+    sample_t const * const * src, size_t, unsigned, unsigned long *);
+
+/* State of one multi-channel stream resampler (the object behind soxr_t). */
+struct soxr {
+  unsigned num_channels;
+  double io_ratio;              /* Input rate divided by output rate. */
+  soxr_error_t error;           /* 0, or the current error string. */
+  soxr_quality_spec_t q_spec;
+  soxr_io_spec_t io_spec;
+  soxr_runtime_spec_t runtime_spec;
+
+  void * input_fn_state;        /* Passed back to input_fn on each call. */
+  soxr_input_fn_t input_fn;     /* Optional app-supplied data source. */
+  size_t max_ilen;              /* Upper limit on input_fn's request size. */
+
+  resampler_shared_t shared;    /* Engine state common to all channels. */
+  resampler_t * resamplers;     /* One engine instance per channel. */
+  control_block_t control_block;/* Entry points of the selected engine. */
+  deinterleave_t deinterleave;  /* Caller's input format -> engine buffers. */
+  interleave_t interleave;      /* Engine buffers -> caller's output format. */
+
+  void * * channel_ptrs;        /* Per-channel scratch pointers. */
+  size_t clips;                 /* Running total of interleave's results. */
+  unsigned long seed;           /* Given to interleave unless SOXR_NO_DITHER. */
+  int flushing;                 /* Non-0 once end-of-input has been signalled. */
+};
+
+
+
+/* TODO: these should not be here. */
+/* The macro argument is parenthesised so that any expression may be given. */
+#define TO_3dB(a)       ((1.6e-6*(a)-7.5e-4)*(a)+.646)
+#define LOW_Q_BW0       (1385 / 2048.) /* 0.67625 rounded to be a FP exact. */
+
+/* Build a quality specification from a `recipe' and a set of flags.
+ *
+ * recipe: bits 0-3 select the quality level (SOXR_QQ ... SOXR_LSR2Q; 11 and
+ *         12 are treated as 0, 13 as 6, anything above 13 is an error);
+ *         bits 4-5 select the phase response (SOXR_LINEAR_PHASE,
+ *         SOXR_INTERMEDIATE_PHASE, SOXR_MINIMUM_PHASE); SOXR_STEEP_FILTER
+ *         may be or-ed in to narrow the transition band.
+ * flags:  copied to the result's `flags'; for the lowest-quality recipes the
+ *         roll-off bits are forced to SOXR_ROLLOFF_MEDIUM.
+ *
+ * Returns the specification by value.  For an invalid quality level every
+ * field is 0 except `e', which points to an error string. */
+soxr_quality_spec_t soxr_quality_spec(unsigned long recipe, unsigned long flags)
+{
+  soxr_quality_spec_t spec, * p = &spec;
+  unsigned quality = recipe & 0xf;
+  double rej;
+  memset(p, 0, sizeof(*p));
+  if (quality > 13) {
+    p->e = "invalid quality type";
+    return spec;
+  }
+  if (quality == 13)
+    quality = 6;
+  else if (quality > 10)
+    quality = 0;
+  /* The phase selector lives in bits 4-5, so it must be shifted down by 4
+   * (a shift of 8 always gave index 0, i.e. linear phase whatever was asked
+   * for).  The table holds, in octal, 50, 25 and 100; the string's
+   * terminating NUL supplies 0 for index 3 (SOXR_MINIMUM_PHASE). */
+  p->phase_response = "\62\31\144"[(recipe & 0x30) >> 4];
+  p->stopband_begin = 1;
+  p->precision = !quality? 0: quality < 3? 16 : quality < 8? 4 + quality * 4 : 55 - quality * 4;
+  rej = p->precision * linear_to_dB(2.);
+  p->flags = flags;
+  if (quality < 8) {
+    p->passband_end = quality == 1? LOW_Q_BW0 : 1 - .05 / TO_3dB(rej);
+    if (quality <= 2)
+      p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM;
+  }
+  else {
+    /* Pass-band ends for the libsamplerate-equivalent levels 8, 9 and 10. */
+    static float const bw[] = {.931f, .832f, .663f};
+    p->passband_end = bw[quality - 8];
+    if (quality - 8 == 2)
+      p->flags &= ~SOXR_ROLLOFF_NONE, p->flags |= SOXR_ROLLOFF_MEDIUM;
+  }
+  if (recipe & SOXR_STEEP_FILTER)
+    p->passband_end = 1 - .01 / TO_3dB(rej);
+  return spec;
+}
+
+
+
+/* Name of the resampling engine selected for this resampler, as reported
+ * by slot 9 of its control block. */
+char const * soxr_engine(soxr_t p)
+{
+  char const * engine_name = resampler_id();
+  return engine_name;
+}
+
+
+
+/* Give the caller direct (read/write) access to the clip counter. */
+size_t * soxr_num_clips(soxr_t p)
+{
+  size_t * counter = &p->clips;
+  return counter;
+}
+
+
+
+soxr_error_t soxr_error(soxr_t p)
+{
+  return p->error;
+}
+
+
+
+/* Make a runtime specification holding the default DFT and coefficient
+ * sizes together with the requested number of threads; all other fields
+ * are zero. */
+soxr_runtime_spec_t soxr_runtime_spec(unsigned num_threads)
+{
+  soxr_runtime_spec_t result;
+
+  memset(&result, 0, sizeof(result));
+  result.num_threads = num_threads;
+  result.coef_size_kbytes = 400;
+  result.log2_large_dft_size = 17;
+  result.log2_min_dft_size = 10;
+  return result;
+}
+
+
+
+/* Make an I/O specification for the given input and output datatypes, with
+ * unity gain.  If either datatype is out of range, the result is all-zero
+ * except for `e', which points to an error string. */
+soxr_io_spec_t soxr_io_spec(
+  soxr_datatype_t itype,
+  soxr_datatype_t otype)
+{
+  soxr_io_spec_t result;
+
+  memset(&result, 0, sizeof(result));
+  if ((itype | otype) < SOXR_SPLIT * 2) {
+    result.scale = 1;
+    result.otype = otype;
+    result.itype = itype;
+  }
+  else
+    result.e = "invalid io datatype(s)";
+  return result;
+}
+
+
+
+#if HAVE_SIMD
+/* Run-time test for SIMD support on the host CPU.
+ * 64-bit x86 builds answer true unconditionally.  32-bit x86 builds (GCC or
+ * MSVC) execute CPUID with EAX = 1 and report whether bit 25 or bit 26 of
+ * EDX is set (mask 0x06000000; the SSE and SSE2 feature flags according to
+ * the CPUID specification).  Any other target answers false. */
+static bool cpu_has_simd(void)
+{
+#if defined __x86_64__ || defined _M_X64
+  return true;
+#elif defined __GNUC__ && defined i386
+  uint32_t eax, ebx, ecx, edx;
+  /* EBX is saved and restored by hand around CPUID, with its value copied
+   * out to a compiler-chosen register in between. */
+  __asm__ __volatile__ (
+      "pushl %%ebx   \n\t"
+      "cpuid         \n\t"
+      "movl %%ebx, %1\n\t"
+      "popl %%ebx    \n\t"
+      : "=a"(eax), "=r"(ebx), "=c"(ecx), "=d"(edx)
+      : "a"(1)
+      : "cc" );
+  return !!(edx & 0x06000000);
+#elif defined _MSC_VER && defined _M_IX86
+  uint32_t d;
+  __asm {
+    xor     eax, eax
+    inc     eax
+    push    ebx
+    cpuid
+    pop     ebx
+    mov     d, edx
+  }
+  return !!(d & 0x06000000);
+#endif
+  return false;
+}
+#endif
+
+extern control_block_t _soxr_rate32s_cb, _soxr_rate32_cb, _soxr_rate64_cb, _soxr_vr32_cb;
+
+
+
+/* Create a stream resampler.
+ *
+ * input_rate, output_rate: sample rates; only their ratio is used.  If both
+ *     are 0 the ratio is left unset; if exactly one is 0 the ratio is
+ *     invalid and creation fails once it is applied.
+ * num_channels: if 0, engine set-up is deferred.
+ * error0:       if non-null, receives 0 or the error string.
+ * io_spec, q_spec, runtime_spec: optional; defaults are unity-gain I/O,
+ *     soxr_quality_spec(SOXR_HQ, 0) and soxr_runtime_spec(1).
+ *
+ * Returns the new resampler, or 0 on error. */
+soxr_t soxr_create(
+  double input_rate, double output_rate,
+  unsigned num_channels,
+  soxr_error_t * error0,
+  soxr_io_spec_t const * io_spec,
+  soxr_quality_spec_t const * q_spec,
+  soxr_runtime_spec_t const * runtime_spec)
+{
+  /* in/out when both rates are non-0; 0 when both are 0; otherwise -1. */
+  double io_ratio = output_rate? input_rate? input_rate / output_rate : -1 : input_rate? -1 : 0;
+  /* Indexed by the low two bits of a soxr_datatype_t. */
+  static const float datatype_full_scale[] = {1, 1, 65536.*32768, 32768};
+  soxr_t p = 0;
+  soxr_error_t error = 0;
+
+  if (q_spec && q_spec->e)  error = q_spec->e;
+  else if (io_spec && (io_spec->itype | io_spec->otype) >= SOXR_SPLIT * 2)
+    error = "invalid io datatype(s)";
+
+  if (!error && !(p = calloc(sizeof(*p), 1))) error = "malloc failed";
+
+  if (p) {
+    p->q_spec = q_spec? *q_spec : soxr_quality_spec(SOXR_HQ, 0);
+
+    if (q_spec) { /* Backwards compatibility with original API: */
+      if (p->q_spec.passband_end > 2)
+        p->q_spec.passband_end /= 100;
+      if (p->q_spec.stopband_begin > 2)
+        p->q_spec.stopband_begin = 2 - p->q_spec.stopband_begin / 100;
+    }
+
+    p->io_ratio = io_ratio;
+    p->num_channels = num_channels;
+    if (io_spec)
+      p->io_spec = *io_spec;
+    else
+      p->io_spec.scale = 1;
+
+    p->runtime_spec = runtime_spec? *runtime_spec : soxr_runtime_spec(1);
+    /* Fold the change of full-scale between the two datatypes into the gain. */
+    p->io_spec.scale *= datatype_full_scale[p->io_spec.otype & 3] /
+                        datatype_full_scale[p->io_spec.itype & 3];
+    p->seed = (unsigned long)time(0) ^ (unsigned long)(size_t)p;
+
+    /* Engine selection: the single-precision engines are used if double
+     * precision is not built in, if the precision is at most 20 bits and
+     * SOXR_DOUBLE_PRECISION was not requested, or if SOXR_VR is set;
+     * otherwise the double-precision engine is used. */
+#if HAVE_SINGLE_PRECISION
+    if (!HAVE_DOUBLE_PRECISION || (p->q_spec.precision <= 20 && !(p->q_spec.flags & SOXR_DOUBLE_PRECISION))
+        || (p->q_spec.flags & SOXR_VR)) {
+      p->deinterleave = (deinterleave_t)_soxr_deinterleave_f;
+      p->interleave = (interleave_t)_soxr_interleave_f;
+      memcpy(&p->control_block,
+          (p->q_spec.flags & SOXR_VR)? &_soxr_vr32_cb :
+#if HAVE_SIMD
+          cpu_has_simd()? &_soxr_rate32s_cb :
+#endif
+          &_soxr_rate32_cb, sizeof(p->control_block));
+    }
+#if HAVE_DOUBLE_PRECISION
+    else
+#endif
+#endif
+#if HAVE_DOUBLE_PRECISION
+    {
+      p->deinterleave = (deinterleave_t)_soxr_deinterleave;
+      p->interleave = (interleave_t)_soxr_interleave;
+      memcpy(&p->control_block, &_soxr_rate64_cb, sizeof(p->control_block));
+    }
+#endif
+
+    /* Engines are only set up once both channel count and ratio are known. */
+    if (p->num_channels && io_ratio)
+      error = soxr_set_io_ratio(p, io_ratio, 0);
+  }
+  if (error)
+    soxr_delete(p), p = 0;
+  if (error0)
+    *error0 = error;
+  return p;
+}
+
+
+
+/* Register (or replace) the application's input function, the state pointer
+ * to hand back to it, and the largest block it may be asked for.
+ * Always returns 0. */
+soxr_error_t soxr_set_input_fn(soxr_t p,
+    soxr_input_fn_t input_fn, void * input_fn_state, size_t max_ilen)
+{
+  /* A limit of 0 means `no limit': store the largest size_t instead. */
+  if (!max_ilen)
+    max_ilen = (size_t)-1;
+
+  p->max_ilen = max_ilen;
+  p->input_fn = input_fn;
+  p->input_fn_state = input_fn_state;
+  return 0;
+}
+
+
+
+/* Release everything owned by the resampler, then zero the whole structure
+ * (the structure itself is not freed). */
+static void soxr_delete0(soxr_t p)
+{
+  unsigned ch = 0;
+
+  /* Close and free each channel's engine instance, if the array exists. */
+  while (p->resamplers && ch < p->num_channels) {
+    resampler_t channel = p->resamplers[ch++];
+    if (channel)
+      resampler_close(channel);
+    free(channel);
+  }
+
+  free(p->resamplers);
+  free(p->channel_ptrs);
+  free(p->shared);
+
+  memset(p, 0, sizeof(*p));
+}
+
+
+
+/* Delay reported by the first channel's engine, or 0 if the resampler is
+ * null, in error, or has no engines yet. */
+double soxr_delay(soxr_t p)
+{
+  if (!p || p->error || !p->resamplers)
+    return 0;
+  return resampler_delay(p->resamplers[0]);
+}
+
+
+
+static soxr_error_t fatal_error(soxr_t p, soxr_error_t error)
+{
+  soxr_delete0(p);
+  return p->error = error;
+}
+
+
+
+/* Allocate the shared state, the per-channel pointer array and one engine
+ * instance per channel, then create each engine.  Returns 0 on success; on
+ * any failure the resampler is torn down via fatal_error and the error
+ * string is returned. */
+static soxr_error_t initialise(soxr_t p)
+{
+  size_t shared_bytes, per_channel_bytes;
+  unsigned ch;
+
+  /* Ask the selected engine how much memory it needs. */
+  resampler_sizes(&shared_bytes, &per_channel_bytes);
+
+  p->channel_ptrs = calloc(sizeof(*p->channel_ptrs), p->num_channels);
+  p->shared = calloc(shared_bytes, 1);
+  p->resamplers = calloc(sizeof(*p->resamplers), p->num_channels);
+  if (!(p->shared && p->channel_ptrs && p->resamplers))
+    return fatal_error(p, "malloc failed");
+
+  for (ch = 0; ch < p->num_channels; ++ch) {
+    soxr_error_t create_error;
+
+    p->resamplers[ch] = calloc(per_channel_bytes, 1);
+    if (!p->resamplers[ch])
+      return fatal_error(p, "malloc failed");
+
+    create_error = resampler_create(p->resamplers[ch], p->shared,
+        p->io_ratio, &p->q_spec, &p->runtime_spec, p->io_spec.scale);
+    if (create_error)
+      return fatal_error(p, create_error);
+  }
+  return 0;
+}
+
+
+
+/* Set the channel count of a resampler whose engines have not been created
+ * yet, then apply the stored I/O ratio.  Asking for the count already in
+ * force just returns the current error status. */
+soxr_error_t soxr_set_num_channels(soxr_t p, unsigned num_channels)
+{
+  if (!p)
+    return "invalid soxr_t pointer";
+  if (p->num_channels == num_channels)
+    return p->error;                       /* Nothing to change. */
+  if (num_channels == 0)
+    return "invalid # of channels";
+  if (p->resamplers != 0)
+    return "# of channels can't be changed";
+
+  p->num_channels = num_channels;
+  return soxr_set_io_ratio(p, p->io_ratio, 0);
+}
+
+
+
+/* Set the input/output rate ratio.
+ * The first successful call creates the engines.  Later calls are passed on
+ * to each channel's engine if it provides a set-ratio entry point (control
+ * block slot 8); otherwise they succeed only if the ratio is unchanged.
+ * Returns 0 on success, else an error string. */
+soxr_error_t soxr_set_io_ratio(soxr_t p, double io_ratio, size_t slew_len)
+{
+  unsigned ch;
+
+  if (!p)
+    return "invalid soxr_t pointer";
+  if (p->error)
+    return p->error;
+  if (!p->num_channels)
+    return "must set # channels before O/I ratio";
+  if (io_ratio <= 0)
+    return "I/O ratio out-of-range";
+
+  if (!p->channel_ptrs) {                  /* Engines not yet created. */
+    p->io_ratio = io_ratio;
+    return initialise(p);
+  }
+
+  if (!p->control_block[8]) {              /* Fixed-ratio engine. */
+    if (fabs(p->io_ratio - io_ratio) < 1e-15)
+      return 0;
+    return "Varying O/I ratio is not supported with this quality level";
+  }
+
+  for (ch = 0; ch < p->num_channels; ++ch)
+    resampler_set_io_ratio(p->resamplers[ch], io_ratio, slew_len);
+  return 0;
+}
+
+
+
+/* Free a resampler and everything it owns; a null pointer is ignored. */
+void soxr_delete(soxr_t p)
+{
+  if (!p)
+    return;
+  soxr_delete0(p);
+  free(p);
+}
+
+
+
+/* Discard all signal state, keeping the configuration: the specs, channel
+ * count, selected engine and conversion routines, and the input function
+ * with its state and request-size limit.  The engines themselves are freed
+ * and the I/O ratio is not carried over, so soxr_set_io_ratio must be called
+ * again before more data is processed.
+ * Returns 0, or an error string if p is null. */
+soxr_error_t soxr_clear(soxr_t p) /* TODO: this, properly. */
+{
+  if (p) {
+    struct soxr tmp = *p;
+    soxr_delete0(p);
+    memset(p, 0, sizeof(*p));
+    p->input_fn = tmp.input_fn;
+    p->input_fn_state = tmp.input_fn_state;
+    /* Keep the limit together with the input function: left at 0 it would
+     * make soxr_output request zero samples from the input function. */
+    p->max_ilen = tmp.max_ilen;
+    p->runtime_spec = tmp.runtime_spec;
+    p->q_spec = tmp.q_spec;
+    p->io_spec = tmp.io_spec;
+    p->num_channels = tmp.num_channels;
+    memcpy(p->control_block, tmp.control_block, sizeof(p->control_block));
+    p->deinterleave = tmp.deinterleave;
+    p->interleave = tmp.interleave;
+    return 0;
+  }
+  return "invalid soxr_t pointer";
+}
+
+
+
+/* Feed `len' samples from one channel's source buffer to that channel's
+ * engine: reserve space in the engine's input, then convert into it. */
+static void soxr_input_1ch(soxr_t p, unsigned i, soxr_cbuf_t src, size_t len)
+{
+  sample_t * channel_buf;
+
+  channel_buf = resampler_input(p->resamplers[i], NULL, len);
+  p->deinterleave(&channel_buf, p->io_spec.itype, &src, len, 1);
+}
+
+
+
+/* Feed `len' samples per channel to the engines.
+ * `in' is either one interleaved buffer or, for split input datatypes, an
+ * array of per-channel buffers.  A length of 0 marks end-of-input.
+ * Returns the number of samples per channel consumed: `len', or 0 if the
+ * resampler is null or in error, if `in' is null (which also sets the error
+ * status), or if `len' is 0. */
+static size_t soxr_input(soxr_t p, void const * in, size_t len)
+{
+  bool separated;
+  unsigned i;
+  if (!p || p->error) return 0;
+  if (!in && len) {p->error = "null input buffer pointer"; return 0;}
+  if (!len) {
+    p->flushing = true;
+    return 0;
+  }
+  /* Read the I/O spec only now that p is known to be non-null. */
+  separated = !!(p->io_spec.itype & SOXR_SPLIT);
+  if (separated)
+    for (i = 0; i < p->num_channels; ++i)
+      soxr_input_1ch(p, i, ((soxr_cbufs_t)in)[i], len);
+  else {
+    /* Reserve room in every engine first, then deinterleave in one pass. */
+    for (i = 0; i < p->num_channels; ++i)
+      p->channel_ptrs[i] = resampler_input(p->resamplers[i], NULL, len);
+    (*p->deinterleave)(
+        (sample_t **)p->channel_ptrs, p->io_spec.itype, &in, len, p->num_channels);
+  }
+  return len;
+}
+
+
+
+/* Run one channel's engine and fetch up to `len' output samples.
+ * For split output the samples are converted straight into `dest' and any
+ * clipping is added to the clip counter; otherwise the engine's output
+ * pointer is parked in channel_ptrs[i] for the caller to interleave.
+ * Returns the number of samples the engine produced. */
+static size_t soxr_output_1ch(soxr_t p, unsigned i, soxr_buf_t dest, size_t len, bool separated)
+{
+  resampler_t channel = p->resamplers[i];
+  sample_t const * produced;
+  unsigned long * dither_seed;
+
+  if (p->flushing)
+    resampler_flush(channel);
+  resampler_process(channel, len);
+  produced = resampler_output(channel, NULL, &len);
+
+  if (!separated) {
+    p->channel_ptrs[i] = (void /* const */ *)produced;
+    return len;
+  }
+
+  dither_seed = (p->io_spec.flags & SOXR_NO_DITHER)? 0 : &p->seed;
+  p->clips += (p->interleave)(p->io_spec.otype, &dest, &produced,
+      len, 1, dither_seed);
+  return len;
+}
+
+
+
+/* Produce up to `len' output samples per channel from data already fed to
+ * the engines, without calling the input function.
+ * `out' is either one interleaved buffer or, for split output datatypes, an
+ * array of per-channel buffers.  When built with OpenMP, channels are
+ * processed in parallel if runtime_spec.num_threads is 0 and there is more
+ * than one channel.
+ * Returns the number of samples per channel written. */
+static size_t soxr_output_no_callback(soxr_t p, soxr_buf_t out, size_t len)
+{
+  unsigned u;
+  size_t done = 0;
+  bool separated = !!(p->io_spec.otype & SOXR_SPLIT);
+  /* `out' is an array of per-channel pointers only for split output; it must
+   * not be indexed as one otherwise (that would read sample memory as
+   * pointers, possibly beyond the end of a small buffer), nor when null. */
+  bool have_bufs = separated && out;
+#if defined _OPENMP
+  int i;
+  if (!p->runtime_spec.num_threads && p->num_channels > 1)
+#pragma omp parallel for
+  for (i = 0; i < (int)p->num_channels; ++i) {
+    size_t done1;
+    done1 = soxr_output_1ch(p, (unsigned)i,
+        have_bufs? ((soxr_bufs_t)out)[i] : 0, len, separated);
+    if (!i)
+      done = done1;
+  } else
+#endif
+  for (u = 0; u < p->num_channels; ++u)
+    done = soxr_output_1ch(p, u,
+        have_bufs? ((soxr_bufs_t)out)[u] : 0, len, separated);
+
+  if (!separated)
+    p->clips += (p->interleave)(p->io_spec.otype, &out, (sample_t const * const *)p->channel_ptrs,
+        done, p->num_channels, (p->io_spec.flags & SOXR_NO_DITHER)? 0 : &p->seed);
+  return done;
+}
+
+
+
+/* Resample and output up to `len0' samples per channel into `out', pulling
+ * more input from the registered input function (if any) as needed.
+ * Returns the number of samples per channel written; 0 if the resampler is
+ * null or already in error, or if `out' is null while `len0' is non-zero
+ * (which also sets the error status).
+ * NOTE(review): `out' is advanced as a single contiguous buffer between
+ * iterations; this looks unsuitable for split (per-channel) output when an
+ * input function is in use -- confirm. */
+size_t soxr_output(soxr_t p, void * out, size_t len0)
+{
+  size_t odone, odone0 = 0, olen = len0, osize, idone;
+  size_t ilen;
+  void const * in = out; /* Set to !=0, so that caller may leave unset. */
+  bool was_flushing;
+
+  if (!p || p->error) return 0;
+  if (!out && len0) {p->error = "null output buffer pointer"; return 0;}
+
+  /* Size of each input request; computed only now that p is known to be
+   * non-null. */
+  ilen = min(p->max_ilen, (size_t)ceil((double)olen *p->io_ratio));
+
+  do {
+    odone = soxr_output_no_callback(p, out, olen);
+    odone0 += odone;
+    if (odone0 == len0 || !p->input_fn || p->flushing)
+      break;
+
+    osize = soxr_datatype_size(p->io_spec.otype) * p->num_channels;
+    out = (char *)out + osize * odone;
+    olen -= odone;
+    idone = p->input_fn(p->input_fn_state, &in, ilen);
+    was_flushing = p->flushing;
+    if (!in)
+      p->error = "input function reported failure";
+    else soxr_input(p, in, idone);
+    /* Go round again if anything moved, or if end-of-input has just been
+     * signalled and the engines still need to be flushed. */
+  } while (odone || idone || (!was_flushing && p->flushing));
+  return odone0;
+}
+
+
+
+/* Estimate how many input samples are needed to produce `olen' output
+ * samples: ceil(olen * io_ratio), limited to the `ilen' samples available.
+ * The strict-buffering branch is compiled out. */
+static size_t soxr_i_for_o(soxr_t p, size_t olen, size_t ilen)
+{
+  size_t result;
+#if 0
+  if (p->runtime_spec.flags & SOXR_STRICT_BUFFERING)
+    result = rate_i_for_o(p->resamplers[0], olen);
+  else
+#endif
+    result = (size_t)ceil((double)olen * p->io_ratio);
+  return min(result, ilen);
+}
+
+
+
+#if 0
+/* Compiled out.  Counterpart of soxr_i_for_o: ceil(ilen / io_ratio) output
+ * samples for `ilen' input samples, limited to `olen'. */
+static size_t soxr_o_for_i(soxr_t p, size_t ilen, size_t olen)
+{
+  size_t result = (size_t)ceil((double)ilen / p->io_ratio);
+  return min(result, olen);
+}
+#endif
+
+
+
+/* Feed input to, and collect output from, a stream resampler in one call.
+ *
+ * in, ilen0:  input buffer(s) and length in samples per channel.  A null
+ *             `in' signals end-of-input; so does a length whose value is
+ *             negative when viewed as ptrdiff_t, in which case the real
+ *             length is its bitwise complement (soxr_oneshot uses this).
+ * idone0:     if non-null, receives the number of input samples consumed;
+ *             supplying it also limits the input taken to what `olen'
+ *             output samples are estimated to need.
+ * out, olen:  output buffer(s) and capacity in samples per channel.
+ * odone0:     if non-null, receives the number of output samples written.
+ *
+ * Returns "null pointer" if p is null, otherwise the resampler's error
+ * status.
+ * NOTE(review): in the OpenMP branch each iteration calls soxr_output_1ch,
+ * which updates p->clips and may pass &p->seed; these look shared between
+ * threads without synchronisation -- confirm. */
+soxr_error_t soxr_process(soxr_t p,
+    void const * in , size_t ilen0, size_t * idone0,
+    void       * out, size_t olen , size_t * odone0)
+{
+  size_t ilen, idone, odone = 0;
+  unsigned u;
+  bool flush_requested = false;
+
+  if (!p) return "null pointer";
+
+  if (!in)
+    flush_requested = true, ilen = ilen0 = 0;
+  else {
+    if ((ptrdiff_t)ilen0 < 0)
+      flush_requested = true, ilen0 = ~ilen0;
+    if (idone0 && (1 || flush_requested))
+      ilen = soxr_i_for_o(p, olen, ilen0);
+    else
+      ilen = ilen0/*, olen = soxr_o_for_i(p, ilen, olen)*/;
+  }
+  /* Start flushing only once all of the supplied input is being taken. */
+  p->flushing |= ilen == ilen0 && flush_requested;
+
+  if (!out && !in)
+    idone = ilen;
+  else if (p->io_spec.itype & p->io_spec.otype & SOXR_SPLIT) { /* Both i & o */
+    /* Split input and output: each channel is fed and drained on its own,
+     * in parallel under OpenMP if num_threads is 0 and there are at least
+     * two channels. */
+#if defined _OPENMP
+    int i;
+    if (!p->runtime_spec.num_threads && p->num_channels > 1)
+#pragma omp parallel for
+    for (i = 0; i < (int)p->num_channels; ++i) {
+      size_t done;
+      if (in)
+        soxr_input_1ch(p, (unsigned)i, ((soxr_cbufs_t)in)[i], ilen);
+      done = soxr_output_1ch(p, (unsigned)i, ((soxr_bufs_t)out)[i], olen, true);
+      if (!i)
+        odone = done;
+    } else
+#endif
+    for (u = 0; u < p->num_channels; ++u) {
+      if (in)
+        soxr_input_1ch(p, u, ((soxr_cbufs_t)in)[u], ilen);
+      odone = soxr_output_1ch(p, u, ((soxr_bufs_t)out)[u], olen, true);
+    }
+    idone = ilen;
+  }
+  else {
+    idone = ilen? soxr_input (p, in , ilen) : 0;
+    odone = soxr_output(p, out, olen);
+  }
+  if (idone0) *idone0 = idone;
+  if (odone0) *odone0 = odone;
+  return p->error;
+}
+
+
+
+/* Resample a whole in-memory signal in a single call: create a resampler,
+ * process the input with end-of-input signalled, and delete the resampler.
+ * With no quality spec given, soxr_quality_spec(SOXR_LQ, 0) is used.
+ * Returns 0 on success, else an error string. */
+soxr_error_t soxr_oneshot(
+    double irate, double orate,
+    unsigned num_channels,
+    void const * in , size_t ilen, size_t * idone,
+    void * out, size_t olen, size_t * odone,
+    soxr_io_spec_t const * io_spec,
+    soxr_quality_spec_t const * q_spec,
+    soxr_runtime_spec_t const * runtime_spec)
+{
+  soxr_quality_spec_t default_q_spec;
+  soxr_t resampler;
+  soxr_error_t error = 0;
+
+  /* A quality spec that carries an error is rejected outright. */
+  if (q_spec && q_spec->e)
+    return q_spec->e;
+
+  if (!q_spec) {
+    default_q_spec = soxr_quality_spec(SOXR_LQ, 0);
+    q_spec = &default_q_spec;
+  }
+
+  resampler = soxr_create(irate, orate, num_channels,
+      &error, io_spec, q_spec, runtime_spec);
+  if (error)
+    return error;
+
+  /* The complemented length tells soxr_process that this is all the input. */
+  error = soxr_process(resampler, in, ~ilen, idone, out, olen, odone);
+  soxr_delete(resampler);
+  return error;
+}
+
+
+
+/* Overwrite the resampler's stored error status (internal use).
+ * Returns "null pointer" if p is null, otherwise 0.
+ * NOTE(review): as written, when no error is stored and `error' is non-null
+ * the early return fires, so the new error is NOT recorded; an existing
+ * error, by contrast, is always overwritten (or cleared).  The test looks
+ * inverted (perhaps `p->error && p->error != error' was meant) -- confirm
+ * the intended semantics against callers before changing it. */
+soxr_error_t soxr_set_error(soxr_t p, soxr_error_t error)
+{
+  if (!p) return "null pointer";
+  if (!p->error && p->error != error) return p->error;
+  p->error = error;
+  return 0;
+}
diff --git a/src/soxr.h b/src/soxr.h

new file mode 100644 (file)

index 0000000..d878bb8
--- /dev/null
+++ b/src/soxr.h
@@ -0,0 +1,329 @@
+/* SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+
+
+/* -------------------------------- Gubbins --------------------------------- */
+
+#if !defined soxr_included
+#define soxr_included
+
+
+#if defined __cplusplus
+  #include <cstddef>
+  extern "C" {
+#else
+  #include <stddef.h>
+#endif
+
+#if defined SOXR_DLL
+  #if defined soxr_EXPORTS
+    #define SOXR __declspec(dllexport)
+  #else
+    #define SOXR __declspec(dllimport)
+  #endif
+#elif defined SOXR_VISIBILITY && defined __GNUC__ && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+  #define SOXR __attribute__ ((visibility("default")))
+#else
+  #define SOXR
+#endif
+
+typedef struct soxr_io_spec soxr_io_spec_t;
+typedef struct soxr_quality_spec soxr_quality_spec_t;
+typedef struct soxr_runtime_spec soxr_runtime_spec_t;
+
+
+
+/* ---------------------------- API conventions --------------------------------
+
+Buffer lengths (and occupancies) are expressed as the number of contained
+samples per channel.
+
+Parameter names for buffer lengths have the suffix `len'.
+
+A single-character `i' or 'o' is often used in names to give context as
+input or output (e.g. ilen, olen).                                            */
+
+
+
+/* --------------------------- Version management --------------------------- */
+
+/* E.g. #if SOXR_THIS_VERSION >= SOXR_VERSION(0,1,1) ...                      */
+#define SOXR_VERSION(x,y,z)     (((x)<<16)|((y)<<8)|(z))
+#define SOXR_THIS_VERSION       SOXR_VERSION(0,1,1)
+#define SOXR_THIS_VERSION_STR               "0.1.1"
+
+
+
+/* --------------------------- Type declarations ---------------------------- */
+
+typedef struct soxr * soxr_t;          /* A resampler for 1 or more channels. */
+typedef char const * soxr_error_t;                /* 0:no-error; non-0:error. */
+
+typedef void       * soxr_buf_t;  /* 1 buffer of channel-interleaved samples. */
+typedef void const * soxr_cbuf_t;                        /* Ditto; read-only. */
+
+typedef soxr_buf_t const  * soxr_bufs_t;/* Or, a separate buffer for each ch. */
+typedef soxr_cbuf_t const * soxr_cbufs_t;                /* Ditto; read-only. */
+
+typedef void const * soxr_in_t;      /* Either a soxr_cbuf_t or soxr_cbufs_t,
+                                        depending on itype in soxr_io_spec_t. */
+typedef void       * soxr_out_t;     /* Either a soxr_buf_t or soxr_bufs_t,
+                                        depending on otype in soxr_io_spec_t. */
+
+
+
+/* --------------------------- API main functions --------------------------- */
+
+SOXR char const * soxr_version(void);  /* Query library version: "libsoxr-x.y.z" */
+
+#define soxr_strerror(e)               /* Soxr counterpart to strerror. */     \
+    ((e)?(e):"no error")
+
+
+/* Create a stream resampler: */
+
+SOXR soxr_t soxr_create(
+    double      input_rate,      /* Input sample-rate. */
+    double      output_rate,     /* Output sample-rate. */
+    unsigned    num_channels,    /* Number of channels to be used. */
+        /* All following arguments are optional (may be set to NULL). */
+    soxr_error_t *,              /* To report any error during creation. */
+    soxr_io_spec_t const *,      /* To specify non-default I/O formats. */
+    soxr_quality_spec_t const *, /* To specify non-default resampling quality.*/
+    soxr_runtime_spec_t const *);/* To specify non-default runtime resources. */
+
+
+
+/* If not using an app-supplied input function, after creating a stream
+ * resampler, repeatedly call: */
+
+SOXR soxr_error_t soxr_process(
+    soxr_t      resampler,      /* As returned by soxr_create. */
+                            /* Input (to be resampled): */
+    soxr_in_t   in,             /* Input buffer(s); may be NULL (see below). */
+    size_t      ilen,           /* Input buf. length (samples per channel). */
+    size_t      * idone,        /* To return actual # samples used (<= ilen). */
+                            /* Output (resampled): */
+    soxr_out_t  out,            /* Output buffer(s).*/
+    size_t      olen,           /* Output buf. length (samples per channel). */
+    size_t      * odone);       /* To return actual # samples out (<= olen).
+
+    Note that no special meaning is associated with ilen or olen equal to
+    zero.  End-of-input (i.e. no data is available nor shall be available)
+    may be indicated by setting `in' to NULL.                                 */
+
+
+
+/* If using an app-supplied input function, it must look and behave like this:*/
+
+typedef size_t /* data_len */
+  (* soxr_input_fn_t)(         /* Supply data to be resampled. */
+    void * input_fn_state,     /* As given to soxr_set_input_fn (below). */
+    soxr_in_t * data,          /* Returned data; see below. N.B. ptr to ptr(s)*/
+    size_t requested_len);     /* Samples per channel, >= returned data_len.
+
+  data_len  *data     Indicates    Meaning
+   ------- -------   ------------  -------------------------
+     !=0     !=0       Success     *data contains data to be
+                                   input to the resampler.
+      0    !=0 (or   End-of-input  No data is available nor
+           not set)                shall be available.
+      0       0        Failure     An error occurred whilst trying to
+                                   source data to be input to the resampler.  */
+
+/* and be registered with a previously created stream resampler using: */
+
+SOXR soxr_error_t soxr_set_input_fn(/* Set (or reset) an input function.*/
+    soxr_t resampler,            /* As returned by soxr_create. */
+    soxr_input_fn_t,             /* Function to supply data to be resampled.*/
+    void * input_fn_state,       /* If needed by the input function. */
+    size_t max_ilen);            /* Maximum value for input fn. requested_len.*/
+
+/* then repeatedly call: */
+
+SOXR size_t /*odone*/ soxr_output(/* Resample and output a block of data.*/
+    soxr_t resampler,            /* As returned by soxr_create. */
+    soxr_out_t data,             /* App-supplied buffer(s) for resampled data.*/
+    size_t olen);                /* Amount of data to output; >= odone. */
+
+
+
+/* Common stream resampler operations: */
+
+SOXR soxr_error_t soxr_error(soxr_t);   /* Query error status. */
+SOXR size_t   * soxr_num_clips(soxr_t); /* Query int. clip counter (for R/W). */
+SOXR double     soxr_delay(soxr_t);  /* Query current delay in output samples.*/
+SOXR char const * soxr_engine(soxr_t p); /* Query resampling engine name. */
+
+SOXR soxr_error_t soxr_clear(soxr_t); /* Ready for fresh signal, same config. */
+SOXR void         soxr_delete(soxr_t);  /* Free resources. */
+
+
+
+/* `Short-cut', single call to resample a (probably short) signal held entirely
+ * in memory.  See soxr_create and soxr_process above for parameter details. */
+
+SOXR soxr_error_t soxr_oneshot(
+    double         input_rate,
+    double         output_rate,
+    unsigned       num_channels,
+    soxr_in_t    in , size_t ilen, size_t * idone,
+    soxr_out_t   out, size_t olen, size_t * odone,
+    soxr_io_spec_t const *,
+    soxr_quality_spec_t const *,
+    soxr_runtime_spec_t const *);
+
+
+
+/* For variable-rate resampling (experimental). See example # 5 for how to
+ * create a variable-rate resampler and how to use this function. */
+
+SOXR soxr_error_t soxr_set_io_ratio(soxr_t, double io_ratio, size_t slew_len);
+
+
+
+/* -------------------------- API type definitions -------------------------- */
+
+/* Sample datatype codes.  The low two bits select the sample format (see
+ * soxr_datatype_size below, which looks only at those bits); SOXR_SPLIT (4)
+ * is added for the split-channel variants. */
+typedef enum {          /* Datatypes supported for I/O to/from the resampler: */
+  /* Internal; do not use: */
+  SOXR_FLOAT32, SOXR_FLOAT64, SOXR_INT32, SOXR_INT16, SOXR_SPLIT = 4,
+
+  /* Use for interleaved channels: */
+  SOXR_FLOAT32_I = SOXR_FLOAT32, SOXR_FLOAT64_I, SOXR_INT32_I, SOXR_INT16_I,
+
+  /* Use for split channels: */
+  SOXR_FLOAT32_S = SOXR_SPLIT  , SOXR_FLOAT64_S, SOXR_INT32_S, SOXR_INT16_S
+
+} soxr_datatype_t;
+
+/* The string is a look-up table of sizes in bytes, written in octal:
+ * 4, 8, 4 and 2 for FLOAT32, FLOAT64, INT32 and INT16 respectively. */
+#define soxr_datatype_size(x)  /* Returns `sizeof' a soxr_datatype_t sample. */\
+  ((unsigned char *)"\4\10\4\2")[(x)&3]
+
+
+
+/* I/O format options.  Obtain typical values from soxr_io_spec() and then
+ * override individual fields if needed.  The right-hand column gives each
+ * field's typical value. */
+struct soxr_io_spec {                                            /* Typically */
+  soxr_datatype_t itype;     /* Input datatype.                SOXR_FLOAT32_I */
+  soxr_datatype_t otype;     /* Output datatype.               SOXR_FLOAT32_I */
+  double scale;              /* Linear gain to apply during resampling.  1    */
+  void * e;                  /* Reserved for internal use                0    */
+  unsigned long flags;       /* Per the following #defines.              0    */
+};
+
+#define SOXR_TPDF              0     /* Applicable only if otype is INT16. */
+#define SOXR_NO_DITHER         8u    /* Disable the above. */
+
+
+
+/* Resampling quality options.  Obtain typical values from
+ * soxr_quality_spec() and then override individual fields if needed.  The
+ * right-hand column gives each field's typical value. */
+struct soxr_quality_spec {                                       /* Typically */
+  double precision;         /* Conversion precision (in bits).           20   */
+  double phase_response;    /* 0=minimum, ... 50=linear, ... 100=maximum 50   */
+  double passband_end;      /* 0dB pt. bandwidth to preserve; nyquist=1  0.913*/
+  double stopband_begin;    /* Aliasing/imaging control; > passband_end   1   */
+  void * e;                 /* Reserved for internal use.                 0   */
+  unsigned long flags;      /* Per the following #defines.                0   */
+};
+
+#define SOXR_ROLLOFF_SMALL     0u    /* <= 0.01 dB */
+#define SOXR_ROLLOFF_MEDIUM    1u    /* <= 0.35 dB */
+#define SOXR_ROLLOFF_NONE      2u    /* For Chebyshev bandwidth. */
+
+#define SOXR_MAINTAIN_3DB_PT   4u  /* Reserved for internal use. */
+#define SOXR_HI_PREC_CLOCK     8u  /* Increase `irrational' ratio accuracy. */
+#define SOXR_DOUBLE_PRECISION 16u  /* Use D.P. calcs even if precision <= 20. */
+#define SOXR_VR               32u  /* Experimental, variable-rate resampling. */
+
+
+
+/* Runtime resource options.  Obtain typical values from soxr_runtime_spec()
+ * and then override individual fields if needed.  The right-hand column
+ * gives each field's typical value. */
+struct soxr_runtime_spec {                                       /* Typically */
+  unsigned log2_min_dft_size;/* For DFT efficiency. [8,15]              10    */
+  unsigned log2_large_dft_size;/* For DFT efficiency. [16,20]           17    */
+  unsigned coef_size_kbytes; /* For SOXR_COEF_INTERP_AUTO (below).      400   */
+  unsigned num_threads;      /* If built so. 0 means `automatic'.        1    */
+  void * e;                  /* Reserved for internal use.               0    */
+  unsigned long flags;       /* Per the following #defines.              0    */
+};
+                                   /* For `irrational' ratios only: */
+#define SOXR_COEF_INTERP_AUTO  0u    /* Auto select coef. interpolation. */
+#define SOXR_COEF_INTERP_LOW   1u    /* Man. select: less CPU, more memory. */
+#define SOXR_COEF_INTERP_HIGH  2u    /* Man. select: more CPU, less memory. */
+
+#define SOXR_STRICT_BUFFERING  4u  /* Reserved for future use. */
+#define SOXR_NOSMALLINTOPT     8u  /* For test purposes only. */
+
+
+
+/* -------------------------- API type constructors ------------------------- */
+
+/* These functions allow setting of the most commonly-used structure
+ * parameters, with other parameters being given default values.  The default
+ * values may then be overridden, directly in the structure, if needed.  */
+
+SOXR soxr_quality_spec_t soxr_quality_spec(
+    unsigned long recipe,       /* Per the #defines immediately below. */
+    unsigned long flags);       /* As soxr_quality_spec_t.flags. */
+
+                                  /* The 5 standard qualities found in SoX: */
+#define SOXR_QQ                 0   /* 'Quick' cubic interpolation. */
+#define SOXR_LQ                 1   /* 'Low' 16-bit with larger rolloff. */
+#define SOXR_MQ                 2   /* 'Medium' 16-bit with medium rolloff. */
+#define SOXR_HQ                 SOXR_20_BITQ /* 'High quality'. */
+#define SOXR_VHQ                SOXR_28_BITQ /* 'Very high quality'. */
+
+#define SOXR_16_BITQ            3
+#define SOXR_20_BITQ            4
+#define SOXR_24_BITQ            5
+#define SOXR_28_BITQ            6
+#define SOXR_32_BITQ            7
+                                    /* Libsamplerate equivalent qualities: */
+#define SOXR_LSR0Q              8     /* 'Best sinc'. */
+#define SOXR_LSR1Q              9     /* 'Medium sinc'. */
+#define SOXR_LSR2Q              10    /* 'Fast sinc'. */
+
+#define SOXR_LINEAR_PHASE       0x00
+#define SOXR_INTERMEDIATE_PHASE 0x10
+#define SOXR_MINIMUM_PHASE      0x30
+#define SOXR_STEEP_FILTER       0x40
+#define SOXR_ALLOW_ALIASING     0x80  /* Reserved for future use. */
+
+
+
+SOXR soxr_runtime_spec_t soxr_runtime_spec(
+    unsigned num_threads);
+
+
+
+SOXR soxr_io_spec_t soxr_io_spec(
+    soxr_datatype_t itype,
+    soxr_datatype_t otype);
+
+
+
+/* --------------------------- Internal use only ---------------------------- */
+
+SOXR soxr_error_t soxr_set_error(soxr_t, soxr_error_t);
+SOXR soxr_error_t soxr_set_num_channels(soxr_t, unsigned);
+
+
+
+#undef SOXR
+
+#if defined __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/soxr.pc.in b/src/soxr.pc.in

new file mode 100644 (file)

index 0000000..69d225b
--- /dev/null
+++ b/src/soxr.pc.in
@@ -0,0 +1,5 @@
+Name: ${PROJECT_NAME}
+Description: ${DESCRIPTION_SUMMARY}
+Version: ${PROJECT_VERSION}
+Libs: -L${LIB_INSTALL_DIR} -l${PROJECT_NAME}
+Cflags: -I${INCLUDE_INSTALL_DIR}
diff --git a/src/vr32.c b/src/vr32.c

new file mode 100644 (file)

index 0000000..f804db5
--- /dev/null
+++ b/src/vr32.c
@@ -0,0 +1,771 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Experimental variable-rate resampling. */
+
+#include <assert.h>
+#include <math.h>
+#if !defined M_PI
+#define M_PI    3.14159265358979323846
+#endif
+#if !defined M_LN2
+#define M_LN2   0.69314718055994530942
+#endif
+#include <string.h>
+#include <stdlib.h>
+#include "internal.h"
+#define FIFO_SIZE_T int
+#define FIFO_MIN 0x8000
+#include "fifo.h"
+/* Tuning constants.  The _D/_U suffixes pair with the poly_fir_*_d/_u
+ * routines and tables further down (see stream_t.is_d for d versus u). */
+#define FADE_LEN_BITS     9 /* Sizes fade_coefs[] and the stage cross-fade. */
+#define PHASE_BITS_D      (8 + PHASE_MORE)
+#define PHASE_BITS_U      (7 + PHASE_MORE)
+#define PHASE_MORE        0 /* 2 improves small int, and large u, ratios. */
+/* PHASES0_x: phases per tap in the raw coefs0_x table; POLY_FIR_LEN_x: taps. */
+#define PHASES0_D         12
+#define POLY_FIR_LEN_D    20
+#define PHASES0_U         6
+#define POLY_FIR_LEN_U    12
+/* MULT32 is 2^32 as a double; PHASES_x is the interpolated phase count. */
+#define MULT32            (65536. * 65536.)
+#define PHASES_D          (1 << PHASE_BITS_D)
+#define PHASES_U          (1 << PHASE_BITS_U)
+
+static float const half_fir_coefs[] = {
+  4.7111692735253413e-1f, 3.1690797657656167e-1f, 2.8691667164678896e-2f,
+  -1.0192825848403946e-1f, -2.8122856237424654e-2f, 5.6804928137780292e-2f,
+  2.7192768359197508e-2f, -3.6082309197154230e-2f, -2.5927789156038026e-2f,
+  2.3644444384060669e-2f, 2.4363075319345607e-2f, -1.5127630198606428e-2f,
+  -2.2541790286342567e-2f, 8.8733836742880233e-3f, 2.0513077413933017e-2f,
+  -4.1186431656279818e-3f, -1.8330444480421631e-2f, 4.6288071358217028e-4f,
+  1.6049769308921290e-2f, 2.3282106680446069e-3f, -1.3727327353082214e-2f,
+  -4.4066375505196096e-3f, 1.1417847550661287e-2f, 5.8817724081355978e-3f,
+  -9.1727580349157123e-3f, -6.8404638339394346e-3f, 7.0385357033205332e-3f,
+  7.3574525331962567e-3f, -5.0554197628506353e-3f, -7.5008330890673153e-3f,
+  3.2563575907277676e-3f, 7.3346538206330259e-3f, -1.6663208501478607e-3f,
+  -6.9199171108861694e-3f, 3.0196567996023190e-4f, 6.3146436955438768e-3f,
+  8.2835711466756098e-4f, -5.5734271982033918e-3f, -1.7242765658561860e-3f,
+  4.7467223803576682e-3f, 2.3927523666941205e-3f, -3.8801054688632139e-3f,
+  -2.8472115748114728e-3f, 3.0135659731132642e-3f, 3.1064651802365259e-3f,
+  -2.1809660142807748e-3f, -3.1935061143485862e-3f, 1.4096923923208671e-3f,
+  3.1342382222281609e-3f, -7.2053095076414931e-4f, -2.9561940489039682e-3f,
+  1.2777585046118889e-4f, 2.6873033434313882e-3f, 3.6043554054680685e-4f,
+  -2.3547716396561816e-3f, -7.4160208709749312e-4f, 1.9840894915230177e-3f,
+  1.0181606831615856e-3f, -1.5982325266851590e-3f, -1.1966774804490967e-3f,
+  1.2170528733224913e-3f, 1.2869618709883193e-3f, -8.5687504489877664e-4f,
+  -1.3011452950496001e-3f, 5.3030588389885972e-4f, 1.2527854026453923e-3f,
+  -2.4622758430821288e-4f, -1.1560181289625195e-3f, 9.9661643910782316e-6f,
+  1.0247989665318426e-3f, 1.7639297561664703e-4f, -8.7226452073196350e-4f,
+  -3.1358436147401782e-4f, 7.1022054657665971e-4f, 4.0466151692224986e-4f,
+  -5.4877022848030636e-4f, -4.5444807961399138e-4f, 3.9609542800868769e-4f,
+  4.6899779918507020e-4f, -2.5835154936239735e-4f, -4.5505391611721792e-4f,
+  1.3970512544147175e-4f, 4.1957352577882777e-4f, -4.2458993694471047e-5f,
+  -3.6930861782460262e-4f, -3.2738549063278822e-5f, 3.1046609224355927e-4f,
+  8.6624679037202785e-5f, -2.4845427128026068e-4f, -1.2101300074995281e-4f,
+  1.8773208187021294e-4f, 1.3849844077872591e-4f, -1.3170611080827864e-4f,
+  -1.4212373327156217e-4f, 8.2758595879431528e-5f, 1.3513059684140468e-4f,
+  -4.2284127775471251e-5f, -1.2070298779675768e-4f, 1.0811692847491609e-5f,
+  1.0178008299781669e-4f, 1.1852545451857104e-5f, -8.0914539313342186e-5f,
+  -2.6454558961220653e-5f, 6.0208388858339534e-5f, 3.4169979203255580e-5f,
+  -4.1203296686185329e-5f, -3.6353143441156863e-5f, 2.4999186627094098e-5f,
+  3.4542829080466582e-5f, -1.2148053427488782e-5f, -3.0260855999161159e-5f,
+  2.7687092952335852e-6f, 2.5095689880235108e-5f, 3.6223160417538916e-6f,
+  -2.0960977068565079e-5f, -9.3312292092513232e-6f, 2.0711288605113663e-5f,
+  3.1992093654438569e-5f, 1.9772538588596925e-5f, 4.8667740603532560e-6f,
+  -5.3495033191567977e-7f,
+};
+/* CONVOLVE expands to 60 consecutive uses of the statement macro `_'.  Each
+ * filter below #defines `_' as one multiply-accumulate step just before its
+ * body and #undefs it afterwards. */
+#define CONVOLVE \
+    _ _ _ _ _ _ _ _ _ _  _ _ _ _ _ _ _ _ _ _ \
+    _ _ _ _ _ _ _ _ _ _  _ _ _ _ _ _ _ _ _ _ \
+    _ _ _ _ _ _ _ _ _ _  _ _ _ _ _ _ _ _ _ _
+/* Tap counts either side of the centre of half_fir_coefs[]; the asserts in
+ * the filters check them against the CONVOLVE unroll counts.  iAL() comes
+ * from internal.h -- presumably the array length as an int. */
+#define HALF_FIR_LEN_2 (iAL(half_fir_coefs) - 1)
+#define HALF_FIR_LEN_4 (HALF_FIR_LEN_2 / 2)
+/* half_fir: symmetric FIR centred on input[0], using the centre tap plus
+ * HALF_FIR_LEN_2 mirrored sample pairs, so input[-HALF_FIR_LEN_2] through
+ * input[HALF_FIR_LEN_2] must be readable.  `_' is one accumulate step. */
+#define _ sum += (input[-i] + input[i]) * half_fir_coefs[i], ++i;
+static float half_fir(float const * input)
+{
+  long i = 1;
+  float sum = input[0] * half_fir_coefs[0];
+  CONVOLVE CONVOLVE /* 2 x 60 unrolled steps. */
+  assert(i == HALF_FIR_LEN_2 + 1);
+  return (float)sum;
+}
+#undef _
+/* double_fir0: like half_fir but using only the even-indexed taps of
+ * half_fir_coefs[] over HALF_FIR_LEN_4 mirrored pairs, with the result
+ * doubled.  Paired with double_fir1 in do_input_stage. */
+#define _ sum += (input[-i] + input[i]) * half_fir_coefs[2*i], ++i;
+static float double_fir0(float const * input)
+{
+  int i = 1;
+  float sum = input[0] * half_fir_coefs[0];
+  CONVOLVE /* 60 unrolled steps. */
+  assert(i == HALF_FIR_LEN_4 + 1);
+  return (float)(sum * 2);
+}
+#undef _
+/* double_fir1: uses the odd-indexed taps of half_fir_coefs[]; each step
+ * pairs input[-i] with input[1+i], so the window is centred between
+ * input[0] and input[1].  The result is doubled. */
+#define _ sum += (input[-i] + input[1+i]) * half_fir_coefs[2*i+1], ++i;
+static float double_fir1(float const * input)
+{
+  int i = 0;
+  float sum = 0;
+  CONVOLVE /* 60 unrolled steps. */
+  assert(i == HALF_FIR_LEN_4 + 0);
+  return (float)(sum * 2);
+}
+#undef _
+
+/* Short symmetric filter: a centre tap of .5 plus six mirrored pairs at the
+ * odd offsets 1, 3, ..., 11 either side of input[0], so input[-11] through
+ * input[11] must be readable.  Returns the filtered sample. */
+static float fast_half_fir(float const * input)
+{
+  static const float taps[] = {
+    .3094188462713818f, -.08198144615199748f, .03055232105456833f,
+    -.01015890277986387f, .002513237297525149f, -.0003469672050347395f,
+  };
+  float acc = input[0] * .5f;
+  int k;
+  for (k = 0; k < 6; ++k) {
+    int const off = 2 * k + 1;  /* Odd offsets only. */
+    acc += (input[-off] + input[off]) * taps[k];
+  }
+  return (float)acc;
+}
+/* Coefficients for the 15 cascaded first-order sections run by half_iir1();
+ * that function's in1 path uses the even indices, its in0 path the odd. */
+static const float iir_coefs[] = {
+  .0262852045255816f, .0998310478296204f, .2068650611060755f,
+  .3302241336172489f, .4544203620946318f, .5685783569471244f,
+  .6669444657994612f, .7478697711807407f, .8123244036799226f,
+  .8626000999654434f, .9014277444199280f, .9314860567781748f,
+  .9551915287878752f, .9746617828910630f, .9917763050166036f,
+  };
+#define IIR_FILTER _ _ _ _ _ _ _
+#define _ in1=(in1-p->y[i])*iir_coefs[i]+tmp1;tmp1=p->y[i],p->y[i]=in1;++i;\
+          in0=(in0-p->y[i])*iir_coefs[i]+tmp0;tmp0=p->y[i],p->y[i]=in0;++i;
+/* Filter state: x[] keeps the previous in0/in1, y[] one value per section. */
+typedef struct {float x[2], y[AL(iir_coefs)];} half_iir_t;
+/* Pushes one (in0, in1) pair through the cascade and returns the sum of the
+ * two path outputs.  Each section computes
+ *   new = (in - y[i]) * iir_coefs[i] + tmp
+ * where tmp is what the preceding section (or x[]) held before this call;
+ * it then stores `new' in y[i] and hands the old y[i] on as the next tmp.
+ * This has the shape of a first-order all-pass section -- TODO confirm. */
+static float half_iir1(half_iir_t * p, float in0, float in1)
+{
+  int i = 0;
+  float tmp0, tmp1;
+  tmp0 = p->x[0], p->x[0] = in0;
+  tmp1 = p->x[1], p->x[1] = in1;
+  IIR_FILTER /* 7 x (one in1 section + one in0 section): i ends at 14. */
+  p->y[i] = in1 = (in1 - p->y[i]) * iir_coefs[i] + tmp1; /* 8th in1 section. */
+  return in1 + in0;
+}
+#undef _
+
+/* Consumes 2 * olen samples from ibuf in consecutive pairs and writes one
+ * half_iir1() result per pair to obuf. */
+static void half_iir(half_iir_t * p, float * obuf, float const * ibuf, int olen)
+{
+  int k;
+  for (k = 0; k < olen; ++k) {
+    float const first = ibuf[k*2], second = ibuf[k*2+1];
+    obuf[k] = (float)half_iir1(p, first, second);
+  }
+}
+/* Filters buf[0..len-1] in place through half_iir1() with the second input
+ * tied to 0.  Afterwards every even-indexed y[] entry (the states of the
+ * in1 path, which is fed only zeros here) has small_normal, i.e. 2^-64,
+ * added and then subtracted again; `_ IIR_FILTER' expands to 1 + 7 = 8
+ * such steps, covering y[0], y[2], ..., y[14]. */
+static void half_phase(half_iir_t * p, float * buf, int len)
+{
+  float const small_normal = 1/MULT32/MULT32; /* To quash denormals on path 0.*/
+  int i;
+  for (i = 0; i < len; buf[i] = (float)half_iir1(p, buf[i], 0), ++i);
+#define _ p->y[i] += small_normal, i += 2;
+  i = 0, _ IIR_FILTER
+#undef _
+#define _ p->y[i] -= small_normal, i += 2;
+  i = 0, _ IIR_FILTER
+#undef _
+}
+
+#define raw_coef_t float
+static const raw_coef_t coefs0_d[POLY_FIR_LEN_D / 2 * PHASES0_D + 1] = {
+  0.f, 1.4057457935754080e-5f, 2.3302768424632188e-5f, 4.0084897378442095e-5f,
+  6.1916773126231636e-5f, 8.7973434034929016e-5f, 1.1634847507082481e-4f,
+  1.4391931654629385e-4f, 1.6635470822160746e-4f, 1.7830838562749493e-4f,
+  1.7382737311735053e-4f, 1.4698011689178234e-4f, 9.2677933545427018e-5f,
+  7.6288745483685147e-6f, -1.0867156553965507e-4f, -2.5303924530322309e-4f,
+  -4.1793463959360433e-4f, -5.9118012513731508e-4f, -7.5619603440508576e-4f,
+  -8.9285245696990080e-4f, -9.7897684238178358e-4f, -9.9248131798952959e-4f,
+  -9.1398576537725926e-4f, -7.2972364732199553e-4f, -4.3443557115962946e-4f,
+  -3.3895523979487613e-5f, 4.5331297364457429e-4f, 9.9513966802111057e-4f,
+  1.5468348913161652e-3f, 2.0533350794358640e-3f, 2.4533031436958950e-3f,
+  2.6846707315385087e-3f, 2.6913237051575155e-3f, 2.4303724507982708e-3f,
+  1.8792817173578587e-3f, 1.0420231121204950e-3f, -4.6617252898486750e-5f,
+  -1.3193786988492551e-3f, -2.6781478874181100e-3f, -3.9992272197487003e-3f,
+  -5.1422613336274056e-3f, -5.9624224517967755e-3f, -6.3250283969908542e-3f,
+  -6.1213677360236101e-3f, -5.2841872043022185e-3f, -3.8011036067186429e-3f,
+  -1.7241752288145494e-3f, 8.2596463599396213e-4f, 3.6626436307478369e-3f,
+  6.5430316636724021e-3f, 9.1853404499045010e-3f, 1.1292516396583619e-2f,
+  1.2580791345879052e-2f, 1.2810714562937180e-2f, 1.1817712330677889e-2f,
+  9.5388893881204976e-3f, 6.0327678128662696e-3f, 1.4889921444742027e-3f,
+  -3.7742770128030593e-3f, -9.3265389310393538e-3f, -1.4654680466977541e-2f,
+  -1.9204813565928323e-2f, -2.2433342812570076e-2f, -2.3863084249865732e-2f,
+  -2.3139248817097825e-2f, -2.0079526147977360e-2f, -1.4712465100990968e-2f,
+  -7.2989072959128900e-3f, 1.6676055337427264e-3f, 1.1483818597217116e-2f,
+  2.1283378291010333e-2f, 3.0104924254589629e-2f, 3.6977102234817580e-2f,
+  4.1013752396638667e-2f, 4.1510805491867378e-2f, 3.8035383354576423e-2f,
+  3.0497421566956902e-2f, 1.9194910514469185e-2f, 4.8255960959712636e-3f,
+  -1.1539393212932630e-2f, -2.8521204184392364e-2f, -4.4535662544571142e-2f,
+  -5.7926040870466614e-2f, -6.7116245375785713e-2f, -7.0771566186484461e-2f,
+  -6.7952220045636696e-2f, -5.8244261062898019e-2f, -4.1853211028450271e-2f,
+  -1.9648003905967236e-2f, 6.8535507014343263e-3f, 3.5561844452076982e-2f,
+  6.3953651316164553e-2f, 8.9264185854578418e-2f, 1.0872025112127688e-1f,
+  1.1979689474056175e-1f, 1.2047646491371326e-1f, 1.0948710929592399e-1f,
+  8.6497869185231543e-2f, 5.2249701648862154e-2f, 8.6059406690018377e-3f,
+  -4.1488376792262582e-2f, -9.4141677945723271e-2f, -1.4474093381170536e-1f,
+  -1.8825408052888104e-1f, -2.1958987927558168e-1f, -2.3398931875783419e-1f,
+  -2.2741860176576378e-1f, -1.9693206642095332e-1f, -1.4097432039328661e-1f,
+  -5.9594435654526039e-2f, 4.5448949025739843e-2f, 1.7070477403312445e-1f,
+  3.1117273816011837e-1f, 4.6056631075658744e-1f, 6.1167961235662682e-1f,
+  7.5683349228721264e-1f, 8.8836924234920911e-1f, 9.9915393319190682e-1f,
+  1.0830597619389459e+0f, 1.1353812335460003e+0f, 1.1531583819295732e+0f,
+};
+
+static const raw_coef_t coefs0_u[POLY_FIR_LEN_U / 2 * PHASES0_U + 1] = {
+  0.f, 2.4376543962047211e-5f, 9.7074354091545404e-5f, 2.5656573977863553e-4f,
+  5.2734092391248152e-4f, 8.9078135146855391e-4f, 1.2494786883827907e-3f,
+  1.4060353542261659e-3f, 1.0794576035695273e-3f, -2.1547711862939183e-5f,
+ -2.0658693124381805e-3f, -4.9333908355966233e-3f, -8.0713165910440213e-3f,
+ -1.0451560117817383e-2f, -1.0703998868319438e-2f, -7.4626412699536097e-3f,
+  1.0898921033926621e-4f, 1.1734475997741493e-2f, 2.5579413661660957e-2f,
+  3.8168952738129619e-2f, 4.4846162998312754e-2f, 4.0821915377309274e-2f,
+  2.2679961923658700e-2f, -9.9957152600624218e-3f, -5.3343924460223908e-2f,
+ -9.8792607573741240e-2f, -1.3382736970823086e-1f, -1.4404307655147228e-1f,
+ -1.1619851747063137e-1f, -4.1649695271274462e-2f, 8.0680482815468343e-2f,
+  2.4264355486537642e-1f, 4.2712782955601925e-1f, 6.1041328492424185e-1f,
+  7.6625948559498691e-1f, 8.7088876549652772e-1f, 9.0774244518772884e-1f,
+};
+/* coef(): element access into an interpolated poly-phase table laid out as
+ * [phase][tap][interp_order + 1], with the interpolation terms stored in
+ * reverse order (term 0 last). */
+#define coef(coef_p, interp_order, fir_len, phase_num, coef_interp_num, \
+    fir_coef_num) coef_p[(fir_len) * ((interp_order) + 1) * (phase_num) + \
+    ((interp_order) + 1) * (fir_coef_num) + (interp_order - coef_interp_num)]
+/* COEF(): reads index i of a symmetric length-l filter stored as its first
+ * half h[0..l/2]; indices outside 0..l-1 read as 0.
+ * prepare_coefs(): expands the raw half-table coefs0 (n taps x phases0
+ * phases) to n x `phases' phases and writes, for every phase and tap, the
+ * scaled coefficient plus a delta term into `coefs' (2 floats per entry). */
+#define COEF(h,l,i) ((i)<0||(i)>=(l)?0:(h)[(i)>(l)/2?(l)-(i):(i)])
+static void prepare_coefs(float * coefs, int n, int phases0, int phases,
+    raw_coef_t const * coefs0, double multiplier)
+{
+  double k[6];
+  int length0 = n * phases0, length = n * phases, K0 = iAL(k)/2 - 1, i, j, pos;
+  raw_coef_t * coefs1 = malloc(((size_t)length / 2  + 1) * sizeof(*coefs1)); /* NOTE(review): result is not checked. */
+  raw_coef_t * p = coefs1, f0, f1 = 0;
+  /* k[] is a sliding window of 6 source points with k[K0] at source index i.
+   * a..e are the coefficients of the polynomial evaluated at offset x. */
+  for (j = 0; j < iAL(k); k[j] = COEF(coefs0, length0, j - K0), ++j);
+  for (pos = i = 0; i < length0 / 2; ++i) {
+    double b=(1/24.)*(k[0]+k[4]+6*k[2]-4*(k[1]+k[3])),d=.5*(k[1]+k[3])-k[2]-b;
+    double a=(1/120.)*(k[5]-k[2]-9*(9*b+d)+2.5*(k[3]-k[1])-2*(k[4]-k[0]));
+    double c=(1/12.)*(k[4]-k[0]-2*(k[3]-k[1])-60*a),e=.5*(k[3]-k[1])-a-c;
+    for (; pos / phases == i; pos += phases0) { /* Outputs within source interval i. */
+      double x = (double)(pos % phases) / phases;
+      *p++ = (raw_coef_t)(k[K0] + ((((a*x + b)*x + c)*x + d)*x + e)*x);
+    }
+    for (j = 0; j < iAL(k) - 1; k[j] = k[j + 1], ++j); /* Slide the window. */
+    k[j] = COEF(coefs0, length0, i + iAL(k) / 2 + 1);
+  }
+  if (!(length & 1))
+    *p++ = (raw_coef_t)k[K0];
+  assert(p - coefs1 == length / 2  + 1);
+  /* Term 0 is the scaled coefficient f0; term 1 is f1 - f0, where f1 is the
+   * value stored on the previous iteration (0 on the very first one). */
+  for (i = 0; i < n; ++i) for (j = phases - 1; j >= 0; --j, f1 = f0) {
+    pos = (n - 1 - i) * phases + j;
+    f0 = COEF(coefs1, length, pos) * (raw_coef_t)multiplier;
+    coef(coefs, 1, n, j, 0, i) = (float)f0;
+    coef(coefs, 1, n, j, 1, i) = (float)(f1 - f0);
+  }
+  free(coefs1);
+}
+/* `_' is one tap of the poly-phase FIR: stored coefficient `a' plus delta
+ * `b' times the sub-phase fraction x.  a/b are bound to the _d table for
+ * poly_fir1_d and then re-bound to the _u table for poly_fir1_u. */
+#define _ sum += (b *x + a)*input[i], ++i;
+#define a (coef(poly_fir_coefs_d, 1, POLY_FIR_LEN_D, phase, 0,i))
+#define b (coef(poly_fir_coefs_d, 1, POLY_FIR_LEN_D, phase, 1,i))
+static float poly_fir_coefs_d[POLY_FIR_LEN_D * PHASES_D * 2];
+/* Sums POLY_FIR_LEN_D taps over input[0..].  The top PHASE_BITS_D bits of
+ * `frac' select the phase; the remaining bits, scaled by 2^-32, give x. */
+static float poly_fir1_d(float const * input, uint32_t frac)
+{
+  int i = 0, phase = (int)(frac >> (32 - PHASE_BITS_D));
+  float sum = 0, x = (float)(frac << PHASE_BITS_D) * (float)(1 / MULT32);
+  _ _ _ _ _  _ _ _ _ _  _ _ _ _ _  _ _ _ _ _
+  assert(i == POLY_FIR_LEN_D);
+  return (float)sum;
+}
+#undef a
+#undef b
+#define a (coef(poly_fir_coefs_u, 1, POLY_FIR_LEN_U, phase, 0,i))
+#define b (coef(poly_fir_coefs_u, 1, POLY_FIR_LEN_U, phase, 1,i))
+static float poly_fir_coefs_u[POLY_FIR_LEN_U * PHASES_U * 2];
+/* As poly_fir1_d, but with POLY_FIR_LEN_U taps, PHASE_BITS_U phase bits and
+ * the _u coefficient table. */
+static float poly_fir1_u(float const * input, uint32_t frac)
+{
+  int i = 0, phase = (int)(frac >> (32 - PHASE_BITS_U));
+  float sum = 0, x = (float)(frac << PHASE_BITS_U) * (float)(1 / MULT32);
+  _ _ _ _ _  _ _ _ _ _  _ _
+  assert(i == POLY_FIR_LEN_U);
+  return (float)sum;
+}
+#undef a
+#undef b
+#undef _
+/* Accessors for the 64-bit fixed-point values declared in stream_t: the
+ * whole value via `all', or its two 32-bit halves via FRAC()/INT(). */
+#define ADD_TO(x,y)           x.all += y.all
+#define SUBTRACT_FROM(x,y)    x.all -= y.all
+#define FRAC(x)               x.part.frac
+#define INT(x)                x.part.integer
+/* Read state of one resampling stream.  The WORDS_BIGENDIAN test orders the
+ * halves so that `integer' overlays the high 32 bits of `all' and `frac'
+ * the low 32 bits. */
+typedef struct {
+  union {
+    int64_t all;
+#if WORDS_BIGENDIAN
+    struct {int32_t integer; uint32_t frac;} part;
+#else
+    struct {uint32_t frac; int32_t integer;} part;
+#endif
+  } at, step, step_step; /* Read position; its increment; increment of step. */
+  float const * input;   /* Start of readable samples in the stage FIFO. */
+  int len, stage_num;    /* Position limit for `at'; index into rate_t.stages. */
+  bool is_d; /* true: downsampling at x2 rate; false: upsampling at 1x rate. */
+  double step_mult;      /* Scales an io_ratio to a `step' value (set_step). */
+} stream_t;
+/* Writes up to olen samples to output, two per loop pass, advancing s->at
+ * by s->step after each sample and s->step by s->step_step once per pair.
+ * If the position reaches s->len after the first sample of a pair, that
+ * advance is undone and the loop stops without counting the sample.
+ * Returns the number of samples counted. */
+static int poly_fir_d(stream_t * s, float * output, int olen)
+{
+  int i;
+  float const * input = s->input - POLY_FIR_LEN_D / 2 + 1;
+  for (i = 0; i < olen && INT(s->at) < s->len; ++i) {
+    output[i] = poly_fir1_d(input + INT(s->at), FRAC(s->at));
+    ADD_TO(s->at, s->step);
+    if (!(INT(s->at) < s->len)) {
+      SUBTRACT_FROM(s->at, s->step);
+      break;
+    }
+    output[++i] = poly_fir1_d(input + INT(s->at), FRAC(s->at));
+    ADD_TO(s->at, s->step);
+    ADD_TO(s->step, s->step_step);
+  }
+  return i;
+}
+/* As poly_fir_d, except that each sample is multiplied by a gain read from
+ * `vol' and ADDED to output[] rather than stored.  `vol' moves by `step'
+ * entries per sample, so a negative step walks the gain table backwards. */
+static int poly_fir_fade_d(
+    stream_t * s, float const * vol, int step, float * output, int olen)
+{
+  int i;
+  float const * input = s->input - POLY_FIR_LEN_D / 2 + 1;
+  for (i = 0; i < olen && INT(s->at) < s->len; ++i, vol += step) {
+    output[i] += *vol * poly_fir1_d(input + INT(s->at), FRAC(s->at));
+    ADD_TO(s->at, s->step);
+    if (!(INT(s->at) < s->len)) {
+      SUBTRACT_FROM(s->at, s->step);
+      break;
+    }
+    output[++i] += *(vol += step) * poly_fir1_d(input + INT(s->at),FRAC(s->at));
+    ADD_TO(s->at, s->step);
+    ADD_TO(s->step, s->step_step);
+  }
+  return i;
+}
+
+/* Writes up to olen samples to output, one per loop pass, until the read
+ * position reaches s->len.  After every sample the position advances by
+ * s->step and s->step by s->step_step.  Returns the number written. */
+static int poly_fir_u(stream_t * s, float * output, int olen)
+{
+  float const * const base = s->input - POLY_FIR_LEN_U / 2 + 1;
+  int done = 0;
+  while (done < olen && INT(s->at) < s->len) {
+    output[done] = poly_fir1_u(base + INT(s->at), FRAC(s->at));
+    ++done;
+    ADD_TO(s->at, s->step);
+    ADD_TO(s->step, s->step_step);
+  }
+  return done;
+}
+/* Adds gain-weighted poly_fir1_u() samples into every second slot of
+ * output[] (indices 0, 2, 4, ...); the slots in between are left untouched.
+ * `vol' moves by `step' per sample produced.  Returns the final index i. */
+static int poly_fir_fade_u(
+    stream_t * s, float const * vol, int step, float * output, int olen)
+{
+  int i;
+  float const * input = s->input - POLY_FIR_LEN_U / 2 + 1;
+  for (i = 0; i < olen && INT(s->at) < s->len; i += 2, vol += step) {
+    output[i] += *vol * poly_fir1_u(input + INT(s->at), FRAC(s->at));
+    ADD_TO(s->at, s->step);
+    ADD_TO(s->step, s->step_step);
+  }
+  return i;
+}
+/* shiftr/shiftl shift by a signed count; a negative count reverses the
+ * direction.  stage_occupancy and stage_read_p skip a fixed margin of
+ * 4*HALF_FIR_LEN_2 resp. 2*HALF_FIR_LEN_2 samples of a stage's FIFO.
+ * stage_preload zero-fills (s)->preload samples obtained from
+ * fifo_reserve() -- presumably appended to the FIFO; see fifo.h.
+ * NOTE(review): stage_preload's expansion already ends in `;'. */
+#define shiftr(x,by) ((by) < 0? (x) << (-(by)) : (x) >> (by))
+#define shiftl(x,by) shiftr(x,-(by))
+#define stage_occupancy(s) (fifo_occupancy(&(s)->fifo) - 4*HALF_FIR_LEN_2)
+#define stage_read_p(s) ((float *)fifo_read_ptr(&(s)->fifo) + 2*HALF_FIR_LEN_2)
+#define stage_preload(s) memset(fifo_reserve(&(s)->fifo, (s)->preload), \
+    0, sizeof(float) * (size_t)(s)->preload);
+/* One rate stage: its sample FIFO plus per-stage filter-selection state. */
+typedef struct {
+  fifo_t fifo;
+  double step_mult; /* Set in vr_init to 2 * MULT32 / shiftl(2, stage index). */
+  int is_fast, x_fade_len, preload; /* Fast-filter flag; cross-fade samples left; zero samples preloaded. */
+} stage_t;
+/* Per-channel resampler state.  `stages' points one element past the start
+ * of its allocation (see vr_init), so stages[-1] is a valid entry. */
+typedef struct {
+  int num_stages0, num_stages, flushing; /* Requested count; allocated count (>= 1); 0, 1 after vr_flush, then -1. */
+  int fade_len, slew_len, xfade, stage_inc, switch_stage_num; /* fade_len/slew_len count down as output is produced. */
+  double new_io_ratio, default_io_ratio; /* Pending slew target; initial ratio (0 once applied). */
+  stage_t * stages;
+  fifo_t output_fifo;
+  half_iir_t halfer;
+  stream_t current, fadeout; /* Current/fade-in, fadeout streams. */
+} rate_t;
+/* Raised-cosine gain table falling from 1 at index 0 to 0 at the last index;
+ * filled on first use in vr_init. */
+static float fade_coefs[(2 << FADE_LEN_BITS) + 1];
+/* Zeroes *p, allocates max(num_stages, 1) stages plus one extra at index -1,
+ * creates and preloads their FIFOs, and records default_io_ratio.  The first
+ * call in the process also fills the shared fade and poly-phase tables.
+ * NOTE(review): the calloc() result is used unchecked, and since the shared
+ * tables are built only once, `mult' from any later call is ignored. */
+static void vr_init(rate_t * p, double default_io_ratio, int num_stages, double mult)
+{
+  int i;
+  assert(num_stages >= 0);
+  memset(p, 0, sizeof(*p));
+
+  p->num_stages0 = num_stages;
+  p->num_stages = num_stages = max(num_stages, 1);
+  p->stages = (stage_t *)calloc((unsigned)num_stages + 1, sizeof(*p->stages)) + 1; /* + 1 so that stages[-1] is valid. */
+  for (i = -1; i < p->num_stages; ++i) {
+    stage_t * s = &p->stages[i];
+    fifo_create(&s->fifo, sizeof(float));
+    s->step_mult = 2 * MULT32 / shiftl(2, i); /* 2^33, 2^32, 2^31, ... for i = -1, 0, 1, ... */
+    s->preload = i < 0? 0 : i == 0? 2 * HALF_FIR_LEN_2 : 3 * HALF_FIR_LEN_2 / 2;
+    stage_preload(s);
+    s->is_fast = true;
+    lsx_debug("%-3i preload=%i", i, s->preload);
+  }
+  fifo_create(&p->output_fifo, sizeof(float));
+  p->default_io_ratio = default_io_ratio;
+  if (!fade_coefs[0]) { /* fade_coefs[0] becomes 1 once the tables are built. */
+    for (i = 0; i < iAL(fade_coefs); ++i)
+      fade_coefs[i] = (float)(.5 * (1 + cos(M_PI * i / (AL(fade_coefs) - 1))));
+    prepare_coefs(poly_fir_coefs_u, POLY_FIR_LEN_U, PHASES0_U, PHASES_U, coefs0_u, mult);
+    prepare_coefs(poly_fir_coefs_d, POLY_FIR_LEN_D, PHASES0_D, PHASES_D, coefs0_d, mult *.5);
+  }
+  assert(fade_coefs[0]);
+}
+
+/* Points p->current at the stage selected by current.stage_num: sets its
+ * length limit (occupancy0 shifted by the stage number), its input pointer,
+ * its is_d flag (stage number >= 0) and its step multiplier, which is the
+ * stage's own multiplier, halved for an is_d stream. */
+static void enter_new_stage(rate_t * p, int occupancy0)
+{
+  stream_t * const cur = &p->current;
+  stage_t * const stage = &p->stages[cur->stage_num];
+
+  cur->len = shiftr(occupancy0, cur->stage_num);
+  cur->input = stage_read_p(stage);
+  cur->is_d = cur->stage_num >= 0;
+  cur->step_mult = stage->step_mult;
+  if (cur->is_d)
+    cur->step_mult *= .5;
+}
+
+/* Stores io_ratio, scaled by the stream's step multiplier and rounded to
+ * the nearest integer, as the stream's fixed-point step. */
+static void set_step(stream_t * p, double io_ratio)
+{
+  int64_t const fixed = (int64_t)(io_ratio * p->step_mult + .5);
+  p->step.all = fixed;
+}
+
+/* Sets p->step_step to the per-output increment that carries p->step from
+ * its present value to the step for io_ratio over slew_len outputs, rounded
+ * to nearest.  Returns true when that increment is non-zero. */
+static bool set_step_step(stream_t * p, double io_ratio, int slew_len)
+{
+  int const half_slew = slew_len >> 1;
+  stream_t target = *p;
+  int64_t delta;
+
+  set_step(&target, io_ratio);
+  delta = target.step.all - p->step.all;
+  if (delta < 0)
+    delta -= half_slew;
+  else
+    delta += half_slew;
+
+  if (delta == (int)delta)    /* Fits in an int: avoid the int64_t divide. */
+    p->step_step.all = (int)delta / slew_len;
+  else
+    p->step_step.all = delta / slew_len;
+  return p->step_step.all != 0;
+}
+/* Changes the input/output ratio.  With slew_len != 0 the step is ramped
+ * over slew_len outputs and io_ratio is kept in new_io_ratio to be applied
+ * exactly later (see vr_process).  With slew_len == 0 the step is set at
+ * once; on the first such call (default_io_ratio still non-zero) the
+ * starting stage is chosen from floor(log2(io_ratio)) and the read position
+ * starts half a step in. */
+static void vr_set_io_ratio(rate_t * p, double io_ratio, size_t slew_len)
+{
+  assert(io_ratio > 0);
+  if (slew_len) {
+    if (!set_step_step(&p->current, io_ratio, p->slew_len = (int)slew_len))
+      p->slew_len = 0, p->new_io_ratio = 0, p->fadeout.step_step.all = 0; /* No change per output: nothing to slew. */
+    else {
+      p->new_io_ratio = io_ratio;
+      if (p->fade_len)
+        set_step_step(&p->fadeout, io_ratio, p->slew_len);
+    }
+  }
+  else {
+    if (p->default_io_ratio) { /* Then this is the first call to this fn. */
+      int octave = (int)floor(log(io_ratio) / M_LN2);
+      p->current.stage_num = octave < 0? -1 : min(octave, p->num_stages0-1);
+      enter_new_stage(p, 0);
+    }
+    else if (p->fade_len)
+      set_step(&p->fadeout, io_ratio);
+    set_step(&p->current, io_ratio);
+    if (p->default_io_ratio) FRAC(p->current.at) = FRAC(p->current.step) >> 1;
+    p->default_io_ratio = 0;
+  }
+}
+/* Extends stage `stage_num' with samples derived from its neighbour,
+ * stages[stage_num - sign].  sign = 1 halves the sample count using half_fir
+ * or fast_half_fir; sign = -1 (used with stage_num < 0) doubles it using
+ * double_fir0/double_fir1.  Stages below min_stage_num use the fast filter.
+ * Returns false when the neighbour holds nothing new for this stage. */
+static bool do_input_stage(rate_t * p, int stage_num, int sign, int min_stage_num)
+{
+  int i = 0;
+  float * dest;
+  stage_t * s = &p->stages[stage_num];
+  stage_t * s1 = &p->stages[stage_num - sign];
+  float const * src = (float *)fifo_read_ptr(&s1->fifo) + HALF_FIR_LEN_2;
+  int len = shiftr(fifo_occupancy(&s1->fifo) - HALF_FIR_LEN_2 * 2, sign);
+  int already_done = fifo_occupancy(&s->fifo) - s->preload;
+  if ((len -= already_done) <= 0)
+    return false;
+  src += shiftl(already_done, sign); /* Skip source samples already converted. */
+
+  dest = fifo_reserve(&s->fifo, len);
+  if (stage_num < 0) for (; i < len; ++src) /* Two outputs per source sample. */
+    dest[i++] = double_fir0(src), dest[i++] = double_fir1(src);
+  else {
+    bool should_be_fast = p->stage_inc;
+    if (!s->x_fade_len && stage_num == p->switch_stage_num) {
+      p->switch_stage_num = 0;
+      if (s->is_fast != should_be_fast) { /* Start cross-fading to the other filter. */
+        s->x_fade_len = 1 << FADE_LEN_BITS, s->is_fast = should_be_fast, ++p->xfade;
+        lsx_debug("xfade level %i, inc?=%i", stage_num, p->stage_inc);
+      }
+    }
+    if (s->x_fade_len) { /* vol1 walks fade_coefs backwards (gain rising), vol2 forwards (gain falling). */
+      float const * vol1 = fade_coefs + (s->x_fade_len << 1);
+      float const * vol2 = fade_coefs + (((1 << FADE_LEN_BITS) - s->x_fade_len) << 1);
+      int n = min(len, s->x_fade_len);
+      /*lsx_debug("xfade level %i, inc?=%i len=%i n=%i", stage_num, p->stage_inc, s->x_fade_len, n);*/
+      if (should_be_fast)
+        for (; i < n; vol2 += 2, vol1 -= 2, src += 2)
+          dest[i++] = *vol1 * fast_half_fir(src) + *vol2 * half_fir(src);
+      else for (; i < n; vol2 += 2, vol1 -= 2, src += 2)
+        dest[i++] = *vol2 * fast_half_fir(src) + *vol1 * half_fir(src);
+      s->x_fade_len -= n;
+      p->xfade -= !s->x_fade_len; /* One fewer stage cross-fading once this reaches 0. */
+    }
+    if (stage_num < min_stage_num)
+      for (; i < len; dest[i++] = fast_half_fir(src), src += 2);
+    else for (; i < len; dest[i++] = half_fir(src), src += 2);
+  }
+  if (p->flushing > 0)
+    stage_preload(s);
+  return true;
+}
+/* Produces up to olen0 samples into p->output_fifo and returns how many were
+ * produced (fewer when the stage FIFOs run out of input).  Outline:
+ * (1) refresh every stage FIFO needed by the current stream and, while a
+ * stage change is being faded, by the fade-out stream; (2) generate output
+ * in chunks of at most 64 samples, deciding before each chunk whether to
+ * move to a neighbouring stage; (3) discard the input consumed from the
+ * stage FIFOs and hand back the unused part of the reserved output. */
+static int vr_process(rate_t * p, int olen0)
+{
+  assert(p->num_stages > 0);
+  if (p->default_io_ratio) /* Initial ratio not applied yet. */
+    vr_set_io_ratio(p, p->default_io_ratio, 0);
+  {
+    float * output = fifo_reserve(&p->output_fifo, olen0);
+    int j, odone0 = 0, min_stage_num = p->current.stage_num;
+    int occupancy0, max_stage_num = min_stage_num;
+    if (p->fade_len) {
+      min_stage_num = min(min_stage_num, p->fadeout.stage_num);
+      max_stage_num = max(max_stage_num, p->fadeout.stage_num);
+    }
+    /* Stage 0 is written directly by vr_input(), hence the `j &&' skip. */
+    for (j = min(min_stage_num, 0); j <= max_stage_num; ++j)
+      if (j && !do_input_stage(p, j, j < 0? -1 : 1, min_stage_num))
+        break;
+    if (p->flushing > 0)
+      p->flushing = -1;
+    /* occupancy0: usable occupancy of the highest stage, shifted to stage-0 scale. */
+    occupancy0 = shiftl(max(0,stage_occupancy(&p->stages[max_stage_num])), max_stage_num);
+    p->current.len = shiftr(occupancy0, p->current.stage_num);
+    p->current.input = stage_read_p(&p->stages[p->current.stage_num]);
+    if (p->fade_len) {
+      p->fadeout.len = shiftr(occupancy0, p->fadeout.stage_num);
+      p->fadeout.input = stage_read_p(&p->stages[p->fadeout.stage_num]);
+    }
+    /* Each pass handles one chunk of at most AL(buf) / 2 = 64 outputs. */
+    while (odone0 < olen0) {
+      int odone, odone2, olen = olen0 - odone0, stage_dif = 0, shift;
+      float buf[64 << 1];
+      /* While slewing, stop the chunk where the slew ends; once it has ended, snap the steps to the exact target. */
+      olen = min(olen, (int)(AL(buf) >> 1));
+      if (p->slew_len)
+        olen = min(olen, p->slew_len);
+      else if (p->new_io_ratio) {
+        set_step(&p->current, p->new_io_ratio);
+        set_step(&p->fadeout, p->new_io_ratio);
+        p->fadeout.step_step.all = p->current.step_step.all = 0;
+        p->new_io_ratio = 0;
+      }
+      if (!p->flushing && !p->fade_len && !p->xfade) { /* Only consider a stage change when nothing else is in transition. */
+        if (p->current.is_d) {
+          if (INT(p->current.step) && FRAC(p->current.step)) /* Step above 1: go up a stage. */
+            stage_dif = 1, ++max_stage_num;
+          else if (!INT(p->current.step) && FRAC(p->current.step) < (1u << 31)) /* Step below .5: go down. */
+            stage_dif = -1, --min_stage_num;
+        } else if (INT(p->current.step) > 1 && FRAC(p->current.step)) /* Step above 2: go up. */
+          stage_dif = 1, ++max_stage_num;
+      }
+      if (stage_dif) {
+        int n = p->current.stage_num + stage_dif;
+        if (n >= p->num_stages) /* No such stage: cancel the change. */
+          --max_stage_num;
+        else {
+          p->stage_inc = stage_dif > 0;
+          p->fadeout = p->current; /* The old stream fades out while the new one fades in. */
+          p->current.stage_num += stage_dif;
+          if (!p->stage_inc)
+          p->switch_stage_num = p->current.stage_num; /* Still the body of the `if' above, despite the indent. */
+          if ((p->current.stage_num < 0 && stage_dif < 0) ||
+              (p->current.stage_num > 0 && stage_dif > 0)) { /* Moving away from stage 0: rebuild the new stage from scratch. */
+            stage_t * s = &p->stages[p->current.stage_num];
+            fifo_clear(&s->fifo);
+            stage_preload(s);
+            s->is_fast = false;
+            do_input_stage(p, p->current.stage_num, stage_dif, p->current.stage_num);
+          }
+          if (p->current.stage_num > 0 && stage_dif < 0) {
+            int idone = INT(p->current.at);
+            stage_t * s = &p->stages[p->current.stage_num];
+            fifo_trim_to(&s->fifo, 2 * HALF_FIR_LEN_2 + idone + (POLY_FIR_LEN_D >> 1));
+            do_input_stage(p, p->current.stage_num, 1, p->current.stage_num);
+          }
+          enter_new_stage(p, occupancy0);
+          shift = -stage_dif; /* Rescale position and steps to the new stage. */
+#define lshift(x,by) (x)=(by)>0?(x)<<(by):(x)>>-(by)
+          lshift(p->current.at.all, shift);
+          shift += p->fadeout.is_d - p->current.is_d;
+          lshift(p->current.step.all, shift);
+          lshift(p->current.step_step.all, shift);
+          p->fade_len = AL(fade_coefs) - 1;
+          lsx_debug("switch from stage %i to %i, x2 from %i to %i", p->fadeout.stage_num, p->current.stage_num, p->fadeout.is_d, p->current.is_d);
+        }
+      }
+      /* Generate the chunk: cross-faded pair of streams, a single x2 stream, or a single x1 stream. */
+      if (p->fade_len) {
+        float const * vol1 = fade_coefs + p->fade_len;
+        float const * vol2 = fade_coefs + (iAL(fade_coefs) - 1 - p->fade_len);
+        int olen2 = (olen = min(olen, p->fade_len >> 1)) << 1;
+
+        /* x2 is more fine-grained so may fail to produce a pair of samples
+         * where x1 would not (the x1 second sample is a zero so is always
+         * available).  So do x2 first, then feed odone to the second one. */
+        memset(buf, 0, sizeof(*buf) * (size_t)olen2);
+        if (p->current.is_d && p->fadeout.is_d) {
+          odone  = poly_fir_fade_d(&p->current, vol1,-1, buf, olen2);
+          odone2 = poly_fir_fade_d(&p->fadeout, vol2, 1, buf, odone);
+        } else if (p->current.is_d) {
+          odone  = poly_fir_fade_d(&p->current, vol1,-1, buf, olen2);
+          odone2 = poly_fir_fade_u(&p->fadeout, vol2, 2, buf, odone);
+        } else {
+          assert(p->fadeout.is_d);
+          odone  = poly_fir_fade_d(&p->fadeout, vol2, 1, buf, olen2);
+          odone2 = poly_fir_fade_u(&p->current, vol1,-2, buf, odone);
+        }
+        assert(odone == odone2);
+        (void)odone2;
+        p->fade_len -= odone;
+        if (!p->fade_len) { /* Fade finished: the fade-out stage is no longer needed. */
+          if (p->stage_inc)
+            p->switch_stage_num = min_stage_num++;
+          else
+            --max_stage_num;
+        }
+        half_iir(&p->halfer, &output[odone0], buf, odone >>= 1);
+      }
+      else if (p->current.is_d) {
+        odone = poly_fir_d(&p->current, buf, olen << 1) >> 1;
+        half_iir(&p->halfer, &output[odone0], buf, odone);
+      }
+      else {
+        odone = poly_fir_u(&p->current, &output[odone0], olen);
+        if (p->num_stages0)
+          half_phase(&p->halfer, &output[odone0], odone);
+      }
+      odone0 += odone;
+      if (p->slew_len)
+        p->slew_len -= odone;
+      if (odone != olen)
+        break; /* Need more input. */
+    } { /* Drop consumed input from every stage in use, doubling the count per lower stage. */
+      int from = max(0, max_stage_num), to = min(0, min_stage_num);
+      int i, idone = shiftr(INT(p->current.at), from - p->current.stage_num);
+      INT(p->current.at) -= shiftl(idone, from - p->current.stage_num);
+      if (p->fade_len)
+        INT(p->fadeout.at) -= shiftl(idone, from - p->fadeout.stage_num);
+      for (i = from; i >= to; --i, idone <<= 1)
+        fifo_read(&p->stages[i].fifo, idone, NULL);
+    }
+    fifo_trim_by(&p->output_fifo, olen0 - odone0); /* Give back the unused reservation. */
+    return odone0;
+  }
+}
+
+/* Writes n input samples to the FIFO of stage 0 and returns whatever
+ * fifo_write() returns for that write. */
+static float * vr_input(rate_t * p, float const * input, size_t n)
+{
+  stage_t * const first_stage = &p->stages[0];
+  return fifo_write(&first_stage->fifo, (int)n, input);
+}
+
+/* Reads up to *n samples from the output FIFO into `output'.  On return *n
+ * holds the number of samples actually read; the result is whatever
+ * fifo_read() returns.  The IIR-delay-compensating variant is compiled but
+ * permanently disabled, exactly as before. */
+static float const * vr_output(rate_t * p, float * output, size_t * n)
+{
+  fifo_t * const out = &p->output_fifo;
+  if (0 && p->num_stages0) { /* Ignore this complication for now. */
+    int const IIR_DELAY = 2;
+    float * ptr = fifo_read_ptr(out);
+    int olen = min((int)*n, max(0, fifo_occupancy(out) - IIR_DELAY));
+    *n = (size_t)olen;
+    if (output)
+      memcpy(output, ptr + IIR_DELAY, *n * sizeof(*output));
+    fifo_read(out, olen, NULL);
+    return ptr + IIR_DELAY;
+  }
+  *n = min(*n, (size_t)fifo_occupancy(out));
+  return fifo_read(out, (int)*n, output);
+}
+
+/* Starts a flush: on the first call only, preloads stage 0 with zeros once
+ * more and marks the resampler as flushing. */
+static void vr_flush(rate_t * p)
+{
+  if (p->flushing)
+    return;             /* A flush has already been started. */
+  stage_preload(&p->stages[0]);
+  p->flushing = 1;
+}
+
+/* Releases the output FIFO, the FIFO of every stage (index -1 up to
+ * num_stages - 1) and finally the stage array itself. */
+static void vr_close(rate_t * p)
+{
+  stage_t * const block = p->stages - 1;  /* The pointer that is freed. */
+  stage_t * const end = p->stages + p->num_stages;
+  stage_t * s;
+
+  fifo_delete(&p->output_fifo);
+  for (s = block; s < end; ++s)
+    fifo_delete(&s->fifo);
+  free(block);
+}
+
+/* Placeholder: always reports a delay of 100, whatever the state of *p. */
+static double vr_delay(rate_t * p)
+{
+  (void)p;
+  return 100; /* TODO */
+}
+
+/* Reports the storage this engine needs: nothing shared, and one rate_t
+ * per channel. */
+static void vr_sizes(size_t * shared, size_t * channel)
+{
+  size_t const per_channel = sizeof(rate_t);
+  *shared = 0;
+  *channel = per_channel;
+}
+
+/* Initialises the per-channel state in `channel'.  The stage count passed to
+ * vr_init is the number of halvings needed to bring max_io_ratio down to 1
+ * or below.  shared, q_spec and r_spec are unused.  Always returns 0. */
+static char const * vr_create(void * channel, void * shared,double max_io_ratio,
+    void * q_spec, void * r_spec, double scale)
+{
+  int halvings = 0;
+  double ratio;
+  (void)shared, (void)q_spec, (void)r_spec;
+  for (ratio = max_io_ratio; ratio > 1; ratio *= .5)
+    ++halvings;
+  vr_init(channel, max_io_ratio, halvings, scale);
+  return 0;
+}
+
+/* Returns the human-readable name of this resampling engine. */
+static char const * vr_id(void)
+{
+  static char const name[] = "single-precision variable-rate";
+  return name;
+}
+/* Entry points of this engine, exported as a table of generic function
+ * pointers; every entry is cast to fn_t, so a user has to cast each one back
+ * to its real signature.  NOTE(review): the slot order is presumably fixed
+ * by the code that consumes this table -- confirm before reordering. */
+typedef void (* fn_t)(void);
+fn_t _soxr_vr32_cb[] = {
+  (fn_t)vr_input,
+  (fn_t)vr_process,
+  (fn_t)vr_output,
+  (fn_t)vr_flush,
+  (fn_t)vr_close,
+  (fn_t)vr_delay,
+  (fn_t)vr_sizes,
+  (fn_t)vr_create,
+  (fn_t)vr_set_io_ratio,
+  (fn_t)vr_id,
+};
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt

new file mode 100644 (file)

index 0000000..277fc55
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,50 @@
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+# The two directory-scope commands below apply to every executable created in this file.
+add_definitions (${PROJECT_C_FLAGS})
+link_libraries (${PROJECT_NAME})
+# Each *.c file in this directory becomes an executable named after the file, extension dropped.
+file (GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.c)
+foreach (fe ${SOURCES})
+  get_filename_component (f ${fe} NAME_WE)
+  add_executable (${f} ${fe})
+endforeach ()
+
+enable_testing ()
+# Values handed to vector-gen and cmp-test.cmake below; base_rate is twice sweep_to_freq.
+set (sweep_to_freq 22050)
+set (leader 1)
+set (len 16)
+math (EXPR base_rate "${sweep_to_freq} + ${sweep_to_freq}")
+
+# add_vector (<rate>)
+# Declares the rule that generates ${CMAKE_CURRENT_BINARY_DIR}/ref-<rate>.s32
+# by running vector-gen, and records that file in the `vectors' list, which
+# the test-vectors target depends on.  The macro is invoked several times for
+# the same rate, so the rule is declared and the file listed only on the
+# first invocation.  Sets `output' (and `vector_index') in the caller's scope.
+macro (add_vector r)
+  set (output ${CMAKE_CURRENT_BINARY_DIR}/ref-${r}.s32)
+  list (FIND vectors "${output}" vector_index)
+  if (vector_index EQUAL -1)
+    add_custom_command (OUTPUT ${output} DEPENDS vector-gen ${CMAKE_CURRENT_LIST_FILE}
+      COMMAND vector-gen ${r} ${leader} ${len} ${sweep_to_freq} 1 ${output}
+      VERBATIM)
+    set (vectors ${output} ${vectors})
+  endif ()
+endmacro ()
+# add_cmp_test (<from> <to> <bits>): registers the CTest test "<bits>-bit-perfect-<from>-<to>".
+macro (add_cmp_test from to bits)
+  set (name ${bits}-bit-perfect-${from}-${to})
+  add_test (NAME ${name} COMMAND ${CMAKE_COMMAND} -Dbits=${bits} -DBIN=${BIN} -DEXAMPLES_BIN=${EXAMPLES_BIN} -Dleader=${leader} -Dto=${to}
+    -Dfrom=${from} -Dlen=${len} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmp-test.cmake) # Runs cmp-test.cmake in script mode.
+  add_vector (${from}) # The test reads ref-<from>.s32 and ref-<to>.s32.
+  add_vector (${to})
+endmacro ()
+# test_bits collects one bit depth per enabled precision: 20 for single, 24 for double.
+unset (test_bits)
+if (WITH_SINGLE_PRECISION)
+  set (test_bits 20)
+endif ()
+if (WITH_DOUBLE_PRECISION)
+  set (test_bits ${test_bits} 24)
+endif ()
+# For every bit depth, test conversion in both directions between base_rate and each listed rate.
+foreach (b ${test_bits})
+  foreach (r 96000 65537)
+    add_cmp_test (${base_rate} ${r} ${b})
+    add_cmp_test (${r} ${base_rate} ${b})
+  endforeach ()
+endforeach ()
+# Built by default (ALL), so the reference vectors collected by add_vector exist before the tests run.
+add_custom_target (test-vectors ALL DEPENDS ${vectors})
diff --git a/tests/README b/tests/README

new file mode 100644 (file)

index 0000000..44460d6
--- /dev/null
+++ b/tests/README
@@ -0,0 +1 @@
+A few tests on the pass-band performance; not a comprehensive test suite.
diff --git a/tests/cmp-test.cmake b/tests/cmp-test.cmake

new file mode 100644 (file)

index 0000000..8db76c5
--- /dev/null
+++ b/tests/cmp-test.cmake
@@ -0,0 +1,40 @@
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# Test helper script, run with `cmake -P'.  Expects these -D definitions:
+#   bits          Bit depth to verify (24 selects quality 45, otherwise 44).
+#   from, to      Input and output sample-rates.
+#   leader, len   Passed through to vector-cmp.
+#   BIN           Path prefix for vector-cmp.
+#   EXAMPLES_BIN  Path prefix for 3-options-input-fn.
+# Resamples ref-<from>.s32 and compares the result with ref-<to>.s32; both
+# reference files are looked up in the current working directory.
+
+if ("${bits}" STREQUAL "24")
+  set (quality 45)
+else ()
+  set (quality 44)
+endif ()
+
+set (output ${from}-${to}-${quality}.s32)
+
+execute_process(COMMAND ${EXAMPLES_BIN}3-options-input-fn ${from} ${to} 1 2 2 ${quality} a
+  INPUT_FILE ref-${from}.s32
+  OUTPUT_FILE ${output}
+  ERROR_VARIABLE test_error
+  RESULT_VARIABLE test_result)
+
+if (test_result)
+  message (FATAL_ERROR "Resampling failure: ${test_error}")
+endif ()
+
+execute_process(COMMAND ${BIN}vector-cmp ref-${to}.s32 ${output} ${to} ${leader} ${len} ${bits} 98
+  OUTPUT_VARIABLE test_output
+  RESULT_VARIABLE test_result)
+
+# Quoted so that the report is printed as-is even if it is empty or contains `;'.
+if (test_result)
+  message (FATAL_ERROR "${test_output}")
+else ()
+  message (STATUS "${test_output}")
+endif ()
diff --git a/tests/eg-test b/tests/eg-test

new file mode 100755 (executable)

index 0000000..797c715
--- /dev/null
+++ b/tests/eg-test
@@ -0,0 +1,37 @@
+#!/bin/bash
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# Manual check of the example programs; needs `sox' on the PATH and is run
+# from the directory that contains the built examples.
+
+len=8
+#vg="valgrind --leak-check=full --show-reachable=yes"
+
+$vg ./1-single-block
+
+# Resample a sine in each direction (96k->44.1k, then 44.1k->96k) with every
+# executable here whose name starts with 2, 3 or 4; cmp reports any output that
+# differs from the previous example's.
+ir=96000
+or=44100
+for i in 1 2; do
+  prev=""
+  sox -r $ir -n 0.f32 synth $len sin 0+$((ir / 2))
+  for f in $(find . -type f -executable -name "[2-4]*"); do
+    $vg $f $ir $or < 0.f32 > $f.f32
+    if test x$prev != x; then cmp $f.f32 $prev; fi
+    prev=$f.f32
+  done
+  or=96000
+  ir=44100
+done
+rm *.f32
+
+# Plot a spectrogram of 5-variable-rate's output for each argument 0-3; $vg
+# (if enabled above) is applied to the first run only.
+rm ?.png
+for n in 0 1 2 3; do
+  $vg ./5-variable-rate $n | sox -tf32 -r44100 -c1 - -n spectrogram -hwk -o $n.png -X 50
+  vg=""
+done
diff --git a/tests/io-test b/tests/io-test

new file mode 100755 (executable)

index 0000000..537f896
--- /dev/null
+++ b/tests/io-test
@@ -0,0 +1,49 @@
+#!/bin/bash
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# Manual check of the I/O sample formats: resamples generated sweeps with each
+# combination below and renders a spectrogram of every result (needs `sox').
+# Usage: io-test [max-channels]     (default: 2)
+
+ir=96000
+or=44100
+len=16
+f=0+48k
+g=48k+0
+ex=./3-options-input-fn
+
+types=(f32 f64 s32 s16)
+
+# do_one <in-type> <out-type> <arg>
+# The types are indices into `types'; <arg> is passed on to $ex unchanged
+# (presumably its quality setting -- confirm against the example's source).
+# Uses the globals c (channel count) and n (image counter, incremented here).
+do_one() {
+  $ex $ir $or $c $1 $2 $3 < $c.${types[$1]} |
+  sox -t ${types[$2]} -r $or -c $c - -n spectrogram -X50 -hwk -z180 -o io$n$c.png
+  n=$((n + 1))
+}
+
+rm io??.png
+
+j=${1:-2}
+
+for c in $(seq 1 $j); do
+  # One input file per sample format for this channel count.
+  for n in $(seq 0 3); do
+    sox -r $ir -n $c.${types[$n]} synth $len sin $f gain -.1
+  done
+
+  n=0
+  for m in $(seq 0 3); do do_one $m $m 4; done
+  do_one 1 2 5
+  do_one 2 0 5
+  do_one 3 2 4
+  do_one 0 3 4
+
+  f="$f sin $g"
+  g=48k:0
+done
+
+rm ?.[sf][0-9][0-9]
diff --git a/tests/large-ratio b/tests/large-ratio

new file mode 100755 (executable)

index 0000000..a12bda9
--- /dev/null
+++ b/tests/large-ratio
@@ -0,0 +1,22 @@
+#!/bin/sh
+# SoX Resampler Library       Copyright (c) 2007-13 robs@users.sourceforge.net
+# Licence for this file: LGPL v2.1                  See LICENCE for details.
+
+# Warning: the intermediate signal (piped) is 3.2 Gbytes so may slug the
+# system somewhat.
+
+ex=../examples/3-options-input-fn
+q=6
+r=1e5
+
+rm lr.png
+# Input vector: rate 1000, no lead-in, 8 s sweep from DC to 500, multiplier .9375.
+../tests/vector-gen 1000 0 8 500 .9375 1.s32
+# Resample from rate 1 up to rate $r, then straight back down again.
+$ex 1 $r 1 2 1 $q < 1.s32 | $ex $r 1 1 1 2 $q > 2.s32
+# Plot the original and the round-tripped signal together for comparison.
+sox -M -r 1k 1.s32 -r 1k 2.s32 -n spectrogram -hwk -z180 -o lr.png
+
+display lr.png &
+
+rm [12].s32
diff --git a/tests/vector-cmp.c b/tests/vector-cmp.c

new file mode 100644 (file)

index 0000000..6edd2d5
--- /dev/null
+++ b/tests/vector-cmp.c
@@ -0,0 +1,82 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Utility used to help test the library; not for general consumption.
+ *
+ * Compare two swept-sine files.
+ *
+ * Usage: vector-cmp file1 file2 rate leader-len len expect-bits expect-bw
+ *
+ * Both files are read as raw 32-bit integer samples in host byte order.  A
+ * one-line report goes to stdout; the exit status is 0 only if the files have
+ * the expected length and the result meets both expectations.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "../src/rint.h"
+
+int main(int argc, char const * arg[])
+{
+  FILE    * f1, * f2;
+  double  rate,        /* Rate for this vector */
+          leader_len,  /* Leader length in seconds */
+          len,         /* Sweep length (excl. leader_len) */
+          expect_bits, /* Fewest bit-perfect bits that count as a pass */
+          expect_bw;   /* Least bandwidth (% nyquist) that counts as a pass */
+
+  int32_t s1, s2;
+  long count = 0;
+  static long thresh[32]; /* [n]: 1 + index of 1st sample differing in bit n */
+  double bw, prev = 0;
+  int bit;
+
+  if (argc < 8) {
+    printf("usage: vector-cmp file1 file2 rate leader-len len bits bw\n");
+    return 1;
+  }
+  f1 = fopen(arg[1], "rb");
+  f2 = fopen(arg[2], "rb");
+  if (!f1 || !f2) {
+    printf("can't open input file %s\n", arg[f1? 2 : 1]);
+    return 1;
+  }
+  rate        = atof (arg[3]);
+  leader_len  = atof (arg[4]);
+  len         = atof (arg[5]);
+  expect_bits = atof (arg[6]);
+  expect_bw   = atof (arg[7]);
+
+  for (; fread(&s1, sizeof(s1), 1, f1) == 1 &&
+         fread(&s2, sizeof(s2), 1, f2) == 1; ++count) {
+    /* Subtract as unsigned: s1 - s2 can overflow a signed 32-bit integer. */
+    unsigned long diff = s1 > s2? (unsigned long)s1 - (unsigned long)s2 :
+                                  (unsigned long)s2 - (unsigned long)s1;
+    for (bit = 0; diff && bit < 32; bit++, diff >>= 1)
+      if ((diff & 1) && !thresh[bit])
+        thresh[bit] = count + 1;
+  }
+  fclose(f1);
+  fclose(f2);
+
+  if (count != (long)((leader_len + len) * rate + .5)) {
+    printf("incorrect file length\n");
+    exit(1);
+  }
+
+  /* Scan up from the LSB, converting each bit's first-difference position to
+   * seconds past the leader; stop (stepping back one bit) at the first bit
+   * whose first difference is less than .08% of len after the bit below's.  */
+  for (bit = 0; bit < 32; ++bit) {
+    bw = ((double)thresh[bit] - 1) / rate - leader_len;
+    if (bit && bw >= 0 && (bw - prev) * 100 / len < .08) {
+      --bit;
+      break;
+    }
+    prev = bw;
+  }
+  bit = 32 - bit;
+  bw = bw * 100 / len;
+  printf("Bit perfect to %i bits, from DC to %.2f%% nyquist.\n", bit, bw);
+  return !(bit >= expect_bits && bw >= expect_bw);
+}
diff --git a/tests/vector-gen.c b/tests/vector-gen.c

new file mode 100644 (file)

index 0000000..06d4bac
--- /dev/null
+++ b/tests/vector-gen.c
@@ -0,0 +1,75 @@
+/* SoX Resampler Library      Copyright (c) 2007-13 robs@users.sourceforge.net
+ * Licence for this file: LGPL v2.1                  See LICENCE for details. */
+
+/* Utility used to help test the library; not for general consumption.
+ *
+ * Generate a swept sine to a file, with faded `lead-in' section.
+ *
+ * Usage: vector-gen rate lead-in-len len sweep-to-freq multiplier file
+ *
+ * Samples are written in host byte order: 32-bit integers normally, or
+ * doubles if built with QUAD set.  The exit status is non-zero on any error.  */
+
+#define QUAD 0
+
+#if QUAD
+  #include <quadmath.h>
+#endif
+
+#include "../examples/examples-common.h"
+
+#if QUAD
+  #define modf modfq
+  #define cos cosq
+  #define sin sinq
+  #undef M_PI
+  #define M_PI M_PIq
+  #define real __float128
+  #define atof(x) strtoflt128(x, 0)
+#else
+  #define real double
+  #include "rint.h"
+#endif
+
+int main(int argc, char const * argv[])
+{
+  real rate, lead_in_len, len, sweep_to_freq, multiplier, f1, f2, n1, n2, m,
+       dummy;
+  FILE * file;
+  int i;
+
+  if (argc < 7) {
+    fprintf(stderr, "usage: vector-gen rate lead-in-len len sweep-to-freq multiplier file\n");
+    exit(1);
+  }
+  rate          = atof(argv[1]); /* Rate for this vector */
+  lead_in_len   = atof(argv[2]); /* Lead-in length in seconds */
+  len           = atof(argv[3]); /* Sweep length (excl. lead_in_len) */
+  sweep_to_freq = atof(argv[4]); /* Sweep from DC to this freq. */
+  multiplier    = atof(argv[5]); /* For headroom */
+  f1 = -sweep_to_freq / len * lead_in_len; f2 = sweep_to_freq;
+  n1 = rate * -lead_in_len; n2 = rate * len;
+  m = (f2 - f1) / (n2 - n1) / 2;
+
+  file = fopen(argv[6], "wb");
+  i = (int)n1;
+  if (!file || i != n1) /* The lead-in must be a whole number of samples. */
+    exit(1);
+  /* Sample i has phase m*i*i/rate cycles, so the frequency rises linearly
+   * with i and passes through DC at i = 0, where the lead-in ends.  */
+  for (; i < (int)(n2 + .5); ++i) {
+    double d1 = multiplier * sin(2 * M_PI * modf(i * m * i / rate, &dummy));
+    /* Raised-cosine fade-in over the lead-in: gain 0 at i = n1, 1 at i = 0. */
+    double d = i < 0? d1 * (1 - cos(M_PI * (i + n1) / n1)) * .5 : d1;
+#if QUAD
+    size_t actual = fwrite(&d, sizeof(d), 1, file);
+#else
+    int32_t out = rint32(d * (32768. * 65536 - 1));
+    size_t actual = fwrite(&out, sizeof(out), 1, file);
+#endif
+    if (actual != 1)
+      return 1;
+  }
+  /* Closing flushes the last buffered samples, which can still fail. */
+  return fclose(file) != 0;
+}
author	Benjamin Drung <bdrung@debian.org>
	Fri, 5 Apr 2013 11:06:38 +0000 (12:06 +0100)
committer	Benjamin Drung <bdrung@debian.org>
	Fri, 5 Apr 2013 11:06:38 +0000 (12:06 +0100)
AUTHORS	[new file with mode: 0644]	patch \| blob
CMakeLists.txt	[new file with mode: 0644]	patch \| blob
COPYING.LGPL	[new file with mode: 0644]	patch \| blob
INSTALL	[new file with mode: 0644]	patch \| blob
LICENCE	[new file with mode: 0644]	patch \| blob
NEWS	[new file with mode: 0644]	patch \| blob
README	[new file with mode: 0644]	patch \| blob
TODO	[new file with mode: 0644]	patch \| blob
cmake/Modules/FindLibAVCodec.cmake	[new file with mode: 0644]	patch \| blob
cmake/Modules/FindOpenMP.cmake	[new file with mode: 0644]	patch \| blob
cmake/Modules/FindSIMD.cmake	[new file with mode: 0644]	patch \| blob
cmake/Modules/TestBigEndian.cmake	[new file with mode: 0644]	patch \| blob
deinstall.cmake.in	[new file with mode: 0644]	patch \| blob
examples/1-single-block.c	[new file with mode: 0644]	patch \| blob
examples/1a-lsr.c	[new file with mode: 0644]	patch \| blob
examples/2-stream.C	[new file with mode: 0644]	patch \| blob
examples/3-options-input-fn.c	[new file with mode: 0644]	patch \| blob
examples/4-split-channels.c	[new file with mode: 0644]	patch \| blob
examples/5-variable-rate.c	[new file with mode: 0644]	patch \| blob
examples/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
examples/README	[new file with mode: 0644]	patch \| blob
examples/examples-common.h	[new file with mode: 0644]	patch \| blob
go	[new file with mode: 0755]	patch \| blob
go.bat	[new file with mode: 0644]	patch \| blob
inst-check	[new file with mode: 0755]	patch \| blob
inst-check-soxr	[new file with mode: 0755]	patch \| blob
inst-check-soxr-lsr	[new symlink]	patch \| blob
msvc/README	[new file with mode: 0644]	patch \| blob
msvc/libsoxr.vcproj	[new file with mode: 0644]	patch \| blob
msvc/soxr-config.h	[new file with mode: 0644]	patch \| blob
soxr-config.h.in	[new file with mode: 0644]	patch \| blob
src/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
src/aliases.h	[new file with mode: 0644]	patch \| blob
src/avfft32.c	[new file with mode: 0644]	patch \| blob
src/avfft32s.c	[new file with mode: 0644]	patch \| blob
src/ccrw2.h	[new file with mode: 0644]	patch \| blob
src/data-io.c	[new file with mode: 0644]	patch \| blob
src/data-io.h	[new file with mode: 0644]	patch \| blob
src/dbesi0.c	[new file with mode: 0644]	patch \| blob
src/fft4g.c	[new file with mode: 0644]	patch \| blob
src/fft4g.h	[new file with mode: 0644]	patch \| blob
src/fft4g32.c	[new file with mode: 0644]	patch \| blob
src/fft4g32s.c	[new file with mode: 0644]	patch \| blob
src/fft4g64.c	[new file with mode: 0644]	patch \| blob
src/fft4g_cache.h	[new file with mode: 0644]	patch \| blob
src/fifo.h	[new file with mode: 0644]	patch \| blob
src/filter.c	[new file with mode: 0644]	patch \| blob
src/filter.h	[new file with mode: 0644]	patch \| blob
src/filters.h	[new file with mode: 0644]	patch \| blob
src/half-fir.h	[new file with mode: 0644]	patch \| blob
src/half_coefs.h	[new file with mode: 0644]	patch \| blob
src/internal.h	[new file with mode: 0644]	patch \| blob
src/libsoxr-dev.src.in	[new file with mode: 0644]	patch \| blob
src/libsoxr.src.in	[new file with mode: 0644]	patch \| blob
src/lsr.c	[new file with mode: 0644]	patch \| blob
src/pffft.c	[new file with mode: 0644]	patch \| blob
src/pffft.h	[new file with mode: 0644]	patch \| blob
src/pffft32.c	[new file with mode: 0644]	patch \| blob
src/pffft32s.c	[new file with mode: 0644]	patch \| blob
src/poly-fir.h	[new file with mode: 0644]	patch \| blob
src/poly-fir0.h	[new file with mode: 0644]	patch \| blob
src/rate.h	[new file with mode: 0644]	patch \| blob
src/rate32.c	[new file with mode: 0644]	patch \| blob
src/rate32s.c	[new file with mode: 0644]	patch \| blob
src/rate64.c	[new file with mode: 0644]	patch \| blob
src/rdft.h	[new file with mode: 0644]	patch \| blob
src/rint-clip.h	[new file with mode: 0644]	patch \| blob
src/rint.h	[new file with mode: 0644]	patch \| blob
src/samplerate.h	[new file with mode: 0644]	patch \| blob
src/simd-dev.h	[new file with mode: 0644]	patch \| blob
src/simd.c	[new file with mode: 0644]	patch \| blob
src/simd.h	[new file with mode: 0644]	patch \| blob
src/soxr-lsr.h	[new file with mode: 0644]	patch \| blob
src/soxr-lsr.pc.in	[new file with mode: 0644]	patch \| blob
src/soxr.c	[new file with mode: 0644]	patch \| blob
src/soxr.h	[new file with mode: 0644]	patch \| blob
src/soxr.pc.in	[new file with mode: 0644]	patch \| blob
src/vr32.c	[new file with mode: 0644]	patch \| blob
tests/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
tests/README	[new file with mode: 0644]	patch \| blob
tests/cmp-test.cmake	[new file with mode: 0644]	patch \| blob
tests/eg-test	[new file with mode: 0755]	patch \| blob
tests/io-test	[new file with mode: 0755]	patch \| blob
tests/large-ratio	[new file with mode: 0755]	patch \| blob
tests/vector-cmp.c	[new file with mode: 0644]	patch \| blob
tests/vector-gen.c	[new file with mode: 0644]	patch \| blob