+++ /dev/null
-/*
- * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
- /*
-* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
-*
-* Please refer to the NVIDIA end user license agreement (EULA) associated
-* with this source code for terms and conditions that govern your use of
-* this software. Any use, reproduction, disclosure, or distribution of
-* this software and related documentation outside the terms of the EULA
-* is strictly prohibited.
-*
-*/
-
-
-/* CUda UTility Library */
-
-#ifndef _CUTIL_H_
-#define _CUTIL_H_
-
-#ifdef _WIN32
-# pragma warning( disable : 4996 ) // disable deprecated warning
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- // helper macros for building the DLL (export) or consuming it (import)
-#ifdef _WIN32
-# ifdef BUILD_DLL
-# define DLL_MAPPING __declspec(dllexport)
-# else
-# define DLL_MAPPING __declspec(dllimport)
-# endif
-#else
-# define DLL_MAPPING
-#endif
-
-#ifdef _WIN32
- #define CUTIL_API __stdcall
-#else
- #define CUTIL_API
-#endif
-
- ////////////////////////////////////////////////////////////////////////////
- //! CUT bool type: the status / result value returned by most cut*
- //! functions declared in this header
- ////////////////////////////////////////////////////////////////////////////
- enum CUTBoolean
- {
- CUTFalse = 0, //!< failure, or the tested condition does not hold
- CUTTrue = 1 //!< success, or the tested condition holds
- };
-
- ////////////////////////////////////////////////////////////////////////////
- //! Deallocate memory allocated within Cutil
- //! @param ptr pointer to the memory to release
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- void CUTIL_API
- cutFree( void* ptr);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Helper for bank conflict checking (should only be used with the
- //! CUT_BANK_CHECKER macro)
- //! @param tidx thread id in x dimension of block
- //! @param tidy thread id in y dimension of block
- //! @param tidz thread id in z dimension of block
- //! @param bdimx block size in x dimension
- //! @param bdimy block size in y dimension
- //! @param bdimz block size in z dimension
- //! @param file name of the source file where the access takes place
- //! @param line line in the source file where the access takes place
- //! @param aname name of the array which is accessed
- //! @param index index into the array
- //! @note The CUT_BANK_CHECKER macro in this header calls this function only
- //! when _DEBUG is defined and __DEVICE_EMULATION__ is non-zero
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- void CUTIL_API
- cutCheckBankAccess( unsigned int tidx, unsigned int tidy, unsigned int tidz,
- unsigned int bdimx, unsigned int bdimy,
- unsigned int bdimz, const char* file, const int line,
- const char* aname, const int index);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Find the path for a filename
- //! @return the path if succeeded, otherwise 0
- //! @param filename name of the file
- //! @param executablePath optional absolute path of the executable
- //! @note NOTE(review): ownership of the returned string is not stated
- //! here; presumably it has to be released with cutFree() - confirm
- //! against the implementation
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- char* CUTIL_API
- cutFindFilePath(const char* filename, const char* executablePath);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Read file \a filename containing single precision floating point data
- //! @return CUTTrue if reading the file succeeded, otherwise CUTFalse
- //! @param filename name of the source file
- //! @param data uninitialized pointer, returned initialized and pointing to
- //! the data read
- //! @param len number of data elements in data, -1 on error
- //! (NOTE(review): len is unsigned, so -1 would read back as UINT_MAX)
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- //! @note If a NULL pointer is passed to this function and it is
- //! initialized within Cutil then cutFree() has to be used to
- //! deallocate the memory
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutReadFilef( const char* filename, float** data, unsigned int* len,
- bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Read file \a filename containing double precision floating point data
- //! @return CUTTrue if reading the file succeeded, otherwise CUTFalse
- //! @param filename name of the source file
- //! @param data uninitialized pointer, returned initialized and pointing to
- //! the data read
- //! @param len number of data elements in data, -1 on error
- //! (NOTE(review): len is unsigned, so -1 would read back as UINT_MAX)
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- //! @note If a NULL pointer is passed to this function and it is
- //! initialized within Cutil then cutFree() has to be used to
- //! deallocate the memory
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutReadFiled( const char* filename, double** data, unsigned int* len,
- bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Read file \a filename containing integer data
- //! @return CUTTrue if reading the file succeeded, otherwise CUTFalse
- //! @param filename name of the source file
- //! @param data uninitialized pointer, returned initialized and pointing to
- //! the data read
- //! @param len number of data elements in data, -1 on error
- //! (NOTE(review): len is unsigned, so -1 would read back as UINT_MAX)
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- //! @note If a NULL pointer is passed to this function and it is
- //! initialized within Cutil then cutFree() has to be used to
- //! deallocate the memory
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutReadFilei( const char* filename, int** data, unsigned int* len, bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Read file \a filename containing unsigned integer data
- //! @return CUTTrue if reading the file succeeded, otherwise CUTFalse
- //! @param filename name of the source file
- //! @param data uninitialized pointer, returned initialized and pointing to
- //! the data read
- //! @param len number of data elements in data, -1 on error
- //! (NOTE(review): len is unsigned, so -1 would read back as UINT_MAX)
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- //! @note If a NULL pointer is passed to this function and it is
- //! initialized within Cutil then cutFree() has to be used to
- //! deallocate the memory
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutReadFileui( const char* filename, unsigned int** data,
- unsigned int* len, bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Read file \a filename containing char / byte data
- //! @return CUTTrue if reading the file succeeded, otherwise CUTFalse
- //! @param filename name of the source file
- //! @param data uninitialized pointer, returned initialized and pointing to
- //! the data read
- //! @param len number of data elements in data, -1 on error
- //! (NOTE(review): len is unsigned, so -1 would read back as UINT_MAX)
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- //! @note If a NULL pointer is passed to this function and it is
- //! initialized within Cutil then cutFree() has to be used to
- //! deallocate the memory
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutReadFileb( const char* filename, char** data, unsigned int* len,
- bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Read file \a filename containing unsigned char / byte data
- //! @return CUTTrue if reading the file succeeded, otherwise CUTFalse
- //! @param filename name of the source file
- //! @param data uninitialized pointer, returned initialized and pointing to
- //! the data read
- //! @param len number of data elements in data, -1 on error
- //! (NOTE(review): len is unsigned, so -1 would read back as UINT_MAX)
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- //! @note If a NULL pointer is passed to this function and it is
- //! initialized within Cutil then cutFree() has to be used to
- //! deallocate the memory
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutReadFileub( const char* filename, unsigned char** data,
- unsigned int* len, bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Write a data file \a filename containing single precision floating point
- //! data
- //! @return CUTTrue if writing the file succeeded, otherwise CUTFalse
- //! @param filename name of the file to write
- //! @param data pointer to data to write
- //! @param len number of data elements in data (passed by value, so it
- //! cannot report an error back to the caller)
- //! @param epsilon epsilon for comparison
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutWriteFilef( const char* filename, const float* data, unsigned int len,
- const float epsilon, bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Write a data file \a filename containing double precision floating point
- //! data
- //! @return CUTTrue if writing the file succeeded, otherwise CUTFalse
- //! @param filename name of the file to write
- //! @param data pointer to data to write
- //! (NOTE(review): declared as const float* although this function is
- //! documented as the double precision variant and epsilon is a
- //! double - confirm against the implementation before relying on it)
- //! @param len number of data elements in data (passed by value, so it
- //! cannot report an error back to the caller)
- //! @param epsilon epsilon for comparison
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutWriteFiled( const char* filename, const float* data, unsigned int len,
- const double epsilon, bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Write a data file \a filename containing integer data
- //! @return CUTTrue if writing the file succeeded, otherwise CUTFalse
- //! @param filename name of the file to write
- //! @param data pointer to data to write
- //! @param len number of data elements in data (passed by value, so it
- //! cannot report an error back to the caller)
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutWriteFilei( const char* filename, const int* data, unsigned int len,
- bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Write a data file \a filename containing unsigned integer data
- //! @return CUTTrue if writing the file succeeded, otherwise CUTFalse
- //! @param filename name of the file to write
- //! @param data pointer to data to write
- //! @param len number of data elements in data (passed by value, so it
- //! cannot report an error back to the caller)
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutWriteFileui( const char* filename,const unsigned int* data,
- unsigned int len, bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Write a data file \a filename containing char / byte data
- //! @return CUTTrue if writing the file succeeded, otherwise CUTFalse
- //! @param filename name of the file to write
- //! @param data pointer to data to write
- //! @param len number of data elements in data (passed by value, so it
- //! cannot report an error back to the caller)
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutWriteFileb( const char* filename, const char* data, unsigned int len,
- bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Write a data file \a filename containing unsigned char / byte data
- //! @return CUTTrue if writing the file succeeded, otherwise CUTFalse
- //! @param filename name of the file to write
- //! @param data pointer to data to write
- //! @param len number of data elements in data (passed by value, so it
- //! cannot report an error back to the caller)
- //! @param verbose defaults to false; presumably enables diagnostic output
- //! (TODO confirm against the implementation)
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutWriteFileub( const char* filename,const unsigned char* data,
- unsigned int len, bool verbose = false);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Load PGM image file (with unsigned char as data element type)
- //! @return CUTTrue if reading the file succeeded, otherwise CUTFalse
- //! @param file name of the image file
- //! @param data handle to the data read
- //! @param w width of the image (passed by pointer)
- //! @param h height of the image (passed by pointer)
- //! @note If a NULL pointer is passed to this function and it is
- //! initialized within Cutil then cutFree() has to be used to
- //! deallocate the memory
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutLoadPGMub( const char* file, unsigned char** data,
- unsigned int *w,unsigned int *h);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Load PPM image file (with unsigned char as data element type)
- //! @return CUTTrue if reading the file succeeded, otherwise CUTFalse
- //! @param file name of the image file
- //! @param data handle to the data read
- //! @param w width of the image (passed by pointer)
- //! @param h height of the image (passed by pointer)
- //! @note NOTE(review): unlike the PGM loaders, no cutFree() requirement is
- //! documented here; presumably the same rule applies - confirm
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutLoadPPMub( const char* file, unsigned char** data,
- unsigned int *w,unsigned int *h);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Load PPM image file (with unsigned char as data element type), padding
- //! 4th component
- //! @return CUTTrue if reading the file succeeded, otherwise CUTFalse
- //! @param file name of the image file
- //! @param data handle to the data read
- //! @param w width of the image (passed by pointer)
- //! @param h height of the image (passed by pointer)
- //! @note NOTE(review): unlike the PGM loaders, no cutFree() requirement is
- //! documented here; presumably the same rule applies - confirm
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutLoadPPM4ub( const char* file, unsigned char** data,
- unsigned int *w,unsigned int *h);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Load PGM image file (with unsigned int as data element type)
- //! @return CUTTrue if reading the file succeeded, otherwise CUTFalse
- //! @param file name of the image file
- //! @param data handle to the data read
- //! @param w width of the image (passed by pointer)
- //! @param h height of the image (passed by pointer)
- //! @note If a NULL pointer is passed to this function and it is
- //! initialized within Cutil then cutFree() has to be used to
- //! deallocate the memory
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutLoadPGMi( const char* file, unsigned int** data,
- unsigned int* w, unsigned int* h);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Load PGM image file (with unsigned short as data element type)
- //! @return CUTTrue if reading the file succeeded, otherwise CUTFalse
- //! @param file name of the image file
- //! @param data handle to the data read
- //! @param w width of the image (passed by pointer)
- //! @param h height of the image (passed by pointer)
- //! @note If a NULL pointer is passed to this function and it is
- //! initialized within Cutil then cutFree() has to be used to
- //! deallocate the memory
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutLoadPGMs( const char* file, unsigned short** data,
- unsigned int* w, unsigned int* h);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Load PGM image file (with float as data element type)
- //! @return CUTBoolean status; presumably CUTTrue if reading the file
- //! succeeded, as documented for the other cutLoadPGM* functions
- //! (TODO confirm)
- //! @param file name of the image file
- //! @param data handle to the data read
- //! @param w width of the image (passed by pointer)
- //! @param h height of the image (passed by pointer)
- //! @note If a NULL pointer is passed to this function and it is
- //! initialized within Cutil then cutFree() has to be used to
- //! deallocate the memory
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutLoadPGMf( const char* file, float** data,
- unsigned int* w, unsigned int* h);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Save PGM image file (with unsigned char as data element type)
- //! @return CUTBoolean status; presumably CUTTrue if writing the file
- //! succeeded, otherwise CUTFalse (TODO confirm)
- //! @param file name of the image file
- //! @param data pointer to the image data to save
- //! @param w width of the image
- //! @param h height of the image
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutSavePGMub( const char* file, unsigned char* data,
- unsigned int w, unsigned int h);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Save PPM image file (with unsigned char as data element type)
- //! @return CUTBoolean status; presumably CUTTrue if writing the file
- //! succeeded, otherwise CUTFalse (TODO confirm)
- //! @param file name of the image file
- //! @param data pointer to the image data to save
- //! @param w width of the image
- //! @param h height of the image
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutSavePPMub( const char* file, unsigned char *data,
- unsigned int w, unsigned int h);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Save PPM image file (with unsigned char as data element type, padded to
- //! 4 bytes)
- //! @return CUTBoolean status; presumably CUTTrue if writing the file
- //! succeeded, otherwise CUTFalse (TODO confirm)
- //! @param file name of the image file
- //! @param data pointer to the image data to save
- //! @param w width of the image
- //! @param h height of the image
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutSavePPM4ub( const char* file, unsigned char *data,
- unsigned int w, unsigned int h);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Save PGM image file (with unsigned int as data element type)
- //! @return CUTBoolean status; presumably CUTTrue if writing the file
- //! succeeded, otherwise CUTFalse (TODO confirm)
- //! @param file name of the image file
- //! @param data pointer to the image data to save
- //! @param w width of the image
- //! @param h height of the image
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutSavePGMi( const char* file, unsigned int* data,
- unsigned int w, unsigned int h);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Save PGM image file (with unsigned short as data element type)
- //! @return CUTBoolean status; presumably CUTTrue if writing the file
- //! succeeded, otherwise CUTFalse (TODO confirm)
- //! @param file name of the image file
- //! @param data pointer to the image data to save
- //! @param w width of the image
- //! @param h height of the image
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutSavePGMs( const char* file, unsigned short* data,
- unsigned int w, unsigned int h);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Save PGM image file (with float as data element type)
- //! @return CUTBoolean status; presumably CUTTrue if writing the file
- //! succeeded, otherwise CUTFalse (TODO confirm)
- //! @param file name of the image file
- //! @param data pointer to the image data to save
- //! @param w width of the image
- //! @param h height of the image
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutSavePGMf( const char* file, float* data,
- unsigned int w, unsigned int h);
-
- ////////////////////////////////////////////////////////////////////////////
- // Command line arguments: General notes
- // * All command line arguments begin with '--' followed by the token;
- // token and value are separated by '='; example --samples=50
- // * Arrays have the form --model=[one.obj,two.obj,three.obj]
- // (without whitespaces)
- ////////////////////////////////////////////////////////////////////////////
-
- ////////////////////////////////////////////////////////////////////////////
- //! Check if command line argument \a flag_name is given
- //! @return CUTTrue if command line argument \a flag_name has been given,
- //! otherwise CUTFalse
- //! @param argc argc as passed to main()
- //! @param argv argv as passed to main()
- //! @param flag_name name of command line flag
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutCheckCmdLineFlag( const int argc, const char** argv,
- const char* flag_name);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Get the value of a command line argument of type int
- //! @return CUTTrue if command line argument \a arg_name has been given and
- //! is of the requested type, otherwise CUTFalse
- //! @param argc argc as passed to main()
- //! @param argv argv as passed to main()
- //! @param arg_name name of the command line argument
- //! @param val pointer through which the value of the command line argument
- //! is returned
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutGetCmdLineArgumenti( const int argc, const char** argv,
- const char* arg_name, int* val);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Get the value of a command line argument of type float
- //! @return CUTTrue if command line argument \a arg_name has been given and
- //! is of the requested type, otherwise CUTFalse
- //! @param argc argc as passed to main()
- //! @param argv argv as passed to main()
- //! @param arg_name name of the command line argument
- //! @param val pointer through which the value of the command line argument
- //! is returned
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutGetCmdLineArgumentf( const int argc, const char** argv,
- const char* arg_name, float* val);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Get the value of a command line argument of type string
- //! @return CUTTrue if command line argument \a arg_name has been given and
- //! is of the requested type, otherwise CUTFalse
- //! @param argc argc as passed to main()
- //! @param argv argv as passed to main()
- //! @param arg_name name of the command line argument
- //! @param val pointer through which the value of the command line argument
- //! is returned (NOTE(review): ownership of the returned string is not
- //! stated here - confirm whether cutFree() is required)
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutGetCmdLineArgumentstr( const int argc, const char** argv,
- const char* arg_name, char** val);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Get the value of a command line argument list whose elements are strings
- //! (list syntax: --name=[a,b,c] without whitespace)
- //! @return CUTTrue if command line argument \a arg_name has been given and
- //! is of the requested type, otherwise CUTFalse
- //! @param argc argc as passed to main()
- //! @param argv argv as passed to main()
- //! @param arg_name name of the command line argument
- //! @param val command line argument list
- //! (NOTE(review): the memory layout of the list behind this char** is
- //! not stated here - confirm against the implementation)
- //! @param len length of the list / number of elements (passed by pointer)
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutGetCmdLineArgumentListstr( const int argc, const char** argv,
- const char* arg_name, char** val,
- unsigned int* len);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Extended assert
- //! @return CUTTrue if the condition \a val holds, otherwise CUTFalse
- //! @param val condition to test, passed as an int
- //! @param file source file of the check (pass the __FILE__ macro)
- //! @param line source line of the check (pass the __LINE__ macro)
- //! @note This function should be used via the CONDITION(val) macro
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutCheckCondition( int val, const char* file, const int line);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Compare two float arrays
- //! @return CUTTrue if \a reference and \a data are identical,
- //! otherwise CUTFalse
- //! @param reference handle to the reference data / gold image
- //! @param data handle to the computed data
- //! @param len number of elements in reference and data
- //! @note Takes no tolerance parameter; cutComparefe() and cutComparefet()
- //! accept an epsilon
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutComparef( const float* reference, const float* data,
- const unsigned int len);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Compare two integer arrays
- //! @return CUTTrue if \a reference and \a data are identical,
- //! otherwise CUTFalse
- //! @param reference handle to the reference data / gold image
- //! @param data handle to the computed data
- //! @param len number of elements in reference and data (both arrays must
- //! provide at least this many elements)
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutComparei( const int* reference, const int* data,
- const unsigned int len );
-
- ////////////////////////////////////////////////////////////////////////////////
- //! Compare two unsigned integer arrays, with epsilon and threshold
- //! @return CUTTrue if \a reference and \a data are identical,
- //! otherwise CUTFalse
- //! @param reference handle to the reference data / gold image
- //! @param data handle to the computed data
- //! @param len number of elements in reference and data
- //! @param epsilon epsilon to use for the comparison
- //! @param threshold tolerance % # of comparison errors (0.15f = 15%)
- ////////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutCompareuit( const unsigned int* reference, const unsigned int* data,
- const unsigned int len, const float epsilon, const float threshold );
-
- ////////////////////////////////////////////////////////////////////////////
- //! Compare two unsigned char arrays
- //! @return CUTTrue if \a reference and \a data are identical,
- //! otherwise CUTFalse
- //! @param reference handle to the reference data / gold image
- //! @param data handle to the computed data
- //! @param len number of elements in reference and data
- //! @note Takes no tolerance parameter; cutCompareube() and cutCompareubt()
- //! accept an epsilon
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutCompareub( const unsigned char* reference, const unsigned char* data,
- const unsigned int len );
-
- ////////////////////////////////////////////////////////////////////////////////
- //! Compare two unsigned char arrays with a tolerance for # of byte errors
- //! @return CUTTrue if \a reference and \a data are identical,
- //! otherwise CUTFalse
- //! @param reference handle to the reference data / gold image
- //! @param data handle to the computed data
- //! @param len number of elements in reference and data
- //! @param epsilon epsilon to use for the comparison
- //! @param threshold tolerance % # of comparison errors (0.15f = 15%)
- ////////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutCompareubt( const unsigned char* reference, const unsigned char* data,
- const unsigned int len, const float epsilon, const float threshold );
-
- ////////////////////////////////////////////////////////////////////////////////
- //! Compare two unsigned char arrays with an epsilon tolerance for equality
- //! @return CUTTrue if \a reference and \a data are identical,
- //! otherwise CUTFalse
- //! @param reference handle to the reference data / gold image
- //! @param data handle to the computed data
- //! @param len number of elements in reference and data
- //! @param epsilon epsilon to use for the comparison
- ////////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutCompareube( const unsigned char* reference, const unsigned char* data,
- const unsigned int len, const float epsilon );
-
- ////////////////////////////////////////////////////////////////////////////
- //! Compare two float arrays with an epsilon tolerance for equality
- //! @return CUTTrue if \a reference and \a data are identical (within the
- //! epsilon tolerance), otherwise CUTFalse
- //! @param reference handle to the reference data / gold image
- //! @param data handle to the computed data
- //! @param len number of elements in reference and data
- //! @param epsilon epsilon to use for the comparison
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutComparefe( const float* reference, const float* data,
- const unsigned int len, const float epsilon );
-
- ////////////////////////////////////////////////////////////////////////////////
- //! Compare two float arrays with an epsilon tolerance for equality and a
- //! threshold for # pixel errors
- //! @return CUTTrue if \a reference and \a data are identical,
- //! otherwise CUTFalse
- //! @param reference handle to the reference data / gold image
- //! @param data handle to the computed data
- //! @param len number of elements in reference and data
- //! @param epsilon epsilon to use for the comparison
- //! @param threshold threshold for the number of pixel errors; presumably a
- //! fraction as in cutCompareubt (0.15f = 15%) - TODO confirm
- ////////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutComparefet( const float* reference, const float* data,
- const unsigned int len, const float epsilon, const float threshold );
-
- ////////////////////////////////////////////////////////////////////////////
- //! Compare two float arrays using L2-norm with an epsilon tolerance for
- //! equality
- //! @return CUTTrue if \a reference and \a data are identical (within the
- //! epsilon tolerance), otherwise CUTFalse
- //! @param reference handle to the reference data / gold image
- //! @param data handle to the computed data
- //! @param len number of elements in reference and data
- //! @param epsilon epsilon to use for the comparison
- //! (NOTE(review): whether the norm is relative or absolute is not
- //! stated here - confirm against the implementation)
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutCompareL2fe( const float* reference, const float* data,
- const unsigned int len, const float epsilon );
-
- ////////////////////////////////////////////////////////////////////////////////
- //! Compare two PPM image files with an epsilon tolerance for equality
- //! @return CUTTrue if \a src_file and \a ref_file are identical,
- //! otherwise CUTFalse
- //! @param src_file filename for the image to be compared
- //! @param ref_file filename for the reference data / gold image
- //! @param epsilon epsilon to use for the comparison
- //! @param threshold fraction of pixels that may still mismatch for the
- //! comparison to pass (e.g. 0.15f = 15%)
- //! (NOTE(review): the original wording was ambiguous about whether the
- //! percentage is the allowed-mismatch or the must-match share - confirm)
- //! @param verboseErrors defaults to false; when set, output details of the
- //! image mismatch to the error stream
- ////////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutComparePPM( const char *src_file, const char *ref_file, const float epsilon, const float threshold, bool verboseErrors = false );
-
-
- ////////////////////////////////////////////////////////////////////////////
- //! Timer functionality
-
- ////////////////////////////////////////////////////////////////////////////
- //! Create a new timer
- //! @return CUTTrue if a timer has been created, otherwise CUTFalse
- //! @param name pointer through which the name of the new timer is
- //! returned, 0 if the creation failed
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutCreateTimer( unsigned int* name);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Delete a timer
- //! @return CUTTrue if a timer has been deleted, otherwise CUTFalse
- //! @param name name of the timer to delete
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutDeleteTimer( unsigned int name);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Start the timer with name \a name
- //! @return CUTBoolean status; presumably CUTTrue on success (TODO confirm)
- //! @param name name of the timer to start
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutStartTimer( const unsigned int name);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Stop the timer with name \a name. Does not reset.
- //! @return CUTBoolean status; presumably CUTTrue on success (TODO confirm)
- //! @param name name of the timer to stop
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutStopTimer( const unsigned int name);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Resets the timer's counter.
- //! @return CUTBoolean status; presumably CUTTrue on success (TODO confirm)
- //! @param name name of the timer to reset.
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- CUTBoolean CUTIL_API
- cutResetTimer( const unsigned int name);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Returns total execution time in milliseconds for the timer over all
- //! runs since the last reset or timer creation.
- //! @param name name of the timer to return the time of
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- float CUTIL_API
- cutGetTimerValue( const unsigned int name);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Return the average time in milliseconds for timer execution as the
- //! total time for the timer dividied by the number of completed (stopped)
- //! runs the timer has made.
- //! Excludes the current running time if the timer is currently running.
- //! @param name name of the timer to return the time of
- ////////////////////////////////////////////////////////////////////////////
- DLL_MAPPING
- float CUTIL_API
- cutGetAverageTimerValue( const unsigned int name);
-
- ////////////////////////////////////////////////////////////////////////////
- //! Macros
-
-#if CUDART_VERSION >= 4000
-#define CUT_DEVICE_SYNCHRONIZE( ) cudaDeviceSynchronize();
-#else
-#define CUT_DEVICE_SYNCHRONIZE( ) cudaThreadSynchronize();
-#endif
-
-#if CUDART_VERSION >= 4000
-#define CUT_DEVICE_RESET( ) cudaDeviceReset();
-#else
-#define CUT_DEVICE_RESET( ) cudaThreadExit();
-#endif
-
-// This is for the CUTIL bank checker
-#ifdef _DEBUG
- #if __DEVICE_EMULATION__
- // Interface for bank conflict checker
- #define CUT_BANK_CHECKER( array, index) \
- (cutCheckBankAccess( threadIdx.x, threadIdx.y, threadIdx.z, blockDim.x, \
- blockDim.y, blockDim.z, \
- __FILE__, __LINE__, #array, index ), \
- array[index])
- #else
- #define CUT_BANK_CHECKER( array, index) array[index]
- #endif
-#else
- #define CUT_BANK_CHECKER( array, index) array[index]
-#endif
-
-# define CU_SAFE_CALL_NO_SYNC( call ) { \
- CUresult err = call; \
- if( CUDA_SUCCESS != err) { \
- fprintf(stderr, "Cuda driver error %x in file '%s' in line %i.\n", \
- err, __FILE__, __LINE__ ); \
- exit(EXIT_FAILURE); \
- } }
-
-# define CU_SAFE_CALL( call ) CU_SAFE_CALL_NO_SYNC(call);
-
-# define CU_SAFE_CTX_SYNC( ) { \
- CUresult err = cuCtxSynchronize(); \
- if( CUDA_SUCCESS != err) { \
- fprintf(stderr, "Cuda driver error %x in file '%s' in line %i.\n", \
- err, __FILE__, __LINE__ ); \
- exit(EXIT_FAILURE); \
- } }
-
-# define CUDA_SAFE_CALL_NO_SYNC( call) { \
- cudaError err = call; \
- if( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
- __FILE__, __LINE__, cudaGetErrorString( err) ); \
- exit(EXIT_FAILURE); \
- } }
-
-# define CUDA_SAFE_CALL( call) CUDA_SAFE_CALL_NO_SYNC(call); \
-
-# define CUDA_SAFE_THREAD_SYNC( ) { \
- cudaError err = CUT_DEVICE_SYNCHRONIZE(); \
- if ( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
- __FILE__, __LINE__, cudaGetErrorString( err) ); \
- } }
-
-# define CUFFT_SAFE_CALL( call) { \
- cufftResult err = call; \
- if( CUFFT_SUCCESS != err) { \
- fprintf(stderr, "CUFFT error in file '%s' in line %i.\n", \
- __FILE__, __LINE__); \
- exit(EXIT_FAILURE); \
- } }
-
-# define CUT_SAFE_CALL( call) \
- if( CUTTrue != call) { \
- fprintf(stderr, "Cut error in file '%s' in line %i.\n", \
- __FILE__, __LINE__); \
- exit(EXIT_FAILURE); \
- }
-
- //! Check for CUDA error
-#ifdef _DEBUG
-# define CUT_CHECK_ERROR(errorMessage) { \
- cudaError_t err = cudaGetLastError(); \
- if( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
- errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
- exit(EXIT_FAILURE); \
- } \
- err = CUT_DEVICE_SYNCHRONIZE(); \
- if( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
- errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
- exit(EXIT_FAILURE); \
- } \
- }
-#else
-# define CUT_CHECK_ERROR(errorMessage) { \
- cudaError_t err = cudaGetLastError(); \
- if( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
- errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
- exit(EXIT_FAILURE); \
- } \
- }
-#endif
-
- //! Check for malloc error
-# define CUT_SAFE_MALLOC( mallocCall ) { \
- if( !(mallocCall)) { \
- fprintf(stderr, "Host malloc failure in file '%s' in line %i\n", \
- __FILE__, __LINE__); \
- exit(EXIT_FAILURE); \
- } } while(0);
-
- //! Check if conditon is true (flexible assert)
-# define CUT_CONDITION( val) \
- if( CUTFalse == cutCheckCondition( val, __FILE__, __LINE__)) { \
- exit(EXIT_FAILURE); \
- }
-
-#if __DEVICE_EMULATION__
-
-# define CUT_DEVICE_INIT(ARGC, ARGV)
-
-#else
-
-# define CUT_DEVICE_INIT(ARGC, ARGV) { \
- int deviceCount; \
- CUDA_SAFE_CALL_NO_SYNC(cudaGetDeviceCount(&deviceCount)); \
- if (deviceCount == 0) { \
- fprintf(stderr, "cutil error: no devices supporting CUDA.\n"); \
- exit(EXIT_FAILURE); \
- } \
- int dev = 0; \
- cutGetCmdLineArgumenti(ARGC, (const char **) ARGV, "device", &dev); \
- if (dev < 0) dev = 0; \
- if (dev > deviceCount-1) dev = deviceCount - 1; \
- cudaDeviceProp deviceProp; \
- CUDA_SAFE_CALL_NO_SYNC(cudaGetDeviceProperties(&deviceProp, dev)); \
- if (deviceProp.major < 1) { \
- fprintf(stderr, "cutil error: device does not support CUDA.\n"); \
- exit(EXIT_FAILURE); \
- } \
- if (cutCheckCmdLineFlag(ARGC, (const char **) ARGV, "quiet") == CUTFalse) \
- fprintf(stderr, "Using device %d: %s\n", dev, deviceProp.name); \
- CUDA_SAFE_CALL(cudaSetDevice(dev)); \
-}
-
-
- //! Check for CUDA context lost
-# define CUDA_CHECK_CTX_LOST(errorMessage) { \
- cudaError_t err = cudaGetLastError(); \
- if( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
- errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
- exit(EXIT_FAILURE); \
- } \
- err = CUT_DEVICE_SYNCHRONIZE(); \
- if( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
- errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
- exit(EXIT_FAILURE); \
- } }
-
-//! Check for CUDA context lost
-# define CU_CHECK_CTX_LOST(errorMessage) { \
- cudaError_t err = cudaGetLastError(); \
- if( CUDA_ERROR_INVALID_CONTEXT != err) { \
- fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
- errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
- exit(EXIT_FAILURE); \
- } \
- err = CUT_DEVICE_SYNCHRONIZE(); \
- if( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
- errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
- exit(EXIT_FAILURE); \
- } }
-
-
-#endif
-
-# define CUT_DEVICE_INIT_DRV(cuDevice, ARGC, ARGV) { \
- cuDevice = 0; \
- int deviceCount = 0; \
- CUresult err = cuInit(0); \
- if (CUDA_SUCCESS == err) \
- CU_SAFE_CALL_NO_SYNC(cuDeviceGetCount(&deviceCount)); \
- if (deviceCount == 0) { \
- fprintf(stderr, "cutil error: no devices supporting CUDA\n"); \
- exit(EXIT_FAILURE); \
- } \
- int dev = 0; \
- cutGetCmdLineArgumenti(ARGC, (const char **) ARGV, "device", &dev); \
- if (dev < 0) dev = 0; \
- if (dev > deviceCount-1) dev = deviceCount - 1; \
- CU_SAFE_CALL_NO_SYNC(cuDeviceGet(&cuDevice, dev)); \
- char name[100]; \
- cuDeviceGetName(name, 100, cuDevice); \
- if (cutCheckCmdLineFlag(ARGC, (const char **) ARGV, "quiet") == CUTFalse) \
- fprintf(stderr, "Using device %d: %s\n", dev, name); \
-}
-
-#define CUT_EXIT(argc, argv) \
- if (!cutCheckCmdLineFlag(argc, (const char**)argv, "noprompt")) { \
- printf("\nPress ENTER to exit...\n"); \
- fflush( stdout); \
- fflush( stderr); \
- getchar(); \
- } \
- exit(EXIT_SUCCESS);
-
-
-#ifdef __cplusplus
-}
-#endif // #ifdef _DEBUG (else branch)
-
-#endif // #ifndef _CUTIL_H_
+++ /dev/null
-/*
- * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-#ifndef _CUTIL_INLINE_H_
-#define _CUTIL_INLINE_H_
-
-#include <cuda.h>
-#include <pcl/cuda/cutil.h>
-#include <cuda_runtime_api.h>
-
-#include <pcl/cuda/cutil_inline_bankchecker.h>
-#include <pcl/cuda/cutil_inline_runtime.h>
-#include <pcl/cuda/cutil_inline_drvapi.h>
-
-inline void print_NVCC_min_spec(const char *sSDKsample, const char *sNVCCReq, const char *sDriverReq)
-{
- printf("CUDA %d.%02d Toolkit built this project.\n", CUDART_VERSION/1000, (CUDART_VERSION%100));
- printf(" [ %s ] requirements:\n", sSDKsample);
- printf(" -> CUDA %s Toolkit\n" , sNVCCReq);
- printf(" -> %s NVIDIA Display Driver.\n", sDriverReq);
-}
-
-#define ALIGN_OFFSET(offset, alignment) offset = (offset + (alignment-1)) & ~((alignment-1))
-
-
-#endif // _CUTIL_INLINE_H_
+++ /dev/null
-/*
- * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
- #ifndef _CUTIL_INLINE_BANKCHECKER_H_
-#define _CUTIL_INLINE_BANKCHECKER_H_
-
-#ifdef _DEBUG
- #if __DEVICE_EMULATION__
- #define cutilBankChecker(array, idx) (__cutilBankChecker (threadIdx.x, threadIdx.y, threadIdx.z, \
- blockDim.x, blockDim.y, blockDim.z, \
- #array, idx, __FILE__, __LINE__), \
- array[idx])
-
- #else
- #define cutilBankChecker(array, idx) array[idx]
- #endif
-#else
- #define cutilBankChecker(array, idx) array[idx]
-#endif
-
- // Interface for bank conflict checker
-inline void __cutilBankChecker(unsigned int tidx, unsigned int tidy, unsigned int tidz,
- unsigned int bdimx, unsigned int bdimy, unsigned int bdimz,
- char *aname, int index, char *file, int line)
-{
- cutCheckBankAccess( tidx, tidy, tidz, bdimx, bdimy, bdimz, file, line, aname, index);
-}
-
-#endif // _CUTIL_INLINE_BANKCHECKER_H_
+++ /dev/null
-/*
- * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-#ifndef _CUTIL_INLINE_FUNCTIONS_DRVAPI_H_
-#define _CUTIL_INLINE_FUNCTIONS_DRVAPI_H_
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-
-// We define these calls here, so the user doesn't need to include __FILE__ and __LINE__
-// The advantage is the developers gets to use the inline function so they can debug
-#define cutilDrvSafeCallNoSync(err) __cuSafeCallNoSync (err, __FILE__, __LINE__)
-#define cutilDrvSafeCall(err) __cuSafeCall (err, __FILE__, __LINE__)
-#define cutilDrvCtxSync() __cuCtxSync (__FILE__, __LINE__)
-#define cutilDrvCheckMsg(msg) __cuCheckMsg (msg, __FILE__, __LINE__)
-#define cutilDrvAlignOffset(offset, alignment) ( offset = (offset + (alignment-1)) & ~((alignment-1)) )
-
-// These are the inline versions for all of the CUTIL functions
-inline void __cuSafeCallNoSync( CUresult err, const char *file, const int line )
-{
- if( CUDA_SUCCESS != err) {
- fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
- err, file, line );
- exit(-1);
- }
-}
-inline void __cuSafeCall( CUresult err, const char *file, const int line )
-{
- __cuSafeCallNoSync( err, file, line );
-}
-
-inline void __cuCtxSync(const char *file, const int line )
-{
- CUresult err = cuCtxSynchronize();
- if( CUDA_SUCCESS != err ) {
- fprintf(stderr, "cuCtxSynchronize() API error = %04d in file <%s>, line %i.\n",
- err, file, line );
- exit(-1);
- }
-}
-
-#define MIN(a,b) ((a < b) ? a : b)
-#define MAX(a,b) ((a > b) ? a : b)
-
-// Beginning of GPU Architecture definitions
-inline int _ConvertSMVer2CoresDrvApi(int major, int minor)
-{
- // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
- typedef struct {
- int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
- int Cores;
- } sSMtoCores;
-
- sSMtoCores nGpuArchCoresPerSM[] =
- { { 0x10, 8 },
- { 0x11, 8 },
- { 0x12, 8 },
- { 0x13, 8 },
- { 0x20, 32 },
- { 0x21, 48 },
- { -1, -1 }
- };
-
- int index = 0;
- while (nGpuArchCoresPerSM[index].SM != -1) {
- if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) {
- return nGpuArchCoresPerSM[index].Cores;
- }
- index++;
- }
- printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
- return -1;
-}
-// end of GPU Architecture definitions
-
-// This function returns the best GPU based on performance
-inline int cutilDrvGetMaxGflopsDeviceId()
-{
- CUdevice current_device = 0, max_perf_device = 0;
- int device_count = 0, sm_per_multiproc = 0;
- int max_compute_perf = 0, best_SM_arch = 0;
- int major = 0, minor = 0, multiProcessorCount, clockRate;
-
- cuInit(0);
- cutilDrvSafeCallNoSync(cuDeviceGetCount(&device_count));
-
- // Find the best major SM Architecture GPU device
- while ( current_device < device_count ) {
- cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
- if (major > 0 && major < 9999) {
- best_SM_arch = MAX(best_SM_arch, major);
- }
- current_device++;
- }
-
- // Find the best CUDA capable GPU device
- current_device = 0;
- while( current_device < device_count ) {
- cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &multiProcessorCount,
- CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- current_device ) );
- cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &clockRate,
- CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
- current_device ) );
- cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
-
- if (major == 9999 && minor == 9999) {
- sm_per_multiproc = 1;
- } else {
- sm_per_multiproc = _ConvertSMVer2CoresDrvApi(major, minor);
- }
-
- int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate;
- if( compute_perf > max_compute_perf ) {
- // If we find GPU with SM major > 2, search only these
- if ( best_SM_arch > 2 ) {
- // If our device==dest_SM_arch, choose this, or else pass
- if (major == best_SM_arch) {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- } else {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- }
- ++current_device;
- }
- return max_perf_device;
-}
-
-// This function returns the best Graphics GPU based on performance
-inline int cutilDrvGetMaxGflopsGraphicsDeviceId()
-{
- CUdevice current_device = 0, max_perf_device = 0;
- int device_count = 0, sm_per_multiproc = 0;
- int max_compute_perf = 0, best_SM_arch = 0;
- int major = 0, minor = 0, multiProcessorCount, clockRate;
- int bTCC = 0;
- char deviceName[256];
-
- cuInit(0);
- cutilDrvSafeCallNoSync(cuDeviceGetCount(&device_count));
-
- // Find the best major SM Architecture GPU device that are graphics devices
- while ( current_device < device_count ) {
- cutilDrvSafeCallNoSync( cuDeviceGetName(deviceName, 256, current_device) );
- cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
-
-#if CUDA_VERSION >= 3020
- cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device ) );
-#else
- // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
- if (deviceName[0] == 'T') bTCC = 1;
-#endif
- if (!bTCC) {
- if (major > 0 && major < 9999) {
- best_SM_arch = MAX(best_SM_arch, major);
- }
- }
- current_device++;
- }
-
- // Find the best CUDA capable GPU device
- current_device = 0;
- while( current_device < device_count ) {
- cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &multiProcessorCount,
- CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- current_device ) );
- cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &clockRate,
- CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
- current_device ) );
- cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
-
-#if CUDA_VERSION >= 3020
- cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device ) );
-#else
- // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
- if (deviceName[0] == 'T') bTCC = 1;
-#endif
-
- if (major == 9999 && minor == 9999) {
- sm_per_multiproc = 1;
- } else {
- sm_per_multiproc = _ConvertSMVer2CoresDrvApi(major, minor);
- }
-
- // If this is a Tesla based GPU and SM 2.0, and TCC is disabled, this is a contendor
- if (!bTCC) // Is this GPU running the TCC driver? If so we pass on this
- {
- int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate;
- if( compute_perf > max_compute_perf ) {
- // If we find GPU with SM major > 2, search only these
- if ( best_SM_arch > 2 ) {
- // If our device = dest_SM_arch, then we pick this one
- if (major == best_SM_arch) {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- } else {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- }
- }
- ++current_device;
- }
- return max_perf_device;
-}
-
-inline void __cuCheckMsg( const char * msg, const char *file, const int line )
-{
- CUresult err = cuCtxSynchronize();
- if( CUDA_SUCCESS != err) {
- fprintf(stderr, "cutilDrvCheckMsg -> %s", msg);
- fprintf(stderr, "cutilDrvCheckMsg -> cuCtxSynchronize API error = %04d in file <%s>, line %i.\n",
- err, file, line );
- exit(-1);
- }
-}
-
-
-#if __DEVICE_EMULATION__
- inline int cutilDeviceInitDrv(int ARGC, char **ARGV) { }
-#else
- inline int cutilDeviceInitDrv(int ARGC, char ** ARGV)
- {
- int cuDevice = 0;
- int deviceCount = 0;
- CUresult err = cuInit(0);
- if (CUDA_SUCCESS == err)
- cutilDrvSafeCallNoSync(cuDeviceGetCount(&deviceCount));
- if (deviceCount == 0) {
- fprintf(stderr, "CUTIL DeviceInitDrv error: no devices supporting CUDA\n");
- exit(-1);
- }
- int dev = 0;
- cutGetCmdLineArgumenti(ARGC, (const char **) ARGV, "device", &dev);
- if (dev < 0) dev = 0;
- if (dev > deviceCount-1) {
- fprintf(stderr, "\n");
- fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
- fprintf(stderr, ">> cutilDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
- fprintf(stderr, "\n");
- return -dev;
- }
- cutilDrvSafeCallNoSync(cuDeviceGet(&cuDevice, dev));
- char name[100];
- cuDeviceGetName(name, 100, cuDevice);
- if (cutCheckCmdLineFlag(ARGC, (const char **) ARGV, "quiet") == CUTFalse) {
- printf("> Using CUDA Device [%d]: %s\n", dev, name);
- }
- return dev;
- }
-#endif
-
- // General initialization call to pick the best CUDA Device
-#if __DEVICE_EMULATION__
- inline CUdevice cutilChooseCudaDeviceDrv(int argc, char **argv, int *p_devID)
-#else
- inline CUdevice cutilChooseCudaDeviceDrv(int argc, char **argv, int *p_devID)
- {
- CUdevice cuDevice;
- int devID = 0;
- // If the command-line has a device number specified, use it
- if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) {
- devID = cutilDeviceInitDrv(argc, argv);
- if (devID < 0) {
- printf("exiting...\n");
- exit(0);
- }
- } else {
- // Otherwise pick the device with highest Gflops/s
- char name[100];
- devID = cutilDrvGetMaxGflopsDeviceId();
- cutilDrvSafeCallNoSync(cuDeviceGet(&cuDevice, devID));
- cuDeviceGetName(name, 100, cuDevice);
- printf("> Using CUDA Device [%d]: %s\n", devID, name);
- }
- cuDeviceGet(&cuDevice, devID);
- if (p_devID) *p_devID = devID;
- return cuDevice;
- }
-#endif
-
-
-//! Check for CUDA context lost
-inline void cutilDrvCudaCheckCtxLost(const char *errorMessage, const char *file, const int line )
-{
- CUresult err = cuCtxSynchronize();
- if( CUDA_ERROR_INVALID_CONTEXT != err) {
- fprintf(stderr, "Cuda error: %s in file '%s' in line %i\n",
- errorMessage, file, line );
- exit(-1);
- }
- err = cuCtxSynchronize();
- if( CUDA_SUCCESS != err) {
- fprintf(stderr, "Cuda error: %s in file '%s' in line %i\n",
- errorMessage, file, line );
- exit(-1);
- }
-}
-
-#ifndef STRCASECMP
-#ifdef _WIN32
-#define STRCASECMP _stricmp
-#else
-#define STRCASECMP strcasecmp
-#endif
-#endif
-
-#ifndef STRNCASECMP
-#ifdef _WIN32
-#define STRNCASECMP _strnicmp
-#else
-#define STRNCASECMP strncasecmp
-#endif
-#endif
-
-inline void __cutilDrvQAFinish(int argc, char **argv, bool bStatus)
-{
- const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL };
-
- bool bFlag = false;
- for (int i=1; i < argc; i++) {
- if (!STRCASECMP(argv[i], "-qatest") || !STRCASECMP(argv[i], "-noprompt")) {
- bFlag |= true;
- }
- }
-
- if (bFlag) {
- printf("&&&& %s %s", sStatus[bStatus], argv[0]);
- for (int i=1; i < argc; i++) printf(" %s", argv[i]);
- } else {
- printf("[%s] test result\n%s\n", argv[0], sStatus[bStatus]);
- }
-}
-
-// General check for CUDA GPU SM Capabilities for a specific device #
-inline bool cutilDrvCudaDevCapabilities(int major_version, int minor_version, int deviceNum, int argc, char** argv)
-{
- int major, minor, dev;
- char device_name[256];
-
-#ifdef __DEVICE_EMULATION__
- printf("> Compute Device Emulation Mode \n");
-#endif
-
- cutilDrvSafeCallNoSync( cuDeviceGet(&dev, deviceNum) );
- cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, dev));
- cutilDrvSafeCallNoSync( cuDeviceGetName(device_name, 256, dev) );
-
- if((major > major_version) ||
- (major == major_version && minor >= minor_version))
- {
- printf("> Device %d: < %s >, Compute SM %d.%d detected\n", dev, device_name, major, minor);
- return true;
- }
- else
- {
- printf("There is no device supporting CUDA compute capability %d.%d.\n", major_version, minor_version);
- __cutilDrvQAFinish(argc, argv, true);
- return false;
- }
-}
-
-// General check for CUDA GPU SM Capabilities
-inline bool cutilDrvCudaCapabilities(int major_version, int minor_version, int argc, char **argv)
-{
- return cutilDrvCudaDevCapabilities(major_version, minor_version, 0, argc, argv);
-}
-
-
-#endif // _CUTIL_INLINE_FUNCTIONS_DRVAPI_H_
+++ /dev/null
-/*
- * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-#ifndef _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_
-#define _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_
-
-#ifdef _WIN32
-#ifdef _DEBUG // Do this only in debug mode...
-# define WINDOWS_LEAN_AND_MEAN
-# include <windows.h>
-# include <stdlib.h>
-# undef min
-# undef max
-#endif
-#endif
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include <cufft.h>
-
-// We define these calls here, so the user doesn't need to include __FILE__ and __LINE__
-// The advantage is the developers gets to use the inline function so they can debug
-#define cutilSafeCallNoSync(err) __cudaSafeCallNoSync(err, __FILE__, __LINE__)
-#define cutilSafeCall(err) __cudaSafeCall (err, __FILE__, __LINE__)
-#define cutilSafeThreadSync() __cudaSafeThreadSync(__FILE__, __LINE__)
-#define cufftSafeCall(err) __cufftSafeCall (err, __FILE__, __LINE__)
-#define cutilCheckError(err) __cutilCheckError (err, __FILE__, __LINE__)
-#define cutilCheckMsg(msg) __cutilGetLastError (msg, __FILE__, __LINE__)
-#define cutilCheckMsgAndSync(msg) __cutilGetLastErrorAndSync (msg, __FILE__, __LINE__)
-#define cutilSafeMalloc(mallocCall) __cutilSafeMalloc ((mallocCall), __FILE__, __LINE__)
-#define cutilCondition(val) __cutilCondition (val, __FILE__, __LINE__)
-#define cutilExit(argc, argv) __cutilExit (argc, argv)
-
-inline cudaError cutilDeviceSynchronize()
-{
-#if CUDART_VERSION >= 4000
- return cudaDeviceSynchronize();
-#else
- return cudaThreadSynchronize();
-#endif
-}
-
-inline cudaError cutilDeviceReset()
-{
-#if CUDART_VERSION >= 4000
- return cudaDeviceReset();
-#else
- return cudaThreadExit();
-#endif
-}
-
-inline void __cutilCondition(int val, char *file, int line)
-{
- if( CUTFalse == cutCheckCondition( val, file, line ) ) {
- exit(EXIT_FAILURE);
- }
-}
-
-inline void __cutilExit(int argc, char **argv)
-{
- if (!cutCheckCmdLineFlag(argc, (const char**)argv, "noprompt")) {
- printf("\nPress ENTER to exit...\n");
- fflush( stdout);
- fflush( stderr);
- getchar();
- }
- exit(EXIT_SUCCESS);
-}
-
-#define MIN(a,b) ((a < b) ? a : b)
-#define MAX(a,b) ((a > b) ? a : b)
-
-// Beginning of GPU Architecture definitions
-inline int _ConvertSMVer2Cores(int major, int minor)
-{
- // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
- typedef struct {
- int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
- int Cores;
- } sSMtoCores;
-
- sSMtoCores nGpuArchCoresPerSM[] =
- { { 0x10, 8 },
- { 0x11, 8 },
- { 0x12, 8 },
- { 0x13, 8 },
- { 0x20, 32 },
- { 0x21, 48 },
- { -1, -1 }
- };
-
- int index = 0;
- while (nGpuArchCoresPerSM[index].SM != -1) {
- if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) {
- return nGpuArchCoresPerSM[index].Cores;
- }
- index++;
- }
- printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
- return -1;
-}
-// end of GPU Architecture definitions
-
-// This function returns the best GPU (with maximum GFLOPS)
-inline int cutGetMaxGflopsDeviceId()
-{
- int current_device = 0, sm_per_multiproc = 0;
- int max_compute_perf = 0, max_perf_device = 0;
- int device_count = 0, best_SM_arch = 0;
- cudaDeviceProp deviceProp;
-
- cudaGetDeviceCount( &device_count );
- // Find the best major SM Architecture GPU device
- while ( current_device < device_count ) {
- cudaGetDeviceProperties( &deviceProp, current_device );
- if (deviceProp.major > 0 && deviceProp.major < 9999) {
- best_SM_arch = MAX(best_SM_arch, deviceProp.major);
- }
- current_device++;
- }
-
- // Find the best CUDA capable GPU device
- current_device = 0;
- while( current_device < device_count ) {
- cudaGetDeviceProperties( &deviceProp, current_device );
- if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
- sm_per_multiproc = 1;
- } else {
- sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
- }
-
- int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
- if( compute_perf > max_compute_perf ) {
- // If we find GPU with SM major > 2, search only these
- if ( best_SM_arch > 2 ) {
- // If our device==dest_SM_arch, choose this, or else pass
- if (deviceProp.major == best_SM_arch) {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- } else {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- }
- ++current_device;
- }
- return max_perf_device;
-}
-
-// This function returns the best GPU (with maximum GFLOPS)
-inline int cutGetMaxGflopsGraphicsDeviceId()
-{
- int current_device = 0, sm_per_multiproc = 0;
- int max_compute_perf = 0, max_perf_device = 0;
- int device_count = 0, best_SM_arch = 0;
- int bTCC = 0;
- cudaDeviceProp deviceProp;
-
- cudaGetDeviceCount( &device_count );
- // Find the best major SM Architecture GPU device that is graphics capable
- while ( current_device < device_count ) {
- cudaGetDeviceProperties( &deviceProp, current_device );
-
-#if CUDA_VERSION >= 3020
- if (deviceProp.tccDriver) bTCC = 1;
-#else
- // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
- if (deviceProp.name[0] == 'T') bTCC = 1;
-#endif
-
- if (!bTCC) {
- if (deviceProp.major > 0 && deviceProp.major < 9999) {
- best_SM_arch = MAX(best_SM_arch, deviceProp.major);
- }
- }
- current_device++;
- }
-
- // Find the best CUDA capable GPU device
- current_device = 0;
- while( current_device < device_count ) {
- cudaGetDeviceProperties( &deviceProp, current_device );
- if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
- sm_per_multiproc = 1;
- } else {
- sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
- }
-
-#if CUDA_VERSION >= 3020
- if (deviceProp.tccDriver) bTCC = 1;
-#else
- // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
- if (deviceProp.name[0] == 'T') bTCC = 1;
-#endif
-
- if (!bTCC) // Is this GPU running the TCC driver? If so we pass on this
- {
- int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
- if( compute_perf > max_compute_perf ) {
- // If we find GPU with SM major > 2, search only these
- if ( best_SM_arch > 2 ) {
- // If our device==dest_SM_arch, choose this, or else pass
- if (deviceProp.major == best_SM_arch) {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- } else {
- max_compute_perf = compute_perf;
- max_perf_device = current_device;
- }
- }
- }
- ++current_device;
- }
- return max_perf_device;
-}
-
-// Give a little more for Windows : the console window often disapears before we can read the message
-#ifdef _WIN32
-# if 1//ndef UNICODE
-# ifdef _DEBUG // Do this only in debug mode...
- inline void VSPrintf(FILE *file, LPCSTR fmt, ...)
- {
- size_t fmt2_sz = 2048;
- char *fmt2 = (char*)malloc(fmt2_sz);
- va_list vlist;
- va_start(vlist, fmt);
- while((_vsnprintf(fmt2, fmt2_sz, fmt, vlist)) < 0) // means there wasn't anough room
- {
- fmt2_sz *= 2;
- if(fmt2) free(fmt2);
- fmt2 = (char*)malloc(fmt2_sz);
- }
- OutputDebugStringA(fmt2);
- fprintf(file, fmt2);
- free(fmt2);
- }
-# define FPRINTF(a) VSPrintf a
-# else //debug
-# define FPRINTF(a) fprintf a
-// For other than Win32
-# endif //debug
-# else //unicode
-// Unicode case... let's give-up for now and keep basic printf
-# define FPRINTF(a) fprintf a
-# endif //unicode
-#else //win32
-# define FPRINTF(a) fprintf a
-#endif //win32
-
-// NOTE: "%s(%i) : " allows Visual Studio to directly jump to the file at the right line
-// when the user double clicks on the error line in the Output pane. Like any compile error.
-
-inline void __cudaSafeCallNoSync( cudaError err, const char *file, const int line )
-{
- if( cudaSuccess != err) {
- FPRINTF((stderr, "%s(%i) : cudaSafeCallNoSync() Runtime API error : %s.\n",
- file, line, cudaGetErrorString( err) ));
- exit(-1);
- }
-}
-
-inline void __cudaSafeCall( cudaError err, const char *file, const int line )
-{
- if( cudaSuccess != err) {
- FPRINTF((stderr, "%s(%i) : cudaSafeCall() Runtime API error : %s.\n",
- file, line, cudaGetErrorString( err) ));
- exit(-1);
- }
-}
-
-inline void __cudaSafeThreadSync( const char *file, const int line )
-{
- cudaError err = cutilDeviceSynchronize();
- if ( cudaSuccess != err) {
- FPRINTF((stderr, "%s(%i) : cudaDeviceSynchronize() Runtime API error : %s.\n",
- file, line, cudaGetErrorString( err) ));
- exit(-1);
- }
-}
-
-inline void __cufftSafeCall( cufftResult err, const char *file, const int line )
-{
- if( CUFFT_SUCCESS != err) {
- FPRINTF((stderr, "%s(%i) : cufftSafeCall() CUFFT error.\n",
- file, line));
- exit(-1);
- }
-}
-
-inline void __cutilCheckError( CUTBoolean err, const char *file, const int line )
-{
- if( CUTTrue != err) {
- FPRINTF((stderr, "%s(%i) : CUTIL CUDA error.\n",
- file, line));
- exit(-1);
- }
-}
-
-inline void __cutilGetLastError( const char *errorMessage, const char *file, const int line )
-{
- cudaError_t err = cudaGetLastError();
- if( cudaSuccess != err) {
- FPRINTF((stderr, "%s(%i) : cutilCheckMsg() CUTIL CUDA error : %s : %s.\n",
- file, line, errorMessage, cudaGetErrorString( err) ));
- exit(-1);
- }
-}
-
-inline void __cutilGetLastErrorAndSync( const char *errorMessage, const char *file, const int line )
-{
- cudaError_t err = cudaGetLastError();
- if( cudaSuccess != err) {
- FPRINTF((stderr, "%s(%i) : cutilCheckMsg() CUTIL CUDA error : %s : %s.\n",
- file, line, errorMessage, cudaGetErrorString( err) ));
- exit(-1);
- }
-
- err = cutilDeviceSynchronize();
- if( cudaSuccess != err) {
- FPRINTF((stderr, "%s(%i) : cutilCheckMsg cudaDeviceSynchronize error: %s : %s.\n",
- file, line, errorMessage, cudaGetErrorString( err) ));
- exit(-1);
- }
-}
-
-inline void __cutilSafeMalloc( void *pointer, const char *file, const int line )
-{
- if( !(pointer)) {
- FPRINTF((stderr, "%s(%i) : cutilSafeMalloc host malloc failure\n",
- file, line));
- exit(-1);
- }
-}
-
-#if __DEVICE_EMULATION__
- inline int cutilDeviceInit(int ARGC, char **ARGV) { }
- inline int cutilChooseCudaDevice(int ARGC, char **ARGV) { }
-#else
- inline int cutilDeviceInit(int ARGC, char **ARGV)
- {
- int deviceCount;
- cutilSafeCallNoSync(cudaGetDeviceCount(&deviceCount));
- if (deviceCount == 0) {
- FPRINTF((stderr, "CUTIL CUDA error: no devices supporting CUDA.\n"));
- exit(-1);
- }
- int dev = 0;
- cutGetCmdLineArgumenti(ARGC, (const char **) ARGV, "device", &dev);
- if (dev < 0)
- dev = 0;
- if (dev > deviceCount-1) {
- fprintf(stderr, "\n");
- fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
- fprintf(stderr, ">> cutilDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
- fprintf(stderr, "\n");
- return -dev;
- }
- cudaDeviceProp deviceProp;
- cutilSafeCallNoSync(cudaGetDeviceProperties(&deviceProp, dev));
- if (deviceProp.major < 1) {
- FPRINTF((stderr, "cutil error: GPU device does not support CUDA.\n"));
- exit(-1); \
- }
- printf("> Using CUDA device [%d]: %s\n", dev, deviceProp.name);
- cutilSafeCall(cudaSetDevice(dev));
-
- return dev;
- }
-
- // General initialization call to pick the best CUDA Device
- inline int cutilChooseCudaDevice(int argc, char **argv)
- {
- cudaDeviceProp deviceProp;
- int devID = 0;
- // If the command-line has a device number specified, use it
- if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) {
- devID = cutilDeviceInit(argc, argv);
- if (devID < 0) {
- printf("exiting...\n");
- cutilExit(argc, argv);
- exit(0);
- }
- } else {
- // Otherwise pick the device with highest Gflops/s
- devID = cutGetMaxGflopsDeviceId();
- cutilSafeCallNoSync( cudaSetDevice( devID ) );
- cutilSafeCallNoSync( cudaGetDeviceProperties(&deviceProp, devID) );
- printf("> Using CUDA device [%d]: %s\n", devID, deviceProp.name);
- }
- return devID;
- }
-#endif
-
-
-//! Check for CUDA context lost
-inline void cutilCudaCheckCtxLost(const char *errorMessage, const char *file, const int line )
-{
- cudaError_t err = cudaGetLastError();
- if( cudaSuccess != err) {
- FPRINTF((stderr, "%s(%i) : CUDA error: %s : %s.\n",
- file, line, errorMessage, cudaGetErrorString( err) ));
- exit(-1);
- }
- err = cutilDeviceSynchronize();
- if( cudaSuccess != err) {
- FPRINTF((stderr, "%s(%i) : CUDA error: %s : %s.\n",
- file, line, errorMessage, cudaGetErrorString( err) ));
- exit(-1);
- }
-}
-
-#ifndef STRCASECMP
-#ifdef _WIN32
-#define STRCASECMP _stricmp
-#else
-#define STRCASECMP strcasecmp
-#endif
-#endif
-
-#ifndef STRNCASECMP
-#ifdef _WIN32
-#define STRNCASECMP _strnicmp
-#else
-#define STRNCASECMP strncasecmp
-#endif
-#endif
-
-inline void __cutilQAFinish(int argc, char **argv, bool bStatus)
-{
- const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL };
-
- bool bFlag = false;
- for (int i=1; i < argc; i++) {
- if (!STRCASECMP(argv[i], "-qatest") || !STRCASECMP(argv[i], "-noprompt")) {
- bFlag |= true;
- }
- }
-
- if (bFlag) {
- printf("&&&& %s %s", sStatus[bStatus], argv[0]);
- for (int i=1; i < argc; i++) printf(" %s", argv[i]);
- } else {
- printf("[%s] test result\n%s\n", argv[0], sStatus[bStatus]);
- }
-}
-
-// General check for CUDA GPU SM Capabilities
-inline bool cutilCudaCapabilities(int major_version, int minor_version, int argc, char **argv)
-{
- cudaDeviceProp deviceProp;
- deviceProp.major = 0;
- deviceProp.minor = 0;
- int dev;
-
-#ifdef __DEVICE_EMULATION__
- printf("> Compute Device Emulation Mode \n");
-#endif
-
- cutilSafeCall( cudaGetDevice(&dev) );
- cutilSafeCall( cudaGetDeviceProperties(&deviceProp, dev));
-
- if((deviceProp.major > major_version) ||
- (deviceProp.major == major_version && deviceProp.minor >= minor_version))
- {
- printf("> Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor);
- return true;
- }
- else
- {
- printf("There is no device supporting CUDA compute capability %d.%d.\n", major_version, minor_version);
- __cutilQAFinish(argc, argv, true);
- return false;
- }
-}
-
-#endif // _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_
+++ /dev/null
-/*
- * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-/*
- This file implements common mathematical operations on vector types
- (float3, float4 etc.) since these are not provided as standard by CUDA.
-
- The syntax is modelled on the Cg standard library.
-
- This is part of the CUTIL library and is not supported by NVIDIA.
-
- Thanks to Linh Hah for additions and fixes.
-*/
-
-#ifndef CUTIL_MATH_H
-#define CUTIL_MATH_H
-
-#include "cuda_runtime.h"
-
-typedef unsigned int uint;
-typedef unsigned short ushort;
-
-#ifndef __CUDACC__
-#include <math.h>
-
-////////////////////////////////////////////////////////////////////////////////
-// host implementations of CUDA functions
-////////////////////////////////////////////////////////////////////////////////
-
-inline float fminf(float a, float b)
-{
- return a < b ? a : b;
-}
-
-inline float fmaxf(float a, float b)
-{
- return a > b ? a : b;
-}
-
-inline int max(int a, int b)
-{
- return a > b ? a : b;
-}
-
-inline int min(int a, int b)
-{
- return a < b ? a : b;
-}
-
-inline float rsqrtf(float x)
-{
- return 1.0f / sqrtf(x);
-}
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-// constructors
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 make_float2(float s)
-{
- return make_float2(s, s);
-}
-inline __host__ __device__ float2 make_float2(float3 a)
-{
- return make_float2(a.x, a.y);
-}
-inline __host__ __device__ float2 make_float2(int2 a)
-{
- return make_float2(float(a.x), float(a.y));
-}
-inline __host__ __device__ float2 make_float2(uint2 a)
-{
- return make_float2(float(a.x), float(a.y));
-}
-
-inline __host__ __device__ int2 make_int2(int s)
-{
- return make_int2(s, s);
-}
-inline __host__ __device__ int2 make_int2(int3 a)
-{
- return make_int2(a.x, a.y);
-}
-inline __host__ __device__ int2 make_int2(uint2 a)
-{
- return make_int2(int(a.x), int(a.y));
-}
-inline __host__ __device__ int2 make_int2(float2 a)
-{
- return make_int2(int(a.x), int(a.y));
-}
-
-inline __host__ __device__ uint2 make_uint2(uint s)
-{
- return make_uint2(s, s);
-}
-inline __host__ __device__ uint2 make_uint2(uint3 a)
-{
- return make_uint2(a.x, a.y);
-}
-inline __host__ __device__ uint2 make_uint2(int2 a)
-{
- return make_uint2(uint(a.x), uint(a.y));
-}
-
-inline __host__ __device__ float3 make_float3(float s)
-{
- return make_float3(s, s, s);
-}
-inline __host__ __device__ float3 make_float3(float2 a)
-{
- return make_float3(a.x, a.y, 0.0f);
-}
-inline __host__ __device__ float3 make_float3(float2 a, float s)
-{
- return make_float3(a.x, a.y, s);
-}
-inline __host__ __device__ float3 make_float3(float4 a)
-{
- return make_float3(a.x, a.y, a.z);
-}
-inline __host__ __device__ float3 make_float3(int3 a)
-{
- return make_float3(float(a.x), float(a.y), float(a.z));
-}
-inline __host__ __device__ float3 make_float3(uint3 a)
-{
- return make_float3(float(a.x), float(a.y), float(a.z));
-}
-
-inline __host__ __device__ int3 make_int3(int s)
-{
- return make_int3(s, s, s);
-}
-inline __host__ __device__ int3 make_int3(int2 a)
-{
- return make_int3(a.x, a.y, 0);
-}
-inline __host__ __device__ int3 make_int3(int2 a, int s)
-{
- return make_int3(a.x, a.y, s);
-}
-inline __host__ __device__ int3 make_int3(uint3 a)
-{
- return make_int3(int(a.x), int(a.y), int(a.z));
-}
-inline __host__ __device__ int3 make_int3(float3 a)
-{
- return make_int3(int(a.x), int(a.y), int(a.z));
-}
-
-inline __host__ __device__ uint3 make_uint3(uint s)
-{
- return make_uint3(s, s, s);
-}
-inline __host__ __device__ uint3 make_uint3(uint2 a)
-{
- return make_uint3(a.x, a.y, 0);
-}
-inline __host__ __device__ uint3 make_uint3(uint2 a, uint s)
-{
- return make_uint3(a.x, a.y, s);
-}
-inline __host__ __device__ uint3 make_uint3(uint4 a)
-{
- return make_uint3(a.x, a.y, a.z);
-}
-inline __host__ __device__ uint3 make_uint3(int3 a)
-{
- return make_uint3(uint(a.x), uint(a.y), uint(a.z));
-}
-
-inline __host__ __device__ float4 make_float4(float s)
-{
- return make_float4(s, s, s, s);
-}
-inline __host__ __device__ float4 make_float4(float3 a)
-{
- return make_float4(a.x, a.y, a.z, 0.0f);
-}
-inline __host__ __device__ float4 make_float4(float3 a, float w)
-{
- return make_float4(a.x, a.y, a.z, w);
-}
-inline __host__ __device__ float4 make_float4(int4 a)
-{
- return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
-}
-inline __host__ __device__ float4 make_float4(uint4 a)
-{
- return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
-}
-
-inline __host__ __device__ int4 make_int4(int s)
-{
- return make_int4(s, s, s, s);
-}
-inline __host__ __device__ int4 make_int4(int3 a)
-{
- return make_int4(a.x, a.y, a.z, 0);
-}
-inline __host__ __device__ int4 make_int4(int3 a, int w)
-{
- return make_int4(a.x, a.y, a.z, w);
-}
-inline __host__ __device__ int4 make_int4(uint4 a)
-{
- return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
-}
-inline __host__ __device__ int4 make_int4(float4 a)
-{
- return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
-}
-
-
-inline __host__ __device__ uint4 make_uint4(uint s)
-{
- return make_uint4(s, s, s, s);
-}
-inline __host__ __device__ uint4 make_uint4(uint3 a)
-{
- return make_uint4(a.x, a.y, a.z, 0);
-}
-inline __host__ __device__ uint4 make_uint4(uint3 a, uint w)
-{
- return make_uint4(a.x, a.y, a.z, w);
-}
-inline __host__ __device__ uint4 make_uint4(int4 a)
-{
- return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// negate
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator-(float2 &a)
-{
- return make_float2(-a.x, -a.y);
-}
-inline __host__ __device__ int2 operator-(int2 &a)
-{
- return make_int2(-a.x, -a.y);
-}
-inline __host__ __device__ float3 operator-(float3 &a)
-{
- return make_float3(-a.x, -a.y, -a.z);
-}
-inline __host__ __device__ int3 operator-(int3 &a)
-{
- return make_int3(-a.x, -a.y, -a.z);
-}
-inline __host__ __device__ float4 operator-(float4 &a)
-{
- return make_float4(-a.x, -a.y, -a.z, -a.w);
-}
-inline __host__ __device__ int4 operator-(int4 &a)
-{
- return make_int4(-a.x, -a.y, -a.z, -a.w);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// addition
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator+(float2 a, float2 b)
-{
- return make_float2(a.x + b.x, a.y + b.y);
-}
-inline __host__ __device__ void operator+=(float2 &a, float2 b)
-{
- a.x += b.x; a.y += b.y;
-}
-inline __host__ __device__ float2 operator+(float2 a, float b)
-{
- return make_float2(a.x + b, a.y + b);
-}
-inline __host__ __device__ float2 operator+(float b, float2 a)
-{
- return make_float2(a.x + b, a.y + b);
-}
-inline __host__ __device__ void operator+=(float2 &a, float b)
-{
- a.x += b; a.y += b;
-}
-
-inline __host__ __device__ int2 operator+(int2 a, int2 b)
-{
- return make_int2(a.x + b.x, a.y + b.y);
-}
-inline __host__ __device__ void operator+=(int2 &a, int2 b)
-{
- a.x += b.x; a.y += b.y;
-}
-inline __host__ __device__ int2 operator+(int2 a, int b)
-{
- return make_int2(a.x + b, a.y + b);
-}
-inline __host__ __device__ int2 operator+(int b, int2 a)
-{
- return make_int2(a.x + b, a.y + b);
-}
-inline __host__ __device__ void operator+=(int2 &a, int b)
-{
- a.x += b; a.y += b;
-}
-
-inline __host__ __device__ uint2 operator+(uint2 a, uint2 b)
-{
- return make_uint2(a.x + b.x, a.y + b.y);
-}
-inline __host__ __device__ void operator+=(uint2 &a, uint2 b)
-{
- a.x += b.x; a.y += b.y;
-}
-inline __host__ __device__ uint2 operator+(uint2 a, uint b)
-{
- return make_uint2(a.x + b, a.y + b);
-}
-inline __host__ __device__ uint2 operator+(uint b, uint2 a)
-{
- return make_uint2(a.x + b, a.y + b);
-}
-inline __host__ __device__ void operator+=(uint2 &a, uint b)
-{
- a.x += b; a.y += b;
-}
-
-
-inline __host__ __device__ float3 operator+(float3 a, float3 b)
-{
- return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
-}
-inline __host__ __device__ void operator+=(float3 &a, float3 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z;
-}
-inline __host__ __device__ float3 operator+(float3 a, float b)
-{
- return make_float3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ void operator+=(float3 &a, float b)
-{
- a.x += b; a.y += b; a.z += b;
-}
-
-inline __host__ __device__ int3 operator+(int3 a, int3 b)
-{
- return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
-}
-inline __host__ __device__ void operator+=(int3 &a, int3 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z;
-}
-inline __host__ __device__ int3 operator+(int3 a, int b)
-{
- return make_int3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ void operator+=(int3 &a, int b)
-{
- a.x += b; a.y += b; a.z += b;
-}
-
-inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
-{
- return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
-}
-inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z;
-}
-inline __host__ __device__ uint3 operator+(uint3 a, uint b)
-{
- return make_uint3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ void operator+=(uint3 &a, uint b)
-{
- a.x += b; a.y += b; a.z += b;
-}
-
-inline __host__ __device__ int3 operator+(int b, int3 a)
-{
- return make_int3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ uint3 operator+(uint b, uint3 a)
-{
- return make_uint3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ float3 operator+(float b, float3 a)
-{
- return make_float3(a.x + b, a.y + b, a.z + b);
-}
-
-inline __host__ __device__ float4 operator+(float4 a, float4 b)
-{
- return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-}
-inline __host__ __device__ void operator+=(float4 &a, float4 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
-}
-inline __host__ __device__ float4 operator+(float4 a, float b)
-{
- return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ float4 operator+(float b, float4 a)
-{
- return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ void operator+=(float4 &a, float b)
-{
- a.x += b; a.y += b; a.z += b; a.w += b;
-}
-
-inline __host__ __device__ int4 operator+(int4 a, int4 b)
-{
- return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-}
-inline __host__ __device__ void operator+=(int4 &a, int4 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
-}
-inline __host__ __device__ int4 operator+(int4 a, int b)
-{
- return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ int4 operator+(int b, int4 a)
-{
- return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ void operator+=(int4 &a, int b)
-{
- a.x += b; a.y += b; a.z += b; a.w += b;
-}
-
-inline __host__ __device__ uint4 operator+(uint4 a, uint4 b)
-{
- return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-}
-inline __host__ __device__ void operator+=(uint4 &a, uint4 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
-}
-inline __host__ __device__ uint4 operator+(uint4 a, uint b)
-{
- return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ uint4 operator+(uint b, uint4 a)
-{
- return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ void operator+=(uint4 &a, uint b)
-{
- a.x += b; a.y += b; a.z += b; a.w += b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// subtract
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator-(float2 a, float2 b)
-{
- return make_float2(a.x - b.x, a.y - b.y);
-}
-inline __host__ __device__ void operator-=(float2 &a, float2 b)
-{
- a.x -= b.x; a.y -= b.y;
-}
-inline __host__ __device__ float2 operator-(float2 a, float b)
-{
- return make_float2(a.x - b, a.y - b);
-}
-inline __host__ __device__ float2 operator-(float b, float2 a)
-{
- return make_float2(b - a.x, b - a.y);
-}
-inline __host__ __device__ void operator-=(float2 &a, float b)
-{
- a.x -= b; a.y -= b;
-}
-
-inline __host__ __device__ int2 operator-(int2 a, int2 b)
-{
- return make_int2(a.x - b.x, a.y - b.y);
-}
-inline __host__ __device__ void operator-=(int2 &a, int2 b)
-{
- a.x -= b.x; a.y -= b.y;
-}
-inline __host__ __device__ int2 operator-(int2 a, int b)
-{
- return make_int2(a.x - b, a.y - b);
-}
-inline __host__ __device__ int2 operator-(int b, int2 a)
-{
- return make_int2(b - a.x, b - a.y);
-}
-inline __host__ __device__ void operator-=(int2 &a, int b)
-{
- a.x -= b; a.y -= b;
-}
-
-inline __host__ __device__ uint2 operator-(uint2 a, uint2 b)
-{
- return make_uint2(a.x - b.x, a.y - b.y);
-}
-inline __host__ __device__ void operator-=(uint2 &a, uint2 b)
-{
- a.x -= b.x; a.y -= b.y;
-}
-inline __host__ __device__ uint2 operator-(uint2 a, uint b)
-{
- return make_uint2(a.x - b, a.y - b);
-}
-inline __host__ __device__ uint2 operator-(uint b, uint2 a)
-{
- return make_uint2(b - a.x, b - a.y);
-}
-inline __host__ __device__ void operator-=(uint2 &a, uint b)
-{
- a.x -= b; a.y -= b;
-}
-
-inline __host__ __device__ float3 operator-(float3 a, float3 b)
-{
- return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
-}
-inline __host__ __device__ void operator-=(float3 &a, float3 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z;
-}
-inline __host__ __device__ float3 operator-(float3 a, float b)
-{
- return make_float3(a.x - b, a.y - b, a.z - b);
-}
-inline __host__ __device__ float3 operator-(float b, float3 a)
-{
- return make_float3(b - a.x, b - a.y, b - a.z);
-}
-inline __host__ __device__ void operator-=(float3 &a, float b)
-{
- a.x -= b; a.y -= b; a.z -= b;
-}
-
-inline __host__ __device__ int3 operator-(int3 a, int3 b)
-{
- return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
-}
-inline __host__ __device__ void operator-=(int3 &a, int3 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z;
-}
-inline __host__ __device__ int3 operator-(int3 a, int b)
-{
- return make_int3(a.x - b, a.y - b, a.z - b);
-}
-inline __host__ __device__ int3 operator-(int b, int3 a)
-{
- return make_int3(b - a.x, b - a.y, b - a.z);
-}
-inline __host__ __device__ void operator-=(int3 &a, int b)
-{
- a.x -= b; a.y -= b; a.z -= b;
-}
-
-inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
-{
- return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
-}
-inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z;
-}
-inline __host__ __device__ uint3 operator-(uint3 a, uint b)
-{
- return make_uint3(a.x - b, a.y - b, a.z - b);
-}
-inline __host__ __device__ uint3 operator-(uint b, uint3 a)
-{
- return make_uint3(b - a.x, b - a.y, b - a.z);
-}
-inline __host__ __device__ void operator-=(uint3 &a, uint b)
-{
- a.x -= b; a.y -= b; a.z -= b;
-}
-
-inline __host__ __device__ float4 operator-(float4 a, float4 b)
-{
- return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-}
-inline __host__ __device__ void operator-=(float4 &a, float4 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
-}
-inline __host__ __device__ float4 operator-(float4 a, float b)
-{
- return make_float4(a.x - b, a.y - b, a.z - b, a.w - b);
-}
-inline __host__ __device__ void operator-=(float4 &a, float b)
-{
- a.x -= b; a.y -= b; a.z -= b; a.w -= b;
-}
-
-inline __host__ __device__ int4 operator-(int4 a, int4 b)
-{
- return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-}
-inline __host__ __device__ void operator-=(int4 &a, int4 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
-}
-inline __host__ __device__ int4 operator-(int4 a, int b)
-{
- return make_int4(a.x - b, a.y - b, a.z - b, a.w - b);
-}
-inline __host__ __device__ int4 operator-(int b, int4 a)
-{
- return make_int4(b - a.x, b - a.y, b - a.z, b - a.w);
-}
-inline __host__ __device__ void operator-=(int4 &a, int b)
-{
- a.x -= b; a.y -= b; a.z -= b; a.w -= b;
-}
-
-inline __host__ __device__ uint4 operator-(uint4 a, uint4 b)
-{
- return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-}
-inline __host__ __device__ void operator-=(uint4 &a, uint4 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
-}
-inline __host__ __device__ uint4 operator-(uint4 a, uint b)
-{
- return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b);
-}
-inline __host__ __device__ uint4 operator-(uint b, uint4 a)
-{
- return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
-}
-inline __host__ __device__ void operator-=(uint4 &a, uint b)
-{
- a.x -= b; a.y -= b; a.z -= b; a.w -= b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// multiply
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator*(float2 a, float2 b)
-{
- return make_float2(a.x * b.x, a.y * b.y);
-}
-inline __host__ __device__ void operator*=(float2 &a, float2 b)
-{
- a.x *= b.x; a.y *= b.y;
-}
-inline __host__ __device__ float2 operator*(float2 a, float b)
-{
- return make_float2(a.x * b, a.y * b);
-}
-inline __host__ __device__ float2 operator*(float b, float2 a)
-{
- return make_float2(b * a.x, b * a.y);
-}
-inline __host__ __device__ void operator*=(float2 &a, float b)
-{
- a.x *= b; a.y *= b;
-}
-
-inline __host__ __device__ int2 operator*(int2 a, int2 b)
-{
- return make_int2(a.x * b.x, a.y * b.y);
-}
-inline __host__ __device__ void operator*=(int2 &a, int2 b)
-{
- a.x *= b.x; a.y *= b.y;
-}
-inline __host__ __device__ int2 operator*(int2 a, int b)
-{
- return make_int2(a.x * b, a.y * b);
-}
-inline __host__ __device__ int2 operator*(int b, int2 a)
-{
- return make_int2(b * a.x, b * a.y);
-}
-inline __host__ __device__ void operator*=(int2 &a, int b)
-{
- a.x *= b; a.y *= b;
-}
-
-inline __host__ __device__ uint2 operator*(uint2 a, uint2 b)
-{
- return make_uint2(a.x * b.x, a.y * b.y);
-}
-inline __host__ __device__ void operator*=(uint2 &a, uint2 b)
-{
- a.x *= b.x; a.y *= b.y;
-}
-inline __host__ __device__ uint2 operator*(uint2 a, uint b)
-{
- return make_uint2(a.x * b, a.y * b);
-}
-inline __host__ __device__ uint2 operator*(uint b, uint2 a)
-{
- return make_uint2(b * a.x, b * a.y);
-}
-inline __host__ __device__ void operator*=(uint2 &a, uint b)
-{
- a.x *= b; a.y *= b;
-}
-
-inline __host__ __device__ float3 operator*(float3 a, float3 b)
-{
- return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
-}
-inline __host__ __device__ void operator*=(float3 &a, float3 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z;
-}
-inline __host__ __device__ float3 operator*(float3 a, float b)
-{
- return make_float3(a.x * b, a.y * b, a.z * b);
-}
-inline __host__ __device__ float3 operator*(float b, float3 a)
-{
- return make_float3(b * a.x, b * a.y, b * a.z);
-}
-inline __host__ __device__ void operator*=(float3 &a, float b)
-{
- a.x *= b; a.y *= b; a.z *= b;
-}
-
-inline __host__ __device__ int3 operator*(int3 a, int3 b)
-{
- return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
-}
-inline __host__ __device__ void operator*=(int3 &a, int3 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z;
-}
-inline __host__ __device__ int3 operator*(int3 a, int b)
-{
- return make_int3(a.x * b, a.y * b, a.z * b);
-}
-inline __host__ __device__ int3 operator*(int b, int3 a)
-{
- return make_int3(b * a.x, b * a.y, b * a.z);
-}
-inline __host__ __device__ void operator*=(int3 &a, int b)
-{
- a.x *= b; a.y *= b; a.z *= b;
-}
-
-inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
-{
- return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
-}
-inline __host__ __device__ void operator*=(uint3 &a, uint3 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z;
-}
-inline __host__ __device__ uint3 operator*(uint3 a, uint b)
-{
- return make_uint3(a.x * b, a.y * b, a.z * b);
-}
-inline __host__ __device__ uint3 operator*(uint b, uint3 a)
-{
- return make_uint3(b * a.x, b * a.y, b * a.z);
-}
-inline __host__ __device__ void operator*=(uint3 &a, uint b)
-{
- a.x *= b; a.y *= b; a.z *= b;
-}
-
-inline __host__ __device__ float4 operator*(float4 a, float4 b)
-{
- return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-}
-inline __host__ __device__ void operator*=(float4 &a, float4 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w;
-}
-inline __host__ __device__ float4 operator*(float4 a, float b)
-{
- return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
-}
-inline __host__ __device__ float4 operator*(float b, float4 a)
-{
- return make_float4(b * a.x, b * a.y, b * a.z, b * a.w);
-}
-inline __host__ __device__ void operator*=(float4 &a, float b)
-{
- a.x *= b; a.y *= b; a.z *= b; a.w *= b;
-}
-
-inline __host__ __device__ int4 operator*(int4 a, int4 b)
-{
- return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-}
-inline __host__ __device__ void operator*=(int4 &a, int4 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w;
-}
-inline __host__ __device__ int4 operator*(int4 a, int b)
-{
- return make_int4(a.x * b, a.y * b, a.z * b, a.w * b);
-}
-inline __host__ __device__ int4 operator*(int b, int4 a)
-{
- return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
-}
-inline __host__ __device__ void operator*=(int4 &a, int b)
-{
- a.x *= b; a.y *= b; a.z *= b; a.w *= b;
-}
-
-inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
-{
- return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-}
-inline __host__ __device__ void operator*=(uint4 &a, uint4 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w;
-}
-inline __host__ __device__ uint4 operator*(uint4 a, uint b)
-{
- return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b);
-}
-inline __host__ __device__ uint4 operator*(uint b, uint4 a)
-{
- return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
-}
-inline __host__ __device__ void operator*=(uint4 &a, uint b)
-{
- a.x *= b; a.y *= b; a.z *= b; a.w *= b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// divide
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator/(float2 a, float2 b)
-{
- return make_float2(a.x / b.x, a.y / b.y);
-}
-inline __host__ __device__ void operator/=(float2 &a, float2 b)
-{
- a.x /= b.x; a.y /= b.y;
-}
-inline __host__ __device__ float2 operator/(float2 a, float b)
-{
- return make_float2(a.x / b, a.y / b);
-}
-inline __host__ __device__ void operator/=(float2 &a, float b)
-{
- a.x /= b; a.y /= b;
-}
-inline __host__ __device__ float2 operator/(float b, float2 a)
-{
- return make_float2(b / a.x, b / a.y);
-}
-
-inline __host__ __device__ float3 operator/(float3 a, float3 b)
-{
- return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
-}
-inline __host__ __device__ void operator/=(float3 &a, float3 b)
-{
- a.x /= b.x; a.y /= b.y; a.z /= b.z;
-}
-inline __host__ __device__ float3 operator/(float3 a, float b)
-{
- return make_float3(a.x / b, a.y / b, a.z / b);
-}
-inline __host__ __device__ void operator/=(float3 &a, float b)
-{
- a.x /= b; a.y /= b; a.z /= b;
-}
-inline __host__ __device__ float3 operator/(float b, float3 a)
-{
- return make_float3(b / a.x, b / a.y, b / a.z);
-}
-
-inline __host__ __device__ float4 operator/(float4 a, float4 b)
-{
- return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
-}
-inline __host__ __device__ void operator/=(float4 &a, float4 b)
-{
- a.x /= b.x; a.y /= b.y; a.z /= b.z; a.w /= b.w;
-}
-inline __host__ __device__ float4 operator/(float4 a, float b)
-{
- return make_float4(a.x / b, a.y / b, a.z / b, a.w / b);
-}
-inline __host__ __device__ void operator/=(float4 &a, float b)
-{
- a.x /= b; a.y /= b; a.z /= b; a.w /= b;
-}
-inline __host__ __device__ float4 operator/(float b, float4 a){
- return make_float4(b / a.x, b / a.y, b / a.z, b / a.w);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// min
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 fminf(float2 a, float2 b)
-{
- return make_float2(fminf(a.x,b.x), fminf(a.y,b.y));
-}
-inline __host__ __device__ float3 fminf(float3 a, float3 b)
-{
- return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
-}
-inline __host__ __device__ float4 fminf(float4 a, float4 b)
-{
- return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
-}
-
-inline __host__ __device__ int2 min(int2 a, int2 b)
-{
- return make_int2(min(a.x,b.x), min(a.y,b.y));
-}
-inline __host__ __device__ int3 min(int3 a, int3 b)
-{
- return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
-}
-inline __host__ __device__ int4 min(int4 a, int4 b)
-{
- return make_int4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
-}
-
-inline __host__ __device__ uint2 min(uint2 a, uint2 b)
-{
- return make_uint2(min(a.x,b.x), min(a.y,b.y));
-}
-inline __host__ __device__ uint3 min(uint3 a, uint3 b)
-{
- return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
-}
-inline __host__ __device__ uint4 min(uint4 a, uint4 b)
-{
- return make_uint4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// max
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 fmaxf(float2 a, float2 b)
-{
- return make_float2(fmaxf(a.x,b.x), fmaxf(a.y,b.y));
-}
-inline __host__ __device__ float3 fmaxf(float3 a, float3 b)
-{
- return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
-}
-inline __host__ __device__ float4 fmaxf(float4 a, float4 b)
-{
- return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
-}
-
-inline __host__ __device__ int2 max(int2 a, int2 b)
-{
- return make_int2(max(a.x,b.x), max(a.y,b.y));
-}
-inline __host__ __device__ int3 max(int3 a, int3 b)
-{
- return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
-}
-inline __host__ __device__ int4 max(int4 a, int4 b)
-{
- return make_int4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
-}
-
-inline __host__ __device__ uint2 max(uint2 a, uint2 b)
-{
- return make_uint2(max(a.x,b.x), max(a.y,b.y));
-}
-inline __host__ __device__ uint3 max(uint3 a, uint3 b)
-{
- return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
-}
-inline __host__ __device__ uint4 max(uint4 a, uint4 b)
-{
- return make_uint4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// lerp
-// - linear interpolation between a and b, based on value t in [0, 1] range
-////////////////////////////////////////////////////////////////////////////////
-
-inline __device__ __host__ float lerp(float a, float b, float t)
-{
- return a + t*(b-a);
-}
-inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
-{
- return a + t*(b-a);
-}
-inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
-{
- return a + t*(b-a);
-}
-inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
-{
- return a + t*(b-a);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// clamp
-// - clamp the value v to be in the range [a, b]
-////////////////////////////////////////////////////////////////////////////////
-
-inline __device__ __host__ float clamp(float f, float a, float b)
-{
- return fmaxf(a, fminf(f, b));
-}
-inline __device__ __host__ int clamp(int f, int a, int b)
-{
- return max(a, min(f, b));
-}
-inline __device__ __host__ uint clamp(uint f, uint a, uint b)
-{
- return max(a, min(f, b));
-}
-
-inline __device__ __host__ float2 clamp(float2 v, float a, float b)
-{
- return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
-}
-inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
-{
- return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
-}
-inline __device__ __host__ float3 clamp(float3 v, float a, float b)
-{
- return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
-}
-inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
-{
- return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
-}
-inline __device__ __host__ float4 clamp(float4 v, float a, float b)
-{
- return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
-}
-inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
-{
- return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
-}
-
-inline __device__ __host__ int2 clamp(int2 v, int a, int b)
-{
- return make_int2(clamp(v.x, a, b), clamp(v.y, a, b));
-}
-inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b)
-{
- return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
-}
-inline __device__ __host__ int3 clamp(int3 v, int a, int b)
-{
- return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
-}
-inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
-{
- return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
-}
-inline __device__ __host__ int4 clamp(int4 v, int a, int b)
-{
- return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
-}
-inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
-{
- return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
-}
-
-inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b)
-{
- return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b));
-}
-inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b)
-{
- return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
-}
-inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
-{
- return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
-}
-inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
-{
- return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
-}
-inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b)
-{
- return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
-}
-inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
-{
- return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// dot product
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float dot(float2 a, float2 b)
-{
- return a.x * b.x + a.y * b.y;
-}
-inline __host__ __device__ float dot(float3 a, float3 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z;
-}
-inline __host__ __device__ float dot(float4 a, float4 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
-}
-
-inline __host__ __device__ int dot(int2 a, int2 b)
-{
- return a.x * b.x + a.y * b.y;
-}
-inline __host__ __device__ int dot(int3 a, int3 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z;
-}
-inline __host__ __device__ int dot(int4 a, int4 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
-}
-
-inline __host__ __device__ uint dot(uint2 a, uint2 b)
-{
- return a.x * b.x + a.y * b.y;
-}
-inline __host__ __device__ uint dot(uint3 a, uint3 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z;
-}
-inline __host__ __device__ uint dot(uint4 a, uint4 b)
-{
- return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// length
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float length(float2 v)
-{
- return sqrtf(dot(v, v));
-}
-inline __host__ __device__ float length(float3 v)
-{
- return sqrtf(dot(v, v));
-}
-inline __host__ __device__ float length(float4 v)
-{
- return sqrtf(dot(v, v));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// normalize
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 normalize(float2 v)
-{
- float invLen = rsqrtf(dot(v, v));
- return v * invLen;
-}
-inline __host__ __device__ float3 normalize(float3 v)
-{
- float invLen = rsqrtf(dot(v, v));
- return v * invLen;
-}
-inline __host__ __device__ float4 normalize(float4 v)
-{
- float invLen = rsqrtf(dot(v, v));
- return v * invLen;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// floor
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 floorf(float2 v)
-{
- return make_float2(floorf(v.x), floorf(v.y));
-}
-inline __host__ __device__ float3 floorf(float3 v)
-{
- return make_float3(floorf(v.x), floorf(v.y), floorf(v.z));
-}
-inline __host__ __device__ float4 floorf(float4 v)
-{
- return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// frac - returns the fractional portion of a scalar or each vector component
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float fracf(float v)
-{
- return v - floorf(v);
-}
-inline __host__ __device__ float2 fracf(float2 v)
-{
- return make_float2(fracf(v.x), fracf(v.y));
-}
-inline __host__ __device__ float3 fracf(float3 v)
-{
- return make_float3(fracf(v.x), fracf(v.y), fracf(v.z));
-}
-inline __host__ __device__ float4 fracf(float4 v)
-{
- return make_float4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// fmod
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 fmodf(float2 a, float2 b)
-{
- return make_float2(fmodf(a.x, b.x), fmodf(a.y, b.y));
-}
-inline __host__ __device__ float3 fmodf(float3 a, float3 b)
-{
- return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z));
-}
-inline __host__ __device__ float4 fmodf(float4 a, float4 b)
-{
- return make_float4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmodf(a.w, b.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// absolute value
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 fabs(float2 v)
-{
- return make_float2(fabs(v.x), fabs(v.y));
-}
-inline __host__ __device__ float3 fabs(float3 v)
-{
- return make_float3(fabs(v.x), fabs(v.y), fabs(v.z));
-}
-inline __host__ __device__ float4 fabs(float4 v)
-{
- return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w));
-}
-
-inline __host__ __device__ int2 abs(int2 v)
-{
- return make_int2(abs(v.x), abs(v.y));
-}
-inline __host__ __device__ int3 abs(int3 v)
-{
- return make_int3(abs(v.x), abs(v.y), abs(v.z));
-}
-inline __host__ __device__ int4 abs(int4 v)
-{
- return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// reflect
-// - returns reflection of incident ray I around surface normal N
-// - N should be normalized, reflected vector's length is equal to length of I
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float3 reflect(float3 i, float3 n)
-{
- return i - 2.0f * n * dot(n,i);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// cross product
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float3 cross(float3 a, float3 b)
-{
- return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// smoothstep
-// - returns 0 if x < a
-// - returns 1 if x > b
-// - otherwise returns smooth interpolation between 0 and 1 based on x
-////////////////////////////////////////////////////////////////////////////////
-
-inline __device__ __host__ float smoothstep(float a, float b, float x)
-{
- float y = clamp((x - a) / (b - a), 0.0f, 1.0f);
- return (y*y*(3.0f - (2.0f*y)));
-}
-inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)
-{
- float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
- return (y*y*(make_float2(3.0f) - (make_float2(2.0f)*y)));
-}
-inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)
-{
- float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
- return (y*y*(make_float3(3.0f) - (make_float3(2.0f)*y)));
-}
-inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)
-{
- float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
- return (y*y*(make_float4(3.0f) - (make_float4(2.0f)*y)));
-}
-
-#endif
+++ /dev/null
-/*
- * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-/*
- This file implements common mathematical operations on vector types
- (float3, float4 etc.) since these are not provided as standard by CUDA.
-
- The syntax is modelled on the Cg standard library.
-
- This is part of the CUTIL library and is not supported by NVIDIA.
-
- Thanks to Linh Hah for additions and fixes.
-*/
-
-#ifndef CUTIL_MATH_H
-#define CUTIL_MATH_H
-
-#include "cuda_runtime.h"
-
-typedef unsigned int uint;
-typedef unsigned short ushort;
-
-#ifndef __CUDACC__
-#include <math.h>
-
-////////////////////////////////////////////////////////////////////////////////
-// host implementations of CUDA functions
-////////////////////////////////////////////////////////////////////////////////
-
-inline float fminf(float a, float b)
-{
- return a < b ? a : b;
-}
-
-inline float fmaxf(float a, float b)
-{
- return a > b ? a : b;
-}
-
-inline int max(int a, int b)
-{
- return a > b ? a : b;
-}
-
-inline int min(int a, int b)
-{
- return a < b ? a : b;
-}
-
-inline float rsqrtf(float x)
-{
- return 1.0f / sqrtf(x);
-}
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-// constructors
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 make_float2(float s)
-{
- return make_float2(s, s);
-}
-inline __host__ __device__ float2 make_float2(float3 a)
-{
- return make_float2(a.x, a.y);
-}
-inline __host__ __device__ float2 make_float2(int2 a)
-{
- return make_float2(float(a.x), float(a.y));
-}
-inline __host__ __device__ float2 make_float2(uint2 a)
-{
- return make_float2(float(a.x), float(a.y));
-}
-
-inline __host__ __device__ int2 make_int2(int s)
-{
- return make_int2(s, s);
-}
-inline __host__ __device__ int2 make_int2(int3 a)
-{
- return make_int2(a.x, a.y);
-}
-inline __host__ __device__ int2 make_int2(uint2 a)
-{
- return make_int2(int(a.x), int(a.y));
-}
-inline __host__ __device__ int2 make_int2(float2 a)
-{
- return make_int2(int(a.x), int(a.y));
-}
-
-inline __host__ __device__ uint2 make_uint2(uint s)
-{
- return make_uint2(s, s);
-}
-inline __host__ __device__ uint2 make_uint2(uint3 a)
-{
- return make_uint2(a.x, a.y);
-}
-inline __host__ __device__ uint2 make_uint2(int2 a)
-{
- return make_uint2(uint(a.x), uint(a.y));
-}
-
-inline __host__ __device__ float3 make_float3(float s)
-{
- return make_float3(s, s, s);
-}
-inline __host__ __device__ float3 make_float3(float2 a)
-{
- return make_float3(a.x, a.y, 0.0f);
-}
-inline __host__ __device__ float3 make_float3(float2 a, float s)
-{
- return make_float3(a.x, a.y, s);
-}
-inline __host__ __device__ float3 make_float3(float4 a)
-{
- return make_float3(a.x, a.y, a.z);
-}
-inline __host__ __device__ float3 make_float3(int3 a)
-{
- return make_float3(float(a.x), float(a.y), float(a.z));
-}
-inline __host__ __device__ float3 make_float3(uint3 a)
-{
- return make_float3(float(a.x), float(a.y), float(a.z));
-}
-
-inline __host__ __device__ int3 make_int3(int s)
-{
- return make_int3(s, s, s);
-}
-inline __host__ __device__ int3 make_int3(int2 a)
-{
- return make_int3(a.x, a.y, 0);
-}
-inline __host__ __device__ int3 make_int3(int2 a, int s)
-{
- return make_int3(a.x, a.y, s);
-}
-inline __host__ __device__ int3 make_int3(uint3 a)
-{
- return make_int3(int(a.x), int(a.y), int(a.z));
-}
-inline __host__ __device__ int3 make_int3(float3 a)
-{
- return make_int3(int(a.x), int(a.y), int(a.z));
-}
-
-inline __host__ __device__ uint3 make_uint3(uint s)
-{
- return make_uint3(s, s, s);
-}
-inline __host__ __device__ uint3 make_uint3(uint2 a)
-{
- return make_uint3(a.x, a.y, 0);
-}
-inline __host__ __device__ uint3 make_uint3(uint2 a, uint s)
-{
- return make_uint3(a.x, a.y, s);
-}
-inline __host__ __device__ uint3 make_uint3(uint4 a)
-{
- return make_uint3(a.x, a.y, a.z);
-}
-inline __host__ __device__ uint3 make_uint3(int3 a)
-{
- return make_uint3(uint(a.x), uint(a.y), uint(a.z));
-}
-
-inline __host__ __device__ float4 make_float4(float s)
-{
- return make_float4(s, s, s, s);
-}
-inline __host__ __device__ float4 make_float4(float3 a)
-{
- return make_float4(a.x, a.y, a.z, 0.0f);
-}
-inline __host__ __device__ float4 make_float4(float3 a, float w)
-{
- return make_float4(a.x, a.y, a.z, w);
-}
-inline __host__ __device__ float4 make_float4(int4 a)
-{
- return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
-}
-inline __host__ __device__ float4 make_float4(uint4 a)
-{
- return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
-}
-
-inline __host__ __device__ int4 make_int4(int s)
-{
- return make_int4(s, s, s, s);
-}
-inline __host__ __device__ int4 make_int4(int3 a)
-{
- return make_int4(a.x, a.y, a.z, 0);
-}
-inline __host__ __device__ int4 make_int4(int3 a, int w)
-{
- return make_int4(a.x, a.y, a.z, w);
-}
-inline __host__ __device__ int4 make_int4(uint4 a)
-{
- return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
-}
-inline __host__ __device__ int4 make_int4(float4 a)
-{
- return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
-}
-
-
-inline __host__ __device__ uint4 make_uint4(uint s)
-{
- return make_uint4(s, s, s, s);
-}
-inline __host__ __device__ uint4 make_uint4(uint3 a)
-{
- return make_uint4(a.x, a.y, a.z, 0);
-}
-inline __host__ __device__ uint4 make_uint4(uint3 a, uint w)
-{
- return make_uint4(a.x, a.y, a.z, w);
-}
-inline __host__ __device__ uint4 make_uint4(int4 a)
-{
- return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w));
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// negate
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator-(float2 &a)
-{
- return make_float2(-a.x, -a.y);
-}
-inline __host__ __device__ int2 operator-(int2 &a)
-{
- return make_int2(-a.x, -a.y);
-}
-inline __host__ __device__ float3 operator-(float3 &a)
-{
- return make_float3(-a.x, -a.y, -a.z);
-}
-inline __host__ __device__ int3 operator-(int3 &a)
-{
- return make_int3(-a.x, -a.y, -a.z);
-}
-inline __host__ __device__ float4 operator-(float4 &a)
-{
- return make_float4(-a.x, -a.y, -a.z, -a.w);
-}
-inline __host__ __device__ int4 operator-(int4 &a)
-{
- return make_int4(-a.x, -a.y, -a.z, -a.w);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// addition
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator+(float2 a, float2 b)
-{
- return make_float2(a.x + b.x, a.y + b.y);
-}
-inline __host__ __device__ void operator+=(float2 &a, float2 b)
-{
- a.x += b.x; a.y += b.y;
-}
-inline __host__ __device__ float2 operator+(float2 a, float b)
-{
- return make_float2(a.x + b, a.y + b);
-}
-inline __host__ __device__ float2 operator+(float b, float2 a)
-{
- return make_float2(a.x + b, a.y + b);
-}
-inline __host__ __device__ void operator+=(float2 &a, float b)
-{
- a.x += b; a.y += b;
-}
-
-inline __host__ __device__ int2 operator+(int2 a, int2 b)
-{
- return make_int2(a.x + b.x, a.y + b.y);
-}
-inline __host__ __device__ void operator+=(int2 &a, int2 b)
-{
- a.x += b.x; a.y += b.y;
-}
-inline __host__ __device__ int2 operator+(int2 a, int b)
-{
- return make_int2(a.x + b, a.y + b);
-}
-inline __host__ __device__ int2 operator+(int b, int2 a)
-{
- return make_int2(a.x + b, a.y + b);
-}
-inline __host__ __device__ void operator+=(int2 &a, int b)
-{
- a.x += b; a.y += b;
-}
-
-inline __host__ __device__ uint2 operator+(uint2 a, uint2 b)
-{
- return make_uint2(a.x + b.x, a.y + b.y);
-}
-inline __host__ __device__ void operator+=(uint2 &a, uint2 b)
-{
- a.x += b.x; a.y += b.y;
-}
-inline __host__ __device__ uint2 operator+(uint2 a, uint b)
-{
- return make_uint2(a.x + b, a.y + b);
-}
-inline __host__ __device__ uint2 operator+(uint b, uint2 a)
-{
- return make_uint2(a.x + b, a.y + b);
-}
-inline __host__ __device__ void operator+=(uint2 &a, uint b)
-{
- a.x += b; a.y += b;
-}
-
-
-inline __host__ __device__ float3 operator+(float3 a, float3 b)
-{
- return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
-}
-inline __host__ __device__ void operator+=(float3 &a, float3 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z;
-}
-inline __host__ __device__ float3 operator+(float3 a, float b)
-{
- return make_float3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ void operator+=(float3 &a, float b)
-{
- a.x += b; a.y += b; a.z += b;
-}
-
-inline __host__ __device__ int3 operator+(int3 a, int3 b)
-{
- return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
-}
-inline __host__ __device__ void operator+=(int3 &a, int3 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z;
-}
-inline __host__ __device__ int3 operator+(int3 a, int b)
-{
- return make_int3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ void operator+=(int3 &a, int b)
-{
- a.x += b; a.y += b; a.z += b;
-}
-
-inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
-{
- return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
-}
-inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z;
-}
-inline __host__ __device__ uint3 operator+(uint3 a, uint b)
-{
- return make_uint3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ void operator+=(uint3 &a, uint b)
-{
- a.x += b; a.y += b; a.z += b;
-}
-
-inline __host__ __device__ int3 operator+(int b, int3 a)
-{
- return make_int3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ uint3 operator+(uint b, uint3 a)
-{
- return make_uint3(a.x + b, a.y + b, a.z + b);
-}
-inline __host__ __device__ float3 operator+(float b, float3 a)
-{
- return make_float3(a.x + b, a.y + b, a.z + b);
-}
-
-inline __host__ __device__ float4 operator+(float4 a, float4 b)
-{
- return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-}
-inline __host__ __device__ void operator+=(float4 &a, float4 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
-}
-inline __host__ __device__ float4 operator+(float4 a, float b)
-{
- return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ float4 operator+(float b, float4 a)
-{
- return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ void operator+=(float4 &a, float b)
-{
- a.x += b; a.y += b; a.z += b; a.w += b;
-}
-
-inline __host__ __device__ int4 operator+(int4 a, int4 b)
-{
- return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-}
-inline __host__ __device__ void operator+=(int4 &a, int4 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
-}
-inline __host__ __device__ int4 operator+(int4 a, int b)
-{
- return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ int4 operator+(int b, int4 a)
-{
- return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ void operator+=(int4 &a, int b)
-{
- a.x += b; a.y += b; a.z += b; a.w += b;
-}
-
-inline __host__ __device__ uint4 operator+(uint4 a, uint4 b)
-{
- return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-}
-inline __host__ __device__ void operator+=(uint4 &a, uint4 b)
-{
- a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
-}
-inline __host__ __device__ uint4 operator+(uint4 a, uint b)
-{
- return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ uint4 operator+(uint b, uint4 a)
-{
- return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
-}
-inline __host__ __device__ void operator+=(uint4 &a, uint b)
-{
- a.x += b; a.y += b; a.z += b; a.w += b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// subtract
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator-(float2 a, float2 b)
-{
- return make_float2(a.x - b.x, a.y - b.y);
-}
-inline __host__ __device__ void operator-=(float2 &a, float2 b)
-{
- a.x -= b.x; a.y -= b.y;
-}
-inline __host__ __device__ float2 operator-(float2 a, float b)
-{
- return make_float2(a.x - b, a.y - b);
-}
-inline __host__ __device__ float2 operator-(float b, float2 a)
-{
- return make_float2(b - a.x, b - a.y);
-}
-inline __host__ __device__ void operator-=(float2 &a, float b)
-{
- a.x -= b; a.y -= b;
-}
-
-inline __host__ __device__ int2 operator-(int2 a, int2 b)
-{
- return make_int2(a.x - b.x, a.y - b.y);
-}
-inline __host__ __device__ void operator-=(int2 &a, int2 b)
-{
- a.x -= b.x; a.y -= b.y;
-}
-inline __host__ __device__ int2 operator-(int2 a, int b)
-{
- return make_int2(a.x - b, a.y - b);
-}
-inline __host__ __device__ int2 operator-(int b, int2 a)
-{
- return make_int2(b - a.x, b - a.y);
-}
-inline __host__ __device__ void operator-=(int2 &a, int b)
-{
- a.x -= b; a.y -= b;
-}
-
-inline __host__ __device__ uint2 operator-(uint2 a, uint2 b)
-{
- return make_uint2(a.x - b.x, a.y - b.y);
-}
-inline __host__ __device__ void operator-=(uint2 &a, uint2 b)
-{
- a.x -= b.x; a.y -= b.y;
-}
-inline __host__ __device__ uint2 operator-(uint2 a, uint b)
-{
- return make_uint2(a.x - b, a.y - b);
-}
-inline __host__ __device__ uint2 operator-(uint b, uint2 a)
-{
- return make_uint2(b - a.x, b - a.y);
-}
-inline __host__ __device__ void operator-=(uint2 &a, uint b)
-{
- a.x -= b; a.y -= b;
-}
-
-inline __host__ __device__ float3 operator-(float3 a, float3 b)
-{
- return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
-}
-inline __host__ __device__ void operator-=(float3 &a, float3 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z;
-}
-inline __host__ __device__ float3 operator-(float3 a, float b)
-{
- return make_float3(a.x - b, a.y - b, a.z - b);
-}
-inline __host__ __device__ float3 operator-(float b, float3 a)
-{
- return make_float3(b - a.x, b - a.y, b - a.z);
-}
-inline __host__ __device__ void operator-=(float3 &a, float b)
-{
- a.x -= b; a.y -= b; a.z -= b;
-}
-
-inline __host__ __device__ int3 operator-(int3 a, int3 b)
-{
- return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
-}
-inline __host__ __device__ void operator-=(int3 &a, int3 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z;
-}
-inline __host__ __device__ int3 operator-(int3 a, int b)
-{
- return make_int3(a.x - b, a.y - b, a.z - b);
-}
-inline __host__ __device__ int3 operator-(int b, int3 a)
-{
- return make_int3(b - a.x, b - a.y, b - a.z);
-}
-inline __host__ __device__ void operator-=(int3 &a, int b)
-{
- a.x -= b; a.y -= b; a.z -= b;
-}
-
-inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
-{
- return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
-}
-inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z;
-}
-inline __host__ __device__ uint3 operator-(uint3 a, uint b)
-{
- return make_uint3(a.x - b, a.y - b, a.z - b);
-}
-inline __host__ __device__ uint3 operator-(uint b, uint3 a)
-{
- return make_uint3(b - a.x, b - a.y, b - a.z);
-}
-inline __host__ __device__ void operator-=(uint3 &a, uint b)
-{
- a.x -= b; a.y -= b; a.z -= b;
-}
-
-inline __host__ __device__ float4 operator-(float4 a, float4 b)
-{
- return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-}
-inline __host__ __device__ void operator-=(float4 &a, float4 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
-}
-inline __host__ __device__ float4 operator-(float4 a, float b)
-{
- return make_float4(a.x - b, a.y - b, a.z - b, a.w - b);
-}
-inline __host__ __device__ void operator-=(float4 &a, float b)
-{
- a.x -= b; a.y -= b; a.z -= b; a.w -= b;
-}
-
-inline __host__ __device__ int4 operator-(int4 a, int4 b)
-{
- return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-}
-inline __host__ __device__ void operator-=(int4 &a, int4 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
-}
-inline __host__ __device__ int4 operator-(int4 a, int b)
-{
- return make_int4(a.x - b, a.y - b, a.z - b, a.w - b);
-}
-inline __host__ __device__ int4 operator-(int b, int4 a)
-{
- return make_int4(b - a.x, b - a.y, b - a.z, b - a.w);
-}
-inline __host__ __device__ void operator-=(int4 &a, int b)
-{
- a.x -= b; a.y -= b; a.z -= b; a.w -= b;
-}
-
-inline __host__ __device__ uint4 operator-(uint4 a, uint4 b)
-{
- return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-}
-inline __host__ __device__ void operator-=(uint4 &a, uint4 b)
-{
- a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
-}
-inline __host__ __device__ uint4 operator-(uint4 a, uint b)
-{
- return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b);
-}
-inline __host__ __device__ uint4 operator-(uint b, uint4 a)
-{
- return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
-}
-inline __host__ __device__ void operator-=(uint4 &a, uint b)
-{
- a.x -= b; a.y -= b; a.z -= b; a.w -= b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// multiply
-////////////////////////////////////////////////////////////////////////////////
-
-inline __host__ __device__ float2 operator*(float2 a, float2 b)
-{
- return make_float2(a.x * b.x, a.y * b.y);
-}
-inline __host__ __device__ void operator*=(float2 &a, float2 b)
-{
- a.x *= b.x; a.y *= b.y;
-}
-inline __host__ __device__ float2 operator*(float2 a, float b)
-{
- return make_float2(a.x * b, a.y * b);
-}
-inline __host__ __device__ float2 operator*(float b, float2 a)
-{
- return make_float2(b * a.x, b * a.y);
-}
-inline __host__ __device__ void operator*=(float2 &a, float b)
-{
- a.x *= b; a.y *= b;
-}
-
-inline __host__ __device__ int2 operator*(int2 a, int2 b)
-{
- return make_int2(a.x * b.x, a.y * b.y);
-}
-inline __host__ __device__ void operator*=(int2 &a, int2 b)
-{
- a.x *= b.x; a.y *= b.y;
-}
-inline __host__ __device__ int2 operator*(int2 a, int b)
-{
- return make_int2(a.x * b, a.y * b);
-}
-inline __host__ __device__ int2 operator*(int b, int2 a)
-{
- return make_int2(b * a.x, b * a.y);
-}
-inline __host__ __device__ void operator*=(int2 &a, int b)
-{
- a.x *= b; a.y *= b;
-}
-
-inline __host__ __device__ uint2 operator*(uint2 a, uint2 b)
-{
- return make_uint2(a.x * b.x, a.y * b.y);
-}
-inline __host__ __device__ void operator*=(uint2 &a, uint2 b)
-{
- a.x *= b.x; a.y *= b.y;
-}
-inline __host__ __device__ uint2 operator*(uint2 a, uint b)
-{
- return make_uint2(a.x * b, a.y * b);
-}
-inline __host__ __device__ uint2 operator*(uint b, uint2 a)
-{
- return make_uint2(b * a.x, b * a.y);
-}
-inline __host__ __device__ void operator*=(uint2 &a, uint b)
-{
- a.x *= b; a.y *= b;
-}
-
-inline __host__ __device__ float3 operator*(float3 a, float3 b)
-{
- return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
-}
-inline __host__ __device__ void operator*=(float3 &a, float3 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z;
-}
-inline __host__ __device__ float3 operator*(float3 a, float b)
-{
- return make_float3(a.x * b, a.y * b, a.z * b);
-}
-inline __host__ __device__ float3 operator*(float b, float3 a)
-{
- return make_float3(b * a.x, b * a.y, b * a.z);
-}
-inline __host__ __device__ void operator*=(float3 &a, float b)
-{
- a.x *= b; a.y *= b; a.z *= b;
-}
-
-inline __host__ __device__ int3 operator*(int3 a, int3 b)
-{
- return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
-}
-inline __host__ __device__ void operator*=(int3 &a, int3 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z;
-}
-inline __host__ __device__ int3 operator*(int3 a, int b)
-{
- return make_int3(a.x * b, a.y * b, a.z * b);
-}
-inline __host__ __device__ int3 operator*(int b, int3 a)
-{
- return make_int3(b * a.x, b * a.y, b * a.z);
-}
-inline __host__ __device__ void operator*=(int3 &a, int b)
-{
- a.x *= b; a.y *= b; a.z *= b;
-}
-
-inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
-{
- return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
-}
-inline __host__ __device__ void operator*=(uint3 &a, uint3 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z;
-}
-inline __host__ __device__ uint3 operator*(uint3 a, uint b)
-{
- return make_uint3(a.x * b, a.y * b, a.z * b);
-}
-inline __host__ __device__ uint3 operator*(uint b, uint3 a)
-{
- return make_uint3(b * a.x, b * a.y, b * a.z);
-}
-inline __host__ __device__ void operator*=(uint3 &a, uint b)
-{
- a.x *= b; a.y *= b; a.z *= b;
-}
-
-inline __host__ __device__ float4 operator*(float4 a, float4 b)
-{
- return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-}
-inline __host__ __device__ void operator*=(float4 &a, float4 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w;
-}
-inline __host__ __device__ float4 operator*(float4 a, float b)
-{
- return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
-}
-inline __host__ __device__ float4 operator*(float b, float4 a)
-{
- return make_float4(b * a.x, b * a.y, b * a.z, b * a.w);
-}
-inline __host__ __device__ void operator*=(float4 &a, float b)
-{
- a.x *= b; a.y *= b; a.z *= b; a.w *= b;
-}
-
-inline __host__ __device__ int4 operator*(int4 a, int4 b)
-{
- return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-}
-inline __host__ __device__ void operator*=(int4 &a, int4 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w;
-}
-inline __host__ __device__ int4 operator*(int4 a, int b)
-{
- return make_int4(a.x * b, a.y * b, a.z * b, a.w * b);
-}
-inline __host__ __device__ int4 operator*(int b, int4 a)
-{
- return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
-}
-inline __host__ __device__ void operator*=(int4 &a, int b)
-{
- a.x *= b; a.y *= b; a.z *= b; a.w *= b;
-}
-
-inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
-{
- return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-}
-inline __host__ __device__ void operator*=(uint4 &a, uint4 b)
-{
- a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w;
-}
-inline __host__ __device__ uint4 operator*(uint4 a, uint b)
-{
- return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b);
-}
-inline __host__ __device__ uint4 operator*(uint b, uint4 a)
-{
- return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
-}
-inline __host__ __device__ void operator*=(uint4 &a, uint b)
-{
- a.x *= b; a.y *= b; a.z *= b; a.w *= b;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// divide
-////////////////////////////////////////////////////////////////////////////////
-
-// Component-wise quotient of two float2 vectors.
-inline __host__ __device__ float2 operator/(float2 a, float2 b)
-{
-    float2 quotient;
-    quotient.x = a.x / b.x;
-    quotient.y = a.y / b.y;
-    return quotient;
-}
-// In-place component-wise division.
-inline __host__ __device__ void operator/=(float2 &a, float2 b)
-{
-    a.x = a.x / b.x;
-    a.y = a.y / b.y;
-}
-// Divide every component by the scalar b.
-inline __host__ __device__ float2 operator/(float2 a, float b)
-{
-    float2 quotient;
-    quotient.x = a.x / b;
-    quotient.y = a.y / b;
-    return quotient;
-}
-// In-place division of every component by the scalar b.
-inline __host__ __device__ void operator/=(float2 &a, float b)
-{
-    a.x = a.x / b;
-    a.y = a.y / b;
-}
-// Scalar divided by each component: (b / a.x, b / a.y).
-inline __host__ __device__ float2 operator/(float b, float2 a)
-{
-    float2 quotient;
-    quotient.x = b / a.x;
-    quotient.y = b / a.y;
-    return quotient;
-}
-
-// Component-wise quotient of two float3 vectors.
-inline __host__ __device__ float3 operator/(float3 a, float3 b)
-{
-    float3 quotient;
-    quotient.x = a.x / b.x;
-    quotient.y = a.y / b.y;
-    quotient.z = a.z / b.z;
-    return quotient;
-}
-// In-place component-wise division.
-inline __host__ __device__ void operator/=(float3 &a, float3 b)
-{
-    a.x = a.x / b.x;
-    a.y = a.y / b.y;
-    a.z = a.z / b.z;
-}
-// Divide every component by the scalar b.
-inline __host__ __device__ float3 operator/(float3 a, float b)
-{
-    float3 quotient;
-    quotient.x = a.x / b;
-    quotient.y = a.y / b;
-    quotient.z = a.z / b;
-    return quotient;
-}
-// In-place division of every component by the scalar b.
-inline __host__ __device__ void operator/=(float3 &a, float b)
-{
-    a.x = a.x / b;
-    a.y = a.y / b;
-    a.z = a.z / b;
-}
-// Scalar divided by each component.
-inline __host__ __device__ float3 operator/(float b, float3 a)
-{
-    float3 quotient;
-    quotient.x = b / a.x;
-    quotient.y = b / a.y;
-    quotient.z = b / a.z;
-    return quotient;
-}
-
-// Component-wise quotient of two float4 vectors.
-inline __host__ __device__ float4 operator/(float4 a, float4 b)
-{
-    float4 quotient;
-    quotient.x = a.x / b.x;
-    quotient.y = a.y / b.y;
-    quotient.z = a.z / b.z;
-    quotient.w = a.w / b.w;
-    return quotient;
-}
-// In-place component-wise division.
-inline __host__ __device__ void operator/=(float4 &a, float4 b)
-{
-    a.x = a.x / b.x;
-    a.y = a.y / b.y;
-    a.z = a.z / b.z;
-    a.w = a.w / b.w;
-}
-// Divide every component by the scalar b.
-inline __host__ __device__ float4 operator/(float4 a, float b)
-{
-    float4 quotient;
-    quotient.x = a.x / b;
-    quotient.y = a.y / b;
-    quotient.z = a.z / b;
-    quotient.w = a.w / b;
-    return quotient;
-}
-// In-place division of every component by the scalar b.
-inline __host__ __device__ void operator/=(float4 &a, float b)
-{
-    a.x = a.x / b;
-    a.y = a.y / b;
-    a.z = a.z / b;
-    a.w = a.w / b;
-}
-// Scalar divided by each component.
-inline __host__ __device__ float4 operator/(float b, float4 a)
-{
-    float4 quotient;
-    quotient.x = b / a.x;
-    quotient.y = b / a.y;
-    quotient.z = b / a.z;
-    quotient.w = b / a.w;
-    return quotient;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// min
-////////////////////////////////////////////////////////////////////////////////
-
-// Lane-wise minimum of two float2 vectors (each lane uses the scalar fminf).
-inline __host__ __device__ float2 fminf(float2 a, float2 b)
-{
-    float2 lowest;
-    lowest.x = fminf(a.x, b.x);
-    lowest.y = fminf(a.y, b.y);
-    return lowest;
-}
-// Lane-wise minimum of two float3 vectors.
-inline __host__ __device__ float3 fminf(float3 a, float3 b)
-{
-    float3 lowest;
-    lowest.x = fminf(a.x, b.x);
-    lowest.y = fminf(a.y, b.y);
-    lowest.z = fminf(a.z, b.z);
-    return lowest;
-}
-// Lane-wise minimum of two float4 vectors.
-inline __host__ __device__ float4 fminf(float4 a, float4 b)
-{
-    float4 lowest;
-    lowest.x = fminf(a.x, b.x);
-    lowest.y = fminf(a.y, b.y);
-    lowest.z = fminf(a.z, b.z);
-    lowest.w = fminf(a.w, b.w);
-    return lowest;
-}
-
-// Lane-wise minimum of two int2 vectors (each lane uses the scalar min).
-inline __host__ __device__ int2 min(int2 a, int2 b)
-{
-    int2 lowest;
-    lowest.x = min(a.x, b.x);
-    lowest.y = min(a.y, b.y);
-    return lowest;
-}
-// Lane-wise minimum of two int3 vectors.
-inline __host__ __device__ int3 min(int3 a, int3 b)
-{
-    int3 lowest;
-    lowest.x = min(a.x, b.x);
-    lowest.y = min(a.y, b.y);
-    lowest.z = min(a.z, b.z);
-    return lowest;
-}
-// Lane-wise minimum of two int4 vectors.
-inline __host__ __device__ int4 min(int4 a, int4 b)
-{
-    int4 lowest;
-    lowest.x = min(a.x, b.x);
-    lowest.y = min(a.y, b.y);
-    lowest.z = min(a.z, b.z);
-    lowest.w = min(a.w, b.w);
-    return lowest;
-}
-
-// Lane-wise minimum of two uint2 vectors (each lane uses the scalar min).
-inline __host__ __device__ uint2 min(uint2 a, uint2 b)
-{
-    uint2 lowest;
-    lowest.x = min(a.x, b.x);
-    lowest.y = min(a.y, b.y);
-    return lowest;
-}
-// Lane-wise minimum of two uint3 vectors.
-inline __host__ __device__ uint3 min(uint3 a, uint3 b)
-{
-    uint3 lowest;
-    lowest.x = min(a.x, b.x);
-    lowest.y = min(a.y, b.y);
-    lowest.z = min(a.z, b.z);
-    return lowest;
-}
-// Lane-wise minimum of two uint4 vectors.
-inline __host__ __device__ uint4 min(uint4 a, uint4 b)
-{
-    uint4 lowest;
-    lowest.x = min(a.x, b.x);
-    lowest.y = min(a.y, b.y);
-    lowest.z = min(a.z, b.z);
-    lowest.w = min(a.w, b.w);
-    return lowest;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// max
-////////////////////////////////////////////////////////////////////////////////
-
-// Lane-wise maximum of two float2 vectors (each lane uses the scalar fmaxf).
-inline __host__ __device__ float2 fmaxf(float2 a, float2 b)
-{
-    float2 highest;
-    highest.x = fmaxf(a.x, b.x);
-    highest.y = fmaxf(a.y, b.y);
-    return highest;
-}
-// Lane-wise maximum of two float3 vectors.
-inline __host__ __device__ float3 fmaxf(float3 a, float3 b)
-{
-    float3 highest;
-    highest.x = fmaxf(a.x, b.x);
-    highest.y = fmaxf(a.y, b.y);
-    highest.z = fmaxf(a.z, b.z);
-    return highest;
-}
-// Lane-wise maximum of two float4 vectors.
-inline __host__ __device__ float4 fmaxf(float4 a, float4 b)
-{
-    float4 highest;
-    highest.x = fmaxf(a.x, b.x);
-    highest.y = fmaxf(a.y, b.y);
-    highest.z = fmaxf(a.z, b.z);
-    highest.w = fmaxf(a.w, b.w);
-    return highest;
-}
-
-// Lane-wise maximum of two int2 vectors (each lane uses the scalar max).
-inline __host__ __device__ int2 max(int2 a, int2 b)
-{
-    int2 highest;
-    highest.x = max(a.x, b.x);
-    highest.y = max(a.y, b.y);
-    return highest;
-}
-// Lane-wise maximum of two int3 vectors.
-inline __host__ __device__ int3 max(int3 a, int3 b)
-{
-    int3 highest;
-    highest.x = max(a.x, b.x);
-    highest.y = max(a.y, b.y);
-    highest.z = max(a.z, b.z);
-    return highest;
-}
-// Lane-wise maximum of two int4 vectors.
-inline __host__ __device__ int4 max(int4 a, int4 b)
-{
-    int4 highest;
-    highest.x = max(a.x, b.x);
-    highest.y = max(a.y, b.y);
-    highest.z = max(a.z, b.z);
-    highest.w = max(a.w, b.w);
-    return highest;
-}
-
-// Lane-wise maximum of two uint2 vectors (each lane uses the scalar max).
-inline __host__ __device__ uint2 max(uint2 a, uint2 b)
-{
-    uint2 highest;
-    highest.x = max(a.x, b.x);
-    highest.y = max(a.y, b.y);
-    return highest;
-}
-// Lane-wise maximum of two uint3 vectors.
-inline __host__ __device__ uint3 max(uint3 a, uint3 b)
-{
-    uint3 highest;
-    highest.x = max(a.x, b.x);
-    highest.y = max(a.y, b.y);
-    highest.z = max(a.z, b.z);
-    return highest;
-}
-// Lane-wise maximum of two uint4 vectors.
-inline __host__ __device__ uint4 max(uint4 a, uint4 b)
-{
-    uint4 highest;
-    highest.x = max(a.x, b.x);
-    highest.y = max(a.y, b.y);
-    highest.z = max(a.z, b.z);
-    highest.w = max(a.w, b.w);
-    return highest;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// lerp
-// - linear interpolation between a and b, based on value t in [0, 1] range
-////////////////////////////////////////////////////////////////////////////////
-
-// Scalar interpolation: t = 0 gives a, t = 1 gives a + (b - a).
-inline __device__ __host__ float lerp(float a, float b, float t)
-{
-    const float span = b - a;
-    return a + t * span;
-}
-// float2 interpolation; one scalar weight t is shared by both lanes.
-inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
-{
-    const float2 span = b - a;
-    return a + t * span;
-}
-// float3 interpolation; one scalar weight t is shared by all lanes.
-inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
-{
-    const float3 span = b - a;
-    return a + t * span;
-}
-// float4 interpolation; one scalar weight t is shared by all lanes.
-inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
-{
-    const float4 span = b - a;
-    return a + t * span;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// clamp
-// - clamp the value v to be in the range [a, b]
-////////////////////////////////////////////////////////////////////////////////
-
-// Clamp a float: apply the upper bound b first, then the lower bound a.
-inline __device__ __host__ float clamp(float f, float a, float b)
-{
-    const float capped = fminf(f, b);
-    return fmaxf(a, capped);
-}
-// Clamp an int: apply the upper bound b first, then the lower bound a.
-inline __device__ __host__ int clamp(int f, int a, int b)
-{
-    const int capped = min(f, b);
-    return max(a, capped);
-}
-// Clamp a uint: apply the upper bound b first, then the lower bound a.
-inline __device__ __host__ uint clamp(uint f, uint a, uint b)
-{
-    const uint capped = min(f, b);
-    return max(a, capped);
-}
-
-// Clamp each lane of v to the same scalar range [a, b].
-inline __device__ __host__ float2 clamp(float2 v, float a, float b)
-{
-    v.x = clamp(v.x, a, b);
-    v.y = clamp(v.y, a, b);
-    return v;
-}
-// Clamp each lane of v to the matching lane of [a, b].
-inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
-{
-    v.x = clamp(v.x, a.x, b.x);
-    v.y = clamp(v.y, a.y, b.y);
-    return v;
-}
-// Clamp each lane of v to the same scalar range [a, b].
-inline __device__ __host__ float3 clamp(float3 v, float a, float b)
-{
-    v.x = clamp(v.x, a, b);
-    v.y = clamp(v.y, a, b);
-    v.z = clamp(v.z, a, b);
-    return v;
-}
-// Clamp each lane of v to the matching lane of [a, b].
-inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
-{
-    v.x = clamp(v.x, a.x, b.x);
-    v.y = clamp(v.y, a.y, b.y);
-    v.z = clamp(v.z, a.z, b.z);
-    return v;
-}
-// Clamp each lane of v to the same scalar range [a, b].
-inline __device__ __host__ float4 clamp(float4 v, float a, float b)
-{
-    v.x = clamp(v.x, a, b);
-    v.y = clamp(v.y, a, b);
-    v.z = clamp(v.z, a, b);
-    v.w = clamp(v.w, a, b);
-    return v;
-}
-// Clamp each lane of v to the matching lane of [a, b].
-inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
-{
-    v.x = clamp(v.x, a.x, b.x);
-    v.y = clamp(v.y, a.y, b.y);
-    v.z = clamp(v.z, a.z, b.z);
-    v.w = clamp(v.w, a.w, b.w);
-    return v;
-}
-
-// Clamp each lane of v to the same scalar range [a, b].
-inline __device__ __host__ int2 clamp(int2 v, int a, int b)
-{
-    v.x = clamp(v.x, a, b);
-    v.y = clamp(v.y, a, b);
-    return v;
-}
-// Clamp each lane of v to the matching lane of [a, b].
-inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b)
-{
-    v.x = clamp(v.x, a.x, b.x);
-    v.y = clamp(v.y, a.y, b.y);
-    return v;
-}
-// Clamp each lane of v to the same scalar range [a, b].
-inline __device__ __host__ int3 clamp(int3 v, int a, int b)
-{
-    v.x = clamp(v.x, a, b);
-    v.y = clamp(v.y, a, b);
-    v.z = clamp(v.z, a, b);
-    return v;
-}
-// Clamp each lane of v to the matching lane of [a, b].
-inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
-{
-    v.x = clamp(v.x, a.x, b.x);
-    v.y = clamp(v.y, a.y, b.y);
-    v.z = clamp(v.z, a.z, b.z);
-    return v;
-}
-// Clamp each lane of v to the same scalar range [a, b].
-inline __device__ __host__ int4 clamp(int4 v, int a, int b)
-{
-    v.x = clamp(v.x, a, b);
-    v.y = clamp(v.y, a, b);
-    v.z = clamp(v.z, a, b);
-    v.w = clamp(v.w, a, b);
-    return v;
-}
-// Clamp each lane of v to the matching lane of [a, b].
-inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
-{
-    v.x = clamp(v.x, a.x, b.x);
-    v.y = clamp(v.y, a.y, b.y);
-    v.z = clamp(v.z, a.z, b.z);
-    v.w = clamp(v.w, a.w, b.w);
-    return v;
-}
-
-// Clamp each lane of v to the same scalar range [a, b].
-inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b)
-{
-    v.x = clamp(v.x, a, b);
-    v.y = clamp(v.y, a, b);
-    return v;
-}
-// Clamp each lane of v to the matching lane of [a, b].
-inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b)
-{
-    v.x = clamp(v.x, a.x, b.x);
-    v.y = clamp(v.y, a.y, b.y);
-    return v;
-}
-// Clamp each lane of v to the same scalar range [a, b].
-inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
-{
-    v.x = clamp(v.x, a, b);
-    v.y = clamp(v.y, a, b);
-    v.z = clamp(v.z, a, b);
-    return v;
-}
-// Clamp each lane of v to the matching lane of [a, b].
-inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
-{
-    v.x = clamp(v.x, a.x, b.x);
-    v.y = clamp(v.y, a.y, b.y);
-    v.z = clamp(v.z, a.z, b.z);
-    return v;
-}
-// Clamp each lane of v to the same scalar range [a, b].
-inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b)
-{
-    v.x = clamp(v.x, a, b);
-    v.y = clamp(v.y, a, b);
-    v.z = clamp(v.z, a, b);
-    v.w = clamp(v.w, a, b);
-    return v;
-}
-// Clamp each lane of v to the matching lane of [a, b].
-inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
-{
-    v.x = clamp(v.x, a.x, b.x);
-    v.y = clamp(v.y, a.y, b.y);
-    v.z = clamp(v.z, a.z, b.z);
-    v.w = clamp(v.w, a.w, b.w);
-    return v;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// dot product
-////////////////////////////////////////////////////////////////////////////////
-
-// Dot product of two float2 vectors. The sum stays a single expression so the
-// compiler sees the same multiply/add chain as before.
-inline __host__ __device__ float dot(float2 a, float2 b)
-{
-    return (a.x * b.x)
-         + (a.y * b.y);
-}
-// Dot product of two float3 vectors.
-inline __host__ __device__ float dot(float3 a, float3 b)
-{
-    return (a.x * b.x)
-         + (a.y * b.y)
-         + (a.z * b.z);
-}
-// Dot product of two float4 vectors.
-inline __host__ __device__ float dot(float4 a, float4 b)
-{
-    return (a.x * b.x)
-         + (a.y * b.y)
-         + (a.z * b.z)
-         + (a.w * b.w);
-}
-
-// Dot product of two int2 vectors, accumulated lane by lane in x, y order.
-inline __host__ __device__ int dot(int2 a, int2 b)
-{
-    int sum = a.x * b.x;
-    sum = sum + a.y * b.y;
-    return sum;
-}
-// Dot product of two int3 vectors.
-inline __host__ __device__ int dot(int3 a, int3 b)
-{
-    int sum = a.x * b.x;
-    sum = sum + a.y * b.y;
-    sum = sum + a.z * b.z;
-    return sum;
-}
-// Dot product of two int4 vectors.
-inline __host__ __device__ int dot(int4 a, int4 b)
-{
-    int sum = a.x * b.x;
-    sum = sum + a.y * b.y;
-    sum = sum + a.z * b.z;
-    sum = sum + a.w * b.w;
-    return sum;
-}
-
-// Dot product of two uint2 vectors, accumulated lane by lane in x, y order.
-inline __host__ __device__ uint dot(uint2 a, uint2 b)
-{
-    uint sum = a.x * b.x;
-    sum = sum + a.y * b.y;
-    return sum;
-}
-// Dot product of two uint3 vectors.
-inline __host__ __device__ uint dot(uint3 a, uint3 b)
-{
-    uint sum = a.x * b.x;
-    sum = sum + a.y * b.y;
-    sum = sum + a.z * b.z;
-    return sum;
-}
-// Dot product of two uint4 vectors.
-inline __host__ __device__ uint dot(uint4 a, uint4 b)
-{
-    uint sum = a.x * b.x;
-    sum = sum + a.y * b.y;
-    sum = sum + a.z * b.z;
-    sum = sum + a.w * b.w;
-    return sum;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// length
-////////////////////////////////////////////////////////////////////////////////
-
-// Euclidean length of a float2: square root of the vector dotted with itself.
-inline __host__ __device__ float length(float2 v)
-{
-    const float squared_norm = dot(v, v);
-    return sqrtf(squared_norm);
-}
-// Euclidean length of a float3.
-inline __host__ __device__ float length(float3 v)
-{
-    const float squared_norm = dot(v, v);
-    return sqrtf(squared_norm);
-}
-// Euclidean length of a float4.
-inline __host__ __device__ float length(float4 v)
-{
-    const float squared_norm = dot(v, v);
-    return sqrtf(squared_norm);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// normalize
-////////////////////////////////////////////////////////////////////////////////
-
-// Scale v by the reciprocal square root of dot(v, v). No zero-length guard is applied.
-inline __host__ __device__ float2 normalize(float2 v)
-{
-    const float squared_norm = dot(v, v);
-    return v * rsqrtf(squared_norm);
-}
-// float3 variant: v scaled by rsqrtf(dot(v, v)).
-inline __host__ __device__ float3 normalize(float3 v)
-{
-    const float squared_norm = dot(v, v);
-    return v * rsqrtf(squared_norm);
-}
-// float4 variant: v scaled by rsqrtf(dot(v, v)).
-inline __host__ __device__ float4 normalize(float4 v)
-{
-    const float squared_norm = dot(v, v);
-    return v * rsqrtf(squared_norm);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// floor
-////////////////////////////////////////////////////////////////////////////////
-
-// Apply the scalar floorf to each lane of a float2.
-inline __host__ __device__ float2 floorf(float2 v)
-{
-    v.x = floorf(v.x);
-    v.y = floorf(v.y);
-    return v;
-}
-// Apply the scalar floorf to each lane of a float3.
-inline __host__ __device__ float3 floorf(float3 v)
-{
-    v.x = floorf(v.x);
-    v.y = floorf(v.y);
-    v.z = floorf(v.z);
-    return v;
-}
-// Apply the scalar floorf to each lane of a float4.
-inline __host__ __device__ float4 floorf(float4 v)
-{
-    v.x = floorf(v.x);
-    v.y = floorf(v.y);
-    v.z = floorf(v.z);
-    v.w = floorf(v.w);
-    return v;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// frac - returns the fractional portion of a scalar or each vector component
-////////////////////////////////////////////////////////////////////////////////
-
-// Fractional part of a scalar, computed as v minus floorf(v).
-inline __host__ __device__ float fracf(float v)
-{
-    const float whole = floorf(v);
-    return v - whole;
-}
-// Fractional part of each lane of a float2.
-inline __host__ __device__ float2 fracf(float2 v)
-{
-    v.x = fracf(v.x);
-    v.y = fracf(v.y);
-    return v;
-}
-// Fractional part of each lane of a float3.
-inline __host__ __device__ float3 fracf(float3 v)
-{
-    v.x = fracf(v.x);
-    v.y = fracf(v.y);
-    v.z = fracf(v.z);
-    return v;
-}
-// Fractional part of each lane of a float4.
-inline __host__ __device__ float4 fracf(float4 v)
-{
-    v.x = fracf(v.x);
-    v.y = fracf(v.y);
-    v.z = fracf(v.z);
-    v.w = fracf(v.w);
-    return v;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// fmod
-////////////////////////////////////////////////////////////////////////////////
-
-// Lane-wise fmodf of two float2 vectors (a lanes are the dividends, b lanes the divisors).
-inline __host__ __device__ float2 fmodf(float2 a, float2 b)
-{
-    float2 remainder;
-    remainder.x = fmodf(a.x, b.x);
-    remainder.y = fmodf(a.y, b.y);
-    return remainder;
-}
-// Lane-wise fmodf of two float3 vectors.
-inline __host__ __device__ float3 fmodf(float3 a, float3 b)
-{
-    float3 remainder;
-    remainder.x = fmodf(a.x, b.x);
-    remainder.y = fmodf(a.y, b.y);
-    remainder.z = fmodf(a.z, b.z);
-    return remainder;
-}
-// Lane-wise fmodf of two float4 vectors.
-inline __host__ __device__ float4 fmodf(float4 a, float4 b)
-{
-    float4 remainder;
-    remainder.x = fmodf(a.x, b.x);
-    remainder.y = fmodf(a.y, b.y);
-    remainder.z = fmodf(a.z, b.z);
-    remainder.w = fmodf(a.w, b.w);
-    return remainder;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// absolute value
-////////////////////////////////////////////////////////////////////////////////
-
-// Lane-wise absolute value of a float2 (each lane goes through the scalar fabs).
-inline __host__ __device__ float2 fabs(float2 v)
-{
-    float2 magnitude;
-    magnitude.x = fabs(v.x);
-    magnitude.y = fabs(v.y);
-    return magnitude;
-}
-// Lane-wise absolute value of a float3.
-inline __host__ __device__ float3 fabs(float3 v)
-{
-    float3 magnitude;
-    magnitude.x = fabs(v.x);
-    magnitude.y = fabs(v.y);
-    magnitude.z = fabs(v.z);
-    return magnitude;
-}
-// Lane-wise absolute value of a float4.
-inline __host__ __device__ float4 fabs(float4 v)
-{
-    float4 magnitude;
-    magnitude.x = fabs(v.x);
-    magnitude.y = fabs(v.y);
-    magnitude.z = fabs(v.z);
-    magnitude.w = fabs(v.w);
-    return magnitude;
-}
-
-// Lane-wise absolute value of an int2 (each lane goes through the scalar abs).
-inline __host__ __device__ int2 abs(int2 v)
-{
-    int2 magnitude;
-    magnitude.x = abs(v.x);
-    magnitude.y = abs(v.y);
-    return magnitude;
-}
-// Lane-wise absolute value of an int3.
-inline __host__ __device__ int3 abs(int3 v)
-{
-    int3 magnitude;
-    magnitude.x = abs(v.x);
-    magnitude.y = abs(v.y);
-    magnitude.z = abs(v.z);
-    return magnitude;
-}
-// Lane-wise absolute value of an int4.
-inline __host__ __device__ int4 abs(int4 v)
-{
-    int4 magnitude;
-    magnitude.x = abs(v.x);
-    magnitude.y = abs(v.y);
-    magnitude.z = abs(v.z);
-    magnitude.w = abs(v.w);
-    return magnitude;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// reflect
-// - returns reflection of incident ray I around surface normal N
-// - N should be normalized, reflected vector's length is equal to length of I
-////////////////////////////////////////////////////////////////////////////////
-
-// Reflect i about n: i - (2 * n) * dot(n, i).
-inline __host__ __device__ float3 reflect(float3 i, float3 n)
-{
-    const float3 doubled_normal = 2.0f * n;
-    const float projection = dot(n, i);
-    return i - doubled_normal * projection;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// cross product
-////////////////////////////////////////////////////////////////////////////////
-
-// Cross product a x b of two float3 vectors, one component per statement.
-inline __host__ __device__ float3 cross(float3 a, float3 b)
-{
-    float3 perpendicular;
-    perpendicular.x = a.y*b.z - a.z*b.y;
-    perpendicular.y = a.z*b.x - a.x*b.z;
-    perpendicular.z = a.x*b.y - a.y*b.x;
-    return perpendicular;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// smoothstep
-// - returns 0 if x < a
-// - returns 1 if x > b
-// - otherwise returns smooth interpolation between 0 and 1 based on x
-////////////////////////////////////////////////////////////////////////////////
-
-// Scalar smoothstep: normalise x into [0, 1] over [a, b], then evaluate t*t*(3 - 2t).
-inline __device__ __host__ float smoothstep(float a, float b, float x)
-{
-    const float t = clamp((x - a) / (b - a), 0.0f, 1.0f);
-    return t * t * (3.0f - (2.0f * t));
-}
-// float2 smoothstep, evaluated independently per lane.
-inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)
-{
-    const float2 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
-    const float2 three = make_float2(3.0f);
-    const float2 two = make_float2(2.0f);
-    return t * t * (three - (two * t));
-}
-// float3 smoothstep, evaluated independently per lane.
-inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)
-{
-    const float3 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
-    const float3 three = make_float3(3.0f);
-    const float3 two = make_float3(2.0f);
-    return t * t * (three - (two * t));
-}
-// float4 smoothstep, evaluated independently per lane.
-inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)
-{
-    const float4 t = clamp((x - a) / (b - a), 0.0f, 1.0f);
-    const float4 three = make_float4(3.0f);
-    const float4 two = make_float4(2.0f);
-    return t * t * (three - (two * t));
-}
-
-#endif
+++ /dev/null
-/*
- * Software License Agreement (BSD License)
- *
- * Point Cloud Library (PCL) - www.pointclouds.org
- * Copyright (c) 2014-, Open Perception, Inc.
- * Copyright (C) 2008 Ben Gurion University of the Negev, Beer Sheva, Israel.
- *
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met
- *
- * * The use for research only (no for any commercial application).
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- * * Neither the name of the copyright holder(s) nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef PCL_REGISTRATION_IA_FPCS_H_
-#define PCL_REGISTRATION_IA_FPCS_H_
-
-#include <pcl/common/common.h>
-#include <pcl/registration/registration.h>
-#include <pcl/registration/matching_candidate.h>
-
-namespace pcl
-{
- /** \brief Compute the mean point density of a given point cloud.
- * \tparam PointT point type stored in \a cloud. It only appears inside a nested type name in the
- * signature, so it cannot be deduced and has to be given explicitly at the call site.
- * \param[in] cloud pointer to the input point cloud
- * \param[in] max_dist maximum distance of a point to be considered as a neighbor
- * \param[in] nr_threads number of threads to use (default = 1, only used if OpenMP flag is set)
- * \return the mean point density of the given point cloud
- */
- template <typename PointT> inline float
- getMeanPointDensity (const typename pcl::PointCloud<PointT>::ConstPtr &cloud, float max_dist, int nr_threads = 1);
-
- /** \brief Compute the mean point density of a given point cloud, restricted to a set of point indices.
- * \tparam PointT point type stored in \a cloud. It only appears inside a nested type name in the
- * signature, so it cannot be deduced and has to be given explicitly at the call site.
- * \param[in] cloud pointer to the input point cloud
- * \param[in] indices the vector of point indices to use from \a cloud
- * \param[in] max_dist maximum distance of a point to be considered as a neighbor
- * \param[in] nr_threads number of threads to use (default = 1, only used if OpenMP flag is set)
- * \return the mean point density of the given point cloud
- */
- template <typename PointT> inline float
- getMeanPointDensity (const typename pcl::PointCloud<PointT>::ConstPtr &cloud, const std::vector <int> &indices,
- float max_dist, int nr_threads = 1);
-
-
- namespace registration
- {
- /** \brief FPCSInitialAlignment computes corresponding four point congruent sets as described in:
- * "4-points congruent sets for robust pairwise surface registration", Dror Aiger, Niloy Mitra, Daniel Cohen-Or.
- * ACM Transactions on Graphics, vol. 27(3), 2008
- * \author P.W.Theiler
- * \ingroup registration
- */
- template <typename PointSource, typename PointTarget, typename NormalT = pcl::Normal, typename Scalar = float>
- class FPCSInitialAlignment : public Registration <PointSource, PointTarget, Scalar>
- {
- public:
- /** \cond */
- typedef boost::shared_ptr <FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar> > Ptr; // shared pointer to this class
- typedef boost::shared_ptr <const FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar> > ConstPtr; // shared pointer to a const instance
-
- typedef pcl::search::KdTree<PointSource> KdTreeReciprocal; // kd-tree search over the source point type
- typedef typename KdTreeReciprocal::Ptr KdTreeReciprocalPtr;
-
- typedef pcl::PointCloud <PointTarget> PointCloudTarget; // cloud of target points
- typedef pcl::PointCloud <PointSource> PointCloudSource; // cloud of source points
- typedef typename PointCloudSource::Ptr PointCloudSourcePtr;
- typedef typename PointCloudSource::iterator PointCloudSourceIterator;
-
- typedef pcl::PointCloud <NormalT> Normals; // cloud of normals of type NormalT
- typedef typename Normals::ConstPtr NormalsConstPtr;
-
- typedef pcl::registration::MatchingCandidate MatchingCandidate; // short local names for the candidate types
- typedef pcl::registration::MatchingCandidates MatchingCandidates;
- /** \endcond */
-
-
- /** \brief Constructor (declared here, defined out of line).
- * Resets the maximum number of iterations to 0, thus forcing an internal computation if it is not set by the user.
- * Sets the number of RANSAC iterations to 1000 and the standard transformation estimation to TransformationEstimation3Point.
- */
- FPCSInitialAlignment ();
-
- /** \brief Virtual destructor with an empty body. */
- virtual ~FPCSInitialAlignment () {}
-
-
- /** \brief Set the pointer to the vector of target indices.
- * \param[in] target_indices pointer to the target indices; stored in target_indices_
- */
- inline void setTargetIndices (const IndicesPtr &target_indices) { target_indices_ = target_indices; }
-
- /** \brief Read back the stored target indices.
- * \return a pointer to the vector of target indices
- */
- inline IndicesPtr getTargetIndices () const { return target_indices_; }
-
-
- /** \brief Set the pointer to the normals of the source point cloud.
- * \param[in] source_normals pointer to the normals of the source point cloud; stored in source_normals_
- */
- inline void setSourceNormals (const NormalsConstPtr &source_normals) { source_normals_ = source_normals; }
-
- /** \brief Read back the stored source normals.
- * \return the normals of the source point cloud
- */
- inline NormalsConstPtr getSourceNormals () const { return source_normals_; }
-
-
- /** \brief Set the pointer to the normals of the target point cloud.
- * \param[in] target_normals pointer to the normals of the target point cloud; stored in target_normals_
- */
- inline void setTargetNormals (const NormalsConstPtr &target_normals) { target_normals_ = target_normals; }
-
- /** \brief Read back the stored target normals.
- * \return the normals of the target point cloud
- */
- inline NormalsConstPtr getTargetNormals () const { return target_normals_; }
-
-
- /** \brief Choose how many threads are used if OpenMP is activated.
- * \param[in] nr_threads the number of used threads; stored in nr_threads_
- */
- inline void setNumberOfThreads (int nr_threads) { nr_threads_ = nr_threads; }
-
- /** \brief Read back the configured thread count.
- * \return the number of threads used if OpenMP is activated
- */
- inline int getNumberOfThreads () const { return nr_threads_; }
-
-
- /** \brief Set the constant factor delta which weights the internally calculated parameters.
- * \param[in] delta the weight factor delta; stored in delta_
- * \param[in] normalize flag if delta should be normalized according to point cloud density
- * (default = false); stored in normalize_delta_
- */
- inline void setDelta (float delta, bool normalize = false)
- {
-   delta_ = delta;
-   normalize_delta_ = normalize;
- }
-
- /** \brief Read back the configured delta.
- * \return the constant factor delta which weights the internally calculated parameters
- */
- inline float getDelta () const { return delta_; }
-
-
- /** \brief Set the approximate overlap between source and target.
- * \param[in] approx_overlap the estimated overlap; stored in approx_overlap_
- */
- inline void setApproxOverlap (float approx_overlap) { approx_overlap_ = approx_overlap; }
-
- /** \brief Read back the configured overlap estimate.
- * \return the approximated overlap between source and target
- */
- inline float getApproxOverlap () const { return approx_overlap_; }
-
-
- /** \brief Set the scoring threshold used to finish the method early.
- * \param[in] score_threshold early-termination score criterion; stored in score_threshold_
- */
- inline void setScoreThreshold (float score_threshold) { score_threshold_ = score_threshold; }
-
- /** \brief Read back the configured score threshold.
- * \return the scoring threshold used to finish the method early
- */
- inline float getScoreThreshold () const { return score_threshold_; }
-
-
- /** \brief Set the number of source samples to use during alignment.
- * \param[in] nr_samples the number of source samples; stored in nr_samples_
- */
- inline void setNumberOfSamples (int nr_samples) { nr_samples_ = nr_samples; }
-
- /** \brief Read back the configured sample count.
- * \return the number of source samples to use during alignment
- */
- inline int getNumberOfSamples () const { return nr_samples_; }
-
-
- /** \brief Set the maximum normal difference between valid point correspondences, in degrees.
- * \param[in] max_norm_diff the maximum difference in degrees; stored in max_norm_diff_
- */
- inline void setMaxNormalDifference (float max_norm_diff) { max_norm_diff_ = max_norm_diff; }
-
- /** \brief Read back the configured normal-difference limit.
- * \return the maximum normal difference between valid point correspondences, in degrees
- */
- inline float getMaxNormalDifference () const { return max_norm_diff_; }
-
-
- /** \brief Set the maximum computation time in seconds.
- * \param[in] max_runtime the maximum runtime of the method in seconds; stored in max_runtime_
- */
- inline void setMaxComputationTime (int max_runtime) { max_runtime_ = max_runtime; }
-
- /** \brief Read back the configured runtime limit.
- * \return the maximum computation time in seconds
- */
- inline int getMaxComputationTime () const { return max_runtime_; }
-
-
- /** \brief Read back the stored fitness score.
- * \return the fitness score of the best scored four-point match
- */
- inline float getFitnessScore () const { return fitness_score_; }
-
- protected:
-
- using PCLBase <PointSource>::deinitCompute; // lets this class template name these PCLBase members without qualification
- using PCLBase <PointSource>::input_;
- using PCLBase <PointSource>::indices_;
-
- using Registration <PointSource, PointTarget, Scalar>::reg_name_; // same for members of the dependent Registration base class
- using Registration <PointSource, PointTarget, Scalar>::target_;
- using Registration <PointSource, PointTarget, Scalar>::tree_;
- using Registration <PointSource, PointTarget, Scalar>::correspondences_;
- using Registration <PointSource, PointTarget, Scalar>::target_cloud_updated_;
- using Registration <PointSource, PointTarget, Scalar>::final_transformation_;
- using Registration <PointSource, PointTarget, Scalar>::max_iterations_;
- using Registration <PointSource, PointTarget, Scalar>::ransac_iterations_;
- using Registration <PointSource, PointTarget, Scalar>::transformation_estimation_;
- using Registration <PointSource, PointTarget, Scalar>::converged_;
-
-
- /** \brief Rigid transformation computation method.
- * \param[out] output the transformed input point cloud dataset using the rigid transformation found
- * \param[in] guess transformation matrix supplied by the caller (taken by const reference, so it is not
- * modified). NOTE(review): the previous text called this "the computed transformation"; presumably it
- * is the initial guess of the alignment - confirm against the implementation.
- */
- virtual void
- computeTransformation (PointCloudSource &output, const Eigen::Matrix4f& guess);
-
-
- /** \brief Internal computation initialization.
- * \return NOTE(review): the meaning of the returned flag is not documented here; presumably true when
- * the initialization succeeded - confirm against the implementation.
- */
- virtual bool
- initCompute ();
-
- /** \brief Select an approximately coplanar set of four points from the source cloud.
- * \param[out] base_indices selected source cloud indices, further used as base (B)
- * \param[out] ratio the two diagonal intersection ratios (r1,r2) of the base points; passed as a
- * reference to an array of exactly two floats
- * \return
- * * < 0 no coplanar four point set with large enough sampling distance was found
- * * = 0 a set of four congruent points was selected
- */
- int
- selectBase (std::vector <int> &base_indices, float (&ratio)[2]);
-
- /** \brief Randomly select a triplet of points with large point-to-point distances. The minimum point
- * sampling distance is calculated based on the estimated point cloud overlap during initialization.
- *
- * \param[out] base_indices indices of base B
- * \return
- * * < 0 no triangle with large enough base lines could be selected
- * * = 0 base triangle successfully selected
- */
- int
- selectBaseTriangle (std::vector <int> &base_indices);
-
- /** \brief Set up the base (four coplanar points) by ordering the points and computing the intersection
- * ratios and segment-to-segment distances of the base diagonals.
- *
- * \param[in,out] base_indices indices of base B (will be reordered)
- * \param[out] ratio diagonal intersection ratios of base points; passed as a reference to an array of
- * exactly two floats
- */
- void
- setupBase (std::vector <int> &base_indices, float (&ratio)[2]);
-
- /** \brief Calculate intersection ratios and segment-to-segment distances of base diagonals.
- * \param[in] base_indices indices of base B
- * \param[out] ratio diagonal intersection ratios of base points; passed as a reference to an array of
- * exactly two floats
- * \return quality value of diagonal intersection
- */
- float
- segmentToSegmentDist (const std::vector <int> &base_indices, float (&ratio)[2]);
-
- /** \brief Search for corresponding point pairs given the distance between two base points.
- *
- * \param[in] idx1 first index of current base segment (in source cloud)
- * \param[in] idx2 second index of current base segment (in source cloud)
- * \param[out] pairs resulting point pairs with point-to-point distance close to ref_dist
- * (ref_dist is not a parameter; presumably it is the distance between the points at \a idx1 and
- * \a idx2 - confirm against the implementation)
- * \return
- * * < 0 no corresponding point pair was found
- * * = 0 at least one point pair candidate was found
- */
- virtual int
- bruteForceCorrespondences (int idx1, int idx2, pcl::Correspondences &pairs);
-
- /** \brief Determine base matches by combining the point pair candidates and searching for coinciding
- * intersection points using the diagonal segment ratios of base B. The coincidence threshold is
- * calculated during initialization (coincidation_limit_).
- *
- * \param[in] base_indices indices of base B
- * \param[out] matches vector of candidate matches w.r.t the base B
- * \param[in] pairs_a point pairs corresponding to points of 1st diagonal of base B
- * \param[in] pairs_b point pairs corresponding to points of 2nd diagonal of base B
- * \param[in] ratio diagonal intersection ratios of base points; passed as a reference to a const array
- * of exactly two floats
- * \return
- * * < 0 no base match could be found
- * * = 0 at least one base match was found
- */
- virtual int
- determineBaseMatches (
- const std::vector <int> &base_indices,
- std::vector <std::vector <int> > &matches,
- const pcl::Correspondences &pairs_a,
- const pcl::Correspondences &pairs_b,
- const float (&ratio)[2]);
-
- /** \brief Check if the outer rectangle distances of the matched points fit with the base rectangle.
- *
- * \param[in] match_indices indices of match M
- * \param[in] ds edge lengths of base B; passed as a reference to a const array of exactly four floats
- * \return
- * * < 0 at least one edge of the match M has no corresponding one in the base B
- * * = 0 edges of match M fit the ones of base B
- */
- int
- checkBaseMatch (const std::vector <int> &match_indices, const float (&ds)[4]);
-
- /** \brief Method to handle current candidate matches. Here we validate and evaluate the matches w.r.t the
- * base and store the best fitting match (together with its score and estimated transformation).
- * \note For forwards compatibility the results are stored in 'vectors of size 1'.
- *
- * \param[in] base_indices indices of base B
- * \param[in,out] matches vector of candidate matches w.r.t the base B. The candidate matches are
- * reordered during this step.
- * \param[out] candidates vector which contains the candidate matches M
- */
- virtual void
- handleMatches (
- const std::vector <int> &base_indices,
- std::vector <std::vector <int> > &matches,
- MatchingCandidates &candidates);
-
- /** \brief Sets the correspondences between the base B and the match M by using the distance of each point
- * to the centroid of the rectangle.
- *
- * \param[in] base_indices indices of base B
- * \param[in,out] match_indices indices of match M. Taken by non-const reference; NOTE(review): presumably
- * reordered by this call - confirm against the implementation.
- * \param[out] correspondences resulting correspondences
- */
- virtual void
- linkMatchWithBase (
- const std::vector <int> &base_indices,
- std::vector <int> &match_indices,
- pcl::Correspondences &correspondences);
-
- /** \brief Validate the matching by computing the transformation between the source and target based on the
- * four matched points and by comparing the mean square error (MSE) to a threshold. The MSE limit was
- * calculated during initialization (max_mse_).
- *
- * \param[in] base_indices indices of base B
- * \param[in] match_indices indices of match M
- * \param[in] correspondences correspondences between source and target
- * \param[out] transformation resulting transformation matrix
- * \return
- * * < 0 MSE bigger than max_mse_
- * * = 0 MSE smaller than max_mse_
- */
- virtual int
- validateMatch (
- const std::vector <int> &base_indices,
- const std::vector <int> &match_indices,
- const pcl::Correspondences &correspondences,
- Eigen::Matrix4f &transformation);
-
- /** \brief Validate the transformation by calculating the number of inliers after transforming the source cloud.
- * The resulting fitness score is later used as the decision criterion for the best fitting match.
- *
- * \param[in,out] transformation updated orientation matrix using all inliers
- * (NOTE(review): marked in,out because it is described as "updated" - confirm that the input value is read)
- * \param[in,out] fitness_score current best fitness_score; compared against the score of the current
- * transformation and overwritten only when that score is better (see note below)
- * \note fitness score is only updated if the score of the current transformation exceeds the input one.
- * \return
- * * < 0 if previous result is better than the current one (score remains)
- * * = 0 current result is better than the previous one (score updated)
- */
- virtual int
- validateTransformation (Eigen::Matrix4f &transformation, float &fitness_score);
-
- /** \brief Final computation of the best match out of the vector of best matches. To avoid cross-thread
- * dependencies during parallel running, a best match is calculated for each try.
- * \note For forwards compatibility the candidates are stored in vectors of 'vectors of size 1'.
- * \param[in] candidates vector of candidate matches
- */
- virtual void
- finalCompute (const std::vector <MatchingCandidates > &candidates);
-
-
- /** \brief Normals of source point cloud. */
- NormalsConstPtr source_normals_;
-
- /** \brief Normals of target point cloud. */
- NormalsConstPtr target_normals_;
-
-
- /** \brief Number of threads for parallelization (standard = 1).
- * \note Only used if compiled with OpenMP.
- */
- int nr_threads_;
-
- /** \brief Estimated overlap between source and target (standard = 0.5). */
- float approx_overlap_;
-
- /** \brief Delta value of 4pcs algorithm (standard = 1.0).
- * It can be used as:
- * * absolute value (normalization = false), value should represent the point accuracy to ensure finding neighbors between source <-> target
- * * relative value (normalization = true), to adjust the internally calculated point accuracy (= point density)
- */
- float delta_;
-
- /** \brief Score threshold to stop calculation with success.
- * If not set by the user it is equal to the approximated overlap
- */
- float score_threshold_;
-
- /** \brief The number of points to uniformly sample the source point cloud. (standard = 0 => full cloud). */
- int nr_samples_;
-
- /** \brief Maximum normal difference of corresponding point pairs in degrees (standard = 90). */
- float max_norm_diff_;
-
- /** \brief Maximum allowed computation time in seconds (standard = 0 => ~unlimited). */
- int max_runtime_;
-
-
- /** \brief Resulting fitness score of the best match. */
- float fitness_score_;
-
-
- /** \brief Estimated diameter of the target point cloud. */
- float diameter_;
-
- /** \brief Estimated squared metric overlap between source and target.
- * \note Internally calculated using the estimated overlap and the extent of the source cloud.
- * It is used to derive the minimum sampling distance of the base points as well as to calculate
- * the number of tries needed to reliably find a correct match.
- */
- float max_base_diameter_sqr_;
-
- /** \brief Use normals flag. */
- bool use_normals_;
-
- /** \brief Normalize delta flag. */
- bool normalize_delta_;
-
-
- /** \brief A pointer to the vector of source point indices to use after sampling. */
- pcl::IndicesPtr source_indices_;
-
- /** \brief A pointer to the vector of target point indices to use after sampling. */
- pcl::IndicesPtr target_indices_;
-
- /** \brief Maximal difference between corresponding point pairs in source and target.
- * \note Internally calculated using an estimation of the point density.
- */
- float max_pair_diff_;
-
- /** \brief Maximal difference between the length of the base edges and valid match edges.
- * \note Internally calculated using an estimation of the point density.
- */
- float max_edge_diff_;
-
- /** \brief Maximal distance between coinciding intersection points to find valid matches.
- * \note Internally calculated using an estimation of the point density.
- */
- float coincidation_limit_;
-
- /** \brief Maximal mean squared errors of a transformation calculated from a candidate match.
- * \note Internally calculated using an estimation of the point density.
- */
- float max_mse_;
-
- /** \brief Maximal squared point distance between source and target points to count as inlier.
- * \note Internally calculated using an estimation of the point density.
- */
- float max_inlier_dist_sqr_;
-
-
- /** \brief Definition of a small error. */
- const float small_error_;
-
- };
- }; // namespace registration
-}; // namespace pcl
-
-#include <pcl/registration/impl/ia_fpcs.hpp>
-
-#endif // PCL_REGISTRATION_IA_FPCS_H_
+++ /dev/null
-/*
- * Software License Agreement (BSD License)
- *
- * Point Cloud Library (PCL) - www.pointclouds.org
- * Copyright (c) 2014-, Open Perception, Inc.
- * Copyright (C) 2008 Ben Gurion University of the Negev, Beer Sheva, Israel.
- *
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met
- *
- * * The use for research only (no for any commercial application).
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- * * Neither the name of the copyright holder(s) nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef PCL_REGISTRATION_IMPL_IA_FPCS_H_
-#define PCL_REGISTRATION_IMPL_IA_FPCS_H_
-
-#include <pcl/registration/ia_fpcs.h>
-#include <pcl/common/time.h>
-#include <pcl/common/distances.h>
-#include <pcl/sample_consensus/sac_model_plane.h>
-#include <pcl/registration/transformation_estimation_3point.h>
-
-///////////////////////////////////////////////////////////////////////////////////////////
-/** \brief Estimate the mean distance between neighbouring points of a cloud.
- *
- * 1000 points are drawn at random (with repetition). For each of them a 2-nearest-neighbour search is
- * run and the distance to the second result (the first one is normally the query point itself) is
- * accumulated if its square is below max_dist squared.
- *
- * \param[in] cloud pointer to the input cloud
- * \param[in] max_dist neighbour distances at or above this value are ignored
- * \param[in] nr_threads number of threads requested from OpenMP (unused without OpenMP)
- * \return mean of the accepted neighbour distances; 0 if the cloud is empty or no sample was accepted
- * \note rand () is called inside the parallel loop; the C standard does not guarantee it to be thread-safe.
- */
-template <typename PointT> inline float
-pcl::getMeanPointDensity (const typename pcl::PointCloud<PointT>::ConstPtr &cloud, float max_dist, int nr_threads)
-{
-  const float max_dist_sqr = max_dist * max_dist;
-  // cloud is a (shared) pointer, so the size has to be read from the pointee
-  const std::size_t s = cloud->size ();
-
-  // nothing to sample from; this also keeps the modulo below away from a zero divisor
-  if (s == 0)
-    return (0.f);
-
-  pcl::search::KdTree <PointT> tree;
-  tree.setInputCloud (cloud);
-
-  float mean_dist = 0.f;
-  int num = 0;
-  std::vector <int> ids (2);
-  std::vector <float> dists_sqr (2);
-
-  // no default (none): the const locals (max_dist_sqr, s) would have to be listed explicitly for
-  // newer OpenMP versions while older compilers reject listing them, so they are left implicitly shared
-#ifdef _OPENMP
-#pragma omp parallel for \
-  reduction (+:mean_dist, num) \
-  private (ids, dists_sqr) shared (tree, cloud) \
-  num_threads (nr_threads)
-#endif
-  for (int i = 0; i < 1000; i++)
-  {
-    // the search reports how many neighbours were found; the second one only exists if that is at least 2
-    const int found = tree.nearestKSearch (cloud->points[rand () % s], 2, ids, dists_sqr);
-    if (found > 1 && dists_sqr[1] < max_dist_sqr)
-    {
-      mean_dist += std::sqrt (dists_sqr[1]);
-      num++;
-    }
-  }
-
-  // avoid 0 / 0 when no sample passed the distance check
-  return (num > 0 ? mean_dist / num : 0.f);
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-/** \brief Estimate the mean distance between neighbouring points of a cloud, sampling only the given indices.
- *
- * 1000 entries of indices are drawn at random (with repetition). For each referenced point a
- * 2-nearest-neighbour search over the whole cloud is run and the distance to the second result
- * (the first one is normally the query point itself) is accumulated if its square is below max_dist squared.
- *
- * \param[in] cloud pointer to the input cloud
- * \param[in] indices indices of the points that may be drawn as query points
- * \param[in] max_dist neighbour distances at or above this value are ignored
- * \param[in] nr_threads number of threads requested from OpenMP (unused without OpenMP)
- * \return mean of the accepted neighbour distances; 0 if indices is empty or no sample was accepted
- * \note rand () is called inside the parallel loop; the C standard does not guarantee it to be thread-safe.
- */
-template <typename PointT> inline float
-pcl::getMeanPointDensity (const typename pcl::PointCloud<PointT>::ConstPtr &cloud, const std::vector <int> &indices,
-  float max_dist, int nr_threads)
-{
-  const float max_dist_sqr = max_dist * max_dist;
-  const std::size_t s = indices.size ();
-
-  // nothing to sample from; this also keeps the modulo below away from a zero divisor
-  if (s == 0)
-    return (0.f);
-
-  pcl::search::KdTree <PointT> tree;
-  tree.setInputCloud (cloud);
-
-  float mean_dist = 0.f;
-  int num = 0;
-  std::vector <int> ids (2);
-  std::vector <float> dists_sqr (2);
-
-  // no default (none): the const locals (max_dist_sqr, s) would have to be listed explicitly for
-  // newer OpenMP versions while older compilers reject listing them, so they are left implicitly shared
-#ifdef _OPENMP
-#pragma omp parallel for \
-  reduction (+:mean_dist, num) \
-  private (ids, dists_sqr) shared (tree, cloud, indices) \
-  num_threads (nr_threads)
-#endif
-  for (int i = 0; i < 1000; i++)
-  {
-    // the search reports how many neighbours were found; the second one only exists if that is at least 2
-    const int found = tree.nearestKSearch (cloud->points[indices[rand () % s]], 2, ids, dists_sqr);
-    if (found > 1 && dists_sqr[1] < max_dist_sqr)
-    {
-      mean_dist += std::sqrt (dists_sqr[1]);
-      num++;
-    }
-  }
-
-  // avoid 0 / 0 when no sample passed the distance check
-  return (num > 0 ? mean_dist / num : 0.f);
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Default constructor: fills in the documented standard values and installs a 3-point transformation estimator.
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar>
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::FPCSInitialAlignment () :
-  // optional normals, empty until provided
-  source_normals_ (), target_normals_ (),
-  // user adjustable parameters
-  nr_threads_ (1),
-  approx_overlap_ (0.5f),
-  delta_ (1.f),
-  score_threshold_ (FLT_MAX),   // FLT_MAX is replaced by a derived value in initCompute ()
-  nr_samples_ (0),
-  max_norm_diff_ (90.f),
-  max_runtime_ (0),
-  // result / internally derived values, value-initialized until initCompute () runs
-  fitness_score_ (FLT_MAX),
-  diameter_ (), max_base_diameter_sqr_ (),
-  use_normals_ (false),
-  normalize_delta_ (true),
-  max_pair_diff_ (), max_edge_diff_ (), coincidation_limit_ (),
-  max_mse_ (), max_inlier_dist_sqr_ (),
-  small_error_ (0.00001f)
-{
-  typedef pcl::registration::TransformationEstimation3Point <PointSource, PointTarget> Estimation3Point;
-
-  transformation_estimation_.reset (new Estimation3Point);
-  ransac_iterations_ = 1000;
-  // 0 lets initCompute () derive the number of iterations heuristically
-  max_iterations_ = 0;
-  reg_name_ = "pcl::registration::FPCSInitialAlignment";
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Run the 4PCS search: for up to max_iterations_ tries a base is selected in the target, candidate
-// matches are searched in the source and evaluated, and the per-try results are handed to finalCompute ().
-// \param[out] output the input cloud transformed by final_transformation_
-// \param[in] guess stored in final_transformation_ before the search starts
-// Returns without touching output if initCompute () fails.
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> void
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::computeTransformation (
- PointCloudSource &output,
- const Eigen::Matrix4f &guess)
-{
- if (!initCompute ())
- return;
-
- final_transformation_ = guess;
- // early-termination flag; NOTE(review): this local shadows ::abort and is read/written by all threads,
- // synchronized only through the flush directives below
- bool abort = false;
- // one result slot per try, so the threads do not share a running best candidate
- std::vector <MatchingCandidates> all_candidates (max_iterations_);
- pcl::StopWatch timer;
-
- #ifdef _OPENMP
- #pragma omp parallel num_threads (nr_threads_)
- #endif
- {
- #ifdef _OPENMP
- // seed per thread, mixing the thread number into the time-based seed
- std::srand (static_cast <unsigned int> (std::time (NULL)) ^ omp_get_thread_num ());
- #pragma omp for schedule (dynamic)
- #endif
- for (int i = 0; i < max_iterations_; i++)
- {
-
- #ifdef _OPENMP
- #pragma omp flush (abort)
- #endif
-
- // store a default candidate first so that slot i is filled even if this try yields nothing
- MatchingCandidates candidates (1);
- std::vector <int> base_indices (4);
- float ratio[2];
- all_candidates[i] = candidates;
-
- // once abort is set the remaining iterations only store the default candidate
- if (!abort)
- {
- // select four coplanar point base
- if (selectBase (base_indices, ratio) == 0)
- {
- // calculate candidate pair correspondences using diagonal lengths of base
- pcl::Correspondences pairs_a, pairs_b;
- if (bruteForceCorrespondences (base_indices[0], base_indices[1], pairs_a) == 0 &&
- bruteForceCorrespondences (base_indices[2], base_indices[3], pairs_b) == 0)
- {
- // determine candidate matches by combining pair correspondences based on segment distances
- std::vector <std::vector <int> > matches;
- if (determineBaseMatches (base_indices, matches, pairs_a, pairs_b, ratio) == 0)
- {
- // check and evaluate candidate matches and store them
- handleMatches (base_indices, matches, candidates);
- if (candidates.size () != 0)
- all_candidates[i] = candidates;
- }
- }
- }
-
- // check terminate early (time or fitness_score threshold reached)
- abort = (candidates.size () > 0 ? candidates[0].fitness_score < score_threshold_ : abort);
- abort = (abort ? abort : timer.getTimeSeconds () > max_runtime_);
-
-
- #ifdef _OPENMP
- #pragma omp flush (abort)
- #endif
- }
- }
- }
-
-
- // determine best match over all tries
- finalCompute (all_candidates);
-
- // apply the final transformation
- pcl::transformPointCloud (*input_, output, final_transformation_);
-
- deinitCompute ();
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Prepare a run: seed the random generator, build default target indices, sample the source indices,
-// decide whether normals are usable, refresh the search tree and derive all internal thresholds.
-// \return false if the base class initialization fails or source/target are missing, true otherwise
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> bool
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::initCompute ()
-{
-  std::srand (static_cast <unsigned int> (std::time (NULL)));
-
-  // basic pcl initialization
-  if (!pcl::PCLBase <PointSource>::initCompute ())
-    return (false);
-
-  // check if source and target are given
-  if (!input_ || !target_)
-  {
-    PCL_ERROR ("[%s::initCompute] Source or target dataset not given!\n", reg_name_.c_str ());
-    return (false);
-  }
-
-  // without user supplied target indices every target point is used
-  if (!target_indices_ || target_indices_->size () == 0)
-  {
-    target_indices_.reset (new std::vector <int> (static_cast <int> (target_->size ())));
-    int index = 0;
-    for (std::vector <int>::iterator it = target_indices_->begin (), it_e = target_indices_->end (); it != it_e; it++)
-      *it = index++;
-    target_cloud_updated_ = true;
-  }
-
-  // if a sample size for the point clouds is given; preferably no sampling of target cloud
-  if (nr_samples_ != 0)
-  {
-    const int ss = static_cast <int> (indices_->size ());
-    const int sample_fraction_src = std::max (1, static_cast <int> (ss / nr_samples_));
-
-    // keep each source index with probability 1 / sample_fraction_src
-    source_indices_ = pcl::IndicesPtr (new std::vector <int>);
-    for (int i = 0; i < ss; i++)
-      if (rand () % sample_fraction_src == 0)
-        source_indices_->push_back ((*indices_) [i]);
-  }
-  else
-    source_indices_ = indices_;
-
-  // check usage of normals; assigned unconditionally so that a flag set by a previous run does not
-  // survive after the normals were removed or no longer match the cloud sizes
-  use_normals_ = (source_normals_ && target_normals_ &&
-                  source_normals_->size () == input_->size () && target_normals_->size () == target_->size ());
-
-  // set up tree structures
-  if (target_cloud_updated_)
-  {
-    tree_->setInputCloud (target_, target_indices_);
-    target_cloud_updated_ = false;
-  }
-
-  // set predefined variables
-  const int min_iterations = 4;
-  const float diameter_fraction = 0.3f;
-
-  // get diameter of the target cloud (diagonal of its axis aligned bounding box)
-  Eigen::Vector4f pt_min, pt_max;
-  pcl::getMinMax3D (*target_, *target_indices_, pt_min, pt_max);
-  diameter_ = (pt_max - pt_min).norm ();
-
-  // derive the limits for the random base selection
-  float max_base_diameter = diameter_* approx_overlap_ * 2.f;
-  max_base_diameter_sqr_ = max_base_diameter * max_base_diameter;
-
-  // normalize the delta
-  // NOTE(review): delta_ is scaled in place, so every further call with normalize_delta_ set scales the
-  // already scaled value again - confirm whether repeated alignment with one object is expected
-  if (normalize_delta_)
-  {
-    float mean_dist = getMeanPointDensity <PointTarget> (target_, *target_indices_, 0.05f * diameter_, nr_threads_);
-    delta_ *= mean_dist;
-  }
-
-  // heuristic determination of number of trials to have high probability of finding a good solution
-  if (max_iterations_ == 0)
-  {
-    float first_est = std::log (small_error_) / std::log (1.0 - std::pow ((double) approx_overlap_, (double) min_iterations));
-    max_iterations_ = static_cast <int> (first_est / (diameter_fraction * approx_overlap_ * 2.f));
-  }
-
-  // set further parameter
-  if (score_threshold_ == FLT_MAX)
-    score_threshold_ = 1.f - approx_overlap_;
-
-  if (max_iterations_ < 4)
-    max_iterations_ = 4;
-
-  // a runtime limit below one second means "no limit"
-  if (max_runtime_ < 1)
-    max_runtime_ = INT_MAX;
-
-  // calculate internal parameters based on the estimated point density
-  max_pair_diff_ = delta_ * 2.f;
-  max_edge_diff_ = delta_ * 4.f;
-  coincidation_limit_ = delta_ * 2.f; // EDITED: originally std::sqrt (delta_ * 2.f)
-  max_mse_ = powf (delta_* 2.f, 2.f);
-  max_inlier_dist_sqr_ = powf (delta_ * 2.f, 2.f);
-
-  // reset fitness_score
-  fitness_score_ = FLT_MAX;
-
-  return (true);
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Select a base of four (nearly) coplanar target points.
-// A random triangle is chosen first; the fourth point is the target point closest to the triangle's plane
-// that respects the minimum and maximum distance limits. On success the base is reordered by setupBase ().
-// \param[out] base_indices indices of the four base points
-// \param[out] ratio diagonal intersection ratios of the base
-// \return 0 on success, -1 if ransac_iterations_ attempts did not produce a base
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> int
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::selectBase (
-  std::vector <int> &base_indices,
-  float (&ratio)[2])
-{
-  // squared minimum spacing: 1% of the squared maximum base diameter
-  const float too_close_sqr = max_base_diameter_sqr_*0.01;
-
-  Eigen::VectorXf coefficients (4);
-  pcl::SampleConsensusModelPlane <PointTarget> plane (target_);
-  plane.setIndices (target_indices_);
-  Eigen::Vector4f centre_pt;
-  float nearest_to_plane = FLT_MAX;
-
-  const std::vector <int> &pool = *target_indices_;
-
-  // retry until a valid quadruple is found or ransac_iterations_ attempts are used up
-  for (int attempt = 0; attempt < ransac_iterations_; ++attempt)
-  {
-    // a usable random triangle is the precondition for everything below
-    if (selectBaseTriangle (base_indices) >= 0)
-    {
-      // plane through and centroid of the first three base points
-      std::vector <int> base_triple (base_indices.begin (), base_indices.end () - 1);
-      plane.computeModelCoefficients (base_triple, coefficients);
-      pcl::compute3DCentroid (*target_, base_triple, centre_pt);
-
-      const PointTarget &p1 = target_->points[base_indices[0]];
-      const PointTarget &p2 = target_->points[base_indices[1]];
-      const PointTarget &p3 = target_->points[base_indices[2]];
-
-      // scan all target points for the most suitable fourth point
-      for (std::size_t k = 0; k < pool.size (); ++k)
-      {
-        const PointTarget &p4 = target_->points[pool[k]];
-
-        const float d1 = pcl::squaredEuclideanDistance (p4, p1);
-        const float d2 = pcl::squaredEuclideanDistance (p4, p2);
-        const float d3 = pcl::squaredEuclideanDistance (p4, p3);
-        const float d4 = (p4.getVector3fMap () - centre_pt.head (3)).squaredNorm ();
-
-        // minimum sampling distance to the triangle corners and its centroid
-        const bool too_close = d1 < too_close_sqr || d2 < too_close_sqr || d3 < too_close_sqr || d4 < too_close_sqr;
-        // EDITED -> 4th point now also limited by max base line
-        const bool too_far = d1 > max_base_diameter_sqr_ || d2 > max_base_diameter_sqr_ || d3 > max_base_diameter_sqr_;
-
-        if (!too_close && !too_far)
-        {
-          // remember the admissible point that lies closest to the plane
-          const float dist_to_plane = pcl::pointToPlaneDistance (p4, coefficients);
-          if (dist_to_plane < nearest_to_plane)
-          {
-            nearest_to_plane = dist_to_plane;
-            base_indices[3] = pool[k];
-          }
-        }
-      }
-
-      // at least one point fulfilled the conditions
-      if (nearest_to_plane != FLT_MAX)
-      {
-        // order points to build largest quadrangle and calculate intersection ratios of diagonals
-        setupBase (base_indices, ratio);
-        return (0);
-      }
-    }
-  }
-
-  // no quadruple could be selected
-  return (-1);
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Randomly select three target points spanning a triangle that is as large as possible while both
-// edges starting at the first point stay shorter than the maximum base diameter.
-// \param[out] base_indices entries 0..2 receive the selected point indices (entry 0 is always written
-//             when target indices exist; entries 1 and 2 only if a triangle was found)
-// \return 0 if a triangle with non-zero area was found, -1 otherwise (also if there are no target indices)
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> int
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::selectBaseTriangle (std::vector <int> &base_indices)
-{
-  const int nr_points = static_cast <int> (target_indices_->size ());
-
-  // without candidates the modulo below would divide by zero
-  if (nr_points <= 0)
-    return (-1);
-
-  float best_t = 0.f;
-
-  // choose random first point
-  const int index1 = (*target_indices_)[rand () % nr_points];
-  base_indices[0] = index1;
-
-  // random search for 2 other points (as far away as overlap allows)
-  for (int i = 0; i < ransac_iterations_; i++)
-  {
-    const int index2 = (*target_indices_)[rand () % nr_points];
-    const int index3 = (*target_indices_)[rand () % nr_points];
-
-    Eigen::Vector3f u = target_->points[index2].getVector3fMap () - target_->points[index1].getVector3fMap ();
-    Eigen::Vector3f v = target_->points[index3].getVector3fMap () - target_->points[index1].getVector3fMap ();
-    float t = u.cross (v).squaredNorm (); // triangle area (0.5 * sqrt(t)) should be maximal
-
-    // check for most suitable point triple
-    if (t > best_t && u.squaredNorm () < max_base_diameter_sqr_ && v.squaredNorm () < max_base_diameter_sqr_)
-    {
-      best_t = t;
-      base_indices[1] = index2;
-      base_indices[2] = index3;
-    }
-  }
-
-  // best_t stays 0 if no (non-degenerate) triplet could be selected
-  return (best_t == 0.f ? -1 : 0);
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Reorder the four base points: every ordering is tried and the one with the smallest distance between
-// the segments (0,1) and (2,3), as reported by segmentToSegmentDist (), is kept.
-// \param[in,out] base_indices the four base indices, reordered on return
-// \param[out] ratio intersection ratios belonging to the selected ordering
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> void
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::setupBase (
-  std::vector <int> &base_indices,
-  float (&ratio)[2])
-{
-  const std::vector <int> original (base_indices.begin (), base_indices.end ());
-  std::vector <int> ordering (base_indices.begin (), base_indices.end ());
-  const std::size_t count = original.size ();
-  float smallest_dist = FLT_MAX;
-
-  // enumerate all orderings by position: first, second and third are chosen, the fourth is what is left
-  for (std::size_t first = 0; first < count; ++first)
-    for (std::size_t second = 0; second < count; ++second)
-    {
-      if (second == first)
-        continue;
-
-      for (std::size_t third = 0; third < count; ++third)
-      {
-        if (third == second || third == first)
-          continue;
-
-        // the remaining position is the first one not used yet
-        std::size_t fourth = 0;
-        while (fourth == first || fourth == second || fourth == third)
-          ++fourth;
-
-        ordering[0] = original[first];
-        ordering[1] = original[second];
-        ordering[2] = original[third];
-        ordering[3] = original[fourth];
-
-        // calculate diagonal intersection ratios and check for suitable segment to segment distances
-        float ordering_ratio[2];
-        const float dist = segmentToSegmentDist (ordering, ordering_ratio);
-        if (dist < smallest_dist)
-        {
-          smallest_dist = dist;
-          ratio[0] = ordering_ratio[0];
-          ratio[1] = ordering_ratio[1];
-          base_indices = ordering;
-        }
-      }
-    }
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Compute the distance between the target segments (base 0 -> base 1) and (base 2 -> base 3).
-// \param[in] base_indices indices of the four base points in the target cloud
-// \param[out] ratio ratio[0] / ratio[1] are the parameters along the first / second segment
-//             at which the returned distance is measured
-// \return length of the vector connecting the two points given by the ratios
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> float
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::segmentToSegmentDist (
- const std::vector <int> &base_indices,
- float (&ratio)[2])
-{
- // get point vectors
- // u: direction of first segment, v: direction of second segment, w: offset between the segment starts
- Eigen::Vector3f u = target_->points[base_indices[1]].getVector3fMap () - target_->points[base_indices[0]].getVector3fMap ();
- Eigen::Vector3f v = target_->points[base_indices[3]].getVector3fMap () - target_->points[base_indices[2]].getVector3fMap ();
- Eigen::Vector3f w = target_->points[base_indices[0]].getVector3fMap () - target_->points[base_indices[2]].getVector3fMap ();
-
- // calculate segment distances
- // the ratios are kept as fractions sN / sD and tN / tD until the very end
- float a = u.dot (u);
- float b = u.dot (v);
- float c = v.dot (v);
- float d = u.dot (w);
- float e = v.dot (w);
- float D = a * c - b * b;
- float sN = 0.f, sD = D;
- float tN = 0.f, tD = D;
-
- // check segments
- // D below small_error_: the directions are (almost) parallel, so the first parameter is fixed to 0
- if (D < small_error_)
- {
- sN = 0.f;
- sD = 1.f;
- tN = e;
- tD = c;
- }
- else
- {
- sN = (b * e - c * d);
- tN = (a * e - b * d);
-
- // clamp the first parameter to [0, 1] and recompute the second one for the clamped value
- if (sN < 0.f)
- {
- sN = 0.f;
- tN = e;
- tD = c;
- }
- else if (sN > sD)
- {
- sN = sD;
- tN = e + b;
- tD = c;
- }
- }
-
- // clamp the second parameter to [0, 1]; the first one is recomputed for the clamped value and clamped again
- if (tN < 0.f)
- {
- tN = 0.f;
-
- if (-d < 0.f)
- sN = 0.f;
-
- else if (-d > a)
- sN = sD;
-
- else
- {
- sN = -d;
- sD = a;
- }
- }
-
- else if (tN > tD)
- {
- tN = tD;
-
- if ((-d + b) < 0.f)
- sN = 0.f;
-
- else if ((-d + b) > a)
- sN = sD;
-
- else
- {
- sN = (-d + b);
- sD = a;
- }
- }
-
- // set intersection ratios
- // numerators below small_error_ are treated as exactly 0, which also skips the division
- ratio[0] = (std::abs (sN) < small_error_) ? 0.f : sN / sD;
- ratio[1] = (std::abs (tN) < small_error_) ? 0.f : tN / tD;
-
- // vector between the points at ratio[0] on the first and ratio[1] on the second segment
- Eigen::Vector3f x = w + (ratio[0] * u) - (ratio[1] * v);
- return (x.norm ());
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Collect all pairs of (sampled) source points whose distance matches the distance of a target point pair.
-// \param[in] idx1 index of the first target point of the reference pair
-// \param[in] idx2 index of the second target point of the reference pair
-// \param[out] pairs every accepted source pair is appended in both orientations, with its distance
-// \return 0 if pairs is not empty afterwards, -1 otherwise
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> int
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::bruteForceCorrespondences (
-  int idx1,
-  int idx2,
-  pcl::Correspondences &pairs)
-{
-  const float max_norm_diff = 0.5f * max_norm_diff_ * M_PI / 180.f;
-
-  // calculate reference segment distance and normal angle
-  // (the "angle" is represented by the length of the difference of the two normals)
-  float ref_dist = pcl::euclideanDistance (target_->points[idx1], target_->points[idx2]);
-  float ref_norm_angle = (use_normals_ ? (target_normals_->points[idx1].getNormalVector3fMap () -
-                                          target_normals_->points[idx2].getNormalVector3fMap ()).norm () : 0.f);
-
-  // loop over all unordered pairs of points in the source point cloud; written so that an empty index
-  // list simply yields no iteration (no iterator arithmetic on end () of an empty vector)
-  const std::vector <int>::const_iterator it_e = source_indices_->end ();
-  for (std::vector <int>::const_iterator it_out = source_indices_->begin (); it_out != it_e; ++it_out)
-  {
-    const PointSource *pt1 = &(*input_)[*it_out];
-    for (std::vector <int>::const_iterator it_in = it_out + 1; it_in != it_e; ++it_in)
-    {
-      const PointSource *pt2 = &(*input_)[*it_in];
-
-      // check point distance compared to reference dist (from base)
-      float dist = pcl::euclideanDistance (*pt1, *pt2);
-      if (std::abs(dist - ref_dist) < max_pair_diff_)
-      {
-        // normal evaluation if normals are given
-        if (use_normals_)
-        {
-          const NormalT *pt1_n = &(source_normals_->points[*it_out]);
-          const NormalT *pt2_n = &(source_normals_->points[*it_in]);
-
-          // both the difference and the sum are tested, so a flipped normal is tolerated
-          float norm_angle_1 = (pt1_n->getNormalVector3fMap () - pt2_n->getNormalVector3fMap ()).norm ();
-          float norm_angle_2 = (pt1_n->getNormalVector3fMap () + pt2_n->getNormalVector3fMap ()).norm ();
-
-          float norm_diff = std::min <float> (std::abs (norm_angle_1 - ref_norm_angle), std::abs (norm_angle_2 - ref_norm_angle));
-          if (norm_diff > max_norm_diff)
-            continue;
-        }
-
-        // store the pair in both orientations
-        pairs.push_back (pcl::Correspondence (*it_in, *it_out, dist));
-        pairs.push_back (pcl::Correspondence (*it_out, *it_in, dist));
-      }
-    }
-  }
-
-  // return success if at least one correspondence was found
-  return (pairs.size () == 0 ? -1 : 0);
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Combine the pair correspondences of both base diagonals into candidate matches of four source points.
-// For every pair the two possible intermediate points (one per ratio) are computed; a pair of the first
-// set and a pair of the second set form a candidate if their intermediate points lie within
-// coincidation_limit_ of each other and the candidate passes checkBaseMatch ().
-// \param[in] base_indices indices of the base points in the target cloud
-// \param[out] matches accepted candidates (four source indices each) are appended
-// \param[in] pairs_a source pairs matching the first base diagonal
-// \param[in] pairs_b source pairs matching the second base diagonal
-// \param[in] ratio diagonal intersection ratios of the base
-// \return 0 if matches is not empty afterwards, -1 otherwise
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> int
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::determineBaseMatches (
-  const std::vector <int> &base_indices,
-  std::vector <std::vector <int> > &matches,
-  const pcl::Correspondences &pairs_a,
-  const pcl::Correspondences &pairs_b,
-  const float (&ratio)[2])
-{
-  // calculate edge lengths of base
-  float dist_base[4];
-  dist_base[0] = pcl::euclideanDistance (target_->points[base_indices[0]], target_->points[base_indices[2]]);
-  dist_base[1] = pcl::euclideanDistance (target_->points[base_indices[0]], target_->points[base_indices[3]]);
-  dist_base[2] = pcl::euclideanDistance (target_->points[base_indices[1]], target_->points[base_indices[2]]);
-  dist_base[3] = pcl::euclideanDistance (target_->points[base_indices[1]], target_->points[base_indices[3]]);
-
-  // loop over first point pair correspondences and store intermediate points 'e' in new point cloud
-  // (two points per pair, so the points of pair p are stored at 2 * p and 2 * p + 1)
-  PointCloudSourcePtr cloud_e (new PointCloudSource);
-  cloud_e->resize (pairs_a.size () * 2);
-  PointCloudSourceIterator it_pt = cloud_e->begin ();
-  for (pcl::Correspondences::const_iterator it_pair = pairs_a.begin (), it_pair_e = pairs_a.end () ; it_pair != it_pair_e; it_pair++)
-  {
-    const PointSource *pt1 = &(input_->points[it_pair->index_match]);
-    const PointSource *pt2 = &(input_->points[it_pair->index_query]);
-
-    // calculate intermediate points using both ratios from base (r1,r2)
-    for (int i = 0; i < 2; i++, it_pt++)
-    {
-      it_pt->x = pt1->x + ratio[i] * (pt2->x - pt1->x);
-      it_pt->y = pt1->y + ratio[i] * (pt2->y - pt1->y);
-      it_pt->z = pt1->z + ratio[i] * (pt2->z - pt1->z);
-    }
-  }
-
-  // initialize new kd tree of intermediate points from first point pair correspondences
-  KdTreeReciprocalPtr tree_e (new KdTreeReciprocal);
-  tree_e->setInputCloud (cloud_e);
-
-  std::vector <int> ids;
-  std::vector <float> dists_sqr;
-
-  // loop over second point pair correspondences
-  for (pcl::Correspondences::const_iterator it_pair = pairs_b.begin (), it_pair_e = pairs_b.end () ; it_pair != it_pair_e; it_pair++)
-  {
-    // the pairs index the source cloud (input_), so the points are of the source point type
-    const PointSource *pt1 = &(input_->points[it_pair->index_match]);
-    const PointSource *pt2 = &(input_->points[it_pair->index_query]);
-
-    // calculate intermediate points using both ratios from base (r1,r2)
-    for (int i = 0; i < 2; i++)
-    {
-      // query point for the tree built from the source typed cloud_e
-      PointSource pt_e;
-      pt_e.x = pt1->x + ratio[i] * (pt2->x - pt1->x);
-      pt_e.y = pt1->y + ratio[i] * (pt2->y - pt1->y);
-      pt_e.z = pt1->z + ratio[i] * (pt2->z - pt1->z);
-
-      // search for corresponding intermediate points
-      tree_e->radiusSearch (pt_e, coincidation_limit_, ids, dists_sqr);
-      for (std::vector <int>::iterator it = ids.begin (), it_e = ids.end (); it != it_e; it++)
-      {
-        std::vector <int> match_indices (4);
-
-        // two intermediate points were stored per pair, so integer division recovers the pair index
-        const pcl::Correspondence &pair_a = pairs_a[*it / 2];
-        match_indices[0] = pair_a.index_match;
-        match_indices[1] = pair_a.index_query;
-        match_indices[2] = it_pair->index_match;
-        match_indices[3] = it_pair->index_query;
-
-        // EDITED: added coarse check of match based on edge length (due to rigid-body )
-        if (checkBaseMatch (match_indices, dist_base) < 0)
-          continue;
-
-        matches.push_back (match_indices);
-      }
-    }
-  }
-
-  // return unsuccessful if no match was found
-  return (matches.size () > 0 ? 0 : -1);
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Coarse rigidity check: the four edges (0,2), (0,3), (1,2) and (1,3) of a candidate match in the source
-// cloud must each differ from the corresponding base edge by less than max_edge_diff_.
-// \param[in] match_indices indices of the four candidate points in the source cloud
-// \param[in] dist_ref the four reference edge lengths of the base, in the order given above
-// \return 0 if all four edges agree, -1 otherwise
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> int
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::checkBaseMatch (
-  const std::vector <int> &match_indices,
-  const float (&dist_ref)[4])
-{
-  const PointSource &m0 = input_->points[match_indices[0]];
-  const PointSource &m1 = input_->points[match_indices[1]];
-  const PointSource &m2 = input_->points[match_indices[2]];
-  const PointSource &m3 = input_->points[match_indices[3]];
-
-  // edge lengths of the match, same order as dist_ref
-  float edge[4];
-  edge[0] = pcl::euclideanDistance (m0, m2);
-  edge[1] = pcl::euclideanDistance (m0, m3);
-  edge[2] = pcl::euclideanDistance (m1, m2);
-  edge[3] = pcl::euclideanDistance (m1, m3);
-
-  // reject as soon as one edge deviates too much from the base
-  for (int k = 0; k < 4; ++k)
-  {
-    const bool within_limit = std::abs (edge[k] - dist_ref[k]) < max_edge_diff_;
-    if (!within_limit)
-      return (-1);
-  }
-
-  return (0);
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Evaluate all candidate matches of one base and keep the best one.
-// \param[in] base_indices indices of the base points in the target cloud
-// \param[in,out] matches candidate matches; each one is passed to linkMatchWithBase (), which may reorder it
-// \param[out] candidates resized to one entry; that entry is overwritten by every match that passes
-//             validateMatch () and validateTransformation ()
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> void
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::handleMatches (
-  const std::vector <int> &base_indices,
-  std::vector <std::vector <int> > &matches,
-  MatchingCandidates &candidates)
-{
-  typedef std::vector <std::vector <int> >::iterator MatchIterator;
-
-  candidates.resize (1);
-  // running best score, handed to validateTransformation () by reference for every match
-  float fitness_score = FLT_MAX;
-
-  for (MatchIterator match = matches.begin (); match != matches.end (); ++match)
-  {
-    pcl::Correspondences links;
-    Eigen::Matrix4f match_transformation;
-
-    // determine correspondences between base and match according to their distance to centroid
-    linkMatchWithBase (base_indices, *match, links);
-
-    // first the residual check of the match itself, then (only if that passed) the check of the
-    // transformation on the sampled source cloud compared to previous matches
-    if (validateMatch (base_indices, *match, links, match_transformation) >= 0 &&
-        validateTransformation (match_transformation, fitness_score) >= 0)
-    {
-      // store best match as well as associated fitness_score and transformation;
-      // the last correspondence is dropped before storing
-      candidates.front ().fitness_score = fitness_score;
-      candidates.front ().transformation = match_transformation;
-      links.pop_back ();
-      candidates.front ().correspondences = links;
-    }
-  }
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-// Pair every base point with a match point by comparing their squared distances to the respective centroid.
-// \param[in] base_indices indices of the base points in the target cloud
-// \param[in,out] match_indices indices of the match points in the source cloud; entry i is replaced by the
-//                match point assigned to base point i
-// \param[out] correspondences one correspondence (match index, base index, difference of squared
-//             distances) is appended per base point
-// \note A match point is not removed after being assigned, so it may be assigned to several base points.
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> void
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::linkMatchWithBase (
-  const std::vector <int> &base_indices,
-  std::vector <int> &match_indices,
-  pcl::Correspondences &correspondences)
-{
-  // calculate centroid of base and target
-  Eigen::Vector4f centre_base, centre_match;
-  pcl::compute3DCentroid (*target_, base_indices, centre_base);
-  pcl::compute3DCentroid (*input_, match_indices, centre_match);
-
-  PointTarget centre_pt_base;
-  centre_pt_base.x = centre_base[0];
-  centre_pt_base.y = centre_base[1];
-  centre_pt_base.z = centre_base[2];
-
-  PointSource centre_pt_match;
-  centre_pt_match.x = centre_match[0];
-  centre_pt_match.y = centre_match[1];
-  centre_pt_match.z = centre_match[2];
-
-  // snapshot of the match indices, because match_indices is overwritten while iterating
-  const std::vector <int> copy = match_indices;
-
-  // the distance of a match point to the match centroid does not depend on the base point: compute it once
-  std::vector <float> match_dists_sqr;
-  match_dists_sqr.reserve (copy.size ());
-  for (std::vector <int>::const_iterator it = copy.begin (), it_e = copy.end (); it != it_e; ++it)
-    match_dists_sqr.push_back (pcl::squaredEuclideanDistance (input_->points[*it], centre_pt_match));
-
-  // find corresponding points according to their distance to the centroid
-  std::vector <int>::const_iterator it_base = base_indices.begin (), it_base_e = base_indices.end ();
-  std::vector <int>::iterator it_match_orig = match_indices.begin ();
-  for (; it_base != it_base_e; it_base++, it_match_orig++)
-  {
-    float dist_sqr_1 = pcl::squaredEuclideanDistance (target_->points[*it_base], centre_pt_base);
-    float best_diff_sqr = FLT_MAX;
-    // fall back to the index already stored at this position, so that the value is defined even if no
-    // candidate compares below FLT_MAX (e.g. distances involving NaN coordinates)
-    int best_index = *it_match_orig;
-
-    for (std::size_t k = 0; k < copy.size (); ++k)
-    {
-      // calculate difference of distances to centre point
-      float diff_sqr = std::abs(dist_sqr_1 - match_dists_sqr[k]);
-
-      if (diff_sqr < best_diff_sqr)
-      {
-        best_diff_sqr = diff_sqr;
-        best_index = copy[k];
-      }
-    }
-
-    // assign new correspondence and update indices of matched targets
-    correspondences.push_back (pcl::Correspondence (best_index, *it_base, best_diff_sqr));
-    *it_match_orig = best_index;
-  }
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-/** \brief Check a base/match pairing by estimating a rigid transformation and measuring its residual.
-  *
-  * \param[in] base_indices indices of the base points in the target cloud
-  * \param[in] match_indices indices of the match points in the source cloud
-  * \param[in] correspondences correspondences between match and base points; the last entry is ignored
-  * \param[out] transformation transformation estimated from the remaining correspondences
-  * \return 0 if the mean squared residual is below \a max_mse_, -1 otherwise (including the case of
-  * an empty correspondence set, for which \a transformation is left untouched)
-  */
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> int
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::validateMatch (
-  const std::vector <int> &base_indices,
-  const std::vector <int> &match_indices,
-  const pcl::Correspondences &correspondences,
-  Eigen::Matrix4f &transformation)
-{
-  // nothing to validate; also avoids removing the last element of an empty container below
-  if (correspondences.empty ())
-    return (-1);
-
-  // only use triplet of points to simplify process (possible due to planar case)
-  pcl::Correspondences correspondences_temp = correspondences;
-  correspondences_temp.pop_back ();
-
-  // estimate transformation between correspondence set
-  transformation_estimation_->estimateRigidTransformation (*input_, *target_, correspondences_temp, transformation);
-
-  // transform match points into the target frame
-  PointCloudSource match_transformed;
-  pcl::transformPointCloud (*input_, match_indices, match_transformed, transformation);
-
-  // calculate residuals of transformation and check against maximum threshold
-  // NOTE(review): assumes base_indices and match_indices hold at least nr_points entries each
-  const std::size_t nr_points = correspondences_temp.size ();
-  float mse = 0.f;
-  for (std::size_t i = 0; i < nr_points; ++i)
-    mse += pcl::squaredEuclideanDistance (match_transformed.points [i], target_->points [base_indices[i]]);
-
-  // with nr_points == 0 this yields NaN, which fails the comparison and is reported as -1
-  mse /= nr_points;
-  return (mse < max_mse_ ? 0 : -1);
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-/** \brief Score a transformation by the fraction of transformed source points that have a close target neighbour.
-  *
-  * The score is 1 minus the inlier ratio, where a point counts as inlier if the squared distance to
-  * its nearest neighbour found by \a tree_ is below \a max_inlier_dist_sqr_.
-  *
-  * \param[in] transformation transformation applied to the source points selected by \a source_indices_
-  * \param[in,out] fitness_score on input the score to beat; overwritten with the new score on success
-  * \return 0 if the new score is not larger than the incoming one, -1 otherwise or if there are no
-  * source points to evaluate
-  */
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> int
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::validateTransformation (
-  Eigen::Matrix4f &transformation,
-  float &fitness_score)
-{
-  // transform source point cloud
-  PointCloudSource source_transformed;
-  pcl::transformPointCloud (*input_, *source_indices_, source_transformed, transformation);
-
-  const std::size_t nr_points = source_transformed.size ();
-
-  // without points the ratio below would be 0/0 = NaN, which would pass the final check and
-  // overwrite fitness_score with NaN; report failure instead
-  if (nr_points == 0)
-    return (-1);
-
-  // minimum number of inliers needed to reach the incoming score
-  const std::size_t terminate_value = fitness_score > 1 ? 0 : static_cast <std::size_t> ((1.f - fitness_score) * nr_points);
-
-  float inlier_score_temp = 0;
-  std::vector <int> ids;
-  std::vector <float> dists_sqr;
-  PointCloudSourceIterator it = source_transformed.begin ();
-
-  for (std::size_t i = 0; i < nr_points; ++it, ++i)
-  {
-    // search for nearest point using kd tree search
-    tree_->nearestKSearch (*it, 1, ids, dists_sqr);
-    inlier_score_temp += (dists_sqr[0] < max_inlier_dist_sqr_ ? 1 : 0);
-
-    // early terminating: even if all remaining points were inliers, the required count is out of reach
-    if (nr_points - i + inlier_score_temp < terminate_value)
-      break;
-  }
-
-  // check current costs and return unsuccessful if larger than previous ones
-  inlier_score_temp /= static_cast <float> (nr_points);
-  const float fitness_score_temp = 1.f - inlier_score_temp;
-
-  if (fitness_score_temp > fitness_score)
-    return (-1);
-
-  fitness_score = fitness_score_temp;
-  return (0);
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////
-/** \brief Select the best candidate over all runs and store it as the final result.
-  *
-  * Only the first entry of each candidate list is considered. If a candidate with a fitness score
-  * below FLT_MAX exists, \a fitness_score_, \a final_transformation_, \a correspondences_ and
-  * \a converged_ are updated from it; otherwise none of them is modified.
-  *
-  * \param[in] candidates one list of matching candidates per run; empty lists are skipped
-  */
-template <typename PointSource, typename PointTarget, typename NormalT, typename Scalar> void
-pcl::registration::FPCSInitialAlignment <PointSource, PointTarget, NormalT, Scalar>::finalCompute (
-  const std::vector <MatchingCandidates > &candidates)
-{
-  // get best fitness_score over all tries
-  const int nr_candidates = static_cast <int> (candidates.size ());
-  int best_index = -1;
-  float best_score = FLT_MAX;
-  for (int i = 0; i < nr_candidates; ++i)
-  {
-    // a run without any candidate has no element [0] to inspect
-    if (candidates [i].empty ())
-      continue;
-
-    const float &fitness_score = candidates [i][0].fitness_score;
-    if (fitness_score < best_score)
-    {
-      best_score = fitness_score;
-      best_index = i;
-    }
-  }
-
-  // check if a valid candidate was available
-  if (best_index >= 0)
-  {
-    const MatchingCandidate &best = candidates [best_index][0];
-    fitness_score_ = best.fitness_score;
-    final_transformation_ = best.transformation;
-    *correspondences_ = best.correspondences;
-
-    // convergence is declared when the resulting fitness score is below score_threshold_
-    converged_ = fitness_score_ < score_threshold_;
-  }
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////
-
-#endif // PCL_REGISTRATION_IMPL_IA_4PCS_H_
+++ /dev/null
-/*
- * Software License Agreement (BSD License)
- *
- * Point Cloud Library (PCL) - www.pointclouds.org
- * Copyright (c) 2014-, Open Perception, Inc.
- * Copyright (C) 2008 Ben Gurion University of the Negev, Beer Sheva, Israel.
- *
- * All rights reserved
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met
- *
- * * The use for research only (no for any commercial application).
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials provided
- * with the distribution.
- * * Neither the name of the copyright holder(s) nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef PCL_REGISTRATION_MATCHING_CANDIDATE_H_
-#define PCL_REGISTRATION_MATCHING_CANDIDATE_H_
-
-#include <pcl/registration/registration.h>
-#include <pcl/common/common.h>
-
-namespace pcl
-{
- namespace registration
- {
-    /** \brief Container for matching candidate consisting of
-      *
-      * * fitness score value as a result of the matching algorithm
-      * * correspondences between source and target data set
-      * * transformation matrix calculated based on the correspondences
-      *
-      */
-    struct MatchingCandidate
-    {
-      /** \brief Constructor. Sets the fitness score to FLT_MAX, leaves the correspondences
-        * empty and uses the identity as transformation.
-        */
-      MatchingCandidate () :
-        fitness_score (FLT_MAX),
-        correspondences (),
-        transformation (Eigen::Matrix4f::Identity ())
-      {}
-
-      /** \brief Value constructor.
-        * \param[in] s fitness score of the candidate
-        * \param[in] c correspondences between source and target (copied)
-        * \param[in] m transformation matrix belonging to the correspondences
-        */
-      MatchingCandidate (float s, const pcl::Correspondences &c, const Eigen::Matrix4f &m) :
-        fitness_score (s),
-        correspondences (c),
-        transformation (m)
-      {}
-
-      // No user-declared destructor on purpose: the implicit one is sufficient, and declaring an
-      // empty one would suppress the implicitly generated move operations.
-
-      /** \brief Fitness score of current candidate resulting from matching algorithm. */
-      float fitness_score;
-
-      /** \brief Correspondences between source <-> target. */
-      pcl::Correspondences correspondences;
-
-      /** \brief Corresponding transformation matrix retrieved using \a correspondences. */
-      Eigen::Matrix4f transformation;
-
-      EIGEN_MAKE_ALIGNED_OPERATOR_NEW
-    };
-
-    /** \brief Vector of matching candidates stored with Eigen's aligned allocator. */
-    typedef std::vector<MatchingCandidate, Eigen::aligned_allocator<MatchingCandidate> > MatchingCandidates;
-
-    /** \brief Sorting of candidates based on fitness score value. */
-    struct by_score
-    {
-      /** \brief Operator used to sort candidates based on fitness score.
-        * \param[in] left first candidate
-        * \param[in] right second candidate
-        * \return true if the fitness score of \a left is strictly smaller than that of \a right
-        *
-        * Declared const so the comparator can also be invoked through a const object or reference.
-        */
-      bool operator () (MatchingCandidate const &left, MatchingCandidate const &right) const
-      {
-        return (left.fitness_score < right.fitness_score);
-      }
-    };
-
- }; // namespace registration
-}; // namespace pcl
-
-
-#endif // PCL_REGISTRATION_MATCHING_CANDIDATE_H_