From: Jochen Sprickerhof Date: Mon, 15 Aug 2016 15:07:38 +0000 (+0200) Subject: Imported Upstream version 1.8.0+dfsg X-Git-Tag: archive/raspbian/1.14.0+dfsg-2+rpi1^2~10^2~12 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=b33c6dbcef571244fc9bcbf8c1ac12d4dc791689;p=pcl.git Imported Upstream version 1.8.0+dfsg --- diff --git a/cuda/common/include/pcl/cuda/cutil.h b/cuda/common/include/pcl/cuda/cutil.h deleted file mode 100644 index 8f2ad57d..00000000 --- a/cuda/common/include/pcl/cuda/cutil.h +++ /dev/null @@ -1,955 +0,0 @@ -/* - * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - - /* -* Copyright 1993-2010 NVIDIA Corporation. All rights reserved. -* -* Please refer to the NVIDIA end user license agreement (EULA) associated -* with this source code for terms and conditions that govern your use of -* this software. Any use, reproduction, disclosure, or distribution of -* this software and related documentation outside the terms of the EULA -* is strictly prohibited. -* -*/ - - -/* CUda UTility Library */ - -#ifndef _CUTIL_H_ -#define _CUTIL_H_ - -#ifdef _WIN32 -# pragma warning( disable : 4996 ) // disable deprecated warning -#endif - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - - // helper typedefs for building DLL -#ifdef _WIN32 -# ifdef BUILD_DLL -# define DLL_MAPPING __declspec(dllexport) -# else -# define DLL_MAPPING __declspec(dllimport) -# endif -#else -# define DLL_MAPPING -#endif - -#ifdef _WIN32 - #define CUTIL_API __stdcall -#else - #define CUTIL_API -#endif - - //////////////////////////////////////////////////////////////////////////// - //! 
CUT bool type - //////////////////////////////////////////////////////////////////////////// - enum CUTBoolean - { - CUTFalse = 0, - CUTTrue = 1 - }; - - //////////////////////////////////////////////////////////////////////////// - //! Deallocate memory allocated within Cutil - //! @param pointer to memory - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - void CUTIL_API - cutFree( void* ptr); - - //////////////////////////////////////////////////////////////////////////// - //! Helper for bank conflict checking (should only be used with the - //! CUT_BANK_CHECKER macro) - //! @param tidx thread id in x dimension of block - //! @param tidy thread id in y dimension of block - //! @param tidz thread id in z dimension of block - //! @param bdimx block size in x dimension - //! @param bdimy block size in y dimension - //! @param bdimz block size in z dimension - //! @param file name of the source file where the access takes place - //! @param line line in the source file where the access takes place - //! @param aname name of the array which is accessed - //! @param index index into the array - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - void CUTIL_API - cutCheckBankAccess( unsigned int tidx, unsigned int tidy, unsigned int tidz, - unsigned int bdimx, unsigned int bdimy, - unsigned int bdimz, const char* file, const int line, - const char* aname, const int index); - - //////////////////////////////////////////////////////////////////////////// - //! Find the path for a filename - //! @return the path if succeeded, otherwise 0 - //! @param filename name of the file - //! 
@param executablePath optional absolute path of the executable - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - char* CUTIL_API - cutFindFilePath(const char* filename, const char* executablePath); - - //////////////////////////////////////////////////////////////////////////// - //! Read file \filename containing single precision floating point data - //! @return CUTTrue if reading the file succeeded, otherwise false - //! @param filename name of the source file - //! @param data uninitialized pointer, returned initialized and pointing to - //! the data read - //! @param len number of data elements in data, -1 on error - //! @note If a NULL pointer is passed to this function and it is - //! initialized within Cutil then cutFree() has to be used to - //! deallocate the memory - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutReadFilef( const char* filename, float** data, unsigned int* len, - bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Read file \filename containing double precision floating point data - //! @return CUTTrue if reading the file succeeded, otherwise false - //! @param filename name of the source file - //! @param data uninitialized pointer, returned initialized and pointing to - //! the data read - //! @param len number of data elements in data, -1 on error - //! @note If a NULL pointer is passed to this function and it is - //! initialized within Cutil then cutFree() has to be used to - //! deallocate the memory - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutReadFiled( const char* filename, double** data, unsigned int* len, - bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Read file \filename containing integer data - //! 
@return CUTTrue if reading the file succeeded, otherwise false - //! @param filename name of the source file - //! @param data uninitialized pointer, returned initialized and pointing to - //! the data read - //! @param len number of data elements in data, -1 on error - //! @note If a NULL pointer is passed to this function and it is - //! initialized within Cutil then cutFree() has to be used to - //! deallocate the memory - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutReadFilei( const char* filename, int** data, unsigned int* len, bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Read file \filename containing unsigned integer data - //! @return CUTTrue if reading the file succeeded, otherwise false - //! @param filename name of the source file - //! @param data uninitialized pointer, returned initialized and pointing to - //! the data read - //! @param len number of data elements in data, -1 on error - //! @note If a NULL pointer is passed to this function and it is - //! initialized within Cutil then cutFree() has to be used to - //! deallocate the memory - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutReadFileui( const char* filename, unsigned int** data, - unsigned int* len, bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Read file \filename containing char / byte data - //! @return CUTTrue if reading the file succeeded, otherwise false - //! @param filename name of the source file - //! @param data uninitialized pointer, returned initialized and pointing to - //! the data read - //! @param len number of data elements in data, -1 on error - //! @note If a NULL pointer is passed to this function and it is - //! initialized within Cutil then cutFree() has to be used to - //! 
deallocate the memory - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutReadFileb( const char* filename, char** data, unsigned int* len, - bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Read file \filename containing unsigned char / byte data - //! @return CUTTrue if reading the file succeeded, otherwise false - //! @param filename name of the source file - //! @param data uninitialized pointer, returned initialized and pointing to - //! the data read - //! @param len number of data elements in data, -1 on error - //! @note If a NULL pointer is passed to this function and it is - //! initialized within Cutil then cutFree() has to be used to - //! deallocate the memory - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutReadFileub( const char* filename, unsigned char** data, - unsigned int* len, bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Write a data file \filename containing single precision floating point - //! data - //! @return CUTTrue if writing the file succeeded, otherwise false - //! @param filename name of the file to write - //! @param data pointer to data to write - //! @param len number of data elements in data, -1 on error - //! @param epsilon epsilon for comparison - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutWriteFilef( const char* filename, const float* data, unsigned int len, - const float epsilon, bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Write a data file \filename containing double precision floating point - //! data - //! @return CUTTrue if writing the file succeeded, otherwise false - //! @param filename name of the file to write - //! 
@param data pointer to data to write - //! @param len number of data elements in data, -1 on error - //! @param epsilon epsilon for comparison - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutWriteFiled( const char* filename, const float* data, unsigned int len, - const double epsilon, bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Write a data file \filename containing integer data - //! @return CUTTrue if writing the file succeeded, otherwise false - //! @param filename name of the file to write - //! @param data pointer to data to write - //! @param len number of data elements in data, -1 on error - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutWriteFilei( const char* filename, const int* data, unsigned int len, - bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Write a data file \filename containing unsigned integer data - //! @return CUTTrue if writing the file succeeded, otherwise false - //! @param filename name of the file to write - //! @param data pointer to data to write - //! @param len number of data elements in data, -1 on error - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutWriteFileui( const char* filename,const unsigned int* data, - unsigned int len, bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Write a data file \filename containing char / byte data - //! @return CUTTrue if writing the file succeeded, otherwise false - //! @param filename name of the file to write - //! @param data pointer to data to write - //! 
@param len number of data elements in data, -1 on error - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutWriteFileb( const char* filename, const char* data, unsigned int len, - bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Write a data file \filename containing unsigned char / byte data - //! @return CUTTrue if writing the file succeeded, otherwise false - //! @param filename name of the file to write - //! @param data pointer to data to write - //! @param len number of data elements in data, -1 on error - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutWriteFileub( const char* filename,const unsigned char* data, - unsigned int len, bool verbose = false); - - //////////////////////////////////////////////////////////////////////////// - //! Load PGM image file (with unsigned char as data element type) - //! @return CUTTrue if reading the file succeeded, otherwise false - //! @param file name of the image file - //! @param data handle to the data read - //! @param w width of the image - //! @param h height of the image - //! @note If a NULL pointer is passed to this function and it is - //! initialized within Cutil then cutFree() has to be used to - //! deallocate the memory - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutLoadPGMub( const char* file, unsigned char** data, - unsigned int *w,unsigned int *h); - - //////////////////////////////////////////////////////////////////////////// - //! Load PPM image file (with unsigned char as data element type) - //! @return CUTTrue if reading the file succeeded, otherwise false - //! @param file name of the image file - //! @param data handle to the data read - //! @param w width of the image - //! 
@param h height of the image - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutLoadPPMub( const char* file, unsigned char** data, - unsigned int *w,unsigned int *h); - - //////////////////////////////////////////////////////////////////////////// - //! Load PPM image file (with unsigned char as data element type), padding - //! 4th component - //! @return CUTTrue if reading the file succeeded, otherwise false - //! @param file name of the image file - //! @param data handle to the data read - //! @param w width of the image - //! @param h height of the image - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutLoadPPM4ub( const char* file, unsigned char** data, - unsigned int *w,unsigned int *h); - - //////////////////////////////////////////////////////////////////////////// - //! Load PGM image file (with unsigned int as data element type) - //! @return CUTTrue if reading the file succeeded, otherwise false - //! @param file name of the image file - //! @param data handle to the data read - //! @param w width of the image - //! @param h height of the image - //! @note If a NULL pointer is passed to this function and it is - //! initialized within Cutil then cutFree() has to be used to - //! deallocate the memory - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutLoadPGMi( const char* file, unsigned int** data, - unsigned int* w, unsigned int* h); - - //////////////////////////////////////////////////////////////////////////// - //! Load PGM image file (with unsigned short as data element type) - //! @return CUTTrue if reading the file succeeded, otherwise false - //! @param file name of the image file - //! @param data handle to the data read - //! @param w width of the image - //! @param h height of the image - //! 
@note If a NULL pointer is passed to this function and it is - //! initialized withing Cutil then cutFree() has to be used to - //! deallocate the memory - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutLoadPGMs( const char* file, unsigned short** data, - unsigned int* w, unsigned int* h); - - //////////////////////////////////////////////////////////////////////////// - //! Load PGM image file (with float as data element type) - //! @param file name of the image file - //! @param data handle to the data read - //! @param w width of the image - //! @param h height of the image - //! @note If a NULL pointer is passed to this function and it is - //! initialized withing Cutil then cutFree() has to be used to - //! deallocate the memory - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutLoadPGMf( const char* file, float** data, - unsigned int* w, unsigned int* h); - - //////////////////////////////////////////////////////////////////////////// - //! Save PGM image file (with unsigned char as data element type) - //! @param file name of the image file - //! @param data handle to the data read - //! @param w width of the image - //! @param h height of the image - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutSavePGMub( const char* file, unsigned char* data, - unsigned int w, unsigned int h); - - //////////////////////////////////////////////////////////////////////////// - //! Save PPM image file (with unsigned char as data element type) - //! @param file name of the image file - //! @param data handle to the data read - //! @param w width of the image - //! 
@param h height of the image - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutSavePPMub( const char* file, unsigned char *data, - unsigned int w, unsigned int h); - - //////////////////////////////////////////////////////////////////////////// - //! Save PPM image file (with unsigned char as data element type, padded to - //! 4 bytes) - //! @param file name of the image file - //! @param data handle to the data read - //! @param w width of the image - //! @param h height of the image - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutSavePPM4ub( const char* file, unsigned char *data, - unsigned int w, unsigned int h); - - //////////////////////////////////////////////////////////////////////////// - //! Save PGM image file (with unsigned int as data element type) - //! @param file name of the image file - //! @param data handle to the data read - //! @param w width of the image - //! @param h height of the image - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutSavePGMi( const char* file, unsigned int* data, - unsigned int w, unsigned int h); - - //////////////////////////////////////////////////////////////////////////// - //! Save PGM image file (with unsigned short as data element type) - //! @param file name of the image file - //! @param data handle to the data read - //! @param w width of the image - //! @param h height of the image - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutSavePGMs( const char* file, unsigned short* data, - unsigned int w, unsigned int h); - - //////////////////////////////////////////////////////////////////////////// - //! Save PGM image file (with float as data element type) - //! @param file name of the image file - //! 
@param data handle to the data read - //! @param w width of the image - //! @param h height of the image - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutSavePGMf( const char* file, float* data, - unsigned int w, unsigned int h); - - //////////////////////////////////////////////////////////////////////////// - // Command line arguments: General notes - // * All command line arguments begin with '--' followed by the token; - // token and value are seperated by '='; example --samples=50 - // * Arrays have the form --model=[one.obj,two.obj,three.obj] - // (without whitespaces) - //////////////////////////////////////////////////////////////////////////// - - //////////////////////////////////////////////////////////////////////////// - //! Check if command line argument \a flag-name is given - //! @return CUTTrue if command line argument \a flag_name has been given, - //! otherwise 0 - //! @param argc argc as passed to main() - //! @param argv argv as passed to main() - //! @param flag_name name of command line flag - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutCheckCmdLineFlag( const int argc, const char** argv, - const char* flag_name); - - //////////////////////////////////////////////////////////////////////////// - //! Get the value of a command line argument of type int - //! @return CUTTrue if command line argument \a arg_name has been given and - //! is of the requested type, otherwise CUTFalse - //! @param argc argc as passed to main() - //! @param argv argv as passed to main() - //! @param arg_name name of the command line argument - //! 
@param val value of the command line argument - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutGetCmdLineArgumenti( const int argc, const char** argv, - const char* arg_name, int* val); - - //////////////////////////////////////////////////////////////////////////// - //! Get the value of a command line argument of type float - //! @return CUTTrue if command line argument \a arg_name has been given and - //! is of the requested type, otherwise CUTFalse - //! @param argc argc as passed to main() - //! @param argv argv as passed to main() - //! @param arg_name name of the command line argument - //! @param val value of the command line argument - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutGetCmdLineArgumentf( const int argc, const char** argv, - const char* arg_name, float* val); - - //////////////////////////////////////////////////////////////////////////// - //! Get the value of a command line argument of type string - //! @return CUTTrue if command line argument \a arg_name has been given and - //! is of the requested type, otherwise CUTFalse - //! @param argc argc as passed to main() - //! @param argv argv as passed to main() - //! @param arg_name name of the command line argument - //! @param val value of the command line argument - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutGetCmdLineArgumentstr( const int argc, const char** argv, - const char* arg_name, char** val); - - //////////////////////////////////////////////////////////////////////////// - //! Get the value of a command line argument list those element are strings - //! @return CUTTrue if command line argument \a arg_name has been given and - //! is of the requested type, otherwise CUTFalse - //! @param argc argc as passed to main() - //! @param argv argv as passed to main() - //! 
@param arg_name name of the command line argument - //! @param val command line argument list - //! @param len length of the list / number of elements - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutGetCmdLineArgumentListstr( const int argc, const char** argv, - const char* arg_name, char** val, - unsigned int* len); - - //////////////////////////////////////////////////////////////////////////// - //! Extended assert - //! @return CUTTrue if the condition \a val holds, otherwise CUTFalse - //! @param val condition to test - //! @param file __FILE__ macro - //! @param line __LINE__ macro - //! @note This function should be used via the CONDITION(val) macro - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutCheckCondition( int val, const char* file, const int line); - - //////////////////////////////////////////////////////////////////////////// - //! Compare two float arrays - //! @return CUTTrue if \a reference and \a data are identical, - //! otherwise CUTFalse - //! @param reference handle to the reference data / gold image - //! @param data handle to the computed data - //! @param len number of elements in reference and data - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutComparef( const float* reference, const float* data, - const unsigned int len); - - //////////////////////////////////////////////////////////////////////////// - //! Compare two integer arrays - //! @return CUTTrue if \a reference and \a data are identical, - //! otherwise CUTFalse - //! @param reference handle to the reference data / gold image - //! @param data handle to the computed data - //! 
@param len number of elements in reference and data - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutComparei( const int* reference, const int* data, - const unsigned int len ); - - //////////////////////////////////////////////////////////////////////////////// - //! Compare two unsigned integer arrays, with epsilon and threshold - //! @return CUTTrue if \a reference and \a data are identical, - //! otherwise CUTFalse - //! @param reference handle to the reference data / gold image - //! @param data handle to the computed data - //! @param len number of elements in reference and data - //! @param threshold tolerance % # of comparison errors (0.15f = 15%) - //////////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutCompareuit( const unsigned int* reference, const unsigned int* data, - const unsigned int len, const float epsilon, const float threshold ); - - //////////////////////////////////////////////////////////////////////////// - //! Compare two unsigned char arrays - //! @return CUTTrue if \a reference and \a data are identical, - //! otherwise CUTFalse - //! @param reference handle to the reference data / gold image - //! @param data handle to the computed data - //! @param len number of elements in reference and data - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutCompareub( const unsigned char* reference, const unsigned char* data, - const unsigned int len ); - - //////////////////////////////////////////////////////////////////////////////// - //! Compare two integers with a tolernance for # of byte errors - //! @return CUTTrue if \a reference and \a data are identical, - //! otherwise CUTFalse - //! @param reference handle to the reference data / gold image - //! @param data handle to the computed data - //! 
@param len number of elements in reference and data - //! @param epsilon epsilon to use for the comparison - //! @param threshold tolerance % # of comparison errors (0.15f = 15%) - //////////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutCompareubt( const unsigned char* reference, const unsigned char* data, - const unsigned int len, const float epsilon, const float threshold ); - - //////////////////////////////////////////////////////////////////////////////// - //! Compare two integer arrays witha n epsilon tolerance for equality - //! @return CUTTrue if \a reference and \a data are identical, - //! otherwise CUTFalse - //! @param reference handle to the reference data / gold image - //! @param data handle to the computed data - //! @param len number of elements in reference and data - //! @param epsilon epsilon to use for the comparison - //////////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutCompareube( const unsigned char* reference, const unsigned char* data, - const unsigned int len, const float epsilon ); - - //////////////////////////////////////////////////////////////////////////// - //! Compare two float arrays with an epsilon tolerance for equality - //! @return CUTTrue if \a reference and \a data are identical, - //! otherwise CUTFalse - //! @param reference handle to the reference data / gold image - //! @param data handle to the computed data - //! @param len number of elements in reference and data - //! @param epsilon epsilon to use for the comparison - //////////////////////////////////////////////////////////////////////////// - DLL_MAPPING - CUTBoolean CUTIL_API - cutComparefe( const float* reference, const float* data, - const unsigned int len, const float epsilon ); - - //////////////////////////////////////////////////////////////////////////////// - //! 
Compare two float arrays with an epsilon tolerance for equality and a
-    //! threshold for # pixel errors
-    //! @return  CUTTrue if \a reference and \a data are identical,
-    //!          otherwise CUTFalse
-    //! @param reference  handle to the reference data / gold image
-    //! @param data       handle to the computed data
-    //! @param len        number of elements in reference and data
-    //! @param epsilon    epsilon to use for the comparison
-    //! @param threshold  threshold for the number of element (pixel) errors;
-    //!                   NOTE(review): count vs. fraction is not visible from
-    //!                   this header -- confirm against the implementation
-    ////////////////////////////////////////////////////////////////////////////
-    DLL_MAPPING
-    CUTBoolean CUTIL_API
-    cutComparefet( const float* reference, const float* data,
-                   const unsigned int len, const float epsilon, const float threshold );
-
-    ////////////////////////////////////////////////////////////////////////////
-    //! Compare two float arrays using L2-norm with an epsilon tolerance for
-    //! equality
-    //! @return  CUTTrue if \a reference and \a data are identical,
-    //!          otherwise CUTFalse
-    //! @param reference  handle to the reference data / gold image
-    //! @param data       handle to the computed data
-    //! @param len        number of elements in reference and data
-    //! @param epsilon    epsilon to use for the comparison
-    ////////////////////////////////////////////////////////////////////////////
-    DLL_MAPPING
-    CUTBoolean CUTIL_API
-    cutCompareL2fe( const float* reference, const float* data,
-                    const unsigned int len, const float epsilon );
-
-    ////////////////////////////////////////////////////////////////////////////
-    //! Compare two PPM image files with an epsilon tolerance for equality
-    //! @return  CUTTrue if \a src_file and \a ref_file are identical,
-    //!          otherwise CUTFalse
-    //! @param src_file   filename for the image to be compared
-    //! @param ref_file   filename for the reference data / gold image
-    //! @param epsilon    epsilon to use for the comparison
-    //! @param threshold  threshold of pixels that can still mismatch to pass (i.e. 0.15f = 15% must pass)
-    //! @param verboseErrors output details of image mismatch to std::err
-    ////////////////////////////////////////////////////////////////////////////
-    DLL_MAPPING
-    CUTBoolean CUTIL_API
-    cutComparePPM( const char *src_file, const char *ref_file, const float epsilon, const float threshold, bool verboseErrors = false );
-
-
-    ////////////////////////////////////////////////////////////////////////////
-    //! Timer functionality
-
-    ////////////////////////////////////////////////////////////////////////////
-    //! Create a new timer
-    //! @return CUTTrue if a timer has been created, otherwise CUTFalse
-    //! @param name  name of the new timer, 0 if the creation failed
-    ////////////////////////////////////////////////////////////////////////////
-    DLL_MAPPING
-    CUTBoolean CUTIL_API
-    cutCreateTimer( unsigned int* name);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //! Delete a timer
-    //! @return CUTTrue if a timer has been deleted, otherwise CUTFalse
-    //! @param name  name of the timer to delete
-    ////////////////////////////////////////////////////////////////////////////
-    DLL_MAPPING
-    CUTBoolean CUTIL_API
-    cutDeleteTimer( unsigned int name);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //! Start the timer with name \a name
-    //! @param name  name of the timer to start
-    ////////////////////////////////////////////////////////////////////////////
-    DLL_MAPPING
-    CUTBoolean CUTIL_API
-    cutStartTimer( const unsigned int name);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //! Stop the timer with name \a name. Does not reset.
-    //! @param name  name of the timer to stop
-    ////////////////////////////////////////////////////////////////////////////
-    DLL_MAPPING
-    CUTBoolean CUTIL_API
-    cutStopTimer( const unsigned int name);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //! Resets the timer's counter.
-    //! @param name  name of the timer to reset.
-    ////////////////////////////////////////////////////////////////////////////
-    DLL_MAPPING
-    CUTBoolean CUTIL_API
-    cutResetTimer( const unsigned int name);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //! Returns total execution time in milliseconds for the timer over all
-    //! runs since the last reset or timer creation.
-    //! @param name  name of the timer to return the time of
-    ////////////////////////////////////////////////////////////////////////////
-    DLL_MAPPING
-    float CUTIL_API
-    cutGetTimerValue( const unsigned int name);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //! Return the average time in milliseconds for timer execution as the
-    //! total time for the timer divided by the number of completed (stopped)
-    //! runs the timer has made.
-    //! Excludes the current running time if the timer is currently running.
-    //! @param name  name of the timer to return the time of
-    ////////////////////////////////////////////////////////////////////////////
-    DLL_MAPPING
-    float CUTIL_API
-    cutGetAverageTimerValue( const unsigned int name);
-
-    ////////////////////////////////////////////////////////////////////////////
-    //! 
Macros
-
-// Device-wide synchronize / reset, mapped to the API name available in the
-// CUDA runtime being compiled against. Note that both expansions already end
-// in a semicolon.
-#if CUDART_VERSION >= 4000
-#define CUT_DEVICE_SYNCHRONIZE( )   cudaDeviceSynchronize();
-#else
-#define CUT_DEVICE_SYNCHRONIZE( )   cudaThreadSynchronize();
-#endif
-
-#if CUDART_VERSION >= 4000
-#define CUT_DEVICE_RESET( )   cudaDeviceReset();
-#else
-#define CUT_DEVICE_RESET( )   cudaThreadExit();
-#endif
-
-// This is for the CUTIL bank checker
-#ifdef _DEBUG
-    #if __DEVICE_EMULATION__
-        // Interface for bank conflict checker: report the access through
-        // cutCheckBankAccess(), then evaluate to array[index].
-    #define CUT_BANK_CHECKER( array, index) \
-        (cutCheckBankAccess( threadIdx.x, threadIdx.y, threadIdx.z, blockDim.x, \
-        blockDim.y, blockDim.z, \
-        __FILE__, __LINE__, #array, index ), \
-        array[index])
-    #else
-    #define CUT_BANK_CHECKER( array, index)  array[index]
-    #endif
-#else
-    #define CUT_BANK_CHECKER( array, index)  array[index]
-#endif
-
-// Driver API: evaluate `call`, print the CUresult and exit on failure.
-#  define CU_SAFE_CALL_NO_SYNC( call ) { \
-    CUresult err = call; \
-    if( CUDA_SUCCESS != err) { \
-        fprintf(stderr, "Cuda driver error %x in file '%s' in line %i.\n", \
-                err, __FILE__, __LINE__ ); \
-        exit(EXIT_FAILURE); \
-    } }
-
-#  define CU_SAFE_CALL( call )       CU_SAFE_CALL_NO_SYNC(call);
-
-// Driver API: synchronize the current context, exit on failure.
-#  define CU_SAFE_CTX_SYNC( ) { \
-    CUresult err = cuCtxSynchronize(); \
-    if( CUDA_SUCCESS != err) { \
-        fprintf(stderr, "Cuda driver error %x in file '%s' in line %i.\n", \
-                err, __FILE__, __LINE__ ); \
-        exit(EXIT_FAILURE); \
-    } }
-
-// Runtime API: evaluate `call`, print the error string and exit on failure.
-#  define CUDA_SAFE_CALL_NO_SYNC( call) { \
-    cudaError err = call; \
-    if( cudaSuccess != err) { \
-        fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
-                __FILE__, __LINE__, cudaGetErrorString( err) ); \
-        exit(EXIT_FAILURE); \
-    } }
-
-// (A dangling line-continuation backslash used to follow this definition.)
-#  define CUDA_SAFE_CALL( call)     CUDA_SAFE_CALL_NO_SYNC(call);
-
-// Runtime API: synchronize the device and report (but do not exit on) errors.
-#  define CUDA_SAFE_THREAD_SYNC( ) { \
-    cudaError err = CUT_DEVICE_SYNCHRONIZE(); \
-    if ( cudaSuccess != err) { \
-        fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
-                __FILE__, __LINE__, cudaGetErrorString( err) ); \
-    } }
-
-// CUFFT: evaluate `call`, exit on failure.
-#  define CUFFT_SAFE_CALL( call) { \
-    cufftResult err = call; \
-    if( CUFFT_SUCCESS != err) { \
-        fprintf(stderr, "CUFFT error in file '%s' in line %i.\n", \
-                __FILE__, __LINE__); \
-        exit(EXIT_FAILURE); \
-    } }
-
-// CUTIL: exit unless `call` evaluates to CUTTrue. `call` is parenthesized so
-// that operator expressions passed as the argument compare as a whole.
-#  define CUT_SAFE_CALL( call) \
-    if( CUTTrue != (call)) { \
-        fprintf(stderr, "Cut error in file '%s' in line %i.\n", \
-                __FILE__, __LINE__); \
-        exit(EXIT_FAILURE); \
-    }
-
-    //! Check for CUDA error (debug builds additionally synchronize the device)
-#ifdef _DEBUG
-#  define CUT_CHECK_ERROR(errorMessage) { \
-    cudaError_t err = cudaGetLastError(); \
-    if( cudaSuccess != err) { \
-        fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
-                errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
-        exit(EXIT_FAILURE); \
-    } \
-    err = CUT_DEVICE_SYNCHRONIZE(); \
-    if( cudaSuccess != err) { \
-        fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
-                errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
-        exit(EXIT_FAILURE); \
-    } \
-    }
-#else
-#  define CUT_CHECK_ERROR(errorMessage) { \
-    cudaError_t err = cudaGetLastError(); \
-    if( cudaSuccess != err) { \
-        fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
-                errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
-        exit(EXIT_FAILURE); \
-    } \
-    }
-#endif
-
-    //! Check for malloc error
-    // (The stray "while(0);" that used to trail the closing braces was an
-    // empty loop statement left over from a do/while form and is dropped.)
-#  define CUT_SAFE_MALLOC( mallocCall ) { \
-    if( !(mallocCall)) { \
-        fprintf(stderr, "Host malloc failure in file '%s' in line %i\n", \
-                __FILE__, __LINE__); \
-        exit(EXIT_FAILURE); \
-    } }
-
-    //! 
Check if condition is true (flexible assert)
-    // Exits with EXIT_FAILURE when cutCheckCondition() returns CUTFalse.
-#  define CUT_CONDITION( val) \
-    if( CUTFalse == cutCheckCondition( val, __FILE__, __LINE__)) { \
-        exit(EXIT_FAILURE); \
-    }
-
-#if __DEVICE_EMULATION__
-
-// Device emulation: device initialization expands to nothing.
-#  define CUT_DEVICE_INIT(ARGC, ARGV)
-
-#else
-
-// Select the CUDA device given by the "device" command line argument (clamped
-// to [0, deviceCount-1]) and make it current; exits if no device is present
-// or the selected device reports a major compute capability below 1.
-#  define CUT_DEVICE_INIT(ARGC, ARGV) { \
-    int deviceCount; \
-    CUDA_SAFE_CALL_NO_SYNC(cudaGetDeviceCount(&deviceCount)); \
-    if (deviceCount == 0) { \
-        fprintf(stderr, "cutil error: no devices supporting CUDA.\n"); \
-        exit(EXIT_FAILURE); \
-    } \
-    int dev = 0; \
-    cutGetCmdLineArgumenti(ARGC, (const char **) ARGV, "device", &dev); \
-    if (dev < 0) dev = 0; \
-    if (dev > deviceCount-1) dev = deviceCount - 1; \
-    cudaDeviceProp deviceProp; \
-    CUDA_SAFE_CALL_NO_SYNC(cudaGetDeviceProperties(&deviceProp, dev)); \
-    if (deviceProp.major < 1) { \
-        fprintf(stderr, "cutil error: device does not support CUDA.\n"); \
-        exit(EXIT_FAILURE); \
-    } \
-    if (cutCheckCmdLineFlag(ARGC, (const char **) ARGV, "quiet") == CUTFalse) \
-        fprintf(stderr, "Using device %d: %s\n", dev, deviceProp.name); \
-    CUDA_SAFE_CALL(cudaSetDevice(dev)); \
-}
-
-
-    //! Check for CUDA context lost
-    // Exits if cudaGetLastError() or the following device synchronize reports
-    // anything other than cudaSuccess. Only defined in the non-emulation
-    // branch of the surrounding #if.
-#  define CUDA_CHECK_CTX_LOST(errorMessage) { \
-    cudaError_t err = cudaGetLastError(); \
-    if( cudaSuccess != err) { \
-        fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
-                errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
-        exit(EXIT_FAILURE); \
-    } \
-    err = CUT_DEVICE_SYNCHRONIZE(); \
-    if( cudaSuccess != err) { \
-        fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
-                errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
-        exit(EXIT_FAILURE); \
-    } }
-
-//! 
Check for CUDA context lost
-// NOTE(review): the first test exits for every value other than
-// CUDA_ERROR_INVALID_CONTEXT -- including cudaSuccess -- and compares a
-// runtime cudaError_t against a driver-API CUresult enumerator. This looks
-// like an inverted / half-converted copy of CUDA_CHECK_CTX_LOST; confirm the
-// intended condition before relying on this macro.
-#  define CU_CHECK_CTX_LOST(errorMessage) { \
-    cudaError_t err = cudaGetLastError(); \
-    if( CUDA_ERROR_INVALID_CONTEXT != err) { \
-        fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
-                errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
-        exit(EXIT_FAILURE); \
-    } \
-    err = CUT_DEVICE_SYNCHRONIZE(); \
-    if( cudaSuccess != err) { \
-        fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
-                errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
-        exit(EXIT_FAILURE); \
-    } }
-
-
-#endif
-
-// Driver API device selection: initialize the driver, pick the device given
-// by the "device" command line argument (clamped to [0, deviceCount-1]) and
-// store its handle in `cuDevice`; exits if no device is available.
-#  define CUT_DEVICE_INIT_DRV(cuDevice, ARGC, ARGV) { \
-    cuDevice = 0; \
-    int deviceCount = 0; \
-    CUresult err = cuInit(0); \
-    if (CUDA_SUCCESS == err) \
-        CU_SAFE_CALL_NO_SYNC(cuDeviceGetCount(&deviceCount)); \
-    if (deviceCount == 0) { \
-        fprintf(stderr, "cutil error: no devices supporting CUDA\n"); \
-        exit(EXIT_FAILURE); \
-    } \
-    int dev = 0; \
-    cutGetCmdLineArgumenti(ARGC, (const char **) ARGV, "device", &dev); \
-    if (dev < 0) dev = 0; \
-    if (dev > deviceCount-1) dev = deviceCount - 1; \
-    CU_SAFE_CALL_NO_SYNC(cuDeviceGet(&cuDevice, dev)); \
-    char name[100]; \
-    cuDeviceGetName(name, 100, cuDevice); \
-    if (cutCheckCmdLineFlag(ARGC, (const char **) ARGV, "quiet") == CUTFalse) \
-        fprintf(stderr, "Using device %d: %s\n", dev, name); \
-}
-
-// Wait for ENTER unless "noprompt" was passed, then exit successfully.
-#define CUT_EXIT(argc, argv) \
-    if (!cutCheckCmdLineFlag(argc, (const char**)argv, "noprompt")) { \
-        printf("\nPress ENTER to exit...\n"); \
-        fflush( stdout); \
-        fflush( stderr); \
-        getchar(); \
-    } \
-    exit(EXIT_SUCCESS);
-
-
-#ifdef __cplusplus
-}
-#endif  // #ifdef __cplusplus (closes the extern "C" block)
-
-#endif  // #ifndef _CUTIL_H_
diff --git a/cuda/common/include/pcl/cuda/cutil_inline.h b/cuda/common/include/pcl/cuda/cutil_inline.h
deleted file mode 100644
index 71885ee8..00000000
--- a/cuda/common/include/pcl/cuda/cutil_inline.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-#ifndef _CUTIL_INLINE_H_
-#define _CUTIL_INLINE_H_
-
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-// Print the CUDA toolkit version this translation unit was built with,
-// followed by the toolkit / driver requirements of the named SDK sample.
-inline void print_NVCC_min_spec(const char *sSDKsample, const char *sNVCCReq, const char *sDriverReq)
-{
-    printf("CUDA %d.%02d Toolkit built this project.\n", CUDART_VERSION/1000, (CUDART_VERSION%100));
-    printf(" [ %s ] requirements:\n", sSDKsample);
-    printf(" -> CUDA %s Toolkit\n" , sNVCCReq);
-    printf(" -> %s NVIDIA Display Driver.\n", sDriverReq);
-}
-
-// Round `offset` up in place to the next multiple of `alignment` (which must
-// be a power of two for the mask to be valid). Arguments are parenthesized so
-// that expressions such as ALIGN_OFFSET(off, a + b) expand correctly.
-#define ALIGN_OFFSET(offset, alignment)    (offset) = ((offset) + ((alignment) - 1)) & ~((alignment) - 1)
-
-
-#endif  // _CUTIL_INLINE_H_
diff --git a/cuda/common/include/pcl/cuda/cutil_inline_bankchecker.h b/cuda/common/include/pcl/cuda/cutil_inline_bankchecker.h
deleted file mode 100644
index d313d8e0..00000000
--- a/cuda/common/include/pcl/cuda/cutil_inline_bankchecker.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
- * - */ - - #ifndef _CUTIL_INLINE_BANKCHECKER_H_ -#define _CUTIL_INLINE_BANKCHECKER_H_ - -#ifdef _DEBUG - #if __DEVICE_EMULATION__ - #define cutilBankChecker(array, idx) (__cutilBankChecker (threadIdx.x, threadIdx.y, threadIdx.z, \ - blockDim.x, blockDim.y, blockDim.z, \ - #array, idx, __FILE__, __LINE__), \ - array[idx]) - - #else - #define cutilBankChecker(array, idx) array[idx] - #endif -#else - #define cutilBankChecker(array, idx) array[idx] -#endif - - // Interface for bank conflict checker -inline void __cutilBankChecker(unsigned int tidx, unsigned int tidy, unsigned int tidz, - unsigned int bdimx, unsigned int bdimy, unsigned int bdimz, - char *aname, int index, char *file, int line) -{ - cutCheckBankAccess( tidx, tidy, tidz, bdimx, bdimy, bdimz, file, line, aname, index); -} - -#endif // _CUTIL_INLINE_BANKCHECKER_H_ diff --git a/cuda/common/include/pcl/cuda/cutil_inline_drvapi.h b/cuda/common/include/pcl/cuda/cutil_inline_drvapi.h deleted file mode 100644 index 50261782..00000000 --- a/cuda/common/include/pcl/cuda/cutil_inline_drvapi.h +++ /dev/null @@ -1,384 +0,0 @@ -/* - * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. 
- *
- */
-
-#ifndef _CUTIL_INLINE_FUNCTIONS_DRVAPI_H_
-#define _CUTIL_INLINE_FUNCTIONS_DRVAPI_H_
-
-#include
-#include
-#include
-
-
-// We define these calls here, so the user doesn't need to include __FILE__ and __LINE__
-// The advantage is the developers gets to use the inline function so they can debug
-#define cutilDrvSafeCallNoSync(err)     __cuSafeCallNoSync  (err, __FILE__, __LINE__)
-#define cutilDrvSafeCall(err)           __cuSafeCall        (err, __FILE__, __LINE__)
-#define cutilDrvCtxSync()               __cuCtxSync         (__FILE__, __LINE__)
-#define cutilDrvCheckMsg(msg)           __cuCheckMsg        (msg, __FILE__, __LINE__)
-// Rounds `offset` up in place to a multiple of `alignment` (power of two).
-// Arguments are parenthesized so expression arguments expand correctly.
-#define cutilDrvAlignOffset(offset, alignment)  ( (offset) = ((offset) + ((alignment) - 1)) & ~((alignment) - 1) )
-
-// These are the inline versions for all of the CUTIL functions
-
-// Print the driver error code with its source location and exit on failure.
-inline void __cuSafeCallNoSync( CUresult err, const char *file, const int line )
-{
-    if( CUDA_SUCCESS != err) {
-        fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
-                err, file, line );
-        exit(-1);
-    }
-}
-inline void __cuSafeCall( CUresult err, const char *file, const int line )
-{
-    __cuSafeCallNoSync( err, file, line );
-}
-
-// Synchronize the current context; exit on failure.
-inline void __cuCtxSync(const char *file, const int line )
-{
-    CUresult err = cuCtxSynchronize();
-    if( CUDA_SUCCESS != err ) {
-        fprintf(stderr, "cuCtxSynchronize() API error = %04d in file <%s>, line %i.\n",
-                err, file, line );
-        exit(-1);
-    }
-}
-
-// NOTE(review): arguments are not parenthesized and are evaluated twice. The
-// text is left unchanged because cutil_inline_runtime.h defines the same
-// macros with the same replacement list, and a redefinition must match it.
-#define MIN(a,b) ((a < b) ? a : b)
-#define MAX(a,b) ((a > b) ? a : b)
-
-// Beginning of GPU Architecture definitions
-// Returns the number of cores per SM for the given compute capability, or -1
-// (after printing a message) when the version is not in the table.
-inline int _ConvertSMVer2CoresDrvApi(int major, int minor)
-{
-    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
-    typedef struct {
-        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
-        int Cores;
-    } sSMtoCores;
-
-    sSMtoCores nGpuArchCoresPerSM[] =
-    { { 0x10,  8 },
-      { 0x11,  8 },
-      { 0x12,  8 },
-      { 0x13,  8 },
-      { 0x20, 32 },
-      { 0x21, 48 },
-      {   -1, -1 }
-    };
-
-    int index = 0;
-    while (nGpuArchCoresPerSM[index].SM != -1) {
-        if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) {
-            return nGpuArchCoresPerSM[index].Cores;
-        }
-        index++;
-    }
-    printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
-    return -1;
-}
-// end of GPU Architecture definitions
-
-// This function returns the best GPU based on performance
-// (multiprocessor count * cores per SM * clock rate).
-inline int cutilDrvGetMaxGflopsDeviceId()
-{
-    CUdevice current_device = 0, max_perf_device = 0;
-    int device_count     = 0, sm_per_multiproc = 0;
-    int max_compute_perf = 0, best_SM_arch     = 0;
-    int major = 0, minor = 0, multiProcessorCount, clockRate;
-
-    cuInit(0);
-    cutilDrvSafeCallNoSync(cuDeviceGetCount(&device_count));
-
-    // Find the best major SM Architecture GPU device
-    while ( current_device < device_count ) {
-        cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
-        if (major > 0 && major < 9999) {
-            best_SM_arch = MAX(best_SM_arch, major);
-        }
-        current_device++;
-    }
-
-    // Find the best CUDA capable GPU device
-    current_device = 0;
-    while( current_device < device_count ) {
-        cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &multiProcessorCount,
-                                                      CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
-                                                      current_device ) );
-        cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &clockRate,
-                                                      CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
-                                                      current_device ) );
-        cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
-
-        if (major == 9999 && minor == 9999) {
-            sm_per_multiproc = 1;
-        } else {
-            sm_per_multiproc = _ConvertSMVer2CoresDrvApi(major, minor);
-        }
-
-        int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate;
-        if( compute_perf > max_compute_perf ) {
-            // If we find GPU with SM major > 2, search only these
-            if ( best_SM_arch > 2 ) {
-                // If our device==best_SM_arch, choose this, or else pass
-                if (major == best_SM_arch) {
-                    max_compute_perf = compute_perf;
-                    max_perf_device  = current_device;
-                }
-            } else {
-                max_compute_perf = compute_perf;
-                max_perf_device  = current_device;
-            }
-        }
-        ++current_device;
-    }
-    return max_perf_device;
-}
-
-// This function returns the best Graphics GPU based on performance
-// (devices running the TCC driver are skipped).
-inline int cutilDrvGetMaxGflopsGraphicsDeviceId()
-{
-    CUdevice current_device = 0, max_perf_device = 0;
-    int device_count     = 0, sm_per_multiproc = 0;
-    int max_compute_perf = 0, best_SM_arch     = 0;
-    int major = 0, minor = 0, multiProcessorCount, clockRate;
-    int bTCC = 0;
-    char deviceName[256];
-
-    cuInit(0);
-    cutilDrvSafeCallNoSync(cuDeviceGetCount(&device_count));
-
-    // Find the best major SM Architecture GPU device that are graphics devices
-    while ( current_device < device_count ) {
-        cutilDrvSafeCallNoSync( cuDeviceGetName(deviceName, 256, current_device) );
-        cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
-
-#if CUDA_VERSION >= 3020
-        cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device ) );
-#else
-        // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1.
-        // Recomputed for every device so one Tesla does not mark all later
-        // devices as TCC.
-        bTCC = (deviceName[0] == 'T') ? 1 : 0;
-#endif
-        if (!bTCC) {
-            if (major > 0 && major < 9999) {
-                best_SM_arch = MAX(best_SM_arch, major);
-            }
-        }
-        current_device++;
-    }
-
-    // Find the best CUDA capable GPU device
-    current_device = 0;
-    while( current_device < device_count ) {
-        cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &multiProcessorCount,
-                                                      CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
-                                                      current_device ) );
-        cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &clockRate,
-                                                      CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
-                                                      current_device ) );
-        cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
-
-#if CUDA_VERSION >= 3020
-        cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device ) );
-#else
-        // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1.
-        // The name is queried again here: the buffer otherwise still holds
-        // the name of the last device visited by the first loop.
-        cutilDrvSafeCallNoSync( cuDeviceGetName(deviceName, 256, current_device) );
-        bTCC = (deviceName[0] == 'T') ? 1 : 0;
-#endif
-
-        if (major == 9999 && minor == 9999) {
-            sm_per_multiproc = 1;
-        } else {
-            sm_per_multiproc = _ConvertSMVer2CoresDrvApi(major, minor);
-        }
-
-        // If this is a Tesla based GPU and SM 2.0, and TCC is disabled, this is a contender
-        if (!bTCC) // Is this GPU running the TCC driver?  If so we pass on this
-        {
-            int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate;
-            if( compute_perf > max_compute_perf ) {
-                // If we find GPU with SM major > 2, search only these
-                if ( best_SM_arch > 2 ) {
-                    // If our device = best_SM_arch, then we pick this one
-                    if (major == best_SM_arch) {
-                        max_compute_perf = compute_perf;
-                        max_perf_device  = current_device;
-                    }
-                } else {
-                    max_compute_perf = compute_perf;
-                    max_perf_device  = current_device;
-                }
-            }
-        }
-        ++current_device;
-    }
-    return max_perf_device;
-}
-
-// Synchronize the current context; on failure print `msg` and the error, exit.
-inline void __cuCheckMsg( const char * msg, const char *file, const int line )
-{
-    CUresult err = cuCtxSynchronize();
-    if( CUDA_SUCCESS != err) {
-        fprintf(stderr, "cutilDrvCheckMsg -> %s", msg);
-        fprintf(stderr, "cutilDrvCheckMsg -> cuCtxSynchronize API error = %04d in file <%s>, line %i.\n",
-                err, file, line );
-        exit(-1);
-    }
-}
-
-
-// Initialize the driver API and select the device requested with "device" on
-// the command line. Returns the device index, or -dev when the requested
-// index is larger than the last available device.
-#if __DEVICE_EMULATION__
-    // Emulation stub: returns a value so control does not flow off the end
-    // of a non-void function.
-    inline int cutilDeviceInitDrv(int ARGC, char **ARGV) { return 0; }
-#else
-    inline int cutilDeviceInitDrv(int ARGC, char ** ARGV)
-    {
-        int cuDevice = 0;
-        int deviceCount = 0;
-        CUresult err = cuInit(0);
-        if (CUDA_SUCCESS == err)
-            cutilDrvSafeCallNoSync(cuDeviceGetCount(&deviceCount));
-        if (deviceCount == 0) {
-            fprintf(stderr, "CUTIL DeviceInitDrv error: no devices supporting CUDA\n");
-            exit(-1);
-        }
-        int dev = 0;
-        cutGetCmdLineArgumenti(ARGC, (const char **) ARGV, "device", &dev);
-        if (dev < 0) dev = 0;
-        if (dev > deviceCount-1) {
-            fprintf(stderr, "\n");
-            fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
-            fprintf(stderr, ">> cutilDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
-            fprintf(stderr, "\n");
-            return -dev;
-        }
-        cutilDrvSafeCallNoSync(cuDeviceGet(&cuDevice, dev));
-        char name[100];
-        cuDeviceGetName(name, 100, cuDevice);
-        if (cutCheckCmdLineFlag(ARGC, (const char **) ARGV, "quiet") == CUTFalse) {
-            printf("> Using CUDA Device [%d]: %s\n", dev, name);
-        }
-        return dev;
-    }
-#endif
-
-    // General initialization call to pick the best CUDA Device
-#if __DEVICE_EMULATION__
-    // Emulation stub. The original had a bare function header here with no
-    // body, which does not compile; report device 0.
-    inline CUdevice cutilChooseCudaDeviceDrv(int argc, char **argv, int *p_devID)
-    {
-        if (p_devID) *p_devID = 0;
-        return 0;
-    }
-#else
-    inline CUdevice cutilChooseCudaDeviceDrv(int argc, char **argv, int *p_devID)
-    {
-        CUdevice cuDevice;
-        int devID = 0;
-        // If the command-line has a device number specified, use it
-        if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) {
-            devID = cutilDeviceInitDrv(argc, argv);
-            if (devID < 0) {
-                printf("exiting...\n");
-                exit(0);
-            }
-        } else {
-            // Otherwise pick the device with highest Gflops/s
-            char name[100];
-            devID = cutilDrvGetMaxGflopsDeviceId();
-            cutilDrvSafeCallNoSync(cuDeviceGet(&cuDevice, devID));
-            cuDeviceGetName(name, 100, cuDevice);
-            printf("> Using CUDA Device [%d]: %s\n", devID, name);
-        }
-        cuDeviceGet(&cuDevice, devID);
-        if (p_devID) *p_devID = devID;
-        return cuDevice;
-    }
-#endif
-
-
-//! 
Check for CUDA context lost
-// Synchronizes the current context and exits when the context has been lost
-// (CUDA_ERROR_INVALID_CONTEXT) or when a second synchronize reports any other
-// failure. The first comparison used to be "!=", which aborted on every
-// result except a lost context -- including CUDA_SUCCESS.
-inline void cutilDrvCudaCheckCtxLost(const char *errorMessage, const char *file, const int line )
-{
-    CUresult err = cuCtxSynchronize();
-    if( CUDA_ERROR_INVALID_CONTEXT == err) {
-        fprintf(stderr, "Cuda error: %s in file '%s' in line %i\n",
-                errorMessage, file, line );
-        exit(-1);
-    }
-    err = cuCtxSynchronize();
-    if( CUDA_SUCCESS != err) {
-        fprintf(stderr, "Cuda error: %s in file '%s' in line %i\n",
-                errorMessage, file, line );
-        exit(-1);
-    }
-}
-
-// Case-insensitive string comparison under one name per platform.
-#ifndef STRCASECMP
-#ifdef _WIN32
-#define STRCASECMP  _stricmp
-#else
-#define STRCASECMP  strcasecmp
-#endif
-#endif
-
-#ifndef STRNCASECMP
-#ifdef _WIN32
-#define STRNCASECMP _strnicmp
-#else
-#define STRNCASECMP strncasecmp
-#endif
-#endif
-
-// Print the test verdict. When "-qatest" or "-noprompt" is among the
-// arguments the "&&&& <status> <command line>" form is used, otherwise a
-// two-line human readable form. bStatus indexes sStatus (false -> "FAILED",
-// true -> "PASSED").
-inline void __cutilDrvQAFinish(int argc, char **argv, bool bStatus)
-{
-    const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL };
-
-    bool bFlag = false;
-    for (int i=1; i < argc; i++) {
-        if (!STRCASECMP(argv[i], "-qatest") || !STRCASECMP(argv[i], "-noprompt")) {
-            bFlag |= true;
-        }
-    }
-
-    if (bFlag) {
-        printf("&&&& %s %s", sStatus[bStatus], argv[0]);
-        for (int i=1; i < argc; i++) printf(" %s", argv[i]);
-    } else {
-        printf("[%s] test result\n%s\n", argv[0], sStatus[bStatus]);
-    }
-}
-
-// General check for CUDA GPU SM Capabilities for a specific device #
-// Returns true when device `deviceNum` has compute capability of at least
-// major_version.minor_version; otherwise prints a message, reports the test
-// through __cutilDrvQAFinish(..., true) and returns false.
-inline bool cutilDrvCudaDevCapabilities(int major_version, int minor_version, int deviceNum, int argc, char** argv)
-{
-    int major, minor, dev;
-    char device_name[256];
-
-#ifdef __DEVICE_EMULATION__
-    printf("> Compute Device Emulation Mode \n");
-#endif
-
-    cutilDrvSafeCallNoSync( cuDeviceGet(&dev, deviceNum) );
-    cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, dev));
-    cutilDrvSafeCallNoSync( cuDeviceGetName(device_name, 256, dev) );
-
-    if((major > major_version) ||
-       (major == major_version && minor >= minor_version))
-    {
-        printf("> Device %d: < %s >, Compute SM %d.%d detected\n", dev, device_name, major, minor);
-        return true;
-    }
-    else
-    {
-        printf("There is no device supporting CUDA compute capability %d.%d.\n", major_version, minor_version);
-        __cutilDrvQAFinish(argc, argv, true);
-        return false;
-    }
-}
-
-// General check for CUDA GPU SM Capabilities (device 0)
-inline bool cutilDrvCudaCapabilities(int major_version, int minor_version, int argc, char **argv)
-{
-    return cutilDrvCudaDevCapabilities(major_version, minor_version, 0, argc, argv);
-}
-
-
-#endif // _CUTIL_INLINE_FUNCTIONS_DRVAPI_H_
diff --git a/cuda/common/include/pcl/cuda/cutil_inline_runtime.h b/cuda/common/include/pcl/cuda/cutil_inline_runtime.h
deleted file mode 100644
index c0a607be..00000000
--- a/cuda/common/include/pcl/cuda/cutil_inline_runtime.h
+++ /dev/null
@@ -1,488 +0,0 @@
-/*
- * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-#ifndef _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_
-#define _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_
-
-#ifdef _WIN32
-#ifdef _DEBUG // Do this only in debug mode...
-// NOTE(review): the macro honoured by the Windows SDK headers is
-// WIN32_LEAN_AND_MEAN; as spelled here the define presumably has no effect.
-// Left unchanged because switching it on alters what the header exposes.
-#  define WINDOWS_LEAN_AND_MEAN
-#  include
-#  include
-#  undef min
-#  undef max
-#endif
-#endif
-
-#include
-#include
-#include
-
-#include
-
-// We define these calls here, so the user doesn't need to include __FILE__ and __LINE__
-// The advantage is the developers gets to use the inline function so they can debug
-#define cutilSafeCallNoSync(err)     __cudaSafeCallNoSync(err, __FILE__, __LINE__)
-#define cutilSafeCall(err)           __cudaSafeCall      (err, __FILE__, __LINE__)
-#define cutilSafeThreadSync()        __cudaSafeThreadSync(__FILE__, __LINE__)
-#define cufftSafeCall(err)           __cufftSafeCall     (err, __FILE__, __LINE__)
-#define cutilCheckError(err)         __cutilCheckError   (err, __FILE__, __LINE__)
-#define cutilCheckMsg(msg)           __cutilGetLastError (msg, __FILE__, __LINE__)
-#define cutilCheckMsgAndSync(msg)    __cutilGetLastErrorAndSync (msg, __FILE__, __LINE__)
-#define cutilSafeMalloc(mallocCall)  __cutilSafeMalloc   ((mallocCall), __FILE__, __LINE__)
-#define cutilCondition(val)          __cutilCondition    (val, __FILE__, __LINE__)
-#define cutilExit(argc, argv)        __cutilExit         (argc, argv)
-
-// Device-wide synchronize, using the API name of the runtime in use.
-inline cudaError cutilDeviceSynchronize()
-{
-#if CUDART_VERSION >= 4000
-    return cudaDeviceSynchronize();
-#else
-    return cudaThreadSynchronize();
-#endif
-}
-
-// Device reset, using the API name of the runtime in use.
-inline cudaError cutilDeviceReset()
-{
-#if CUDART_VERSION >= 4000
-    return cudaDeviceReset();
-#else
-    return cudaThreadExit();
-#endif
-}
-
-// Exit with EXIT_FAILURE when cutCheckCondition() reports CUTFalse.
-// `file` is const because the cutilCondition macro passes __FILE__.
-inline void __cutilCondition(int val, const char *file, int line)
-{
-    if( CUTFalse == cutCheckCondition( val, file, line ) ) {
-        exit(EXIT_FAILURE);
-    }
-}
-
-// Wait for ENTER unless "noprompt" was passed, then exit successfully.
-inline void __cutilExit(int argc, char **argv)
-{
-    if (!cutCheckCmdLineFlag(argc, (const char**)argv, "noprompt")) {
-        printf("\nPress ENTER to exit...\n");
-        fflush( stdout);
-        fflush( stderr);
-        getchar();
-    }
-    exit(EXIT_SUCCESS);
-}
-
-// NOTE(review): arguments are not parenthesized and are evaluated twice. The
-// text is left unchanged because cutil_inline_drvapi.h defines the same
-// macros with the same replacement list, and a redefinition must match it.
-#define MIN(a,b) ((a < b) ? a : b)
-#define MAX(a,b) ((a > b) ? a : b)
-
-// Beginning of GPU Architecture definitions
-// Returns the number of cores per SM for the given compute capability, or -1
-// (after printing a message) when the version is not in the table.
-inline int _ConvertSMVer2Cores(int major, int minor)
-{
-    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
-    typedef struct {
-        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
-        int Cores;
-    } sSMtoCores;
-
-    sSMtoCores nGpuArchCoresPerSM[] =
-    { { 0x10,  8 },
-      { 0x11,  8 },
-      { 0x12,  8 },
-      { 0x13,  8 },
-      { 0x20, 32 },
-      { 0x21, 48 },
-      {   -1, -1 }
-    };
-
-    int index = 0;
-    while (nGpuArchCoresPerSM[index].SM != -1) {
-        if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) {
-            return nGpuArchCoresPerSM[index].Cores;
-        }
-        index++;
-    }
-    printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
-    return -1;
-}
-// end of GPU Architecture definitions
-
-// This function returns the best GPU (with maximum GFLOPS)
-inline int cutGetMaxGflopsDeviceId()
-{
-    int current_device   = 0, sm_per_multiproc = 0;
-    int max_compute_perf = 0, max_perf_device  = 0;
-    int device_count     = 0, best_SM_arch     = 0;
-    cudaDeviceProp deviceProp;
-
-    cudaGetDeviceCount( &device_count );
-    // Find the best major SM Architecture GPU device
-    while ( current_device < device_count ) {
-        cudaGetDeviceProperties( &deviceProp, current_device );
-        if (deviceProp.major > 0 && deviceProp.major < 9999) {
-            best_SM_arch = MAX(best_SM_arch, deviceProp.major);
-        }
-        current_device++;
-    }
-
-    // Find the best CUDA capable GPU device
-    current_device = 0;
-    while( current_device < device_count ) {
-        cudaGetDeviceProperties( &deviceProp, current_device );
-        if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
-            sm_per_multiproc = 1;
-        } else {
-            sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
-        }
-
-        int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
-        if( compute_perf > max_compute_perf ) {
-            // If we find GPU with SM major > 2, search only these
-            if ( best_SM_arch > 2 ) {
-                // If our device==best_SM_arch, choose this, or else pass
-                if (deviceProp.major == best_SM_arch) {
-                    max_compute_perf = compute_perf;
-                    max_perf_device  = current_device;
-                }
-            } else {
-                max_compute_perf = compute_perf;
-                max_perf_device  = current_device;
-            }
-        }
-        ++current_device;
-    }
-    return max_perf_device;
-}
-
-// This function returns the best graphics-capable GPU (with maximum GFLOPS);
-// devices running the TCC driver are skipped.
-inline int cutGetMaxGflopsGraphicsDeviceId()
-{
-    int current_device   = 0, sm_per_multiproc = 0;
-    int max_compute_perf = 0, max_perf_device  = 0;
-    int device_count     = 0, best_SM_arch     = 0;
-    int bTCC = 0;
-    cudaDeviceProp deviceProp;
-
-    cudaGetDeviceCount( &device_count );
-    // Find the best major SM Architecture GPU device that is graphics capable
-    while ( current_device < device_count ) {
-        cudaGetDeviceProperties( &deviceProp, current_device );
-
-        // bTCC is recomputed for every device; it used to be set and never
-        // cleared, so one TCC device excluded every device after it.
-#if CUDA_VERSION >= 3020
-        bTCC = deviceProp.tccDriver ? 1 : 0;
-#else
-        // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
-        bTCC = (deviceProp.name[0] == 'T') ? 1 : 0;
-#endif
-
-        if (!bTCC) {
-            if (deviceProp.major > 0 && deviceProp.major < 9999) {
-                best_SM_arch = MAX(best_SM_arch, deviceProp.major);
-            }
-        }
-        current_device++;
-    }
-
-    // Find the best CUDA capable GPU device
-    current_device = 0;
-    while( current_device < device_count ) {
-        cudaGetDeviceProperties( &deviceProp, current_device );
-        if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
-            sm_per_multiproc = 1;
-        } else {
-            sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
-        }
-
-#if CUDA_VERSION >= 3020
-        bTCC = deviceProp.tccDriver ? 1 : 0;
-#else
-        // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
-        bTCC = (deviceProp.name[0] == 'T') ? 1 : 0;
-#endif
-
-        if (!bTCC) // Is this GPU running the TCC driver?  If so we pass on this
-        {
-            int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
-            if( compute_perf > max_compute_perf ) {
-                // If we find GPU with SM major > 2, search only these
-                if ( best_SM_arch > 2 ) {
-                    // If our device==best_SM_arch, choose this, or else pass
-                    if (deviceProp.major == best_SM_arch) {
-                        max_compute_perf = compute_perf;
-                        max_perf_device  = current_device;
-                    }
-                } else {
-                    max_compute_perf = compute_perf;
-                    max_perf_device  = current_device;
-                }
-            }
-        }
-        ++current_device;
-    }
-    return max_perf_device;
-}
-
-// Give a little more for Windows : the console window often disappears before we can read the message
-#ifdef _WIN32
-# if 1//ndef UNICODE
-#  ifdef _DEBUG // Do this only in debug mode...
-    // Format the message once, then send it both to the debugger output and
-    // to `file`. The buffer is doubled until the formatted text fits with its
-    // terminating NUL. Compared with the original:
-    //  - the va_list is restarted for every formatting attempt and closed
-    //    with va_end (a va_list must not be reused after it was consumed);
-    //  - a result equal to the buffer size counts as "did not fit", because
-    //    _vsnprintf writes no terminator in that case;
-    //  - allocation failure is handled instead of dereferencing NULL;
-    //  - the text is printed through "%s" so that '%' characters in the
-    //    already formatted message are not interpreted again.
-    inline void VSPrintf(FILE *file, LPCSTR fmt, ...)
-    {
-        size_t fmt2_sz = 2048;
-        char *fmt2 = (char*)malloc(fmt2_sz);
-        while (fmt2)
-        {
-            va_list vlist;
-            va_start(vlist, fmt);
-            int written = _vsnprintf(fmt2, fmt2_sz, fmt, vlist);
-            va_end(vlist);
-            if (written >= 0 && (size_t)written < fmt2_sz)
-                break;              // fits, and is NUL-terminated
-            fmt2_sz *= 2;           // means there wasn't enough room
-            free(fmt2);
-            fmt2 = (char*)malloc(fmt2_sz);
-        }
-        if (!fmt2)
-            return;                 // out of memory: drop the message
-        OutputDebugStringA(fmt2);
-        fprintf(file, "%s", fmt2);
-        free(fmt2);
-    }
-#   define FPRINTF(a) VSPrintf a
-#  else //debug
-#   define FPRINTF(a) fprintf a
-// For other than Win32
-#  endif //debug
-# else //unicode
-// Unicode case... let's give-up for now and keep basic printf
-#   define FPRINTF(a) fprintf a
-# endif //unicode
-#else //win32
-#   define FPRINTF(a) fprintf a
-#endif //win32
-
-// NOTE: "%s(%i) : " allows Visual Studio to directly jump to the file at the right line
-// when the user double clicks on the error line in the Output pane. Like any compile error.
-
-// Print the runtime error string with its source location and exit when
-// `err` is not cudaSuccess.
-inline void __cudaSafeCallNoSync( cudaError err, const char *file, const int line )
-{
-    if( cudaSuccess != err) {
-        FPRINTF((stderr, "%s(%i) : cudaSafeCallNoSync() Runtime API error : %s.\n",
-                file, line, cudaGetErrorString( err) ));
-        exit(-1);
-    }
-}
-
-// Same check as __cudaSafeCallNoSync, with its own message prefix.
-inline void __cudaSafeCall( cudaError err, const char *file, const int line )
-{
-    if( cudaSuccess != err) {
-        FPRINTF((stderr, "%s(%i) : cudaSafeCall() Runtime API error : %s.\n",
-                file, line, cudaGetErrorString( err) ));
-        exit(-1);
-    }
-}
-
-// Synchronize the device; exit on failure.
-inline void __cudaSafeThreadSync( const char *file, const int line )
-{
-    cudaError err = cutilDeviceSynchronize();
-    if ( cudaSuccess != err) {
-        FPRINTF((stderr, "%s(%i) : cudaDeviceSynchronize() Runtime API error : %s.\n",
-                file, line, cudaGetErrorString( err) ));
-        exit(-1);
-    }
-}
-
-// Exit when a CUFFT call did not return CUFFT_SUCCESS.
-inline void __cufftSafeCall( cufftResult err, const char *file, const int line )
-{
-    if( CUFFT_SUCCESS != err) {
-        FPRINTF((stderr, "%s(%i) : cufftSafeCall() CUFFT error.\n",
-                file, line));
-        exit(-1);
-    }
-}
-
-// Exit when a CUTIL call did not return CUTTrue.
-inline void __cutilCheckError( CUTBoolean err, const char *file, const int line )
-{
-    if( CUTTrue != err) {
-        FPRINTF((stderr, "%s(%i) : CUTIL CUDA error.\n",
-                file, line));
-        exit(-1);
-    }
-}
-
-// Exit with `errorMessage` when cudaGetLastError() reports an error.
-inline void __cutilGetLastError( const char *errorMessage, const char *file, const int line )
-{
-    cudaError_t err = cudaGetLastError();
-    if( cudaSuccess != err) {
-        FPRINTF((stderr, "%s(%i) : cutilCheckMsg() CUTIL CUDA error : %s : %s.\n",
-                file, line, errorMessage, cudaGetErrorString( err) ));
-        exit(-1);
-    }
-}
-
-// As __cutilGetLastError, then additionally synchronize the device and exit
-// if that fails.
-inline void __cutilGetLastErrorAndSync( const char *errorMessage, const char *file, const int line )
-{
-    cudaError_t err = cudaGetLastError();
-    if( cudaSuccess != err) {
-        FPRINTF((stderr, "%s(%i) : cutilCheckMsg() CUTIL CUDA error : %s : %s.\n",
-                file, line, errorMessage, cudaGetErrorString( err) ));
-        exit(-1);
-    }
-
-    err = cutilDeviceSynchronize();
-    if( cudaSuccess != err) {
-        FPRINTF((stderr, "%s(%i) : cutilCheckMsg cudaDeviceSynchronize error: %s : %s.\n",
-                file, line, errorMessage, cudaGetErrorString( err) ));
-        exit(-1);
-    }
-}
-
-// Exit when a host allocation returned a null pointer.
-inline void __cutilSafeMalloc( void *pointer, const char *file, const int line )
-{
-    if( !(pointer)) {
-        FPRINTF((stderr, "%s(%i) : cutilSafeMalloc host malloc failure\n",
-                file, line));
-        exit(-1);
-    }
-}
-
-#if __DEVICE_EMULATION__
-    // Emulation stubs: return a value so control does not flow off the end
-    // of a non-void function.
-    inline int cutilDeviceInit(int ARGC, char **ARGV) { return 0; }
-    inline int cutilChooseCudaDevice(int ARGC, char **ARGV) { return 0; }
-#else
-    // Select the device requested with "device" on the command line and make
-    // it current. Returns the device index, or -dev when the requested index
-    // is larger than the last available device.
-    inline int cutilDeviceInit(int ARGC, char **ARGV)
-    {
-        int deviceCount;
-        cutilSafeCallNoSync(cudaGetDeviceCount(&deviceCount));
-        if (deviceCount == 0) {
-            FPRINTF((stderr, "CUTIL CUDA error: no devices supporting CUDA.\n"));
-            exit(-1);
-        }
-        int dev = 0;
-        cutGetCmdLineArgumenti(ARGC, (const char **) ARGV, "device", &dev);
-        if (dev < 0)
-            dev = 0;
-        if (dev > deviceCount-1) {
-            fprintf(stderr, "\n");
-            fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
-            fprintf(stderr, ">> cutilDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
-            fprintf(stderr, "\n");
-            return -dev;
-        }
-        cudaDeviceProp deviceProp;
-        cutilSafeCallNoSync(cudaGetDeviceProperties(&deviceProp, dev));
-        if (deviceProp.major < 1) {
-            FPRINTF((stderr, "cutil error: GPU device does not support CUDA.\n"));
-            exit(-1);   // (a stray line-continuation backslash was removed here)
-        }
-        printf("> Using CUDA device [%d]: %s\n", dev, deviceProp.name);
-        cutilSafeCall(cudaSetDevice(dev));
-
-        return dev;
-    }
-
-    // General initialization call to pick the best CUDA Device
-    inline int cutilChooseCudaDevice(int argc, char **argv)
-    {
-        cudaDeviceProp deviceProp;
-        int devID = 0;
-        // If the command-line has a device number specified, use it
-        if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) {
-            devID = cutilDeviceInit(argc, argv);
-            if (devID < 0) {
-                printf("exiting...\n");
-                cutilExit(argc, argv);
-                exit(0);
-            }
-        } else {
-            // Otherwise pick the device with highest Gflops/s
-            devID = cutGetMaxGflopsDeviceId();
-            cutilSafeCallNoSync( cudaSetDevice( devID ) );
-            cutilSafeCallNoSync( cudaGetDeviceProperties(&deviceProp, devID) );
-            printf("> Using CUDA device [%d]: %s\n", devID, deviceProp.name);
-        }
-        return devID;
-    }
-#endif
-
-
-//! 
Check for CUDA context lost -inline void cutilCudaCheckCtxLost(const char *errorMessage, const char *file, const int line ) -{ - cudaError_t err = cudaGetLastError(); - if( cudaSuccess != err) { - FPRINTF((stderr, "%s(%i) : CUDA error: %s : %s.\n", - file, line, errorMessage, cudaGetErrorString( err) )); - exit(-1); - } - err = cutilDeviceSynchronize(); - if( cudaSuccess != err) { - FPRINTF((stderr, "%s(%i) : CUDA error: %s : %s.\n", - file, line, errorMessage, cudaGetErrorString( err) )); - exit(-1); - } -} - -#ifndef STRCASECMP -#ifdef _WIN32 -#define STRCASECMP _stricmp -#else -#define STRCASECMP strcasecmp -#endif -#endif - -#ifndef STRNCASECMP -#ifdef _WIN32 -#define STRNCASECMP _strnicmp -#else -#define STRNCASECMP strncasecmp -#endif -#endif - -inline void __cutilQAFinish(int argc, char **argv, bool bStatus) -{ - const char *sStatus[] = { "FAILED", "PASSED", "WAIVED", NULL }; - - bool bFlag = false; - for (int i=1; i < argc; i++) { - if (!STRCASECMP(argv[i], "-qatest") || !STRCASECMP(argv[i], "-noprompt")) { - bFlag |= true; - } - } - - if (bFlag) { - printf("&&&& %s %s", sStatus[bStatus], argv[0]); - for (int i=1; i < argc; i++) printf(" %s", argv[i]); - } else { - printf("[%s] test result\n%s\n", argv[0], sStatus[bStatus]); - } -} - -// General check for CUDA GPU SM Capabilities -inline bool cutilCudaCapabilities(int major_version, int minor_version, int argc, char **argv) -{ - cudaDeviceProp deviceProp; - deviceProp.major = 0; - deviceProp.minor = 0; - int dev; - -#ifdef __DEVICE_EMULATION__ - printf("> Compute Device Emulation Mode \n"); -#endif - - cutilSafeCall( cudaGetDevice(&dev) ); - cutilSafeCall( cudaGetDeviceProperties(&deviceProp, dev)); - - if((deviceProp.major > major_version) || - (deviceProp.major == major_version && deviceProp.minor >= minor_version)) - { - printf("> Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor); - return true; - } - else - { - printf("There is no device 
supporting CUDA compute capability %d.%d.\n", major_version, minor_version); - __cutilQAFinish(argc, argv, true); - return false; - } -} - -#endif // _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_ diff --git a/cuda/common/include/pcl/cuda/cutil_math.h b/cuda/common/include/pcl/cuda/cutil_math.h deleted file mode 100644 index 746f4d68..00000000 --- a/cuda/common/include/pcl/cuda/cutil_math.h +++ /dev/null @@ -1,1328 +0,0 @@ -/* - * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -/* - This file implements common mathematical operations on vector types - (float3, float4 etc.) since these are not provided as standard by CUDA. - - The syntax is modelled on the Cg standard library. - - This is part of the CUTIL library and is not supported by NVIDIA. - - Thanks to Linh Hah for additions and fixes. -*/ - -#ifndef CUTIL_MATH_H -#define CUTIL_MATH_H - -#include "cuda_runtime.h" - -typedef unsigned int uint; -typedef unsigned short ushort; - -#ifndef __CUDACC__ -#include - -//////////////////////////////////////////////////////////////////////////////// -// host implementations of CUDA functions -//////////////////////////////////////////////////////////////////////////////// - -inline float fminf(float a, float b) -{ - return a < b ? a : b; -} - -inline float fmaxf(float a, float b) -{ - return a > b ? a : b; -} - -inline int max(int a, int b) -{ - return a > b ? a : b; -} - -inline int min(int a, int b) -{ - return a < b ? 
a : b; -} - -inline float rsqrtf(float x) -{ - return 1.0f / sqrtf(x); -} -#endif - -//////////////////////////////////////////////////////////////////////////////// -// constructors -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 make_float2(float s) -{ - return make_float2(s, s); -} -inline __host__ __device__ float2 make_float2(float3 a) -{ - return make_float2(a.x, a.y); -} -inline __host__ __device__ float2 make_float2(int2 a) -{ - return make_float2(float(a.x), float(a.y)); -} -inline __host__ __device__ float2 make_float2(uint2 a) -{ - return make_float2(float(a.x), float(a.y)); -} - -inline __host__ __device__ int2 make_int2(int s) -{ - return make_int2(s, s); -} -inline __host__ __device__ int2 make_int2(int3 a) -{ - return make_int2(a.x, a.y); -} -inline __host__ __device__ int2 make_int2(uint2 a) -{ - return make_int2(int(a.x), int(a.y)); -} -inline __host__ __device__ int2 make_int2(float2 a) -{ - return make_int2(int(a.x), int(a.y)); -} - -inline __host__ __device__ uint2 make_uint2(uint s) -{ - return make_uint2(s, s); -} -inline __host__ __device__ uint2 make_uint2(uint3 a) -{ - return make_uint2(a.x, a.y); -} -inline __host__ __device__ uint2 make_uint2(int2 a) -{ - return make_uint2(uint(a.x), uint(a.y)); -} - -inline __host__ __device__ float3 make_float3(float s) -{ - return make_float3(s, s, s); -} -inline __host__ __device__ float3 make_float3(float2 a) -{ - return make_float3(a.x, a.y, 0.0f); -} -inline __host__ __device__ float3 make_float3(float2 a, float s) -{ - return make_float3(a.x, a.y, s); -} -inline __host__ __device__ float3 make_float3(float4 a) -{ - return make_float3(a.x, a.y, a.z); -} -inline __host__ __device__ float3 make_float3(int3 a) -{ - return make_float3(float(a.x), float(a.y), float(a.z)); -} -inline __host__ __device__ float3 make_float3(uint3 a) -{ - return make_float3(float(a.x), float(a.y), float(a.z)); -} - -inline __host__ __device__ int3 
make_int3(int s) -{ - return make_int3(s, s, s); -} -inline __host__ __device__ int3 make_int3(int2 a) -{ - return make_int3(a.x, a.y, 0); -} -inline __host__ __device__ int3 make_int3(int2 a, int s) -{ - return make_int3(a.x, a.y, s); -} -inline __host__ __device__ int3 make_int3(uint3 a) -{ - return make_int3(int(a.x), int(a.y), int(a.z)); -} -inline __host__ __device__ int3 make_int3(float3 a) -{ - return make_int3(int(a.x), int(a.y), int(a.z)); -} - -inline __host__ __device__ uint3 make_uint3(uint s) -{ - return make_uint3(s, s, s); -} -inline __host__ __device__ uint3 make_uint3(uint2 a) -{ - return make_uint3(a.x, a.y, 0); -} -inline __host__ __device__ uint3 make_uint3(uint2 a, uint s) -{ - return make_uint3(a.x, a.y, s); -} -inline __host__ __device__ uint3 make_uint3(uint4 a) -{ - return make_uint3(a.x, a.y, a.z); -} -inline __host__ __device__ uint3 make_uint3(int3 a) -{ - return make_uint3(uint(a.x), uint(a.y), uint(a.z)); -} - -inline __host__ __device__ float4 make_float4(float s) -{ - return make_float4(s, s, s, s); -} -inline __host__ __device__ float4 make_float4(float3 a) -{ - return make_float4(a.x, a.y, a.z, 0.0f); -} -inline __host__ __device__ float4 make_float4(float3 a, float w) -{ - return make_float4(a.x, a.y, a.z, w); -} -inline __host__ __device__ float4 make_float4(int4 a) -{ - return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); -} -inline __host__ __device__ float4 make_float4(uint4 a) -{ - return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); -} - -inline __host__ __device__ int4 make_int4(int s) -{ - return make_int4(s, s, s, s); -} -inline __host__ __device__ int4 make_int4(int3 a) -{ - return make_int4(a.x, a.y, a.z, 0); -} -inline __host__ __device__ int4 make_int4(int3 a, int w) -{ - return make_int4(a.x, a.y, a.z, w); -} -inline __host__ __device__ int4 make_int4(uint4 a) -{ - return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); -} -inline __host__ __device__ int4 make_int4(float4 a) -{ - 
return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); -} - - -inline __host__ __device__ uint4 make_uint4(uint s) -{ - return make_uint4(s, s, s, s); -} -inline __host__ __device__ uint4 make_uint4(uint3 a) -{ - return make_uint4(a.x, a.y, a.z, 0); -} -inline __host__ __device__ uint4 make_uint4(uint3 a, uint w) -{ - return make_uint4(a.x, a.y, a.z, w); -} -inline __host__ __device__ uint4 make_uint4(int4 a) -{ - return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// negate -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator-(float2 &a) -{ - return make_float2(-a.x, -a.y); -} -inline __host__ __device__ int2 operator-(int2 &a) -{ - return make_int2(-a.x, -a.y); -} -inline __host__ __device__ float3 operator-(float3 &a) -{ - return make_float3(-a.x, -a.y, -a.z); -} -inline __host__ __device__ int3 operator-(int3 &a) -{ - return make_int3(-a.x, -a.y, -a.z); -} -inline __host__ __device__ float4 operator-(float4 &a) -{ - return make_float4(-a.x, -a.y, -a.z, -a.w); -} -inline __host__ __device__ int4 operator-(int4 &a) -{ - return make_int4(-a.x, -a.y, -a.z, -a.w); -} - -//////////////////////////////////////////////////////////////////////////////// -// addition -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator+(float2 a, float2 b) -{ - return make_float2(a.x + b.x, a.y + b.y); -} -inline __host__ __device__ void operator+=(float2 &a, float2 b) -{ - a.x += b.x; a.y += b.y; -} -inline __host__ __device__ float2 operator+(float2 a, float b) -{ - return make_float2(a.x + b, a.y + b); -} -inline __host__ __device__ float2 operator+(float b, float2 a) -{ - return make_float2(a.x + b, a.y + b); -} -inline __host__ __device__ void operator+=(float2 &a, float b) -{ - a.x += b; a.y += b; -} - -inline __host__ __device__ int2 
operator+(int2 a, int2 b) -{ - return make_int2(a.x + b.x, a.y + b.y); -} -inline __host__ __device__ void operator+=(int2 &a, int2 b) -{ - a.x += b.x; a.y += b.y; -} -inline __host__ __device__ int2 operator+(int2 a, int b) -{ - return make_int2(a.x + b, a.y + b); -} -inline __host__ __device__ int2 operator+(int b, int2 a) -{ - return make_int2(a.x + b, a.y + b); -} -inline __host__ __device__ void operator+=(int2 &a, int b) -{ - a.x += b; a.y += b; -} - -inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) -{ - return make_uint2(a.x + b.x, a.y + b.y); -} -inline __host__ __device__ void operator+=(uint2 &a, uint2 b) -{ - a.x += b.x; a.y += b.y; -} -inline __host__ __device__ uint2 operator+(uint2 a, uint b) -{ - return make_uint2(a.x + b, a.y + b); -} -inline __host__ __device__ uint2 operator+(uint b, uint2 a) -{ - return make_uint2(a.x + b, a.y + b); -} -inline __host__ __device__ void operator+=(uint2 &a, uint b) -{ - a.x += b; a.y += b; -} - - -inline __host__ __device__ float3 operator+(float3 a, float3 b) -{ - return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); -} -inline __host__ __device__ void operator+=(float3 &a, float3 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; -} -inline __host__ __device__ float3 operator+(float3 a, float b) -{ - return make_float3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ void operator+=(float3 &a, float b) -{ - a.x += b; a.y += b; a.z += b; -} - -inline __host__ __device__ int3 operator+(int3 a, int3 b) -{ - return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); -} -inline __host__ __device__ void operator+=(int3 &a, int3 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; -} -inline __host__ __device__ int3 operator+(int3 a, int b) -{ - return make_int3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ void operator+=(int3 &a, int b) -{ - a.x += b; a.y += b; a.z += b; -} - -inline __host__ __device__ uint3 operator+(uint3 a, uint3 b) -{ - return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z); -} -inline 
__host__ __device__ void operator+=(uint3 &a, uint3 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; -} -inline __host__ __device__ uint3 operator+(uint3 a, uint b) -{ - return make_uint3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ void operator+=(uint3 &a, uint b) -{ - a.x += b; a.y += b; a.z += b; -} - -inline __host__ __device__ int3 operator+(int b, int3 a) -{ - return make_int3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ uint3 operator+(uint b, uint3 a) -{ - return make_uint3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ float3 operator+(float b, float3 a) -{ - return make_float3(a.x + b, a.y + b, a.z + b); -} - -inline __host__ __device__ float4 operator+(float4 a, float4 b) -{ - return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -} -inline __host__ __device__ void operator+=(float4 &a, float4 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; -} -inline __host__ __device__ float4 operator+(float4 a, float b) -{ - return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ float4 operator+(float b, float4 a) -{ - return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ void operator+=(float4 &a, float b) -{ - a.x += b; a.y += b; a.z += b; a.w += b; -} - -inline __host__ __device__ int4 operator+(int4 a, int4 b) -{ - return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -} -inline __host__ __device__ void operator+=(int4 &a, int4 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; -} -inline __host__ __device__ int4 operator+(int4 a, int b) -{ - return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ int4 operator+(int b, int4 a) -{ - return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ void operator+=(int4 &a, int b) -{ - a.x += b; a.y += b; a.z += b; a.w += b; -} - -inline __host__ __device__ uint4 operator+(uint4 a, uint4 b) -{ - return make_uint4(a.x + b.x, a.y + b.y, a.z + 
b.z, a.w + b.w); -} -inline __host__ __device__ void operator+=(uint4 &a, uint4 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; -} -inline __host__ __device__ uint4 operator+(uint4 a, uint b) -{ - return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ uint4 operator+(uint b, uint4 a) -{ - return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ void operator+=(uint4 &a, uint b) -{ - a.x += b; a.y += b; a.z += b; a.w += b; -} - -//////////////////////////////////////////////////////////////////////////////// -// subtract -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator-(float2 a, float2 b) -{ - return make_float2(a.x - b.x, a.y - b.y); -} -inline __host__ __device__ void operator-=(float2 &a, float2 b) -{ - a.x -= b.x; a.y -= b.y; -} -inline __host__ __device__ float2 operator-(float2 a, float b) -{ - return make_float2(a.x - b, a.y - b); -} -inline __host__ __device__ float2 operator-(float b, float2 a) -{ - return make_float2(b - a.x, b - a.y); -} -inline __host__ __device__ void operator-=(float2 &a, float b) -{ - a.x -= b; a.y -= b; -} - -inline __host__ __device__ int2 operator-(int2 a, int2 b) -{ - return make_int2(a.x - b.x, a.y - b.y); -} -inline __host__ __device__ void operator-=(int2 &a, int2 b) -{ - a.x -= b.x; a.y -= b.y; -} -inline __host__ __device__ int2 operator-(int2 a, int b) -{ - return make_int2(a.x - b, a.y - b); -} -inline __host__ __device__ int2 operator-(int b, int2 a) -{ - return make_int2(b - a.x, b - a.y); -} -inline __host__ __device__ void operator-=(int2 &a, int b) -{ - a.x -= b; a.y -= b; -} - -inline __host__ __device__ uint2 operator-(uint2 a, uint2 b) -{ - return make_uint2(a.x - b.x, a.y - b.y); -} -inline __host__ __device__ void operator-=(uint2 &a, uint2 b) -{ - a.x -= b.x; a.y -= b.y; -} -inline __host__ __device__ uint2 operator-(uint2 a, uint b) -{ - return make_uint2(a.x - b, 
a.y - b); -} -inline __host__ __device__ uint2 operator-(uint b, uint2 a) -{ - return make_uint2(b - a.x, b - a.y); -} -inline __host__ __device__ void operator-=(uint2 &a, uint b) -{ - a.x -= b; a.y -= b; -} - -inline __host__ __device__ float3 operator-(float3 a, float3 b) -{ - return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -} -inline __host__ __device__ void operator-=(float3 &a, float3 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; -} -inline __host__ __device__ float3 operator-(float3 a, float b) -{ - return make_float3(a.x - b, a.y - b, a.z - b); -} -inline __host__ __device__ float3 operator-(float b, float3 a) -{ - return make_float3(b - a.x, b - a.y, b - a.z); -} -inline __host__ __device__ void operator-=(float3 &a, float b) -{ - a.x -= b; a.y -= b; a.z -= b; -} - -inline __host__ __device__ int3 operator-(int3 a, int3 b) -{ - return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); -} -inline __host__ __device__ void operator-=(int3 &a, int3 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; -} -inline __host__ __device__ int3 operator-(int3 a, int b) -{ - return make_int3(a.x - b, a.y - b, a.z - b); -} -inline __host__ __device__ int3 operator-(int b, int3 a) -{ - return make_int3(b - a.x, b - a.y, b - a.z); -} -inline __host__ __device__ void operator-=(int3 &a, int b) -{ - a.x -= b; a.y -= b; a.z -= b; -} - -inline __host__ __device__ uint3 operator-(uint3 a, uint3 b) -{ - return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z); -} -inline __host__ __device__ void operator-=(uint3 &a, uint3 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; -} -inline __host__ __device__ uint3 operator-(uint3 a, uint b) -{ - return make_uint3(a.x - b, a.y - b, a.z - b); -} -inline __host__ __device__ uint3 operator-(uint b, uint3 a) -{ - return make_uint3(b - a.x, b - a.y, b - a.z); -} -inline __host__ __device__ void operator-=(uint3 &a, uint b) -{ - a.x -= b; a.y -= b; a.z -= b; -} - -inline __host__ __device__ float4 operator-(float4 a, float4 b) -{ - return make_float4(a.x - b.x, a.y - 
b.y, a.z - b.z, a.w - b.w); -} -inline __host__ __device__ void operator-=(float4 &a, float4 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; -} -inline __host__ __device__ float4 operator-(float4 a, float b) -{ - return make_float4(a.x - b, a.y - b, a.z - b, a.w - b); -} -inline __host__ __device__ void operator-=(float4 &a, float b) -{ - a.x -= b; a.y -= b; a.z -= b; a.w -= b; -} - -inline __host__ __device__ int4 operator-(int4 a, int4 b) -{ - return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); -} -inline __host__ __device__ void operator-=(int4 &a, int4 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; -} -inline __host__ __device__ int4 operator-(int4 a, int b) -{ - return make_int4(a.x - b, a.y - b, a.z - b, a.w - b); -} -inline __host__ __device__ int4 operator-(int b, int4 a) -{ - return make_int4(b - a.x, b - a.y, b - a.z, b - a.w); -} -inline __host__ __device__ void operator-=(int4 &a, int b) -{ - a.x -= b; a.y -= b; a.z -= b; a.w -= b; -} - -inline __host__ __device__ uint4 operator-(uint4 a, uint4 b) -{ - return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); -} -inline __host__ __device__ void operator-=(uint4 &a, uint4 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; -} -inline __host__ __device__ uint4 operator-(uint4 a, uint b) -{ - return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b); -} -inline __host__ __device__ uint4 operator-(uint b, uint4 a) -{ - return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w); -} -inline __host__ __device__ void operator-=(uint4 &a, uint b) -{ - a.x -= b; a.y -= b; a.z -= b; a.w -= b; -} - -//////////////////////////////////////////////////////////////////////////////// -// multiply -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator*(float2 a, float2 b) -{ - return make_float2(a.x * b.x, a.y * b.y); -} -inline __host__ __device__ void operator*=(float2 &a, float2 b) -{ - a.x *= b.x; a.y *= b.y; -} 
-inline __host__ __device__ float2 operator*(float2 a, float b) -{ - return make_float2(a.x * b, a.y * b); -} -inline __host__ __device__ float2 operator*(float b, float2 a) -{ - return make_float2(b * a.x, b * a.y); -} -inline __host__ __device__ void operator*=(float2 &a, float b) -{ - a.x *= b; a.y *= b; -} - -inline __host__ __device__ int2 operator*(int2 a, int2 b) -{ - return make_int2(a.x * b.x, a.y * b.y); -} -inline __host__ __device__ void operator*=(int2 &a, int2 b) -{ - a.x *= b.x; a.y *= b.y; -} -inline __host__ __device__ int2 operator*(int2 a, int b) -{ - return make_int2(a.x * b, a.y * b); -} -inline __host__ __device__ int2 operator*(int b, int2 a) -{ - return make_int2(b * a.x, b * a.y); -} -inline __host__ __device__ void operator*=(int2 &a, int b) -{ - a.x *= b; a.y *= b; -} - -inline __host__ __device__ uint2 operator*(uint2 a, uint2 b) -{ - return make_uint2(a.x * b.x, a.y * b.y); -} -inline __host__ __device__ void operator*=(uint2 &a, uint2 b) -{ - a.x *= b.x; a.y *= b.y; -} -inline __host__ __device__ uint2 operator*(uint2 a, uint b) -{ - return make_uint2(a.x * b, a.y * b); -} -inline __host__ __device__ uint2 operator*(uint b, uint2 a) -{ - return make_uint2(b * a.x, b * a.y); -} -inline __host__ __device__ void operator*=(uint2 &a, uint b) -{ - a.x *= b; a.y *= b; -} - -inline __host__ __device__ float3 operator*(float3 a, float3 b) -{ - return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); -} -inline __host__ __device__ void operator*=(float3 &a, float3 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; -} -inline __host__ __device__ float3 operator*(float3 a, float b) -{ - return make_float3(a.x * b, a.y * b, a.z * b); -} -inline __host__ __device__ float3 operator*(float b, float3 a) -{ - return make_float3(b * a.x, b * a.y, b * a.z); -} -inline __host__ __device__ void operator*=(float3 &a, float b) -{ - a.x *= b; a.y *= b; a.z *= b; -} - -inline __host__ __device__ int3 operator*(int3 a, int3 b) -{ - return make_int3(a.x * b.x, a.y * b.y, 
a.z * b.z); -} -inline __host__ __device__ void operator*=(int3 &a, int3 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; -} -inline __host__ __device__ int3 operator*(int3 a, int b) -{ - return make_int3(a.x * b, a.y * b, a.z * b); -} -inline __host__ __device__ int3 operator*(int b, int3 a) -{ - return make_int3(b * a.x, b * a.y, b * a.z); -} -inline __host__ __device__ void operator*=(int3 &a, int b) -{ - a.x *= b; a.y *= b; a.z *= b; -} - -inline __host__ __device__ uint3 operator*(uint3 a, uint3 b) -{ - return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z); -} -inline __host__ __device__ void operator*=(uint3 &a, uint3 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; -} -inline __host__ __device__ uint3 operator*(uint3 a, uint b) -{ - return make_uint3(a.x * b, a.y * b, a.z * b); -} -inline __host__ __device__ uint3 operator*(uint b, uint3 a) -{ - return make_uint3(b * a.x, b * a.y, b * a.z); -} -inline __host__ __device__ void operator*=(uint3 &a, uint b) -{ - a.x *= b; a.y *= b; a.z *= b; -} - -inline __host__ __device__ float4 operator*(float4 a, float4 b) -{ - return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -} -inline __host__ __device__ void operator*=(float4 &a, float4 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; -} -inline __host__ __device__ float4 operator*(float4 a, float b) -{ - return make_float4(a.x * b, a.y * b, a.z * b, a.w * b); -} -inline __host__ __device__ float4 operator*(float b, float4 a) -{ - return make_float4(b * a.x, b * a.y, b * a.z, b * a.w); -} -inline __host__ __device__ void operator*=(float4 &a, float b) -{ - a.x *= b; a.y *= b; a.z *= b; a.w *= b; -} - -inline __host__ __device__ int4 operator*(int4 a, int4 b) -{ - return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -} -inline __host__ __device__ void operator*=(int4 &a, int4 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; -} -inline __host__ __device__ int4 operator*(int4 a, int b) -{ - return make_int4(a.x * b, a.y * b, a.z * b, a.w * b); -} 
-inline __host__ __device__ int4 operator*(int b, int4 a) -{ - return make_int4(b * a.x, b * a.y, b * a.z, b * a.w); -} -inline __host__ __device__ void operator*=(int4 &a, int b) -{ - a.x *= b; a.y *= b; a.z *= b; a.w *= b; -} - -inline __host__ __device__ uint4 operator*(uint4 a, uint4 b) -{ - return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -} -inline __host__ __device__ void operator*=(uint4 &a, uint4 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; -} -inline __host__ __device__ uint4 operator*(uint4 a, uint b) -{ - return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b); -} -inline __host__ __device__ uint4 operator*(uint b, uint4 a) -{ - return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w); -} -inline __host__ __device__ void operator*=(uint4 &a, uint b) -{ - a.x *= b; a.y *= b; a.z *= b; a.w *= b; -} - -//////////////////////////////////////////////////////////////////////////////// -// divide -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator/(float2 a, float2 b) -{ - return make_float2(a.x / b.x, a.y / b.y); -} -inline __host__ __device__ void operator/=(float2 &a, float2 b) -{ - a.x /= b.x; a.y /= b.y; -} -inline __host__ __device__ float2 operator/(float2 a, float b) -{ - return make_float2(a.x / b, a.y / b); -} -inline __host__ __device__ void operator/=(float2 &a, float b) -{ - a.x /= b; a.y /= b; -} -inline __host__ __device__ float2 operator/(float b, float2 a) -{ - return make_float2(b / a.x, b / a.y); -} - -inline __host__ __device__ float3 operator/(float3 a, float3 b) -{ - return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); -} -inline __host__ __device__ void operator/=(float3 &a, float3 b) -{ - a.x /= b.x; a.y /= b.y; a.z /= b.z; -} -inline __host__ __device__ float3 operator/(float3 a, float b) -{ - return make_float3(a.x / b, a.y / b, a.z / b); -} -inline __host__ __device__ void operator/=(float3 &a, float b) -{ - a.x /= b; a.y /= b; a.z /= b; 
-} -inline __host__ __device__ float3 operator/(float b, float3 a) -{ - return make_float3(b / a.x, b / a.y, b / a.z); -} - -inline __host__ __device__ float4 operator/(float4 a, float4 b) -{ - return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); -} -inline __host__ __device__ void operator/=(float4 &a, float4 b) -{ - a.x /= b.x; a.y /= b.y; a.z /= b.z; a.w /= b.w; -} -inline __host__ __device__ float4 operator/(float4 a, float b) -{ - return make_float4(a.x / b, a.y / b, a.z / b, a.w / b); -} -inline __host__ __device__ void operator/=(float4 &a, float b) -{ - a.x /= b; a.y /= b; a.z /= b; a.w /= b; -} -inline __host__ __device__ float4 operator/(float b, float4 a){ - return make_float4(b / a.x, b / a.y, b / a.z, b / a.w); -} - -//////////////////////////////////////////////////////////////////////////////// -// min -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fminf(float2 a, float2 b) -{ - return make_float2(fminf(a.x,b.x), fminf(a.y,b.y)); -} -inline __host__ __device__ float3 fminf(float3 a, float3 b) -{ - return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z)); -} -inline __host__ __device__ float4 fminf(float4 a, float4 b) -{ - return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w)); -} - -inline __host__ __device__ int2 min(int2 a, int2 b) -{ - return make_int2(min(a.x,b.x), min(a.y,b.y)); -} -inline __host__ __device__ int3 min(int3 a, int3 b) -{ - return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); -} -inline __host__ __device__ int4 min(int4 a, int4 b) -{ - return make_int4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w)); -} - -inline __host__ __device__ uint2 min(uint2 a, uint2 b) -{ - return make_uint2(min(a.x,b.x), min(a.y,b.y)); -} -inline __host__ __device__ uint3 min(uint3 a, uint3 b) -{ - return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); -} -inline __host__ __device__ uint4 min(uint4 a, uint4 b) -{ - 
return make_uint4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// max -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fmaxf(float2 a, float2 b) -{ - return make_float2(fmaxf(a.x,b.x), fmaxf(a.y,b.y)); -} -inline __host__ __device__ float3 fmaxf(float3 a, float3 b) -{ - return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z)); -} -inline __host__ __device__ float4 fmaxf(float4 a, float4 b) -{ - return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w)); -} - -inline __host__ __device__ int2 max(int2 a, int2 b) -{ - return make_int2(max(a.x,b.x), max(a.y,b.y)); -} -inline __host__ __device__ int3 max(int3 a, int3 b) -{ - return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); -} -inline __host__ __device__ int4 max(int4 a, int4 b) -{ - return make_int4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w)); -} - -inline __host__ __device__ uint2 max(uint2 a, uint2 b) -{ - return make_uint2(max(a.x,b.x), max(a.y,b.y)); -} -inline __host__ __device__ uint3 max(uint3 a, uint3 b) -{ - return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); -} -inline __host__ __device__ uint4 max(uint4 a, uint4 b) -{ - return make_uint4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// lerp -// - linear interpolation between a and b, based on value t in [0, 1] range -//////////////////////////////////////////////////////////////////////////////// - -inline __device__ __host__ float lerp(float a, float b, float t) -{ - return a + t*(b-a); -} -inline __device__ __host__ float2 lerp(float2 a, float2 b, float t) -{ - return a + t*(b-a); -} -inline __device__ __host__ float3 lerp(float3 a, float3 b, float t) -{ - return a + t*(b-a); -} -inline __device__ __host__ float4 lerp(float4 a, 
float4 b, float t) -{ - return a + t*(b-a); -} - -//////////////////////////////////////////////////////////////////////////////// -// clamp -// - clamp the value v to be in the range [a, b] -//////////////////////////////////////////////////////////////////////////////// - -inline __device__ __host__ float clamp(float f, float a, float b) -{ - return fmaxf(a, fminf(f, b)); -} -inline __device__ __host__ int clamp(int f, int a, int b) -{ - return max(a, min(f, b)); -} -inline __device__ __host__ uint clamp(uint f, uint a, uint b) -{ - return max(a, min(f, b)); -} - -inline __device__ __host__ float2 clamp(float2 v, float a, float b) -{ - return make_float2(clamp(v.x, a, b), clamp(v.y, a, b)); -} -inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b) -{ - return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); -} -inline __device__ __host__ float3 clamp(float3 v, float a, float b) -{ - return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); -} -inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b) -{ - return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); -} -inline __device__ __host__ float4 clamp(float4 v, float a, float b) -{ - return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); -} -inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b) -{ - return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); -} - -inline __device__ __host__ int2 clamp(int2 v, int a, int b) -{ - return make_int2(clamp(v.x, a, b), clamp(v.y, a, b)); -} -inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b) -{ - return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); -} -inline __device__ __host__ int3 clamp(int3 v, int a, int b) -{ - return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); -} -inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b) -{ - return 
make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); -} -inline __device__ __host__ int4 clamp(int4 v, int a, int b) -{ - return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); -} -inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b) -{ - return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); -} - -inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b) -{ - return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b)); -} -inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b) -{ - return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); -} -inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b) -{ - return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); -} -inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b) -{ - return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); -} -inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b) -{ - return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); -} -inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b) -{ - return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// dot product -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float dot(float2 a, float2 b) -{ - return a.x * b.x + a.y * b.y; -} -inline __host__ __device__ float dot(float3 a, float3 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z; -} -inline __host__ __device__ float dot(float4 a, float4 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} - -inline __host__ __device__ int dot(int2 a, int2 b) -{ - return a.x * b.x + a.y * b.y; -} -inline __host__ __device__ int dot(int3 a, int3 b) -{ - return 
a.x * b.x + a.y * b.y + a.z * b.z; -} -inline __host__ __device__ int dot(int4 a, int4 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} - -inline __host__ __device__ uint dot(uint2 a, uint2 b) -{ - return a.x * b.x + a.y * b.y; -} -inline __host__ __device__ uint dot(uint3 a, uint3 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z; -} -inline __host__ __device__ uint dot(uint4 a, uint4 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} - -//////////////////////////////////////////////////////////////////////////////// -// length -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float length(float2 v) -{ - return sqrtf(dot(v, v)); -} -inline __host__ __device__ float length(float3 v) -{ - return sqrtf(dot(v, v)); -} -inline __host__ __device__ float length(float4 v) -{ - return sqrtf(dot(v, v)); -} - -//////////////////////////////////////////////////////////////////////////////// -// normalize -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 normalize(float2 v) -{ - float invLen = rsqrtf(dot(v, v)); - return v * invLen; -} -inline __host__ __device__ float3 normalize(float3 v) -{ - float invLen = rsqrtf(dot(v, v)); - return v * invLen; -} -inline __host__ __device__ float4 normalize(float4 v) -{ - float invLen = rsqrtf(dot(v, v)); - return v * invLen; -} - -//////////////////////////////////////////////////////////////////////////////// -// floor -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 floorf(float2 v) -{ - return make_float2(floorf(v.x), floorf(v.y)); -} -inline __host__ __device__ float3 floorf(float3 v) -{ - return make_float3(floorf(v.x), floorf(v.y), floorf(v.z)); -} -inline __host__ __device__ float4 floorf(float4 v) -{ - return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w)); -} - 
-//////////////////////////////////////////////////////////////////////////////// -// frac - returns the fractional portion of a scalar or each vector component -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float fracf(float v) -{ - return v - floorf(v); -} -inline __host__ __device__ float2 fracf(float2 v) -{ - return make_float2(fracf(v.x), fracf(v.y)); -} -inline __host__ __device__ float3 fracf(float3 v) -{ - return make_float3(fracf(v.x), fracf(v.y), fracf(v.z)); -} -inline __host__ __device__ float4 fracf(float4 v) -{ - return make_float4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// fmod -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fmodf(float2 a, float2 b) -{ - return make_float2(fmodf(a.x, b.x), fmodf(a.y, b.y)); -} -inline __host__ __device__ float3 fmodf(float3 a, float3 b) -{ - return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z)); -} -inline __host__ __device__ float4 fmodf(float4 a, float4 b) -{ - return make_float4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmodf(a.w, b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// absolute value -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fabs(float2 v) -{ - return make_float2(fabs(v.x), fabs(v.y)); -} -inline __host__ __device__ float3 fabs(float3 v) -{ - return make_float3(fabs(v.x), fabs(v.y), fabs(v.z)); -} -inline __host__ __device__ float4 fabs(float4 v) -{ - return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w)); -} - -inline __host__ __device__ int2 abs(int2 v) -{ - return make_int2(abs(v.x), abs(v.y)); -} -inline __host__ __device__ int3 abs(int3 v) -{ - return make_int3(abs(v.x), abs(v.y), abs(v.z)); -} -inline __host__ 
__device__ int4 abs(int4 v) -{ - return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// reflect -// - returns reflection of incident ray I around surface normal N -// - N should be normalized, reflected vector's length is equal to length of I -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float3 reflect(float3 i, float3 n) -{ - return i - 2.0f * n * dot(n,i); -} - -//////////////////////////////////////////////////////////////////////////////// -// cross product -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float3 cross(float3 a, float3 b) -{ - return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); -} - -//////////////////////////////////////////////////////////////////////////////// -// smoothstep -// - returns 0 if x < a -// - returns 1 if x > b -// - otherwise returns smooth interpolation between 0 and 1 based on x -//////////////////////////////////////////////////////////////////////////////// - -inline __device__ __host__ float smoothstep(float a, float b, float x) -{ - float y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(3.0f - (2.0f*y))); -} -inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x) -{ - float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(make_float2(3.0f) - (make_float2(2.0f)*y))); -} -inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x) -{ - float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(make_float3(3.0f) - (make_float3(2.0f)*y))); -} -inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x) -{ - float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(make_float4(3.0f) - (make_float4(2.0f)*y))); -} - -#endif diff --git a/gpu/utils/include/pcl/gpu/utils/device/cutil_math.h 
b/gpu/utils/include/pcl/gpu/utils/device/cutil_math.h deleted file mode 100644 index 746f4d68..00000000 --- a/gpu/utils/include/pcl/gpu/utils/device/cutil_math.h +++ /dev/null @@ -1,1328 +0,0 @@ -/* - * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -/* - This file implements common mathematical operations on vector types - (float3, float4 etc.) since these are not provided as standard by CUDA. - - The syntax is modelled on the Cg standard library. - - This is part of the CUTIL library and is not supported by NVIDIA. - - Thanks to Linh Hah for additions and fixes. -*/ - -#ifndef CUTIL_MATH_H -#define CUTIL_MATH_H - -#include "cuda_runtime.h" - -typedef unsigned int uint; -typedef unsigned short ushort; - -#ifndef __CUDACC__ -#include - -//////////////////////////////////////////////////////////////////////////////// -// host implementations of CUDA functions -//////////////////////////////////////////////////////////////////////////////// - -inline float fminf(float a, float b) -{ - return a < b ? a : b; -} - -inline float fmaxf(float a, float b) -{ - return a > b ? a : b; -} - -inline int max(int a, int b) -{ - return a > b ? a : b; -} - -inline int min(int a, int b) -{ - return a < b ? 
a : b; -} - -inline float rsqrtf(float x) -{ - return 1.0f / sqrtf(x); -} -#endif - -//////////////////////////////////////////////////////////////////////////////// -// constructors -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 make_float2(float s) -{ - return make_float2(s, s); -} -inline __host__ __device__ float2 make_float2(float3 a) -{ - return make_float2(a.x, a.y); -} -inline __host__ __device__ float2 make_float2(int2 a) -{ - return make_float2(float(a.x), float(a.y)); -} -inline __host__ __device__ float2 make_float2(uint2 a) -{ - return make_float2(float(a.x), float(a.y)); -} - -inline __host__ __device__ int2 make_int2(int s) -{ - return make_int2(s, s); -} -inline __host__ __device__ int2 make_int2(int3 a) -{ - return make_int2(a.x, a.y); -} -inline __host__ __device__ int2 make_int2(uint2 a) -{ - return make_int2(int(a.x), int(a.y)); -} -inline __host__ __device__ int2 make_int2(float2 a) -{ - return make_int2(int(a.x), int(a.y)); -} - -inline __host__ __device__ uint2 make_uint2(uint s) -{ - return make_uint2(s, s); -} -inline __host__ __device__ uint2 make_uint2(uint3 a) -{ - return make_uint2(a.x, a.y); -} -inline __host__ __device__ uint2 make_uint2(int2 a) -{ - return make_uint2(uint(a.x), uint(a.y)); -} - -inline __host__ __device__ float3 make_float3(float s) -{ - return make_float3(s, s, s); -} -inline __host__ __device__ float3 make_float3(float2 a) -{ - return make_float3(a.x, a.y, 0.0f); -} -inline __host__ __device__ float3 make_float3(float2 a, float s) -{ - return make_float3(a.x, a.y, s); -} -inline __host__ __device__ float3 make_float3(float4 a) -{ - return make_float3(a.x, a.y, a.z); -} -inline __host__ __device__ float3 make_float3(int3 a) -{ - return make_float3(float(a.x), float(a.y), float(a.z)); -} -inline __host__ __device__ float3 make_float3(uint3 a) -{ - return make_float3(float(a.x), float(a.y), float(a.z)); -} - -inline __host__ __device__ int3 
make_int3(int s) -{ - return make_int3(s, s, s); -} -inline __host__ __device__ int3 make_int3(int2 a) -{ - return make_int3(a.x, a.y, 0); -} -inline __host__ __device__ int3 make_int3(int2 a, int s) -{ - return make_int3(a.x, a.y, s); -} -inline __host__ __device__ int3 make_int3(uint3 a) -{ - return make_int3(int(a.x), int(a.y), int(a.z)); -} -inline __host__ __device__ int3 make_int3(float3 a) -{ - return make_int3(int(a.x), int(a.y), int(a.z)); -} - -inline __host__ __device__ uint3 make_uint3(uint s) -{ - return make_uint3(s, s, s); -} -inline __host__ __device__ uint3 make_uint3(uint2 a) -{ - return make_uint3(a.x, a.y, 0); -} -inline __host__ __device__ uint3 make_uint3(uint2 a, uint s) -{ - return make_uint3(a.x, a.y, s); -} -inline __host__ __device__ uint3 make_uint3(uint4 a) -{ - return make_uint3(a.x, a.y, a.z); -} -inline __host__ __device__ uint3 make_uint3(int3 a) -{ - return make_uint3(uint(a.x), uint(a.y), uint(a.z)); -} - -inline __host__ __device__ float4 make_float4(float s) -{ - return make_float4(s, s, s, s); -} -inline __host__ __device__ float4 make_float4(float3 a) -{ - return make_float4(a.x, a.y, a.z, 0.0f); -} -inline __host__ __device__ float4 make_float4(float3 a, float w) -{ - return make_float4(a.x, a.y, a.z, w); -} -inline __host__ __device__ float4 make_float4(int4 a) -{ - return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); -} -inline __host__ __device__ float4 make_float4(uint4 a) -{ - return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); -} - -inline __host__ __device__ int4 make_int4(int s) -{ - return make_int4(s, s, s, s); -} -inline __host__ __device__ int4 make_int4(int3 a) -{ - return make_int4(a.x, a.y, a.z, 0); -} -inline __host__ __device__ int4 make_int4(int3 a, int w) -{ - return make_int4(a.x, a.y, a.z, w); -} -inline __host__ __device__ int4 make_int4(uint4 a) -{ - return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); -} -inline __host__ __device__ int4 make_int4(float4 a) -{ - 
return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); -} - - -inline __host__ __device__ uint4 make_uint4(uint s) -{ - return make_uint4(s, s, s, s); -} -inline __host__ __device__ uint4 make_uint4(uint3 a) -{ - return make_uint4(a.x, a.y, a.z, 0); -} -inline __host__ __device__ uint4 make_uint4(uint3 a, uint w) -{ - return make_uint4(a.x, a.y, a.z, w); -} -inline __host__ __device__ uint4 make_uint4(int4 a) -{ - return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// negate -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator-(float2 &a) -{ - return make_float2(-a.x, -a.y); -} -inline __host__ __device__ int2 operator-(int2 &a) -{ - return make_int2(-a.x, -a.y); -} -inline __host__ __device__ float3 operator-(float3 &a) -{ - return make_float3(-a.x, -a.y, -a.z); -} -inline __host__ __device__ int3 operator-(int3 &a) -{ - return make_int3(-a.x, -a.y, -a.z); -} -inline __host__ __device__ float4 operator-(float4 &a) -{ - return make_float4(-a.x, -a.y, -a.z, -a.w); -} -inline __host__ __device__ int4 operator-(int4 &a) -{ - return make_int4(-a.x, -a.y, -a.z, -a.w); -} - -//////////////////////////////////////////////////////////////////////////////// -// addition -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator+(float2 a, float2 b) -{ - return make_float2(a.x + b.x, a.y + b.y); -} -inline __host__ __device__ void operator+=(float2 &a, float2 b) -{ - a.x += b.x; a.y += b.y; -} -inline __host__ __device__ float2 operator+(float2 a, float b) -{ - return make_float2(a.x + b, a.y + b); -} -inline __host__ __device__ float2 operator+(float b, float2 a) -{ - return make_float2(a.x + b, a.y + b); -} -inline __host__ __device__ void operator+=(float2 &a, float b) -{ - a.x += b; a.y += b; -} - -inline __host__ __device__ int2 
operator+(int2 a, int2 b) -{ - return make_int2(a.x + b.x, a.y + b.y); -} -inline __host__ __device__ void operator+=(int2 &a, int2 b) -{ - a.x += b.x; a.y += b.y; -} -inline __host__ __device__ int2 operator+(int2 a, int b) -{ - return make_int2(a.x + b, a.y + b); -} -inline __host__ __device__ int2 operator+(int b, int2 a) -{ - return make_int2(a.x + b, a.y + b); -} -inline __host__ __device__ void operator+=(int2 &a, int b) -{ - a.x += b; a.y += b; -} - -inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) -{ - return make_uint2(a.x + b.x, a.y + b.y); -} -inline __host__ __device__ void operator+=(uint2 &a, uint2 b) -{ - a.x += b.x; a.y += b.y; -} -inline __host__ __device__ uint2 operator+(uint2 a, uint b) -{ - return make_uint2(a.x + b, a.y + b); -} -inline __host__ __device__ uint2 operator+(uint b, uint2 a) -{ - return make_uint2(a.x + b, a.y + b); -} -inline __host__ __device__ void operator+=(uint2 &a, uint b) -{ - a.x += b; a.y += b; -} - - -inline __host__ __device__ float3 operator+(float3 a, float3 b) -{ - return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); -} -inline __host__ __device__ void operator+=(float3 &a, float3 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; -} -inline __host__ __device__ float3 operator+(float3 a, float b) -{ - return make_float3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ void operator+=(float3 &a, float b) -{ - a.x += b; a.y += b; a.z += b; -} - -inline __host__ __device__ int3 operator+(int3 a, int3 b) -{ - return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); -} -inline __host__ __device__ void operator+=(int3 &a, int3 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; -} -inline __host__ __device__ int3 operator+(int3 a, int b) -{ - return make_int3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ void operator+=(int3 &a, int b) -{ - a.x += b; a.y += b; a.z += b; -} - -inline __host__ __device__ uint3 operator+(uint3 a, uint3 b) -{ - return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z); -} -inline 
__host__ __device__ void operator+=(uint3 &a, uint3 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; -} -inline __host__ __device__ uint3 operator+(uint3 a, uint b) -{ - return make_uint3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ void operator+=(uint3 &a, uint b) -{ - a.x += b; a.y += b; a.z += b; -} - -inline __host__ __device__ int3 operator+(int b, int3 a) -{ - return make_int3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ uint3 operator+(uint b, uint3 a) -{ - return make_uint3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ float3 operator+(float b, float3 a) -{ - return make_float3(a.x + b, a.y + b, a.z + b); -} - -inline __host__ __device__ float4 operator+(float4 a, float4 b) -{ - return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -} -inline __host__ __device__ void operator+=(float4 &a, float4 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; -} -inline __host__ __device__ float4 operator+(float4 a, float b) -{ - return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ float4 operator+(float b, float4 a) -{ - return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ void operator+=(float4 &a, float b) -{ - a.x += b; a.y += b; a.z += b; a.w += b; -} - -inline __host__ __device__ int4 operator+(int4 a, int4 b) -{ - return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -} -inline __host__ __device__ void operator+=(int4 &a, int4 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; -} -inline __host__ __device__ int4 operator+(int4 a, int b) -{ - return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ int4 operator+(int b, int4 a) -{ - return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ void operator+=(int4 &a, int b) -{ - a.x += b; a.y += b; a.z += b; a.w += b; -} - -inline __host__ __device__ uint4 operator+(uint4 a, uint4 b) -{ - return make_uint4(a.x + b.x, a.y + b.y, a.z + 
b.z, a.w + b.w); -} -inline __host__ __device__ void operator+=(uint4 &a, uint4 b) -{ - a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; -} -inline __host__ __device__ uint4 operator+(uint4 a, uint b) -{ - return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ uint4 operator+(uint b, uint4 a) -{ - return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ void operator+=(uint4 &a, uint b) -{ - a.x += b; a.y += b; a.z += b; a.w += b; -} - -//////////////////////////////////////////////////////////////////////////////// -// subtract -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator-(float2 a, float2 b) -{ - return make_float2(a.x - b.x, a.y - b.y); -} -inline __host__ __device__ void operator-=(float2 &a, float2 b) -{ - a.x -= b.x; a.y -= b.y; -} -inline __host__ __device__ float2 operator-(float2 a, float b) -{ - return make_float2(a.x - b, a.y - b); -} -inline __host__ __device__ float2 operator-(float b, float2 a) -{ - return make_float2(b - a.x, b - a.y); -} -inline __host__ __device__ void operator-=(float2 &a, float b) -{ - a.x -= b; a.y -= b; -} - -inline __host__ __device__ int2 operator-(int2 a, int2 b) -{ - return make_int2(a.x - b.x, a.y - b.y); -} -inline __host__ __device__ void operator-=(int2 &a, int2 b) -{ - a.x -= b.x; a.y -= b.y; -} -inline __host__ __device__ int2 operator-(int2 a, int b) -{ - return make_int2(a.x - b, a.y - b); -} -inline __host__ __device__ int2 operator-(int b, int2 a) -{ - return make_int2(b - a.x, b - a.y); -} -inline __host__ __device__ void operator-=(int2 &a, int b) -{ - a.x -= b; a.y -= b; -} - -inline __host__ __device__ uint2 operator-(uint2 a, uint2 b) -{ - return make_uint2(a.x - b.x, a.y - b.y); -} -inline __host__ __device__ void operator-=(uint2 &a, uint2 b) -{ - a.x -= b.x; a.y -= b.y; -} -inline __host__ __device__ uint2 operator-(uint2 a, uint b) -{ - return make_uint2(a.x - b, 
a.y - b); -} -inline __host__ __device__ uint2 operator-(uint b, uint2 a) -{ - return make_uint2(b - a.x, b - a.y); -} -inline __host__ __device__ void operator-=(uint2 &a, uint b) -{ - a.x -= b; a.y -= b; -} - -inline __host__ __device__ float3 operator-(float3 a, float3 b) -{ - return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -} -inline __host__ __device__ void operator-=(float3 &a, float3 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; -} -inline __host__ __device__ float3 operator-(float3 a, float b) -{ - return make_float3(a.x - b, a.y - b, a.z - b); -} -inline __host__ __device__ float3 operator-(float b, float3 a) -{ - return make_float3(b - a.x, b - a.y, b - a.z); -} -inline __host__ __device__ void operator-=(float3 &a, float b) -{ - a.x -= b; a.y -= b; a.z -= b; -} - -inline __host__ __device__ int3 operator-(int3 a, int3 b) -{ - return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); -} -inline __host__ __device__ void operator-=(int3 &a, int3 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; -} -inline __host__ __device__ int3 operator-(int3 a, int b) -{ - return make_int3(a.x - b, a.y - b, a.z - b); -} -inline __host__ __device__ int3 operator-(int b, int3 a) -{ - return make_int3(b - a.x, b - a.y, b - a.z); -} -inline __host__ __device__ void operator-=(int3 &a, int b) -{ - a.x -= b; a.y -= b; a.z -= b; -} - -inline __host__ __device__ uint3 operator-(uint3 a, uint3 b) -{ - return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z); -} -inline __host__ __device__ void operator-=(uint3 &a, uint3 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; -} -inline __host__ __device__ uint3 operator-(uint3 a, uint b) -{ - return make_uint3(a.x - b, a.y - b, a.z - b); -} -inline __host__ __device__ uint3 operator-(uint b, uint3 a) -{ - return make_uint3(b - a.x, b - a.y, b - a.z); -} -inline __host__ __device__ void operator-=(uint3 &a, uint b) -{ - a.x -= b; a.y -= b; a.z -= b; -} - -inline __host__ __device__ float4 operator-(float4 a, float4 b) -{ - return make_float4(a.x - b.x, a.y - 
b.y, a.z - b.z, a.w - b.w); -} -inline __host__ __device__ void operator-=(float4 &a, float4 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; -} -inline __host__ __device__ float4 operator-(float4 a, float b) -{ - return make_float4(a.x - b, a.y - b, a.z - b, a.w - b); -} -inline __host__ __device__ void operator-=(float4 &a, float b) -{ - a.x -= b; a.y -= b; a.z -= b; a.w -= b; -} - -inline __host__ __device__ int4 operator-(int4 a, int4 b) -{ - return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); -} -inline __host__ __device__ void operator-=(int4 &a, int4 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; -} -inline __host__ __device__ int4 operator-(int4 a, int b) -{ - return make_int4(a.x - b, a.y - b, a.z - b, a.w - b); -} -inline __host__ __device__ int4 operator-(int b, int4 a) -{ - return make_int4(b - a.x, b - a.y, b - a.z, b - a.w); -} -inline __host__ __device__ void operator-=(int4 &a, int b) -{ - a.x -= b; a.y -= b; a.z -= b; a.w -= b; -} - -inline __host__ __device__ uint4 operator-(uint4 a, uint4 b) -{ - return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); -} -inline __host__ __device__ void operator-=(uint4 &a, uint4 b) -{ - a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; -} -inline __host__ __device__ uint4 operator-(uint4 a, uint b) -{ - return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b); -} -inline __host__ __device__ uint4 operator-(uint b, uint4 a) -{ - return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w); -} -inline __host__ __device__ void operator-=(uint4 &a, uint b) -{ - a.x -= b; a.y -= b; a.z -= b; a.w -= b; -} - -//////////////////////////////////////////////////////////////////////////////// -// multiply -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator*(float2 a, float2 b) -{ - return make_float2(a.x * b.x, a.y * b.y); -} -inline __host__ __device__ void operator*=(float2 &a, float2 b) -{ - a.x *= b.x; a.y *= b.y; -} 
-inline __host__ __device__ float2 operator*(float2 a, float b) -{ - return make_float2(a.x * b, a.y * b); -} -inline __host__ __device__ float2 operator*(float b, float2 a) -{ - return make_float2(b * a.x, b * a.y); -} -inline __host__ __device__ void operator*=(float2 &a, float b) -{ - a.x *= b; a.y *= b; -} - -inline __host__ __device__ int2 operator*(int2 a, int2 b) -{ - return make_int2(a.x * b.x, a.y * b.y); -} -inline __host__ __device__ void operator*=(int2 &a, int2 b) -{ - a.x *= b.x; a.y *= b.y; -} -inline __host__ __device__ int2 operator*(int2 a, int b) -{ - return make_int2(a.x * b, a.y * b); -} -inline __host__ __device__ int2 operator*(int b, int2 a) -{ - return make_int2(b * a.x, b * a.y); -} -inline __host__ __device__ void operator*=(int2 &a, int b) -{ - a.x *= b; a.y *= b; -} - -inline __host__ __device__ uint2 operator*(uint2 a, uint2 b) -{ - return make_uint2(a.x * b.x, a.y * b.y); -} -inline __host__ __device__ void operator*=(uint2 &a, uint2 b) -{ - a.x *= b.x; a.y *= b.y; -} -inline __host__ __device__ uint2 operator*(uint2 a, uint b) -{ - return make_uint2(a.x * b, a.y * b); -} -inline __host__ __device__ uint2 operator*(uint b, uint2 a) -{ - return make_uint2(b * a.x, b * a.y); -} -inline __host__ __device__ void operator*=(uint2 &a, uint b) -{ - a.x *= b; a.y *= b; -} - -inline __host__ __device__ float3 operator*(float3 a, float3 b) -{ - return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); -} -inline __host__ __device__ void operator*=(float3 &a, float3 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; -} -inline __host__ __device__ float3 operator*(float3 a, float b) -{ - return make_float3(a.x * b, a.y * b, a.z * b); -} -inline __host__ __device__ float3 operator*(float b, float3 a) -{ - return make_float3(b * a.x, b * a.y, b * a.z); -} -inline __host__ __device__ void operator*=(float3 &a, float b) -{ - a.x *= b; a.y *= b; a.z *= b; -} - -inline __host__ __device__ int3 operator*(int3 a, int3 b) -{ - return make_int3(a.x * b.x, a.y * b.y, 
a.z * b.z); -} -inline __host__ __device__ void operator*=(int3 &a, int3 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; -} -inline __host__ __device__ int3 operator*(int3 a, int b) -{ - return make_int3(a.x * b, a.y * b, a.z * b); -} -inline __host__ __device__ int3 operator*(int b, int3 a) -{ - return make_int3(b * a.x, b * a.y, b * a.z); -} -inline __host__ __device__ void operator*=(int3 &a, int b) -{ - a.x *= b; a.y *= b; a.z *= b; -} - -inline __host__ __device__ uint3 operator*(uint3 a, uint3 b) -{ - return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z); -} -inline __host__ __device__ void operator*=(uint3 &a, uint3 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; -} -inline __host__ __device__ uint3 operator*(uint3 a, uint b) -{ - return make_uint3(a.x * b, a.y * b, a.z * b); -} -inline __host__ __device__ uint3 operator*(uint b, uint3 a) -{ - return make_uint3(b * a.x, b * a.y, b * a.z); -} -inline __host__ __device__ void operator*=(uint3 &a, uint b) -{ - a.x *= b; a.y *= b; a.z *= b; -} - -inline __host__ __device__ float4 operator*(float4 a, float4 b) -{ - return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -} -inline __host__ __device__ void operator*=(float4 &a, float4 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; -} -inline __host__ __device__ float4 operator*(float4 a, float b) -{ - return make_float4(a.x * b, a.y * b, a.z * b, a.w * b); -} -inline __host__ __device__ float4 operator*(float b, float4 a) -{ - return make_float4(b * a.x, b * a.y, b * a.z, b * a.w); -} -inline __host__ __device__ void operator*=(float4 &a, float b) -{ - a.x *= b; a.y *= b; a.z *= b; a.w *= b; -} - -inline __host__ __device__ int4 operator*(int4 a, int4 b) -{ - return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -} -inline __host__ __device__ void operator*=(int4 &a, int4 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; -} -inline __host__ __device__ int4 operator*(int4 a, int b) -{ - return make_int4(a.x * b, a.y * b, a.z * b, a.w * b); -} 
-inline __host__ __device__ int4 operator*(int b, int4 a) -{ - return make_int4(b * a.x, b * a.y, b * a.z, b * a.w); -} -inline __host__ __device__ void operator*=(int4 &a, int b) -{ - a.x *= b; a.y *= b; a.z *= b; a.w *= b; -} - -inline __host__ __device__ uint4 operator*(uint4 a, uint4 b) -{ - return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -} -inline __host__ __device__ void operator*=(uint4 &a, uint4 b) -{ - a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; -} -inline __host__ __device__ uint4 operator*(uint4 a, uint b) -{ - return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b); -} -inline __host__ __device__ uint4 operator*(uint b, uint4 a) -{ - return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w); -} -inline __host__ __device__ void operator*=(uint4 &a, uint b) -{ - a.x *= b; a.y *= b; a.z *= b; a.w *= b; -} - -//////////////////////////////////////////////////////////////////////////////// -// divide -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator/(float2 a, float2 b) -{ - return make_float2(a.x / b.x, a.y / b.y); -} -inline __host__ __device__ void operator/=(float2 &a, float2 b) -{ - a.x /= b.x; a.y /= b.y; -} -inline __host__ __device__ float2 operator/(float2 a, float b) -{ - return make_float2(a.x / b, a.y / b); -} -inline __host__ __device__ void operator/=(float2 &a, float b) -{ - a.x /= b; a.y /= b; -} -inline __host__ __device__ float2 operator/(float b, float2 a) -{ - return make_float2(b / a.x, b / a.y); -} - -inline __host__ __device__ float3 operator/(float3 a, float3 b) -{ - return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); -} -inline __host__ __device__ void operator/=(float3 &a, float3 b) -{ - a.x /= b.x; a.y /= b.y; a.z /= b.z; -} -inline __host__ __device__ float3 operator/(float3 a, float b) -{ - return make_float3(a.x / b, a.y / b, a.z / b); -} -inline __host__ __device__ void operator/=(float3 &a, float b) -{ - a.x /= b; a.y /= b; a.z /= b; 
-} -inline __host__ __device__ float3 operator/(float b, float3 a) -{ - return make_float3(b / a.x, b / a.y, b / a.z); -} - -inline __host__ __device__ float4 operator/(float4 a, float4 b) -{ - return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); -} -inline __host__ __device__ void operator/=(float4 &a, float4 b) -{ - a.x /= b.x; a.y /= b.y; a.z /= b.z; a.w /= b.w; -} -inline __host__ __device__ float4 operator/(float4 a, float b) -{ - return make_float4(a.x / b, a.y / b, a.z / b, a.w / b); -} -inline __host__ __device__ void operator/=(float4 &a, float b) -{ - a.x /= b; a.y /= b; a.z /= b; a.w /= b; -} -inline __host__ __device__ float4 operator/(float b, float4 a){ - return make_float4(b / a.x, b / a.y, b / a.z, b / a.w); -} - -//////////////////////////////////////////////////////////////////////////////// -// min -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fminf(float2 a, float2 b) -{ - return make_float2(fminf(a.x,b.x), fminf(a.y,b.y)); -} -inline __host__ __device__ float3 fminf(float3 a, float3 b) -{ - return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z)); -} -inline __host__ __device__ float4 fminf(float4 a, float4 b) -{ - return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w)); -} - -inline __host__ __device__ int2 min(int2 a, int2 b) -{ - return make_int2(min(a.x,b.x), min(a.y,b.y)); -} -inline __host__ __device__ int3 min(int3 a, int3 b) -{ - return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); -} -inline __host__ __device__ int4 min(int4 a, int4 b) -{ - return make_int4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w)); -} - -inline __host__ __device__ uint2 min(uint2 a, uint2 b) -{ - return make_uint2(min(a.x,b.x), min(a.y,b.y)); -} -inline __host__ __device__ uint3 min(uint3 a, uint3 b) -{ - return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); -} -inline __host__ __device__ uint4 min(uint4 a, uint4 b) -{ - 
return make_uint4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// max -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fmaxf(float2 a, float2 b) -{ - return make_float2(fmaxf(a.x,b.x), fmaxf(a.y,b.y)); -} -inline __host__ __device__ float3 fmaxf(float3 a, float3 b) -{ - return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z)); -} -inline __host__ __device__ float4 fmaxf(float4 a, float4 b) -{ - return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w)); -} - -inline __host__ __device__ int2 max(int2 a, int2 b) -{ - return make_int2(max(a.x,b.x), max(a.y,b.y)); -} -inline __host__ __device__ int3 max(int3 a, int3 b) -{ - return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); -} -inline __host__ __device__ int4 max(int4 a, int4 b) -{ - return make_int4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w)); -} - -inline __host__ __device__ uint2 max(uint2 a, uint2 b) -{ - return make_uint2(max(a.x,b.x), max(a.y,b.y)); -} -inline __host__ __device__ uint3 max(uint3 a, uint3 b) -{ - return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); -} -inline __host__ __device__ uint4 max(uint4 a, uint4 b) -{ - return make_uint4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// lerp -// - linear interpolation between a and b, based on value t in [0, 1] range -//////////////////////////////////////////////////////////////////////////////// - -inline __device__ __host__ float lerp(float a, float b, float t) -{ - return a + t*(b-a); -} -inline __device__ __host__ float2 lerp(float2 a, float2 b, float t) -{ - return a + t*(b-a); -} -inline __device__ __host__ float3 lerp(float3 a, float3 b, float t) -{ - return a + t*(b-a); -} -inline __device__ __host__ float4 lerp(float4 a, 
float4 b, float t) -{ - return a + t*(b-a); -} - -//////////////////////////////////////////////////////////////////////////////// -// clamp -// - clamp the value v to be in the range [a, b] -//////////////////////////////////////////////////////////////////////////////// - -inline __device__ __host__ float clamp(float f, float a, float b) -{ - return fmaxf(a, fminf(f, b)); -} -inline __device__ __host__ int clamp(int f, int a, int b) -{ - return max(a, min(f, b)); -} -inline __device__ __host__ uint clamp(uint f, uint a, uint b) -{ - return max(a, min(f, b)); -} - -inline __device__ __host__ float2 clamp(float2 v, float a, float b) -{ - return make_float2(clamp(v.x, a, b), clamp(v.y, a, b)); -} -inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b) -{ - return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); -} -inline __device__ __host__ float3 clamp(float3 v, float a, float b) -{ - return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); -} -inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b) -{ - return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); -} -inline __device__ __host__ float4 clamp(float4 v, float a, float b) -{ - return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); -} -inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b) -{ - return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); -} - -inline __device__ __host__ int2 clamp(int2 v, int a, int b) -{ - return make_int2(clamp(v.x, a, b), clamp(v.y, a, b)); -} -inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b) -{ - return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); -} -inline __device__ __host__ int3 clamp(int3 v, int a, int b) -{ - return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); -} -inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b) -{ - return 
make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); -} -inline __device__ __host__ int4 clamp(int4 v, int a, int b) -{ - return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); -} -inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b) -{ - return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); -} - -inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b) -{ - return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b)); -} -inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b) -{ - return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); -} -inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b) -{ - return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); -} -inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b) -{ - return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); -} -inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b) -{ - return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); -} -inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b) -{ - return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// dot product -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float dot(float2 a, float2 b) -{ - return a.x * b.x + a.y * b.y; -} -inline __host__ __device__ float dot(float3 a, float3 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z; -} -inline __host__ __device__ float dot(float4 a, float4 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} - -inline __host__ __device__ int dot(int2 a, int2 b) -{ - return a.x * b.x + a.y * b.y; -} -inline __host__ __device__ int dot(int3 a, int3 b) -{ - return 
a.x * b.x + a.y * b.y + a.z * b.z; -} -inline __host__ __device__ int dot(int4 a, int4 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} - -inline __host__ __device__ uint dot(uint2 a, uint2 b) -{ - return a.x * b.x + a.y * b.y; -} -inline __host__ __device__ uint dot(uint3 a, uint3 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z; -} -inline __host__ __device__ uint dot(uint4 a, uint4 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} - -//////////////////////////////////////////////////////////////////////////////// -// length -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float length(float2 v) -{ - return sqrtf(dot(v, v)); -} -inline __host__ __device__ float length(float3 v) -{ - return sqrtf(dot(v, v)); -} -inline __host__ __device__ float length(float4 v) -{ - return sqrtf(dot(v, v)); -} - -//////////////////////////////////////////////////////////////////////////////// -// normalize -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 normalize(float2 v) -{ - float invLen = rsqrtf(dot(v, v)); - return v * invLen; -} -inline __host__ __device__ float3 normalize(float3 v) -{ - float invLen = rsqrtf(dot(v, v)); - return v * invLen; -} -inline __host__ __device__ float4 normalize(float4 v) -{ - float invLen = rsqrtf(dot(v, v)); - return v * invLen; -} - -//////////////////////////////////////////////////////////////////////////////// -// floor -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 floorf(float2 v) -{ - return make_float2(floorf(v.x), floorf(v.y)); -} -inline __host__ __device__ float3 floorf(float3 v) -{ - return make_float3(floorf(v.x), floorf(v.y), floorf(v.z)); -} -inline __host__ __device__ float4 floorf(float4 v) -{ - return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w)); -} - 
-//////////////////////////////////////////////////////////////////////////////// -// frac - returns the fractional portion of a scalar or each vector component -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float fracf(float v) -{ - return v - floorf(v); -} -inline __host__ __device__ float2 fracf(float2 v) -{ - return make_float2(fracf(v.x), fracf(v.y)); -} -inline __host__ __device__ float3 fracf(float3 v) -{ - return make_float3(fracf(v.x), fracf(v.y), fracf(v.z)); -} -inline __host__ __device__ float4 fracf(float4 v) -{ - return make_float4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// fmod -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fmodf(float2 a, float2 b) -{ - return make_float2(fmodf(a.x, b.x), fmodf(a.y, b.y)); -} -inline __host__ __device__ float3 fmodf(float3 a, float3 b) -{ - return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z)); -} -inline __host__ __device__ float4 fmodf(float4 a, float4 b) -{ - return make_float4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmodf(a.w, b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// absolute value -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fabs(float2 v) -{ - return make_float2(fabs(v.x), fabs(v.y)); -} -inline __host__ __device__ float3 fabs(float3 v) -{ - return make_float3(fabs(v.x), fabs(v.y), fabs(v.z)); -} -inline __host__ __device__ float4 fabs(float4 v) -{ - return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w)); -} - -inline __host__ __device__ int2 abs(int2 v) -{ - return make_int2(abs(v.x), abs(v.y)); -} -inline __host__ __device__ int3 abs(int3 v) -{ - return make_int3(abs(v.x), abs(v.y), abs(v.z)); -} -inline __host__ 
__device__ int4 abs(int4 v) -{ - return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// reflect -// - returns reflection of incident ray I around surface normal N -// - N should be normalized, reflected vector's length is equal to length of I -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float3 reflect(float3 i, float3 n) -{ - return i - 2.0f * n * dot(n,i); -} - -//////////////////////////////////////////////////////////////////////////////// -// cross product -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float3 cross(float3 a, float3 b) -{ - return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); -} - -//////////////////////////////////////////////////////////////////////////////// -// smoothstep -// - returns 0 if x < a -// - returns 1 if x > b -// - otherwise returns smooth interpolation between 0 and 1 based on x -//////////////////////////////////////////////////////////////////////////////// - -inline __device__ __host__ float smoothstep(float a, float b, float x) -{ - float y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(3.0f - (2.0f*y))); -} -inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x) -{ - float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(make_float2(3.0f) - (make_float2(2.0f)*y))); -} -inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x) -{ - float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(make_float3(3.0f) - (make_float3(2.0f)*y))); -} -inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x) -{ - float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(make_float4(3.0f) - (make_float4(2.0f)*y))); -} - -#endif diff --git a/registration/include/pcl/registration/ia_fpcs.h 
b/registration/include/pcl/registration/ia_fpcs.h deleted file mode 100644 index eeb47782..00000000 --- a/registration/include/pcl/registration/ia_fpcs.h +++ /dev/null @@ -1,571 +0,0 @@ -/* - * Software License Agreement (BSD License) - * - * Point Cloud Library (PCL) - www.pointclouds.org - * Copyright (c) 2014-, Open Perception, Inc. - * Copyright (C) 2008 Ben Gurion University of the Negev, Beer Sheva, Israel. - * - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met - * - * * The use for research only (no for any commercial application). - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of the copyright holder(s) nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef PCL_REGISTRATION_IA_FPCS_H_ -#define PCL_REGISTRATION_IA_FPCS_H_ - -#include -#include -#include - -namespace pcl -{ - /** \brief Compute the mean point density of a given point cloud. - * \param[in] cloud pointer to the input point cloud - * \param[in] max_dist maximum distance of a point to be considered as a neighbor - * \param[in] nr_threads number of threads to use (default = 1, only used if OpenMP flag is set) - * \return the mean point density of a given point cloud - */ - template inline float - getMeanPointDensity (const typename pcl::PointCloud::ConstPtr &cloud, float max_dist, int nr_threads = 1); - - /** \brief Compute the mean point density of a given point cloud. 
- * \param[in] cloud pointer to the input point cloud - * \param[in] indices the vector of point indices to use from \a cloud - * \param[in] max_dist maximum distance of a point to be considered as a neighbor - * \param[in] nr_threads number of threads to use (default = 1, only used if OpenMP flag is set) - * \return the mean point density of a given point cloud - */ - template inline float - getMeanPointDensity (const typename pcl::PointCloud::ConstPtr &cloud, const std::vector &indices, - float max_dist, int nr_threads = 1); - - - namespace registration - { - /** \brief FPCSInitialAlignment computes corresponding four point congruent sets as described in: - * "4-points congruent sets for robust pairwise surface registration", Dror Aiger, Niloy Mitra, Daniel Cohen-Or. - * ACM Transactions on Graphics, vol. 27(3), 2008 - * \author P.W.Theiler - * \ingroup registration - */ - template - class FPCSInitialAlignment : public Registration - { - public: - /** \cond */ - typedef boost::shared_ptr > Ptr; - typedef boost::shared_ptr > ConstPtr; - - typedef pcl::search::KdTree KdTreeReciprocal; - typedef typename KdTreeReciprocal::Ptr KdTreeReciprocalPtr; - - typedef pcl::PointCloud PointCloudTarget; - typedef pcl::PointCloud PointCloudSource; - typedef typename PointCloudSource::Ptr PointCloudSourcePtr; - typedef typename PointCloudSource::iterator PointCloudSourceIterator; - - typedef pcl::PointCloud Normals; - typedef typename Normals::ConstPtr NormalsConstPtr; - - typedef pcl::registration::MatchingCandidate MatchingCandidate; - typedef pcl::registration::MatchingCandidates MatchingCandidates; - /** \endcond */ - - - /** \brief Constructor. - * Resets the maximum number of iterations to 0 thus forcing an internal computation if not set by the user. - * Sets the number of RANSAC iterations to 1000 and the standard transformation estimation to TransformationEstimation3Point. - */ - FPCSInitialAlignment (); - - /** \brief Destructor. 
*/ - virtual ~FPCSInitialAlignment () - {}; - - - /** \brief Provide a pointer to the vector of target indices. - * \param[in] target_indices a pointer to the target indices - */ - inline void - setTargetIndices (const IndicesPtr &target_indices) - { - target_indices_ = target_indices; - }; - - /** \return a pointer to the vector of target indices. */ - inline IndicesPtr - getTargetIndices () const - { - return (target_indices_); - }; - - - /** \brief Provide a pointer to the normals of the source point cloud. - * \param[in] source_normals pointer to the normals of the source pointer cloud. - */ - inline void - setSourceNormals (const NormalsConstPtr &source_normals) - { - source_normals_ = source_normals; - }; - - /** \return the normals of the source point cloud. */ - inline NormalsConstPtr - getSourceNormals () const - { - return (source_normals_); - }; - - - /** \brief Provide a pointer to the normals of the target point cloud. - * \param[in] target_normals point to the normals of the target point cloud. - */ - inline void - setTargetNormals (const NormalsConstPtr &target_normals) - { - target_normals_ = target_normals; - }; - - /** \return the normals of the target point cloud. */ - inline NormalsConstPtr - getTargetNormals () const - { - return (target_normals_); - }; - - - /** \brief Set the number of used threads if OpenMP is activated. - * \param[in] nr_threads the number of used threads - */ - inline void - setNumberOfThreads (int nr_threads) - { - nr_threads_ = nr_threads; - }; - - /** \return the number of threads used if OpenMP is activated. */ - inline int - getNumberOfThreads () const - { - return (nr_threads_); - }; - - - /** \brief Set the constant factor delta which weights the internally calculated parameters. 
- * \param[in] delta the weight factor delta - * \param[in] normalize flag if delta should be normalized according to point cloud density - */ - inline void - setDelta (float delta, bool normalize = false) - { - delta_ = delta; - normalize_delta_ = normalize; - }; - - /** \return the constant factor delta which weights the internally calculated parameters. */ - inline float - getDelta () const - { - return (delta_); - }; - - - /** \brief Set the approximate overlap between source and target. - * \param[in] approx_overlap the estimated overlap - */ - inline void - setApproxOverlap (float approx_overlap) - { - approx_overlap_ = approx_overlap; - }; - - /** \return the approximated overlap between source and target. */ - inline float - getApproxOverlap () const - { - return (approx_overlap_); - }; - - - /** \brief Set the scoring threshold used for early finishing the method. - * \param[in] score_threshold early terminating score criteria - */ - inline void - setScoreThreshold (float score_threshold) - { - score_threshold_ = score_threshold; - }; - - /** \return the scoring threshold used for early finishing the method. */ - inline float - getScoreThreshold () const - { - return (score_threshold_); - }; - - - /** \brief Set the number of source samples to use during alignment. - * \param[in] nr_samples the number of source samples - */ - inline void - setNumberOfSamples (int nr_samples) - { - nr_samples_ = nr_samples; - }; - - /** \return the number of source samples to use during alignment. */ - inline int - getNumberOfSamples () const - { - return (nr_samples_); - }; - - - /** \brief Set the maximum normal difference between valid point correspondences in degree. - * \param[in] max_norm_diff the maximum difference in degree - */ - inline void - setMaxNormalDifference (float max_norm_diff) - { - max_norm_diff_ = max_norm_diff; - }; - - /** \return the maximum normal difference between valid point correspondences in degree. 
*/ - inline float - getMaxNormalDifference () const - { - return (max_norm_diff_); - }; - - - /** \brief Set the maximum computation time in seconds. - * \param[in] max_runtime the maximum runtime of the method in seconds - */ - inline void - setMaxComputationTime (int max_runtime) - { - max_runtime_ = max_runtime; - }; - - /** \return the maximum computation time in seconds. */ - inline int - getMaxComputationTime () const - { - return (max_runtime_); - }; - - - /** \return the fitness score of the best scored four-point match. */ - inline float - getFitnessScore () const - { - return (fitness_score_); - }; - - protected: - - using PCLBase ::deinitCompute; - using PCLBase ::input_; - using PCLBase ::indices_; - - using Registration ::reg_name_; - using Registration ::target_; - using Registration ::tree_; - using Registration ::correspondences_; - using Registration ::target_cloud_updated_; - using Registration ::final_transformation_; - using Registration ::max_iterations_; - using Registration ::ransac_iterations_; - using Registration ::transformation_estimation_; - using Registration ::converged_; - - - /** \brief Rigid transformation computation method. - * \param output the transformed input point cloud dataset using the rigid transformation found - * \param guess The computed transforamtion - */ - virtual void - computeTransformation (PointCloudSource &output, const Eigen::Matrix4f& guess); - - - /** \brief Internal computation initialization. */ - virtual bool - initCompute (); - - /** \brief Select an approximately coplanar set of four points from the source cloud. 
- * \param[out] base_indices selected source cloud indices, further used as base (B) - * \param[out] ratio the two diagonal intersection ratios (r1,r2) of the base points - * \return - * * < 0 no coplanar four point sets with large enough sampling distance was found - * * = 0 a set of four congruent points was selected - */ - int - selectBase (std::vector &base_indices, float (&ratio)[2]); - - /** \brief Select randomly a triplet of points with large point-to-point distances. The minimum point - * sampling distance is calculated based on the estimated point cloud overlap during initialization. - * - * \param[out] base_indices indices of base B - * \return - * * < 0 no triangle with large enough base lines could be selected - * * = 0 base triangle succesully selected - */ - int - selectBaseTriangle (std::vector &base_indices); - - /** \brief Setup the base (four coplanar points) by ordering the points and computing intersection - * ratios and segment to segment distances of base diagonal. - * - * \param[in,out] base_indices indices of base B (will be reordered) - * \param[out] ratio diagonal intersection ratios of base points - */ - void - setupBase (std::vector &base_indices, float (&ratio)[2]); - - /** \brief Calculate intersection ratios and segment to segment distances of base diagonals. - * \param[in] base_indices indices of base B - * \param[out] ratio diagonal intersection ratios of base points - * \return quality value of diagonal intersection - */ - float - segmentToSegmentDist (const std::vector &base_indices, float (&ratio)[2]); - - /** \brief Search for corresponding point pairs given the distance between two base points. 
- * - * \param[in] idx1 first index of current base segment (in source cloud) - * \param[in] idx2 second index of current base segment (in source cloud) - * \param[out] pairs resulting point pairs with point-to-point distance close to ref_dist - * \return - * * < 0 no corresponding point pair was found - * * = 0 at least one point pair candidate was found - */ - virtual int - bruteForceCorrespondences (int idx1, int idx2, pcl::Correspondences &pairs); - - /** \brief Determine base matches by combining the point pair candidate and search for coinciding - * intersection points using the diagonal segment ratios of base B. The coincidation threshold is - * calculated during initialization (coincidation_limit_). - * - * \param[in] base_indices indices of base B - * \param[out] matches vector of candidate matches w.r.t the base B - * \param[in] pairs_a point pairs corresponding to points of 1st diagonal of base B - * \param[in] pairs_b point pairs corresponding to points of 2nd diagonal of base B - * \param[in] ratio diagonal intersection ratios of base points - * \return - * * < 0 no base match could be found - * * = 0 at least one base match was found - */ - virtual int - determineBaseMatches ( - const std::vector &base_indices, - std::vector > &matches, - const pcl::Correspondences &pairs_a, - const pcl::Correspondences &pairs_b, - const float (&ratio)[2]); - - /** \brief Check if outer rectangle distance of matched points fit with the base rectangle. - * - * \param[in] match_indices indices of match M - * \param[in] ds edge lengths of base B - * \return - * * < 0 at least one edge of the match M has no corresponding one in the base B - * * = 0 edges of match M fits to the ones of base B - */ - int - checkBaseMatch (const std::vector &match_indices, const float (&ds)[4]); - - /** \brief Method to handle current candidate matches. 
Here we validate and evaluate the matches w.r.t the - * base and store the best fitting match (together with its score and estimated transformation). - * \note For forwards compatibility the results are stored in 'vectors of size 1'. - * - * \param[in] base_indices indices of base B - * \param[in,out] matches vector of candidate matches w.r.t the base B. The candidate matches are - * reordered during this step. - * \param[out] candidates vector which contains the candidates matches M - */ - virtual void - handleMatches ( - const std::vector &base_indices, - std::vector > &matches, - MatchingCandidates &candidates); - - /** \brief Sets the correspondences between the base B and the match M by using the distance of each point - * to the centroid of the rectangle. - * - * \param[in] base_indices indices of base B - * \param[in] match_indices indices of match M - * \param[out] correspondences resulting correspondences - */ - virtual void - linkMatchWithBase ( - const std::vector &base_indices, - std::vector &match_indices, - pcl::Correspondences &correspondences); - - /** \brief Validate the matching by computing the transformation between the source and target based on the - * four matched points and by comparing the mean square error (MSE) to a threshold. The MSE limit was - * calculated during initialization (max_mse_). - * - * \param[in] base_indices indices of base B - * \param[in] match_indices indices of match M - * \param[in] correspondences corresondences between source and target - * \param[out] transformation resulting transformation matrix - * \return - * * < 0 MSE bigger than max_mse_ - * * = 0 MSE smaller than max_mse_ - */ - virtual int - validateMatch ( - const std::vector &base_indices, - const std::vector &match_indices, - const pcl::Correspondences &correspondences, - Eigen::Matrix4f &transformation); - - /** \brief Validate the transformation by calculating the number of inliers after transforming the source cloud. 
- * The resulting fitness score is later used as the decision criteria of the best fitting match. - * - * \param[out] transformation updated orientation matrix using all inliers - * \param[out] fitness_score current best fitness_score - * \note fitness score is only updated if the score of the current transformation exceeds the input one. - * \return - * * < 0 if previous result is better than the current one (score remains) - * * = 0 current result is better than the previous one (score updated) - */ - virtual int - validateTransformation (Eigen::Matrix4f &transformation, float &fitness_score); - - /** \brief Final computation of best match out of vector of best matches. To avoid cross thread dependencies - * during parallel running, a best match for each try was calculated. - * \note For forwards compatibility the candidates are stored in vectors of 'vectors of size 1'. - * \param[in] candidates vector of candidate matches - */ - virtual void - finalCompute (const std::vector &candidates); - - - /** \brief Normals of source point cloud. */ - NormalsConstPtr source_normals_; - - /** \brief Normals of target point cloud. */ - NormalsConstPtr target_normals_; - - - /** \brief Number of threads for parallelization (standard = 1). - * \note Only used if run compiled with OpenMP. - */ - int nr_threads_; - - /** \brief Estimated overlap between source and target (standard = 0.5). */ - float approx_overlap_; - - /** \brief Delta value of 4pcs algorithm (standard = 1.0). - * It can be used as: - * * absolute value (normalization = false), value should represent the point accuracy to ensure finding neighbors between source <-> target - * * relative value (normalization = true), to adjust the internally calculated point accuracy (= point density) - */ - float delta_; - - /** \brief Score threshold to stop calculation with success. 
- * If not set by the user it is equal to the approximated overlap - */ - float score_threshold_; - - /** \brief The number of points to uniformly sample the source point cloud. (standard = 0 => full cloud). */ - int nr_samples_; - - /** \brief Maximum normal difference of corresponding point pairs in degrees (standard = 90). */ - float max_norm_diff_; - - /** \brief Maximum allowed computation time in seconds (standard = 0 => ~unlimited). */ - int max_runtime_; - - - /** \brief Resulting fitness score of the best match. */ - float fitness_score_; - - - /** \brief Estimated diamter of the target point cloud. */ - float diameter_; - - /** \brief Estimated squared metric overlap between source and target. - * \note Internally calculated using the estimated overlap and the extent of the source cloud. - * It is used to derive the minimum sampling distance of the base points as well as to calculated - * the number of trys to reliable find a correct mach. - */ - float max_base_diameter_sqr_; - - /** \brief Use normals flag. */ - bool use_normals_; - - /** \brief Normalize delta flag. */ - bool normalize_delta_; - - - /** \brief A pointer to the vector of source point indices to use after sampling. */ - pcl::IndicesPtr source_indices_; - - /** \brief A pointer to the vector of target point indices to use after sampling. */ - pcl::IndicesPtr target_indices_; - - /** \brief Maximal difference between corresponding point pairs in source and target. - * \note Internally calculated using an estimation of the point density. - */ - float max_pair_diff_; - - /** \brief Maximal difference between the length of the base edges and valid match edges. - * \note Internally calculated using an estimation of the point density. - */ - float max_edge_diff_; - - /** \brief Maximal distance between coinciding intersection points to find valid matches. - * \note Internally calculated using an estimation of the point density. 
- */ - float coincidation_limit_; - - /** \brief Maximal mean squared errors of a transformation calculated from a candidate match. - * \note Internally calculated using an estimation of the point density. - */ - float max_mse_; - - /** \brief Maximal squared point distance between source and target points to count as inlier. - * \note Internally calculated using an estimation of the point density. - */ - float max_inlier_dist_sqr_; - - - /** \brief Definition of a small error. */ - const float small_error_; - - }; - }; // namespace registration -}; // namespace pcl - -#include - -#endif // PCL_REGISTRATION_IA_FPCS_H_ diff --git a/registration/include/pcl/registration/impl/ia_fpcs.hpp b/registration/include/pcl/registration/impl/ia_fpcs.hpp deleted file mode 100644 index 585cf9e8..00000000 --- a/registration/include/pcl/registration/impl/ia_fpcs.hpp +++ /dev/null @@ -1,917 +0,0 @@ -/* - * Software License Agreement (BSD License) - * - * Point Cloud Library (PCL) - www.pointclouds.org - * Copyright (c) 2014-, Open Perception, Inc. - * Copyright (C) 2008 Ben Gurion University of the Negev, Beer Sheva, Israel. - * - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met - * - * * The use for research only (no for any commercial application). - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of the copyright holder(s) nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef PCL_REGISTRATION_IMPL_IA_FPCS_H_ -#define PCL_REGISTRATION_IMPL_IA_FPCS_H_ - -#include -#include -#include -#include -#include - -/////////////////////////////////////////////////////////////////////////////////////////// -template inline float -pcl::getMeanPointDensity (const typename pcl::PointCloud::ConstPtr &cloud, float max_dist, int nr_threads) -{ - const float max_dist_sqr = max_dist * max_dist; - const std::size_t s = cloud.size (); - - pcl::search::KdTree tree; - tree.setInputCloud (cloud); - - float mean_dist = 0.f; - int num = 0; - std::vector ids (2); - std::vector dists_sqr (2); - -#ifdef _OPENMP -#pragma omp parallel for \ - reduction (+:mean_dist, num) \ - private (ids, dists_sqr) shared (tree, cloud) \ - default (none)num_threads (nr_threads) -#endif - - for (int i = 0; i < 1000; i++) - { - tree.nearestKSearch (cloud->points[rand () % s], 2, ids, dists_sqr); - if (dists_sqr[1] < max_dist_sqr) - { - mean_dist += std::sqrt (dists_sqr[1]); - num++; - } - } - - return (mean_dist / num); -}; - - -/////////////////////////////////////////////////////////////////////////////////////////// -template inline float -pcl::getMeanPointDensity (const 
typename pcl::PointCloud::ConstPtr &cloud, const std::vector &indices, - float max_dist, int nr_threads) -{ - const float max_dist_sqr = max_dist * max_dist; - const std::size_t s = indices.size (); - - pcl::search::KdTree tree; - tree.setInputCloud (cloud); - - float mean_dist = 0.f; - int num = 0; - std::vector ids (2); - std::vector dists_sqr (2); - -#ifdef _OPENMP -#pragma omp parallel for \ - reduction (+:mean_dist, num) \ - private (ids, dists_sqr) shared (tree, cloud, indices) \ - default (none)num_threads (nr_threads) -#endif - - for (int i = 0; i < 1000; i++) - { - tree.nearestKSearch (cloud->points[indices[rand () % s]], 2, ids, dists_sqr); - if (dists_sqr[1] < max_dist_sqr) - { - mean_dist += std::sqrt (dists_sqr[1]); - num++; - } - } - - return (mean_dist / num); -}; - - -/////////////////////////////////////////////////////////////////////////////////////////// -template -pcl::registration::FPCSInitialAlignment ::FPCSInitialAlignment () : - source_normals_ (), - target_normals_ (), - nr_threads_ (1), - approx_overlap_ (0.5f), - delta_ (1.f), - score_threshold_ (FLT_MAX), - nr_samples_ (0), - max_norm_diff_ (90.f), - max_runtime_ (0), - fitness_score_ (FLT_MAX), - diameter_ (), - max_base_diameter_sqr_ (), - use_normals_ (false), - normalize_delta_ (true), - max_pair_diff_ (), - max_edge_diff_ (), - coincidation_limit_ (), - max_mse_ (), - max_inlier_dist_sqr_ (), - small_error_ (0.00001f) -{ - reg_name_ = "pcl::registration::FPCSInitialAlignment"; - max_iterations_ = 0; - ransac_iterations_ = 1000; - transformation_estimation_.reset (new pcl::registration::TransformationEstimation3Point ); -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template void -pcl::registration::FPCSInitialAlignment ::computeTransformation ( - PointCloudSource &output, - const Eigen::Matrix4f &guess) -{ - if (!initCompute ()) - return; - - final_transformation_ = guess; - bool abort = false; - std::vector all_candidates 
(max_iterations_); - pcl::StopWatch timer; - - #ifdef _OPENMP - #pragma omp parallel num_threads (nr_threads_) - #endif - { - #ifdef _OPENMP - std::srand (static_cast (std::time (NULL)) ^ omp_get_thread_num ()); - #pragma omp for schedule (dynamic) - #endif - for (int i = 0; i < max_iterations_; i++) - { - - #ifdef _OPENMP - #pragma omp flush (abort) - #endif - - MatchingCandidates candidates (1); - std::vector base_indices (4); - float ratio[2]; - all_candidates[i] = candidates; - - if (!abort) - { - // select four coplanar point base - if (selectBase (base_indices, ratio) == 0) - { - // calculate candidate pair correspondences using diagonal lenghts of base - pcl::Correspondences pairs_a, pairs_b; - if (bruteForceCorrespondences (base_indices[0], base_indices[1], pairs_a) == 0 && - bruteForceCorrespondences (base_indices[2], base_indices[3], pairs_b) == 0) - { - // determine candidate matches by combining pair correspondences based on segment distances - std::vector > matches; - if (determineBaseMatches (base_indices, matches, pairs_a, pairs_b, ratio) == 0) - { - // check and evaluate candidate matches and store them - handleMatches (base_indices, matches, candidates); - if (candidates.size () != 0) - all_candidates[i] = candidates; - } - } - } - - // check terminate early (time or fitness_score threshold reached) - abort = (candidates.size () > 0 ? candidates[0].fitness_score < score_threshold_ : abort); - abort = (abort ? 
abort : timer.getTimeSeconds () > max_runtime_); - - - #ifdef _OPENMP - #pragma omp flush (abort) - #endif - } - } - } - - - // determine best match over all trys - finalCompute (all_candidates); - - // apply the final transformation - pcl::transformPointCloud (*input_, output, final_transformation_); - - deinitCompute (); -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template bool -pcl::registration::FPCSInitialAlignment ::initCompute () -{ - std::srand (static_cast (std::time (NULL))); - - // basic pcl initialization - if (!pcl::PCLBase ::initCompute ()) - return (false); - - // check if source and target are given - if (!input_ || !target_) - { - PCL_ERROR ("[%s::initCompute] Source or target dataset not given!\n", reg_name_.c_str ()); - return (false); - } - - if (!target_indices_ || target_indices_->size () == 0) - { - target_indices_.reset (new std::vector (static_cast (target_->size ()))); - int index = 0; - for (std::vector ::iterator it = target_indices_->begin (), it_e = target_indices_->end (); it != it_e; it++) - *it = index++; - target_cloud_updated_ = true; - } - - // if a sample size for the point clouds is given; prefarably no sampling of target cloud - if (nr_samples_ != 0) - { - const int ss = static_cast (indices_->size ()); - const int sample_fraction_src = std::max (1, static_cast (ss / nr_samples_)); - - source_indices_ = pcl::IndicesPtr (new std::vector ); - for (int i = 0; i < ss; i++) - if (rand () % sample_fraction_src == 0) - source_indices_->push_back ((*indices_) [i]); - } - else - source_indices_ = indices_; - - // check usage of normals - if (source_normals_ && target_normals_ && source_normals_->size () == input_->size () && target_normals_->size () == target_->size ()) - use_normals_ = true; - - // set up tree structures - if (target_cloud_updated_) - { - tree_->setInputCloud (target_, target_indices_); - target_cloud_updated_ = false; - } - - // set predefined variables - const 
int min_iterations = 4; - const float diameter_fraction = 0.3f; - - // get diameter of input cloud (distance between farthest points) - Eigen::Vector4f pt_min, pt_max; - pcl::getMinMax3D (*target_, *target_indices_, pt_min, pt_max); - diameter_ = (pt_max - pt_min).norm (); - - // derive the limits for the random base selection - float max_base_diameter = diameter_* approx_overlap_ * 2.f; - max_base_diameter_sqr_ = max_base_diameter * max_base_diameter; - - // normalize the delta - if (normalize_delta_) - { - float mean_dist = getMeanPointDensity (target_, *target_indices_, 0.05f * diameter_, nr_threads_); - delta_ *= mean_dist; - } - - // heuristic determination of number of trials to have high probabilty of finding a good solution - if (max_iterations_ == 0) - { - float first_est = std::log (small_error_) / std::log (1.0 - std::pow ((double) approx_overlap_, (double) min_iterations)); - max_iterations_ = static_cast (first_est / (diameter_fraction * approx_overlap_ * 2.f)); - } - - // set further parameter - if (score_threshold_ == FLT_MAX) - score_threshold_ = 1.f - approx_overlap_; - - if (max_iterations_ < 4) - max_iterations_ = 4; - - if (max_runtime_ < 1) - max_runtime_ = INT_MAX; - - // calculate internal parameters based on the the estimated point density - max_pair_diff_ = delta_ * 2.f; - max_edge_diff_ = delta_ * 4.f; - coincidation_limit_ = delta_ * 2.f; // EDITED: originally std::sqrt (delta_ * 2.f) - max_mse_ = powf (delta_* 2.f, 2.f); - max_inlier_dist_sqr_ = powf (delta_ * 2.f, 2.f); - - // reset fitness_score - fitness_score_ = FLT_MAX; - - return (true); -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template int -pcl::registration::FPCSInitialAlignment ::selectBase ( - std::vector &base_indices, - float (&ratio)[2]) -{ - const float too_close_sqr = max_base_diameter_sqr_*0.01; - - Eigen::VectorXf coefficients (4); - pcl::SampleConsensusModelPlane plane (target_); - plane.setIndices 
(target_indices_); - Eigen::Vector4f centre_pt; - float nearest_to_plane = FLT_MAX; - - // repeat base search until valid quadruple was found or ransac_iterations_ number of trys were unsuccessfull - for (int i = 0; i < ransac_iterations_; i++) - { - // random select an appropriate point triple - if (selectBaseTriangle (base_indices) < 0) - continue; - - std::vector base_triple (base_indices.begin (), base_indices.end () - 1); - plane.computeModelCoefficients (base_triple, coefficients); - pcl::compute3DCentroid (*target_, base_triple, centre_pt); - - // loop over all points in source cloud to find most suitable fourth point - const PointTarget *pt1 = &(target_->points[base_indices[0]]); - const PointTarget *pt2 = &(target_->points[base_indices[1]]); - const PointTarget *pt3 = &(target_->points[base_indices[2]]); - - for (std::vector ::iterator it = target_indices_->begin (), it_e = target_indices_->end (); it != it_e; it++) - { - const PointTarget *pt4 = &(target_->points[*it]); - - float d1 = pcl::squaredEuclideanDistance (*pt4, *pt1); - float d2 = pcl::squaredEuclideanDistance (*pt4, *pt2); - float d3 = pcl::squaredEuclideanDistance (*pt4, *pt3); - float d4 = (pt4->getVector3fMap () - centre_pt.head (3)).squaredNorm (); - - // check distance between points w.r.t minimum sampling distance; EDITED -> 4th point now also limited by max base line - if (d1 < too_close_sqr || d2 < too_close_sqr || d3 < too_close_sqr || d4 < too_close_sqr || - d1 > max_base_diameter_sqr_ || d2 > max_base_diameter_sqr_ || d3 > max_base_diameter_sqr_) - continue; - - // check distance to plane to get point closest to plane - float dist_to_plane = pcl::pointToPlaneDistance (*pt4, coefficients); - if (dist_to_plane < nearest_to_plane) - { - base_indices[3] = *it; - nearest_to_plane = dist_to_plane; - } - } - - // check if at least one point fullfilled the conditions - if (nearest_to_plane != FLT_MAX) - { - // order points to build largest quadrangle and calcuate intersection ratios of 
diagonals - setupBase (base_indices, ratio); - return (0); - } - } - - // return unsuccessfull if no quadruple was selected - return (-1); -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template int -pcl::registration::FPCSInitialAlignment ::selectBaseTriangle (std::vector &base_indices) -{ - int nr_points = static_cast (target_indices_->size ()); - float best_t = 0.f; - - // choose random first point - base_indices[0] = (*target_indices_)[rand () % nr_points]; - int *index1 = &base_indices[0]; - - // random search for 2 other points (as far away as overlap allows) - for (int i = 0; i < ransac_iterations_; i++) - { - int *index2 = &(*target_indices_)[rand () % nr_points]; - int *index3 = &(*target_indices_)[rand () % nr_points]; - - Eigen::Vector3f u = target_->points[*index2].getVector3fMap () - target_->points[*index1].getVector3fMap (); - Eigen::Vector3f v = target_->points[*index3].getVector3fMap () - target_->points[*index1].getVector3fMap (); - float t = u.cross (v).squaredNorm (); // triangle area (0.5 * sqrt(t)) should be maximal - - // check for most suitable point triple - if (t > best_t && u.squaredNorm () < max_base_diameter_sqr_ && v.squaredNorm () < max_base_diameter_sqr_) - { - best_t = t; - base_indices[1] = *index2; - base_indices[2] = *index3; - } - } - - // return if a triplet could be selected - return (best_t == 0.f ? 
-1 : 0); -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template void -pcl::registration::FPCSInitialAlignment ::setupBase ( - std::vector &base_indices, - float (&ratio)[2]) -{ - float best_t = FLT_MAX; - const std::vector copy (base_indices.begin (), base_indices.end ()); - std::vector temp (base_indices.begin (), base_indices.end ()); - - // loop over all combinations of base points - for (std::vector ::const_iterator i = copy.begin (), i_e = copy.end (); i != i_e; i++) - for (std::vector ::const_iterator j = copy.begin (), j_e = copy.end (); j != j_e; j++) - { - if (i == j) - continue; - - for (std::vector ::const_iterator k = copy.begin (), k_e = copy.end (); k != k_e; k++) - { - if (k == j || k == i) - continue; - - std::vector ::const_iterator l = copy.begin (); - while (l == i || l == j || l == k) - l++; - - temp[0] = *i; - temp[1] = *j; - temp[2] = *k; - temp[3] = *l; - - // calculate diagonal intersection ratios and check for suitable segment to segment distances - float ratio_temp[2]; - float t = segmentToSegmentDist (temp, ratio_temp); - if (t < best_t) - { - best_t = t; - ratio[0] = ratio_temp[0]; - ratio[1] = ratio_temp[1]; - base_indices = temp; - } - } - } -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template float -pcl::registration::FPCSInitialAlignment ::segmentToSegmentDist ( - const std::vector &base_indices, - float (&ratio)[2]) -{ - // get point vectors - Eigen::Vector3f u = target_->points[base_indices[1]].getVector3fMap () - target_->points[base_indices[0]].getVector3fMap (); - Eigen::Vector3f v = target_->points[base_indices[3]].getVector3fMap () - target_->points[base_indices[2]].getVector3fMap (); - Eigen::Vector3f w = target_->points[base_indices[0]].getVector3fMap () - target_->points[base_indices[2]].getVector3fMap (); - - // calculate segment distances - float a = u.dot (u); - float b = u.dot (v); - float c = v.dot (v); - 
float d = u.dot (w); - float e = v.dot (w); - float D = a * c - b * b; - float sN = 0.f, sD = D; - float tN = 0.f, tD = D; - - // check segments - if (D < small_error_) - { - sN = 0.f; - sD = 1.f; - tN = e; - tD = c; - } - else - { - sN = (b * e - c * d); - tN = (a * e - b * d); - - if (sN < 0.f) - { - sN = 0.f; - tN = e; - tD = c; - } - else if (sN > sD) - { - sN = sD; - tN = e + b; - tD = c; - } - } - - if (tN < 0.f) - { - tN = 0.f; - - if (-d < 0.f) - sN = 0.f; - - else if (-d > a) - sN = sD; - - else - { - sN = -d; - sD = a; - } - } - - else if (tN > tD) - { - tN = tD; - - if ((-d + b) < 0.f) - sN = 0.f; - - else if ((-d + b) > a) - sN = sD; - - else - { - sN = (-d + b); - sD = a; - } - } - - // set intersection ratios - ratio[0] = (std::abs (sN) < small_error_) ? 0.f : sN / sD; - ratio[1] = (std::abs (tN) < small_error_) ? 0.f : tN / tD; - - Eigen::Vector3f x = w + (ratio[0] * u) - (ratio[1] * v); - return (x.norm ()); -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template int -pcl::registration::FPCSInitialAlignment ::bruteForceCorrespondences ( - int idx1, - int idx2, - pcl::Correspondences &pairs) -{ - const float max_norm_diff = 0.5f * max_norm_diff_ * M_PI / 180.f; - - // calculate reference segment distance and normal angle - float ref_dist = pcl::euclideanDistance (target_->points[idx1], target_->points[idx2]); - float ref_norm_angle = (use_normals_ ? 
(target_normals_->points[idx1].getNormalVector3fMap () - - target_normals_->points[idx2].getNormalVector3fMap ()).norm () : 0.f); - - // loop over all pairs of points in source point cloud - std::vector ::iterator it_out = source_indices_->begin (), it_out_e = source_indices_->end () - 1; - std::vector ::iterator it_in, it_in_e = source_indices_->end (); - for ( ; it_out != it_out_e; it_out++) - { - it_in = it_out + 1; - const PointSource *pt1 = &(*input_)[*it_out]; - for ( ; it_in != it_in_e; it_in++) - { - const PointSource *pt2 = &(*input_)[*it_in]; - - // check point distance compared to reference dist (from base) - float dist = pcl::euclideanDistance (*pt1, *pt2); - if (std::abs(dist - ref_dist) < max_pair_diff_) - { - // add here normal evaluation if normals are given - if (use_normals_) - { - const NormalT *pt1_n = &(source_normals_->points[*it_out]); - const NormalT *pt2_n = &(source_normals_->points[*it_in]); - - float norm_angle_1 = (pt1_n->getNormalVector3fMap () - pt2_n->getNormalVector3fMap ()).norm (); - float norm_angle_2 = (pt1_n->getNormalVector3fMap () + pt2_n->getNormalVector3fMap ()).norm (); - - float norm_diff = std::min (std::abs (norm_angle_1 - ref_norm_angle), std::abs (norm_angle_2 - ref_norm_angle)); - if (norm_diff > max_norm_diff) - continue; - } - - pairs.push_back (pcl::Correspondence (*it_in, *it_out, dist)); - pairs.push_back (pcl::Correspondence (*it_out, *it_in, dist)); - } - } - } - - // return success if at least one correspondence was found - return (pairs.size () == 0 ? 
-1 : 0); -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template int -pcl::registration::FPCSInitialAlignment ::determineBaseMatches ( - const std::vector &base_indices, - std::vector > &matches, - const pcl::Correspondences &pairs_a, - const pcl::Correspondences &pairs_b, - const float (&ratio)[2]) -{ - // calculate edge lengths of base - float dist_base[4]; - dist_base[0] = pcl::euclideanDistance (target_->points[base_indices[0]], target_->points[base_indices[2]]); - dist_base[1] = pcl::euclideanDistance (target_->points[base_indices[0]], target_->points[base_indices[3]]); - dist_base[2] = pcl::euclideanDistance (target_->points[base_indices[1]], target_->points[base_indices[2]]); - dist_base[3] = pcl::euclideanDistance (target_->points[base_indices[1]], target_->points[base_indices[3]]); - - // loop over first point pair correspondences and store intermediate points 'e' in new point cloud - PointCloudSourcePtr cloud_e (new PointCloudSource); - cloud_e->resize (pairs_a.size () * 2); - PointCloudSourceIterator it_pt = cloud_e->begin (); - for (pcl::Correspondences::const_iterator it_pair = pairs_a.begin (), it_pair_e = pairs_a.end () ; it_pair != it_pair_e; it_pair++) - { - const PointSource *pt1 = &(input_->points[it_pair->index_match]); - const PointSource *pt2 = &(input_->points[it_pair->index_query]); - - // calculate intermediate points using both ratios from base (r1,r2) - for (int i = 0; i < 2; i++, it_pt++) - { - it_pt->x = pt1->x + ratio[i] * (pt2->x - pt1->x); - it_pt->y = pt1->y + ratio[i] * (pt2->y - pt1->y); - it_pt->z = pt1->z + ratio[i] * (pt2->z - pt1->z); - } - } - - // initialize new kd tree of intermediate points from first point pair correspondences - KdTreeReciprocalPtr tree_e (new KdTreeReciprocal); - tree_e->setInputCloud (cloud_e); - - std::vector ids; - std::vector dists_sqr; - - // loop over second point pair correspondences - for (pcl::Correspondences::const_iterator it_pair = 
pairs_b.begin (), it_pair_e = pairs_b.end () ; it_pair != it_pair_e; it_pair++) - { - const PointTarget *pt1 = &(input_->points[it_pair->index_match]); - const PointTarget *pt2 = &(input_->points[it_pair->index_query]); - - // calculate intermediate points using both ratios from base (r1,r2) - for (int i = 0; i < 2; i++) - { - PointTarget pt_e; - pt_e.x = pt1->x + ratio[i] * (pt2->x - pt1->x); - pt_e.y = pt1->y + ratio[i] * (pt2->y - pt1->y); - pt_e.z = pt1->z + ratio[i] * (pt2->z - pt1->z); - - // search for corresponding intermediate points - tree_e->radiusSearch (pt_e, coincidation_limit_, ids, dists_sqr); - for (std::vector ::iterator it = ids.begin (), it_e = ids.end (); it != it_e; it++) - { - std::vector match_indices (4); - - match_indices[0] = pairs_a[static_cast (std::floor ((float)(*it/2.f)))].index_match; - match_indices[1] = pairs_a[static_cast (std::floor ((float)(*it/2.f)))].index_query; - match_indices[2] = it_pair->index_match; - match_indices[3] = it_pair->index_query; - - // EDITED: added coarse check of match based on edge length (due to rigid-body ) - if (checkBaseMatch (match_indices, dist_base) < 0) - continue; - - matches.push_back (match_indices); - } - } - } - - // return unsuccessfull if no match was found - return (matches.size () > 0 ? 
0 : -1); -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template int -pcl::registration::FPCSInitialAlignment ::checkBaseMatch ( - const std::vector &match_indices, - const float (&dist_ref)[4]) -{ - float d0 = pcl::euclideanDistance (input_->points[match_indices[0]], input_->points[match_indices[2]]); - float d1 = pcl::euclideanDistance (input_->points[match_indices[0]], input_->points[match_indices[3]]); - float d2 = pcl::euclideanDistance (input_->points[match_indices[1]], input_->points[match_indices[2]]); - float d3 = pcl::euclideanDistance (input_->points[match_indices[1]], input_->points[match_indices[3]]); - - // check edge distances of match w.r.t the base - return (std::abs (d0 - dist_ref[0]) < max_edge_diff_ && std::abs (d1 - dist_ref[1]) < max_edge_diff_ && - std::abs (d2 - dist_ref[2]) < max_edge_diff_ && std::abs (d3 - dist_ref[3]) < max_edge_diff_) ? 0 : -1; -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template void -pcl::registration::FPCSInitialAlignment ::handleMatches ( - const std::vector &base_indices, - std::vector > &matches, - MatchingCandidates &candidates) -{ - candidates.resize (1); - float fitness_score = FLT_MAX; - - // loop over all Candidate matches - for (std::vector >::iterator match_indices = matches.begin (), it_e = matches.end (); match_indices != it_e; match_indices++) - { - Eigen::Matrix4f transformation_temp; - pcl::Correspondences correspondences_temp; - - // determine corresondences between base and match according to their distance to centroid - linkMatchWithBase (base_indices, *match_indices, correspondences_temp); - - // check match based on residuals of the corresponding points after - if (validateMatch (base_indices, *match_indices, correspondences_temp, transformation_temp) < 0) - continue; - - // check resulting using a sub sample of the source point cloud and compare to previous matches - if 
(validateTransformation (transformation_temp, fitness_score) < 0) - continue; - - // store best match as well as associated fitness_score and transformation - candidates[0].fitness_score = fitness_score; - candidates [0].transformation = transformation_temp; - correspondences_temp.erase (correspondences_temp.end () - 1); - candidates[0].correspondences = correspondences_temp; - } -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template void -pcl::registration::FPCSInitialAlignment ::linkMatchWithBase ( - const std::vector &base_indices, - std::vector &match_indices, - pcl::Correspondences &correspondences) -{ - // calculate centroid of base and target - Eigen::Vector4f centre_base, centre_match; - pcl::compute3DCentroid (*target_, base_indices, centre_base); - pcl::compute3DCentroid (*input_, match_indices, centre_match); - - PointTarget centre_pt_base; - centre_pt_base.x = centre_base[0]; - centre_pt_base.y = centre_base[1]; - centre_pt_base.z = centre_base[2]; - - PointSource centre_pt_match; - centre_pt_match.x = centre_match[0]; - centre_pt_match.y = centre_match[1]; - centre_pt_match.z = centre_match[2]; - - // find corresponding points according to their distance to the centroid - std::vector copy = match_indices; - - std::vector ::const_iterator it_base = base_indices.begin (), it_base_e = base_indices.end (); - std::vector ::iterator it_match, it_match_e = copy.end (); - std::vector ::iterator it_match_orig = match_indices.begin (); - for (; it_base != it_base_e; it_base++, it_match_orig++) - { - float dist_sqr_1 = pcl::squaredEuclideanDistance (target_->points[*it_base], centre_pt_base); - float best_diff_sqr = FLT_MAX; - int best_index; - - for (it_match = copy.begin (); it_match != it_match_e; it_match++) - { - // calculate difference of distances to centre point - float dist_sqr_2 = pcl::squaredEuclideanDistance (input_->points[*it_match], centre_pt_match); - float diff_sqr = std::abs(dist_sqr_1 - 
dist_sqr_2); - - if (diff_sqr < best_diff_sqr) - { - best_diff_sqr = diff_sqr; - best_index = *it_match; - } - } - - // assign new correspondence and update indices of matched targets - correspondences.push_back (pcl::Correspondence (best_index, *it_base, best_diff_sqr)); - *it_match_orig = best_index; - } -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template int -pcl::registration::FPCSInitialAlignment ::validateMatch ( - const std::vector &base_indices, - const std::vector &match_indices, - const pcl::Correspondences &correspondences, - Eigen::Matrix4f &transformation) -{ - // only use triplet of points to simlify process (possible due to planar case) - pcl::Correspondences correspondences_temp = correspondences; - correspondences_temp.erase (correspondences_temp.end () - 1); - - // estimate transformation between correspondence set - transformation_estimation_->estimateRigidTransformation (*input_, *target_, correspondences_temp, transformation); - - // transform base points - PointCloudSource match_transformed; - pcl::transformPointCloud (*input_, match_indices, match_transformed, transformation); - - // calculate residuals of transformation and check against maximum threshold - std::size_t nr_points = correspondences_temp.size (); - float mse = 0.f; - for (std::size_t i = 0; i < nr_points; i++) - mse += pcl::squaredEuclideanDistance (match_transformed.points [i], target_->points [base_indices[i]]); - - mse /= nr_points; - return (mse < max_mse_ ? 
0 : -1); -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template int -pcl::registration::FPCSInitialAlignment ::validateTransformation ( - Eigen::Matrix4f &transformation, - float &fitness_score) -{ - // transform source point cloud - PointCloudSource source_transformed; - pcl::transformPointCloud (*input_, *source_indices_, source_transformed, transformation); - - std::size_t nr_points = source_transformed.size (); - std::size_t terminate_value = fitness_score > 1 ? 0 : static_cast ((1.f - fitness_score) * nr_points); - - float inlier_score_temp = 0; - std::vector ids; - std::vector dists_sqr; - PointCloudSourceIterator it = source_transformed.begin (); - - for (std::size_t i = 0; i < nr_points; it++, i++) - { - // search for nearest point using kd tree search - tree_->nearestKSearch (*it, 1, ids, dists_sqr); - inlier_score_temp += (dists_sqr[0] < max_inlier_dist_sqr_ ? 1 : 0); - - // early terminating - if (nr_points - i + inlier_score_temp < terminate_value) - break; - } - - // check current costs and return unsuccessfull if larger than previous ones - inlier_score_temp /= static_cast (nr_points); - float fitness_score_temp = 1.f - inlier_score_temp; - - if (fitness_score_temp > fitness_score) - return (-1); - - fitness_score = fitness_score_temp; - return (0); -} - - -/////////////////////////////////////////////////////////////////////////////////////////// -template void -pcl::registration::FPCSInitialAlignment ::finalCompute ( - const std::vector &candidates) -{ - // get best fitness_score over all trys - int nr_candidates = static_cast (candidates.size ()); - int best_index = -1; - float best_score = FLT_MAX; - for (int i = 0; i < nr_candidates; i++) - { - const float &fitness_score = candidates [i][0].fitness_score; - if (fitness_score < best_score) - { - best_score = fitness_score; - best_index = i; - } - } - - // check if a valid candidate was available - if (!(best_index < 0)) - { - fitness_score_ = 
candidates [best_index][0].fitness_score; - final_transformation_ = candidates [best_index][0].transformation; - *correspondences_ = candidates [best_index][0].correspondences; - - // here we define convergence if resulting fitness_score is below 1-threshold - converged_ = fitness_score_ < score_threshold_; - } -} - -/////////////////////////////////////////////////////////////////////////////////////////// - -#endif // PCL_REGISTRATION_IMPL_IA_4PCS_H_ diff --git a/registration/include/pcl/registration/matching_candidate.h b/registration/include/pcl/registration/matching_candidate.h deleted file mode 100644 index 51c822eb..00000000 --- a/registration/include/pcl/registration/matching_candidate.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Software License Agreement (BSD License) - * - * Point Cloud Library (PCL) - www.pointclouds.org - * Copyright (c) 2014-, Open Perception, Inc. - * Copyright (C) 2008 Ben Gurion University of the Negev, Beer Sheva, Israel. - * - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met - * - * * The use for research only (no for any commercial application). - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of the copyright holder(s) nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef PCL_REGISTRATION_MATCHING_CANDIDATE_H_ -#define PCL_REGISTRATION_MATCHING_CANDIDATE_H_ - -#include -#include - -namespace pcl -{ - namespace registration - { - /** \brief Container for matching candidate consisting of - * - * * fitness score value as a result of the matching algorithm - * * correspondences between source and target data set - * * transformation matrix calculated based on the correspondences - * - */ - struct MatchingCandidate - { - /** \brief Constructor. */ - MatchingCandidate () : - fitness_score (FLT_MAX), - correspondences (), - transformation (Eigen::Matrix4f::Identity ()) - {}; - - /** \brief Value constructor. */ - MatchingCandidate (float s, const pcl::Correspondences &c, const Eigen::Matrix4f &m) : - fitness_score (s), - correspondences (c), - transformation (m) - {}; - - /** \brief Destructor. */ - ~MatchingCandidate () - {}; - - - /** \brief Fitness score of current candidate resulting from matching algorithm. */ - float fitness_score; - - /** \brief Correspondences between source <-> target. */ - pcl::Correspondences correspondences; - - /** \brief Corresponding transformation matrix retrieved using \a corrs. 
*/ - Eigen::Matrix4f transformation; - - EIGEN_MAKE_ALIGNED_OPERATOR_NEW - }; - - typedef std::vector > MatchingCandidates; - - /** \brief Sorting of candidates based on fitness score value. */ - struct by_score - { - /** \brief Operator used to sort candidates based on fitness score. */ - bool operator () (MatchingCandidate const &left, MatchingCandidate const &right) - { - return (left.fitness_score < right.fitness_score); - } - }; - - }; // namespace registration -}; // namespace pcl - - -#endif // PCL_REGISTRATION_MATCHING_CANDIDATE_H_