diff options
-rw-r--r-- | .SRCINFO | 4 | ||||
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | PKGBUILD | 10 | ||||
-rw-r--r-- | kernelmatrix_kernel.cu.patch | 56 |
4 files changed, 67 insertions, 4 deletions
@@ -1,7 +1,7 @@ pkgbase = thundersvm pkgdesc = A Fast SVM Library on GPUs and CPUs pkgver = 0.3.4 - pkgrel = 2 + pkgrel = 3 url = https://github.com/Xtra-Computing/thundersvm arch = x86_64 license = APL @@ -11,7 +11,9 @@ pkgbase = thundersvm makedepends = python-setuptools makedepends = chrpath source = https://github.com/Xtra-Computing/thundersvm/archive/v0.3.4.tar.gz + source = kernelmatrix_kernel.cu.patch sha256sums = c8b4f7ece312a51ab72ef72e550f7ca3973f5328bc128df64158fe6e3b3b8c0e + sha256sums = c2d9d6891f77268ee1c8de832b17d9cadf8fcc7008ce94ba9c1e04d70b3aa396 pkgname = thundersvm depends = cuda diff --git a/.gitignore b/.gitignore index 25cca9a92b37..2d134e06f8fc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ pkg/ src/ *.gz *.xz +*.zst @@ -3,18 +3,22 @@ pkgbase=thundersvm _pkgbase=thundersvm pkgname=('thundersvm' 'python-thundersvm') pkgver=0.3.4 -pkgrel=2 +pkgrel=3 pkgdesc="A Fast SVM Library on GPUs and CPUs" url="https://github.com/Xtra-Computing/thundersvm" license=('APL') makedepends=(cuda cmake python-wheel python-setuptools chrpath) arch=('x86_64') -source=("https://github.com/Xtra-Computing/${_pkgbase}/archive/v${pkgver}.tar.gz") -sha256sums=('c8b4f7ece312a51ab72ef72e550f7ca3973f5328bc128df64158fe6e3b3b8c0e') +source=("https://github.com/Xtra-Computing/${_pkgbase}/archive/v${pkgver}.tar.gz" + "kernelmatrix_kernel.cu.patch") +sha256sums=('c8b4f7ece312a51ab72ef72e550f7ca3973f5328bc128df64158fe6e3b3b8c0e' + 'c2d9d6891f77268ee1c8de832b17d9cadf8fcc7008ce94ba9c1e04d70b3aa396') build() { cd "$srcdir/$_pkgbase-$pkgver" rm -rf build + sed -i 's/c++11/c++14/g; s/CXX_STANDARD 11/CXX_STANDARD 14/g' CMakeLists.txt + patch src/thundersvm/kernel/kernelmatrix_kernel.cu ../kernelmatrix_kernel.cu.patch mkdir build cd build cmake -DUSE_CUDA=ON .. diff --git a/kernelmatrix_kernel.cu.patch b/kernelmatrix_kernel.cu.patch new file mode 100644 index 000000000000..368344ab9995 --- /dev/null +++ b/kernelmatrix_kernel.cu.patch @@ -0,0 +1,56 @@ +--- test.cpp 2020-12-11 11:33:51.985362605 +0800 ++++ kernelmatrix_kernel.cu 2020-12-11 11:32:40.475793777 +0800 +@@ -144,6 +144,40 @@ namespace svm_kernel { + } + kernel_type one(1); + kernel_type zero(0); ++ ++#if (CUDART_VERSION >= 11000) ++ ++ cusparseSpMatDescr_t matA; ++ cusparseDnMatDescr_t matB, matC; ++#ifdef USE_DOUBLE ++ cudaDataType data_type = CUDA_R_64F; ++#else//kernel type is float ++ cudaDataType data_type = CUDA_R_32F; ++#endif ++ cusparseCreateCsr(&matA, m, k, nnz, (void*)csr_row_ptr.device_data(), (void*)csr_col_ind.device_data(), ++ (void*)csr_val.device_data(), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, ++ CUSPARSE_INDEX_BASE_ZERO, data_type); ++ cusparseCreateDnMat(&matB, n, k, n, (void*)dense_mat.device_data(), data_type, CUSPARSE_ORDER_COL); ++ cusparseCreateDnMat(&matC, m, n, m, (void*)result.device_data(), data_type, CUSPARSE_ORDER_COL); ++ ++ size_t buffer_size = 0; ++ cusparseSpMM_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE, ++ &one, matA, matB, &zero, matC, data_type, CUSPARSE_CSRMM_ALG1, ++ &buffer_size); ++ ++ void *p_buffer = nullptr; ++ cudaMalloc((void**)&p_buffer, buffer_size); ++ ++ cusparseSpMM(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE, ++ &one, matA, matB, &zero, matC, data_type, CUSPARSE_CSRMM_ALG1, p_buffer); ++ ++ cudaFree(p_buffer); ++ cusparseDestroySpMat(matA); ++ cusparseDestroyDnMat(matB); ++ cusparseDestroyDnMat(matC); ++ ++#else ++ + #ifdef USE_DOUBLE + cusparseDcsrmm2(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE, + m, n, k, nnz, &one, descr, csr_val.device_data(), csr_row_ptr.device_data(), +@@ -154,9 +188,10 @@ namespace svm_kernel { + m, n, k, nnz, &one, descr, csr_val.device_data(), csr_row_ptr.device_data(), + csr_col_ind.device_data(), + dense_mat.device_data(), n, &zero, result.device_data(), m); +-#endif +- + + //cusparseScsrmm return row-major matrix, so no transpose is needed ++#endif // ifdef USE_DOUBLE ++ ++#endif // if CUDART_VERSION >= 11000 + } + } |