summary | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  .SRCINFO  4
-rw-r--r--  .gitignore  1
-rw-r--r--  PKGBUILD  10
-rw-r--r--  kernelmatrix_kernel.cu.patch  56
4 files changed, 67 insertions, 4 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 3d013dee2ae5..4df2099c61e4 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,7 +1,7 @@
pkgbase = thundersvm
pkgdesc = A Fast SVM Library on GPUs and CPUs
pkgver = 0.3.4
- pkgrel = 2
+ pkgrel = 3
url = https://github.com/Xtra-Computing/thundersvm
arch = x86_64
license = APL
@@ -11,7 +11,9 @@ pkgbase = thundersvm
makedepends = python-setuptools
makedepends = chrpath
source = https://github.com/Xtra-Computing/thundersvm/archive/v0.3.4.tar.gz
+ source = kernelmatrix_kernel.cu.patch
sha256sums = c8b4f7ece312a51ab72ef72e550f7ca3973f5328bc128df64158fe6e3b3b8c0e
+ sha256sums = c2d9d6891f77268ee1c8de832b17d9cadf8fcc7008ce94ba9c1e04d70b3aa396
pkgname = thundersvm
depends = cuda
diff --git a/.gitignore b/.gitignore
index 25cca9a92b37..2d134e06f8fc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ pkg/
src/
*.gz
*.xz
+*.zst
diff --git a/PKGBUILD b/PKGBUILD
index b41103ec4e8a..24a9db572c5d 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -3,18 +3,22 @@ pkgbase=thundersvm
_pkgbase=thundersvm
pkgname=('thundersvm' 'python-thundersvm')
pkgver=0.3.4
-pkgrel=2
+pkgrel=3
pkgdesc="A Fast SVM Library on GPUs and CPUs"
url="https://github.com/Xtra-Computing/thundersvm"
license=('APL')
makedepends=(cuda cmake python-wheel python-setuptools chrpath)
arch=('x86_64')
-source=("https://github.com/Xtra-Computing/${_pkgbase}/archive/v${pkgver}.tar.gz")
-sha256sums=('c8b4f7ece312a51ab72ef72e550f7ca3973f5328bc128df64158fe6e3b3b8c0e')
+source=("https://github.com/Xtra-Computing/${_pkgbase}/archive/v${pkgver}.tar.gz"
+ "kernelmatrix_kernel.cu.patch")
+sha256sums=('c8b4f7ece312a51ab72ef72e550f7ca3973f5328bc128df64158fe6e3b3b8c0e'
+ 'c2d9d6891f77268ee1c8de832b17d9cadf8fcc7008ce94ba9c1e04d70b3aa396')
build() {
cd "$srcdir/$_pkgbase-$pkgver"
rm -rf build
+ sed -i 's/c++11/c++14/g; s/CXX_STANDARD 11/CXX_STANDARD 14/g' CMakeLists.txt
+ patch src/thundersvm/kernel/kernelmatrix_kernel.cu ../kernelmatrix_kernel.cu.patch
mkdir build
cd build
cmake -DUSE_CUDA=ON ..
diff --git a/kernelmatrix_kernel.cu.patch b/kernelmatrix_kernel.cu.patch
new file mode 100644
index 000000000000..368344ab9995
--- /dev/null
+++ b/kernelmatrix_kernel.cu.patch
@@ -0,0 +1,56 @@
+--- test.cpp 2020-12-11 11:33:51.985362605 +0800
++++ kernelmatrix_kernel.cu 2020-12-11 11:32:40.475793777 +0800
+@@ -144,6 +144,40 @@ namespace svm_kernel {
+ }
+ kernel_type one(1);
+ kernel_type zero(0);
++
++#if (CUDART_VERSION >= 11000)
++
++ cusparseSpMatDescr_t matA;
++ cusparseDnMatDescr_t matB, matC;
++#ifdef USE_DOUBLE
++ cudaDataType data_type = CUDA_R_64F;
++#else//kernel type is float
++ cudaDataType data_type = CUDA_R_32F;
++#endif
++ cusparseCreateCsr(&matA, m, k, nnz, (void*)csr_row_ptr.device_data(), (void*)csr_col_ind.device_data(),
++ (void*)csr_val.device_data(), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
++ CUSPARSE_INDEX_BASE_ZERO, data_type);
++ cusparseCreateDnMat(&matB, n, k, n, (void*)dense_mat.device_data(), data_type, CUSPARSE_ORDER_COL);
++ cusparseCreateDnMat(&matC, m, n, m, (void*)result.device_data(), data_type, CUSPARSE_ORDER_COL);
++
++ size_t buffer_size = 0;
++ cusparseSpMM_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE,
++ &one, matA, matB, &zero, matC, data_type, CUSPARSE_CSRMM_ALG1,
++ &buffer_size);
++
++ void *p_buffer = nullptr;
++ cudaMalloc((void**)&p_buffer, buffer_size);
++
++ cusparseSpMM(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE,
++ &one, matA, matB, &zero, matC, data_type, CUSPARSE_CSRMM_ALG1, p_buffer);
++
++ cudaFree(p_buffer);
++ cusparseDestroySpMat(matA);
++ cusparseDestroyDnMat(matB);
++ cusparseDestroyDnMat(matC);
++
++#else
++
+ #ifdef USE_DOUBLE
+ cusparseDcsrmm2(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE,
+ m, n, k, nnz, &one, descr, csr_val.device_data(), csr_row_ptr.device_data(),
+@@ -154,9 +188,10 @@ namespace svm_kernel {
+ m, n, k, nnz, &one, descr, csr_val.device_data(), csr_row_ptr.device_data(),
+ csr_col_ind.device_data(),
+ dense_mat.device_data(), n, &zero, result.device_data(), m);
+-#endif
+-
+
+ //cusparseScsrmm return row-major matrix, so no transpose is needed
++#endif // ifdef USE_DOUBLE
++
++#endif // if CUDART_VERSION >= 11000
+ }
+ }