summarylogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.SRCINFO27
-rw-r--r--PKGBUILD96
-rw-r--r--configure.patch202
-rw-r--r--cuda.patch426
-rw-r--r--mpi.patch26
-rw-r--r--ospray.patch53
6 files changed, 217 insertions, 613 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 8cef04b31e2a..41dd6585957b 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,34 +1,31 @@
pkgbase = vmd-src
pkgdesc = Visual Molecular Dynamics
- pkgver = 1.9.3
- pkgrel = 1
+ pkgver = 1.9.4a57
+ pkgrel = 4
url = http://www.ks.uiuc.edu/Research/vmd/
arch = x86_64
+ arch = aarch64
license = custom
- makedepends = gcc
+ makedepends = opencl-headers
depends = tcsh
depends = tk
- depends = python2-numpy
+ depends = python-numpy
depends = fltk
- depends = ospray
depends = netcdf
depends = ocl-icd
+ depends = libxi
+ depends = openmpi
optdepends = openbabel: additional file formats support
optdepends = sqlite: dmsplugin
+ optdepends = ospray: accelerated ray tracing for Intel CPUs
optdepends = optix: accelerated ray tracing for NVIDIA GPUs
optdepends = cuda: NVIDIA CUDA GPU acceleration functions
+ provides = vmd
conflicts = vmd
conflicts = vmd-bin
- source = local://vmd-1.9.3.src.tar.gz
+ source = local://vmd-1.9.4a57.src.tar.gz
source = configure.patch
- source = ospray.patch
- source = cuda.patch
- source = mpi.patch
- sha256sums = 5bdc314dc836d620fe510ed4b6c3dbe3cf66525b61680ffec4e2563cf495f128
- sha256sums = c25475d51be75c2b9e3bcd5f8d5d3ed1fd0df992f9c8020b0346c49f6c40b275
- sha256sums = 09f1cf35ebd302095a68d2b5db03e1671493411aed3f839baf2b1f44dc7e0e0d
- sha256sums = 8880006bd97f7397d5a5592aaeeb6e80530d8c6026280c1b961e43f568e12daa
- sha256sums = e281a57831b8ff60c5a644219f0b6289d32bee239978af676474941c7d8548c0
+ sha256sums = de278d0c5d969336d89068e0806fb50aaa0cb0f546ba985d840b279357860679
+ sha256sums = a74a8bbee40667742907b59aa24bdb37607761389a9c332c2d449ef07a2f0937
pkgname = vmd-src
-
diff --git a/PKGBUILD b/PKGBUILD
index fc9c5493cfe0..376debaa359c 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -1,46 +1,40 @@
-# Maintainer: Anton Kudelin <kudelin at protonmail dot com>
+# Maintainer: Anton Kudelin <kudelin at protonmail dot com>
# Contributor: Eric Berquist <eric DOT berquist AT gmail>
# Contributor: steabert <steabert@member.fsf.org>
# Contributor: Ricardo Honorato Z.
pkgname=vmd-src
_pkgname=vmd
-pkgver=1.9.3
-pkgrel=1
+pkgver=1.9.4a57
+pkgrel=4
pkgdesc="Visual Molecular Dynamics"
url="http://www.ks.uiuc.edu/Research/vmd/"
-license=('custom')
-arch=('x86_64')
-depends=('tcsh' 'tk' 'python2-numpy' 'fltk' 'ospray' 'netcdf' 'ocl-icd')
-makedepends=('gcc')
+license=(custom)
+arch=(x86_64 aarch64)
+depends=(tcsh tk python-numpy fltk netcdf ocl-icd libxi openmpi)
+makedepends=(opencl-headers)
optdepends=('openbabel: additional file formats support'
'sqlite: dmsplugin'
+ 'ospray: accelerated ray tracing for Intel CPUs'
'optix: accelerated ray tracing for NVIDIA GPUs'
'cuda: NVIDIA CUDA GPU acceleration functions')
-conflicts=("$_pkgname" "$_pkgname-bin")
-# You have to download the package from the VMD url and put it in the PKGBUILD folder.
-source=("local://$_pkgname-${pkgver}.src.tar.gz"
- "configure.patch"
- "ospray.patch"
- "cuda.patch"
- "mpi.patch")
-sha256sums=('5bdc314dc836d620fe510ed4b6c3dbe3cf66525b61680ffec4e2563cf495f128'
- 'c25475d51be75c2b9e3bcd5f8d5d3ed1fd0df992f9c8020b0346c49f6c40b275'
- '09f1cf35ebd302095a68d2b5db03e1671493411aed3f839baf2b1f44dc7e0e0d'
- '8880006bd97f7397d5a5592aaeeb6e80530d8c6026280c1b961e43f568e12daa'
- 'e281a57831b8ff60c5a644219f0b6289d32bee239978af676474941c7d8548c0')
+provides=($_pkgname)
+conflicts=($_pkgname $_pkgname-bin)
+# You have to download the package from the VMD url
+# and put it in the PKGBUILD folder.
+source=(local://$_pkgname-${pkgver}.src.tar.gz
+ configure.patch)
+sha256sums=('de278d0c5d969336d89068e0806fb50aaa0cb0f546ba985d840b279357860679'
+ 'a74a8bbee40667742907b59aa24bdb37607761389a9c332c2d449ef07a2f0937')
prepare() {
sed -i 's/ltcl8.5/ltcl/g' plugins/Make-arch
cd $_pkgname-$pkgver
mkdir plugins
sed -i 's#:${LD_LIBRARY_PATH}/:${LD_LIBRARY_PATH}:#/opt/optix/lib64#g' bin/*
+ # Assuming openmpi; if it's not the case edit configure.patch
patch -p0 < ../configure.patch
- patch -p0 < ../ospray.patch
- patch -p0 < ../cuda.patch
- # Assuming openmpi; if not the case edit mpi.patch
- patch -p0 < ../mpi.patch
export TCLINC="-I/usr/include"
export TCLLIB="-L/usr/lib"
export PLUGINDIR=$srcdir/$_pkgname-$pkgver/plugins
@@ -48,26 +42,66 @@ prepare() {
export VMDINSTALLLIBRARYDIR=$pkgdir/usr/lib/vmd
# Enable CUDA if nvcc is in PATH
- if [ $( echo -n $( which nvcc) | tail -c 4 ) == nvcc ]
+ if command -v nvcc &> /dev/null
then
export ACC=CUDA
fi
+
+ # Enable OSPRAY if ospray_mpi_worker is in PATH
+ if command -v ospray_mpi_worker &> /dev/null
+ then
+ export RAY=LIBOSPRAY2
+ fi
+
+ # Architecture
+ export MACHINE=LINUXAMD64
+ if [ $CARCH == 'aarch64' ]
+ then
+ export MACHINE=LINUXARM64
+ fi
}
build() {
- cd $srcdir/plugins
- make -j1 LINUXAMD64 world
+ cd "$srcdir/plugins"
+ make -j1 $MACHINE
make distrib
+
cd ../$_pkgname-$pkgver
- ./configure LINUXAMD64 OPENGL EGLPBUFFER FLTKOPENGL FLTK TK $ACC IMD OPENCL MPI XINERAMA XINPUT LIBOSPRAY LIBPNG NETCDF COLVARS TCL PYTHON PTHREADS GCC
+ ./configure \
+ $MACHINE \
+ $ACC \
+ $RAY \
+ OPENGL \
+ EGLPBUFFER \
+ FLTKOPENGL \
+ FLTK \
+ TK \
+ IMD \
+ OPENCL \
+ MPI \
+ XINERAMA \
+ XINPUT \
+ LIBPNG \
+ ZLIB \
+ NETCDF \
+ COLVARS \
+ TCL \
+ PYTHON \
+ NUMPY \
+ PTHREADS \
+ GCC
+
cd src
make veryclean
make
}
package() {
- cd $srcdir/$_pkgname-$pkgver
- install -D -m644 LICENSE $pkgdir/usr/share/licenses/$pkgname/LICENSE
- cd src; make install
- sed -i 's#set defaultvmddir=.*#set defaultvmddir=/usr/lib/vmd#' $pkgdir/usr/bin/vmd
+ cd "$srcdir/$_pkgname-$pkgver"
+ install -Dm644 LICENSE "$pkgdir/usr/share/licenses/$pkgname/LICENSE"
+
+ cd src
+ make install
+ sed -i 's#set defaultvmddir=.*#set defaultvmddir=/usr/lib/vmd#' \
+ "$pkgdir/usr/bin/vmd"
}
diff --git a/configure.patch b/configure.patch
index ad6dd0acb5e3..106fc6998ad0 100644
--- a/configure.patch
+++ b/configure.patch
@@ -1,15 +1,34 @@
---- configure 2016-12-01 10:11:33.000000000 +0300
-+++ configure 2019-05-24 15:40:14.742991210 +0300
-@@ -466,7 +466,7 @@
+--- configure 2021-08-08 10:32:02.637988859 +0000
++++ configure 2021-08-08 10:31:14.808470930 +0000
+@@ -497,19 +497,19 @@
$arch_cc = "cc";
$arch_ccpp = "CC";
--$arch_nvcc = "/usr/local/cuda-8.0/bin/nvcc";
+-$arch_nvcc = "/usr/local/cuda-10.2/bin/nvcc";
+$arch_nvcc = "nvcc";
- $arch_nvccflags = "--ptxas-options=-v " .
- "-gencode arch=compute_30,code=compute_30 " .
- "-gencode arch=compute_30,code=sm_35 " .
-@@ -729,8 +729,8 @@
+ $arch_nvccflags = "-lineinfo --ptxas-options=-v " .
+- "-gencode arch=compute_30,code=compute_30 " .
+- "-gencode arch=compute_30,code=sm_35 " .
+- "-gencode arch=compute_30,code=sm_37 " .
+- "-gencode arch=compute_50,code=compute_50 " .
++ "-gencode arch=compute_35,code=sm_35 " .
++ "-gencode arch=compute_35,code=sm_37 " .
+ "-gencode arch=compute_50,code=sm_50 " .
+- "-gencode arch=compute_60,code=compute_60 " .
++ "-gencode arch=compute_52,code=sm_52 " .
+ "-gencode arch=compute_60,code=sm_60 " .
+- "-gencode arch=compute_70,code=compute_70 " .
++ "-gencode arch=compute_61,code=sm_61 " .
+ "-gencode arch=compute_70,code=sm_70 " .
++ "-gencode arch=compute_75,code=sm_75 " .
++ "-gencode arch=compute_80,code=sm_80 " .
++ "-gencode arch=compute_86,code=sm_86 " .
+ "--ftz=true ";
+-# "-gencode arch=compute_75,code=sm_75 " .
+ $arch_gcc = "gcc";
+ $arch_gccpp = "g++";
+ $arch_lex = "lex";
+@@ -782,8 +782,8 @@
if ($config_tk) { $tcl_include .= " -I$stock_tk_include_dir"; }
$tcl_library = "-L$stock_tcl_library_dir";
if ($config_tk) { $tcl_library .= " -L$stock_tk_library_dir"; }
@@ -20,91 +39,150 @@
@tcl_cc = ();
@tcl_cu = ();
-@@ -888,11 +888,7 @@
+@@ -911,13 +911,7 @@
$system_dir = "";
$system_include = "-I.";
$system_library = "";
-if ( $config_gcc ) {
-- $system_libs = "-ll -lm";
+- ## Note: some old commercial versions of Unix require -ll for lex internals
+- ## now that we've been using flex for years, -ll should be moot.
+- $system_libs = "-lm";
-} else {
- $system_libs = "-lm";
-}
-+$system_libs = "-lrt -lm";
++$system_libs = "-lrt -lm";
@system_cc = ();
@system_cu = ();
@system_ccpp = ();
-@@ -922,7 +918,7 @@
+@@ -992,10 +986,10 @@
# This option enables the use of CUDA GPU acceleration functions.
#######################
$cuda_defines = "-DVMDCUDA -DMSMPOT_CUDA";
--$cuda_dir = "/usr/local/cuda-8.0";
+-$cuda_dir = "/usr/local/cuda-10.2";
+$cuda_dir = "/opt/cuda";
$cuda_include = "";
$cuda_library = "";
- $cuda_libs = "-Wl,-rpath -Wl,\$\$ORIGIN/ -lcudart_static";
-@@ -1042,7 +1038,7 @@
- # $liboptix_dir = "/usr/local/encap/NVIDIA-OptiX-SDK-3.6.3-linux64";
- # $liboptix_dir = "/usr/local/encap/NVIDIA-OptiX-SDK-3.8.0-linux64";
- # $liboptix_dir = "/usr/local/encap/NVIDIA-OptiX-SDK-3.9.0-linux64";
--$liboptix_dir = "/usr/local/encap/NVIDIA-OptiX-SDK-4.0.1-linux64";
+-$cuda_libs = "-Wl,-rpath -Wl,\$\$ORIGIN/ -lcudart_static -lrt";
++$cuda_libs = "-Wl,-rpath -Wl,\$\$ORIGIN/ -lcudart -lrt";
+ @cuda_cc = ();
+ @cuda_cu = ('msmpot_cuda.cu',
+ 'msmpot_cuda_latcut.cu',
+@@ -1052,15 +1046,15 @@
+ #######################
+ $mpi_defines = "-DVMDMPI ";
+ ## Argonne MPICH
+-$mpi_dir = "/usr/lib64/mpich";
+-$mpi_include = "-I/usr/include/mpich-x86_64";
+-$mpi_library = "-L$mpi_dir/lib";
+-$mpi_libs = "-lmpich";
++#$mpi_dir = "/usr";
++#$mpi_include = "-I/usr/include";
++#$mpi_library = "-L$mpi_dir/lib";
++#$mpi_libs = "-lmpicxx -lmpi";
+ ## OpenMPI
+-# $mpi_dir = "/usr/lib64/openmpi";
+-# $mpi_include = "-I/usr/include/openmpi-x86_64";
+-# $mpi_library = "-L$mpi_dir/lib";
+-# $mpi_libs = "-lmpi";
++$mpi_dir = "/usr";
++$mpi_include = "-I/usr/include";
++$mpi_library = "-L$mpi_dir/lib";
++$mpi_libs = "-lmpi";
+ ## OSU MVAPICH
+ # $mpi_dir = "/usr/mpi/gcc/mvapich-1.0.0";
+ # $mpi_include = "-I$mpi_dir/include";
+@@ -1201,7 +1195,7 @@
+ # $liboptix_dir = "/usr/local/encap/NVIDIA-OptiX-SDK-5.0.1-linux64";
+ # $liboptix_dir = "/usr/local/encap/NVIDIA-OptiX-SDK-5.1.0-linux64";
+ # $liboptix_dir = "/usr/local/encap/NVIDIA-OptiX-SDK-6.0.0-linux64";
+-$liboptix_dir = "/usr/local/encap/NVIDIA-OptiX-SDK-6.5.0-linux64";
+$liboptix_dir = "/opt/optix";
+ # $liboptix_dir = "/usr/local/encap/NVIDIA-OptiX-SDK-7.0.0-linux64";
+
# NCSA Blue Waters
- # $liboptix_dir = "/u/sciteam/stonej/local/NVIDIA-OptiX-SDK-3.6.3-linux64";
- # $liboptix_dir = "/u/sciteam/stonej/local/NVIDIA-OptiX-SDK-3.8.0-linux64";
-@@ -1082,7 +1078,7 @@
- $libospray_dir = "/usr/local/ospray-1.1.1.x86_64.linux";
- $libospray_include = "-I$libospray_dir/include -I$libospray_dir/ospray/include ";
- $libospray_library = "-L$libospray_dir/lib ";
--$libospray_libs = "-lospray -lospray_common -lembree -ltbb -ltbbmalloc ";
-+$libospray_libs = "-lospray -lospray_common -lembree3 -ltbb -ltbbmalloc ";
- # Both OptiX and OSPRay renderers use the Tachyon glwin code,
- # so we have to make sure we don't build/link it twice
- if ($config_liboptix) {
-@@ -1375,20 +1371,20 @@
- ###################
- # location of Python library and include file.
- # If left blank, standard system directories will be searched.
--#$stock_python_include_dir=$ENV{"PYTHON_INCLUDE_DIR"} || "/usr/local/include";
-+$stock_python_include_dir=$ENV{"PYTHON_INCLUDE_DIR"} || "/usr/include/python2.7";
- #$stock_python_library_dir=$ENV{"PYTHON_LIBRARY_DIR"} || "/usr/local/lib";
--$stock_python_include_dir=$ENV{"PYTHON_INCLUDE_DIR"} || "$vmd_library_dir/python/lib_$config_arch/include/python2.5";
--$stock_python_library_dir=$ENV{"PYTHON_LIBRARY_DIR"} || "$vmd_library_dir/python/lib_$config_arch/lib/python2.5/config";
-+#$stock_python_include_dir=$ENV{"PYTHON_INCLUDE_DIR"} || "$vmd_library_dir/python/lib_$config_arch/include/python2.7";
-+#$stock_python_library_dir=$ENV{"PYTHON_LIBRARY_DIR"} || "$vmd_library_dir/python/lib_$config_arch/lib/python2.7/config";
+@@ -1314,7 +1308,7 @@
+ $libospray2_dir = "/usr/local/ospray-2.1.1.x86_64.linux";
+ $libospray2_include = "-I$libospray2_dir/include -I$libospray2_dir/ospray/include ";
+ $libospray2_library = "-L$libospray2_dir/lib ";
+- $libospray2_libs = "-lospray -lospcommon -lembree3 -ltbb -ltbbmalloc ";
++ $libospray2_libs = "-lospray -lembree3 -ltbb -ltbbmalloc ";
+ }
- #$stock_numpy_include_dir=$ENV{"NUMPY_INCLUDE_DIR"} || "/usr/local/include";
- #$stock_numpy_library_dir=$ENV{"NUMPY_LIBRARY_DIR"} || "/usr/local/lib";
--$stock_numpy_include_dir=$ENV{"NUMPY_INCLUDE_DIR"} || "$vmd_library_dir/numpy/lib_$config_arch/include";
--$stock_numpy_library_dir=$ENV{"NUMPY_LIBRARY_DIR"} || "$vmd_library_dir/python/lib_$config_arch/lib/python2.5/site-packages/numpy/core/include";
-+#$stock_numpy_include_dir=$ENV{"NUMPY_INCLUDE_DIR"} || "$vmd_library_dir/numpy/lib_$config_arch/include";
-+#$stock_numpy_library_dir=$ENV{"NUMPY_LIBRARY_DIR"} || "$vmd_library_dir/python/lib_$config_arch/lib/python2.7/site-packages/numpy/core/include";
+ # Both OptiX and OSPRay renderers use the Tachyon glwin code,
+@@ -1650,16 +1644,16 @@
+ $stock_numpy_library_dir=$ENV{"NUMPY_LIBRARY_DIR"} || "$conda_root/lib/python-3.7/site-packages/numpy/core/include";
+ $python_libs = "-fno-lto -lpython3.7m -lpthread";
+ } else {
+-# $stock_python_include_dir=$ENV{"PYTHON_INCLUDE_DIR"} || "/usr/local/include";
+-# $stock_python_library_dir=$ENV{"PYTHON_LIBRARY_DIR"} || "/usr/local/lib";
+- $stock_python_include_dir=$ENV{"PYTHON_INCLUDE_DIR"} || "$vmd_library_dir/python/lib_$config_arch/include/python2.5";
+- $stock_python_library_dir=$ENV{"PYTHON_LIBRARY_DIR"} || "$vmd_library_dir/python/lib_$config_arch/lib/python2.5/config";
+-
+-# $stock_numpy_include_dir=$ENV{"NUMPY_INCLUDE_DIR"} || "/usr/local/include";
+-# $stock_numpy_library_dir=$ENV{"NUMPY_LIBRARY_DIR"} || "/usr/local/lib";
+- $stock_numpy_include_dir=$ENV{"NUMPY_INCLUDE_DIR"} || "$vmd_library_dir/numpy/lib_$config_arch/include";
+- $stock_numpy_library_dir=$ENV{"NUMPY_LIBRARY_DIR"} || "$vmd_library_dir/python/lib_$config_arch/lib/python2.5/site-packages/numpy/core/include";
+- $python_libs = "-lpython2.5 -lpthread";
++ $stock_python_include_dir=$ENV{"PYTHON_INCLUDE_DIR"} || "/usr/include/python3.12";
++ $stock_python_library_dir=$ENV{"PYTHON_LIBRARY_DIR"} || "/usr/lib/python3.12/config-3.12-x86_64-linux-gnu";
++# $stock_python_include_dir=$ENV{"PYTHON_INCLUDE_DIR"} || "$vmd_library_dir/python/lib_$config_arch/include/python2.5";
++# $stock_python_library_dir=$ENV{"PYTHON_LIBRARY_DIR"} || "$vmd_library_dir/python/lib_$config_arch/lib/python2.5/config";
++
++ $stock_numpy_include_dir=$ENV{"NUMPY_INCLUDE_DIR"} || "/usr/lib/python3.12/site-packages/numpy/core/include/numpy";
++ $stock_numpy_library_dir=$ENV{"NUMPY_LIBRARY_DIR"} || "/usr/lib/python3.12/site-packages/numpy/core/include";
++# $stock_numpy_include_dir=$ENV{"NUMPY_INCLUDE_DIR"} || "$vmd_library_dir/numpy/lib_$config_arch/include";
++# $stock_numpy_library_dir=$ENV{"NUMPY_LIBRARY_DIR"} || "$vmd_library_dir/python/lib_$config_arch/lib/python2.5/site-packages/numpy/core/include";
++ $python_libs = "-lpython3.12 -lpthread";
+ }
$python_defines = "-DVMDPYTHON";
- $python_include = "-I$stock_python_include_dir -I$stock_numpy_include_dir -I$stock_numpy_library_dir";
- $python_library = "-L$stock_python_library_dir";
--$python_libs = "-lpython2.5 -lpthread";
-+$python_libs = "-lpython2.7 -lpthread";
- @python_h = ('PythonTextInterp.h',
- 'VMDTkinterMenu.h',
- 'py_commands.h',
-@@ -2278,9 +2274,9 @@
+@@ -2559,9 +2553,9 @@
# for compiling with g++:
$arch_cc = "gcc";
$arch_ccpp = "g++";
-- $arch_opt_flag = "-m64 -fno-for-scope -Wno-deprecated -Wall -O6 -ffast-math";
-+ $arch_opt_flag = "-m64 -fno-for-scope -Wno-deprecated -Wall -O3 -ffast-math";
+- $arch_opt_flag = "-m64 -fno-for-scope -Wno-deprecated -Wall -Wno-unknown-pragmas -O6 -ffast-math";
++ $arch_opt_flag = "-Ofast -march=native";
$arch_depend_flag = "-MM";
-- $arch_copts = "-m64 -Wall -O6 -ffast-math";
-+ $arch_copts = "-m64 -Wall -O3 -ffast-math";
- $arch_template_repository = "foobar";
+- $arch_copts = "-m64 -Wall -Wno-unknown-pragmas -O6 -ffast-math";
++ $arch_copts = "-Ofast -march=native";
# so far only STATIC version tested
-@@ -2293,7 +2289,7 @@
+ if ($config_static) {
+@@ -2573,7 +2567,7 @@
if ($config_cuda) {
$arch_nvccflags .= " --machine 64 -O3 $cuda_include";
-- $cuda_library = "-L/usr/local/cuda-8.0/lib64";
+- $cuda_library = "-L/usr/local/cuda-10.2/lib64";
+ $cuda_library = "-L/opt/cuda/lib64";
}
$arch_lex = "flex"; # has problems with vendor lex
+@@ -2605,13 +2599,13 @@
+
+
+ if ($config_arch eq "LINUXARM64") {
+- $tcl_libs = "-ltcl8.5";
+- if ($config_tk) { $tcl_libs = "-ltk8.5 -lX11 " . $tcl_libs; }
++ $tcl_libs = "-ltcl";
++ if ($config_tk) { $tcl_libs = "-ltk -lX11 " . $tcl_libs; }
+ $vmd_libs = "$tcl_libs -lz";
+
+ $arch_nvcc = "/usr/local/cuda/bin/nvcc";
+ $arch_nvccflags = "--ptxas-options=-v " .
+- "-gencode arch=compute_30,code=compute_30 " .
++ "-gencode arch=compute_35,code=compute_35 " .
+ "-gencode arch=compute_70,code=compute_70 " .
+ "--ftz=true ";
+ $cuda_library = "-L/usr/local/cuda/lib64";
+@@ -2635,11 +2629,7 @@
+
+ $arch_nvcc = "/usr/local/cuda-5.5/bin/nvcc";
+ $arch_nvccflags = "--ptxas-options=-v " .
+- "-gencode arch=compute_20,code=sm_20 " .
+- "-gencode arch=compute_30,code=sm_30 " .
+ "-gencode arch=compute_35,code=sm_35 " .
+- "-gencode arch=compute_20,code=compute_20 " .
+- "-gencode arch=compute_30,code=compute_30 " .
+ "--ftz=true ";
+ $cuda_library = "-L/usr/local/cuda/lib";
+ $arch_cc = "cc";
diff --git a/cuda.patch b/cuda.patch
deleted file mode 100644
index 770360ec8d94..000000000000
--- a/cuda.patch
+++ /dev/null
@@ -1,426 +0,0 @@
---- src/CUDAMarchingCubes.cu 2018-03-30 18:52:25.467189457 +0300
-+++ src/CUDAMarchingCubes.cu 2018-03-30 18:52:02.387136244 +0300
-@@ -10,7 +10,7 @@
- *
- * $RCSfile: CUDAMarchingCubes.cu,v $
- * $Author: johns $ $Locker: $ $State: Exp $
-- * $Revision: 1.30 $ $Date: 2016/11/28 03:04:58 $
-+ * $Revision: 1.32 $ $Date: 2018/02/15 05:15:02 $
- *
- ***************************************************************************
- * DESCRIPTION:
-@@ -25,14 +25,17 @@
- //
- // Description: This class computes an isosurface for a given density grid
- // using a CUDA Marching Cubes (MC) alorithm.
--// The implementation is based on the MC demo from the
--// Nvidia GPU Computing SDK, but has been improved
--// and extended. This implementation achieves higher
--// performance by reducing the number of temporary memory
--// buffers, reduces the number of scan calls by using vector
--// integer types, and allows extraction of per-vertex normals
--// optionally computes per-vertex colors if provided with a
--// volumetric texture map.
-+//
-+// The implementation is loosely based on the MC demo from
-+// the Nvidia GPU Computing SDK, but the design has been
-+// improved and extended in several ways.
-+//
-+// This implementation achieves higher performance
-+// by reducing the number of temporary memory
-+// buffers, reduces the number of scan calls by using
-+// vector integer types, and allows extraction of
-+// per-vertex normals and optionally computes
-+// per-vertex colors if a volumetric texture map is provided.
- //
- // Author: Michael Krone <michael.krone@visus.uni-stuttgart.de>
- // John Stone <johns@ks.uiuc.edu>
-@@ -48,7 +51,7 @@
- #include <thrust/functional.h>
-
- //
--// Restrict macro to make it easy to do perf tuning tess
-+// Restrict macro to make it easy to do perf tuning tests
- //
- #if 0
- #define RESTRICT __restrict__
-@@ -171,6 +174,11 @@
- texture<float, 3, cudaReadModeElementType> volumeTex;
-
- // sample volume data set at a point p, p CAN NEVER BE OUT OF BOUNDS
-+// XXX The sampleVolume() call underperforms vs. peak memory bandwidth
-+// because we don't strictly enforce coalescing requirements in the
-+// layout of the input volume presently. If we forced X/Y dims to be
-+// warp-multiple it would become possible to use wider fetches and
-+// a few other tricks to improve global memory bandwidth
- __device__ float sampleVolume(const float * RESTRICT data,
- uint3 p, uint3 gridSize) {
- return data[(p.z*gridSize.x*gridSize.y) + (p.y*gridSize.x) + p.x];
-@@ -592,6 +600,30 @@
- cudaBindTextureToArray(volumeTex, d_vol, desc);
- }
-
-+#if CUDART_VERSION >= 9000
-+//
-+// XXX CUDA 9.0RC breaks the usability of Thrust scan() prefix sums when
-+// used with the built-in uint2 vector integer types. To workaround
-+// the problem we have to define our own type and associated conversion
-+// routines etc.
-+//
-+
-+// XXX workaround for uint2 breakage in CUDA 9.0RC
-+struct myuint2 : uint2 {
-+ __host__ __device__ myuint2() : uint2(make_uint2(0, 0)) {}
-+ __host__ __device__ myuint2(int val) : uint2(make_uint2(val, val)) {}
-+ __host__ __device__ myuint2(uint2 val) : uint2(make_uint2(val.x, val.y)) {}
-+};
-+
-+void ThrustScanWrapperUint2(uint2* output, uint2* input, unsigned int numElements) {
-+ const uint2 zero = make_uint2(0, 0);
-+ thrust::exclusive_scan(thrust::device_ptr<myuint2>((myuint2*)input),
-+ thrust::device_ptr<myuint2>((myuint2*)input + numElements),
-+ thrust::device_ptr<myuint2>((myuint2*)output),
-+ (myuint2) zero);
-+}
-+
-+#else
-
- void ThrustScanWrapperUint2(uint2* output, uint2* input, unsigned int numElements) {
- const uint2 zero = make_uint2(0, 0);
-@@ -601,6 +633,7 @@
- zero);
- }
-
-+#endif
-
- void ThrustScanWrapperArea(float* output, float* input, unsigned int numElements) {
- thrust::inclusive_scan(thrust::device_ptr<float>(input),
-@@ -639,11 +672,9 @@
- }
-
-
--///////////////////////////////////////////////////////////////////////////////
- //
- // class CUDAMarchingCubes
- //
--///////////////////////////////////////////////////////////////////////////////
-
- CUDAMarchingCubes::CUDAMarchingCubes() {
- // initialize values
-@@ -713,9 +744,6 @@
- }
-
-
--////////////////////////////////////////////////////////////////////////////////
--//! Run the Cuda part of the computation
--////////////////////////////////////////////////////////////////////////////////
- void CUDAMarchingCubes::computeIsosurfaceVerts(float3* vertOut, unsigned int maxverts, dim3 & grid3) {
- // check if data is available
- if (!this->setdata)
-
---- src/CUDAMDFF.cu 2016-12-01 10:11:56.000000000 +0300
-+++ src/CUDAMDFF.cu 2018-03-30 18:56:44.352937599 +0300
-@@ -11,7 +11,7 @@
- *
- * $RCSfile: CUDAMDFF.cu,v $
- * $Author: johns $ $Locker: $ $State: Exp $
-- * $Revision: 1.75 $ $Date: 2015/04/07 20:41:26 $
-+ * $Revision: 1.78 $ $Date: 2018/02/19 07:10:37 $
- *
- ***************************************************************************
- * DESCRIPTION:
-@@ -28,12 +28,16 @@
- #include <stdlib.h>
- #include <string.h>
- #include <cuda.h>
--#include <float.h> // FLT_MAX etc
--
-+#if CUDART_VERSION >= 9000
-+#include <cuda_fp16.h> // need to explicitly include for CUDA 9.0
-+#endif
- #if CUDART_VERSION < 4000
- #error The VMD MDFF feature requires CUDA 4.0 or later
- #endif
-
-+#include <float.h> // FLT_MAX etc
-+
-+
- #include "Inform.h"
- #include "utilities.h"
- #include "WKFThreads.h"
-@@ -588,6 +592,43 @@
- }
-
-
-+
-+// #define VMDUSESHUFFLE 1
-+#if defined(VMDUSESHUFFLE) && __CUDA_ARCH__ >= 300 && CUDART_VERSION >= 9000
-+// New warp shuffle-based CC sum reduction for Kepler and later GPUs.
-+inline __device__ void cc_sumreduction(int tid, int totaltb,
-+ float4 &total_cc_sums,
-+ float &total_lcc,
-+ int &total_lsize,
-+ float4 *tb_cc_sums,
-+ float *tb_lcc,
-+ int *tb_lsize) {
-+ total_cc_sums = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-+ total_lcc = 0.0f;
-+ total_lsize = 0;
-+
-+ // use precisely one warp to do the final reduction
-+ if (tid < warpSize) {
-+ for (int i=tid; i<totaltb; i+=warpSize) {
-+ total_cc_sums += tb_cc_sums[i];
-+ total_lcc += tb_lcc[i];
-+ total_lsize += tb_lsize[i];
-+ }
-+
-+ // perform intra-warp parallel reduction...
-+ // general loop version of parallel sum-reduction
-+ for (int mask=warpSize/2; mask>0; mask>>=1) {
-+ total_cc_sums.x += __shfl_xor_sync(0xffffffff, total_cc_sums.x, mask);
-+ total_cc_sums.y += __shfl_xor_sync(0xffffffff, total_cc_sums.y, mask);
-+ total_cc_sums.z += __shfl_xor_sync(0xffffffff, total_cc_sums.z, mask);
-+ total_cc_sums.w += __shfl_xor_sync(0xffffffff, total_cc_sums.w, mask);
-+ total_lcc += __shfl_xor_sync(0xffffffff, total_lcc, mask);
-+ total_lsize += __shfl_xor_sync(0xffffffff, total_lsize, mask);
-+ }
-+ }
-+}
-+#else
-+// shared memory based CC sum reduction
- inline __device__ void cc_sumreduction(int tid, int totaltb,
- float4 &total_cc_sums,
- float &total_lcc,
-@@ -629,6 +670,7 @@
- total_lcc = tb_lcc[0];
- total_lsize = tb_lsize[0];
- }
-+#endif
-
-
- inline __device__ void thread_cc_sum(float ref, float density,
-@@ -750,6 +792,92 @@
- }
-
-
-+#if defined(VMDUSESHUFFLE) && __CUDA_ARCH__ >= 300 && CUDART_VERSION >= 9000
-+ // all threads write their local sums to shared memory...
-+ __shared__ float2 tb_cc_means_s[TOTALBLOCKSZ];
-+ __shared__ float2 tb_cc_squares_s[TOTALBLOCKSZ];
-+ __shared__ float tb_lcc_s[TOTALBLOCKSZ];
-+ __shared__ int tb_lsize_s[TOTALBLOCKSZ];
-+
-+ tb_cc_means_s[tid] = thread_cc_means;
-+ tb_cc_squares_s[tid] = thread_cc_squares;
-+ tb_lcc_s[tid] = thread_lcc;
-+ tb_lsize_s[tid] = thread_lsize;
-+ __syncthreads(); // all threads must hit syncthreads call...
-+
-+ // use precisely one warp to do the thread-block-wide reduction
-+ if (tid < warpSize) {
-+ float2 tmp_cc_means = make_float2(0.0f, 0.0f);
-+ float2 tmp_cc_squares = make_float2(0.0f, 0.0f);
-+ float tmp_lcc = 0.0f;
-+ int tmp_lsize = 0;
-+ for (int i=tid; i<TOTALBLOCKSZ; i+=warpSize) {
-+ tmp_cc_means += tb_cc_means_s[i];
-+ tmp_cc_squares += tb_cc_squares_s[i];
-+ tmp_lcc += tb_lcc_s[i];
-+ tmp_lsize += tb_lsize_s[i];
-+ }
-+
-+ // perform intra-warp parallel reduction...
-+ // general loop version of parallel sum-reduction
-+ for (int mask=warpSize/2; mask>0; mask>>=1) {
-+ tmp_cc_means.x += __shfl_xor_sync(0xffffffff, tmp_cc_means.x, mask);
-+ tmp_cc_means.y += __shfl_xor_sync(0xffffffff, tmp_cc_means.y, mask);
-+ tmp_cc_squares.x += __shfl_xor_sync(0xffffffff, tmp_cc_squares.x, mask);
-+ tmp_cc_squares.y += __shfl_xor_sync(0xffffffff, tmp_cc_squares.y, mask);
-+ tmp_lcc += __shfl_xor_sync(0xffffffff, tmp_lcc, mask);
-+ tmp_lsize += __shfl_xor_sync(0xffffffff, tmp_lsize, mask);
-+ }
-+
-+ // write per-thread-block partial sums to global memory,
-+ // if a per-thread-block CC output array is provided, write the
-+ // local CC for this thread block out, and finally, check if we
-+ // are the last thread block to finish, and finalize the overall
-+ // CC results for the entire grid of thread blocks.
-+ if (tid == 0) {
-+ unsigned int bid = blockIdx.z * gridDim.x * gridDim.y +
-+ blockIdx.y * gridDim.x + blockIdx.x;
-+
-+ tb_cc_sums[bid] = make_float4(tmp_cc_means.x, tmp_cc_means.y,
-+ tmp_cc_squares.x, tmp_cc_squares.y);
-+ tb_lcc[bid] = tmp_lcc;
-+ tb_lsize[bid] = tmp_lsize;
-+
-+ if (tb_CC != NULL) {
-+ float cc = calc_cc(tb_cc_means_s[0].x, tb_cc_means_s[0].y,
-+ tb_cc_squares_s[0].x, tb_cc_squares_s[0].y,
-+ tb_lsize_s[0], tb_lcc_s[0]);
-+
-+ // write local per-thread-block CC to global memory
-+ tb_CC[bid] = cc;
-+ }
-+
-+ __threadfence();
-+
-+ unsigned int value = atomicInc(&tbcatomic[0], totaltb);
-+ isLastBlockDone = (value == (totaltb - 1));
-+ }
-+ }
-+ __syncthreads();
-+
-+ if (isLastBlockDone) {
-+ float4 total_cc_sums;
-+ float total_lcc;
-+ int total_lsize;
-+ cc_sumreduction(tid, totaltb, total_cc_sums, total_lcc, total_lsize,
-+ tb_cc_sums, tb_lcc, tb_lsize);
-+
-+ if (tid == 0) {
-+ tb_cc_sums[totaltb] = total_cc_sums;
-+ tb_lcc[totaltb] = total_lcc;
-+ tb_lsize[totaltb] = total_lsize;
-+ }
-+
-+ reset_atomic_counter(&tbcatomic[0]);
-+ }
-+
-+#else
-+
- // all threads write their local sums to shared memory...
- __shared__ float2 tb_cc_means_s[TOTALBLOCKSZ];
- __shared__ float2 tb_cc_squares_s[TOTALBLOCKSZ];
-@@ -794,6 +922,7 @@
- }
- __syncthreads(); // all threads must hit syncthreads call...
- }
-+//#endif
-
- // write per-thread-block partial sums to global memory,
- // if a per-thread-block CC output array is provided, write the
-@@ -847,6 +976,7 @@
- }
- #endif
- }
-+#endif
- }
-
-
-
---- src/CUDAQuickSurf.cu 2016-12-01 10:11:56.000000000 +0300
-+++ src/CUDAQuickSurf.cu 2018-03-30 19:01:38.777196233 +0300
-@@ -11,7 +11,7 @@
- *
- * $RCSfile: CUDAQuickSurf.cu,v $
- * $Author: johns $ $Locker: $ $State: Exp $
-- * $Revision: 1.81 $ $Date: 2016/04/20 04:57:46 $
-+ * $Revision: 1.84 $ $Date: 2018/02/15 04:59:15 $
- *
- ***************************************************************************
- * DESCRIPTION:
-@@ -22,6 +22,9 @@
- #include <stdlib.h>
- #include <string.h>
- #include <cuda.h>
-+#if CUDART_VERSION >= 9000
-+#include <cuda_fp16.h> // need to explicitly include for CUDA 9.0
-+#endif
-
- #if CUDART_VERSION < 4000
- #error The VMD QuickSurf feature requires CUDA 4.0 or later
-@@ -130,14 +133,14 @@
- #define GUNROLL 1
- #endif
-
--#if __CUDA_ARCH__ >= 300
- #define MAXTHRDENS ( GBLOCKSZX * GBLOCKSZY * GBLOCKSZZ )
--#define MINBLOCKDENS 1
-+#if __CUDA_ARCH__ >= 600
-+#define MINBLOCKDENS 16
-+#elif __CUDA_ARCH__ >= 300
-+#define MINBLOCKDENS 16
- #elif __CUDA_ARCH__ >= 200
--#define MAXTHRDENS ( GBLOCKSZX * GBLOCKSZY * GBLOCKSZZ )
- #define MINBLOCKDENS 1
- #else
--#define MAXTHRDENS ( GBLOCKSZX * GBLOCKSZY * GBLOCKSZZ )
- #define MINBLOCKDENS 1
- #endif
-
-@@ -150,7 +153,7 @@
- //
- template<class DENSITY, class VOLTEX>
- __global__ static void
--// __launch_bounds__ ( MAXTHRDENS, MINBLOCKDENS )
-+__launch_bounds__ ( MAXTHRDENS, MINBLOCKDENS )
- gaussdensity_fast_tex_norm(int natoms,
- const float4 * RESTRICT sorted_xyzr,
- const float4 * RESTRICT sorted_color,
-@@ -217,6 +220,8 @@
- for (yab=yabmin; yab<=yabmax; yab++) {
- for (xab=xabmin; xab<=xabmax; xab++) {
- int abcellidx = zab * acplanesz + yab * acncells.x + xab;
-+ // this biggest latency hotspot in the kernel, if we could improve
-+ // packing of the grid cell map, we'd likely improve performance
- uint2 atomstartend = cellStartEnd[abcellidx];
- if (atomstartend.x != GRID_CELL_EMPTY) {
- unsigned int atomid;
-@@ -296,7 +301,7 @@
-
-
- __global__ static void
--// __launch_bounds__ ( MAXTHRDENS, MINBLOCKDENS )
-+__launch_bounds__ ( MAXTHRDENS, MINBLOCKDENS )
- gaussdensity_fast_tex3f(int natoms,
- const float4 * RESTRICT sorted_xyzr,
- const float4 * RESTRICT sorted_color,
-@@ -363,6 +368,8 @@
- for (yab=yabmin; yab<=yabmax; yab++) {
- for (xab=xabmin; xab<=xabmax; xab++) {
- int abcellidx = zab * acplanesz + yab * acncells.x + xab;
-+ // this biggest latency hotspot in the kernel, if we could improve
-+ // packing of the grid cell map, we'd likely improve performance
- uint2 atomstartend = cellStartEnd[abcellidx];
- if (atomstartend.x != GRID_CELL_EMPTY) {
- unsigned int atomid;
-@@ -550,7 +557,6 @@
-
- // per-GPU handle with various memory buffer pointers, etc.
- typedef struct {
-- /// max grid sizes and attributes the current allocations will support
- int verbose;
- long int natoms;
- int colorperatom;
-@@ -561,18 +567,18 @@
- int gy;
- int gz;
-
-- CUDAMarchingCubes *mc; ///< Marching cubes class used to extract surface
-+ CUDAMarchingCubes *mc;
-
-- float *devdensity; ///< density map stored in GPU memory
-- void *devvoltexmap; ///< volumetric texture map
-- float4 *xyzr_d; ///< atom coords and radii
-- float4 *sorted_xyzr_d; ///< cell-sorted coords and radii
-- float4 *color_d; ///< colors
-- float4 *sorted_color_d; ///< cell-sorted colors
--
-- unsigned int *atomIndex_d; ///< cell index for each atom
-- unsigned int *atomHash_d; ///<
-- uint2 *cellStartEnd_d; ///< cell start/end indices
-+ float *devdensity;
-+ void *devvoltexmap;
-+ float4 *xyzr_d;
-+ float4 *sorted_xyzr_d;
-+ float4 *color_d;
-+ float4 *sorted_color_d;
-+
-+ unsigned int *atomIndex_d;
-+ unsigned int *atomHash_d;
-+ uint2 *cellStartEnd_d;
-
- void *safety;
- float3 *v3f_d;
diff --git a/mpi.patch b/mpi.patch
deleted file mode 100644
index b3822a31f665..000000000000
--- a/mpi.patch
+++ /dev/null
@@ -1,26 +0,0 @@
---- configure 2019-05-24 15:48:39.590316691 +0300
-+++ configure 2019-05-24 15:48:07.136424874 +0300
-@@ -970,15 +970,15 @@
- #######################
- $mpi_defines = "-DVMDMPI ";
- ## Argonne MPICH
--$mpi_dir = "/usr/lib64/mpich";
--$mpi_include = "-I/usr/include/mpich-x86_64";
--$mpi_library = "-L$mpi_dir/lib";
--$mpi_libs = "-lmpich";
-+#$mpi_dir = "/usr";
-+#$mpi_include = "-I/usr/include";
-+#$mpi_library = "-L$mpi_dir/lib";
-+#$mpi_libs = "-lmpicxx -lmpi";
- ## OpenMPI
--# $mpi_dir = "/usr/lib64/openmpi";
--# $mpi_include = "-I/usr/include/openmpi-x86_64";
--# $mpi_library = "-L$mpi_dir/lib";
--# $mpi_libs = "-lmpi";
-+$mpi_dir = "/usr";
-+$mpi_include = "-I/usr/include";
-+$mpi_library = "-L$mpi_dir/lib/openmpi";
-+$mpi_libs = "-lmpi_cxx -lmpi";
- ## OSU MVAPICH
- # $mpi_dir = "/usr/mpi/gcc/mvapich-1.0.0";
- # $mpi_include = "-I$mpi_dir/include";
diff --git a/ospray.patch b/ospray.patch
deleted file mode 100644
index f825563d8f46..000000000000
--- a/ospray.patch
+++ /dev/null
@@ -1,53 +0,0 @@
---- src/OSPRayRenderer.C 2016-12-01 10:11:51.000000000 +0300
-+++ src/OSPRayRenderer.C 2018-03-30 18:22:52.389958174 +0300
-@@ -11,7 +11,7 @@
- *
- * $RCSfile: OSPRayRenderer.C
- * $Author: johns $ $Locker: $ $State: Exp $
--* $Revision: 1.60 $ $Date: 2016/11/28 06:00:48 $
-+* $Revision: 1.61 $ $Date: 2017/01/10 13:57:50 $
- *
- ***************************************************************************
- * DESCRIPTION:
-@@ -659,7 +659,7 @@
- if (ao_samples != 0)
- lightscale = ao_direct;
-
-- for (int i = 0; i < directional_lights.num(); ++i) {
-+ for (i = 0; i < directional_lights.num(); ++i) {
- #if 1
- OSPLight light = ospNewLight(ospRenderer, "distant");
- #else
-@@ -2021,7 +2021,7 @@
- ca.cylinders = (float *) calloc(1, cylnum * bytes_per_cylinder);
- ca.colors = (float *) calloc(1, cylnum * 4 * sizeof(float));
-
-- unsigned int i,ind4,ind6,ind7;
-+ int i,ind4,ind6,ind7;
- const int rOffset = 6; // radius offset
- if (wtrans == NULL) {
- for (i=0,ind4=0,ind6=0,ind7=0; i<cylnum; i++,ind4+=4,ind6+=6,ind7+=7) {
-@@ -2469,10 +2469,7 @@
- int i, ind, ind9, ind12;
-
- const float ci2f = 1.0f / 255.0f;
-- const float cn2f = 1.0f / 127.5f;
- for (i=0,ind=0,ind9=0,ind12=0; i<numfacets; i++,ind+=3,ind9+=9,ind12+=12) {
-- float norm[9];
--
- // transform to eye coordinates
- wtrans.multpoint3d(v + ind9 , (float*) &mesh.v[ind9 ]);
- wtrans.multpoint3d(v + ind9 + 3, (float*) &mesh.v[ind9 + 3]);
-@@ -2627,12 +2624,6 @@
- // create and fill the OSPRay trimesh memory buffer
- int i, ind, ind9, ind12;
-
-- const rgba c = { uniform_color[0],
-- uniform_color[1],
-- uniform_color[2],
-- 1.0f /* mat_opacity*/ };
--
--
- for (i=0,ind=0,ind9=0,ind12=0; i<numfacets; i++,ind+=3,ind9+=9,ind12+=12) {
- // transform to eye coordinates
- wtrans.multpoint3d(v + ind9 , (float*) &mesh.v[ind9 ]);