Initial commit, based on version 0.21

author: Dylan Delgado 2020-12-06 17:50:54 -0500
committer: Dylan Delgado 2020-12-06 17:50:54 -0500
commit: e6e77ad8e438e666dd641ea0229e6ee04062a9dd (patch)
tree: d125871f7e606fbb8ee2d5beae667b4f87cc6882
download: aur-e6e77ad8e438e666dd641ea0229e6ee04062a9dd.tar.gz
4 files changed, 375 insertions, 0 deletions
diff --git a/.SRCINFO b/.SRCINFO
new file mode 100644
index 000000000000..1f6c7096ac76
--- /dev/null
+++ b/.SRCINFO
@@ -0,0 +1,21 @@
+pkgbase = mfaktc
+	pkgdesc = A CUDA program for Mersenne prefactoring.
+	pkgver = 0.21
+	pkgrel = 1
+	url = https://mersenneforum.org/showthread.php?t=12827
+	arch = x86_64
+	license = GPL3
+	makedepends = cuda
+	depends = nvidia
+	source = https://download.mersenne.ca/mfaktc/mfaktc-0.21/mfaktc-0.21.tar.gz
+	source = mfaktc.ini
+	source = makeandparams.patch
+	md5sums = 292d9a3bee013e9992ab199dfdfeece9
+	md5sums = edf7197ee8d1e5d99ce8a9acf9287ea7
+	md5sums = 2acc9b787a77a2e6ba07cbd0904c776b
+	sha256sums = 1973b5fae564035a33aa89bcb1a7e6dae571307a874e2eeb845e5b93939c2079
+	sha256sums = SKIP
+	sha256sums = SKIP
+
+pkgname = mfaktc
+
diff --git a/PKGBUILD b/PKGBUILD
new file mode 100644
index 000000000000..876ca4dc5289
--- /dev/null
+++ b/PKGBUILD
@@ -0,0 +1,40 @@
+#Maintainer: Dylan Delgado <dylan1496 at live dot com>
+
+pkgname=mfaktc
+pkgver=0.21
+pkgrel=1
+pkgdesc="A CUDA program for Mersenne prefactoring."
+arch=('x86_64')
+url="https://mersenneforum.org/showthread.php?t=12827"
+license=('GPL3')
+depends=('nvidia')
+makedepends=('cuda')
+source=("https://download.mersenne.ca/${pkgname}/${pkgname}-${pkgver}/${pkgname}-${pkgver}.tar.gz"
+"mfaktc.ini"
+"makeandparams.patch")
+md5sums=('292d9a3bee013e9992ab199dfdfeece9'
+'edf7197ee8d1e5d99ce8a9acf9287ea7'
+'2acc9b787a77a2e6ba07cbd0904c776b')
+sha256sums=('1973b5fae564035a33aa89bcb1a7e6dae571307a874e2eeb845e5b93939c2079'
+'SKIP'
+'SKIP')
+
+prepare() {
+cd ${srcdir}/${pkgname}-${pkgver}/src
+patch -p2 < "../../../makeandparams.patch"
+}
+
+build () {
+cd ${srcdir}/${pkgname}-${pkgver}/src
+make -f Makefile all
+}
+
+package () {
+cd ${srcdir}/${pkgname}-${pkgver}
+install -Dm755 mfaktc.exe ${pkgdir}/usr/bin/mfaktc
+cd ..
+install -Dm755 mfaktc.ini ${pkgdir}/etc/mfaktc/mfaktc.ini
+echo "A default mfaktc.ini file will be installed"
+echo "in /etc/mfaktc/mfaktc.ini. Copy this to the"
+echo "directory where you want to run mfaktc."
+}
+\ No newline at end of file
diff --git a/makeandparams.patch b/makeandparams.patch
new file mode 100644
index 000000000000..5f4e8919ff50
--- /dev/null
+++ b/makeandparams.patch
@@ -0,0 +1,43 @@
+diff --unified --recursive --text mfaktc-0.21/src/Makefile mfaktc-0.21-new/src/Makefile
+--- mfaktc-0.21/src/Makefile	2015-02-17 15:12:49.305144447 -0500
++++ mfaktc-0.21-new/src/Makefile	2020-12-06 16:39:26.876217215 -0500
+@@ -1,5 +1,5 @@
+ # where is the CUDA Toolkit installed?
+-CUDA_DIR = /usr/local/cuda
++CUDA_DIR = /opt/cuda
+ CUDA_INCLUDE = -I$(CUDA_DIR)/include/
+ CUDA_LIB = -L$(CUDA_DIR)/lib64/
+ 
+@@ -13,11 +13,15 @@
+ NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v
+ 
+ # generate code for various compute capabilities
+-NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc)
+-NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all!
+-NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code 
+-NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.5 (3.2?) _CAN_ use funnel shift which is useful for mfaktc
+-NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code
++# note, CUDA 11.1 supports CC 3.5+ (some Kepler, Maxwell, Pascal, Volta, Turing and Ampere)
++NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # CC 3.5 and 3.7 cards will use this code (some Kepler cards)
++NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code (Maxwell)
++NVCCFLAGS += --generate-code arch=compute_60,code=sm_60
++NVCCFLAGS += --generate-code arch=compute_61,code=sm_61 # CC 6.x GPUs will use this code (Pascal)
++NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 # Titan V and Tesla V100 will use this code
++NVCCFLAGS += --generate-code arch=compute_75,code=sm_75 # CC 7.5 GPU's will use this code (Volta)
++NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 # CC 8.0 GPU's will use this code (Tesla A100)
++NVCCFLAGS += --generate-code arch=compute_86,code=sm_86 # CC 8.6 GPU's will use this code (Ampere)
+ 
+ # pass some options to the C host compiler (e.g. gcc on Linux)
+ NVCCFLAGS += --compiler-options=-Wall
+diff --unified --recursive --text mfaktc-0.21/src/params.h mfaktc-0.21-new/src/params.h
+--- mfaktc-0.21/src/params.h	2015-02-17 15:12:49.322145383 -0500
++++ mfaktc-0.21-new/src/params.h	2020-12-06 16:51:24.334385208 -0500
+@@ -175,7 +175,7 @@
+ 
+ #define GPU_SIEVE_SIZE_MIN                   4 /* A 4M bit sieve seems like a reasonable minimum */
+ #define GPU_SIEVE_SIZE_DEFAULT              64 /* Default is a 16M bit sieve */
+-#define GPU_SIEVE_SIZE_MAX                 128 /* We've only tested up to 128M bits.  The GPU sieve code may be able to go higher. */
++#define GPU_SIEVE_SIZE_MAX                 2047 /* We've only tested up to 128M bits.  The GPU sieve code may be able to go higher. */
+ 
+ #define GPU_SIEVE_PROCESS_SIZE_MIN           8 /* Processing 8K bits in each block is minimum (256 threads * 1 word of 32 bits) */
+ #define GPU_SIEVE_PROCESS_SIZE_DEFAULT      16 /* Default is processing 8K bits */
diff --git a/mfaktc.ini b/mfaktc.ini
new file mode 100644
index 000000000000..b85e1520bf8f
--- /dev/null
+++ b/mfaktc.ini
@@ -0,0 +1,271 @@
+# SievePrimes defines how far the factor candidates (FCs) are presieved on
+# the CPU. The first <SievePrimes> odd primes are used to sieve the FCs.
+#
+# Minimum: SievePrimes=2000 (check SievePrimesMin)
+# Maximum: SievePrimes=200000
+#
+# Default: SievePrimes=25000
+
+SievePrimes=25000
+
+
+# Set this to 1 to enable automatically adjustments of SievePrimes during
+# runtime based on the "average wait times".
+#
+# Default: SievePrimesAdjust=1
+
+SievePrimesAdjust=1
+
+
+# Set the minimum and maximum limit for SievePrimes, this is needed if
+# SievePrimesAdjust is enabled. These are soft limits, there are hard limits
+# in the code which should never ever be changed.
+#
+# Current limits are:
+# 2000 <= SievePrimesMin <= SievePrimes <= SievePrimesMax <= 200000
+#
+# Default: SievePrimesMin=5000
+#          SievePrimesMax=100000
+
+SievePrimesMin=5000
+SievePrimesMax=100000
+
+
+# Set the number of CUDA streams / data sets used by mfaktc.
+# NumStreams must be >= 1. In this case mfaktc can process one stream /
+# data set on the GPU while the GPU can preprocess the other one. When
+# NumStreams is >= 2 than the time needed to upload (CPU->GPU transfer)
+# the data sets can be hidden (if the hardware supports it (most GPUs are
+# supporting this)).
+# On Linux systems 2 or 3 seems a good numbers. There are comments that
+# Windows systems need a greater number of streams.
+# A greater number increases the memory consumed by mfaktc (host and GPU
+# memory). The current limit for the number of streams is 10!
+# Don't be too greedy with this value, high values are usualy just a waste
+# of memory.
+#
+# Minimum: NumStreams=1
+# Maximum: NumStreams=10
+#
+# Default: NumStreams=3
+
+NumStreams=3
+
+
+# Set the number of data sets which can be preprocessed on CPU. This allows
+# to tolerate more jitter on runtime of preprocessing and GPU stream
+# runtime.
+# Don't be too greedy with this value, high values are usualy just a waste
+# of memory.
+#
+# Minimum: CPUStreams=1
+# Maximum: CPUStreams=5
+#
+# Default: CPUStreams=3
+
+CPUStreams=3
+
+
+# The GridSize affects the number of threads per grid.
+# Depending on the number of multiprocessors of your GPU, too, the
+# automatic parameter threads per grid is set to:
+#   GridSize = 0:  65536 < threads per grid <=  131072
+#   GridSize = 1: 131072 < threads per grid <=  262144
+#   GridSize = 2: 262144 < threads per grid <=  524288
+#   GridSize = 3: 524288 < threads per grid <= 1048576 (default)
+# A smaller GridSize has more overhead than a bigger GridSize for long
+# running jobs. For really small jobs there can be a small benefit on
+# computation time if the GridSize is small. A smaller GridSize directly
+# reduces the runtime per kernel launch and might result in a better
+# interactivity one older GPUs.
+#
+# Default: GridSize=3
+
+GridSize=3
+
+
+# Checkpoints = 0: disable checkpoints
+# Checkpoints = 1: enable checkpoints
+# Checkpoints are needed for resume capability, after a class is finished a
+# checkpoint file is written. When mfaktc is interrupted during the run and
+# restarted later it will begin at the last processed class.
+#
+# Default: Checkpoints=1
+
+Checkpoints=1
+
+
+# CheckpointDelay is the time in seconds between two checkpoint writes.
+# Allowed values are 0 <= CheckpointDelay <= 900.
+#
+# Minimum: CheckpointDelay=0
+# Maximum: CheckpointDelay=900
+#
+# Default: CheckpointDelay=30
+
+CheckpointDelay=30
+
+
+# WorkFileAddDelay defines, how often mfaktc should check for worktodo.add.
+# When a worktodo.add is deteced it will wait WorkFileAddDelay seconds until
+# processing it.
+#
+# Minimum: WorkFileAddDelay=30 (WorkFileAddDelay=0 disables this feature)
+# Maximum: WorkFileAddDelay=3600
+#
+# Default: WorkFileAddDelay=600
+
+WorkFileAddDelay=600
+
+
+# Allow to split an assignment into multiple bit ranges.
+# 0 = disabled
+# 1 = enabled
+# Enabled Stages make only sense when StopAfterFactor is 1 or 2.
+# Do not change this in the middle of a run which spans over multiple
+# bitlevels, in this case mfaktc will ignore the checkpoint file and
+# restarts from the beginning.
+#
+# Default: Stages=1
+
+Stages=1
+
+
+# possible values for StopAfterFactor:
+# 0: Do not stop the current assignment after a factor was found.
+# 1: When a factor was found for the current assignment stop after the
+#    current bitlevel. This makes only sense when Stages is enabled.
+# 2: When a factor was found for the current assignment stop after the
+#    current class.
+#
+# Default: StopAfterFactor=1
+
+StopAfterFactor=1
+
+
+# possible values for PrintMode:
+# 0: print a new line for each finished class
+# 1: overwrite the current line (more compact output)
+#
+# Default: PrintMode=0
+
+PrintMode=0
+
+
+# allow the CPU to sleep if nothing can be preprocessed?
+# 0: Do not sleep if the CPU must wait for the GPU
+# 1: The CPU can sleep for a short time if it has to wait for the GPU
+#
+# Default: AllowSleep=0
+
+AllowSleep=0
+
+
+# if V5UserID and ComputerID are specified, then the result lines in the
+# results file will have the prefix "UID: user/host, " - the same way as
+# prime95 does it.
+# 
+# Default: none (unset)
+#
+#V5UserID=TheJudger
+#ComputerID=Ananke
+
+
+# TimeStampInResults allows to configure if each output line in the results
+# file should be preceeded with a date-and-time stamp (similar to prime95)
+#
+# Default: TimeStampInResults=0
+
+TimeStampInResults=0
+
+
+# PrintFormat allows to customize the progress output of mfaktc. You can use
+# any combination of the following format specifications, which will be
+# replaced correspondingly in the progress line:
+#
+#  %C - class ID (n/420) or (n/4620)   "%4d"
+#  %c - class number (n/96) or (n/960) "%3d"
+#  %p - percent complete (%)           "%5.1f"
+#  %g - GHz-days/day (GHz)             "%7.2f"
+#  %t - time per class (s)             "%6.0f" / "%6.1f" / "%6.2f" / "%6.3f"
+#  %e - eta (d/h/m/s)                  "%2dm%02ds" / "%2dh%02dm" / "%2dd%02dh"
+#  %n - number of candidates (M/G)     "%6.2fM"/"%6.2fG"
+#  %r - rate (M/s)                     "%6.2f" / "%6.1f"
+#  %s - SievePrimes                    "%7d"
+#  %w - CPU wait time for GPU (us)     n.a. (mfakto only!)
+#  %W - CPU wait % (%)                 "6.2f"
+#  %d - date (Mon nn)                  "%b %d"  (strftime format)
+#  %T - time (HH:MM)                   "%H:%M"  (strftime format)
+#  %U - username (as configured)       "%s"     no fixed width
+#  %H - hostname (as configured)       "%s"     no fixed width
+#  %M - the exponent being worked on   "%-10u"
+#  %l - the lower bit-limit            "%2d"
+#  %u - the upper bit-limit            "%2d"
+#
+# Using the ProgressHeader you can specify a fix string that is displayed as
+# a header to the progress without any modification.
+# Keep in mind that "number of candidates (M/G)" and "rate (M/s)" are NOT
+# compareable between CPU- and GPU-sieving. When sieving is done on GPU
+# those number count all factor candidates prior to sieving while CPU
+# sieving counts the numbers after the sieving process.
+#
+
+# mfaktc 0.20+ (default)
+ProgressHeader=Date    Time | class   Pct |   time     ETA | GHz-d/day    Sieve     Wait
+ProgressFormat=%d %T | %C %p%% | %t  %e |   %g  %s  %W%%
+
+# old mfaktc 0.18/0.19 style
+#
+#ProgressHeader=    class | candidates |    time |    ETA | avg. rate | SievePrimes | CPU wait
+#ProgressFormat=%C/4620 |    %n | %ts | %e | %rM/s |     %s |  %W%%
+
+# print everything
+#ProgressHeader=[date    time]  exponent [TF bits]: percent  class #, seq    |     GHZ |    time |    ETA |    #FCs |      rate | SieveP. | CPU wait | V5UserID@ComputerID
+#ProgressFormat=[%d %T] M%M [%l-%u]: %p%% %C/4620,%c/960 | %g | %ts | %e | %n | %rM/s | %s |  %W%% | %U@%H
+
+
+# enable or disable GPU sieving
+# SieveOnGPU=0: use the CPU for sieving factor candidates (high CPU usage)
+# SieveOnGPU=1: use the GPU for sieving factor candidates (very low CPU usage)
+# GPU sieving is supported on GPUs with compute capability 2.0 or higher.
+# (e.g. Geforce 400 series or newer)
+#
+# Default: SieveOnGPU=1
+
+SieveOnGPU=1
+
+
+# GPUSievePrimes defines how far we sieve the factor candidates on the GPU.
+# The first <GPUSievePrimes> primes are sieved.
+#
+# Minimum: GPUSievePrimes=0
+# Maximum: GPUSievePrimes=1075000
+#
+# Default: GPUSievePrimes=82486
+
+GPUSievePrimes=82486
+
+
+# GPUSieveSize defines how big of a GPU sieve we use (in M bits).
+#
+# Minimum: GPUSieveSize=4
+# Maximum: GPUSieveSize=2047
+#
+# Default: GPUSieveSize=64
+
+GPUSieveSize=2047
+
+
+# GPUSieveProcessSize defines how far many bits of the sieve each TF block
+# processes (in K bits). Larger values may lead to less wasted cycles by
+# reducing the number of times all threads in a warp are not TFing a
+# candidate.  However, more shared memory is used which may reduce occupancy.
+# Smaller values should lead to a more responsive system (each kernel takes
+# less time to execute). GPUSieveProcessSize must be a multiple of 8.
+#
+# Minimum: GPUSieveProcessSize=8
+# Maximum: GPUSieveProcessSize=32
+#
+# Default: GPUSieveProcessSize=16
+
+GPUSieveProcessSize=32
author	Dylan Delgado	2020-12-06 17:50:54 -0500
committer	Dylan Delgado	2020-12-06 17:50:54 -0500
commit	e6e77ad8e438e666dd641ea0229e6ee04062a9dd (patch)
tree	d125871f7e606fbb8ee2d5beae667b4f87cc6882
download	aur-e6e77ad8e438e666dd641ea0229e6ee04062a9dd.tar.gz