diff options
author | Dylan Delgado | 2020-12-06 17:50:54 -0500 |
---|---|---|
committer | Dylan Delgado | 2020-12-06 17:50:54 -0500 |
commit | e6e77ad8e438e666dd641ea0229e6ee04062a9dd (patch) | |
tree | d125871f7e606fbb8ee2d5beae667b4f87cc6882 | |
download | aur-e6e77ad8e438e666dd641ea0229e6ee04062a9dd.tar.gz |
Initial commit, based on version 0.21
-rw-r--r-- | .SRCINFO | 21 | ||||
-rw-r--r-- | PKGBUILD | 40 | ||||
-rw-r--r-- | makeandparams.patch | 43 | ||||
-rw-r--r-- | mfaktc.ini | 271 |
4 files changed, 375 insertions, 0 deletions
diff --git a/.SRCINFO b/.SRCINFO new file mode 100644 index 000000000000..1f6c7096ac76 --- /dev/null +++ b/.SRCINFO @@ -0,0 +1,21 @@ +pkgbase = mfaktc + pkgdesc = A CUDA program for Mersenne prefactoring. + pkgver = 0.21 + pkgrel = 1 + url = https://mersenneforum.org/showthread.php?t=12827 + arch = x86_64 + license = GPL3 + makedepends = cuda + depends = nvidia + source = https://download.mersenne.ca/mfaktc/mfaktc-0.21/mfaktc-0.21.tar.gz + source = mfaktc.ini + source = makeandparams.patch + md5sums = 292d9a3bee013e9992ab199dfdfeece9 + md5sums = edf7197ee8d1e5d99ce8a9acf9287ea7 + md5sums = 2acc9b787a77a2e6ba07cbd0904c776b + sha256sums = 1973b5fae564035a33aa89bcb1a7e6dae571307a874e2eeb845e5b93939c2079 + sha256sums = SKIP + sha256sums = SKIP + +pkgname = mfaktc + diff --git a/PKGBUILD b/PKGBUILD new file mode 100644 index 000000000000..876ca4dc5289 --- /dev/null +++ b/PKGBUILD @@ -0,0 +1,40 @@ +#Maintainer: Dylan Delgado <dylan1496 at live dot com> + +pkgname=mfaktc +pkgver=0.21 +pkgrel=1 +pkgdesc="A CUDA program for Mersenne prefactoring." +arch=('x86_64') +url="https://mersenneforum.org/showthread.php?t=12827" +license=('GPL3') +depends=('nvidia') +makedepends=('cuda') +source=("https://download.mersenne.ca/${pkgname}/${pkgname}-${pkgver}/${pkgname}-${pkgver}.tar.gz" +"mfaktc.ini" +"makeandparams.patch") +md5sums=('292d9a3bee013e9992ab199dfdfeece9' +'edf7197ee8d1e5d99ce8a9acf9287ea7' +'2acc9b787a77a2e6ba07cbd0904c776b') +sha256sums=('1973b5fae564035a33aa89bcb1a7e6dae571307a874e2eeb845e5b93939c2079' +'SKIP' +'SKIP') + +prepare() { +cd ${srcdir}/${pkgname}-${pkgver}/src +patch -p2 < "../../../makeandparams.patch" +} + +build () { +cd ${srcdir}/${pkgname}-${pkgver}/src +make -f Makefile all +} + +package () { +cd ${srcdir}/${pkgname}-${pkgver} +install -Dm755 mfaktc.exe ${pkgdir}/usr/bin/mfaktc +cd .. +install -Dm755 mfaktc.ini ${pkgdir}/etc/mfaktc/mfaktc.ini +echo "A default mfaktc.ini file will be installed" +echo "in /etc/mfaktc/mfaktc.ini. 
Copy this to the" +echo "directory where you want to run mfaktc." +}
\ No newline at end of file diff --git a/makeandparams.patch b/makeandparams.patch new file mode 100644 index 000000000000..5f4e8919ff50 --- /dev/null +++ b/makeandparams.patch @@ -0,0 +1,43 @@ +diff --unified --recursive --text mfaktc-0.21/src/Makefile mfaktc-0.21-new/src/Makefile +--- mfaktc-0.21/src/Makefile 2015-02-17 15:12:49.305144447 -0500 ++++ mfaktc-0.21-new/src/Makefile 2020-12-06 16:39:26.876217215 -0500 +@@ -1,5 +1,5 @@ + # where is the CUDA Toolkit installed? +-CUDA_DIR = /usr/local/cuda ++CUDA_DIR = /opt/cuda + CUDA_INCLUDE = -I$(CUDA_DIR)/include/ + CUDA_LIB = -L$(CUDA_DIR)/lib64/ + +@@ -13,11 +13,15 @@ + NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v + + # generate code for various compute capabilities +-NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc) +-NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all! +-NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code +-NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.5 (3.2?) 
_CAN_ use funnel shift which is useful for mfaktc +-NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code ++# note, CUDA 11.1 supports CC 3.5+ (some Kepler, Maxwell, Pascal, Volta, Turing and Ampere) ++NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # CC 3.5 and 3.7 cards will use this code (some Kepler cards) ++NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code (Maxwell) ++NVCCFLAGS += --generate-code arch=compute_60,code=sm_60 ++NVCCFLAGS += --generate-code arch=compute_61,code=sm_61 # CC 6.x GPUs will use this code (Pascal) ++NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 # Titan V and Tesla V100 will use this code (Volta) ++NVCCFLAGS += --generate-code arch=compute_75,code=sm_75 # CC 7.5 GPUs will use this code (Turing) ++NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 # CC 8.0 GPUs will use this code (Tesla A100) ++NVCCFLAGS += --generate-code arch=compute_86,code=sm_86 # CC 8.6 GPUs will use this code (Ampere) + + # pass some options to the C host compiler (e.g. gcc on Linux) + NVCCFLAGS += --compiler-options=-Wall +diff --unified --recursive --text mfaktc-0.21/src/params.h mfaktc-0.21-new/src/params.h +--- mfaktc-0.21/src/params.h 2015-02-17 15:12:49.322145383 -0500 ++++ mfaktc-0.21-new/src/params.h 2020-12-06 16:51:24.334385208 -0500 +@@ -175,7 +175,7 @@ + + #define GPU_SIEVE_SIZE_MIN 4 /* A 4M bit sieve seems like a reasonable minimum */ + #define GPU_SIEVE_SIZE_DEFAULT 64 /* Default is a 16M bit sieve */ +-#define GPU_SIEVE_SIZE_MAX 128 /* We've only tested up to 128M bits. The GPU sieve code may be able to go higher. */ ++#define GPU_SIEVE_SIZE_MAX 2047 /* We've only tested up to 128M bits. The GPU sieve code may be able to go higher. 
*/ + + #define GPU_SIEVE_PROCESS_SIZE_MIN 8 /* Processing 8K bits in each block is minimum (256 threads * 1 word of 32 bits) */ + #define GPU_SIEVE_PROCESS_SIZE_DEFAULT 16 /* Default is processing 8K bits */ diff --git a/mfaktc.ini b/mfaktc.ini new file mode 100644 index 000000000000..b85e1520bf8f --- /dev/null +++ b/mfaktc.ini @@ -0,0 +1,271 @@ +# SievePrimes defines how far the factor candidates (FCs) are presieved on +# the CPU. The first <SievePrimes> odd primes are used to sieve the FCs. +# +# Minimum: SievePrimes=2000 (check SievePrimesMin) +# Maximum: SievePrimes=200000 +# +# Default: SievePrimes=25000 + +SievePrimes=25000 + + +# Set this to 1 to enable automatic adjustments of SievePrimes during +# runtime based on the "average wait times". +# +# Default: SievePrimesAdjust=1 + +SievePrimesAdjust=1 + + +# Set the minimum and maximum limit for SievePrimes, this is needed if +# SievePrimesAdjust is enabled. These are soft limits, there are hard limits +# in the code which should never ever be changed. +# +# Current limits are: +# 2000 <= SievePrimesMin <= SievePrimes <= SievePrimesMax <= 200000 +# +# Default: SievePrimesMin=5000 +# SievePrimesMax=100000 + +SievePrimesMin=5000 +SievePrimesMax=100000 + + +# Set the number of CUDA streams / data sets used by mfaktc. +# NumStreams must be >= 1. In this case mfaktc can process one stream / +# data set on the GPU while the GPU can preprocess the other one. When +# NumStreams is >= 2 then the time needed to upload (CPU->GPU transfer) +# the data sets can be hidden (if the hardware supports it (most GPUs are +# supporting this)). +# On Linux systems 2 or 3 seem to be good numbers. There are comments that +# Windows systems need a greater number of streams. +# A greater number increases the memory consumed by mfaktc (host and GPU +# memory). The current limit for the number of streams is 10! +# Don't be too greedy with this value, high values are usually just a waste +# of memory. 
+# +# Minimum: NumStreams=1 +# Maximum: NumStreams=10 +# +# Default: NumStreams=3 + +NumStreams=3 + + +# Set the number of data sets which can be preprocessed on CPU. This allows +# to tolerate more jitter on runtime of preprocessing and GPU stream +# runtime. +# Don't be too greedy with this value, high values are usually just a waste +# of memory. +# +# Minimum: CPUStreams=1 +# Maximum: CPUStreams=5 +# +# Default: CPUStreams=3 + +CPUStreams=3 + + +# The GridSize affects the number of threads per grid. +# Depending on the number of multiprocessors of your GPU, too, the +# automatic parameter threads per grid is set to: +# GridSize = 0: 65536 < threads per grid <= 131072 +# GridSize = 1: 131072 < threads per grid <= 262144 +# GridSize = 2: 262144 < threads per grid <= 524288 +# GridSize = 3: 524288 < threads per grid <= 1048576 (default) +# A smaller GridSize has more overhead than a bigger GridSize for long +# running jobs. For really small jobs there can be a small benefit on +# computation time if the GridSize is small. A smaller GridSize directly +# reduces the runtime per kernel launch and might result in a better +# interactivity on older GPUs. +# +# Default: GridSize=3 + +GridSize=3 + + +# Checkpoints = 0: disable checkpoints +# Checkpoints = 1: enable checkpoints +# Checkpoints are needed for resume capability, after a class is finished a +# checkpoint file is written. When mfaktc is interrupted during the run and +# restarted later it will begin at the last processed class. +# +# Default: Checkpoints=1 + +Checkpoints=1 + + +# CheckpointDelay is the time in seconds between two checkpoint writes. +# Allowed values are 0 <= CheckpointDelay <= 900. +# +# Minimum: CheckpointDelay=0 +# Maximum: CheckpointDelay=900 +# +# Default: CheckpointDelay=30 + +CheckpointDelay=30 + + +# WorkFileAddDelay defines, how often mfaktc should check for worktodo.add. +# When a worktodo.add is detected it will wait WorkFileAddDelay seconds until +# processing it. 
+# +# Minimum: WorkFileAddDelay=30 (WorkFileAddDelay=0 disables this feature) +# Maximum: WorkFileAddDelay=3600 +# +# Default: WorkFileAddDelay=600 + +WorkFileAddDelay=600 + + +# Allow to split an assignment into multiple bit ranges. +# 0 = disabled +# 1 = enabled +# Enabling Stages only makes sense when StopAfterFactor is 1 or 2. +# Do not change this in the middle of a run which spans over multiple +# bitlevels, in this case mfaktc will ignore the checkpoint file and +# restart from the beginning. +# +# Default: Stages=1 + +Stages=1 + + +# possible values for StopAfterFactor: +# 0: Do not stop the current assignment after a factor was found. +# 1: When a factor was found for the current assignment stop after the +# current bitlevel. This only makes sense when Stages is enabled. +# 2: When a factor was found for the current assignment stop after the +# current class. +# +# Default: StopAfterFactor=1 + +StopAfterFactor=1 + + +# possible values for PrintMode: +# 0: print a new line for each finished class +# 1: overwrite the current line (more compact output) +# +# Default: PrintMode=0 + +PrintMode=0 + + +# allow the CPU to sleep if nothing can be preprocessed? +# 0: Do not sleep if the CPU must wait for the GPU +# 1: The CPU can sleep for a short time if it has to wait for the GPU +# +# Default: AllowSleep=0 + +AllowSleep=0 + + +# if V5UserID and ComputerID are specified, then the result lines in the +# results file will have the prefix "UID: user/host, " - the same way as +# prime95 does it. +# +# Default: none (unset) +# +#V5UserID=TheJudger +#ComputerID=Ananke + + +# TimeStampInResults allows to configure if each output line in the results +# file should be preceded with a date-and-time stamp (similar to prime95) +# +# Default: TimeStampInResults=0 + +TimeStampInResults=0 + + +# ProgressFormat allows customizing the progress output of mfaktc. 
You can use +# any combination of the following format specifications, which will be +# replaced correspondingly in the progress line: +# +# %C - class ID (n/420) or (n/4620) "%4d" +# %c - class number (n/96) or (n/960) "%3d" +# %p - percent complete (%) "%5.1f" +# %g - GHz-days/day (GHz) "%7.2f" +# %t - time per class (s) "%6.0f" / "%6.1f" / "%6.2f" / "%6.3f" +# %e - eta (d/h/m/s) "%2dm%02ds" / "%2dh%02dm" / "%2dd%02dh" +# %n - number of candidates (M/G) "%6.2fM"/"%6.2fG" +# %r - rate (M/s) "%6.2f" / "%6.1f" +# %s - SievePrimes "%7d" +# %w - CPU wait time for GPU (us) n.a. (mfakto only!) +# %W - CPU wait % (%) "%6.2f" +# %d - date (Mon nn) "%b %d" (strftime format) +# %T - time (HH:MM) "%H:%M" (strftime format) +# %U - username (as configured) "%s" no fixed width +# %H - hostname (as configured) "%s" no fixed width +# %M - the exponent being worked on "%-10u" +# %l - the lower bit-limit "%2d" +# %u - the upper bit-limit "%2d" +# +# Using the ProgressHeader you can specify a fixed string that is displayed as +# a header to the progress without any modification. +# Keep in mind that "number of candidates (M/G)" and "rate (M/s)" are NOT +# comparable between CPU- and GPU-sieving. When sieving is done on GPU +# those numbers count all factor candidates prior to sieving while CPU +# sieving counts the numbers after the sieving process. +# + +# mfaktc 0.20+ (default) +ProgressHeader=Date Time | class Pct | time ETA | GHz-d/day Sieve Wait +ProgressFormat=%d %T | %C %p%% | %t %e | %g %s %W%% + +# old mfaktc 0.18/0.19 style +# +#ProgressHeader= class | candidates | time | ETA | avg. rate | SievePrimes | CPU wait +#ProgressFormat=%C/4620 | %n | %ts | %e | %rM/s | %s | %W%% + +# print everything +#ProgressHeader=[date time] exponent [TF bits]: percent class #, seq | GHZ | time | ETA | #FCs | rate | SieveP. 
| CPU wait | V5UserID@ComputerID +#ProgressFormat=[%d %T] M%M [%l-%u]: %p%% %C/4620,%c/960 | %g | %ts | %e | %n | %rM/s | %s | %W%% | %U@%H + + +# enable or disable GPU sieving +# SieveOnGPU=0: use the CPU for sieving factor candidates (high CPU usage) +# SieveOnGPU=1: use the GPU for sieving factor candidates (very low CPU usage) +# GPU sieving is supported on GPUs with compute capability 2.0 or higher. +# (e.g. Geforce 400 series or newer) +# +# Default: SieveOnGPU=1 + +SieveOnGPU=1 + + +# GPUSievePrimes defines how far we sieve the factor candidates on the GPU. +# The first <GPUSievePrimes> primes are sieved. +# +# Minimum: GPUSievePrimes=0 +# Maximum: GPUSievePrimes=1075000 +# +# Default: GPUSievePrimes=82486 + +GPUSievePrimes=82486 + + +# GPUSieveSize defines how big of a GPU sieve we use (in M bits). +# +# Minimum: GPUSieveSize=4 +# Maximum: GPUSieveSize=2047 +# +# Default: GPUSieveSize=64 + +GPUSieveSize=2047 + + +# GPUSieveProcessSize defines how many bits of the sieve each TF block +# processes (in K bits). Larger values may lead to less wasted cycles by +# reducing the number of times all threads in a warp are not TFing a +# candidate. However, more shared memory is used which may reduce occupancy. +# Smaller values should lead to a more responsive system (each kernel takes +# less time to execute). GPUSieveProcessSize must be a multiple of 8. +# +# Minimum: GPUSieveProcessSize=8 +# Maximum: GPUSieveProcessSize=32 +# +# Default: GPUSieveProcessSize=16 + +GPUSieveProcessSize=32 |