Package Details: rocblas-gfx1010-backend 5.6.1-1

Git Clone URL: https://aur.archlinux.org/rocblas-gfx1010-backend.git (read-only, click to copy)
Package Base: rocblas-gfx1010-backend
Description: Adds rocBLAS backend libraries for gfx1010
Upstream URL: https://rocblas.readthedocs.io/en/latest
Licenses: MIT
Submitter: ulyssesrr
Maintainer: None
Last Packager: ulyssesrr
Votes: 0
Popularity: 0.000000
First Submitted: 2023-08-23 01:17 (UTC)
Last Updated: 2023-09-21 02:54 (UTC)

Pinned Comments

ulyssesrr commented on 2023-08-23 22:31 (UTC)

This package builds the Tensile backend that is missing for gfx1010; see the upstream issue for more details: https://github.com/ROCmSoftwarePlatform/Tensile/issues/1757

Latest Comments

grdgkjrpdihe commented on 2024-11-07 00:46 (UTC) (edited on 2024-11-07 00:46 (UTC) by grdgkjrpdihe)

5dce86e9fa78fd87e796cb6dd89de2b37d20baee.patch

From 5dce86e9fa78fd87e796cb6dd89de2b37d20baee Mon Sep 17 00:00:00 2001
From: Gavin Zhao <git@gzgz.dev>
Date: Wed, 6 Mar 2024 14:39:44 -0500
Subject: [PATCH] Use fallback libraries for archs without optimized logic
 (#1897)

Fixes #1757.

Enables architectures that don't have optimized logic files to also produce
libraries when `--separate-architectures` or `--lazy-library-loading` is
turned on. Previously, one must disable both of these two flags in order for
rocBLAS to run on architectures like `gfx1010`.

Test plan:
cmake -GNinja -B build -S . \
    -DCMAKE_C_COMPILER=hipcc \
    -DCMAKE_CXX_COMPILER=hipcc \
    -DBUILD_CLIENTS_TESTS=OFF \
    -DBUILD_CLIENTS_BENCHMARKS=OFF \
    -DBUILD_CLIENTS_SAMPLES=OFF \
    -DBUILD_TESTING=OFF \
    -DBUILD_WITH_TENSILE=ON \
    -DTensile_PRINT_DEBUG=ON \
    -DTensile_LIBRARY_FORMAT=msgpack \
    -DTensile_CPU_THREADS=14 \
    -DTensile_LAZY_LIBRARY_LOADING=ON \
    -DAMDGPU_TARGETS="..."

With `AMDGPU_TARGETS` being one of the following
- `AMDGPU_TARGETS=gfx1010`
- `AMDGPU_TARGETS=gfx1030;gfx1010`
- `AMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102`

In all three cases,
`$ROCM_PATH/lib/rocblas/library/TensileLibrary_lazy_gfx1010.dat` is produced
and all other `*.dat` files remain unchanged.

Signed-off-by: Gavin Zhao <git@gzgz.dev>
---
 Tensile/TensileCreateLibrary.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
index ca3ef322ed..ac0486d8eb 100644
--- a/Tensile/TensileCreateLibrary.py
+++ b/Tensile/TensileCreateLibrary.py
@@ -943,12 +943,20 @@ def generateLogicDataAndSolutions(logicFiles, args):
     # logicData[problemType].append((scheduleName, deviceNames, \
     #     solutionsForSchedule, indexOrder, exactLogic, rangeLogic ))

+  (archs, _) = splitArchs()
   if globalParameters["SeparateArchitectures"] or globalParameters["LazyLibraryLoading"]:
     if "fallback" in masterLibraries.keys():
       for key, value in masterLibraries.items():
         if key != "fallback":
           value.merge(deepcopy(masterLibraries["fallback"]))

+      for archName in archs:
+        archName = archName.split('-', 1)[0]
+        if archName not in masterLibraries:
+          print1("Using fallback for arch: " + archName)
+          masterLibraries[archName] = deepcopy(masterLibraries["fallback"])
+          masterLibraries[archName].version = args.version
+
       masterLibraries.pop("fallback")

     for _, masterLibrary in masterLibraries.items():

find-msgpack-5.patch

--- Tensile-rocm-5.4.2/Tensile/Source/lib/CMakeLists.txt.bak    2023-01-27 08:30:16.374451318 +0100
+++ Tensile-rocm-5.4.2/Tensile/Source/lib/CMakeLists.txt        2023-01-27 08:30:33.194515443 +0100
@@ -103,7 +103,7 @@
 endif()

 if(TENSILE_USE_MSGPACK)
-    find_package(msgpack REQUIRED)
+    find_package(msgpack-cxx REQUIRED)
     target_compile_definitions(TensileHost PUBLIC -DTENSILE_MSGPACK=1)

     if(TARGET msgpackc-cxx)

grdgkjrpdihe commented on 2024-11-07 00:44 (UTC) (edited on 2024-11-07 00:44 (UTC) by grdgkjrpdihe)

Update to 6.0.2:
PKGBUILD

# Maintainer: Torsten Keßler <tpkessler at archlinux dot org>
# Contributor: Markus Näther <naether.markus@gmail.com>

# Package identity.
pkgname=rocblas
pkgver=6.0.2
pkgrel=1
pkgdesc='Next generation BLAS implementation for ROCm platform'
arch=('x86_64')
url='https://rocblas.readthedocs.io/en/latest'
license=('MIT')

# Packages needed at run time.
depends=(
  'rocm-core'
  'hip'
  'glibc'
  'gcc-libs'
  'openmp'
)

# Packages needed only while building.
makedepends=(
  'rocm-cmake'
  'python'
  'python-virtualenv'
  'python-pyaml'
  'python-wheel'
  'python-msgpack'
  'python-joblib'
  'perl-file-which'
  'msgpack-cxx'
  'gcc-fortran'
)

# Upstream repositories the two release tarballs are fetched from.
_rocblas='https://github.com/ROCmSoftwarePlatform/rocBLAS'
_tensile='https://github.com/ROCmSoftwarePlatform/Tensile'

# Two upstream tarballs followed by two local patch files.
source=(
  "$pkgname-$pkgver.tar.gz::$_rocblas/archive/rocm-$pkgver.tar.gz"
  "$pkgname-tensile-$pkgver.tar.gz::$_tensile/archive/refs/tags/rocm-$pkgver.tar.gz"
  'find-msgpack-5.patch'
  '5dce86e9fa78fd87e796cb6dd89de2b37d20baee.patch'
)

# One checksum per source entry, listed in the same order as source.
sha256sums=(
  'd1bf31063a2d349797b88c994c91d05f94e681bafb5550ad9b53529703d89dbb'
  '1d8a92422560c1e908fa25fd97a4aa07a96659528a543f77618408ffcfe1f307'
  'ef6c1feef3177573e57f2502452264ad0a0fdd36616bf03f0094f41d9d779eb3'
  'd2897f30cbfdf7da1824c0f60c00a71e8512149639fa9c11b3197156ba6c1229'
)

options=('!lto')

# Directory names used by prepare(), build() and package():
# "<repository name>-<tarball file name without .tar.gz>".
_dirname="${_rocblas##*/}-$(basename "${source[0]}" '.tar.gz')"
_tensile_dir="${_tensile##*/}-$(basename "${source[1]}" '.tar.gz')"

# Apply the local patches to the extracted Tensile source tree.
# Globals:   _tensile_dir (read) - Tensile directory, relative to the
#                                  current working directory
#            srcdir (read)       - directory holding the patch files
# Returns:   non-zero if the Tensile directory cannot be entered; otherwise
#            the exit status of the last patch invocation
prepare() {
  # Stop here rather than patching whatever directory we happen to be in.
  cd "$_tensile_dir" || return 1

  # Every patch gets the same flags. -N makes patch skip a hunk that is
  # already applied instead of treating it as a reversed patch.
  local patch_file
  for patch_file in \
    find-msgpack-5.patch \
    5dce86e9fa78fd87e796cb6dd89de2b37d20baee.patch; do
    patch -Np1 -i "$srcdir/$patch_file"
  done
}

# Configure and compile rocBLAS together with its Tensile backend.
# Globals:   _dirname, _tensile_dir, srcdir, CXXFLAGS (read);
#            HIPCC_COMPILE_FLAGS_APPEND, HIPCC_LINK_FLAGS_APPEND (exported)
# Returns:   exit status of the final cmake invocation
build() {
  # Let hipcc compile and link the code for the supported GPU archs with
  # as many parallel jobs as nproc reports.
  export HIPCC_COMPILE_FLAGS_APPEND="-parallel-jobs=$(nproc)"
  export HIPCC_LINK_FLAGS_APPEND="$HIPCC_COMPILE_FLAGS_APPEND"

  # Source tree and out-of-tree build directory.
  local configure_opts=(-Wno-dev -S "$_dirname" -B build)

  # Compiler, toolchain and install layout.
  # HIP does not support -fcf-protection, so it is switched off here; see
  # https://rocm.docs.amd.com/en/latest/reference/rocmcc.html#support-status-of-other-clang-options
  configure_opts+=(
    -D CMAKE_BUILD_TYPE=None
    -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc
    -D CMAKE_TOOLCHAIN_FILE=toolchain-linux.cmake
    -D CMAKE_CXX_FLAGS="${CXXFLAGS} -fcf-protection=none"
    -D CMAKE_INSTALL_PREFIX=/opt/rocm
    -D CMAKE_PREFIX_PATH=/opt/rocm/llvm/lib/cmake/llvm
    -D amd_comgr_DIR=/opt/rocm/lib/cmake/amd_comgr
  )

  # Tensile: build it in, use msgpack libraries, and point the build at the
  # Tensile sources under $srcdir.
  configure_opts+=(
    -D BUILD_WITH_TENSILE=ON
    -D Tensile_LIBRARY_FORMAT=msgpack
    -D Tensile_TEST_LOCAL_PATH="$srcdir/$_tensile_dir"
  )

  cmake "${configure_opts[@]}"
  cmake --build build
}

# Install the built tree and the licence file into the package directory.
# Globals:   pkgdir, pkgname, _dirname (read)
# Returns:   exit status of the licence install command
package() {
  local license_src="$_dirname/LICENSE.md"
  local license_dest="$pkgdir/usr/share/licenses/$pkgname/LICENSE"

  # DESTDIR is set for this one command only, so the install lands under
  # $pkgdir.
  DESTDIR="$pkgdir" cmake --install build

  install -Dm644 "$license_src" "$license_dest"
}

ulyssesrr commented on 2023-08-23 22:31 (UTC)

This package builds the Tensile backend that is missing for gfx1010; see the upstream issue for more details: https://github.com/ROCmSoftwarePlatform/Tensile/issues/1757