5dce86e9fa78fd87e796cb6dd89de2b37d20baee.patch
From 5dce86e9fa78fd87e796cb6dd89de2b37d20baee Mon Sep 17 00:00:00 2001
From: Gavin Zhao <git@gzgz.dev>
Date: Wed, 6 Mar 2024 14:39:44 -0500
Subject: [PATCH] Use fallback libraries for archs without optimized logic
(#1897)
Fixes #1757.
Enables architectures that don't have optimized logic files to also produce
libraries when `--separate-architectures` or `--lazy-library-loading` is
turned on. Previously, both of these flags had to be disabled in order for
rocBLAS to run on architectures like `gfx1010`.
Test plan:
cmake -GNinja -B build -S . \
-DCMAKE_C_COMPILER=hipcc \
-DCMAKE_CXX_COMPILER=hipcc \
-DBUILD_CLIENTS_TESTS=OFF \
-DBUILD_CLIENTS_BENCHMARKS=OFF \
-DBUILD_CLIENTS_SAMPLES=OFF \
-DBUILD_TESTING=OFF \
-DBUILD_WITH_TENSILE=ON \
-DTensile_PRINT_DEBUG=ON \
-DTensile_LIBRARY_FORMAT=msgpack \
-DTensile_CPU_THREADS=14 \
-DTensile_LAZY_LIBRARY_LOADING=ON \
-DAMDGPU_TARGETS="..."
With `AMDGPU_TARGETS` being one of the following:
- `AMDGPU_TARGETS=gfx1010`
- `AMDGPU_TARGETS=gfx1030;gfx1010`
- `AMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102`
In all three cases,
`$ROCM_PATH/lib/rocblas/library/TensileLibrary_lazy_gfx1010.dat` is produced
and all other `*.dat` files remain unchanged.
Signed-off-by: Gavin Zhao <git@gzgz.dev>
---
Tensile/TensileCreateLibrary.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/Tensile/TensileCreateLibrary.py b/Tensile/TensileCreateLibrary.py
index ca3ef322ed..ac0486d8eb 100644
--- a/Tensile/TensileCreateLibrary.py
+++ b/Tensile/TensileCreateLibrary.py
@@ -943,12 +943,20 @@ def generateLogicDataAndSolutions(logicFiles, args):
# logicData[problemType].append((scheduleName, deviceNames, \
# solutionsForSchedule, indexOrder, exactLogic, rangeLogic ))
+ (archs, _) = splitArchs()
if globalParameters["SeparateArchitectures"] or globalParameters["LazyLibraryLoading"]:
if "fallback" in masterLibraries.keys():
for key, value in masterLibraries.items():
if key != "fallback":
value.merge(deepcopy(masterLibraries["fallback"]))
+ for archName in archs:
+ archName = archName.split('-', 1)[0]
+ if archName not in masterLibraries:
+ print1("Using fallback for arch: " + archName)
+ masterLibraries[archName] = deepcopy(masterLibraries["fallback"])
+ masterLibraries[archName].version = args.version
+
masterLibraries.pop("fallback")
for _, masterLibrary in masterLibraries.items():
find-msgpack-5.patch
--- Tensile-rocm-5.4.2/Tensile/Source/lib/CMakeLists.txt.bak 2023-01-27 08:30:16.374451318 +0100
+++ Tensile-rocm-5.4.2/Tensile/Source/lib/CMakeLists.txt 2023-01-27 08:30:33.194515443 +0100
@@ -103,7 +103,7 @@
endif()
if(TENSILE_USE_MSGPACK)
- find_package(msgpack REQUIRED)
+ find_package(msgpack-cxx REQUIRED)
target_compile_definitions(TensileHost PUBLIC -DTENSILE_MSGPACK=1)
if(TARGET msgpackc-cxx)
Pinned Comments
ulyssesrr commented on 2023-08-23 22:31 (UTC)
This package builds the missing Tensile backend for gfx1010; see the issue filed upstream for more details: https://github.com/ROCmSoftwarePlatform/Tensile/issues/1757