# RPM spec for rocWMMA, the ROCm header-only matrix multiply-accumulate
# library. Only a -devel subpackage (headers) is produced by default; an
# optional -test subpackage is built when the "test" build condition is on.

%global upstreamname rocWMMA
%global rocm_release 6.0
%global rocm_patch 2
%global rocm_version %{rocm_release}.%{rocm_patch}

%global toolchain rocm
# hipcc does not support some clang flags
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/')

# there is no debug package
%global debug_package %{nil}

# It is necessary to use this with a local build
# export QA_RPATHS=0xff
%bcond_with test

# When we are not using the system clang
%bcond_without compat_build
%global llvm_maj_ver 17

# may run out of memory for both compile and link
# Use fine tuned cmake ROCWMMA_PARALLEL_COMPILE|LINK_JOBS switches
%global _smp_mflags %{nil}

Name:           rocwmma
Version:        %{rocm_version}
Release:        %autorelease
Summary:        ROCm Matrix Multiply and Accumulate library

Url:            https://github.com/ROCm/%{upstreamname}
License:        MIT
Source0:        %{url}/archive/rocm-%{rocm_version}.tar.gz#/%{upstreamname}-%{rocm_version}.tar.gz
Patch0:         0001-add-link-and-compile-pools-for-rocwmma.patch

BuildRequires:  cmake
%if %{with compat_build}
BuildRequires:  libomp%{llvm_maj_ver}-devel
%else
BuildRequires:  libomp-devel
%endif
BuildRequires:  ninja-build
BuildRequires:  rocm-cmake
BuildRequires:  rocm-comgr-devel
BuildRequires:  rocm-hip-devel
BuildRequires:  rocm-rpm-macros
BuildRequires:  rocm-runtime-devel

%if %{with test}
BuildRequires:  rocblas-devel
BuildRequires:  gtest-devel
%endif

# Only x86_64 works right now:
ExclusiveArch:  x86_64

%description
rocWMMA is a C++ library for accelerating mixed-precision matrix
multiply-accumulate (MMA) operations leveraging AMD GPU hardware.
rocWMMA makes it easier to break down MMA problems into fragments
and distribute block-wise MMA operations in parallel across GPU
wavefronts. Our API consists of a header library, that you can
use to compile MMA acceleration directly into GPU kernel device
code. This can benefit from compiler optimization in the
generation of kernel assembly, and doesn't incur additional
overhead costs of linking to external runtime libraries or having
to launch separate kernels.

%package devel
Summary:        Libraries and headers for %{name}
Provides:       %{name}-static = %{version}-%{release}

%description devel
%{summary}

%if %{with test}
%package test
Summary:        Tests for %{name}

%description test
%{summary}
%endif

%prep
%autosetup -p1 -n %{upstreamname}-rocm-%{version}

%build

# Real cores, No hyperthreading
# Take the first "cpu cores" entry; fall back to a single job when the field
# is missing or is not a plain number.
COMPILE_JOBS=$(awk '/cpu cores/ { print $4; exit }' /proc/cpuinfo || :)
case "${COMPILE_JOBS}" in
    ''|*[!0-9]*) COMPILE_JOBS=1 ;;
esac

# Take into account memory usage per core, do not thrash real memory
# BUILD_MEM and LINK_MEM are the GiB budgeted per compile job and per link job.
BUILD_MEM=2
MEM_KB=$(awk '/MemTotal/ { print $2; exit }' /proc/meminfo || :)
case "${MEM_KB}" in
    ''|*[!0-9]*) MEM_KB=0 ;;
esac
# Shell arithmetic is used instead of expr: expr exits with status 1 when the
# result is 0, which aborts the scriptlet when it is run with errexit.
MEM_GB=$(( MEM_KB / 1024 / 1024 ))
COMPILE_JOBS_MEM=$(( 1 + MEM_GB / BUILD_MEM ))
if [ "${COMPILE_JOBS_MEM}" -lt "${COMPILE_JOBS}" ]; then
    COMPILE_JOBS=${COMPILE_JOBS_MEM}
fi
LINK_MEM=16
LINK_JOBS=$(( 1 + MEM_GB / LINK_MEM ))

%cmake -G Ninja \
    -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
    -DROCM_SYMLINK_LIBS=OFF \
    -DHIP_PLATFORM=amd \
    -DROCWMMA_PARALLEL_COMPILE_JOBS="${COMPILE_JOBS}" \
    -DROCWMMA_PARALLEL_LINK_JOBS="${LINK_JOBS}" \
    -DROCWMMA_BUILD_SAMPLES=OFF \
    -DROCWMMA_VALIDATE_WITH_ROCBLAS=OFF \
    -DROCWMMA_BENCHMARK_WITH_ROCBLAS=OFF \
%if %{with test}
    -DROCWMMA_BUILD_TESTS=ON
%else
    -DROCWMMA_BUILD_TESTS=OFF
%endif

%cmake_build

%install
%cmake_install

%files devel
%dir %{_includedir}/%{name}/
%license LICENSE.md
%exclude %{_docdir}/%{name}/LICENSE.md
%{_includedir}/%{name}/*

%if %{with test}
%files test
%{_bindir}/*
%endif

%changelog
%autochangelog