%global pypi_name triton # For testing %bcond_with test # Rebuilding clang is slow, do # export LLVM_SYSPATH=/path-to/llvm-project-install # export PYBIND11_SYSPATH=/path-to/pybind11-project-install %bcond_with local %if %{with local} %global _lto_cflags %nil # For debugging %bcond_without debug %else %bcond_with debug %endif # release/2.3.x - 4/24/23 %global commit0 54d0bb9e4b2e2dab7dc899008c0f14915f665a2f # from python/setup.py %global commit1 c5dede880d175f7229c9b2923f4753e12702305d %global pypi_version 2.3.0 %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) %global shortcommit1 %(c=%{commit1}; echo ${c:0:7}) # The llvm build has its LLVM_PARALLEL_COMPILE|LINK_JOBS switches # Triton uses the envionment variable MAX_JOBS for both. %global _smp_mflags %{nil} %global toolchain gcc %global desc %{expand: \ Triton is a language and compiler for writing highly efficient custom Deep-Learning primitives. The aim of Triton is to provide an open-source environment to write fast code at higher productivity than CUDA, but also with higher flexibility than other existing DSLs.} Name: python-%{pypi_name}-rocm Version: %{pypi_version} Release: %autorelease Summary: A language and compiler for custom Deep Learning operations License: MIT AND Apache-2.0 AND BSD-3-Clause AND BSD-2-Clause # triton's main license is MIT # llvm is Apache-2.0, BSD-3-Clause AND BSD-2-Clause # pybind11 is BSD-3-Clause URL: https://github.com/ROCm/triton/ Source0: %{url}/archive/%{commit0}/triton-%{shortcommit0}.tar.gz %if %{without local} Source1: https://github.com/llvm/llvm-project/archive/%{commit1}.tar.gz Source2: https://github.com/pybind/pybind11/archive/refs/tags/v2.11.1.tar.gz %endif Patch1: 0001-disable-tma-on-rocm.patch Patch2: 0001-remove-ptxas.patch # GPUs really only work on x86_64 ExclusiveArch: x86_64 BuildRequires: gcc-c++ BuildRequires: cmake BuildRequires: ninja-build BuildRequires: zlib-devel BuildRequires: python3-devel BuildRequires: python3dist(filelock) BuildRequires: python3dist(pip) BuildRequires: python3dist(pytest) BuildRequires: python3dist(setuptools) BuildRequires: python3dist(wheel) BuildRequires: hipcc BuildRequires: rocm-comgr-devel BuildRequires: rocm-runtime-devel BuildRequires: rocm-hip-devel # Triton uses a custom snapshot of the in development llvm # Because of instablity of the llvm api, we must use the one # triton uses. llvm is statically built and none of the # llvm headers or libraries are distributed directly. Provides: bundled(llvm-project) = 17.0.0.g%{shortcommit1} Provides: bundled(pybind11) = 2.11.1 Requires: lld Requires: rocm-comgr-devel Requires: rocm-runtime-devel %description %{desc} %package -n python3-%{pypi_name}-rocm Summary: %{summary} %description -n python3-%{pypi_name}-rocm %{desc} %prep %autosetup -p1 -n triton-%{commit0} %if %{without local} # LLVM tar xf %{SOURCE1} # PyBind tar xf %{SOURCE2} %endif # Remove bundled egg-info rm -rf python/*.egg-info # Remove cuda rm -rf python/triton/third_party/cuda # Remove and replace packaged hip bits rm -rf python/triton/third_party/hip/lib/hsa/* sed -i -e 's@lib/libhsa-runtime64.so@lib64/libhsa-runtime64.so@g' CMakeLists.txt rm -rf python/triton/third_party/hip/lib/bitcode/* cd python/triton/third_party/hip/lib/bitcode/ HIP_CLANG_PATH=`/usr/bin/hipconfig -l` RESOURCE_DIR=`${HIP_CLANG_PATH}/clang -print-resource-dir` ln -s ${RESOURCE_DIR}/amdgcn/bitcode/* . cd - %if %{without local} # rm llvm-project bits we do not need rm -rf llvm-project-%{commit1}/{bolt,clang,compiler-rt,flang,libc,libclc,libcxx,libcxxabi,libunwind,lld,lldb,llvm-libgcc,openmp,polly,pst,runtimes,utils} %endif # Disable download sed -i -e '/^download_and_copy_ptxas/d' python/setup.py # Lie about the version sed -i -e 's@version="2.2.0",@version="2.3.0",@' python/setup.py # For debugging %if %{with debug} sed -i -e 's@${CMAKE_C_FLAGS} -D__STDC_FORMAT_MACROS @-O1 -g -D__STDC_FORMAT_MACROS @' CMakeLists.txt %endif %if %{without test} # no knob to turn off downloading of googletest sed -i -e 's@add_subdirectory(unittest)@#add_subdirectory(unittest)@' CMakeLists.txt %else # E ValueError: option names {'--device'} already added sed -i -e 's@--device@--ddevice@' python/test/unit/operators/conftest.py # performance is only nvidia rm python/test/regression/test_performance.py # E ModuleNotFoundError: No module named 'triton.common' rm python/test/backend/test_device_backend.py %endif # disable -Werror sed -i -e 's@-Werror @ @' CMakeLists.txt # change default rocm location sed -i -e 's@set(ROCM_DEFAULT_DIR "/opt/rocm")@set(ROCM_DEFAULT_DIR "/usr")@' CMakeLists.txt # just removed cuda.h, can not use it now sed -i -e '/cuda.h/d' include/triton/Target/PTX/TmaMetadata.h # change path to find ld.lld sed -i -e 's@/llvm/bin/ld@/bin/ld@' lib/Target/HSACO/HSACOTranslation.cpp %build # Real cores, No hyperthreading COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'` if [ ${COMPILE_JOBS}x = x ]; then COMPILE_JOBS=1 fi # Take into account memmory usage per core, do not thrash real memory BUILD_MEM=2 MEM_KB=0 MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'` MEM_MB=`eval "expr ${MEM_KB} / 1024"` MEM_GB=`eval "expr ${MEM_MB} / 1024"` COMPILE_JOBS_MEM=`eval "expr 1 + ${MEM_GB} / ${BUILD_MEM}"` if [ "$COMPILE_JOBS_MEM" -lt "$COMPILE_JOBS" ]; then COMPILE_JOBS=$COMPILE_JOBS_MEM fi LINK_MEM=32 LINK_JOBS=`eval "expr 1 + ${MEM_GB} / ${LINK_MEM}"` %if %{without local} cd llvm-project-%{commit1} %cmake -G Ninja \ -DBUILD_SHARED_LIBS=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=$PWD/install \ -DLLVM_ENABLE_PROJECTS="mlir;llvm" \ -DLLVM_PARALLEL_COMPILE_JOBS=$COMPILE_JOBS \ -DLLVM_PARALLEL_LINK_JOBS=$LINK_JOBS \ -DLLVM_TARGETS_TO_BUILD="X86;AMDGPU;NVPTX" \ llvm %cmake_build %cmake_build -t install export LLVM_SYSPATH=$PWD/install cd .. cd pybind11-2.11.1 %cmake -G Ninja \ -DBUILD_SHARED_LIBS=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=$PWD/install \ -DPYBIND11_TEST=OFF %cmake_build %cmake_build -t install export PYBIND11_SYSPATH=$PWD/install cd .. %endif export PATH=$LLVM_SYSPATH/bin:$PATH %if %{with debug} export DEBUG=1 %else export REL_WITH_DEB_INFO=1 %endif export CC=gcc export CXX=g++ export MAX_JOBS=$LINK_JOBS cd python %py3_build %install cd python %py3_install %files -n python3-%{pypi_name}-rocm %dir %{python3_sitearch}/%{pypi_name} %dir %{python3_sitearch}/%{pypi_name}*.egg-info %{python3_sitearch}/%{pypi_name}/* %{python3_sitearch}/%{pypi_name}*.egg-info/* %license LICENSE %doc README.md %changelog %autochangelog