%global pypi_name triton # So pre releases can be tried %bcond_without gitcommit %if %{with gitcommit} # The top of tree ~2/11/24 %global commit0 846d6e7e77891706d179b20f27b1278ac3b9a9ac %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) # llvm %global commit1 4017f04e310454ccced4c404a23f7698eec735ca %global pypi_version 3.0.0 %else %global pypi_version 2.1.0 # The sdist does not contain enough to do the build # So fetch the repo at the 2.1.0 commit %global commit0 da40a1e984bf57c4708daf603eb427442025f99b %global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) # Do no use the prebuilt llvm # This commit should come from trition/cmake/llvm-hash.txt # But this file is not part of the tag, so fall back to what # setup dowloads # version = "llvm-17.0.0-c5dede880d17" %global commit1 c5dede880d175f7229c9b2923f4753e12702305d %endif # The llvm build has its LLVM_PARALLEL_COMPILE|LINK_JOBS switches # Triton uses the envionment variable MAX_JOBS for both. %global _smp_mflags %{nil} # reuse lto logic from llvm.spec %bcond_with lto_build %if %{without lto_build} %global _lto_cflags %nil %endif %bcond_with test Name: python-%{pypi_name} Version: %{pypi_version} Release: %autorelease Summary: A language and compiler for custom Deep Learning operations License: MIT AND Apache-2.0 AND BSD-3-Clause AND BSD-2-Clause # Main license is MIT # llvm is Apache-2.0, BSD-3-Clause AND BSD-2-Clause URL: https://github.com/openai/triton/ Source0: %{url}/archive/%{commit0}/triton-%{shortcommit0}.tar.gz Source1: https://github.com/llvm/llvm-project/archive/%{commit1}.tar.gz#/llvm-project.tar.gz # Can not download cuda bits, can not install prebuilt rocm bits either Patch1: 0001-Prepare-triton-setup-for-fedora.patch # Not using /opt/rocm or the prebuilt rocm bits Patch2: 0001-Fix-rocm-paths-in-triton.patch # GPUs really only work on x86_64 ExclusiveArch: x86_64 # Use gcc to the the just-built llvm parts do not get confused # with the system llvm. %global toolchain gcc BuildRequires: gcc BuildRequires: gcc-c++ BuildRequires: cmake BuildRequires: ninja-build BuildRequires: zlib-devel BuildRequires: pybind11-devel BuildRequires: python3-devel BuildRequires: python3dist(autopep8) BuildRequires: python3dist(filelock) BuildRequires: python3dist(flake8) BuildRequires: python3dist(isort) BuildRequires: python3dist(lit) BuildRequires: python3dist(matplotlib) BuildRequires: python3dist(numpy) BuildRequires: python3dist(pandas) BuildRequires: python3dist(pybind11) BuildRequires: python3dist(pytest) BuildRequires: python3dist(scipy) >= 1.7.1 BuildRequires: python3dist(setuptools) BuildRequires: python3dist(tabulate) BuildRequires: python3dist(torch) BuildRequires: python3dist(wheel) # Triton uses a custom snapshot of the in development llvm # Because of instablity of the llvm api, we must use the one # triton uses. llvm is statically built and none of the # llvm headers or libraries are distributed directly. Provides: bundled(llvm-project) Requires: ldd Requires: rocm-comgr-devel Requires: rocm-device-libs-devel Requires: rocm-runtime-devel %description Triton is a language and compiler for writing highly efficient custom Deep-Learning primitives. The aim of Triton is to provide an open-source environment to write fast code at higher productivity than CUDA, but also with higher flexibility than other existing DSLs. %package -n python3-%{pypi_name} Summary: %{summary} %description -n python3-%{pypi_name} Triton is a language and compiler for writing highly efficient custom Deep-Learning primitives. The aim of Triton is to provide an open-source environment to write fast code at higher productivity than CUDA, but also with higher flexibility than other existing DSLs. %prep %autosetup -p1 -a 1 -n triton-%{commit0} # Remove bundled egg-info rm -rf %{pypi_name}.egg-info # remove packaged hip bits rm -rf third_party/amd/backend/lib/* rm -rf third_party/amd/backend/include/hip # Logic for the backends is a little broken, give it some help # Move amd to backends mv third_party/amd python/triton/backends # Remove everything else rm -rf third_party/* # Now copy it back to make setup happy cp -r python/triton/backends/amd third_party/ # rm llvm-project bits we do not need rm -rf llvm-project-%{commit1}/{bolt,clang,compiler-rt,flang,libc,libclc,libcxx,libcxxabi,libunwind,lld,lldb,llvm-libgcc,openmp,polly,pst,runtimes,utils} # disable -Werror sed -i -e 's@-Werror @ @' CMakeLists.txt %if %{without test} # no knob to turn off downloading of googletest sed -i -e 's@add_subdirectory(unittest)@#add_subdirectory(unittest)@' CMakeLists.txt %endif %build # build llvm cd llvm-project-%{commit1} # Real cores, No hyperthreading COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'` if [ ${COMPILE_JOBS}x = x ]; then COMPILE_JOBS=1 fi # Take into account memmory usage per core, do not thrash real memory BUILD_MEM=2 MEM_KB=0 MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'` MEM_MB=`eval "expr ${MEM_KB} / 1024"` MEM_GB=`eval "expr ${MEM_MB} / 1024"` COMPILE_JOBS_MEM=`eval "expr 1 + ${MEM_GB} / ${BUILD_MEM}"` if [ "$COMPILE_JOBS_MEM" -lt "$COMPILE_JOBS" ]; then COMPILE_JOBS=$COMPILE_JOBS_MEM fi %if %{without lto_build} LINK_MEM=12 %else LINK_MEM=32 %endif LINK_JOBS=`eval "expr 1 + ${MEM_GB} / ${LINK_MEM}"` %cmake -G Ninja \ -DBUILD_SHARED_LIBS=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=$PWD/install \ -DLLVM_ENABLE_PROJECTS="mlir;llvm" \ -DLLVM_PARALLEL_COMPILE_JOBS=$COMPILE_JOBS \ -DLLVM_PARALLEL_LINK_JOBS=$LINK_JOBS \ -DLLVM_TARGETS_TO_BUILD="X86;AMDGPU;NVPTX" \ llvm %cmake_build %cmake_build -t install export LLVM_SYSPATH=$PWD/install export PYBIND11_SYSPATH=/usr export PATH=$PWD/install/bin:$PATH cd .. export REL_WITH_DEB_INFO=1 export MAX_JOBS=$LINK_JOBS export TRITON_CODEGEN_AMD=1 cd python %py3_build %install cd python %py3_install # empty files rm %{buildroot}%{python3_sitearch}/triton/compiler/make_launcher.py # Unit tests download so are not suitable for mock %if %{with test} %check cd llvm-project-%{commit1} %cmake_build -t test %endif %files -n python3-%{pypi_name} %{python3_sitearch}/%{pypi_name} %{python3_sitearch}/%{pypi_name}-%{pypi_version}-py%{python3_version}.egg-info %changelog %autochangelog