From d7c64160754cccd8dc11092a865f78d5432e50fd Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 30 Sep 2023 10:05:28 -0500 Subject: [PATCH 01/30] lit: 15.0.6 -> 17.0.1 --- pkgs/development/python-modules/lit/default.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkgs/development/python-modules/lit/default.nix b/pkgs/development/python-modules/lit/default.nix index b4dee1e20b55..1467dfaa1f6d 100644 --- a/pkgs/development/python-modules/lit/default.nix +++ b/pkgs/development/python-modules/lit/default.nix @@ -6,11 +6,11 @@ buildPythonPackage rec { pname = "lit"; - version = "15.0.6"; + version = "17.0.1"; src = fetchPypi { inherit pname version; - hash = "sha256-S06OQfDmDyutls21HxyQ016ku3FTTsDOP8Di67d9f+k="; + hash = "sha256-RIZ65Xa1eQVnsSC8Pw2fAh2slCTRsIQMdazYX0YQrAQ="; }; passthru = { From ccdfcd324e90f57954c6bd631b0bfbe3e03a5f99 Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 30 Sep 2023 21:40:13 -0500 Subject: [PATCH 02/30] spirv-llvm-translator: Add 17.0.0 release --- .../compilers/spirv-llvm-translator/default.nix | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pkgs/development/compilers/spirv-llvm-translator/default.nix b/pkgs/development/compilers/spirv-llvm-translator/default.nix index c5e8ee5a6214..c6c743ce1e59 100644 --- a/pkgs/development/compilers/spirv-llvm-translator/default.nix +++ b/pkgs/development/compilers/spirv-llvm-translator/default.nix @@ -13,13 +13,17 @@ let llvmMajor = lib.versions.major llvm.version; isROCm = lib.hasPrefix "rocm" llvm.pname; - # ROCm will always be at the latest version + # ROCm, if actively updated will always be at the latest version branch = - if llvmMajor == "16" then rec { + if llvmMajor == "17" || isROCm then rec { + version = "17.0.0"; + rev = "v${version}"; + hash = "sha256-Rzm5Py9IPFtS9G7kME+uSwZ/0gPGW6MlL35ZWk4LfHM="; + } else if llvmMajor == "16" then rec { version = "16.0.0"; rev = "v${version}"; hash = "sha256-EUabcYqSjXshbPmcs1DRLvCSL1nd9rEdpqELBrItCW8="; - } else if llvmMajor == "15" || isROCm then rec { + } else if llvmMajor == "15" then rec { version = "15.0.0"; rev = "v${version}"; hash = "sha256-OsDohXRxovtEXaWiRGp8gJ0dXmoALyO+ZimeSO8aPVI="; From 37390cd11c878ac889ba8c27fe77f6d195737c9d Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 30 Sep 2023 04:47:55 +0000 Subject: [PATCH 03/30] rocm-related: 5.4.X -> 5.7.0 hip-common: 5.4.2 -> 5.7.0 hipblas: 5.4.3 -> 5.7.0 hipcc: 5.4.2 -> 5.7.0 hipcub: 5.4.4 -> 5.7.0 hipfft: 5.4.3 -> 5.7.0 hipfort: 5.6.0 -> 5.7.0 hipify: 5.4.2 -> 5.7.0 hipsolver: 5.4.4 -> 5.7.0 hipsparse: 5.4.4 -> 5.7.0 llvmPackages_rocm.llvm: 5.4.4 -> 5.7.0 migraphx: 5.4.3 -> 5.7.0 clang-ocl: 5.4.2 -> 5.7.0 rccl: 5.4.3 -> 5.7.0 rdc: 5.4.2 -> 5.7.0 rocblas: 5.4.3 -> 5.7.0 rocdbgapi: 5.4.2 -> 5.7.0 rocfft: 5.4.3 -> 5.7.0 miopen: 5.4.2 -> 5.7.0 rocm-comgr: 5.4.4 -> 5.7.0 rocgdb: 5.4.2 -> 5.7.0 rocm-device-libs: 5.4.4 -> 5.7.0 rocalution: 5.4.3 -> 5.7.0 rocm-smi: 5.4.4 -> 5.7.0 rocm-runtime: 5.4.3 -> 5.7.0 rocminfo: 5.4.4 -> 5.7.0 rocm-thunk: 5.4.4 -> 5.7.0 rocprofiler: 5.4.3 -> 5.7.0 rocprim: 5.4.3 -> 5.7.0 rocrand: 5.4.3 -> 5.7.0 rocmlir: 5.4.1 -> 5.7.0 rocr-debug-agent: 5.4.2 -> 5.7.0 rocthrust: 5.4.3 -> 5.7.0 rocsparse: 5.4.3 -> 5.7.0 roctracer: 5.4.3 -> 5.7.0 rocsolver: 5.4.4 -> 5.7.0 tensile: 5.4.2 -> 5.7.0 rocwmma: 5.4.3 -> 5.7.0 rocm-cmake: 5.4.4 -> 5.7.0 --- pkgs/development/compilers/hip-common/default.nix | 4 ++-- pkgs/development/compilers/hipcc/default.nix | 4 ++-- pkgs/development/compilers/hipify/default.nix | 4 ++-- pkgs/development/compilers/llvm/rocm/llvm.nix | 4 ++-- pkgs/development/libraries/clang-ocl/default.nix | 2 +- pkgs/development/libraries/hipblas/default.nix | 4 ++-- pkgs/development/libraries/hipcub/default.nix | 4 ++-- pkgs/development/libraries/hipfft/default.nix | 4 ++-- pkgs/development/libraries/hipfort/default.nix | 4 ++-- pkgs/development/libraries/hipsolver/default.nix | 4 ++-- pkgs/development/libraries/hipsparse/default.nix | 4 ++-- pkgs/development/libraries/migraphx/default.nix | 4 ++-- pkgs/development/libraries/miopen/default.nix | 4 ++-- pkgs/development/libraries/rccl/default.nix | 4 ++-- pkgs/development/libraries/rocalution/default.nix | 4 ++-- pkgs/development/libraries/rocblas/default.nix | 4 ++-- pkgs/development/libraries/rocdbgapi/default.nix | 4 ++-- pkgs/development/libraries/rocfft/default.nix | 4 ++-- pkgs/development/libraries/rocm-comgr/default.nix | 4 ++-- pkgs/development/libraries/rocm-device-libs/default.nix | 4 ++-- pkgs/development/libraries/rocm-runtime/default.nix | 4 ++-- pkgs/development/libraries/rocm-thunk/default.nix | 4 ++-- pkgs/development/libraries/rocmlir/default.nix | 4 ++-- pkgs/development/libraries/rocprim/default.nix | 4 ++-- pkgs/development/libraries/rocprofiler/default.nix | 4 ++-- pkgs/development/libraries/rocr-debug-agent/default.nix | 4 ++-- pkgs/development/libraries/rocrand/default.nix | 4 ++-- pkgs/development/libraries/rocsolver/default.nix | 4 ++-- pkgs/development/libraries/rocsparse/default.nix | 4 ++-- pkgs/development/libraries/rocthrust/default.nix | 4 ++-- pkgs/development/libraries/roctracer/default.nix | 4 ++-- pkgs/development/libraries/rocwmma/default.nix | 4 ++-- pkgs/development/libraries/tensile/default.nix | 4 ++-- pkgs/development/tools/build-managers/rocm-cmake/default.nix | 4 ++-- pkgs/development/tools/misc/rdc/default.nix | 4 ++-- pkgs/development/tools/misc/rocgdb/default.nix | 4 ++-- pkgs/development/tools/rocminfo/default.nix | 4 ++-- pkgs/tools/system/rocm-smi/default.nix | 4 ++-- 38 files changed, 75 insertions(+), 75 deletions(-) diff --git a/pkgs/development/compilers/hip-common/default.nix b/pkgs/development/compilers/hip-common/default.nix index 1721091d6a60..754fea89ac5e 100644 --- a/pkgs/development/compilers/hip-common/default.nix +++ b/pkgs/development/compilers/hip-common/default.nix @@ -11,13 +11,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "hip-common"; - version = "5.4.2"; + version = "5.7.0"; src = fetchFromGitHub { owner = "ROCm-Developer-Tools"; repo = "HIP"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-44CZWk6EsP5EduzBCBbOh2kshS89qOm4v3mx/xNDzV0="; + hash = "sha256-1Abit9qZCwrCVcnaFT4uMygFB9G6ovRasLmTsOsJ/Fw="; }; patches = [ diff --git a/pkgs/development/compilers/hipcc/default.nix b/pkgs/development/compilers/hipcc/default.nix index af0cb35c1480..b758d0e1ed96 100644 --- a/pkgs/development/compilers/hipcc/default.nix +++ b/pkgs/development/compilers/hipcc/default.nix @@ -12,13 +12,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "hipcc"; - version = "5.4.2"; + version = "5.7.0"; src = fetchFromGitHub { owner = "ROCm-Developer-Tools"; repo = "HIPCC"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-PEwue4O43MiMkF8UmTeHsmlikBG2V3/nFQLKmtHrRWQ="; + hash = "sha256-lJX6nF1V4YmK5ai7jivXlRnG3doIOf6X9CWLHVdRuVg="; }; patches = [ diff --git a/pkgs/development/compilers/hipify/default.nix b/pkgs/development/compilers/hipify/default.nix index 342e8e7e8bd4..d7b243b9da04 100644 --- a/pkgs/development/compilers/hipify/default.nix +++ b/pkgs/development/compilers/hipify/default.nix @@ -8,13 +8,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "hipify"; - version = "5.4.2"; + version = "5.7.0"; src = fetchFromGitHub { owner = "ROCm-Developer-Tools"; repo = "HIPIFY"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-EaHtI1ywjEHioWptuHvCllJ3dENtSClVoE6NpWTOa9I="; + hash = "sha256-lCQ2VTUGmFC90Xu70/tvoeDhFaInGqLT3vC2A1UojNI="; }; nativeBuildInputs = [ cmake ]; diff --git a/pkgs/development/compilers/llvm/rocm/llvm.nix b/pkgs/development/compilers/llvm/rocm/llvm.nix index 5475f411304b..7fa0bbd35eea 100644 --- a/pkgs/development/compilers/llvm/rocm/llvm.nix +++ b/pkgs/development/compilers/llvm/rocm/llvm.nix @@ -53,7 +53,7 @@ let llvmTargetsToBuild' = [ "AMDGPU" ] ++ builtins.map inferNativeTarget llvmTargetsToBuild; in stdenv.mkDerivation (finalAttrs: { pname = "rocm-llvm-${targetName}"; - version = "5.4.4"; + version = "5.7.0"; outputs = [ "out" @@ -70,7 +70,7 @@ in stdenv.mkDerivation (finalAttrs: { owner = "RadeonOpenCompute"; repo = "llvm-project"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-BDvC6QFDFtahA9hmJDLiM6K4mrO3j9E9rEXm7KulcuA="; + hash = "sha256-oJIXALwxo130jl8b6yCFw+a2kMBlny5/0ubiqF6MOWY="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/clang-ocl/default.nix b/pkgs/development/libraries/clang-ocl/default.nix index 8053b672d366..96fc4945747f 100644 --- a/pkgs/development/libraries/clang-ocl/default.nix +++ b/pkgs/development/libraries/clang-ocl/default.nix @@ -9,7 +9,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "clang-ocl"; - version = "5.4.2"; + version = "5.7.0"; src = fetchFromGitHub { owner = "RadeonOpenCompute"; diff --git a/pkgs/development/libraries/hipblas/default.nix b/pkgs/development/libraries/hipblas/default.nix index e8402c0d05bd..845c5b9d0d7d 100644 --- a/pkgs/development/libraries/hipblas/default.nix +++ b/pkgs/development/libraries/hipblas/default.nix @@ -18,7 +18,7 @@ # Can also use cuBLAS stdenv.mkDerivation (finalAttrs: { pname = "hipblas"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -34,7 +34,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "hipBLAS"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-mSZCq8UaiffMzWVflW1nAX6CQZ1DqwWJaSIzKslZSEk="; + hash = "sha256-abaEZN82dsoEC5gIF3/6epRDVz5ItUo6CkZsybu/G+g="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/hipcub/default.nix b/pkgs/development/libraries/hipcub/default.nix index d0f33c0dc1d2..fff34e1a0ec7 100644 --- a/pkgs/development/libraries/hipcub/default.nix +++ b/pkgs/development/libraries/hipcub/default.nix @@ -15,7 +15,7 @@ # CUB can also be used as a backend instead of rocPRIM. stdenv.mkDerivation (finalAttrs: { pname = "hipcub"; - version = "5.4.4"; + version = "5.7.0"; outputs = [ "out" @@ -29,7 +29,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "hipCUB"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-reFxSOYQOf9QcoZzaLt4D1yKGQoDxpt/3rwiHgP1DCo="; + hash = "sha256-ygBEA3NuCQ13QrSzGqyWXkx8Dy9WhR3u4syzapRTkFU="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/hipfft/default.nix b/pkgs/development/libraries/hipfft/default.nix index c4d13cb4f55e..c208296c687b 100644 --- a/pkgs/development/libraries/hipfft/default.nix +++ b/pkgs/development/libraries/hipfft/default.nix @@ -20,7 +20,7 @@ # Can also use cuFFT stdenv.mkDerivation (finalAttrs: { pname = "hipfft"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -36,7 +36,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "hipFFT"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-yDtm9J0wqH6zo4HcgQbqhvwbzbOiJPQ48gJ2gC8PvjA="; + hash = "sha256-fuYRKdlTrRMwxr3cgMeT3YniPzs4nuvF8YCzr3LLPFM="; fetchSubmodules = true; }; diff --git a/pkgs/development/libraries/hipfort/default.nix b/pkgs/development/libraries/hipfort/default.nix index 5c5f0f61e81c..4bb2a270271b 100644 --- a/pkgs/development/libraries/hipfort/default.nix +++ b/pkgs/development/libraries/hipfort/default.nix @@ -9,13 +9,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "hipfort"; - version = "5.6.0"; + version = "5.7.0"; src = fetchFromGitHub { owner = "ROCmSoftwarePlatform"; repo = "hipfort"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-x1pF9md7RIcobE/4UxHxOaURbljFZGOashW1KM0lmo0="; + hash = "sha256-DRjUWhdinDKP7CZgq2SmU3lGmmodCuXvco9aEeMLSZ4="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/hipsolver/default.nix b/pkgs/development/libraries/hipsolver/default.nix index ff499b342f26..cd689856d418 100644 --- a/pkgs/development/libraries/hipsolver/default.nix +++ b/pkgs/development/libraries/hipsolver/default.nix @@ -18,7 +18,7 @@ # Can also use cuSOLVER stdenv.mkDerivation (finalAttrs: { pname = "hipsolver"; - version = "5.4.4"; + version = "5.7.0"; outputs = [ "out" @@ -34,7 +34,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "hipSOLVER"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-p9hgKqRALLItv/HTpVlTsu+m9wlwCBYPYnJcm8StIao="; + hash = "sha256-I9Xjkilo+baeM1CRXjLAbj/vrg8r5/E2yEImhHGSyf8="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/hipsparse/default.nix b/pkgs/development/libraries/hipsparse/default.nix index 4f4b0c7cdc00..45a571735b70 100644 --- a/pkgs/development/libraries/hipsparse/default.nix +++ b/pkgs/development/libraries/hipsparse/default.nix @@ -17,7 +17,7 @@ # This can also use cuSPARSE as a backend instead of rocSPARSE stdenv.mkDerivation (finalAttrs: { pname = "hipsparse"; - version = "5.4.4"; + version = "5.7.0"; outputs = [ "out" @@ -31,7 +31,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "hipSPARSE"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-JWjmMvqIm4in1aPq2UgYmL0eWjrrRBiU6vH3FnCZZ40="; + hash = "sha256-txigaOoZMI/v+EQLgGlj2O0IHfE7EpgjL0cyv49nKzo="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/migraphx/default.nix b/pkgs/development/libraries/migraphx/default.nix index 3b32b86f41af..2a842a3c24dd 100644 --- a/pkgs/development/libraries/migraphx/default.nix +++ b/pkgs/development/libraries/migraphx/default.nix @@ -46,7 +46,7 @@ let }; in stdenv.mkDerivation (finalAttrs: { pname = "migraphx"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -60,7 +60,7 @@ in stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "AMDMIGraphX"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-UDhm+j9qs4Rk81C1PE4kkacytfY2StYbfsCOtFL+p6s="; + hash = "sha256-7yL7Zn5I8GUPIAgB7tVLZI7OEHLv0E4FcLVx9xMfsNY="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/miopen/default.nix b/pkgs/development/libraries/miopen/default.nix index 1b24d8bfec73..5345c2216e66 100644 --- a/pkgs/development/libraries/miopen/default.nix +++ b/pkgs/development/libraries/miopen/default.nix @@ -53,7 +53,7 @@ let }; in stdenv.mkDerivation (finalAttrs: { pname = "miopen"; - version = "5.4.2"; + version = "5.7.0"; outputs = [ "out" @@ -67,7 +67,7 @@ in stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "MIOpen"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-GfXPCXiVJVve3d8sQCQcFLb/vEnKkVEn7xYUhHkEEVI="; + hash = "sha256-6Bz4yDbQtV8XlEpwbH0YsJFaaZqH7BOfZDL7F4JTS1Q="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rccl/default.nix b/pkgs/development/libraries/rccl/default.nix index b3aaaff82f08..acd0030cabd9 100644 --- a/pkgs/development/libraries/rccl/default.nix +++ b/pkgs/development/libraries/rccl/default.nix @@ -13,7 +13,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rccl"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -25,7 +25,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "rccl"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-hQTzaiPMo5FAVScmxV0iNhy80uJ1xvx/kzlbfwROOs4="; + hash = "sha256-Abrwmsjnkx9JVTrARP/BM965g+R10lY+XPwthy/SG0k="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocalution/default.nix b/pkgs/development/libraries/rocalution/default.nix index 2796215d0311..f67384a95f08 100644 --- a/pkgs/development/libraries/rocalution/default.nix +++ b/pkgs/development/libraries/rocalution/default.nix @@ -21,7 +21,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocalution"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -37,7 +37,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "rocALUTION"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-jovhodhNa7tr1bSqpZCKI/9xF7Ie96JB+giqAEfis2k="; + hash = "sha256-+UGpFuZsC4+kmo8LWZWC2YoFJSdTukjN47e1YqW5Zu4="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocblas/default.nix b/pkgs/development/libraries/rocblas/default.nix index 11385ae64032..78d0e7df8b24 100644 --- a/pkgs/development/libraries/rocblas/default.nix +++ b/pkgs/development/libraries/rocblas/default.nix @@ -27,7 +27,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocblas"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -41,7 +41,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "rocBLAS"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-XhYpzBXviMnUdbF6lZi9g0LARKpzWLtDxJxLI3MuHiM="; + hash = "sha256-3wKnwvAra8u9xqlC05wUD+gSoBILTVJFU2cIV6xv3Lk="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocdbgapi/default.nix b/pkgs/development/libraries/rocdbgapi/default.nix index dfeb9249914c..a1cfacf33c27 100644 --- a/pkgs/development/libraries/rocdbgapi/default.nix +++ b/pkgs/development/libraries/rocdbgapi/default.nix @@ -37,7 +37,7 @@ let }; in stdenv.mkDerivation (finalAttrs: { pname = "rocdbgapi"; - version = "5.4.2"; + version = "5.7.0"; outputs = [ "out" @@ -49,7 +49,7 @@ in stdenv.mkDerivation (finalAttrs: { owner = "ROCm-Developer-Tools"; repo = "ROCdbgapi"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-KoFa6JzoEPT5/ns9X/hMfu8bOh29HD9n2qGJ3gzhiBA="; + hash = "sha256-qMXvgcS61lgcylz62ErYq8fhpYIR31skQEeKUryuP1w="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocfft/default.nix b/pkgs/development/libraries/rocfft/default.nix index d1136d4be8e6..8eed31b8b233 100644 --- a/pkgs/development/libraries/rocfft/default.nix +++ b/pkgs/development/libraries/rocfft/default.nix @@ -83,13 +83,13 @@ let in stdenv.mkDerivation (finalAttrs: { pname = "rocfft"; - version = "5.4.3"; + version = "5.7.0"; src = fetchFromGitHub { owner = "ROCmSoftwarePlatform"; repo = "rocFFT"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-FsefE0B2hF5ZcHDB6TscwFeZ1NKFkWX7VDpEvvbDbOk="; + hash = "sha256-GZSi03geTT+NUztBWhGYyghLqJGsFjUQzVAKQ7d03uA="; }; patches = [ diff --git a/pkgs/development/libraries/rocm-comgr/default.nix b/pkgs/development/libraries/rocm-comgr/default.nix index 6dc7b87934f1..4d84af3afa54 100644 --- a/pkgs/development/libraries/rocm-comgr/default.nix +++ b/pkgs/development/libraries/rocm-comgr/default.nix @@ -15,13 +15,13 @@ let else throw "Unsupported ROCm LLVM platform"; in stdenv.mkDerivation (finalAttrs: { pname = "rocm-comgr"; - version = "5.4.4"; + version = "5.7.0"; src = fetchFromGitHub { owner = "RadeonOpenCompute"; repo = "ROCm-CompilerSupport"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-qLsrBTeSop7lIQv8gZDwgpvGZJOAq90zsvMi1QpfbAs="; + hash = "sha256-QB3G0V92UTW67hD6+zSuExN1+eMT820iYSlMyZeWSFw="; }; patches = [ ./cmake.patch ]; diff --git a/pkgs/development/libraries/rocm-device-libs/default.nix b/pkgs/development/libraries/rocm-device-libs/default.nix index 92e84fe14195..594e21031284 100644 --- a/pkgs/development/libraries/rocm-device-libs/default.nix +++ b/pkgs/development/libraries/rocm-device-libs/default.nix @@ -14,13 +14,13 @@ let else throw "Unsupported ROCm LLVM platform"; in stdenv.mkDerivation (finalAttrs: { pname = "rocm-device-libs"; - version = "5.4.4"; + version = "5.7.0"; src = fetchFromGitHub { owner = "RadeonOpenCompute"; repo = "ROCm-Device-Libs"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-8gxvgy2GlROxM5qKtZVu5Lxa1FmTIVlBTpfp8rxhNhk="; + hash = "sha256-f6/LAhJ2mBDO1/JloHvl7MJyDo3WutbXd4IDknA9nzM="; }; patches = [ ./cmake.patch ]; diff --git a/pkgs/development/libraries/rocm-runtime/default.nix b/pkgs/development/libraries/rocm-runtime/default.nix index d10f7811ccbb..dfb10c363153 100644 --- a/pkgs/development/libraries/rocm-runtime/default.nix +++ b/pkgs/development/libraries/rocm-runtime/default.nix @@ -16,13 +16,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocm-runtime"; - version = "5.4.3"; + version = "5.7.0"; src = fetchFromGitHub { owner = "RadeonOpenCompute"; repo = "ROCR-Runtime"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-JkTXTQmdESHSFbA6HZdMK3pYEApz9aoAlMzdXayzdyY="; + hash = "sha256-D7Ahan5cxDhqPtV5iDDNys0A4FlxQ9oVRa2EeMoY5Qk="; }; sourceRoot = "${finalAttrs.src.name}/src"; diff --git a/pkgs/development/libraries/rocm-thunk/default.nix b/pkgs/development/libraries/rocm-thunk/default.nix index 86f0044799c0..8a4ad2a098c6 100644 --- a/pkgs/development/libraries/rocm-thunk/default.nix +++ b/pkgs/development/libraries/rocm-thunk/default.nix @@ -13,13 +13,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocm-thunk"; - version = "5.4.4"; + version = "5.7.0"; src = fetchFromGitHub { owner = "RadeonOpenCompute"; repo = "ROCT-Thunk-Interface"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-EU5toaKzVeZpdm/YhaQ0bXq0eoYwYQ5qGLUJzxgZVjE="; + hash = "sha256-jAMBks2/JaXiA45B3qvLHY8fPeFcr1GHT5Jieuduqhw="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocmlir/default.nix b/pkgs/development/libraries/rocmlir/default.nix index b8dee1385436..a2a4923148a0 100644 --- a/pkgs/development/libraries/rocmlir/default.nix +++ b/pkgs/development/libraries/rocmlir/default.nix @@ -25,7 +25,7 @@ let else throw "Unsupported ROCm LLVM platform"; in stdenv.mkDerivation (finalAttrs: { pname = "rocmlir"; - version = "5.4.1"; + version = "5.7.0"; outputs = [ "out" @@ -37,7 +37,7 @@ in stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "rocMLIR"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-MokE7Ej8mLHTQeLYvKr7PPlsNG6ul91fqfXDlGu5JpI="; + hash = "sha256-vPi4UVljohVAfnwDVQqeOVaJPa6v8aV5uBOtqLddTtc="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocprim/default.nix b/pkgs/development/libraries/rocprim/default.nix index 9b6ed7edc476..b38684b24dd6 100644 --- a/pkgs/development/libraries/rocprim/default.nix +++ b/pkgs/development/libraries/rocprim/default.nix @@ -13,7 +13,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocprim"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -27,7 +27,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "rocPRIM"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-Sqr3lbDMK1Gwucqmr/CHoxw/L6bGj3wlXoHzKTnTqoc="; + hash = "sha256-+ukFWsWv3RhS+Z6tmR4TRT8QTYEDuAEk12F9Gv1eXGU="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocprofiler/default.nix b/pkgs/development/libraries/rocprofiler/default.nix index e7e0c9fed650..97f269beb84e 100644 --- a/pkgs/development/libraries/rocprofiler/default.nix +++ b/pkgs/development/libraries/rocprofiler/default.nix @@ -11,13 +11,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocprofiler"; - version = "5.4.3"; + version = "5.7.0"; src = fetchFromGitHub { owner = "ROCm-Developer-Tools"; repo = "rocprofiler"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-CpD/+soMN8WTeSb5X7dsnZ596PMkw+4EVsVSvFtKCak="; + hash = "sha256-ue/2uiLbhOv/5XY4cIJuZ8DUMRhniYgxolq9xMwO1FY="; }; patches = [ ./0000-dont-require-hsa_amd_aqlprofile.patch ]; diff --git a/pkgs/development/libraries/rocr-debug-agent/default.nix b/pkgs/development/libraries/rocr-debug-agent/default.nix index 4361ffec3454..08d45f304a4f 100644 --- a/pkgs/development/libraries/rocr-debug-agent/default.nix +++ b/pkgs/development/libraries/rocr-debug-agent/default.nix @@ -12,13 +12,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocr-debug-agent"; - version = "5.4.2"; + version = "5.7.0"; src = fetchFromGitHub { owner = "ROCm-Developer-Tools"; repo = "rocr_debug_agent"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-5l6svWSWCxVoyr1zJabxbt5rXQMtdZtHrf9gS2PcRKc="; + hash = "sha256-AUDbNrFtUQ5Hm+uv5KMovh7P9wXQKLyRNx9gEQFnv6Y="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocrand/default.nix b/pkgs/development/libraries/rocrand/default.nix index 5128e17376c3..8ea138d4c3cd 100644 --- a/pkgs/development/libraries/rocrand/default.nix +++ b/pkgs/development/libraries/rocrand/default.nix @@ -13,7 +13,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocrand"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -27,7 +27,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "rocRAND"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-xK1JRTW+7odlXRQV9WC6ZfXqRKow/TQ9grHCigw+/us="; + hash = "sha256-cFH38fLD8tk6V9JERcqHokuwKemdDgHCZ75bZNEqmdY="; fetchSubmodules = true; # For inline hipRAND }; diff --git a/pkgs/development/libraries/rocsolver/default.nix b/pkgs/development/libraries/rocsolver/default.nix index 632e93fa6c0d..c78b4d97a0ae 100644 --- a/pkgs/development/libraries/rocsolver/default.nix +++ b/pkgs/development/libraries/rocsolver/default.nix @@ -17,7 +17,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocsolver"; - version = "5.4.4"; + version = "5.7.0"; outputs = [ "out" @@ -31,7 +31,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "rocSOLVER"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-UHUcA9CVPuYFpE2DTvRrRMMj51yNPo5wMTKnByL2RTg="; + hash = "sha256-qxmjm4tgpCnfJ2SqUXndk6y0MsPJUKHvjv/3Uc0smr4="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocsparse/default.nix b/pkgs/development/libraries/rocsparse/default.nix index d821ee693880..a93d7a77bf26 100644 --- a/pkgs/development/libraries/rocsparse/default.nix +++ b/pkgs/development/libraries/rocsparse/default.nix @@ -18,7 +18,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocsparse"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -32,7 +32,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "rocSPARSE"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-jzHD55c4rlPab5IAj2UzHTJI9MKhTfevsLthSZKOEzQ="; + hash = "sha256-30q9bqgZJUaNrkMXTAG+Z34yjsQ5DpJP+WBcCiEmF58="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocthrust/default.nix b/pkgs/development/libraries/rocthrust/default.nix index 45099cb9f4c1..a4981d3fb270 100644 --- a/pkgs/development/libraries/rocthrust/default.nix +++ b/pkgs/development/libraries/rocthrust/default.nix @@ -13,7 +13,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocthrust"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -27,7 +27,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "rocThrust"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-JT2PX53N39H+EaThPHo2ol+BUjDQniSQlKMLiYD8NoM="; + hash = "sha256-i0XCtJth8caVQT5oUgsxWXNzcePa02Gb7AQsthYTOv8="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/roctracer/default.nix b/pkgs/development/libraries/roctracer/default.nix index a81f7dc18961..3aeb8e3ba198 100644 --- a/pkgs/development/libraries/roctracer/default.nix +++ b/pkgs/development/libraries/roctracer/default.nix @@ -19,7 +19,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "roctracer"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -33,7 +33,7 @@ stdenv.mkDerivation (finalAttrs: { owner = "ROCm-Developer-Tools"; repo = "roctracer"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-5vYUNczylB2ehlvhq1u/H8KUXt8ku2E+jawKrKsU7LY="; + hash = "sha256-P6QYyAjMRwFFWKF8AhbrYGe+mYVJXdbBW1or6vcobYU="; }; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocwmma/default.nix b/pkgs/development/libraries/rocwmma/default.nix index 08667b6d3c92..84db5b4dbebf 100644 --- a/pkgs/development/libraries/rocwmma/default.nix +++ b/pkgs/development/libraries/rocwmma/default.nix @@ -36,7 +36,7 @@ let }; in stdenv.mkDerivation (finalAttrs: { pname = "rocwmma"; - version = "5.4.3"; + version = "5.7.0"; outputs = [ "out" @@ -54,7 +54,7 @@ in stdenv.mkDerivation (finalAttrs: { owner = "ROCmSoftwarePlatform"; repo = "rocWMMA"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-HUJPb6IahBgl/v+W4kXludBTNAjRm8k6v0jxKAX+qZM="; + hash = "sha256-/EuBBSjhlMwJfsqYvRb9oCNC0hNkEa1JH1KUDLMSs08="; }; patches = lib.optionals (buildTests || buildBenchmarks) [ diff --git a/pkgs/development/libraries/tensile/default.nix b/pkgs/development/libraries/tensile/default.nix index c6117167855f..7d0165a42060 100644 --- a/pkgs/development/libraries/tensile/default.nix +++ b/pkgs/development/libraries/tensile/default.nix @@ -10,13 +10,13 @@ buildPythonPackage rec { pname = "tensile"; - version = "5.4.2"; + version = "5.7.0"; src = fetchFromGitHub { owner = "ROCmSoftwarePlatform"; repo = "Tensile"; rev = "rocm-${version}"; - hash = "sha256-W6yr6mptfsiJSSzPCImgqI1EmsUv+l99SjqkoZsOjag="; + hash = "sha256-CyPGiM/53duJc/oNtOsl6JSsl9uOOYm5R7O6YXaVOm4="; }; buildInputs = [ diff --git a/pkgs/development/tools/build-managers/rocm-cmake/default.nix b/pkgs/development/tools/build-managers/rocm-cmake/default.nix index 206038c0b6e6..9e9cf3caf12e 100644 --- a/pkgs/development/tools/build-managers/rocm-cmake/default.nix +++ b/pkgs/development/tools/build-managers/rocm-cmake/default.nix @@ -7,13 +7,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocm-cmake"; - version = "5.4.4"; + version = "5.7.0"; src = fetchFromGitHub { owner = "RadeonOpenCompute"; repo = "rocm-cmake"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-JarQqiiZ36WV1d6vyQD546GN1EtoKLcdvcZsG3QWD2Y="; + hash = "sha256-aVjzuJ4BiSfwOdjufFc5CznfnL8di5h992zl+pzD0DU="; }; nativeBuildInputs = [ cmake ]; diff --git a/pkgs/development/tools/misc/rdc/default.nix b/pkgs/development/tools/misc/rdc/default.nix index 906f35b47c52..d2a7f46dc849 100644 --- a/pkgs/development/tools/misc/rdc/default.nix +++ b/pkgs/development/tools/misc/rdc/default.nix @@ -41,7 +41,7 @@ let }; in stdenv.mkDerivation (finalAttrs: { pname = "rdc"; - version = "5.4.2"; + version = "5.7.0"; outputs = [ "out" @@ -55,7 +55,7 @@ in stdenv.mkDerivation (finalAttrs: { owner = "RadeonOpenCompute"; repo = "rdc"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-dYacqkRp+zVejo/4dME1K6EN8t/1EBtIynEQ+AQ4JZo="; + hash = "sha256-xZD/WI/LfNtKK9j6ZjuU0OTTFZz3G4atyD5mVcSsQ8A="; }; nativeBuildInputs = [ diff --git a/pkgs/development/tools/misc/rocgdb/default.nix b/pkgs/development/tools/misc/rocgdb/default.nix index 8775ca6d2d68..a2f4435ee7bc 100644 --- a/pkgs/development/tools/misc/rocgdb/default.nix +++ b/pkgs/development/tools/misc/rocgdb/default.nix @@ -15,13 +15,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocgdb"; - version = "5.4.2"; + version = "5.7.0"; src = fetchFromGitHub { owner = "ROCm-Developer-Tools"; repo = "ROCgdb"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-DORPvfon32+rIk+YcO9LlUefNvvC7trmiTswg9MMuIs="; + hash = "sha256-TlT7vvTrVd7P6ilVnWIG5VIrjTleFgDezK/mudBV+xE="; }; nativeBuildInputs = [ diff --git a/pkgs/development/tools/rocminfo/default.nix b/pkgs/development/tools/rocminfo/default.nix index 61488b806e88..c9ff79e380ff 100644 --- a/pkgs/development/tools/rocminfo/default.nix +++ b/pkgs/development/tools/rocminfo/default.nix @@ -18,14 +18,14 @@ }: stdenv.mkDerivation (finalAttrs: { - version = "5.4.4"; + version = "5.7.0"; pname = "rocminfo"; src = fetchFromGitHub { owner = "RadeonOpenCompute"; repo = "rocminfo"; rev = "rocm-${finalAttrs.version}"; - sha256 = "sha256-4wZTm5AZgG8xEd6uYqxWq4bWZgcSYZ2WYA1z4RAPF8U="; + sha256 = "sha256-UzOo2qDT/uM+vdGdBM4pV5e143mfa+/6sZLBExOO26g="; }; nativeBuildInputs = [ diff --git a/pkgs/tools/system/rocm-smi/default.nix b/pkgs/tools/system/rocm-smi/default.nix index 7e3f1fb29cc0..2fa79828c63b 100644 --- a/pkgs/tools/system/rocm-smi/default.nix +++ b/pkgs/tools/system/rocm-smi/default.nix @@ -8,13 +8,13 @@ stdenv.mkDerivation (finalAttrs: { pname = "rocm-smi"; - version = "5.4.4"; + version = "5.7.0"; src = fetchFromGitHub { owner = "RadeonOpenCompute"; repo = "rocm_smi_lib"; rev = "rocm-${finalAttrs.version}"; - hash = "sha256-nkidiDNNU6MGhne9EbYClkODJZw/zZu3LWzlniJKyJE="; + hash = "sha256-swCRO4PBMBJ6fO2bLq/xxFZIYw2IgiFB490wsU8Wm2o="; }; postPatch = '' From d2bc96e5b5f004885c589ab92201bb77cca30366 Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 30 Sep 2023 11:56:54 -0500 Subject: [PATCH 04/30] llvmPackages_rocm: fixup for 5.7.0 llvmPackages_rocm.llvm: fixup for 5.7.0 llvmPackages_rocm.clang-unwrapped: fixup for 5.7.0 llvmPackages_rocm.lld: fixup for 5.7.0 llvmPackages_rocm.runtimes: fixup for 5.7.0 llvmPackages_rocm.libc: fixup for 5.7.0 llvmPackages_rocm.libunwind: fixup for 5.7.0 llvmPackages_rocm.libcxx: fixup for 5.7.0 llvmPackages_rocm.compiler-rt: fixup for 5.7.0 llvmPackages_rocm.clang: fixup for 5.7.0 llvmPackages_rocm.lldb: fixup for 5.7.0 llvmPackages_rocm.polly: fixup for 5.7.0 --- .../llvm/rocm/1000-libcxx-failing-tests.list | 171 ++++++++++++++++++ .../compilers/llvm/rocm/default.nix | 81 ++++++--- pkgs/development/compilers/llvm/rocm/llvm.nix | 16 +- 3 files changed, 234 insertions(+), 34 deletions(-) create mode 100644 pkgs/development/compilers/llvm/rocm/1000-libcxx-failing-tests.list diff --git a/pkgs/development/compilers/llvm/rocm/1000-libcxx-failing-tests.list b/pkgs/development/compilers/llvm/rocm/1000-libcxx-failing-tests.list new file mode 100644 index 000000000000..e005d6c928c2 --- /dev/null +++ b/pkgs/development/compilers/llvm/rocm/1000-libcxx-failing-tests.list @@ -0,0 +1,171 @@ +../libcxx/test/libcxx/containers/gnu_cxx/hash_map.pass.cpp +../libcxx/test/libcxx/containers/gnu_cxx/hash_set.pass.cpp +../libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.pass.cpp +../libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/construct.cxx2a.pass.cpp +../libcxx/test/libcxx/input.output/filesystems/class.directory_entry/directory_entry.mods/last_write_time.pass.cpp +../libcxx/test/libcxx/input.output/filesystems/class.path/path.member/path.native.obs/string_alloc.pass.cpp +../libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp +../libcxx/test/libcxx/localization/locales/locale/locale.types/locale.facet/no_allocation.pass.cpp +../libcxx/test/libcxx/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_underaligned_buffer.pass.cpp +../libcxx/test/libcxx/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp +../libcxx/test/std/containers/associative/map/map.access/index_key.pass.cpp +../libcxx/test/std/containers/associative/map/map.access/index_rv_key.pass.cpp +../libcxx/test/std/containers/associative/map/map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp +../libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_allocator_requirements.pass.cpp +../libcxx/test/std/containers/associative/multiset/insert_emplace_allocator_requirements.pass.cpp +../libcxx/test/std/containers/associative/set/insert_and_emplace_allocator_requirements.pass.cpp +../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_iter_iter.pass.cpp +../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_size_value.pass.cpp +../libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_value.pass.cpp +../libcxx/test/std/containers/sequences/vector.bool/ctor_exceptions.pass.cpp +../libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp +../libcxx/test/std/containers/unord/unord.map/unord.map.elem/index.pass.cpp +../libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp +../libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_allocator_requirements.pass.cpp +../libcxx/test/std/containers/unord/unord.multiset/insert_emplace_allocator_requirements.pass.cpp +../libcxx/test/std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cpp +../libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp +../libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp +../libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/pbackfail.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/copy_assign.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/copy.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/move_assign.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/move.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/assign.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp +../libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/source.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.compare.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/path.decompose.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.gen/lexically_normal.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.gen/lexically_relative_and_proximate.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/generic_string_alloc.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.generic.obs/named_overloads.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/clear.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/make_preferred.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/remove_filename.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/replace_extension.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/replace_filename.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.modifiers/swap.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.member/path.native.obs/named_overloads.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.factory.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/path.io.pass.cpp +../libcxx/test/std/input.output/filesystems/class.path/path.nonmember/swap.pass.cpp +../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp +../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp +../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp +../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp +../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp +../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp +../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp +../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp +../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp +../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp +../libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file_large.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_symlink/copy_symlink.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory_symlink/create_directory_symlink.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_hard_link/create_hard_link.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_symlink/create_symlink.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.proximate/proximate.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/remove_all.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove_all/toctou.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.remove/remove.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.temp_dir_path/temp_directory_path.pass.cpp +../libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp +../libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp +../libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp +../libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp +../libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp +../libcxx/test/std/localization/locales/locale/locale.members/combine.pass.cpp +../libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp +../libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp +../libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp +../libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp +../libcxx/test/std/utilities/any/any.class/any.cons/default.pass.cpp +../libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp +../libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp +../libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp +../libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp +../libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp +../libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp +../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.alg/swap.pass.cpp +../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_assign.pass.cpp +../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp +../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F_assign.pass.cpp +../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F.pass.cpp +../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t_assign.pass.cpp +../libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.mod/swap.pass.cpp +../libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp +../libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp +../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.enab/enable_shared_from_this.pass.cpp +../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_throw.pass.cpp +../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_throw.pass.cpp +../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_throw.pass.cpp +../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp +../libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.global/new_delete_resource.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.global/null_memory_resource.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.ctor/without_buffer.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_deallocate.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_exception_safety.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_initial_buffer.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_from_zero_sized_buffer.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_overaligned_request.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_with_initial_size.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.ctor/ctor_does_not_allocate.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/equality.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_overaligned_request.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_reuse_blocks.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate_overaligned_request.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate_reuse_blocks.pass.cpp +../libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/unsync_allocate.pass.cpp diff --git a/pkgs/development/compilers/llvm/rocm/default.nix b/pkgs/development/compilers/llvm/rocm/default.nix index b6df2354f5ec..be2b7e4de375 100644 --- a/pkgs/development/compilers/llvm/rocm/default.nix +++ b/pkgs/development/compilers/llvm/rocm/default.nix @@ -22,6 +22,7 @@ , rocm-device-libs , rocm-runtime , elfutils +, graphviz , python3Packages }: @@ -40,7 +41,6 @@ let extraBuildInputs = [ llvm ]; extraCMakeFlags = [ - "-DCMAKE_POLICY_DEFAULT_CMP0116=NEW" "-DCLANG_INCLUDE_DOCS=ON" "-DCLANG_INCLUDE_TESTS=ON" ]; @@ -59,6 +59,12 @@ let # `does not depend on a module exporting 'baz.h'` rm test/Modules/header-attribs.cpp + # We do not have HIP or the ROCm stack available yet + rm test/Driver/hip-options.hip + + # ???? `ld: cannot find crti.o: No such file or directory` linker issue? + rm test/Interpreter/dynamic-library.cpp + # `fatal error: 'stdio.h' file not found` rm test/OpenMP/amdgcn_emit_llvm.c ''; @@ -73,19 +79,18 @@ let targetName = "lld"; targetDir = targetName; extraBuildInputs = [ llvm ]; - extraCMakeFlags = [ "-DCMAKE_POLICY_DEFAULT_CMP0116=NEW" ]; - checkTargets = [ "check-lld" ]; + checkTargets = [ "check-${targetName}" ]; }; # Runtimes - runtimes = callPackage ./llvm.nix { + runtimes = callPackage ./llvm.nix rec { buildDocs = false; buildMan = false; buildTests = false; - targetDir = "runtimes"; + targetName = "runtimes"; + targetDir = targetName; targetRuntimes = [ - # "libc" https://github.com/llvm/llvm-project/issues/57719 "libunwind" "libcxxabi" "libcxx" @@ -95,7 +100,6 @@ let extraBuildInputs = [ llvm ]; extraCMakeFlags = [ - "-DCMAKE_POLICY_DEFAULT_CMP0114=NEW" "-DLIBCXX_INCLUDE_BENCHMARKS=OFF" "-DLIBCXX_CXX_ABI=libcxxabi" ]; @@ -167,10 +171,24 @@ in rec { # Runtimes libc = callPackage ./llvm.nix rec { stdenv = rStdenv; + buildMan = false; # No man pages to build targetName = "libc"; targetDir = "runtimes"; targetRuntimes = [ targetName ]; - isBroken = true; # https://github.com/llvm/llvm-project/issues/57719 + + extraPostPatch = '' + # `Failed to match ... against ...` `Match value not within tolerance value of MPFR result:` + # We need a better way, but I don't know enough sed magic and patching `CMakeLists.txt` isn't working... + substituteInPlace ../libc/test/src/math/log10_test.cpp \ + --replace "i < N" "i < 0" \ + --replace "test(mpfr::RoundingMode::Nearest);" "" \ + --replace "test(mpfr::RoundingMode::Downward);" "" \ + --replace "test(mpfr::RoundingMode::Upward);" "" \ + --replace "test(mpfr::RoundingMode::TowardZero);" "" + ''; + + checkTargets = [ "check-${targetName}" ]; + hardeningDisable = [ "fortify" ]; # Prevent `error: "Assumed value of MB_LEN_MAX wrong"` }; libunwind = callPackage ./llvm.nix rec { @@ -185,6 +203,14 @@ in rec { "-DLIBUNWIND_INCLUDE_TESTS=ON" "-DLIBUNWIND_USE_COMPILER_RT=ON" ]; + + extraPostPatch = '' + # `command had no output on stdout or stderr` (Says these unsupported tests) + chmod +w -R ../libunwind/test + rm ../libunwind/test/floatregister.pass.cpp + rm ../libunwind/test/unwind_leaffunction.pass.cpp + rm ../libunwind/test/libunwind_02.pass.cpp + ''; }; libcxxabi = callPackage ./llvm.nix rec { @@ -254,14 +280,7 @@ in rec { # Most of these can't find `bash` or `mkdir`, might just be hard-coded paths, or PATH is altered extraPostPatch = '' chmod +w -R ../libcxx/test/{libcxx,std} - rm -rf ../libcxx/test/libcxx/input.output/filesystems - rm ../libcxx/test/libcxx/selftest/remote-substitutions.sh.cpp - rm ../libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/pbackfail.pass.cpp - rm ../libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp - rm ../libcxx/test/std/utilities/optional/optional.object/optional.object.assign/emplace_initializer_list.pass.cpp - rm ../libcxx/test/std/utilities/optional/optional.object/optional.object.assign/nullopt_t.pass.cpp - rm -rf ../libcxx/test/std/utilities/optional/optional.object/optional.object.ctor - rm -rf ../libcxx/test/std/input.output/filesystems/{class.directory_entry,class.directory_iterator,class.rec.dir.itr,fs.op.funcs} + cat ${./1000-libcxx-failing-tests.list} | xargs -d \\n rm ''; }; @@ -280,7 +299,6 @@ in rec { ]; extraCMakeFlags = [ - "-DCMAKE_POLICY_DEFAULT_CMP0114=NEW" "-DCOMPILER_RT_INCLUDE_TESTS=ON" "-DCOMPILER_RT_USE_LLVM_UNWINDER=ON" "-DCOMPILER_RT_CXX_LIBRARY=libcxx" @@ -313,6 +331,10 @@ in rec { # We can run these substituteInPlace ../compiler-rt/test/CMakeLists.txt \ --replace "endfunction()" "endfunction()''\nadd_subdirectory(builtins)''\nadd_subdirectory(shadowcallstack)" + + # Could not launch llvm-config in /build/source/runtimes/build/bin + mkdir -p build/bin + ln -s ${llvm}/bin/llvm-config build/bin ''; extraLicenses = [ lib.licenses.mit ]; @@ -323,7 +345,6 @@ in rec { rocmClangStdenv = overrideCC stdenv clang; clang = wrapCCWith rec { - # inherit libc libcxx bintools; inherit libcxx bintools; # We do this to avoid HIP pathing problems, and mimic a monolithic install @@ -337,14 +358,14 @@ in rec { clang_version=`${clang-unwrapped}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` mkdir -p $out/{bin,include/c++/v1,lib/{cmake,clang/$clang_version/{include,lib}},libexec,share} - for path in ${llvm} ${clang-unwrapped} ${lld} ${libunwind} ${libcxxabi} ${libcxx} ${compiler-rt}; do + for path in ${llvm} ${clang-unwrapped} ${lld} ${libc} ${libunwind} ${libcxxabi} ${libcxx} ${compiler-rt}; do cp -as $path/* $out chmod +w $out/{*,include/c++/v1,lib/{clang/$clang_version/include,cmake}} rm -f $out/lib/libc++.so done ln -s $out/lib/* $out/lib/clang/$clang_version/lib - ln -s $out/include/* $out/lib/clang/$clang_version/include + ln -sf $out/include/* $out/lib/clang/$clang_version/include runHook postInstall ''; @@ -355,6 +376,7 @@ in rec { extraPackages = [ llvm lld + libc libunwind libcxxabi compiler-rt @@ -374,8 +396,7 @@ in rec { ln -s ${cc}/lib/clang/$clang_version/{include,lib} $out/resource-root # Not sure why, but hardening seems to make things break - rm $out/nix-support/add-hardening.sh - touch $out/nix-support/add-hardening.sh + echo "" > $out/nix-support/add-hardening.sh # GPU compilation uses builtin `lld` substituteInPlace $out/bin/{clang,clang++} \ @@ -459,13 +480,20 @@ in rec { swig lua5_3 gtest + graphviz ]; extraCMakeFlags = [ - "-DLLVM_EXTERNAL_LIT=${lit}/bin/.lit-wrapped" + "-DLLDB_EXTERNAL_CLANG_RESOURCE_DIR=${clang}/resource-root/lib/clang/$clang_version" "-DLLDB_INCLUDE_TESTS=ON" "-DLLDB_INCLUDE_UNITTESTS=ON" ]; + + extraPostPatch = '' + export clang_version=`clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` + ''; + + checkTargets = [ "check-${targetName}" ]; }; mlir = callPackage ./llvm.nix rec { @@ -527,6 +555,13 @@ in rec { stdenv = rocmClangStdenv; targetName = "polly"; targetDir = targetName; + + extraPostPatch = '' + # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists` + substituteInPlace CMakeLists.txt \ + --replace "NOT TARGET gtest" "FALSE" + ''; + checkTargets = [ "check-${targetName}" ]; }; diff --git a/pkgs/development/compilers/llvm/rocm/llvm.nix b/pkgs/development/compilers/llvm/rocm/llvm.nix index 7fa0bbd35eea..75aa7741a083 100644 --- a/pkgs/development/compilers/llvm/rocm/llvm.nix +++ b/pkgs/development/compilers/llvm/rocm/llvm.nix @@ -24,8 +24,7 @@ , targetDir ? "llvm" , targetProjects ? [ ] , targetRuntimes ? [ ] -# "NATIVE" resolves into x86 or aarch64 depending on stdenv -, llvmTargetsToBuild ? [ "NATIVE" ] +, llvmTargetsToBuild ? [ "NATIVE" ] # "NATIVE" resolves into x86 or aarch64 depending on stdenv , extraPatches ? [ ] , extraNativeBuildInputs ? [ ] , extraBuildInputs ? [ ] @@ -108,22 +107,20 @@ in stdenv.mkDerivation (finalAttrs: { "-DLLVM_ENABLE_PROJECTS=${lib.concatStringsSep ";" targetProjects}" ] ++ lib.optionals ((finalAttrs.passthru.isLLVM || targetDir == "runtimes") && targetRuntimes != [ ]) [ "-DLLVM_ENABLE_RUNTIMES=${lib.concatStringsSep ";" targetRuntimes}" - ] ++ lib.optionals (finalAttrs.passthru.isLLVM || finalAttrs.passthru.isClang) [ - "-DLLVM_ENABLE_RTTI=ON" - "-DLLVM_ENABLE_EH=ON" + ] ++ lib.optionals finalAttrs.passthru.isLLVM [ + "-DLLVM_INSTALL_UTILS=ON" + "-DLLVM_INSTALL_GTEST=ON" ] ++ lib.optionals (buildDocs || buildMan) [ "-DLLVM_INCLUDE_DOCS=ON" "-DLLVM_BUILD_DOCS=ON" # "-DLLVM_ENABLE_DOXYGEN=ON" Way too slow, only uses one core "-DLLVM_ENABLE_SPHINX=ON" - "-DLLVM_ENABLE_OCAMLDOC=OFF" "-DSPHINX_OUTPUT_HTML=ON" "-DSPHINX_OUTPUT_MAN=ON" "-DSPHINX_WARNINGS_AS_ERRORS=OFF" ] ++ lib.optionals buildTests [ "-DLLVM_INCLUDE_TESTS=ON" "-DLLVM_BUILD_TESTS=ON" - ] ++ lib.optionals (buildTests && !finalAttrs.passthru.isLLVM) [ "-DLLVM_EXTERNAL_LIT=${lit}/bin/.lit-wrapped" ] ++ extraCMakeFlags; @@ -141,10 +138,7 @@ in stdenv.mkDerivation (finalAttrs: { doCheck = buildTests; checkTarget = lib.concatStringsSep " " checkTargets; - postInstall = lib.optionalString finalAttrs.passthru.isLLVM '' - # `lit` expects these for some test suites - mv bin/{FileCheck,not,count,yaml2obj,obj2yaml} $out/bin - '' + lib.optionalString buildMan '' + postInstall = lib.optionalString buildMan '' mkdir -p $info '' + extraPostInstall; From 95768ef3b680afbb54aa01dee19d87c79a50ff20 Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 30 Sep 2023 12:13:59 -0500 Subject: [PATCH 05/30] llvmPackages_rocm: add big-parallel for clang-unwrapped, clang-tools-extra, and mlir --- pkgs/development/compilers/llvm/rocm/default.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkgs/development/compilers/llvm/rocm/default.nix b/pkgs/development/compilers/llvm/rocm/default.nix index be2b7e4de375..045fcdae4803 100644 --- a/pkgs/development/compilers/llvm/rocm/default.nix +++ b/pkgs/development/compilers/llvm/rocm/default.nix @@ -72,6 +72,8 @@ let extraPostInstall = '' mv bin/clang-tblgen $out/bin ''; + + requiredSystemFeatures = [ "big-parallel" ]; }; lld = callPackage ./llvm.nix rec { @@ -437,6 +439,8 @@ in rec { # Cleanup empty directories find $out -type d -empty -delete ''; + + requiredSystemFeatures = [ "big-parallel" ]; }; # Projects @@ -549,6 +553,7 @@ in rec { ''; checkTargets = [ "check-${targetName}" ]; + requiredSystemFeatures = [ "big-parallel" ]; }; polly = callPackage ./llvm.nix rec { From c9132cc3be4165275d7aa0c8ffc2805df6d01fb5 Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 30 Sep 2023 17:31:19 -0500 Subject: [PATCH 06/30] llvmPackages_rocm: add hardeningDisable pass-in option --- pkgs/development/compilers/llvm/rocm/llvm.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkgs/development/compilers/llvm/rocm/llvm.nix b/pkgs/development/compilers/llvm/rocm/llvm.nix index 75aa7741a083..655192d892bb 100644 --- a/pkgs/development/compilers/llvm/rocm/llvm.nix +++ b/pkgs/development/compilers/llvm/rocm/llvm.nix @@ -38,6 +38,7 @@ ) )] , extraPostInstall ? "" +, hardeningDisable ? [ ] , requiredSystemFeatures ? [ ] , extraLicenses ? [ ] , isBroken ? false @@ -153,7 +154,7 @@ in stdenv.mkDerivation (finalAttrs: { }; }; - inherit requiredSystemFeatures; + inherit hardeningDisable requiredSystemFeatures; meta = with lib; { description = "ROCm fork of the LLVM compiler infrastructure"; From 117639a256c933269c9888cd54085d5f61619f42 Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 30 Sep 2023 21:42:33 -0500 Subject: [PATCH 07/30] llvmPackages_rocm.libclc: mark broken due to ROCm 5.7.0 LLVM not being up-to-date with LLVM upstream --- pkgs/development/compilers/llvm/rocm/default.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/pkgs/development/compilers/llvm/rocm/default.nix b/pkgs/development/compilers/llvm/rocm/default.nix index 045fcdae4803..f8bfade2b0c1 100644 --- a/pkgs/development/compilers/llvm/rocm/default.nix +++ b/pkgs/development/compilers/llvm/rocm/default.nix @@ -470,6 +470,7 @@ in rec { ''; checkTargets = [ ]; + isBroken = true; # ROCm 5.7.0 doesn't have IR/AttributeMask.h yet...? }; lldb = callPackage ./llvm.nix rec { From 68ca0c26d820689228518758d1ff782645d20532 Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 30 Sep 2023 22:34:52 -0500 Subject: [PATCH 08/30] llvmPackages_rocm.lldb: disable tests --- pkgs/development/compilers/llvm/rocm/default.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/development/compilers/llvm/rocm/default.nix b/pkgs/development/compilers/llvm/rocm/default.nix index f8bfade2b0c1..5fe10f956ca6 100644 --- a/pkgs/development/compilers/llvm/rocm/default.nix +++ b/pkgs/development/compilers/llvm/rocm/default.nix @@ -475,7 +475,7 @@ in rec { lldb = callPackage ./llvm.nix rec { stdenv = rocmClangStdenv; - buildTests = false; # ld.lld: error: unable to find library -lllvm_gtest_main + buildTests = false; # FIXME: Bad pathing for clang executable in tests, using relative path most likely targetName = "lldb"; targetDir = targetName; extraNativeBuildInputs = [ python3Packages.sphinx-automodapi ]; From 7cb34ce34dee08b1bed6828366db87746d960cb0 Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 30 Sep 2023 00:09:04 -0500 Subject: [PATCH 09/30] rocm-core: Use official rocm-core repo --- .../libraries/rocm-core/default.nix | 60 +++++--------- .../libraries/rocm-core/src/CMakeLists.txt | 5 -- .../libraries/rocm-core/src/rocm_version.c | 10 --- .../libraries/rocm-core/src/rocm_version.h | 82 ------------------- 4 files changed, 19 insertions(+), 138 deletions(-) delete mode 100644 pkgs/development/libraries/rocm-core/src/CMakeLists.txt delete mode 100644 pkgs/development/libraries/rocm-core/src/rocm_version.c delete mode 100644 pkgs/development/libraries/rocm-core/src/rocm_version.h diff --git a/pkgs/development/libraries/rocm-core/default.nix b/pkgs/development/libraries/rocm-core/default.nix index 9f1a4ab4ccf5..aae431be8e62 100644 --- a/pkgs/development/libraries/rocm-core/default.nix +++ b/pkgs/development/libraries/rocm-core/default.nix @@ -1,58 +1,36 @@ { lib , stdenv , fetchFromGitHub -, runCommand -, substituteAll +, rocmUpdateScript , cmake }: -let - rocm_version = with lib; concatStrings (intersperse "0" (splitString "." stdenv.cc.version)); -in stdenv.mkDerivation (finalAttrs: { +stdenv.mkDerivation (finalAttrs: { pname = "rocm-core"; - version = stdenv.cc.version; + version = "5.7.0"; - # Based on https://github.com/rocm-arch/rocm-arch/tree/ad0b15690d403e5822db062ffff4db3912de6669/rocm-core - src = let - rocm_major = lib.versions.major finalAttrs.version; - rocm_minor = lib.versions.minor finalAttrs.version; - rocm_patch = lib.versions.patch finalAttrs.version; - - cmake_lists = substituteAll { - inherit rocm_version; - src = ./src/CMakeLists.txt; - }; - - version_c = substituteAll { - inherit rocm_major rocm_minor rocm_patch; - src = ./src/rocm_version.c; - }; - - version_h = substituteAll { - inherit rocm_major rocm_minor rocm_patch; - src = ./src/rocm_version.h; - }; - in runCommand "rocm-core-${finalAttrs.version}-source" { preferLocalBuild = true; } '' - mkdir -p $out/rocm-core - ln -s ${cmake_lists} $out/CMakeLists.txt - ln -s ${version_c} $out/rocm_version.c - ln -s ${version_h} $out/rocm-core/rocm_version.h - ''; + src = fetchFromGitHub { + owner = "RadeonOpenCompute"; + repo = "rocm-core"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-jFAHLqf/AR27Nbuq8aypWiKqApNcTgG5LWESVjVCKIg="; + }; nativeBuildInputs = [ cmake ]; + cmakeFlags = [ "-DROCM_VERSION=${finalAttrs.version}" ]; - postInstall = '' - mkdir -p $out/include - cp -a ../rocm-core $out/include - ln -s $out/include/rocm-core/rocm_version.h $out/include - ln -s $out/lib/librocm-core.so.1.0.${rocm_version} $out/lib/librocm-core.so.1 - ''; + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + owner = finalAttrs.src.owner; + repo = finalAttrs.src.repo; + }; meta = with lib; { - description = "ROCm core"; - homepage = "https://docs.amd.com"; - license = with licenses; [ ncsa ]; # See src/rocm_version.h + description = "Utility for getting the ROCm release version"; + homepage = "https://github.com/RadeonOpenCompute/rocm-core"; + license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/rocm-core/src/CMakeLists.txt b/pkgs/development/libraries/rocm-core/src/CMakeLists.txt deleted file mode 100644 index b2d9fc1f53b8..000000000000 --- a/pkgs/development/libraries/rocm-core/src/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -cmake_minimum_required(VERSION 3.23) -project(rocm-core) -add_library(rocm-core SHARED rocm_version.c) -set_target_properties(rocm-core PROPERTIES VERSION "1.0.@rocm_version@") -install(TARGETS rocm-core LIBRARY DESTINATION lib) diff --git a/pkgs/development/libraries/rocm-core/src/rocm_version.c b/pkgs/development/libraries/rocm-core/src/rocm_version.c deleted file mode 100644 index a35dfc6a72c8..000000000000 --- a/pkgs/development/libraries/rocm-core/src/rocm_version.c +++ /dev/null @@ -1,10 +0,0 @@ -#include "rocm-core/rocm_version.h" - -VerErrors getROCmVersion(unsigned int *Major, unsigned int *Minor, - unsigned int *Patch) { - *Major = @rocm_major@; - *Minor = @rocm_minor@; - *Patch = @rocm_patch@; - - return 0; -} diff --git a/pkgs/development/libraries/rocm-core/src/rocm_version.h b/pkgs/development/libraries/rocm-core/src/rocm_version.h deleted file mode 100644 index d112a68b8653..000000000000 --- a/pkgs/development/libraries/rocm-core/src/rocm_version.h +++ /dev/null @@ -1,82 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// The University of Illinois/NCSA -// Open Source License (NCSA) -// -// Copyright (c) 2014-2021, Advanced Micro Devices, Inc. All rights reserved. -// -// Developed by: -// -// AMD Research and AMD HSA Software Development -// -// Advanced Micro Devices, Inc. -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal with the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in -// the documentation and/or other materials provided with the distribution. -// - Neither the names of Advanced Micro Devices, Inc, -// nor the names of its contributors may be used to endorse or promote -// products derived from this Software without specific prior written -// permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS WITH THE SOFTWARE. -// -//////////////////////////////////////////////////////////////////////////////// - - -#ifndef _ROCM_VERSION_H_ -#define _ROCM_VERSION_H_ - - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - - -#define ROCM_VERSION_MAJOR @rocm_major@ -#define ROCM_VERSION_MINOR @rocm_minor@ -#define ROCM_VERSION_PATCH @rocm_patch@ - - -typedef enum { - VerSuccess=0, - VerIncorrecPararmeters, - VerValuesNotDefined, - VerErrorMAX //This should always be last value in the enumerations -} VerErrors; - - -// API for getting the verion -// Return val : VerErros : API execution status. The parameters are valid only when the exetution status is SUCCESS==0 -VerErrors getROCmVersion(unsigned int* Major, unsigned int* Minor, unsigned int* Patch) __attribute__((nonnull)) ; -// Usage : -// int mj=0,mn=0,p=0,ret=0; -// ret=getROCMVersion(&mj,&mn,&p); -// if(ret !=VerSuccess ) // error occured -// -// check for the values and -// - - -#ifdef __cplusplus -} // end extern "C" block -#endif - -#endif //_ROCM_VERSION_H_ header guard From 41ab6719307b93606f6c2d99914ff92d50d46ad2 Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 30 Sep 2023 04:52:46 +0000 Subject: [PATCH 10/30] miopengemm: 5.4.3 -> 5.5.0 --- pkgs/development/libraries/miopengemm/default.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/development/libraries/miopengemm/default.nix b/pkgs/development/libraries/miopengemm/default.nix index f288cfda5456..92616f79eedc 100644 --- a/pkgs/development/libraries/miopengemm/default.nix +++ b/pkgs/development/libraries/miopengemm/default.nix @@ -31,7 +31,7 @@ let }; in stdenv.mkDerivation (finalAttrs: { pname = "miopengemm"; - version = "5.4.3"; + version = "5.5.0"; outputs = [ "out" From 12e7fc6923688a08152ee937d00e272502d2ed63 Mon Sep 17 00:00:00 2001 From: Madoura Date: Sun, 1 Oct 2023 19:36:12 -0500 Subject: [PATCH 11/30] llvmPackages_rocm -> rocmPackages.llvm --- .../compilers/llvm/rocm/0000-fix-openmp.patch | 18 - .../compilers/llvm/rocm/default.nix | 644 ------------------ pkgs/development/rocm-modules/5/default.nix | 9 + .../llvm.nix => rocm-modules/5/llvm/base.nix} | 0 .../rocm-modules/5/llvm/default.nix | 53 ++ .../5/llvm/stage-1/clang-unwrapped.nix | 46 ++ .../rocm-modules/5/llvm/stage-1/lld.nix | 13 + .../rocm-modules/5/llvm/stage-1/llvm.nix | 10 + .../rocm-modules/5/llvm/stage-1/runtimes.nix | 30 + .../stage-2}/1000-libcxx-failing-tests.list | 0 .../5/llvm/stage-2/bintools-unwrapped.nix | 28 + .../5/llvm/stage-2/compiler-rt.nix | 63 ++ .../rocm-modules/5/llvm/stage-2/libc.nix | 26 + .../rocm-modules/5/llvm/stage-2/libcxx.nix | 42 ++ .../rocm-modules/5/llvm/stage-2/libcxxabi.nix | 37 + .../rocm-modules/5/llvm/stage-2/libunwind.nix | 26 + .../rocm-modules/5/llvm/stage-2/rstdenv.nix | 35 + .../5/llvm/stage-3/clang-tools-extra.nix | 42 ++ .../rocm-modules/5/llvm/stage-3/clang.nix | 73 ++ .../rocm-modules/5/llvm/stage-3/flang.nix | 33 + .../rocm-modules/5/llvm/stage-3/libclc.nix | 36 + .../rocm-modules/5/llvm/stage-3/lldb.nix | 39 ++ .../rocm-modules/5/llvm/stage-3/mlir.nix | 67 ++ .../rocm-modules/5/llvm/stage-3/openmp.nix | 45 ++ .../rocm-modules/5/llvm/stage-3/polly.nix | 18 + .../rocm-modules/5/llvm/stage-3/pstl.nix | 15 + .../default.nix => 5/update.nix} | 2 +- pkgs/test/default.nix | 2 +- pkgs/top-level/all-packages.nix | 36 +- pkgs/top-level/python-packages.nix | 4 +- 30 files changed, 807 insertions(+), 685 deletions(-) delete mode 100644 pkgs/development/compilers/llvm/rocm/0000-fix-openmp.patch delete mode 100644 pkgs/development/compilers/llvm/rocm/default.nix create mode 100644 pkgs/development/rocm-modules/5/default.nix rename pkgs/development/{compilers/llvm/rocm/llvm.nix => rocm-modules/5/llvm/base.nix} (100%) create mode 100644 pkgs/development/rocm-modules/5/llvm/default.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-1/clang-unwrapped.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-1/lld.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-1/llvm.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-1/runtimes.nix rename pkgs/development/{compilers/llvm/rocm => rocm-modules/5/llvm/stage-2}/1000-libcxx-failing-tests.list (100%) create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-2/bintools-unwrapped.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-2/compiler-rt.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-2/libc.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-2/libcxx.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-2/libcxxabi.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-2/libunwind.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-2/rstdenv.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/clang-tools-extra.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/clang.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/libclc.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/lldb.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/openmp.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/polly.nix create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/pstl.nix rename pkgs/development/rocm-modules/{update-script/default.nix => 5/update.nix} (91%) diff --git a/pkgs/development/compilers/llvm/rocm/0000-fix-openmp.patch b/pkgs/development/compilers/llvm/rocm/0000-fix-openmp.patch deleted file mode 100644 index 2811df7d29f7..000000000000 --- a/pkgs/development/compilers/llvm/rocm/0000-fix-openmp.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/libomptarget/plugins/amdgpu/impl/impl.cpp b/libomptarget/plugins/amdgpu/impl/impl.cpp -index 80e024789..3a14e0889 100644 ---- a/libomptarget/plugins/amdgpu/impl/impl.cpp -+++ b/libomptarget/plugins/amdgpu/impl/impl.cpp -@@ -21,10 +21,11 @@ bool is_locked(void *ptr, hsa_status_t *err_p, void **agentBaseAddress) { - info.size = sizeof(hsa_amd_pointer_info_t); - err = hsa_amd_pointer_info(ptr, &info, nullptr, nullptr, nullptr); - -- if (err != HSA_STATUS_SUCCESS) -+ if (err != HSA_STATUS_SUCCESS) { - DP("Error when getting pointer info\n"); -- else -+ } else { - is_locked = (info.type == HSA_EXT_POINTER_TYPE_LOCKED); -+ } - - if (is_locked && agentBaseAddress != nullptr) { - // When user passes in a basePtr+offset we need to fix the diff --git a/pkgs/development/compilers/llvm/rocm/default.nix b/pkgs/development/compilers/llvm/rocm/default.nix deleted file mode 100644 index 5fe10f956ca6..000000000000 --- a/pkgs/development/compilers/llvm/rocm/default.nix +++ /dev/null @@ -1,644 +0,0 @@ -{ lib -, stdenv -, callPackage -, overrideCC -, wrapCCWith -, wrapBintoolsWith -, runCommand -, lit -, glibc -, spirv-llvm-translator -, xz -, swig -, lua5_3 -, gtest -, hip -, rocm-comgr -, vulkan-loader -, vulkan-headers -, glslang -, shaderc -, perl -, rocm-device-libs -, rocm-runtime -, elfutils -, graphviz -, python3Packages -}: - -let - # Stage 1 - # Base - llvm = callPackage ./llvm.nix { - requiredSystemFeatures = [ "big-parallel" ]; - isBroken = stdenv.isAarch64; # https://github.com/RadeonOpenCompute/ROCm/issues/1831#issuecomment-1278205344 - }; - - # Projects - clang-unwrapped = callPackage ./llvm.nix rec { - targetName = "clang"; - targetDir = targetName; - extraBuildInputs = [ llvm ]; - - extraCMakeFlags = [ - "-DCLANG_INCLUDE_DOCS=ON" - "-DCLANG_INCLUDE_TESTS=ON" - ]; - - extraPostPatch = '' - # Looks like they forgot to add finding libedit to the standalone build - ln -s ../cmake/Modules/FindLibEdit.cmake cmake/modules - - substituteInPlace CMakeLists.txt \ - --replace "include(CheckIncludeFile)" "include(CheckIncludeFile)''\nfind_package(LibEdit)" - - # `No such file or directory: '/build/source/clang/tools/scan-build/bin/scan-build'` - rm test/Analysis/scan-build/*.test - rm test/Analysis/scan-build/rebuild_index/rebuild_index.test - - # `does not depend on a module exporting 'baz.h'` - rm test/Modules/header-attribs.cpp - - # We do not have HIP or the ROCm stack available yet - rm test/Driver/hip-options.hip - - # ???? `ld: cannot find crti.o: No such file or directory` linker issue? - rm test/Interpreter/dynamic-library.cpp - - # `fatal error: 'stdio.h' file not found` - rm test/OpenMP/amdgcn_emit_llvm.c - ''; - - extraPostInstall = '' - mv bin/clang-tblgen $out/bin - ''; - - requiredSystemFeatures = [ "big-parallel" ]; - }; - - lld = callPackage ./llvm.nix rec { - buildMan = false; # No man pages to build - targetName = "lld"; - targetDir = targetName; - extraBuildInputs = [ llvm ]; - checkTargets = [ "check-${targetName}" ]; - }; - - # Runtimes - runtimes = callPackage ./llvm.nix rec { - buildDocs = false; - buildMan = false; - buildTests = false; - targetName = "runtimes"; - targetDir = targetName; - - targetRuntimes = [ - "libunwind" - "libcxxabi" - "libcxx" - "compiler-rt" - ]; - - extraBuildInputs = [ llvm ]; - - extraCMakeFlags = [ - "-DLIBCXX_INCLUDE_BENCHMARKS=OFF" - "-DLIBCXX_CXX_ABI=libcxxabi" - ]; - - extraLicenses = [ lib.licenses.mit ]; - }; - - # Stage 2 - # Helpers - rStdenv = overrideCC stdenv (wrapCCWith rec { - inherit bintools; - libcxx = runtimes; - cc = clang-unwrapped; - - extraPackages = [ - llvm - lld - ]; - - nixSupport.cc-cflags = [ - "-resource-dir=$out/resource-root" - "-fuse-ld=lld" - "-rtlib=compiler-rt" - "-unwindlib=libunwind" - "-Wno-unused-command-line-argument" - ]; - - extraBuildCommands = '' - clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - mkdir -p $out/resource-root - ln -s ${cc}/lib/clang/$clang_version/include $out/resource-root - ln -s ${runtimes}/lib $out/resource-root - ''; - }); - - bintools = wrapBintoolsWith { bintools = bintools-unwrapped; }; - - bintools-unwrapped = runCommand "rocm-llvm-binutils-${llvm.version}" { preferLocalBuild = true; } '' - mkdir -p $out/bin - - for prog in ${lld}/bin/*; do - ln -s $prog $out/bin/$(basename $prog) - done - - for prog in ${llvm}/bin/*; do - ln -sf $prog $out/bin/$(basename $prog) - done - - ln -s ${llvm}/bin/llvm-ar $out/bin/ar - ln -s ${llvm}/bin/llvm-as $out/bin/as - ln -s ${llvm}/bin/llvm-dwp $out/bin/dwp - ln -s ${llvm}/bin/llvm-nm $out/bin/nm - ln -s ${llvm}/bin/llvm-objcopy $out/bin/objcopy - ln -s ${llvm}/bin/llvm-objdump $out/bin/objdump - ln -s ${llvm}/bin/llvm-ranlib $out/bin/ranlib - ln -s ${llvm}/bin/llvm-readelf $out/bin/readelf - ln -s ${llvm}/bin/llvm-size $out/bin/size - ln -s ${llvm}/bin/llvm-strip $out/bin/strip - ln -s ${lld}/bin/lld $out/bin/ld - ''; -in rec { - inherit - llvm - clang-unwrapped - lld - bintools - bintools-unwrapped; - - # Runtimes - libc = callPackage ./llvm.nix rec { - stdenv = rStdenv; - buildMan = false; # No man pages to build - targetName = "libc"; - targetDir = "runtimes"; - targetRuntimes = [ targetName ]; - - extraPostPatch = '' - # `Failed to match ... against ...` `Match value not within tolerance value of MPFR result:` - # We need a better way, but I don't know enough sed magic and patching `CMakeLists.txt` isn't working... - substituteInPlace ../libc/test/src/math/log10_test.cpp \ - --replace "i < N" "i < 0" \ - --replace "test(mpfr::RoundingMode::Nearest);" "" \ - --replace "test(mpfr::RoundingMode::Downward);" "" \ - --replace "test(mpfr::RoundingMode::Upward);" "" \ - --replace "test(mpfr::RoundingMode::TowardZero);" "" - ''; - - checkTargets = [ "check-${targetName}" ]; - hardeningDisable = [ "fortify" ]; # Prevent `error: "Assumed value of MB_LEN_MAX wrong"` - }; - - libunwind = callPackage ./llvm.nix rec { - stdenv = rStdenv; - buildMan = false; # No man pages to build - targetName = "libunwind"; - targetDir = "runtimes"; - targetRuntimes = [ targetName ]; - - extraCMakeFlags = [ - "-DLIBUNWIND_INCLUDE_DOCS=ON" - "-DLIBUNWIND_INCLUDE_TESTS=ON" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - ]; - - extraPostPatch = '' - # `command had no output on stdout or stderr` (Says these unsupported tests) - chmod +w -R ../libunwind/test - rm ../libunwind/test/floatregister.pass.cpp - rm ../libunwind/test/unwind_leaffunction.pass.cpp - rm ../libunwind/test/libunwind_02.pass.cpp - ''; - }; - - libcxxabi = callPackage ./llvm.nix rec { - stdenv = rStdenv; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - targetName = "libcxxabi"; - targetDir = "runtimes"; - - targetRuntimes = [ - "libunwind" - targetName - "libcxx" - ]; - - extraCMakeFlags = [ - "-DLIBCXXABI_INCLUDE_TESTS=ON" - "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" - "-DLIBCXXABI_USE_COMPILER_RT=ON" - - # Workaround having to build combined - "-DLIBUNWIND_INCLUDE_DOCS=OFF" - "-DLIBUNWIND_INCLUDE_TESTS=OFF" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - "-DLIBUNWIND_INSTALL_LIBRARY=OFF" - "-DLIBUNWIND_INSTALL_HEADERS=OFF" - "-DLIBCXX_INCLUDE_DOCS=OFF" - "-DLIBCXX_INCLUDE_TESTS=OFF" - "-DLIBCXX_USE_COMPILER_RT=ON" - "-DLIBCXX_CXX_ABI=libcxxabi" - "-DLIBCXX_INSTALL_LIBRARY=OFF" - "-DLIBCXX_INSTALL_HEADERS=OFF" - ]; - }; - - libcxx = callPackage ./llvm.nix rec { - stdenv = rStdenv; - buildMan = false; # No man pages to build - targetName = "libcxx"; - targetDir = "runtimes"; - - targetRuntimes = [ - "libunwind" - "libcxxabi" - targetName - ]; - - extraCMakeFlags = [ - "-DLIBCXX_INCLUDE_DOCS=ON" - "-DLIBCXX_INCLUDE_TESTS=ON" - "-DLIBCXX_USE_COMPILER_RT=ON" - "-DLIBCXX_CXX_ABI=libcxxabi" - - # Workaround having to build combined - "-DLIBUNWIND_INCLUDE_DOCS=OFF" - "-DLIBUNWIND_INCLUDE_TESTS=OFF" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - "-DLIBUNWIND_INSTALL_LIBRARY=OFF" - "-DLIBUNWIND_INSTALL_HEADERS=OFF" - "-DLIBCXXABI_INCLUDE_TESTS=OFF" - "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" - "-DLIBCXXABI_USE_COMPILER_RT=ON" - "-DLIBCXXABI_INSTALL_LIBRARY=OFF" - "-DLIBCXXABI_INSTALL_HEADERS=OFF" - ]; - - # Most of these can't find `bash` or `mkdir`, might just be hard-coded paths, or PATH is altered - extraPostPatch = '' - chmod +w -R ../libcxx/test/{libcxx,std} - cat ${./1000-libcxx-failing-tests.list} | xargs -d \\n rm - ''; - }; - - compiler-rt = callPackage ./llvm.nix rec { - stdenv = rStdenv; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - targetName = "compiler-rt"; - targetDir = "runtimes"; - - targetRuntimes = [ - "libunwind" - "libcxxabi" - "libcxx" - targetName - ]; - - extraCMakeFlags = [ - "-DCOMPILER_RT_INCLUDE_TESTS=ON" - "-DCOMPILER_RT_USE_LLVM_UNWINDER=ON" - "-DCOMPILER_RT_CXX_LIBRARY=libcxx" - "-DCOMPILER_RT_CAN_EXECUTE_TESTS=OFF" # We can't run most of these - - # Workaround having to build combined - "-DLIBUNWIND_INCLUDE_DOCS=OFF" - "-DLIBUNWIND_INCLUDE_TESTS=OFF" - "-DLIBUNWIND_USE_COMPILER_RT=ON" - "-DLIBUNWIND_INSTALL_LIBRARY=OFF" - "-DLIBUNWIND_INSTALL_HEADERS=OFF" - "-DLIBCXXABI_INCLUDE_TESTS=OFF" - "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" - "-DLIBCXXABI_USE_COMPILER_RT=ON" - "-DLIBCXXABI_INSTALL_LIBRARY=OFF" - "-DLIBCXXABI_INSTALL_HEADERS=OFF" - "-DLIBCXX_INCLUDE_DOCS=OFF" - "-DLIBCXX_INCLUDE_TESTS=OFF" - "-DLIBCXX_USE_COMPILER_RT=ON" - "-DLIBCXX_CXX_ABI=libcxxabi" - "-DLIBCXX_INSTALL_LIBRARY=OFF" - "-DLIBCXX_INSTALL_HEADERS=OFF" - ]; - - extraPostPatch = '' - # `No such file or directory: 'ldd'` - substituteInPlace ../compiler-rt/test/lit.common.cfg.py \ - --replace "'ldd'," "'${glibc.bin}/bin/ldd'," - - # We can run these - substituteInPlace ../compiler-rt/test/CMakeLists.txt \ - --replace "endfunction()" "endfunction()''\nadd_subdirectory(builtins)''\nadd_subdirectory(shadowcallstack)" - - # Could not launch llvm-config in /build/source/runtimes/build/bin - mkdir -p build/bin - ln -s ${llvm}/bin/llvm-config build/bin - ''; - - extraLicenses = [ lib.licenses.mit ]; - }; - - # Stage 3 - # Helpers - rocmClangStdenv = overrideCC stdenv clang; - - clang = wrapCCWith rec { - inherit libcxx bintools; - - # We do this to avoid HIP pathing problems, and mimic a monolithic install - cc = stdenv.mkDerivation (finalAttrs: { - inherit (clang-unwrapped) pname version; - dontUnpack = true; - - installPhase = '' - runHook preInstall - - clang_version=`${clang-unwrapped}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - mkdir -p $out/{bin,include/c++/v1,lib/{cmake,clang/$clang_version/{include,lib}},libexec,share} - - for path in ${llvm} ${clang-unwrapped} ${lld} ${libc} ${libunwind} ${libcxxabi} ${libcxx} ${compiler-rt}; do - cp -as $path/* $out - chmod +w $out/{*,include/c++/v1,lib/{clang/$clang_version/include,cmake}} - rm -f $out/lib/libc++.so - done - - ln -s $out/lib/* $out/lib/clang/$clang_version/lib - ln -sf $out/include/* $out/lib/clang/$clang_version/include - - runHook postInstall - ''; - - passthru.isClang = true; - }); - - extraPackages = [ - llvm - lld - libc - libunwind - libcxxabi - compiler-rt - ]; - - nixSupport.cc-cflags = [ - "-resource-dir=$out/resource-root" - "-fuse-ld=lld" - "-rtlib=compiler-rt" - "-unwindlib=libunwind" - "-Wno-unused-command-line-argument" - ]; - - extraBuildCommands = '' - clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - mkdir -p $out/resource-root - ln -s ${cc}/lib/clang/$clang_version/{include,lib} $out/resource-root - - # Not sure why, but hardening seems to make things break - echo "" > $out/nix-support/add-hardening.sh - - # GPU compilation uses builtin `lld` - substituteInPlace $out/bin/{clang,clang++} \ - --replace "-MM) dontLink=1 ;;" "-MM | --cuda-device-only) dontLink=1 ;;''\n--cuda-host-only | --cuda-compile-host-device) dontLink=0 ;;" - ''; - }; - - # Base - # Unfortunately, we cannot build `clang-tools-extra` separately. - clang-tools-extra = callPackage ./llvm.nix { - stdenv = rocmClangStdenv; - buildTests = false; # `invalid operands to binary expression ('std::basic_stringstream' and 'const llvm::StringRef')` - targetName = "clang-tools-extra"; - - targetProjects = [ - "clang" - "clang-tools-extra" - ]; - - extraBuildInputs = [ gtest ]; - - extraCMakeFlags = [ - "-DLLVM_INCLUDE_DOCS=OFF" - "-DLLVM_INCLUDE_TESTS=OFF" - "-DCLANG_INCLUDE_DOCS=OFF" - "-DCLANG_INCLUDE_TESTS=ON" - "-DCLANG_TOOLS_EXTRA_INCLUDE_DOCS=ON" - ]; - - extraPostInstall = '' - # Remove LLVM and Clang - for path in `find ${llvm} ${clang-unwrapped}`; do - if [ $path != ${llvm} ] && [ $path != ${clang-unwrapped} ]; then - rm -f $out''${path#${llvm}} $out''${path#${clang-unwrapped}} || true - fi - done - - # Cleanup empty directories - find $out -type d -empty -delete - ''; - - requiredSystemFeatures = [ "big-parallel" ]; - }; - - # Projects - libclc = let - spirv = (spirv-llvm-translator.override { inherit llvm; }); - in callPackage ./llvm.nix rec { - stdenv = rocmClangStdenv; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - targetName = "libclc"; - targetDir = targetName; - extraBuildInputs = [ spirv ]; - - # `spirv-mesa3d` isn't compiling with LLVM 15.0.0, it does with LLVM 14.0.0 - # Try removing the `spirv-mesa3d` and `clspv` patches next update - # `clspv` tests fail, unresolved calls - extraPostPatch = '' - substituteInPlace CMakeLists.txt \ - --replace "find_program( LLVM_CLANG clang PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ - "find_program( LLVM_CLANG clang PATHS \"${clang}/bin\" NO_DEFAULT_PATH )" \ - --replace "find_program( LLVM_SPIRV llvm-spirv PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ - "find_program( LLVM_SPIRV llvm-spirv PATHS \"${spirv}/bin\" NO_DEFAULT_PATH )" \ - --replace " spirv-mesa3d-" "" \ - --replace " spirv64-mesa3d-" "" \ - --replace "NOT \''${t} MATCHES" \ - "NOT \''${ARCH} STREQUAL \"clspv\" AND NOT \''${ARCH} STREQUAL \"clspv64\" AND NOT \''${t} MATCHES" - ''; - - checkTargets = [ ]; - isBroken = true; # ROCm 5.7.0 doesn't have IR/AttributeMask.h yet...? - }; - - lldb = callPackage ./llvm.nix rec { - stdenv = rocmClangStdenv; - buildTests = false; # FIXME: Bad pathing for clang executable in tests, using relative path most likely - targetName = "lldb"; - targetDir = targetName; - extraNativeBuildInputs = [ python3Packages.sphinx-automodapi ]; - - extraBuildInputs = [ - xz - swig - lua5_3 - gtest - graphviz - ]; - - extraCMakeFlags = [ - "-DLLDB_EXTERNAL_CLANG_RESOURCE_DIR=${clang}/resource-root/lib/clang/$clang_version" - "-DLLDB_INCLUDE_TESTS=ON" - "-DLLDB_INCLUDE_UNITTESTS=ON" - ]; - - extraPostPatch = '' - export clang_version=`clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` - ''; - - checkTargets = [ "check-${targetName}" ]; - }; - - mlir = callPackage ./llvm.nix rec { - stdenv = rocmClangStdenv; - buildDocs = false; # No decent way to hack this to work - buildMan = false; # No man pages to build - targetName = "mlir"; - targetDir = targetName; - extraNativeBuildInputs = [ hip ]; - - extraBuildInputs = [ - rocm-comgr - vulkan-headers - vulkan-loader - glslang - shaderc - ]; - - extraCMakeFlags = [ - "-DCMAKE_POLICY_DEFAULT_CMP0116=NEW" - "-DMLIR_INCLUDE_DOCS=ON" - "-DMLIR_INCLUDE_TESTS=ON" - "-DMLIR_ENABLE_ROCM_RUNNER=ON" - "-DMLIR_ENABLE_SPIRV_CPU_RUNNER=ON" - "-DMLIR_ENABLE_VULKAN_RUNNER=ON" - "-DROCM_TEST_CHIPSET=gfx000" # CPU runner - ]; - - extraPostPatch = '' - chmod +w ../llvm - mkdir -p ../llvm/build/bin - ln -s ${lit}/bin/lit ../llvm/build/bin/llvm-lit - - substituteInPlace test/CMakeLists.txt \ - --replace "FileCheck count not" "" \ - --replace "list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime)" "" - - substituteInPlace lib/ExecutionEngine/CMakeLists.txt \ - --replace "return()" "" - - # Remove problematic tests - rm test/CAPI/execution_engine.c - rm test/Target/LLVMIR/llvmir-intrinsics.mlir - rm test/Target/LLVMIR/llvmir.mlir - rm test/Target/LLVMIR/openmp-llvm.mlir - rm test/mlir-cpu-runner/*.mlir - rm test/mlir-vulkan-runner/*.mlir - ''; - - extraPostInstall = '' - mkdir -p $out/bin - mv bin/mlir-tblgen $out/bin - ''; - - checkTargets = [ "check-${targetName}" ]; - requiredSystemFeatures = [ "big-parallel" ]; - }; - - polly = callPackage ./llvm.nix rec { - stdenv = rocmClangStdenv; - targetName = "polly"; - targetDir = targetName; - - extraPostPatch = '' - # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists` - substituteInPlace CMakeLists.txt \ - --replace "NOT TARGET gtest" "FALSE" - ''; - - checkTargets = [ "check-${targetName}" ]; - }; - - flang = callPackage ./llvm.nix rec { - stdenv = rocmClangStdenv; - buildTests = false; # `Executable "flang1" doesn't exist!` - targetName = "flang"; - targetDir = targetName; - extraNativeBuildInputs = [ python3Packages.sphinx-markdown-tables ]; - extraBuildInputs = [ mlir ]; - - extraCMakeFlags = [ - "-DCMAKE_POLICY_DEFAULT_CMP0116=NEW" - "-DCLANG_DIR=${clang-unwrapped}/lib/cmake/clang" - "-DFLANG_INCLUDE_TESTS=OFF" - "-DMLIR_TABLEGEN_EXE=${mlir}/bin/mlir-tblgen" - ]; - - extraPostPatch = '' - substituteInPlace test/CMakeLists.txt \ - --replace "FileCheck" "" \ - --replace "count" "" \ - --replace "not" "" - - substituteInPlace docs/CMakeLists.txt \ - --replace "CLANG_TABLEGEN_EXE clang-tblgen" "CLANG_TABLEGEN_EXE ${clang-unwrapped}/bin/clang-tblgen" - ''; - }; - - openmp = callPackage ./llvm.nix rec { - stdenv = rocmClangStdenv; - buildTests = false; # Too many failures, most pass - targetName = "openmp"; - targetDir = targetName; - extraPatches = [ ./0000-fix-openmp.patch ]; - extraNativeBuildInputs = [ perl ]; - - extraBuildInputs = [ - rocm-device-libs - rocm-runtime - elfutils - ]; - - extraCMakeFlags = [ - "-DCMAKE_MODULE_PATH=/build/source/llvm/cmake/modules" # For docs - "-DCLANG_TOOL=${clang}/bin/clang" - "-DCLANG_OFFLOAD_BUNDLER_TOOL=${clang-unwrapped}/bin/clang-offload-bundler" - "-DOPENMP_LLVM_TOOLS_DIR=${llvm}/bin" - "-DOPENMP_LLVM_LIT_EXECUTABLE=${lit}/bin/.lit-wrapped" - "-DDEVICELIBS_ROOT=${rocm-device-libs.src}" - ]; - - extraPostPatch = '' - # We can't build this target at the moment - substituteInPlace libomptarget/DeviceRTL/CMakeLists.txt \ - --replace "gfx1010" "" - ''; - - checkTargets = [ "check-${targetName}" ]; - extraLicenses = [ lib.licenses.mit ]; - }; - - # Runtimes - pstl = callPackage ./llvm.nix rec { - stdenv = rocmClangStdenv; - buildDocs = false; # No documentation to build - buildMan = false; # No man pages to build - buildTests = false; # Too many errors - targetName = "pstl"; - targetDir = "runtimes"; - targetRuntimes = [ targetName ]; - checkTargets = [ "check-${targetName}" ]; - }; -} diff --git a/pkgs/development/rocm-modules/5/default.nix b/pkgs/development/rocm-modules/5/default.nix new file mode 100644 index 000000000000..6509f8850858 --- /dev/null +++ b/pkgs/development/rocm-modules/5/default.nix @@ -0,0 +1,9 @@ +{ callPackage +, recurseIntoAttrs +}: + +let + rocmUpdateScript = callPackage ./update.nix { }; +in { + llvm = recurseIntoAttrs (callPackage ./llvm/default.nix { inherit rocmUpdateScript; }); +} diff --git a/pkgs/development/compilers/llvm/rocm/llvm.nix b/pkgs/development/rocm-modules/5/llvm/base.nix similarity index 100% rename from pkgs/development/compilers/llvm/rocm/llvm.nix rename to pkgs/development/rocm-modules/5/llvm/base.nix diff --git a/pkgs/development/rocm-modules/5/llvm/default.nix b/pkgs/development/rocm-modules/5/llvm/default.nix new file mode 100644 index 000000000000..11f8241251e6 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/default.nix @@ -0,0 +1,53 @@ +{ stdenv +, callPackage +, rocmUpdateScript +, wrapBintoolsWith +, overrideCC +}: + +let + ## Stage 1 ## + # Projects + llvm = callPackage ./stage-1/llvm.nix { inherit rocmUpdateScript; }; + clang-unwrapped = callPackage ./stage-1/clang-unwrapped.nix { inherit rocmUpdateScript llvm; }; + lld = callPackage ./stage-1/lld.nix { inherit rocmUpdateScript llvm; }; + + # Runtimes + runtimes = callPackage ./stage-1/runtimes.nix { inherit rocmUpdateScript llvm; }; + + ## Stage 2 ## + # Helpers + bintools-unwrapped = callPackage ./stage-2/bintools-unwrapped.nix { inherit llvm lld; }; + bintools = wrapBintoolsWith { bintools = bintools-unwrapped; }; + rStdenv = callPackage ./stage-2/rstdenv.nix { inherit llvm clang-unwrapped lld runtimes bintools; }; +in rec { + inherit + llvm + clang-unwrapped + lld + bintools; + + # Runtimes + libc = callPackage ./stage-2/libc.nix { inherit rocmUpdateScript; stdenv = rStdenv; }; + libunwind = callPackage ./stage-2/libunwind.nix { inherit rocmUpdateScript; stdenv = rStdenv; }; + libcxxabi = callPackage ./stage-2/libcxxabi.nix { inherit rocmUpdateScript; stdenv = rStdenv; }; + libcxx = callPackage ./stage-2/libcxx.nix { inherit rocmUpdateScript; stdenv = rStdenv; }; + compiler-rt = callPackage ./stage-2/compiler-rt.nix { inherit rocmUpdateScript llvm; stdenv = rStdenv; }; + + ## Stage 3 ## + # Helpers + clang = callPackage ./stage-3/clang.nix { inherit llvm lld clang-unwrapped bintools libc libunwind libcxxabi libcxx compiler-rt; }; + rocmClangStdenv = overrideCC stdenv clang; + + # Projects + clang-tools-extra = callPackage ./stage-3/clang-tools-extra.nix { inherit rocmUpdateScript llvm clang-unwrapped; stdenv = rocmClangStdenv; }; + libclc = callPackage ./stage-3/libclc.nix { inherit rocmUpdateScript llvm clang; stdenv = rocmClangStdenv; }; + lldb = callPackage ./stage-3/lldb.nix { inherit rocmUpdateScript clang; stdenv = rocmClangStdenv; }; + mlir = callPackage ./stage-3/mlir.nix { inherit rocmUpdateScript; stdenv = rocmClangStdenv; }; + polly = callPackage ./stage-3/polly.nix { inherit rocmUpdateScript; stdenv = rocmClangStdenv; }; + flang = callPackage ./stage-3/flang.nix { inherit rocmUpdateScript clang-unwrapped mlir; stdenv = rocmClangStdenv; }; + openmp = callPackage ./stage-3/openmp.nix { inherit rocmUpdateScript llvm clang-unwrapped clang; stdenv = rocmClangStdenv; }; + + # Runtimes + pstl = callPackage ./stage-3/pstl.nix { inherit rocmUpdateScript; stdenv = rocmClangStdenv; }; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-1/clang-unwrapped.nix b/pkgs/development/rocm-modules/5/llvm/stage-1/clang-unwrapped.nix new file mode 100644 index 000000000000..113313f4e066 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-1/clang-unwrapped.nix @@ -0,0 +1,46 @@ +{ callPackage +, rocmUpdateScript +, llvm +}: + +callPackage ../base.nix rec { + inherit rocmUpdateScript; + targetName = "clang-unwrapped"; + targetDir = "clang"; + extraBuildInputs = [ llvm ]; + + extraCMakeFlags = [ + "-DCLANG_INCLUDE_DOCS=ON" + "-DCLANG_INCLUDE_TESTS=ON" + ]; + + extraPostPatch = '' + # Looks like they forgot to add finding libedit to the standalone build + ln -s ../cmake/Modules/FindLibEdit.cmake cmake/modules + + substituteInPlace CMakeLists.txt \ + --replace "include(CheckIncludeFile)" "include(CheckIncludeFile)''\nfind_package(LibEdit)" + + # `No such file or directory: '/build/source/clang/tools/scan-build/bin/scan-build'` + rm test/Analysis/scan-build/*.test + rm test/Analysis/scan-build/rebuild_index/rebuild_index.test + + # `does not depend on a module exporting 'baz.h'` + rm test/Modules/header-attribs.cpp + + # We do not have HIP or the ROCm stack available yet + rm test/Driver/hip-options.hip + + # ???? `ld: cannot find crti.o: No such file or directory` linker issue? + rm test/Interpreter/dynamic-library.cpp + + # `fatal error: 'stdio.h' file not found` + rm test/OpenMP/amdgcn_emit_llvm.c + ''; + + extraPostInstall = '' + mv bin/clang-tblgen $out/bin + ''; + + requiredSystemFeatures = [ "big-parallel" ]; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-1/lld.nix b/pkgs/development/rocm-modules/5/llvm/stage-1/lld.nix new file mode 100644 index 000000000000..a7b042eabfe6 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-1/lld.nix @@ -0,0 +1,13 @@ +{ callPackage +, rocmUpdateScript +, llvm +}: + +callPackage ../base.nix rec { + inherit rocmUpdateScript; + buildMan = false; # No man pages to build + targetName = "lld"; + targetDir = targetName; + extraBuildInputs = [ llvm ]; + checkTargets = [ "check-${targetName}" ]; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-1/llvm.nix b/pkgs/development/rocm-modules/5/llvm/stage-1/llvm.nix new file mode 100644 index 000000000000..51959ec8bc32 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-1/llvm.nix @@ -0,0 +1,10 @@ +{ stdenv +, callPackage +, rocmUpdateScript +}: + +callPackage ../base.nix { + inherit rocmUpdateScript; + requiredSystemFeatures = [ "big-parallel" ]; + isBroken = stdenv.isAarch64; # https://github.com/RadeonOpenCompute/ROCm/issues/1831#issuecomment-1278205344 +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-1/runtimes.nix b/pkgs/development/rocm-modules/5/llvm/stage-1/runtimes.nix new file mode 100644 index 000000000000..5f6f278ab10e --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-1/runtimes.nix @@ -0,0 +1,30 @@ +{ lib +, callPackage +, rocmUpdateScript +, llvm +}: + +callPackage ../base.nix rec { + inherit rocmUpdateScript; + buildDocs = false; + buildMan = false; + buildTests = false; + targetName = "runtimes"; + targetDir = targetName; + + targetRuntimes = [ + "libunwind" + "libcxxabi" + "libcxx" + "compiler-rt" + ]; + + extraBuildInputs = [ llvm ]; + + extraCMakeFlags = [ + "-DLIBCXX_INCLUDE_BENCHMARKS=OFF" + "-DLIBCXX_CXX_ABI=libcxxabi" + ]; + + extraLicenses = [ lib.licenses.mit ]; +} diff --git a/pkgs/development/compilers/llvm/rocm/1000-libcxx-failing-tests.list b/pkgs/development/rocm-modules/5/llvm/stage-2/1000-libcxx-failing-tests.list similarity index 100% rename from pkgs/development/compilers/llvm/rocm/1000-libcxx-failing-tests.list rename to pkgs/development/rocm-modules/5/llvm/stage-2/1000-libcxx-failing-tests.list diff --git a/pkgs/development/rocm-modules/5/llvm/stage-2/bintools-unwrapped.nix b/pkgs/development/rocm-modules/5/llvm/stage-2/bintools-unwrapped.nix new file mode 100644 index 000000000000..ef40dd4d3824 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-2/bintools-unwrapped.nix @@ -0,0 +1,28 @@ +{ runCommand +, llvm +, lld +}: + +runCommand "rocm-llvm-binutils-${llvm.version}" { preferLocalBuild = true; } '' + mkdir -p $out/bin + + for prog in ${lld}/bin/*; do + ln -s $prog $out/bin/$(basename $prog) + done + + for prog in ${llvm}/bin/*; do + ln -sf $prog $out/bin/$(basename $prog) + done + + ln -s ${llvm}/bin/llvm-ar $out/bin/ar + ln -s ${llvm}/bin/llvm-as $out/bin/as + ln -s ${llvm}/bin/llvm-dwp $out/bin/dwp + ln -s ${llvm}/bin/llvm-nm $out/bin/nm + ln -s ${llvm}/bin/llvm-objcopy $out/bin/objcopy + ln -s ${llvm}/bin/llvm-objdump $out/bin/objdump + ln -s ${llvm}/bin/llvm-ranlib $out/bin/ranlib + ln -s ${llvm}/bin/llvm-readelf $out/bin/readelf + ln -s ${llvm}/bin/llvm-size $out/bin/size + ln -s ${llvm}/bin/llvm-strip $out/bin/strip + ln -s ${lld}/bin/lld $out/bin/ld +'' diff --git a/pkgs/development/rocm-modules/5/llvm/stage-2/compiler-rt.nix b/pkgs/development/rocm-modules/5/llvm/stage-2/compiler-rt.nix new file mode 100644 index 000000000000..3b8e41705e1a --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-2/compiler-rt.nix @@ -0,0 +1,63 @@ +{ lib +, stdenv +, callPackage +, rocmUpdateScript +, llvm +, glibc +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildDocs = false; # No documentation to build + buildMan = false; # No man pages to build + targetName = "compiler-rt"; + targetDir = "runtimes"; + + targetRuntimes = [ + "libunwind" + "libcxxabi" + "libcxx" + targetName + ]; + + extraCMakeFlags = [ + "-DCOMPILER_RT_INCLUDE_TESTS=ON" + "-DCOMPILER_RT_USE_LLVM_UNWINDER=ON" + "-DCOMPILER_RT_CXX_LIBRARY=libcxx" + "-DCOMPILER_RT_CAN_EXECUTE_TESTS=OFF" # We can't run most of these + + # Workaround having to build combined + "-DLIBUNWIND_INCLUDE_DOCS=OFF" + "-DLIBUNWIND_INCLUDE_TESTS=OFF" + "-DLIBUNWIND_USE_COMPILER_RT=ON" + "-DLIBUNWIND_INSTALL_LIBRARY=OFF" + "-DLIBUNWIND_INSTALL_HEADERS=OFF" + "-DLIBCXXABI_INCLUDE_TESTS=OFF" + "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" + "-DLIBCXXABI_USE_COMPILER_RT=ON" + "-DLIBCXXABI_INSTALL_LIBRARY=OFF" + "-DLIBCXXABI_INSTALL_HEADERS=OFF" + "-DLIBCXX_INCLUDE_DOCS=OFF" + "-DLIBCXX_INCLUDE_TESTS=OFF" + "-DLIBCXX_USE_COMPILER_RT=ON" + "-DLIBCXX_CXX_ABI=libcxxabi" + "-DLIBCXX_INSTALL_LIBRARY=OFF" + "-DLIBCXX_INSTALL_HEADERS=OFF" + ]; + + extraPostPatch = '' + # `No such file or directory: 'ldd'` + substituteInPlace ../compiler-rt/test/lit.common.cfg.py \ + --replace "'ldd'," "'${glibc.bin}/bin/ldd'," + + # We can run these + substituteInPlace ../compiler-rt/test/CMakeLists.txt \ + --replace "endfunction()" "endfunction()''\nadd_subdirectory(builtins)''\nadd_subdirectory(shadowcallstack)" + + # Could not launch llvm-config in /build/source/runtimes/build/bin + mkdir -p build/bin + ln -s ${llvm}/bin/llvm-config build/bin + ''; + + extraLicenses = [ lib.licenses.mit ]; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-2/libc.nix b/pkgs/development/rocm-modules/5/llvm/stage-2/libc.nix new file mode 100644 index 000000000000..7e7cf9c2a608 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-2/libc.nix @@ -0,0 +1,26 @@ +{ stdenv +, callPackage +, rocmUpdateScript +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildMan = false; # No man pages to build + targetName = "libc"; + targetDir = "runtimes"; + targetRuntimes = [ targetName ]; + + extraPostPatch = '' + # `Failed to match ... against ...` `Match value not within tolerance value of MPFR result:` + # We need a better way, but I don't know enough sed magic and patching `CMakeLists.txt` isn't working... + substituteInPlace ../libc/test/src/math/log10_test.cpp \ + --replace "i < N" "i < 0" \ + --replace "test(mpfr::RoundingMode::Nearest);" "" \ + --replace "test(mpfr::RoundingMode::Downward);" "" \ + --replace "test(mpfr::RoundingMode::Upward);" "" \ + --replace "test(mpfr::RoundingMode::TowardZero);" "" + ''; + + checkTargets = [ "check-${targetName}" ]; + hardeningDisable = [ "fortify" ]; # Prevent `error: "Assumed value of MB_LEN_MAX wrong"` +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-2/libcxx.nix b/pkgs/development/rocm-modules/5/llvm/stage-2/libcxx.nix new file mode 100644 index 000000000000..473227242765 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-2/libcxx.nix @@ -0,0 +1,42 @@ +{ stdenv +, callPackage +, rocmUpdateScript +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildMan = false; # No man pages to build + targetName = "libcxx"; + targetDir = "runtimes"; + + targetRuntimes = [ + "libunwind" + "libcxxabi" + targetName + ]; + + extraCMakeFlags = [ + "-DLIBCXX_INCLUDE_DOCS=ON" + "-DLIBCXX_INCLUDE_TESTS=ON" + "-DLIBCXX_USE_COMPILER_RT=ON" + "-DLIBCXX_CXX_ABI=libcxxabi" + + # Workaround having to build combined + "-DLIBUNWIND_INCLUDE_DOCS=OFF" + "-DLIBUNWIND_INCLUDE_TESTS=OFF" + "-DLIBUNWIND_USE_COMPILER_RT=ON" + "-DLIBUNWIND_INSTALL_LIBRARY=OFF" + "-DLIBUNWIND_INSTALL_HEADERS=OFF" + "-DLIBCXXABI_INCLUDE_TESTS=OFF" + "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" + "-DLIBCXXABI_USE_COMPILER_RT=ON" + "-DLIBCXXABI_INSTALL_LIBRARY=OFF" + "-DLIBCXXABI_INSTALL_HEADERS=OFF" + ]; + + # Most of these can't find `bash` or `mkdir`, might just be hard-coded paths, or PATH is altered + extraPostPatch = '' + chmod +w -R ../libcxx/test/{libcxx,std} + cat ${./1000-libcxx-failing-tests.list} | xargs -d \\n rm + ''; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-2/libcxxabi.nix b/pkgs/development/rocm-modules/5/llvm/stage-2/libcxxabi.nix new file mode 100644 index 000000000000..e15ec777ff61 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-2/libcxxabi.nix @@ -0,0 +1,37 @@ +{ stdenv +, callPackage +, rocmUpdateScript +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildDocs = false; # No documentation to build + buildMan = false; # No man pages to build + targetName = "libcxxabi"; + targetDir = "runtimes"; + + targetRuntimes = [ + "libunwind" + targetName + "libcxx" + ]; + + extraCMakeFlags = [ + "-DLIBCXXABI_INCLUDE_TESTS=ON" + "-DLIBCXXABI_USE_LLVM_UNWINDER=ON" + "-DLIBCXXABI_USE_COMPILER_RT=ON" + + # Workaround having to build combined + "-DLIBUNWIND_INCLUDE_DOCS=OFF" + "-DLIBUNWIND_INCLUDE_TESTS=OFF" + "-DLIBUNWIND_USE_COMPILER_RT=ON" + "-DLIBUNWIND_INSTALL_LIBRARY=OFF" + "-DLIBUNWIND_INSTALL_HEADERS=OFF" + "-DLIBCXX_INCLUDE_DOCS=OFF" + "-DLIBCXX_INCLUDE_TESTS=OFF" + "-DLIBCXX_USE_COMPILER_RT=ON" + "-DLIBCXX_CXX_ABI=libcxxabi" + "-DLIBCXX_INSTALL_LIBRARY=OFF" + "-DLIBCXX_INSTALL_HEADERS=OFF" + ]; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-2/libunwind.nix b/pkgs/development/rocm-modules/5/llvm/stage-2/libunwind.nix new file mode 100644 index 000000000000..3d599e0d4b32 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-2/libunwind.nix @@ -0,0 +1,26 @@ +{ stdenv +, callPackage +, rocmUpdateScript +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildMan = false; # No man pages to build + targetName = "libunwind"; + targetDir = "runtimes"; + targetRuntimes = [ targetName ]; + + extraCMakeFlags = [ + "-DLIBUNWIND_INCLUDE_DOCS=ON" + "-DLIBUNWIND_INCLUDE_TESTS=ON" + "-DLIBUNWIND_USE_COMPILER_RT=ON" + ]; + + extraPostPatch = '' + # `command had no output on stdout or stderr` (Says these unsupported tests) + chmod +w -R ../libunwind/test + rm ../libunwind/test/floatregister.pass.cpp + rm ../libunwind/test/unwind_leaffunction.pass.cpp + rm ../libunwind/test/libunwind_02.pass.cpp + ''; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-2/rstdenv.nix b/pkgs/development/rocm-modules/5/llvm/stage-2/rstdenv.nix new file mode 100644 index 000000000000..45d369a6541c --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-2/rstdenv.nix @@ -0,0 +1,35 @@ +{ stdenv +, overrideCC +, wrapCCWith +, llvm +, clang-unwrapped +, lld +, runtimes +, bintools +}: + +overrideCC stdenv (wrapCCWith rec { + inherit bintools; + libcxx = runtimes; + cc = clang-unwrapped; + + extraPackages = [ + llvm + lld + ]; + + nixSupport.cc-cflags = [ + "-resource-dir=$out/resource-root" + "-fuse-ld=lld" + "-rtlib=compiler-rt" + "-unwindlib=libunwind" + "-Wno-unused-command-line-argument" + ]; + + extraBuildCommands = '' + clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` + mkdir -p $out/resource-root + ln -s ${cc}/lib/clang/$clang_version/include $out/resource-root + ln -s ${runtimes}/lib $out/resource-root + ''; +}) diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/clang-tools-extra.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/clang-tools-extra.nix new file mode 100644 index 000000000000..d18673ecb3db --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/clang-tools-extra.nix @@ -0,0 +1,42 @@ +{ stdenv +, callPackage +, rocmUpdateScript +, llvm +, clang-unwrapped +, gtest +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildTests = false; # `invalid operands to binary expression ('std::basic_stringstream' and 'const llvm::StringRef')` + targetName = "clang-tools-extra"; + + targetProjects = [ + "clang" + "clang-tools-extra" + ]; + + extraBuildInputs = [ gtest ]; + + extraCMakeFlags = [ + "-DLLVM_INCLUDE_DOCS=OFF" + "-DLLVM_INCLUDE_TESTS=OFF" + "-DCLANG_INCLUDE_DOCS=OFF" + "-DCLANG_INCLUDE_TESTS=ON" + "-DCLANG_TOOLS_EXTRA_INCLUDE_DOCS=ON" + ]; + + extraPostInstall = '' + # Remove LLVM and Clang + for path in `find ${llvm} ${clang-unwrapped}`; do + if [ $path != ${llvm} ] && [ $path != ${clang-unwrapped} ]; then + rm -f $out''${path#${llvm}} $out''${path#${clang-unwrapped}} || true + fi + done + + # Cleanup empty directories + find $out -type d -empty -delete + ''; + + requiredSystemFeatures = [ "big-parallel" ]; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/clang.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/clang.nix new file mode 100644 index 000000000000..91f34265f85f --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/clang.nix @@ -0,0 +1,73 @@ +{ stdenv +, wrapCCWith +, llvm +, lld +, clang-unwrapped +, bintools +, libc +, libunwind +, libcxxabi +, libcxx +, compiler-rt +}: + +wrapCCWith rec { + inherit libcxx bintools; + + # We do this to avoid HIP pathing problems, and mimic a monolithic install + cc = stdenv.mkDerivation (finalAttrs: { + inherit (clang-unwrapped) version; + pname = "rocm-llvm-clang"; + dontUnpack = true; + + installPhase = '' + runHook preInstall + + clang_version=`${clang-unwrapped}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` + mkdir -p $out/{bin,include/c++/v1,lib/{cmake,clang/$clang_version/{include,lib}},libexec,share} + + for path in ${llvm} ${clang-unwrapped} ${lld} ${libc} ${libunwind} ${libcxxabi} ${libcxx} ${compiler-rt}; do + cp -as $path/* $out + chmod +w $out/{*,include/c++/v1,lib/{clang/$clang_version/include,cmake}} + rm -f $out/lib/libc++.so + done + + ln -s $out/lib/* $out/lib/clang/$clang_version/lib + ln -sf $out/include/* $out/lib/clang/$clang_version/include + + runHook postInstall + ''; + + passthru.isClang = true; + }); + + extraPackages = [ + llvm + lld + libc + libunwind + libcxxabi + compiler-rt + ]; + + nixSupport.cc-cflags = [ + "-resource-dir=$out/resource-root" + "-fuse-ld=lld" + "-rtlib=compiler-rt" + "-unwindlib=libunwind" + "-Wno-unused-command-line-argument" + ]; + + extraBuildCommands = '' + clang_version=`${cc}/bin/clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` + mkdir -p $out/resource-root + ln -s ${cc}/lib/clang/$clang_version/{include,lib} $out/resource-root + + # Not sure why, but hardening seems to make things break + echo "" > $out/nix-support/add-hardening.sh + + # GPU compilation uses builtin `lld` + substituteInPlace $out/bin/{clang,clang++} \ + --replace "-MM) dontLink=1 ;;" "-MM | --cuda-device-only) dontLink=1 ;;''\n--cuda-host-only | --cuda-compile-host-device) dontLink=0 ;;" + ''; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix new file mode 100644 index 000000000000..7289602451db --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix @@ -0,0 +1,33 @@ +{ stdenv +, callPackage +, rocmUpdateScript +, clang-unwrapped +, mlir +, python3Packages +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildTests = false; # `Executable "flang1" doesn't exist!` + targetName = "flang"; + targetDir = targetName; + extraNativeBuildInputs = [ python3Packages.sphinx-markdown-tables ]; + extraBuildInputs = [ mlir ]; + + extraCMakeFlags = [ + "-DCMAKE_POLICY_DEFAULT_CMP0116=NEW" + "-DCLANG_DIR=${clang-unwrapped}/lib/cmake/clang" + "-DFLANG_INCLUDE_TESTS=OFF" + "-DMLIR_TABLEGEN_EXE=${mlir}/bin/mlir-tblgen" + ]; + + extraPostPatch = '' + substituteInPlace test/CMakeLists.txt \ + --replace "FileCheck" "" \ + --replace "count" "" \ + --replace "not" "" + + substituteInPlace docs/CMakeLists.txt \ + --replace "CLANG_TABLEGEN_EXE clang-tblgen" "CLANG_TABLEGEN_EXE ${clang-unwrapped}/bin/clang-tblgen" + ''; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/libclc.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/libclc.nix new file mode 100644 index 000000000000..1fd72ee67188 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/libclc.nix @@ -0,0 +1,36 @@ +{ stdenv +, callPackage +, rocmUpdateScript +, llvm +, clang +, spirv-llvm-translator +}: + +let + spirv = (spirv-llvm-translator.override { inherit llvm; }); +in callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildDocs = false; # No documentation to build + buildMan = false; # No man pages to build + targetName = "libclc"; + targetDir = targetName; + extraBuildInputs = [ spirv ]; + + # `spirv-mesa3d` isn't compiling with LLVM 15.0.0, it does with LLVM 14.0.0 + # Try removing the `spirv-mesa3d` and `clspv` patches next update + # `clspv` tests fail, unresolved calls + extraPostPatch = '' + substituteInPlace CMakeLists.txt \ + --replace "find_program( LLVM_CLANG clang PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ + "find_program( LLVM_CLANG clang PATHS \"${clang}/bin\" NO_DEFAULT_PATH )" \ + --replace "find_program( LLVM_SPIRV llvm-spirv PATHS \''${LLVM_BINDIR} NO_DEFAULT_PATH )" \ + "find_program( LLVM_SPIRV llvm-spirv PATHS \"${spirv}/bin\" NO_DEFAULT_PATH )" \ + --replace " spirv-mesa3d-" "" \ + --replace " spirv64-mesa3d-" "" \ + --replace "NOT \''${t} MATCHES" \ + "NOT \''${ARCH} STREQUAL \"clspv\" AND NOT \''${ARCH} STREQUAL \"clspv64\" AND NOT \''${t} MATCHES" + ''; + + checkTargets = [ ]; + isBroken = true; # ROCm 5.7.0 doesn't have IR/AttributeMask.h yet...? +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/lldb.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/lldb.nix new file mode 100644 index 000000000000..9b7d25e06d9d --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/lldb.nix @@ -0,0 +1,39 @@ +{ stdenv +, callPackage +, rocmUpdateScript +, clang +, xz +, swig +, lua5_3 +, graphviz +, gtest +, python3Packages +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildTests = false; # FIXME: Bad pathing for clang executable in tests, using relative path most likely + targetName = "lldb"; + targetDir = targetName; + extraNativeBuildInputs = [ python3Packages.sphinx-automodapi ]; + + extraBuildInputs = [ + xz + swig + lua5_3 + graphviz + gtest + ]; + + extraCMakeFlags = [ + "-DLLDB_EXTERNAL_CLANG_RESOURCE_DIR=${clang}/resource-root/lib/clang/$clang_version" + "-DLLDB_INCLUDE_TESTS=ON" + "-DLLDB_INCLUDE_UNITTESTS=ON" + ]; + + extraPostPatch = '' + export clang_version=`clang -v 2>&1 | grep "clang version " | grep -E -o "[0-9.-]+"` + ''; + + checkTargets = [ "check-${targetName}" ]; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix new file mode 100644 index 000000000000..099622ca7cb8 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix @@ -0,0 +1,67 @@ +{ stdenv +, callPackage +, rocmUpdateScript +# , hip +# , rocm-comgr +, vulkan-headers +, vulkan-loader +, glslang +, shaderc +, lit +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildDocs = false; # No decent way to hack this to work + buildMan = false; # No man pages to build + targetName = "mlir"; + targetDir = targetName; + # extraNativeBuildInputs = [ hip ]; + + extraBuildInputs = [ + # rocm-comgr + vulkan-headers + vulkan-loader + glslang + shaderc + ]; + + extraCMakeFlags = [ + "-DCMAKE_POLICY_DEFAULT_CMP0116=NEW" + "-DMLIR_INCLUDE_DOCS=ON" + "-DMLIR_INCLUDE_TESTS=ON" + "-DMLIR_ENABLE_ROCM_RUNNER=ON" + "-DMLIR_ENABLE_SPIRV_CPU_RUNNER=ON" + "-DMLIR_ENABLE_VULKAN_RUNNER=ON" + "-DROCM_TEST_CHIPSET=gfx000" # CPU runner + ]; + + extraPostPatch = '' + chmod +w ../llvm + mkdir -p ../llvm/build/bin + ln -s ${lit}/bin/lit ../llvm/build/bin/llvm-lit + + substituteInPlace test/CMakeLists.txt \ + --replace "FileCheck count not" "" \ + --replace "list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime)" "" + + substituteInPlace lib/ExecutionEngine/CMakeLists.txt \ + --replace "return()" "" + + # Remove problematic tests + rm test/CAPI/execution_engine.c + rm test/Target/LLVMIR/llvmir-intrinsics.mlir + rm test/Target/LLVMIR/llvmir.mlir + rm test/Target/LLVMIR/openmp-llvm.mlir + rm test/mlir-cpu-runner/*.mlir + rm test/mlir-vulkan-runner/*.mlir + ''; + + extraPostInstall = '' + mkdir -p $out/bin + mv bin/mlir-tblgen $out/bin + ''; + + checkTargets = [ "check-${targetName}" ]; + requiredSystemFeatures = [ "big-parallel" ]; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/openmp.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/openmp.nix new file mode 100644 index 000000000000..faab6388835e --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/openmp.nix @@ -0,0 +1,45 @@ +{ lib +, stdenv +, callPackage +, rocmUpdateScript +, llvm +, clang +, clang-unwrapped +# , rocm-device-libs +# , rocm-runtime +, perl +, elfutils +, lit +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildTests = false; # Too many failures, most pass + targetName = "openmp"; + targetDir = targetName; + extraNativeBuildInputs = [ perl ]; + + extraBuildInputs = [ + # rocm-device-libs + # rocm-runtime + elfutils + ]; + + extraCMakeFlags = [ + "-DCMAKE_MODULE_PATH=/build/source/llvm/cmake/modules" # For docs + "-DCLANG_TOOL=${clang}/bin/clang" + "-DCLANG_OFFLOAD_BUNDLER_TOOL=${clang-unwrapped}/bin/clang-offload-bundler" + "-DOPENMP_LLVM_TOOLS_DIR=${llvm}/bin" + "-DOPENMP_LLVM_LIT_EXECUTABLE=${lit}/bin/.lit-wrapped" + # "-DDEVICELIBS_ROOT=${rocm-device-libs.src}" + ]; + + extraPostPatch = '' + # We can't build this target at the moment + substituteInPlace libomptarget/DeviceRTL/CMakeLists.txt \ + --replace "gfx1010" "" + ''; + + checkTargets = [ "check-${targetName}" ]; + extraLicenses = [ lib.licenses.mit ]; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/polly.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/polly.nix new file mode 100644 index 000000000000..e001f33dfd43 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/polly.nix @@ -0,0 +1,18 @@ +{ stdenv +, callPackage +, rocmUpdateScript +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + targetName = "polly"; + targetDir = targetName; + + extraPostPatch = '' + # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists` + substituteInPlace CMakeLists.txt \ + --replace "NOT TARGET gtest" "FALSE" + ''; + + checkTargets = [ "check-${targetName}" ]; +} diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/pstl.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/pstl.nix new file mode 100644 index 000000000000..dc7d7cd6ccbf --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/pstl.nix @@ -0,0 +1,15 @@ +{ stdenv +, callPackage +, rocmUpdateScript +}: + +callPackage ../base.nix rec { + inherit stdenv rocmUpdateScript; + buildDocs = false; # No documentation to build + buildMan = false; # No man pages to build + buildTests = false; # Too many errors + targetName = "pstl"; + targetDir = "runtimes"; + targetRuntimes = [ targetName ]; + checkTargets = [ "check-${targetName}" ]; +} diff --git a/pkgs/development/rocm-modules/update-script/default.nix b/pkgs/development/rocm-modules/5/update.nix similarity index 91% rename from pkgs/development/rocm-modules/update-script/default.nix rename to pkgs/development/rocm-modules/5/update.nix index 6188587de31a..abd434776ef9 100644 --- a/pkgs/development/rocm-modules/update-script/default.nix +++ b/pkgs/development/rocm-modules/5/update.nix @@ -12,7 +12,7 @@ let pname = if lib.hasPrefix "rocm-llvm-" name - then "llvmPackages_rocm.${lib.removePrefix "rocm-llvm-" name}" + then "rocmPackages_5.llvm.${lib.removePrefix "rocm-llvm-" name}" else name; updateScript = writeScript "update.sh" '' diff --git a/pkgs/test/default.nix b/pkgs/test/default.nix index 05d8ee61e9a5..2b1768515bab 100644 --- a/pkgs/test/default.nix +++ b/pkgs/test/default.nix @@ -8,7 +8,7 @@ with pkgs; llvmTests = let pkgSets = lib.pipe pkgNames [ (filter (lib.hasPrefix "llvmPackages")) - (filter (n: n != "llvmPackages_rocm")) + (filter (n: n != "rocmPackages.llvm")) (filter (n: n != "llvmPackages_latest")) (filter (n: n != "llvmPackages_git")) ]; diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index 123931d8533c..4e03557c7740 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -7766,6 +7766,9 @@ with pkgs; rar2fs = callPackage ../tools/filesystems/rar2fs { }; + rocmPackages = rocmPackages_5; + rocmPackages_5 = recurseIntoAttrs (callPackage ../development/rocm-modules/5 { }); + rune = callPackage ../development/interpreters/rune { }; s9fes = callPackage ../development/interpreters/s9fes { }; @@ -15716,7 +15719,6 @@ with pkgs; clangStdenv = if stdenv.cc.isClang then stdenv else lowPrio llvmPackages.stdenv; clang-sierraHack-stdenv = overrideCC stdenv buildPackages.clang-sierraHack; libcxxStdenv = if stdenv.isDarwin then stdenv else lowPrio llvmPackages.libcxxStdenv; - rocmClangStdenv = llvmPackages_rocm.rocmClangStdenv; clean = callPackage ../development/compilers/clean { }; @@ -16771,8 +16773,6 @@ with pkgs; targetLlvm = targetPackages.llvmPackages_16.llvm or llvmPackages_16.llvm; })); - llvmPackages_rocm = recurseIntoAttrs (callPackage ../development/compilers/llvm/rocm { }); - lorri = callPackage ../tools/misc/lorri { inherit (darwin.apple_sdk.frameworks) CoreServices Security; }; @@ -16958,7 +16958,7 @@ with pkgs; rml = callPackage ../development/compilers/rml { }; composable_kernel = callPackage ../development/libraries/composable_kernel { - inherit (llvmPackages_rocm) openmp clang-tools-extra; + inherit (rocmPackages.llvm) openmp clang-tools-extra; stdenv = rocmClangStdenv; }; @@ -16977,17 +16977,17 @@ with pkgs; }; hip-common = callPackage ../development/compilers/hip-common { - inherit (llvmPackages_rocm) llvm; + inherit (rocmPackages.llvm) llvm; stdenv = rocmClangStdenv; }; hipcc = callPackage ../development/compilers/hipcc { - inherit (llvmPackages_rocm) llvm; + inherit (rocmPackages.llvm) llvm; stdenv = rocmClangStdenv; }; hip = callPackage ../development/compilers/hip { - inherit (llvmPackages_rocm) llvm; + inherit (rocmPackages.llvm) llvm; inherit (cudaPackages) cudatoolkit; stdenv = rocmClangStdenv; }; @@ -17009,7 +17009,7 @@ with pkgs; }; hipsparse = callPackage ../development/libraries/hipsparse { - inherit (llvmPackages_rocm) openmp; + inherit (rocmPackages.llvm) openmp; stdenv = rocmClangStdenv; }; @@ -17018,7 +17018,7 @@ with pkgs; }; hipfft = callPackage ../development/libraries/hipfft { - inherit (llvmPackages_rocm) openmp; + inherit (rocmPackages.llvm) openmp; stdenv = rocmClangStdenv; }; @@ -17031,7 +17031,7 @@ with pkgs; }; migraphx = callPackage ../development/libraries/migraphx { - inherit (llvmPackages_rocm) clang-tools-extra openmp; + inherit (rocmPackages.llvm) clang-tools-extra openmp; stdenv = rocmClangStdenv; rocmlir = rocmlir-rock; }; @@ -17049,7 +17049,7 @@ with pkgs; }; rocalution = callPackage ../development/libraries/rocalution { - inherit (llvmPackages_rocm) openmp; + inherit (rocmPackages.llvm) openmp; stdenv = rocmClangStdenv; }; @@ -17122,7 +17122,7 @@ with pkgs; }; rocfft = callPackage ../development/libraries/rocfft { - inherit (llvmPackages_rocm) openmp; + inherit (rocmPackages.llvm) openmp; stdenv = rocmClangStdenv; }; @@ -17135,12 +17135,12 @@ with pkgs; }; rocwmma = callPackage ../development/libraries/rocwmma { - inherit (llvmPackages_rocm) openmp; + inherit (rocmPackages.llvm) openmp; stdenv = rocmClangStdenv; }; rocblas = callPackage ../development/libraries/rocblas { - inherit (llvmPackages_rocm) openmp; + inherit (rocmPackages.llvm) openmp; stdenv = rocmClangStdenv; }; @@ -17153,7 +17153,7 @@ with pkgs; }; miopen = callPackage ../development/libraries/miopen { - inherit (llvmPackages_rocm) llvm clang-tools-extra; + inherit (rocmPackages.llvm) llvm clang-tools-extra; stdenv = rocmClangStdenv; rocmlir = rocmlir-rock; boost = boost179.override { enableStatic = true; }; @@ -17167,11 +17167,9 @@ with pkgs; useOpenCL = true; }; - rocmUpdateScript = callPackage ../development/rocm-modules/update-script { }; - # Requires GCC roctracer = callPackage ../development/libraries/roctracer { - inherit (llvmPackages_rocm) clang; + inherit (rocmPackages.llvm) clang; }; rtags = callPackage ../development/tools/rtags { @@ -39477,7 +39475,7 @@ with pkgs; lie = callPackage ../applications/science/math/LiE { }; inherit (callPackage ../development/libraries/science/math/magma { - inherit (llvmPackages_rocm) openmp; + inherit (rocmPackages.llvm) openmp; }) magma magma_2_7_2 magma_2_6_2; magma-cuda = magma.override { diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index d0e53a2a38c0..4521548cb3d8 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -8349,7 +8349,7 @@ self: super: with self; { open-meteo = callPackage ../development/python-modules/open-meteo { }; - openai-triton = callPackage ../development/python-modules/openai-triton { llvmPackages = pkgs.llvmPackages_rocm; }; + openai-triton = callPackage ../development/python-modules/openai-triton { llvmPackages = pkgs.rocmPackages.llvm; }; openai-triton-bin = callPackage ../development/python-modules/openai-triton/bin.nix { }; @@ -13902,7 +13902,7 @@ self: super: with self; { else pkgs.magma; inherit (pkgs.darwin.apple_sdk.frameworks) Accelerate CoreServices; inherit (pkgs.darwin) libobjc; - inherit (pkgs.llvmPackages_rocm) openmp; + inherit (pkgs.rocmPackages.llvm) openmp; }; torch-bin = callPackage ../development/python-modules/torch/bin.nix { From 575ce47fa4051fbb5c234a10eed923d11257749a Mon Sep 17 00:00:00 2001 From: Madoura Date: Sun, 1 Oct 2023 20:05:32 -0500 Subject: [PATCH 12/30] rocm-related: move all relevant ROCm derivations to rocmPackages clr: init at 5.7.0 (hipamd, opencl, rocclr merged) --- .../hip-common/0000-fixup-paths.patch | 129 ------- .../compilers/hip/0000-fixup-paths.patch | 62 --- pkgs/development/compilers/hip/default.nix | 197 ---------- .../compilers/hipcc/0000-fixup-paths.patch | 130 ------- pkgs/development/libraries/rocclr/default.nix | 64 --- .../libraries/rocm-comgr/cmake.patch | 365 ------------------ .../libraries/rocm-opencl-icd/default.nix | 26 -- .../libraries/rocm-opencl-icd/test.nix | 19 - .../libraries/rocm-opencl-runtime/default.nix | 69 ---- ...0000-dont-require-hsa_amd_aqlprofile.patch | 20 - pkgs/development/libraries/ucx/default.nix | 9 +- .../5}/clang-ocl/default.nix | 0 .../rocm-modules/5/clr/default.nix | 147 +++++++ .../5}/composable_kernel/default.nix | 0 pkgs/development/rocm-modules/5/default.nix | 237 +++++++++++- .../5}/hip-common/default.nix | 15 - .../5}/hipblas/default.nix | 2 +- .../5}/hipcc/default.nix | 30 +- .../5}/hipcub/default.nix | 2 +- .../5}/hipfft/default.nix | 2 +- .../5}/hipfort/default.nix | 0 .../5}/hipify/default.nix | 11 +- .../5}/hipsolver/default.nix | 2 +- .../5}/hipsparse/default.nix | 2 +- .../5}/migraphx/default.nix | 2 +- .../5}/miopen/default.nix | 2 +- .../5}/miopen/deps.nix | 0 .../5}/miopengemm/default.nix | 0 .../5}/rccl/default.nix | 2 +- .../misc => rocm-modules/5}/rdc/default.nix | 3 +- .../5}/rocalution/default.nix | 2 +- .../5}/rocblas/default.nix | 2 +- .../5}/rocdbgapi/default.nix | 11 + .../5}/rocfft/default.nix | 2 +- .../5}/rocfft/device-install.patch | 0 .../5}/rocfft/split-kernel-compilation.patch | 0 .../5}/rocgdb/default.nix | 0 .../5}/rocm-cmake/default.nix | 2 + .../5}/rocm-comgr/default.nix | 1 - .../5}/rocm-core/default.nix | 0 .../5}/rocm-device-libs/cmake.patch | 0 .../5}/rocm-device-libs/default.nix | 0 .../5}/rocm-runtime/default.nix | 4 +- .../rocm-modules/5}/rocm-smi/cmake.patch | 0 .../rocm-modules/5}/rocm-smi/default.nix | 20 +- .../5}/rocm-thunk/default.nix | 6 - .../5}/rocminfo/default.nix | 0 .../5}/rocmlir/default.nix | 0 .../5}/rocprim/default.nix | 2 +- .../5}/rocprofiler/default.nix | 62 ++- .../5}/rocr-debug-agent/default.nix | 14 +- .../5}/rocrand/default.nix | 2 +- .../5}/rocsolver/default.nix | 2 +- .../5}/rocsparse/default.nix | 2 +- .../5}/rocsparse/deps.nix | 0 .../5}/rocthrust/default.nix | 2 +- .../5}/roctracer/default.nix | 11 +- .../rocwmma/0000-dont-fetch-googletest.patch | 0 .../5}/rocwmma/default.nix | 2 +- .../5}/tensile/default.nix | 0 pkgs/development/rocm-modules/5/update.nix | 4 +- pkgs/top-level/all-packages.nix | 215 +---------- 62 files changed, 518 insertions(+), 1399 deletions(-) delete mode 100644 pkgs/development/compilers/hip-common/0000-fixup-paths.patch delete mode 100644 pkgs/development/compilers/hip/0000-fixup-paths.patch delete mode 100644 pkgs/development/compilers/hip/default.nix delete mode 100644 pkgs/development/compilers/hipcc/0000-fixup-paths.patch delete mode 100644 pkgs/development/libraries/rocclr/default.nix delete mode 100644 pkgs/development/libraries/rocm-comgr/cmake.patch delete mode 100644 pkgs/development/libraries/rocm-opencl-icd/default.nix delete mode 100644 pkgs/development/libraries/rocm-opencl-icd/test.nix delete mode 100644 pkgs/development/libraries/rocm-opencl-runtime/default.nix delete mode 100644 pkgs/development/libraries/rocprofiler/0000-dont-require-hsa_amd_aqlprofile.patch rename pkgs/development/{libraries => rocm-modules/5}/clang-ocl/default.nix (100%) create mode 100644 pkgs/development/rocm-modules/5/clr/default.nix rename pkgs/development/{libraries => rocm-modules/5}/composable_kernel/default.nix (100%) rename pkgs/development/{compilers => rocm-modules/5}/hip-common/default.nix (79%) rename pkgs/development/{libraries => rocm-modules/5}/hipblas/default.nix (97%) rename pkgs/development/{compilers => rocm-modules/5}/hipcc/default.nix (62%) rename pkgs/development/{libraries => rocm-modules/5}/hipcub/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/hipfft/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/hipfort/default.nix (100%) rename pkgs/development/{compilers => rocm-modules/5}/hipify/default.nix (83%) rename pkgs/development/{libraries => rocm-modules/5}/hipsolver/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/hipsparse/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/migraphx/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/miopen/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/miopen/deps.nix (100%) rename pkgs/development/{libraries => rocm-modules/5}/miopengemm/default.nix (100%) rename pkgs/development/{libraries => rocm-modules/5}/rccl/default.nix (98%) rename pkgs/development/{tools/misc => rocm-modules/5}/rdc/default.nix (94%) rename pkgs/development/{libraries => rocm-modules/5}/rocalution/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/rocblas/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/rocdbgapi/default.nix (87%) rename pkgs/development/{libraries => rocm-modules/5}/rocfft/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/rocfft/device-install.patch (100%) rename pkgs/development/{libraries => rocm-modules/5}/rocfft/split-kernel-compilation.patch (100%) rename pkgs/development/{tools/misc => rocm-modules/5}/rocgdb/default.nix (100%) rename pkgs/development/{tools/build-managers => rocm-modules/5}/rocm-cmake/default.nix (91%) rename pkgs/development/{libraries => rocm-modules/5}/rocm-comgr/default.nix (97%) rename pkgs/development/{libraries => rocm-modules/5}/rocm-core/default.nix (100%) rename pkgs/development/{libraries => rocm-modules/5}/rocm-device-libs/cmake.patch (100%) rename pkgs/development/{libraries => rocm-modules/5}/rocm-device-libs/default.nix (100%) rename pkgs/development/{libraries => rocm-modules/5}/rocm-runtime/default.nix (87%) rename pkgs/{tools/system => development/rocm-modules/5}/rocm-smi/cmake.patch (100%) rename pkgs/{tools/system => development/rocm-modules/5}/rocm-smi/default.nix (72%) rename pkgs/development/{libraries => rocm-modules/5}/rocm-thunk/default.nix (92%) rename pkgs/development/{tools => rocm-modules/5}/rocminfo/default.nix (100%) rename pkgs/development/{libraries => rocm-modules/5}/rocmlir/default.nix (100%) rename pkgs/development/{libraries => rocm-modules/5}/rocprim/default.nix (98%) rename pkgs/development/{libraries => rocm-modules/5}/rocprofiler/default.nix (52%) rename pkgs/development/{libraries => rocm-modules/5}/rocr-debug-agent/default.nix (87%) rename pkgs/development/{libraries => rocm-modules/5}/rocrand/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/rocsolver/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/rocsparse/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/rocsparse/deps.nix (100%) rename pkgs/development/{libraries => rocm-modules/5}/rocthrust/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/roctracer/default.nix (93%) rename pkgs/development/{libraries => rocm-modules/5}/rocwmma/0000-dont-fetch-googletest.patch (100%) rename pkgs/development/{libraries => rocm-modules/5}/rocwmma/default.nix (99%) rename pkgs/development/{libraries => rocm-modules/5}/tensile/default.nix (100%) diff --git a/pkgs/development/compilers/hip-common/0000-fixup-paths.patch b/pkgs/development/compilers/hip-common/0000-fixup-paths.patch deleted file mode 100644 index f3fd73255520..000000000000 --- a/pkgs/development/compilers/hip-common/0000-fixup-paths.patch +++ /dev/null @@ -1,129 +0,0 @@ -diff --git a/bin/hipcc.pl b/bin/hipcc.pl -index da9559b..7aaa540 100755 ---- a/bin/hipcc.pl -+++ b/bin/hipcc.pl -@@ -185,7 +185,7 @@ if ($HIP_PLATFORM eq "amd") { - chomp($HIP_CLANG_TARGET); - - if (! defined $HIP_CLANG_INCLUDE_PATH) { -- $HIP_CLANG_INCLUDE_PATH = abs_path("$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION/include"); -+ $HIP_CLANG_INCLUDE_PATH = abs_path("@clang@/resource-root/include"); - } - if (! defined $HIP_INCLUDE_PATH) { - $HIP_INCLUDE_PATH = "$HIP_PATH/include"; -@@ -206,8 +206,8 @@ if ($HIP_PLATFORM eq "amd") { - print ("HIP_CLANG_TARGET=$HIP_CLANG_TARGET\n"); - } - -- $HIPCXXFLAGS .= " -isystem \"$HIP_CLANG_INCLUDE_PATH/..\""; -- $HIPCFLAGS .= " -isystem \"$HIP_CLANG_INCLUDE_PATH/..\""; -+ $HIPCXXFLAGS .= " -isystem \"$HIP_CLANG_INCLUDE_PATH\""; -+ $HIPCFLAGS .= " -isystem \"$HIP_CLANG_INCLUDE_PATH\""; - $HIPLDFLAGS .= " -L\"$HIP_LIB_PATH\""; - if ($isWindows) { - $HIPLDFLAGS .= " -lamdhip64"; -@@ -625,7 +625,7 @@ if($HIP_PLATFORM eq "amd"){ - $targetsStr = $ENV{HCC_AMDGPU_TARGET}; - } elsif (not $isWindows) { - # Else try using rocm_agent_enumerator -- $ROCM_AGENT_ENUM = "${ROCM_PATH}/bin/rocm_agent_enumerator"; -+ $ROCM_AGENT_ENUM = "@rocminfo@/bin/rocm_agent_enumerator"; - $targetsStr = `${ROCM_AGENT_ENUM} -t GPU`; - $targetsStr =~ s/\n/,/g; - } -@@ -724,16 +724,16 @@ if ($HIP_PLATFORM eq "amd") { - - if (not $isWindows and not $compileOnly) { - if ($linkType eq 0) { -- $toolArgs = " -L$HIP_LIB_PATH -lamdhip64 -L$ROCM_PATH/lib -lhsa-runtime64 -ldl -lnuma " . ${toolArgs}; -+ $toolArgs = " -L$HIP_LIB_PATH -lamdhip64 -L@rocm_runtime@/lib -lhsa-runtime64 -ldl -lnuma " . ${toolArgs}; - } else { - $toolArgs = ${toolArgs} . " -Wl,-rpath=$HIP_LIB_PATH:$ROCM_PATH/lib -lamdhip64 "; - } - # To support __fp16 and _Float16, explicitly link with compiler-rt -- $HIP_CLANG_BUILTIN_LIB="$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION/lib/$HIP_CLANG_TARGET/libclang_rt.builtins.a"; -+ $HIP_CLANG_BUILTIN_LIB="@clang@/resource-root/lib/$HIP_CLANG_TARGET/libclang_rt.builtins.a"; - if (-e $HIP_CLANG_BUILTIN_LIB) { -- $toolArgs .= " -L$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION/lib/$HIP_CLANG_TARGET -lclang_rt.builtins " -+ $toolArgs .= " -L@clang@/resource-root/lib/$HIP_CLANG_TARGET -lclang_rt.builtins " - } else { -- $toolArgs .= " -L$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION/lib/linux -lclang_rt.builtins-x86_64 " -+ $toolArgs .= " -L@clang@/resource-root/lib/linux -lclang_rt.builtins-x86_64 " - } - } - } -diff --git a/bin/hipconfig.pl b/bin/hipconfig.pl -index 5ddb8e9..6a76a2e 100755 ---- a/bin/hipconfig.pl -+++ b/bin/hipconfig.pl -@@ -77,7 +77,7 @@ if ($HIP_COMPILER eq "clang") { - $CPP_CONFIG = " -D__HIP_PLATFORM_HCC__= -D__HIP_PLATFORM_AMD__="; - - $HIP_PATH_INCLUDE = $HIP_PATH."/include"; -- $HIP_CLANG_INCLUDE = $HIP_CLANG_PATH."/../lib/clang/".$HIP_CLANG_VERSION; -+ $HIP_CLANG_INCLUDE = "@clang@/resource-root/include"; - if($isWindows) { - $CPP_CONFIG .= " -I\"$HIP_PATH_INCLUDE\" -I\"$HIP_CLANG_INCLUDE\""; - } else { -@@ -168,7 +168,7 @@ if (!$printed or $p_full) { - print ("HIP_CLANG_PATH : $HIP_CLANG_PATH\n"); - if ($isWindows) { - system("\"$HIP_CLANG_PATH/clang++\" --version"); -- system("\"$HIP_CLANG_PATH/llc\" --version"); -+ system("\"@llvm@/bin/llc\" --version"); - printf("hip-clang-cxxflags : "); - $win_output = `perl \"$HIP_PATH/bin/hipcc\" --cxxflags`; - printf("$win_output \n"); -@@ -177,7 +177,7 @@ if (!$printed or $p_full) { - printf("$win_output \n"); - } else { - system("$HIP_CLANG_PATH/clang++ --version"); -- system("$HIP_CLANG_PATH/llc --version"); -+ system("@llvm@/bin/llc --version"); - print ("hip-clang-cxxflags : "); - system("$HIP_PATH/bin/hipcc --cxxflags"); - printf("\n"); -@@ -219,8 +219,8 @@ if (!$printed or $p_full) { - system ("uname -a"); - } - -- if (-e "/usr/bin/lsb_release") { -- system ("/usr/bin/lsb_release -a"); -+ if (-e "@lsb_release@/bin/lsb_release") { -+ system ("@lsb_release@/bin/lsb_release -a"); - } - - print "\n" ; -diff --git a/hip-lang-config.cmake.in b/hip-lang-config.cmake.in -index 9250a68..f6e27b7 100644 ---- a/hip-lang-config.cmake.in -+++ b/hip-lang-config.cmake.in -@@ -71,8 +71,8 @@ get_filename_component(_IMPORT_PREFIX "${_DIR}/../../../" REALPATH) - - - #need _IMPORT_PREFIX to be set #FILE_REORG_BACKWARD_COMPATIBILITY --file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS "${_IMPORT_PREFIX}/../llvm/lib/clang/*/include") --file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS_REORG "${_IMPORT_PREFIX}/llvm/lib/clang/*/include") -+file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS "@clang@/resource-root/include") -+file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS_REORG "@clang@/resource-root/include") - find_path(HIP_CLANG_INCLUDE_PATH __clang_cuda_math.h - HINTS ${HIP_CLANG_INCLUDE_SEARCH_PATHS} - ${HIP_CLANG_INCLUDE_SEARCH_PATHS_REORG} -@@ -89,7 +89,7 @@ find_path(HSA_HEADER hsa/hsa.h - PATHS - "${_IMPORT_PREFIX}/../include" #FILE_REORG_BACKWARD_COMPATIBILITY - "${_IMPORT_PREFIX}/include" -- "${ROCM_PATH}/include" -+ "@rocm_runtime@/include" - ) - - if (NOT HSA_HEADER) -@@ -97,7 +97,7 @@ if (NOT HSA_HEADER) - endif() - - get_filename_component(HIP_COMPILER_INSTALL_PATH ${CMAKE_HIP_COMPILER} DIRECTORY) --file(GLOB HIP_CLANGRT_LIB_SEARCH_PATHS "${HIP_COMPILER_INSTALL_PATH}/../lib/clang/*/lib/*") -+file(GLOB HIP_CLANGRT_LIB_SEARCH_PATHS "@clang@/resource-root/lib/*") - find_library(CLANGRT_BUILTINS - NAMES - clang_rt.builtins diff --git a/pkgs/development/compilers/hip/0000-fixup-paths.patch b/pkgs/development/compilers/hip/0000-fixup-paths.patch deleted file mode 100644 index 423857218ee7..000000000000 --- a/pkgs/development/compilers/hip/0000-fixup-paths.patch +++ /dev/null @@ -1,62 +0,0 @@ -diff --git a/hip-config.cmake.in b/hip-config.cmake.in -index 89d1224..dc9ba05 100755 ---- a/hip-config.cmake.in -+++ b/hip-config.cmake.in -@@ -142,7 +142,7 @@ if(HIP_COMPILER STREQUAL "clang") - file(TO_CMAKE_PATH "${HIP_PATH}/../lc" HIP_CLANG_ROOT) - endif() - else() -- set(HIP_CLANG_ROOT "${ROCM_PATH}/llvm") -+ set(HIP_CLANG_ROOT "@clang@") - endif() - if(NOT HIP_CXX_COMPILER) - set(HIP_CXX_COMPILER ${CMAKE_CXX_COMPILER}) -@@ -171,7 +171,7 @@ if(HIP_COMPILER STREQUAL "clang") - get_filename_component(_HIP_CLANG_BIN_PATH "${_HIP_CLANG_REAL_PATH}" DIRECTORY) - get_filename_component(HIP_CLANG_ROOT "${_HIP_CLANG_BIN_PATH}" DIRECTORY) - endif() -- file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS ${HIP_CLANG_ROOT}/lib/clang/*/include) -+ file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS "@clang@/resource-root/include") - find_path(HIP_CLANG_INCLUDE_PATH stddef.h - HINTS - ${HIP_CLANG_INCLUDE_SEARCH_PATHS} -@@ -209,7 +209,7 @@ if(NOT WIN32) - "${_IMPORT_PREFIX}/include" - #FILE_REORG_BACKWARD_COMPATIBILITY ${_IMPORT_PREFIX}/../include is for Backward compatibility - "${_IMPORT_PREFIX}/../include" -- ${ROCM_PATH}/include -+ "@rocm_runtime@/include" - ) - - if (NOT HSA_HEADER) -@@ -291,7 +291,7 @@ if(HIP_COMPILER STREQUAL "clang") - endif() - endif() - -- file(GLOB HIP_CLANGRT_LIB_SEARCH_PATHS "${HIP_CLANG_ROOT}/lib/clang/*/lib/*") -+ file(GLOB HIP_CLANGRT_LIB_SEARCH_PATHS "@clang@/resource-root/lib/*") - find_library(CLANGRT_BUILTINS - NAMES - clang_rt.builtins -diff --git a/src/hip_embed_pch.sh b/src/hip_embed_pch.sh -index 0a1572b..2feb19a 100755 ---- a/src/hip_embed_pch.sh -+++ b/src/hip_embed_pch.sh -@@ -149,7 +149,7 @@ EOF - - $LLVM_DIR/bin/clang -cc1 -O3 -emit-pch -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -std=c++17 -fgnuc-version=4.2.1 -o $tmp/hip_wave64.pch -x hip-cpp-output - <$tmp/pch_wave64.cui && - -- $LLVM_DIR/bin/llvm-mc -o hip_pch.o $tmp/hip_pch.mcin --filetype=obj && -+ @llvm@/bin/llvm-mc -o hip_pch.o $tmp/hip_pch.mcin --filetype=obj && - - rm -rf $tmp - } -@@ -195,7 +195,7 @@ EOF - set -x - $LLVM_DIR/bin/clang -O3 --rocm-path=$HIP_INC_DIR/.. -std=c++14 -nogpulib --hip-version=4.4 -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR -isystem $HIP_AMD_INC_DIR --cuda-device-only -D__HIPCC_RTC__ -x hip $tmp/hipRTC_header.h -E -o $tmp/hiprtc && - cat $macroFile >> $tmp/hiprtc && -- $LLVM_DIR/bin/llvm-mc -o $tmp/hiprtc_header.o $tmp/hipRTC_header.mcin --filetype=obj && -+ @llvm@/bin/llvm-mc -o $tmp/hiprtc_header.o $tmp/hipRTC_header.mcin --filetype=obj && - $LLVM_DIR/bin/clang $tmp/hiprtc_header.o -o $rtc_shared_lib_out -shared && - $LLVM_DIR/bin/clang -O3 --rocm-path=$HIP_INC_DIR/.. -std=c++14 -nogpulib -nogpuinc -emit-llvm -c -o $tmp/tmp.bc --cuda-device-only -D__HIPCC_RTC__ --offload-arch=gfx906 -x hip-cpp-output $tmp/hiprtc && - rm -rf $tmp diff --git a/pkgs/development/compilers/hip/default.nix b/pkgs/development/compilers/hip/default.nix deleted file mode 100644 index 26fce1d8d490..000000000000 --- a/pkgs/development/compilers/hip/default.nix +++ /dev/null @@ -1,197 +0,0 @@ -{ lib -, stdenv -, fetchFromGitHub -, fetchpatch -, rocmUpdateScript -, substituteAll -, makeWrapper -, hip-common -, hipcc -, rocclr -, roctracer -, cmake -, perl -, llvm -, rocminfo -, rocm-thunk -, rocm-comgr -, rocm-device-libs -, rocm-runtime -, rocm-opencl-runtime -, cudatoolkit -, numactl -, libxml2 -, libX11 -, libglvnd -, doxygen -, graphviz -, fontconfig -, python3Packages -, buildDocs ? true -, buildTests ? false -, useNVIDIA ? false -}: - -let - hipPlatform = if useNVIDIA then "nvidia" else "amd"; - - wrapperArgs = [ - "--prefix PATH : $out/bin" - "--prefix LD_LIBRARY_PATH : ${rocm-runtime}" - "--set HIP_PLATFORM ${hipPlatform}" - "--set HIP_PATH $out" - "--set HIP_CLANG_PATH ${stdenv.cc}/bin" - "--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode" - "--set HSA_PATH ${rocm-runtime}" - "--set ROCM_PATH $out" - ] ++ lib.optionals useNVIDIA [ - "--set CUDA_PATH ${cudatoolkit}" - ]; -in stdenv.mkDerivation (finalAttrs: { - pname = "hip-${hipPlatform}"; - version = "5.4.4"; - - outputs = [ - "out" - ] ++ lib.optionals buildDocs [ - "doc" - ]; - - src = fetchFromGitHub { - owner = "ROCm-Developer-Tools"; - repo = "hipamd"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-FcuylhkG7HqLYXH1J6ND6IVEIbDzHp7h7jg2ZZ4XoFM="; - }; - - patches = [ - (substituteAll { - src = ./0000-fixup-paths.patch; - inherit llvm; - clang = stdenv.cc; - rocm_runtime = rocm-runtime; - }) - - # https://github.com/ROCm-Developer-Tools/hipamd/commit/be33ec55acc104a59d01df5912261d007c7f3ee9 - (fetchpatch { - url = "https://github.com/ROCm-Developer-Tools/hipamd/commit/be33ec55acc104a59d01df5912261d007c7f3ee9.patch"; - hash = "sha256-eTC4mUIN1FwRce1n38uDOlITFL/vpcOhvnaZTo5R7lo="; - }) - ]; - - nativeBuildInputs = [ - makeWrapper - cmake - perl - python3Packages.python - python3Packages.cppheaderparser - ] ++ lib.optionals buildDocs [ - doxygen - graphviz - fontconfig - ]; - - buildInputs = [ - numactl - libxml2 - libX11 - libglvnd - ]; - - propagatedBuildInputs = [ - stdenv.cc - llvm - rocminfo - rocm-thunk - rocm-comgr - rocm-device-libs - rocm-runtime - rocm-opencl-runtime - ] ++ lib.optionals useNVIDIA [ - cudatoolkit - ]; - - cmakeFlags = [ - "-DROCM_PATH=${rocminfo}" - "-DHIP_PLATFORM=${hipPlatform}" - "-DHIP_COMMON_DIR=${hip-common}" - "-DHIPCC_BIN_DIR=${hipcc}/bin" - "-DHIP_LLVM_ROOT=${stdenv.cc}" - "-DROCCLR_PATH=${rocclr}" - "-DAMD_OPENCL_PATH=${rocm-opencl-runtime.src}" - "-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext" - # Temporarily set variables to work around upstream CMakeLists issue - # Can be removed once https://github.com/ROCm-Developer-Tools/hipamd/issues/55 is fixed - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_INCLUDEDIR=include" - "-DCMAKE_INSTALL_LIBDIR=lib" - ] ++ lib.optionals buildTests [ - "-DHIP_CATCH_TEST=1" - ]; - - postPatch = '' - export HIP_CLANG_PATH=${stdenv.cc}/bin - patchShebangs src - '' + lib.optionalString buildDocs '' - export HOME=$(mktemp -d) - export FONTCONFIG_FILE=${fontconfig.out}/etc/fonts/fonts.conf - ''; - - doCheck = buildTests; - checkTarget = "build_tests"; - - preCheck = lib.optionalString buildTests '' - export ROCM_PATH=$PWD - export DEVICE_LIB_PATH=${rocm-device-libs}/amdgcn/bitcode - patchShebangs bin - ''; - - postInstall = '' - patchShebangs $out/bin - cp -a $out/bin/hipcc $out/bin/hipcc-pl - cp -a $out/bin/hipconfig $out/bin/hipconfig-pl - wrapProgram $out/bin/hipcc --set HIP_USE_PERL_SCRIPTS 0 - wrapProgram $out/bin/hipconfig --set HIP_USE_PERL_SCRIPTS 0 - wrapProgram $out/bin/hipcc.bin ${lib.concatStringsSep " " wrapperArgs} - wrapProgram $out/bin/hipconfig.bin ${lib.concatStringsSep " " wrapperArgs} - wrapProgram $out/bin/hipcc-pl --set HIP_USE_PERL_SCRIPTS 1 - wrapProgram $out/bin/hipconfig-pl --set HIP_USE_PERL_SCRIPTS 1 - wrapProgram $out/bin/hipcc.pl ${lib.concatStringsSep " " wrapperArgs} - wrapProgram $out/bin/hipconfig.pl ${lib.concatStringsSep " " wrapperArgs} - ''; - - passthru = { - # All known and valid general GPU targets - # We cannot use this for each ROCm library, as each defines their own supported targets - # See: https://github.com/RadeonOpenCompute/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix - gpuTargets = lib.forEach [ - "803" - "900" - "906" - "908" - "90a" - "1010" - "1012" - "1030" - ] (target: "gfx${target}"); - - updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - }; - - meta = with lib; { - description = "C++ Heterogeneous-Compute Interface for Portability specifically for AMD platform"; - homepage = "https://github.com/ROCm-Developer-Tools/hipamd"; - license = with licenses; [ mit ]; - maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; - platforms = platforms.linux; - # Tests require GPU, also include issues - broken = - versions.minor finalAttrs.version != versions.minor hip-common.version || - versions.minor finalAttrs.version != versions.minor hipcc.version || - buildTests; - }; -}) diff --git a/pkgs/development/compilers/hipcc/0000-fixup-paths.patch b/pkgs/development/compilers/hipcc/0000-fixup-paths.patch deleted file mode 100644 index 4f52c1ad11aa..000000000000 --- a/pkgs/development/compilers/hipcc/0000-fixup-paths.patch +++ /dev/null @@ -1,130 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index c21f247..5bd3e45 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -17,6 +17,6 @@ if (NOT WIN32) # C++17 does not require the std lib linking - target_link_libraries(hipconfig.bin ${LINK_LIBS} ) # for hipconfig.bin - endif() - --set(HIP_VERSION_MAJOR 4 PARENT_SCOPE) --set(HIP_VERSION_MINOR 4 PARENT_SCOPE) --set(HIP_VERSION_PATCH 4 PARENT_SCOPE) -+set(HIP_VERSION_MAJOR @version_major@) -+set(HIP_VERSION_MINOR @version_minor@) -+set(HIP_VERSION_PATCH @version_patch@) -diff --git a/src/hipBin_amd.h b/src/hipBin_amd.h -index f94e4a5..f0b1b83 100644 ---- a/src/hipBin_amd.h -+++ b/src/hipBin_amd.h -@@ -207,7 +207,7 @@ void HipBinAmd::initializeHipCXXFlags() { - hipClangIncludePath = getCompilerIncludePath(); - hipCXXFlags += " -isystem \"" + hipClangIncludePath; - fs::path hipCXXFlagsTempFs = hipCXXFlags; -- hipCXXFlagsTempFs /= "..\""; -+ hipCXXFlagsTempFs /= "\""; - hipCXXFlags = hipCXXFlagsTempFs.string(); - const EnvVariables& var = getEnvVariables(); - // Allow __fp16 as function parameter and return type. -@@ -266,7 +266,7 @@ void HipBinAmd::printCompilerInfo() const { - string cmd = hipClangPath + "/clang++ --version"; - system(cmd.c_str()); // hipclang version - cout << "llc-version :" << endl; -- cmd = hipClangPath + "/llc --version"; -+ cmd = "@llvm@/bin/llc --version"; - system(cmd.c_str()); // llc version - cout << "hip-clang-cxxflags :" << endl; - cmd = hipPath + "/bin/hipcc --cxxflags"; -@@ -278,7 +278,7 @@ void HipBinAmd::printCompilerInfo() const { - } else { - string cmd = hipClangPath + "/clang++ --version"; - system(cmd.c_str()); // hipclang version -- cmd = hipClangPath + "/llc --version"; -+ cmd = "@llvm@/bin/llc --version"; - system(cmd.c_str()); // llc version - cout << "hip-clang-cxxflags :" << endl; - cmd = hipPath + "/bin/hipcc --cxxflags"; -@@ -331,10 +331,7 @@ string HipBinAmd::getCppConfig() { - hipPathInclude /= "include"; - - const string& compilerPath = getCompilerPath(); -- hipClangInclude = compilerPath; -- hipClangInclude = hipClangInclude.parent_path(); -- hipClangInclude /= "lib/clang/"; -- hipClangInclude /= compilerVersion; -+ hipClangInclude = "@clang@/resource-root/include"; - string hipClangPath = hipClangInclude.string(); - - const OsType& osInfo = getOSInfo(); -@@ -442,17 +439,7 @@ string HipBinAmd::getHipCC() const { - - - string HipBinAmd::getCompilerIncludePath() { -- string hipClangVersion, includePath, compilerIncludePath; -- const string& hipClangPath = getCompilerPath(); -- hipClangVersion = getCompilerVersion(); -- fs::path includePathfs = hipClangPath; -- includePathfs = includePathfs.parent_path(); -- includePathfs /= "lib/clang/"; -- includePathfs /= hipClangVersion; -- includePathfs /= "include"; -- includePathfs = fs::absolute(includePathfs).string(); -- compilerIncludePath = includePathfs.string(); -- return compilerIncludePath; -+ return "@clang@/resource-root/include"; - } - - -@@ -506,8 +493,8 @@ void HipBinAmd::printFull() { - cout << endl << "== Envirnoment Variables" << endl; - printEnvironmentVariables(); - getSystemInfo(); -- if (fs::exists("/usr/bin/lsb_release")) -- system("/usr/bin/lsb_release -a"); -+ if (fs::exists("@lsb_release@/bin/lsb_release")) -+ system("@lsb_release@/bin/lsb_release -a"); - cout << endl; - } - -@@ -993,7 +980,7 @@ void HipBinAmd::executeHipCCCmd(vector argv) { - } else if (os != windows) { - // Else try using rocm_agent_enumerator - string ROCM_AGENT_ENUM; -- ROCM_AGENT_ENUM = roccmPath + "/bin/rocm_agent_enumerator"; -+ ROCM_AGENT_ENUM = "@rocminfo@/bin/rocm_agent_enumerator"; - targetsStr = ROCM_AGENT_ENUM +" -t GPU"; - SystemCmdOut sysOut = hipBinUtilPtr_->exec(targetsStr.c_str()); - regex toReplace("\n+"); -@@ -1097,7 +1084,7 @@ void HipBinAmd::executeHipCCCmd(vector argv) { - string hipClangVersion, toolArgTemp; - if (linkType == 0) { - toolArgTemp = " -L"+ hipLibPath + "-lamdhip64 -L" + -- roccmPath+ "/lib -lhsa-runtime64 -ldl -lnuma " + toolArgs; -+ "@rocm_runtime@/lib -lhsa-runtime64 -ldl -lnuma " + toolArgs; - toolArgs = toolArgTemp; - } else { - toolArgTemp = toolArgs + " -Wl,--enable-new-dtags -Wl,-rpath=" + hipLibPath + ":" -@@ -1107,8 +1094,7 @@ void HipBinAmd::executeHipCCCmd(vector argv) { - - hipClangVersion = getCompilerVersion(); - // To support __fp16 and _Float16, explicitly link with compiler-rt -- toolArgs += " -L" + hipClangPath + "/../lib/clang/" + -- hipClangVersion + "/lib/linux -lclang_rt.builtins-x86_64 "; -+ toolArgs += " -L@clang@/resource-root/lib/linux -lclang_rt.builtins-x86_64 "; - } - if (!var.hipccCompileFlagsAppendEnv_.empty()) { - HIPCXXFLAGS += " " + var.hipccCompileFlagsAppendEnv_ + " "; -diff --git a/src/hipBin_nvidia.h b/src/hipBin_nvidia.h -index 6feb315..b61739d 100644 ---- a/src/hipBin_nvidia.h -+++ b/src/hipBin_nvidia.h -@@ -157,8 +157,8 @@ void HipBinNvidia::printFull() { - cout << endl << "== Envirnoment Variables" << endl; - printEnvironmentVariables(); - getSystemInfo(); -- if (fs::exists("/usr/bin/lsb_release")) -- system("/usr/bin/lsb_release -a"); -+ if (fs::exists("@lsb_release@/bin/lsb_release")) -+ system("@lsb_release@/bin/lsb_release -a"); - } - - // returns hip include diff --git a/pkgs/development/libraries/rocclr/default.nix b/pkgs/development/libraries/rocclr/default.nix deleted file mode 100644 index 09876ea98a91..000000000000 --- a/pkgs/development/libraries/rocclr/default.nix +++ /dev/null @@ -1,64 +0,0 @@ -{ lib -, stdenv -, fetchFromGitHub -, fetchpatch -, rocmUpdateScript -, rocm-comgr -}: - -stdenv.mkDerivation (finalAttrs: { - pname = "rocclr"; - version = "5.4.4"; - - src = fetchFromGitHub { - owner = "ROCm-Developer-Tools"; - repo = "ROCclr"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-DbN7kL8oyaPeYQB19Q96L3wX66v62TMSWl0Yor7Q4kE="; - }; - - patches = [ - # Enable support for gfx8 again - # See the upstream issue: https://github.com/RadeonOpenCompute/ROCm/issues/1659 - # And the arch patch: https://github.com/rocm-arch/rocm-arch/pull/742 - (fetchpatch { - url = "https://raw.githubusercontent.com/John-Gee/rocm-arch/d6812d308fee3caf2b6bb01b4d19fe03a6a0e3bd/rocm-opencl-runtime/enable-gfx800.patch"; - hash = "sha256-59jFDIIsTTZcNns9RyMVWPRUggn/bSlAGrky4quu8B4="; - }) - ]; - - postPatch = '' - substituteInPlace device/comgrctx.cpp \ - --replace "libamd_comgr.so" "${rocm-comgr}/lib/libamd_comgr.so" - ''; - - dontConfigure = true; - dontBuild = true; - - installPhase = '' - runHook preInstall - - mkdir -p $out - cp -a * $out/ - - runHook postInstall - ''; - - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - - meta = with lib; { - description = "Source package of the Radeon Open Compute common language runtime"; - homepage = "https://github.com/ROCm-Developer-Tools/ROCclr"; - license = licenses.mit; - maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; - # rocclr seems to have some AArch64 ifdefs, but does not seem - # to be supported yet by the build infrastructure. Recheck in - # the future. - platforms = [ "x86_64-linux" ]; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; - }; -}) diff --git a/pkgs/development/libraries/rocm-comgr/cmake.patch b/pkgs/development/libraries/rocm-comgr/cmake.patch deleted file mode 100644 index ae966745171c..000000000000 --- a/pkgs/development/libraries/rocm-comgr/cmake.patch +++ /dev/null @@ -1,365 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 62b857b..d21c7f4 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -147,8 +147,8 @@ if (UNIX) - list(APPEND AMD_COMGR_PUBLIC_LINKER_OPTIONS -pthread) - if (NOT APPLE AND COMGR_BUILD_SHARED_LIBS) - configure_file( -- ${CMAKE_CURRENT_SOURCE_DIR}/src/exportmap.in -- ${CMAKE_CURRENT_BINARY_DIR}/src/exportmap @ONLY) -+ src/exportmap.in -+ src/exportmap @ONLY) - list(APPEND AMD_COMGR_PRIVATE_LINKER_OPTIONS - "-Wl,--version-script=${CMAKE_CURRENT_BINARY_DIR}/src/exportmap") - # When building a shared library with -fsanitize=address we can't be -@@ -175,10 +175,6 @@ endif() - # the shared header. - list(APPEND AMD_COMGR_PRIVATE_COMPILE_DEFINITIONS AMD_COMGR_EXPORT) - --configure_file( -- ${CMAKE_CURRENT_SOURCE_DIR}/include/amd_comgr.h.in -- ${CMAKE_CURRENT_BINARY_DIR}/include/amd_comgr.h @ONLY) -- - include(bc2h) - include(opencl_pch) - include(DeviceLibs) -@@ -212,10 +208,14 @@ target_include_directories(amd_comgr - $ - $) - -+configure_file( -+ include/amd_comgr.h.in -+ include/amd_comgr.h @ONLY) -+ - set(AMD_COMGR_CONFIG_NAME amd_comgr-config.cmake) - set(AMD_COMGR_TARGETS_NAME amd_comgr-targets.cmake) - set(AMD_COMGR_VERSION_NAME amd_comgr-config-version.cmake) --set(AMD_COMGR_PACKAGE_PREFIX ${CMAKE_INSTALL_LIBDIR}/cmake/amd_comgr) -+set(AMD_COMGR_PACKAGE_PREFIX cmake/amd_comgr) - - # Generate the build-tree package. - set(AMD_COMGR_PREFIX_CODE) -@@ -226,13 +226,13 @@ if (NOT COMGR_BUILD_SHARED_LIBS) - endif() - - set(AMD_COMGR_TARGETS_PATH -- "${CMAKE_CURRENT_BINARY_DIR}/${AMD_COMGR_PACKAGE_PREFIX}/${AMD_COMGR_TARGETS_NAME}") -+ "${CMAKE_CURRENT_BINARY_DIR}/lib/${AMD_COMGR_PACKAGE_PREFIX}/${AMD_COMGR_TARGETS_NAME}") - set(AMD_COMGR_VERSION_PATH -- "${CMAKE_CURRENT_BINARY_DIR}/${AMD_COMGR_PACKAGE_PREFIX}/${AMD_COMGR_VERSION_NAME}") -+ "${CMAKE_CURRENT_BINARY_DIR}/lib/${AMD_COMGR_PACKAGE_PREFIX}/${AMD_COMGR_VERSION_NAME}") - export(TARGETS amd_comgr -- FILE "${AMD_COMGR_PACKAGE_PREFIX}/${AMD_COMGR_TARGETS_NAME}") -+ FILE "lib/${AMD_COMGR_PACKAGE_PREFIX}/${AMD_COMGR_TARGETS_NAME}") - configure_file("cmake/${AMD_COMGR_CONFIG_NAME}.in" -- "${AMD_COMGR_PACKAGE_PREFIX}/${AMD_COMGR_CONFIG_NAME}" -+ "lib/${AMD_COMGR_PACKAGE_PREFIX}/${AMD_COMGR_CONFIG_NAME}" - @ONLY) - write_basic_package_version_file("${AMD_COMGR_VERSION_PATH}" - VERSION "${amd_comgr_VERSION}" -@@ -266,7 +266,7 @@ install(FILES - set(AMD_COMGR_PREFIX_CODE " - # Derive absolute install prefix from config file path. - get_filename_component(AMD_COMGR_PREFIX \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)") --string(REGEX REPLACE "/" ";" count "${AMD_COMGR_PACKAGE_PREFIX}") -+string(REGEX REPLACE "/" ";" count "${CMAKE_INSTALL_LIBDIR}/${AMD_COMGR_PACKAGE_PREFIX}") - foreach(p ${count}) - set(AMD_COMGR_PREFIX_CODE "${AMD_COMGR_PREFIX_CODE} - get_filename_component(AMD_COMGR_PREFIX \"\${AMD_COMGR_PREFIX}\" PATH)") -@@ -278,20 +278,20 @@ if (NOT COMGR_BUILD_SHARED_LIBS) - string(APPEND AMD_COMGR_PREFIX_CODE "find_dependency(LLD REQUIRED)\n") - endif() - --set(AMD_COMGR_TARGETS_PATH "\${AMD_COMGR_PREFIX}/${AMD_COMGR_PACKAGE_PREFIX}/${AMD_COMGR_TARGETS_NAME}") -+set(AMD_COMGR_TARGETS_PATH "\${AMD_COMGR_PREFIX}/${CMAKE_INSTALL_LIBDIR}/${AMD_COMGR_PACKAGE_PREFIX}/${AMD_COMGR_TARGETS_NAME}") - configure_file("cmake/${AMD_COMGR_CONFIG_NAME}.in" -- "${CMAKE_CURRENT_BINARY_DIR}/${AMD_COMGR_CONFIG_NAME}.install" -+ "${AMD_COMGR_CONFIG_NAME}.install" - @ONLY) - install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/${AMD_COMGR_CONFIG_NAME}.install" -- DESTINATION "${AMD_COMGR_PACKAGE_PREFIX}" -+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/${AMD_COMGR_PACKAGE_PREFIX}" - RENAME "${AMD_COMGR_CONFIG_NAME}") - install(EXPORT amd_comgr_export -- DESTINATION "${AMD_COMGR_PACKAGE_PREFIX}" -+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/${AMD_COMGR_PACKAGE_PREFIX}" - FILE "${AMD_COMGR_TARGETS_NAME}") - install(FILES - "${AMD_COMGR_VERSION_PATH}" -- DESTINATION "${AMD_COMGR_PACKAGE_PREFIX}") -+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/${AMD_COMGR_PACKAGE_PREFIX}") - - if(TARGET clangFrontendTool) - set(CLANG_LIBS -diff --git a/cmake/DeviceLibs.cmake b/cmake/DeviceLibs.cmake -index 27e9546..dfe1b57 100644 ---- a/cmake/DeviceLibs.cmake -+++ b/cmake/DeviceLibs.cmake -@@ -1,8 +1,7 @@ - set(INC_DIR ${CMAKE_CURRENT_BINARY_DIR}/include) - - set(GEN_LIBRARY_INC_FILE ${INC_DIR}/libraries.inc) -- --file(WRITE ${GEN_LIBRARY_INC_FILE} "// Automatically generated file; DO NOT EDIT.\n") -+set(GEN_LIBRARY_DEFS_INC_FILE ${INC_DIR}/libraries_defs.inc) - - # cmake does not provide a way to query targets produced by a project, - # so we have to make one up. Ordinarily, individual library target -@@ -23,6 +22,7 @@ if(NOT AMD_DEVICE_LIBS_TARGETS) - message(FATAL_ERROR "Could not find list of device libraries") - endif() - -+set(TARGETS_INCLUDES "") - foreach(AMDGCN_LIB_TARGET ${AMD_DEVICE_LIBS_TARGETS}) - set(header ${AMDGCN_LIB_TARGET}.inc) - -@@ -54,75 +54,52 @@ foreach(AMDGCN_LIB_TARGET ${AMD_DEVICE_LIBS_TARGETS}) - add_custom_target(${AMDGCN_LIB_TARGET}_header DEPENDS ${INC_DIR}/${header}) - add_dependencies(amd_comgr ${AMDGCN_LIB_TARGET}_header) - -- file(APPEND ${GEN_LIBRARY_INC_FILE} "#include \"${header}\"\n") -+ list(APPEND TARGETS_INCLUDES "#include \"${header}\"") -+endforeach() -+ -+list(JOIN TARGETS_INCLUDES "\n" TARGETS_INCLUDES) -+file(GENERATE OUTPUT ${GEN_LIBRARY_INC_FILE} CONTENT "${TARGETS_INCLUDES}") -+ -+foreach(OPENCL_VERSION 1.2 2.0) -+ string(REPLACE . _ OPENCL_UNDERSCORE_VERSION ${OPENCL_VERSION}) -+ add_custom_command(OUTPUT ${INC_DIR}/opencl${OPENCL_VERSION}-c.inc -+ COMMAND bc2h ${CMAKE_CURRENT_BINARY_DIR}/opencl${OPENCL_VERSION}-c.pch -+ ${INC_DIR}/opencl${OPENCL_VERSION}-c.inc -+ opencl${OPENCL_UNDERSCORE_VERSION}_c -+ DEPENDS bc2h ${CMAKE_CURRENT_BINARY_DIR}/opencl${OPENCL_VERSION}-c.pch -+ COMMENT "Generating opencl${OPENCL_VERSION}-c.inc" -+ ) -+ set_property(DIRECTORY APPEND PROPERTY -+ ADDITIONAL_MAKE_CLEAN_FILES ${INC_DIR}/opencl${OPENCL_VERSION}-c.inc) -+ add_custom_target(opencl${OPENCL_VERSION}-c.inc_target DEPENDS ${INC_DIR}/opencl${OPENCL_VERSION}-c.inc) -+ add_dependencies(amd_comgr opencl${OPENCL_VERSION}-c.inc_target) - endforeach() - --add_custom_command(OUTPUT ${INC_DIR}/opencl1.2-c.inc -- COMMAND bc2h ${CMAKE_CURRENT_BINARY_DIR}/opencl1.2-c.pch -- ${INC_DIR}/opencl1.2-c.inc -- opencl1_2_c -- DEPENDS bc2h ${CMAKE_CURRENT_BINARY_DIR}/opencl1.2-c.pch -- COMMENT "Generating opencl1.2-c.inc" --) --set_property(DIRECTORY APPEND PROPERTY -- ADDITIONAL_MAKE_CLEAN_FILES ${INC_DIR}/opencl1.2-c.inc) --add_custom_target(opencl1.2-c.inc_target DEPENDS ${INC_DIR}/opencl1.2-c.inc) --add_dependencies(amd_comgr opencl1.2-c.inc_target) --file(APPEND ${GEN_LIBRARY_INC_FILE} "#include \"opencl1.2-c.inc\"\n") -- --add_custom_command(OUTPUT ${INC_DIR}/opencl2.0-c.inc -- COMMAND bc2h ${CMAKE_CURRENT_BINARY_DIR}/opencl2.0-c.pch -- ${INC_DIR}/opencl2.0-c.inc -- opencl2_0_c -- DEPENDS bc2h ${CMAKE_CURRENT_BINARY_DIR}/opencl2.0-c.pch -- COMMENT "Generating opencl2.0-c.inc" --) --set_property(DIRECTORY APPEND PROPERTY -- ADDITIONAL_MAKE_CLEAN_FILES ${INC_DIR}/opencl2.0-c.inc) --add_custom_target(opencl2.0-c.inc_target DEPENDS ${INC_DIR}/opencl2.0-c.inc) --add_dependencies(amd_comgr opencl2.0-c.inc_target) --file(APPEND ${GEN_LIBRARY_INC_FILE} "#include \"opencl2.0-c.inc\"\n") -- --# Generate function to select libraries for a given GFXIP number. --file(APPEND ${GEN_LIBRARY_INC_FILE} "#include \"llvm/ADT/StringRef.h\"\n") --file(APPEND ${GEN_LIBRARY_INC_FILE} -- "static std::tuple get_oclc_isa_version(llvm::StringRef gfxip) {") -+set(TARGETS_DEFS "") -+list(APPEND TARGETS_DEFS "#ifndef AMD_DEVICE_LIBS_TARGET\n#define AMD_DEVICE_LIBS_TARGET(t)\n#endif") -+list(APPEND TARGETS_DEFS "#ifndef AMD_DEVICE_LIBS_GFXIP\n#define AMD_DEVICE_LIBS_GFXIP(t, g)\n#endif") -+list(APPEND TARGETS_DEFS "#ifndef AMD_DEVICE_LIBS_FUNCTION\n#define AMD_DEVICE_LIBS_FUNCTION(t, f)\n#endif") -+list(APPEND TARGETS_DEFS "") - foreach(AMDGCN_LIB_TARGET ${AMD_DEVICE_LIBS_TARGETS}) -+ list(APPEND TARGETS_DEFS "AMD_DEVICE_LIBS_TARGET(${AMDGCN_LIB_TARGET})") -+ # Generate function to select libraries for a given GFXIP number. - if (${AMDGCN_LIB_TARGET} MATCHES "^oclc_isa_version_.+$") - string(REGEX REPLACE "^oclc_isa_version_(.+)$" "\\1" gfxip ${AMDGCN_LIB_TARGET}) -- file(APPEND ${GEN_LIBRARY_INC_FILE} -- "if (gfxip == \"${gfxip}\") return std::make_tuple(\"${AMDGCN_LIB_TARGET}.bc\", ${AMDGCN_LIB_TARGET}_lib, ${AMDGCN_LIB_TARGET}_lib_size);") -+ list(APPEND TARGETS_DEFS "AMD_DEVICE_LIBS_GFXIP(${AMDGCN_LIB_TARGET}, \"${gfxip}\")") - endif() --endforeach() --file(APPEND ${GEN_LIBRARY_INC_FILE} -- "return std::make_tuple(nullptr, nullptr, 0); }") -- --# Generate function to select libraries for given feature. --foreach(AMDGCN_LIB_TARGET ${AMD_DEVICE_LIBS_TARGETS}) -+ # Generate function to select libraries for given feature. - if (${AMDGCN_LIB_TARGET} MATCHES "^oclc_.*_on$") - string(REGEX REPLACE "^oclc_(.*)_on" "\\1" function ${AMDGCN_LIB_TARGET}) -- file(APPEND ${GEN_LIBRARY_INC_FILE} -- "static std::tuple get_oclc_${function}(bool on) { \ -- return std::make_tuple( \ -- on ? \"oclc_${function}_on_lib.bc\" : \"oclc_${function}_off_lib.bc\", \ -- on ? oclc_${function}_on_lib : oclc_${function}_off_lib, \ -- on ? oclc_${function}_on_lib_size : oclc_${function}_off_lib_size \ -- ); }") -+ list(APPEND TARGETS_DEFS "AMD_DEVICE_LIBS_FUNCTION(${AMDGCN_LIB_TARGET}, ${function})") - endif() - endforeach() - --# Generate function yield all libraries. --file(APPEND ${GEN_LIBRARY_INC_FILE} "\n#include \"llvm/ADT/ArrayRef.h\"\n") --file(APPEND ${GEN_LIBRARY_INC_FILE} -- "llvm::ArrayRef> COMGR::getDeviceLibraries() { \ -- static std::tuple DeviceLibs[] = {") --foreach(AMDGCN_LIB_TARGET ${AMD_DEVICE_LIBS_TARGETS}) -- file(APPEND ${GEN_LIBRARY_INC_FILE} -- "{\"${AMDGCN_LIB_TARGET}.bc\", llvm::StringRef(reinterpret_cast(${AMDGCN_LIB_TARGET}_lib), ${AMDGCN_LIB_TARGET}_lib_size)},") --endforeach() --file(APPEND ${GEN_LIBRARY_INC_FILE} -- "}; \ -- return DeviceLibs; \ -- }") -+list(APPEND TARGETS_DEFS "") -+list(APPEND TARGETS_DEFS "#undef AMD_DEVICE_LIBS_TARGET") -+list(APPEND TARGETS_DEFS "#undef AMD_DEVICE_LIBS_GFXIP") -+list(APPEND TARGETS_DEFS "#undef AMD_DEVICE_LIBS_FUNCTION") -+ -+list(JOIN TARGETS_DEFS "\n" TARGETS_DEFS) -+file(GENERATE OUTPUT ${GEN_LIBRARY_DEFS_INC_FILE} CONTENT "${TARGETS_DEFS}") - - include_directories(${INC_DIR}) -diff --git a/cmake/bc2h.cmake b/cmake/bc2h.cmake -index 146fe2b..9134985 100644 ---- a/cmake/bc2h.cmake -+++ b/cmake/bc2h.cmake -@@ -1,40 +1,41 @@ --file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bc2h.c --"#include \n" --"int main(int argc, char **argv){\n" --" FILE *ifp, *ofp;\n" --" int c, i, l;\n" --" if (argc != 4) return 1;\n" --" ifp = fopen(argv[1], \"rb\");\n" --" if (!ifp) return 1;\n" --" i = fseek(ifp, 0, SEEK_END);\n" --" if (i < 0) return 1;\n" --" l = ftell(ifp);\n" --" if (l < 0) return 1;\n" --" i = fseek(ifp, 0, SEEK_SET);\n" --" if (i < 0) return 1;\n" --" ofp = fopen(argv[2], \"wb+\");\n" --" if (!ofp) return 1;\n" --" fprintf(ofp, \"#define %s_size %d\\n\\n\"\n" --" \"#if defined __GNUC__\\n\"\n" --" \"__attribute__((aligned (4096)))\\n\"\n" --" \"#elif defined _MSC_VER\\n\"\n" --" \"__declspec(align(4096))\\n\"\n" --" \"#endif\\n\"\n" --" \"static const unsigned char %s[%s_size+1] = {\",\n" --" argv[3], l,\n" --" argv[3], argv[3]);\n" --" i = 0;\n" --" while ((c = getc(ifp)) != EOF) {\n" --" if (0 == (i&7)) fprintf(ofp, \"\\n \");\n" --" fprintf(ofp, \" 0x%02x,\", c);\n" --" ++i;\n" --" }\n" --" fprintf(ofp, \" 0x00\\n};\\n\\n\");\n" --" fclose(ifp);\n" --" fclose(ofp);\n" --" return 0;\n" --"}\n" --) -+file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/bc2h.c -+ CONTENT -+"#include -+int main(int argc, char **argv){ -+ FILE *ifp, *ofp; -+ int c, i, l; -+ if (argc != 4) return 1; -+ ifp = fopen(argv[1], \"rb\"); -+ if (!ifp) return 1; -+ i = fseek(ifp, 0, SEEK_END); -+ if (i < 0) return 1; -+ l = ftell(ifp); -+ if (l < 0) return 1; -+ i = fseek(ifp, 0, SEEK_SET); -+ if (i < 0) return 1; -+ ofp = fopen(argv[2], \"wb+\"); -+ if (!ofp) return 1; -+ fprintf(ofp, \"#define %s_size %d\\n\\n\" -+ \"#if defined __GNUC__\\n\" -+ \"__attribute__((aligned (4096)))\\n\" -+ \"#elif defined _MSC_VER\\n\" -+ \"__declspec(align(4096))\\n\" -+ \"#endif\\n\" -+ \"static const unsigned char %s[%s_size+1] = {\", -+ argv[3], l, -+ argv[3], argv[3]); -+ i = 0; -+ while ((c = getc(ifp)) != EOF) { -+ if (0 == (i&7)) fprintf(ofp, \"\\n \"); -+ fprintf(ofp, \" 0x%02x,\", c); -+ ++i; -+ } -+ fprintf(ofp, \" 0x00\\n};\\n\\n\"); -+ fclose(ifp); -+ fclose(ofp); -+ return 0; -+} -+") - - add_executable(bc2h ${CMAKE_CURRENT_BINARY_DIR}/bc2h.c) - if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") -diff --git a/src/comgr-device-libs.cpp b/src/comgr-device-libs.cpp -index 4d2b914..80786d1 100644 ---- a/src/comgr-device-libs.cpp -+++ b/src/comgr-device-libs.cpp -@@ -35,7 +35,7 @@ - - #include "comgr-device-libs.h" - #include "comgr.h" --#include "libraries.inc" -+#include "comgr-libraries.h" - #include "llvm/ADT/StringSwitch.h" - #include - -diff --git a/src/comgr-libraries.h b/src/comgr-libraries.h -new file mode 100644 -index 0000000..3caa0a0 ---- /dev/null -+++ b/src/comgr-libraries.h -@@ -0,0 +1,34 @@ -+#include "libraries.inc" -+#include "opencl1.2-c.inc" -+#include "opencl2.0-c.inc" -+#include "llvm/ADT/StringRef.h" -+#include "llvm/ADT/ArrayRef.h" -+ -+static std::tuple get_oclc_isa_version(llvm::StringRef gfxip) { -+#define AMD_DEVICE_LIBS_GFXIP(target, target_gfxip) \ -+ if (gfxip == target_gfxip) return std::make_tuple(#target ".bc", target##_lib, target##_lib_size); -+#include "libraries_defs.inc" -+ -+ return std::make_tuple(nullptr, nullptr, 0); -+} -+ -+#define AMD_DEVICE_LIBS_FUNCTION(target, function) \ -+ static std::tuple get_oclc_##function(bool on) { \ -+ return std::make_tuple( \ -+ on ? "oclc_" #function "_on_lib.bc" : "oclc_" #function "_off_lib.bc", \ -+ on ? oclc_##function##_on_lib : oclc_##function##_off_lib, \ -+ on ? oclc_##function##_on_lib_size : oclc_##function##_off_lib_size \ -+ ); \ -+ } -+#include "libraries_defs.inc" -+ -+llvm::ArrayRef> COMGR::getDeviceLibraries() { -+ static std::tuple DeviceLibs[] = { -+#define AMD_DEVICE_LIBS_TARGET(target) \ -+ {#target ".bc", llvm::StringRef(reinterpret_cast(target##_lib), target##_lib_size)}, -+#include "libraries_defs.inc" -+ }; -+ return DeviceLibs; -+} -+ -+ diff --git a/pkgs/development/libraries/rocm-opencl-icd/default.nix b/pkgs/development/libraries/rocm-opencl-icd/default.nix deleted file mode 100644 index 4602d3646e47..000000000000 --- a/pkgs/development/libraries/rocm-opencl-icd/default.nix +++ /dev/null @@ -1,26 +0,0 @@ -{ lib -, stdenv -, callPackage -, rocm-opencl-runtime -}: - -stdenv.mkDerivation rec { - pname = "rocm-opencl-icd"; - version = rocm-opencl-runtime.version; - - dontUnpack = true; - - installPhase = '' - mkdir -p $out/etc/OpenCL/vendors - echo "${rocm-opencl-runtime}/lib/libamdocl64.so" > $out/etc/OpenCL/vendors/amdocl64.icd - ''; - - passthru.impureTests = { rocm-opencl = callPackage ./test.nix { }; }; - - meta = with lib; { - description = "OpenCL ICD definition for AMD GPUs using the ROCm stack"; - license = licenses.mit; - maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; - platforms = platforms.linux; - }; -} diff --git a/pkgs/development/libraries/rocm-opencl-icd/test.nix b/pkgs/development/libraries/rocm-opencl-icd/test.nix deleted file mode 100644 index 398a4818e7c3..000000000000 --- a/pkgs/development/libraries/rocm-opencl-icd/test.nix +++ /dev/null @@ -1,19 +0,0 @@ -{ lib, makeImpureTest, clinfo, rocm-opencl-icd, rocm-smi }: -makeImpureTest { - name = "rocm-opencl"; - testedPackage = "rocm-opencl-icd"; - - nativeBuildInputs = [ clinfo rocm-smi ]; - - OCL_ICD_VENDORS = "${rocm-opencl-icd}/etc/OpenCL/vendors/"; - - testScript = '' - # Test fails if the number of platforms is 0 - clinfo | grep -E 'Number of platforms * [1-9]' - rocm-smi | grep -A1 GPU - ''; - - meta = with lib; { - maintainers = teams.rocm.members; - }; -} diff --git a/pkgs/development/libraries/rocm-opencl-runtime/default.nix b/pkgs/development/libraries/rocm-opencl-runtime/default.nix deleted file mode 100644 index ebdb4e3177d7..000000000000 --- a/pkgs/development/libraries/rocm-opencl-runtime/default.nix +++ /dev/null @@ -1,69 +0,0 @@ -{ lib -, stdenv -, fetchFromGitHub -, rocmUpdateScript -, addOpenGLRunpath -, cmake -, rocm-comgr -, rocm-runtime -, rocclr -, glew -, libX11 -, numactl -}: - -stdenv.mkDerivation (finalAttrs: { - pname = "rocm-opencl-runtime"; - version = "5.4.4"; - - src = fetchFromGitHub { - owner = "RadeonOpenCompute"; - repo = "ROCm-OpenCL-Runtime"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-E1+Y/fgp5b+7H1LN+O1fwVi0/XRCgvsiSxTY3u/q+8I="; - }; - - nativeBuildInputs = [ cmake ]; - - buildInputs = [ - rocm-comgr - rocm-runtime - glew - libX11 - numactl - ]; - - cmakeFlags = [ - "-DAMD_OPENCL_PATH=${finalAttrs.src}" - "-DROCCLR_PATH=${rocclr}" - ]; - - dontStrip = true; - - # Remove clinfo, which is already provided through the - # `clinfo` package. - postInstall = '' - rm -rf $out/bin - ''; - - # Fix the ICD installation path for NixOS - postPatch = '' - substituteInPlace khronos/icd/loader/linux/icd_linux.c \ - --replace 'ICD_VENDOR_PATH' '"${addOpenGLRunpath.driverLink}/etc/OpenCL/vendors/"' - ''; - - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - - meta = with lib; { - description = "OpenCL runtime for AMD GPUs, part of the ROCm stack"; - homepage = "https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime"; - license = with licenses; [ asl20 mit ]; - maintainers = with maintainers; [ acowley lovesegfault ] ++ teams.rocm.members; - platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; - }; -}) diff --git a/pkgs/development/libraries/rocprofiler/0000-dont-require-hsa_amd_aqlprofile.patch b/pkgs/development/libraries/rocprofiler/0000-dont-require-hsa_amd_aqlprofile.patch deleted file mode 100644 index b70163b08e48..000000000000 --- a/pkgs/development/libraries/rocprofiler/0000-dont-require-hsa_amd_aqlprofile.patch +++ /dev/null @@ -1,20 +0,0 @@ -diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp -index 643ff16..c08d98f 100644 ---- a/src/util/hsa_rsrc_factory.cpp -+++ b/src/util/hsa_rsrc_factory.cpp -@@ -127,15 +127,6 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize - if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR); - if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); - -- // Get AqlProfile API table -- aqlprofile_api_ = {0}; --#ifdef ROCP_LD_AQLPROFILE -- status = LoadAqlProfileLib(&aqlprofile_api_); --#else -- status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_); --#endif -- CHECK_STATUS("aqlprofile API table load failed", status); -- - // Get Loader API table - loader_api_ = {0}; - status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_); diff --git a/pkgs/development/libraries/ucx/default.nix b/pkgs/development/libraries/ucx/default.nix index a6dfb9f85c80..c956a3333a03 100644 --- a/pkgs/development/libraries/ucx/default.nix +++ b/pkgs/development/libraries/ucx/default.nix @@ -4,7 +4,7 @@ , enableCuda ? config.cudaSupport , cudatoolkit , enableRocm ? false -, rocm-core, rocm-runtime, rocm-device-libs, hip +, rocmPackages }: let @@ -13,9 +13,12 @@ let inherit (cudatoolkit) name meta; paths = [ cudatoolkit cudatoolkit.lib ]; }; + + rocmList = with rocmPackages; [ rocm-core rocm-runtime rocm-device-libs clr ]; + rocm = symlinkJoin { name = "rocm"; - paths = [ rocm-core rocm-runtime rocm-device-libs hip ]; + paths = rocmList; }; in @@ -40,7 +43,7 @@ stdenv.mkDerivation rec { rdma-core zlib ] ++ lib.optional enableCuda cudatoolkit - ++ lib.optionals enableRocm [ rocm-core rocm-runtime rocm-device-libs hip ]; + ++ lib.optionals enableRocm rocmList; configureFlags = [ "--with-rdmacm=${lib.getDev rdma-core}" diff --git a/pkgs/development/libraries/clang-ocl/default.nix b/pkgs/development/rocm-modules/5/clang-ocl/default.nix similarity index 100% rename from pkgs/development/libraries/clang-ocl/default.nix rename to pkgs/development/rocm-modules/5/clang-ocl/default.nix diff --git a/pkgs/development/rocm-modules/5/clr/default.nix b/pkgs/development/rocm-modules/5/clr/default.nix new file mode 100644 index 000000000000..d3f811dcc422 --- /dev/null +++ b/pkgs/development/rocm-modules/5/clr/default.nix @@ -0,0 +1,147 @@ +{ lib +, stdenv +, fetchFromGitHub +, rocmUpdateScript +, makeWrapper +, cmake +, perl +, clang +, hip-common +, hipcc +, rocm-device-libs +, rocm-comgr +, rocm-runtime +, roctracer +, rocminfo +, numactl +, libGL +, libxml2 +, libX11 +, python3Packages +}: + +let + wrapperArgs = [ + "--prefix PATH : $out/bin" + "--prefix LD_LIBRARY_PATH : ${rocm-runtime}" + "--set HIP_PLATFORM amd" + "--set HIP_PATH $out" + "--set HIP_CLANG_PATH ${clang}/bin" + "--set DEVICE_LIB_PATH ${rocm-device-libs}/amdgcn/bitcode" + "--set HSA_PATH ${rocm-runtime}" + "--set ROCM_PATH $out" + ]; +in stdenv.mkDerivation (finalAttrs: { + pname = "clr"; + version = "5.7.0"; + + src = fetchFromGitHub { + owner = "ROCm-Developer-Tools"; + repo = "clr"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-C+rFW/7kf35rz0sQTI2+iY5RhZZQY07fc5a+e6cB5OQ="; + }; + + nativeBuildInputs = [ + makeWrapper + cmake + perl + python3Packages.python + python3Packages.cppheaderparser + ]; + + buildInputs = [ + numactl + libGL + libxml2 + libX11 + ]; + + propagatedBuildInputs = [ + rocm-device-libs + rocm-comgr + rocm-runtime + rocminfo + ]; + + cmakeFlags = [ + "-DCMAKE_POLICY_DEFAULT_CMP0072=NEW" # Prefer newer OpenGL libraries + "-DCLR_BUILD_HIP=ON" + "-DCLR_BUILD_OCL=ON" + "-DHIP_COMMON_DIR=${hip-common}" + "-DHIPCC_BIN_DIR=${hipcc}/bin" + "-DHIP_PLATFORM=amd" + "-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext" + "-DROCM_PATH=${rocminfo}" + + # Temporarily set variables to work around upstream CMakeLists issue + # Can be removed once https://github.com/ROCm-Developer-Tools/hipamd/issues/55 is fixed + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DCMAKE_INSTALL_LIBDIR=lib" + ]; + + postPatch = '' + patchShebangs hipamd/src + + # We're not on Windows so these are never installed to hipcc... + substituteInPlace hipamd/CMakeLists.txt \ + --replace "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipcc.bat DESTINATION bin)" "" \ + --replace "install(PROGRAMS \''${HIPCC_BIN_DIR}/hipconfig.bat DESTINATION bin)" "" + + substituteInPlace hipamd/src/hip_embed_pch.sh \ + --replace "\''$LLVM_DIR/bin/clang" "${clang}/bin/clang" + ''; + + postInstall = '' + patchShebangs $out/bin + + # hipcc.bin and hipconfig.bin is mysteriously never installed + cp -a ${hipcc}/bin/{hipcc.bin,hipconfig.bin} $out/bin + + wrapProgram $out/bin/hipcc.bin ${lib.concatStringsSep " " wrapperArgs} + wrapProgram $out/bin/hipconfig.bin ${lib.concatStringsSep " " wrapperArgs} + wrapProgram $out/bin/hipcc.pl ${lib.concatStringsSep " " wrapperArgs} + wrapProgram $out/bin/hipconfig.pl ${lib.concatStringsSep " " wrapperArgs} + + # Just link rocminfo, it's easier + ln -s ${rocminfo}/bin/* $out/bin + ''; + + passthru = { + # All known and valid general GPU targets + # We cannot use this for each ROCm library, as each defines their own supported targets + # See: https://github.com/RadeonOpenCompute/ROCm/blob/77cbac4abab13046ee93d8b5bf410684caf91145/README.md#library-target-matrix + gpuTargets = lib.forEach [ + "803" + "900" + "906" + "908" + "90a" + "940" + "941" + "942" + "1010" + "1012" + "1030" + "1100" + "1101" + "1102" + ] (target: "gfx${target}"); + + updateScript = rocmUpdateScript { + name = finalAttrs.pname; + owner = finalAttrs.src.owner; + repo = finalAttrs.src.repo; + }; + }; + + meta = with lib; { + description = "AMD Common Language Runtime for hipamd, opencl, and rocclr"; + homepage = "https://github.com/ROCm-Developer-Tools/clr"; + license = with licenses; [ mit ]; + maintainers = with maintainers; [ lovesegfault ] ++ teams.rocm.members; + platforms = platforms.linux; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; + }; +}) diff --git a/pkgs/development/libraries/composable_kernel/default.nix b/pkgs/development/rocm-modules/5/composable_kernel/default.nix similarity index 100% rename from pkgs/development/libraries/composable_kernel/default.nix rename to pkgs/development/rocm-modules/5/composable_kernel/default.nix diff --git a/pkgs/development/rocm-modules/5/default.nix b/pkgs/development/rocm-modules/5/default.nix index 6509f8850858..8bc496f452f1 100644 --- a/pkgs/development/rocm-modules/5/default.nix +++ b/pkgs/development/rocm-modules/5/default.nix @@ -1,9 +1,244 @@ { callPackage , recurseIntoAttrs +, cudaPackages +, python3Packages +, elfutils +, boost179 }: let rocmUpdateScript = callPackage ./update.nix { }; -in { +in rec { + ## RadeonOpenCompute ## llvm = recurseIntoAttrs (callPackage ./llvm/default.nix { inherit rocmUpdateScript; }); + + rocm-core = callPackage ./rocm-core { + inherit rocmUpdateScript; + stdenv = llvm.rocmClangStdenv; + }; + + rocm-cmake = callPackage ./rocm-cmake { + inherit rocmUpdateScript; + stdenv = llvm.rocmClangStdenv; + }; + + rocm-thunk = callPackage ./rocm-thunk { + inherit rocmUpdateScript; + stdenv = llvm.rocmClangStdenv; + }; + + rocm-smi = python3Packages.callPackage ./rocm-smi { + inherit rocmUpdateScript; + stdenv = llvm.rocmClangStdenv; + }; + + # Eventually will be in the LLVM repo + rocm-device-libs = callPackage ./rocm-device-libs { + inherit rocmUpdateScript rocm-cmake; + stdenv = llvm.rocmClangStdenv; + }; + + rocm-runtime = callPackage ./rocm-runtime { + inherit rocmUpdateScript rocm-device-libs rocm-thunk; + stdenv = llvm.rocmClangStdenv; + }; + + # Eventually will be in the LLVM repo + rocm-comgr = callPackage ./rocm-comgr { + inherit rocmUpdateScript rocm-cmake rocm-device-libs; + stdenv = llvm.rocmClangStdenv; + }; + + rocminfo = callPackage ./rocminfo { + inherit rocmUpdateScript rocm-cmake rocm-runtime; + stdenv = llvm.rocmClangStdenv; + }; + + clang-ocl = callPackage ./clang-ocl { + inherit rocmUpdateScript rocm-cmake rocm-device-libs; + stdenv = llvm.rocmClangStdenv; + }; + + # Broken, too many errors + rdc = callPackage ./rdc { + inherit rocmUpdateScript rocm-smi rocm-runtime; + # stdenv = llvm.rocmClangStdenv; + }; + + ## ROCm-Developer-Tools ## + hip-common = callPackage ./hip-common { + inherit rocmUpdateScript; + stdenv = llvm.rocmClangStdenv; + }; + + # Eventually will be in the LLVM repo + hipcc = callPackage ./hipcc { + inherit rocmUpdateScript; + stdenv = llvm.rocmClangStdenv; + }; + + clr = callPackage ./clr { + inherit rocmUpdateScript hip-common hipcc rocm-device-libs rocm-comgr rocm-runtime roctracer rocminfo; + inherit (llvm) clang; + stdenv = llvm.rocmClangStdenv; + }; + + hipify = callPackage ./hipify { + inherit rocmUpdateScript; + inherit (llvm) clang; + stdenv = llvm.rocmClangStdenv; + }; + + # Needs GCC + rocprofiler = callPackage ./rocprofiler { + inherit (llvm) clang; + inherit rocmUpdateScript clr rocm-thunk roctracer rocm-smi hsa-amd-aqlprofile-bin; + }; + + # Needs GCC + roctracer = callPackage ./roctracer { + inherit rocmUpdateScript rocm-device-libs rocm-runtime rocprofiler clr; + inherit (llvm) clang; + }; + + # Needs GCC + rocgdb = callPackage ./rocgdb { + inherit rocmUpdateScript; + elfutils = elfutils.override { enableDebuginfod = true; }; + }; + + rocdbgapi = callPackage ./rocdbgapi { + inherit rocmUpdateScript rocm-cmake rocm-comgr rocm-runtime; + stdenv = llvm.rocmClangStdenv; + }; + + rocr-debug-agent = callPackage ./rocr-debug-agent { + inherit rocmUpdateScript clr rocdbgapi; + stdenv = llvm.rocmClangStdenv; + }; + + + + + + + + + + + + + + + composable_kernel = callPackage ./composable_kernel { + inherit (llvm) openmp clang-tools-extra; + stdenv = llvm.rocmClangStdenv; + }; + + hipcub = callPackage ./hipcub { + stdenv = llvm.rocmClangStdenv; + }; + + hipsparse = callPackage ./hipsparse { + inherit (llvm) openmp; + stdenv = llvm.rocmClangStdenv; + }; + + hipfort = callPackage ./hipfort { + stdenv = llvm.rocmClangStdenv; + }; + + hipfft = callPackage ./hipfft { + inherit (llvm) openmp; + stdenv = llvm.rocmClangStdenv; + }; + + hipsolver = callPackage ./hipsolver { + stdenv = llvm.rocmClangStdenv; + }; + + hipblas = callPackage ./hipblas { + stdenv = llvm.rocmClangStdenv; + }; + + migraphx = callPackage ./migraphx { + inherit (llvm) clang-tools-extra openmp; + stdenv = llvm.rocmClangStdenv; + rocmlir = rocmlir-rock; + }; + + rccl = callPackage ./rccl { + stdenv = llvm.rocmClangStdenv; + }; + + rocalution = callPackage ./rocalution { + inherit (llvm) openmp; + stdenv = llvm.rocmClangStdenv; + }; + + rocsolver = callPackage ./rocsolver { + stdenv = llvm.rocmClangStdenv; + }; + + rocmlir = callPackage ./rocmlir { + stdenv = llvm.rocmClangStdenv; + }; + + rocmlir-rock = rocmlir.override { + buildRockCompiler = true; + }; + + rocprim = callPackage ./rocprim { + stdenv = llvm.rocmClangStdenv; + }; + + rocsparse = callPackage ./rocsparse { + stdenv = llvm.rocmClangStdenv; + }; + + rocfft = callPackage ./rocfft { + inherit (llvm) openmp; + stdenv = llvm.rocmClangStdenv; + }; + + rocrand = callPackage ./rocrand { + stdenv = llvm.rocmClangStdenv; + }; + + tensile = python3Packages.callPackage ./tensile { + stdenv = llvm.rocmClangStdenv; + }; + + rocwmma = callPackage ./rocwmma { + inherit (llvm) openmp; + stdenv = llvm.rocmClangStdenv; + }; + + rocblas = callPackage ./rocblas { + inherit (llvm) openmp; + stdenv = llvm.rocmClangStdenv; + }; + + miopengemm = callPackage ./miopengemm { + stdenv = llvm.rocmClangStdenv; + }; + + rocthrust = callPackage ./rocthrust { + stdenv = llvm.rocmClangStdenv; + }; + + miopen = callPackage ./miopen { + inherit (llvm) llvm clang-tools-extra; + stdenv = llvm.rocmClangStdenv; + rocmlir = rocmlir-rock; + boost = boost179.override { enableStatic = true; }; + }; + + miopen-hip = miopen.override { + useOpenCL = false; + }; + + miopen-opencl = miopen.override { + useOpenCL = true; + }; } diff --git a/pkgs/development/compilers/hip-common/default.nix b/pkgs/development/rocm-modules/5/hip-common/default.nix similarity index 79% rename from pkgs/development/compilers/hip-common/default.nix rename to pkgs/development/rocm-modules/5/hip-common/default.nix index 754fea89ac5e..9f5f37511ef0 100644 --- a/pkgs/development/compilers/hip-common/default.nix +++ b/pkgs/development/rocm-modules/5/hip-common/default.nix @@ -2,11 +2,6 @@ , stdenv , fetchFromGitHub , rocmUpdateScript -, substituteAll -, llvm -, rocm-runtime -, rocminfo -, lsb-release }: stdenv.mkDerivation (finalAttrs: { @@ -20,16 +15,6 @@ stdenv.mkDerivation (finalAttrs: { hash = "sha256-1Abit9qZCwrCVcnaFT4uMygFB9G6ovRasLmTsOsJ/Fw="; }; - patches = [ - (substituteAll { - src = ./0000-fixup-paths.patch; - inherit llvm rocminfo; - clang = stdenv.cc; - rocm_runtime = rocm-runtime; - lsb_release = lsb-release; - }) - ]; - dontConfigure = true; dontBuild = true; diff --git a/pkgs/development/libraries/hipblas/default.nix b/pkgs/development/rocm-modules/5/hipblas/default.nix similarity index 97% rename from pkgs/development/libraries/hipblas/default.nix rename to pkgs/development/rocm-modules/5/hipblas/default.nix index 845c5b9d0d7d..cb60e5de6633 100644 --- a/pkgs/development/libraries/hipblas/default.nix +++ b/pkgs/development/rocm-modules/5/hipblas/default.nix @@ -95,6 +95,6 @@ stdenv.mkDerivation (finalAttrs: { maintainers = teams.rocm.members; platforms = platforms.linux; # Fixed in develop branch by using C++17 and related refactor - broken = versions.minor finalAttrs.version != versions.minor hip.version || buildTests || buildBenchmarks || buildSamples; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || buildTests || buildBenchmarks || buildSamples; }; }) diff --git a/pkgs/development/compilers/hipcc/default.nix b/pkgs/development/rocm-modules/5/hipcc/default.nix similarity index 62% rename from pkgs/development/compilers/hipcc/default.nix rename to pkgs/development/rocm-modules/5/hipcc/default.nix index b758d0e1ed96..e6610e8909f7 100644 --- a/pkgs/development/compilers/hipcc/default.nix +++ b/pkgs/development/rocm-modules/5/hipcc/default.nix @@ -2,11 +2,7 @@ , stdenv , fetchFromGitHub , rocmUpdateScript -, substituteAll , cmake -, llvm -, rocm-runtime -, rocminfo , lsb-release }: @@ -21,28 +17,16 @@ stdenv.mkDerivation (finalAttrs: { hash = "sha256-lJX6nF1V4YmK5ai7jivXlRnG3doIOf6X9CWLHVdRuVg="; }; - patches = [ - (substituteAll { - src = ./0000-fixup-paths.patch; - inherit llvm rocminfo; - version_major = lib.versions.major finalAttrs.version; - version_minor = lib.versions.minor finalAttrs.version; - version_patch = lib.versions.patch finalAttrs.version; - clang = stdenv.cc; - rocm_runtime = rocm-runtime; - lsb_release = lsb-release; - }) - ]; - nativeBuildInputs = [ cmake ]; - installPhase = '' - runHook preInstall + postPatch = '' + substituteInPlace src/hipBin_amd.h \ + --replace "/usr/bin/lsb_release" "${lsb-release}/bin/lsb_release" + ''; - mkdir -p $out/bin - mv *.bin $out/bin - - runHook postInstall + postInstall = '' + rm -r $out/hip/bin + ln -s $out/bin $out/hip/bin ''; passthru.updateScript = rocmUpdateScript { diff --git a/pkgs/development/libraries/hipcub/default.nix b/pkgs/development/rocm-modules/5/hipcub/default.nix similarity index 99% rename from pkgs/development/libraries/hipcub/default.nix rename to pkgs/development/rocm-modules/5/hipcub/default.nix index fff34e1a0ec7..b3a23241366f 100644 --- a/pkgs/development/libraries/hipcub/default.nix +++ b/pkgs/development/rocm-modules/5/hipcub/default.nix @@ -82,6 +82,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ bsd3 ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/hipfft/default.nix b/pkgs/development/rocm-modules/5/hipfft/default.nix similarity index 99% rename from pkgs/development/libraries/hipfft/default.nix rename to pkgs/development/rocm-modules/5/hipfft/default.nix index c208296c687b..1e959f0ad56a 100644 --- a/pkgs/development/libraries/hipfft/default.nix +++ b/pkgs/development/rocm-modules/5/hipfft/default.nix @@ -102,6 +102,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/hipfort/default.nix b/pkgs/development/rocm-modules/5/hipfort/default.nix similarity index 100% rename from pkgs/development/libraries/hipfort/default.nix rename to pkgs/development/rocm-modules/5/hipfort/default.nix diff --git a/pkgs/development/compilers/hipify/default.nix b/pkgs/development/rocm-modules/5/hipify/default.nix similarity index 83% rename from pkgs/development/compilers/hipify/default.nix rename to pkgs/development/rocm-modules/5/hipify/default.nix index d7b243b9da04..893056496c9c 100644 --- a/pkgs/development/compilers/hipify/default.nix +++ b/pkgs/development/rocm-modules/5/hipify/default.nix @@ -3,6 +3,7 @@ , fetchFromGitHub , rocmUpdateScript , cmake +, clang , libxml2 }: @@ -22,7 +23,7 @@ stdenv.mkDerivation (finalAttrs: { postPatch = '' substituteInPlace CMakeLists.txt \ - --replace "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${stdenv.cc}/bin/clang" + --replace "\''${LLVM_TOOLS_BINARY_DIR}/clang" "${clang}/bin/clang" ''; passthru.updateScript = rocmUpdateScript { @@ -31,11 +32,11 @@ stdenv.mkDerivation (finalAttrs: { repo = finalAttrs.src.repo; }; - # Fixup weird install paths + # Fixup bad symlinks postInstall = '' - mkdir -p $out/bin - mv $out/{*.sh,hipify-*} $out/bin - cp -afs $out/bin $out/hip + rm -r $out/hip/bin + ln -s $out/bin $out/hip/bin + patchShebangs $out/bin ''; meta = with lib; { diff --git a/pkgs/development/libraries/hipsolver/default.nix b/pkgs/development/rocm-modules/5/hipsolver/default.nix similarity index 99% rename from pkgs/development/libraries/hipsolver/default.nix rename to pkgs/development/rocm-modules/5/hipsolver/default.nix index cd689856d418..238564d631bc 100644 --- a/pkgs/development/libraries/hipsolver/default.nix +++ b/pkgs/development/rocm-modules/5/hipsolver/default.nix @@ -95,6 +95,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/hipsparse/default.nix b/pkgs/development/rocm-modules/5/hipsparse/default.nix similarity index 99% rename from pkgs/development/libraries/hipsparse/default.nix rename to pkgs/development/rocm-modules/5/hipsparse/default.nix index 45a571735b70..6e6197209e4b 100644 --- a/pkgs/development/libraries/hipsparse/default.nix +++ b/pkgs/development/rocm-modules/5/hipsparse/default.nix @@ -131,6 +131,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/migraphx/default.nix b/pkgs/development/rocm-modules/5/migraphx/default.nix similarity index 99% rename from pkgs/development/libraries/migraphx/default.nix rename to pkgs/development/rocm-modules/5/migraphx/default.nix index 2a842a3c24dd..fdc97f45da9d 100644 --- a/pkgs/development/libraries/migraphx/default.nix +++ b/pkgs/development/rocm-modules/5/migraphx/default.nix @@ -155,6 +155,6 @@ in stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/miopen/default.nix b/pkgs/development/rocm-modules/5/miopen/default.nix similarity index 99% rename from pkgs/development/libraries/miopen/default.nix rename to pkgs/development/rocm-modules/5/miopen/default.nix index 5345c2216e66..dd7661a09756 100644 --- a/pkgs/development/libraries/miopen/default.nix +++ b/pkgs/development/rocm-modules/5/miopen/default.nix @@ -185,6 +185,6 @@ in stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/miopen/deps.nix b/pkgs/development/rocm-modules/5/miopen/deps.nix similarity index 100% rename from pkgs/development/libraries/miopen/deps.nix rename to pkgs/development/rocm-modules/5/miopen/deps.nix diff --git a/pkgs/development/libraries/miopengemm/default.nix b/pkgs/development/rocm-modules/5/miopengemm/default.nix similarity index 100% rename from pkgs/development/libraries/miopengemm/default.nix rename to pkgs/development/rocm-modules/5/miopengemm/default.nix diff --git a/pkgs/development/libraries/rccl/default.nix b/pkgs/development/rocm-modules/5/rccl/default.nix similarity index 98% rename from pkgs/development/libraries/rccl/default.nix rename to pkgs/development/rocm-modules/5/rccl/default.nix index acd0030cabd9..68dafc29c164 100644 --- a/pkgs/development/libraries/rccl/default.nix +++ b/pkgs/development/rocm-modules/5/rccl/default.nix @@ -78,6 +78,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ bsd2 bsd3 ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/tools/misc/rdc/default.nix b/pkgs/development/rocm-modules/5/rdc/default.nix similarity index 94% rename from pkgs/development/tools/misc/rdc/default.nix rename to pkgs/development/rocm-modules/5/rdc/default.nix index d2a7f46dc849..134b946c5f7a 100644 --- a/pkgs/development/tools/misc/rdc/default.nix +++ b/pkgs/development/rocm-modules/5/rdc/default.nix @@ -120,6 +120,7 @@ in stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor rocm-smi.version; + # broken = versions.minor finalAttrs.version != versions.minor rocm-smi.version; + broken = true; # Too many errors, unsure how to fix }; }) diff --git a/pkgs/development/libraries/rocalution/default.nix b/pkgs/development/rocm-modules/5/rocalution/default.nix similarity index 99% rename from pkgs/development/libraries/rocalution/default.nix rename to pkgs/development/rocm-modules/5/rocalution/default.nix index f67384a95f08..650e9dc7a1ca 100644 --- a/pkgs/development/libraries/rocalution/default.nix +++ b/pkgs/development/rocm-modules/5/rocalution/default.nix @@ -110,6 +110,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/rocblas/default.nix b/pkgs/development/rocm-modules/5/rocblas/default.nix similarity index 99% rename from pkgs/development/libraries/rocblas/default.nix rename to pkgs/development/rocm-modules/5/rocblas/default.nix index 78d0e7df8b24..59d23ad121da 100644 --- a/pkgs/development/libraries/rocblas/default.nix +++ b/pkgs/development/rocm-modules/5/rocblas/default.nix @@ -134,6 +134,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/rocdbgapi/default.nix b/pkgs/development/rocm-modules/5/rocdbgapi/default.nix similarity index 87% rename from pkgs/development/libraries/rocdbgapi/default.nix rename to pkgs/development/rocm-modules/5/rocdbgapi/default.nix index a1cfacf33c27..41c1178f1089 100644 --- a/pkgs/development/libraries/rocdbgapi/default.nix +++ b/pkgs/development/rocm-modules/5/rocdbgapi/default.nix @@ -7,6 +7,7 @@ , git , rocm-comgr , rocm-runtime +, hwdata , texlive , doxygen , graphviz @@ -65,6 +66,16 @@ in stdenv.mkDerivation (finalAttrs: { buildInputs = [ rocm-comgr rocm-runtime + hwdata + ]; + + cmakeFlags = [ + "-DPCI_IDS_PATH=${hwdata}/share/hwdata" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" ]; # Unfortunately, it seems like we have to call make on this manually diff --git a/pkgs/development/libraries/rocfft/default.nix b/pkgs/development/rocm-modules/5/rocfft/default.nix similarity index 99% rename from pkgs/development/libraries/rocfft/default.nix rename to pkgs/development/rocm-modules/5/rocfft/default.nix index 8eed31b8b233..ee1078eabb28 100644 --- a/pkgs/development/libraries/rocfft/default.nix +++ b/pkgs/development/rocm-modules/5/rocfft/default.nix @@ -238,6 +238,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = with maintainers; [ kira-bruneau ] ++ teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/rocfft/device-install.patch b/pkgs/development/rocm-modules/5/rocfft/device-install.patch similarity index 100% rename from pkgs/development/libraries/rocfft/device-install.patch rename to pkgs/development/rocm-modules/5/rocfft/device-install.patch diff --git a/pkgs/development/libraries/rocfft/split-kernel-compilation.patch b/pkgs/development/rocm-modules/5/rocfft/split-kernel-compilation.patch similarity index 100% rename from pkgs/development/libraries/rocfft/split-kernel-compilation.patch rename to pkgs/development/rocm-modules/5/rocfft/split-kernel-compilation.patch diff --git a/pkgs/development/tools/misc/rocgdb/default.nix b/pkgs/development/rocm-modules/5/rocgdb/default.nix similarity index 100% rename from pkgs/development/tools/misc/rocgdb/default.nix rename to pkgs/development/rocm-modules/5/rocgdb/default.nix diff --git a/pkgs/development/tools/build-managers/rocm-cmake/default.nix b/pkgs/development/rocm-modules/5/rocm-cmake/default.nix similarity index 91% rename from pkgs/development/tools/build-managers/rocm-cmake/default.nix rename to pkgs/development/rocm-modules/5/rocm-cmake/default.nix index 9e9cf3caf12e..04ae947d3a4a 100644 --- a/pkgs/development/tools/build-managers/rocm-cmake/default.nix +++ b/pkgs/development/rocm-modules/5/rocm-cmake/default.nix @@ -22,6 +22,8 @@ stdenv.mkDerivation (finalAttrs: { name = finalAttrs.pname; owner = finalAttrs.src.owner; repo = finalAttrs.src.repo; + page = "releases?per_page=2"; + filter = ".[1].tag_name | split(\"-\") | .[1]"; }; meta = with lib; { diff --git a/pkgs/development/libraries/rocm-comgr/default.nix b/pkgs/development/rocm-modules/5/rocm-comgr/default.nix similarity index 97% rename from pkgs/development/libraries/rocm-comgr/default.nix rename to pkgs/development/rocm-modules/5/rocm-comgr/default.nix index 4d84af3afa54..c3c4c5fab3cd 100644 --- a/pkgs/development/libraries/rocm-comgr/default.nix +++ b/pkgs/development/rocm-modules/5/rocm-comgr/default.nix @@ -24,7 +24,6 @@ in stdenv.mkDerivation (finalAttrs: { hash = "sha256-QB3G0V92UTW67hD6+zSuExN1+eMT820iYSlMyZeWSFw="; }; - patches = [ ./cmake.patch ]; sourceRoot = "${finalAttrs.src.name}/lib/comgr"; nativeBuildInputs = [ diff --git a/pkgs/development/libraries/rocm-core/default.nix b/pkgs/development/rocm-modules/5/rocm-core/default.nix similarity index 100% rename from pkgs/development/libraries/rocm-core/default.nix rename to pkgs/development/rocm-modules/5/rocm-core/default.nix diff --git a/pkgs/development/libraries/rocm-device-libs/cmake.patch b/pkgs/development/rocm-modules/5/rocm-device-libs/cmake.patch similarity index 100% rename from pkgs/development/libraries/rocm-device-libs/cmake.patch rename to pkgs/development/rocm-modules/5/rocm-device-libs/cmake.patch diff --git a/pkgs/development/libraries/rocm-device-libs/default.nix b/pkgs/development/rocm-modules/5/rocm-device-libs/default.nix similarity index 100% rename from pkgs/development/libraries/rocm-device-libs/default.nix rename to pkgs/development/rocm-modules/5/rocm-device-libs/default.nix diff --git a/pkgs/development/libraries/rocm-runtime/default.nix b/pkgs/development/rocm-modules/5/rocm-runtime/default.nix similarity index 87% rename from pkgs/development/libraries/rocm-runtime/default.nix rename to pkgs/development/rocm-modules/5/rocm-runtime/default.nix index dfb10c363153..fd9182c8254d 100644 --- a/pkgs/development/libraries/rocm-runtime/default.nix +++ b/pkgs/development/rocm-modules/5/rocm-runtime/default.nix @@ -50,8 +50,10 @@ stdenv.mkDerivation (finalAttrs: { --replace 'hsa/include/hsa' 'include/hsa' # We compile clang before rocm-device-libs, so patch it in afterwards + # Replace object version: https://github.com/RadeonOpenCompute/ROCR-Runtime/issues/166 (TODO: Remove on LLVM update?) substituteInPlace image/blit_src/CMakeLists.txt \ - --replace '-cl-denorms-are-zero' '-cl-denorms-are-zero --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode' + --replace '-cl-denorms-are-zero' '-cl-denorms-are-zero --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode' \ + --replace '-mcode-object-version=4' '-mcode-object-version=5' ''; fixupPhase = '' diff --git a/pkgs/tools/system/rocm-smi/cmake.patch b/pkgs/development/rocm-modules/5/rocm-smi/cmake.patch similarity index 100% rename from pkgs/tools/system/rocm-smi/cmake.patch rename to pkgs/development/rocm-modules/5/rocm-smi/cmake.patch diff --git a/pkgs/tools/system/rocm-smi/default.nix b/pkgs/development/rocm-modules/5/rocm-smi/default.nix similarity index 72% rename from pkgs/tools/system/rocm-smi/default.nix rename to pkgs/development/rocm-modules/5/rocm-smi/default.nix index 2fa79828c63b..2e1692539e23 100644 --- a/pkgs/tools/system/rocm-smi/default.nix +++ b/pkgs/development/rocm-modules/5/rocm-smi/default.nix @@ -17,16 +17,24 @@ stdenv.mkDerivation (finalAttrs: { hash = "sha256-swCRO4PBMBJ6fO2bLq/xxFZIYw2IgiFB490wsU8Wm2o="; }; - postPatch = '' - sed '1i#include ' -i src/rocm_smi{,_gpu_metrics}.cc # since gcc12 probably - ''; - - nativeBuildInputs = [ cmake wrapPython ]; - patches = [ ./cmake.patch ]; + nativeBuildInputs = [ + cmake + wrapPython + ]; + + cmakeFlags = [ + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ]; + postInstall = '' wrapPythonProgramsIn $out + mv $out/libexec/rocm_smi/.rsmiBindings.py-wrapped $out/libexec/rocm_smi/rsmiBindings.py ''; passthru.updateScript = rocmUpdateScript { diff --git a/pkgs/development/libraries/rocm-thunk/default.nix b/pkgs/development/rocm-modules/5/rocm-thunk/default.nix similarity index 92% rename from pkgs/development/libraries/rocm-thunk/default.nix rename to pkgs/development/rocm-modules/5/rocm-thunk/default.nix index 8a4ad2a098c6..73368dbb0e7f 100644 --- a/pkgs/development/libraries/rocm-thunk/default.nix +++ b/pkgs/development/rocm-modules/5/rocm-thunk/default.nix @@ -4,11 +4,8 @@ , rocmUpdateScript , pkg-config , cmake -, rocm-cmake , libdrm , numactl -, valgrind -, gcc }: stdenv.mkDerivation (finalAttrs: { @@ -25,14 +22,11 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ pkg-config cmake - rocm-cmake ]; buildInputs = [ libdrm numactl - valgrind - gcc.cc.libgcc or null # TODO: unhack this? ]; cmakeFlags = [ diff --git a/pkgs/development/tools/rocminfo/default.nix b/pkgs/development/rocm-modules/5/rocminfo/default.nix similarity index 100% rename from pkgs/development/tools/rocminfo/default.nix rename to pkgs/development/rocm-modules/5/rocminfo/default.nix diff --git a/pkgs/development/libraries/rocmlir/default.nix b/pkgs/development/rocm-modules/5/rocmlir/default.nix similarity index 100% rename from pkgs/development/libraries/rocmlir/default.nix rename to pkgs/development/rocm-modules/5/rocmlir/default.nix diff --git a/pkgs/development/libraries/rocprim/default.nix b/pkgs/development/rocm-modules/5/rocprim/default.nix similarity index 98% rename from pkgs/development/libraries/rocprim/default.nix rename to pkgs/development/rocm-modules/5/rocprim/default.nix index b38684b24dd6..e8233547664f 100644 --- a/pkgs/development/libraries/rocprim/default.nix +++ b/pkgs/development/rocm-modules/5/rocprim/default.nix @@ -77,6 +77,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/rocprofiler/default.nix b/pkgs/development/rocm-modules/5/rocprofiler/default.nix similarity index 52% rename from pkgs/development/libraries/rocprofiler/default.nix rename to pkgs/development/rocm-modules/5/rocprofiler/default.nix index 97f269beb84e..ec24a3f41e59 100644 --- a/pkgs/development/libraries/rocprofiler/default.nix +++ b/pkgs/development/rocm-modules/5/rocprofiler/default.nix @@ -3,10 +3,32 @@ , fetchFromGitHub , rocmUpdateScript , cmake -, rocm-runtime +, clang +, clr , rocm-thunk , roctracer +, rocm-smi +, hsa-amd-aqlprofile-bin , numactl +, libpciaccess +, libxml2 +, elfutils +, mpi +, gtest +, python3Packages +, gpuTargets ? [ + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx940" + "gfx941" + "gfx942" + "gfx1030" + "gfx1100" + "gfx1101" + "gfx1102" +] }: stdenv.mkDerivation (finalAttrs: { @@ -20,17 +42,33 @@ stdenv.mkDerivation (finalAttrs: { hash = "sha256-ue/2uiLbhOv/5XY4cIJuZ8DUMRhniYgxolq9xMwO1FY="; }; - patches = [ ./0000-dont-require-hsa_amd_aqlprofile.patch ]; - nativeBuildInputs = [ cmake ]; + nativeBuildInputs = [ + cmake + clang + clr + python3Packages.lxml + python3Packages.cppheaderparser + python3Packages.pyyaml + python3Packages.barectf + ]; buildInputs = [ rocm-thunk - rocm-runtime + rocm-smi + hsa-amd-aqlprofile-bin numactl + libpciaccess + libxml2 + elfutils + mpi + gtest ]; cmakeFlags = [ + "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip" "-DPROF_API_HEADER_PATH=${roctracer.src}/inc/ext" + "-DHIP_ROOT_DIR=${clr}" + "-DGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" @@ -39,14 +77,18 @@ stdenv.mkDerivation (finalAttrs: { ]; postPatch = '' - patchShebangs bin test + patchShebangs . - substituteInPlace cmake_modules/env.cmake \ - --replace "FATAL_ERROR \"AQL_PROFILE" "WARNING \"AQL_PROFILE" + # Cannot find ROCm device library, pointless + substituteInPlace CMakeLists.txt \ + --replace "add_subdirectory(tests-v2)" "" \ + --replace "add_subdirectory(samples)" "" ''; - postInstall = '' - patchelf --set-rpath $out/lib:${lib.makeLibraryPath finalAttrs.buildInputs} $out/lib/rocprofiler/librocprof-tool.so + postBuild = '' + # HSACO aren't being built for some reason + substituteInPlace test/cmake_install.cmake \ + --replace "file(INSTALL DESTINATION \"\''${CMAKE_INSTALL_PREFIX}/share/rocprofiler/tests-v1\" TYPE FILE FILES \"" "message(\"" ''; passthru.updateScript = rocmUpdateScript { @@ -61,6 +103,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; # mitx11 maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; + broken = versions.minor finalAttrs.version != versions.minor clr.version; }; }) diff --git a/pkgs/development/libraries/rocr-debug-agent/default.nix b/pkgs/development/rocm-modules/5/rocr-debug-agent/default.nix similarity index 87% rename from pkgs/development/libraries/rocr-debug-agent/default.nix rename to pkgs/development/rocm-modules/5/rocr-debug-agent/default.nix index 08d45f304a4f..dfc8580b3e14 100644 --- a/pkgs/development/libraries/rocr-debug-agent/default.nix +++ b/pkgs/development/rocm-modules/5/rocr-debug-agent/default.nix @@ -3,10 +3,9 @@ , fetchFromGitHub , rocmUpdateScript , cmake -, hip +, clr , git , rocdbgapi -, rocm-runtime , elfutils }: @@ -23,20 +22,19 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake - hip + clr git ]; buildInputs = [ rocdbgapi - rocm-runtime elfutils ]; cmakeFlags = [ - "-DCMAKE_MODULE_PATH=${hip}/lib/cmake/hip" - "-DHIP_ROOT_DIR=${hip}" - "-DHIP_PATH=${hip}" + "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip" + "-DHIP_ROOT_DIR=${clr}" + "-DHIP_PATH=${clr}" ]; # Weird install target @@ -56,6 +54,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ ncsa ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/rocrand/default.nix b/pkgs/development/rocm-modules/5/rocrand/default.nix similarity index 99% rename from pkgs/development/libraries/rocrand/default.nix rename to pkgs/development/rocm-modules/5/rocrand/default.nix index 8ea138d4c3cd..daa24b870ceb 100644 --- a/pkgs/development/libraries/rocrand/default.nix +++ b/pkgs/development/rocm-modules/5/rocrand/default.nix @@ -80,6 +80,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/rocsolver/default.nix b/pkgs/development/rocm-modules/5/rocsolver/default.nix similarity index 99% rename from pkgs/development/libraries/rocsolver/default.nix rename to pkgs/development/rocm-modules/5/rocsolver/default.nix index c78b4d97a0ae..3a0858af6335 100644 --- a/pkgs/development/libraries/rocsolver/default.nix +++ b/pkgs/development/rocm-modules/5/rocsolver/default.nix @@ -90,6 +90,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ bsd2 ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/rocsparse/default.nix b/pkgs/development/rocm-modules/5/rocsparse/default.nix similarity index 99% rename from pkgs/development/libraries/rocsparse/default.nix rename to pkgs/development/rocm-modules/5/rocsparse/default.nix index a93d7a77bf26..d97951530119 100644 --- a/pkgs/development/libraries/rocsparse/default.nix +++ b/pkgs/development/rocm-modules/5/rocsparse/default.nix @@ -141,6 +141,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/rocsparse/deps.nix b/pkgs/development/rocm-modules/5/rocsparse/deps.nix similarity index 100% rename from pkgs/development/libraries/rocsparse/deps.nix rename to pkgs/development/rocm-modules/5/rocsparse/deps.nix diff --git a/pkgs/development/libraries/rocthrust/default.nix b/pkgs/development/rocm-modules/5/rocthrust/default.nix similarity index 99% rename from pkgs/development/libraries/rocthrust/default.nix rename to pkgs/development/rocm-modules/5/rocthrust/default.nix index a4981d3fb270..e441709f89f7 100644 --- a/pkgs/development/libraries/rocthrust/default.nix +++ b/pkgs/development/rocm-modules/5/rocthrust/default.nix @@ -79,6 +79,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ asl20 ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/roctracer/default.nix b/pkgs/development/rocm-modules/5/roctracer/default.nix similarity index 93% rename from pkgs/development/libraries/roctracer/default.nix rename to pkgs/development/rocm-modules/5/roctracer/default.nix index 3aeb8e3ba198..92e557426b10 100644 --- a/pkgs/development/libraries/roctracer/default.nix +++ b/pkgs/development/rocm-modules/5/roctracer/default.nix @@ -4,7 +4,7 @@ , rocmUpdateScript , cmake , clang -, hip +, clr , rocm-device-libs , rocprofiler , libxml2 @@ -39,14 +39,13 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake clang - hip + clr ] ++ lib.optionals buildDocs [ doxygen graphviz ]; buildInputs = [ - rocm-device-libs rocprofiler libxml2 python3Packages.python @@ -54,7 +53,7 @@ stdenv.mkDerivation (finalAttrs: { ]; cmakeFlags = [ - "-DCMAKE_MODULE_PATH=${hip}/hip/cmake" + "-DCMAKE_MODULE_PATH=${clr}/hip/cmake" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" @@ -85,7 +84,7 @@ stdenv.mkDerivation (finalAttrs: { find $out/test -executable -type f -exec mv {} $test/bin \; rm $test/bin/{*.sh,*.py} patchelf --set-rpath $out/lib:${lib.makeLibraryPath ( - finalAttrs.buildInputs ++ [ hip gcc-unwrapped.lib rocm-runtime ])} $test/bin/* + finalAttrs.buildInputs ++ [ clr gcc-unwrapped.lib rocm-runtime ])} $test/bin/* rm -rf $out/test ''; @@ -101,6 +100,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; # mitx11 maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor clr.version; }; }) diff --git a/pkgs/development/libraries/rocwmma/0000-dont-fetch-googletest.patch b/pkgs/development/rocm-modules/5/rocwmma/0000-dont-fetch-googletest.patch similarity index 100% rename from pkgs/development/libraries/rocwmma/0000-dont-fetch-googletest.patch rename to pkgs/development/rocm-modules/5/rocwmma/0000-dont-fetch-googletest.patch diff --git a/pkgs/development/libraries/rocwmma/default.nix b/pkgs/development/rocm-modules/5/rocwmma/default.nix similarity index 99% rename from pkgs/development/libraries/rocwmma/default.nix rename to pkgs/development/rocm-modules/5/rocwmma/default.nix index 84db5b4dbebf..ef21ed86248a 100644 --- a/pkgs/development/libraries/rocwmma/default.nix +++ b/pkgs/development/rocm-modules/5/rocwmma/default.nix @@ -141,6 +141,6 @@ in stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/libraries/tensile/default.nix b/pkgs/development/rocm-modules/5/tensile/default.nix similarity index 100% rename from pkgs/development/libraries/tensile/default.nix rename to pkgs/development/rocm-modules/5/tensile/default.nix diff --git a/pkgs/development/rocm-modules/5/update.nix b/pkgs/development/rocm-modules/5/update.nix index abd434776ef9..1cc7e354d240 100644 --- a/pkgs/development/rocm-modules/5/update.nix +++ b/pkgs/development/rocm-modules/5/update.nix @@ -12,7 +12,7 @@ let pname = if lib.hasPrefix "rocm-llvm-" name - then "rocmPackages_5.llvm.${lib.removePrefix "rocm-llvm-" name}" + then "llvm.${lib.removePrefix "rocm-llvm-" name}" else name; updateScript = writeScript "update.sh" '' @@ -27,6 +27,6 @@ let version="''${version}.0" fi - update-source-version ${pname} "$version" --ignore-same-hash + update-source-version rocmPackages_5.${pname} "$version" --ignore-same-hash ''; in [ updateScript ] diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index 4e03557c7740..d365df0eb0f0 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -16957,221 +16957,8 @@ with pkgs; rml = callPackage ../development/compilers/rml { }; - composable_kernel = callPackage ../development/libraries/composable_kernel { - inherit (rocmPackages.llvm) openmp clang-tools-extra; - stdenv = rocmClangStdenv; - }; - - rocprofiler = callPackage ../development/libraries/rocprofiler { - stdenv = rocmClangStdenv; - }; - - clang-ocl = callPackage ../development/libraries/clang-ocl { - stdenv = rocmClangStdenv; - }; - rgxg = callPackage ../tools/text/rgxg { }; - rocclr = callPackage ../development/libraries/rocclr { - stdenv = rocmClangStdenv; - }; - - hip-common = callPackage ../development/compilers/hip-common { - inherit (rocmPackages.llvm) llvm; - stdenv = rocmClangStdenv; - }; - - hipcc = callPackage ../development/compilers/hipcc { - inherit (rocmPackages.llvm) llvm; - stdenv = rocmClangStdenv; - }; - - hip = callPackage ../development/compilers/hip { - inherit (rocmPackages.llvm) llvm; - inherit (cudaPackages) cudatoolkit; - stdenv = rocmClangStdenv; - }; - - hip-amd = hip.override { - useNVIDIA = false; - }; - - hip-nvidia = hip.override { - useNVIDIA = true; - }; - - hipify = callPackage ../development/compilers/hipify { - stdenv = rocmClangStdenv; - }; - - hipcub = callPackage ../development/libraries/hipcub { - stdenv = rocmClangStdenv; - }; - - hipsparse = callPackage ../development/libraries/hipsparse { - inherit (rocmPackages.llvm) openmp; - stdenv = rocmClangStdenv; - }; - - hipfort = callPackage ../development/libraries/hipfort { - stdenv = rocmClangStdenv; - }; - - hipfft = callPackage ../development/libraries/hipfft { - inherit (rocmPackages.llvm) openmp; - stdenv = rocmClangStdenv; - }; - - hipsolver = callPackage ../development/libraries/hipsolver { - stdenv = rocmClangStdenv; - }; - - hipblas = callPackage ../development/libraries/hipblas { - stdenv = rocmClangStdenv; - }; - - migraphx = callPackage ../development/libraries/migraphx { - inherit (rocmPackages.llvm) clang-tools-extra openmp; - stdenv = rocmClangStdenv; - rocmlir = rocmlir-rock; - }; - - rccl = callPackage ../development/libraries/rccl { - stdenv = rocmClangStdenv; - }; - - rocm-cmake = callPackage ../development/tools/build-managers/rocm-cmake { - stdenv = rocmClangStdenv; - }; - - rocm-comgr = callPackage ../development/libraries/rocm-comgr { - stdenv = rocmClangStdenv; - }; - - rocalution = callPackage ../development/libraries/rocalution { - inherit (rocmPackages.llvm) openmp; - stdenv = rocmClangStdenv; - }; - - rocm-device-libs = callPackage ../development/libraries/rocm-device-libs { - stdenv = rocmClangStdenv; - }; - - rocm-opencl-icd = callPackage ../development/libraries/rocm-opencl-icd { - stdenv = rocmClangStdenv; - }; - - rocsolver = callPackage ../development/libraries/rocsolver { - stdenv = rocmClangStdenv; - }; - - rocm-opencl-runtime = callPackage ../development/libraries/rocm-opencl-runtime { - stdenv = rocmClangStdenv; - }; - - rocm-runtime = callPackage ../development/libraries/rocm-runtime { - stdenv = rocmClangStdenv; - }; - - rocm-smi = python3Packages.callPackage ../tools/system/rocm-smi { - stdenv = rocmClangStdenv; - }; - - rocm-thunk = callPackage ../development/libraries/rocm-thunk { - stdenv = rocmClangStdenv; - }; - - rocminfo = callPackage ../development/tools/rocminfo { - stdenv = rocmClangStdenv; - }; - - rocmlir = callPackage ../development/libraries/rocmlir { - stdenv = rocmClangStdenv; - }; - - # Best just use GCC here - rdc = callPackage ../development/tools/misc/rdc { }; - - # Best just use GCC here - rocgdb = callPackage ../development/tools/misc/rocgdb { - elfutils = elfutils.override { enableDebuginfod = true; }; - }; - - rocdbgapi = callPackage ../development/libraries/rocdbgapi { - stdenv = rocmClangStdenv; - }; - - rocr-debug-agent = callPackage ../development/libraries/rocr-debug-agent { - stdenv = rocmClangStdenv; - }; - - rocmlir-rock = rocmlir.override { - buildRockCompiler = true; - }; - - rocm-core = callPackage ../development/libraries/rocm-core { - stdenv = rocmClangStdenv; - }; - - rocprim = callPackage ../development/libraries/rocprim { - stdenv = rocmClangStdenv; - }; - - rocsparse = callPackage ../development/libraries/rocsparse { - stdenv = rocmClangStdenv; - }; - - rocfft = callPackage ../development/libraries/rocfft { - inherit (rocmPackages.llvm) openmp; - stdenv = rocmClangStdenv; - }; - - rocrand = callPackage ../development/libraries/rocrand { - stdenv = rocmClangStdenv; - }; - - tensile = python3Packages.callPackage ../development/libraries/tensile { - stdenv = rocmClangStdenv; - }; - - rocwmma = callPackage ../development/libraries/rocwmma { - inherit (rocmPackages.llvm) openmp; - stdenv = rocmClangStdenv; - }; - - rocblas = callPackage ../development/libraries/rocblas { - inherit (rocmPackages.llvm) openmp; - stdenv = rocmClangStdenv; - }; - - miopengemm = callPackage ../development/libraries/miopengemm { - stdenv = rocmClangStdenv; - }; - - rocthrust = callPackage ../development/libraries/rocthrust { - stdenv = rocmClangStdenv; - }; - - miopen = callPackage ../development/libraries/miopen { - inherit (rocmPackages.llvm) llvm clang-tools-extra; - stdenv = rocmClangStdenv; - rocmlir = rocmlir-rock; - boost = boost179.override { enableStatic = true; }; - }; - - miopen-hip = miopen.override { - useOpenCL = false; - }; - - miopen-opencl = miopen.override { - useOpenCL = true; - }; - - # Requires GCC - roctracer = callPackage ../development/libraries/roctracer { - inherit (rocmPackages.llvm) clang; - }; - rtags = callPackage ../development/tools/rtags { inherit (darwin) apple_sdk; }; @@ -30903,6 +30690,7 @@ with pkgs; # LLVM 11 crashes when compiling GHOST_SystemCocoa.mm stdenv = if stdenv.isDarwin then llvmPackages_10.stdenv else stdenv; inherit (darwin.apple_sdk.frameworks) Cocoa CoreGraphics ForceFeedback OpenAL OpenGL; + inherit (rocmPackages) hip; }; blender-with-packages = callPackage ../applications/misc/blender/wrapper.nix { }; @@ -39476,6 +39264,7 @@ with pkgs; inherit (callPackage ../development/libraries/science/math/magma { inherit (rocmPackages.llvm) openmp; + inherit (rocmPackages) hip hipblas hipsparse; }) magma magma_2_7_2 magma_2_6_2; magma-cuda = magma.override { From 5021abc8de0deec0eb36c14b193aff7b84bb1adc Mon Sep 17 00:00:00 2001 From: Madoura Date: Mon, 2 Oct 2023 00:57:40 -0500 Subject: [PATCH 13/30] pythonPackages: barectf: init at 3.1.2 --- .../python-modules/barectf/default.nix | 44 +++++++++++++++++++ pkgs/top-level/python-packages.nix | 2 + 2 files changed, 46 insertions(+) create mode 100644 pkgs/development/python-modules/barectf/default.nix diff --git a/pkgs/development/python-modules/barectf/default.nix b/pkgs/development/python-modules/barectf/default.nix new file mode 100644 index 000000000000..429e03fbe940 --- /dev/null +++ b/pkgs/development/python-modules/barectf/default.nix @@ -0,0 +1,44 @@ +{ lib +, buildPythonPackage +, fetchFromGitHub +, poetry-core +, pytestCheckHook +, setuptools +, jsonschema +, pyyaml +, jinja2 +, termcolor +}: + +buildPythonPackage rec { + pname = "barectf"; + version = "3.1.2"; + format = "pyproject"; + + src = fetchFromGitHub { + owner = "efficios"; + repo = "barectf"; + rev = "v${version}"; + hash = "sha256-JelFfd3WS012dveNlIljhLdyPmgE9VEOXoZE3MBA/Gw="; + }; + + nativeBuildInputs = [ poetry-core ]; + nativeCheckInputs = [ pytestCheckHook ]; + + propagatedBuildInputs = [ + setuptools # needs pkg_resources at runtime + jsonschema + pyyaml + jinja2 + termcolor + ]; + + pythonImportsCheck = [ "barectf" ]; + + meta = with lib; { + description = "Generator of ANSI C tracers which output CTF data streams "; + homepage = "https://github.com/efficios/barectf"; + license = licenses.mit; + maintainers = with maintainers; [ Madouura ]; + }; +} diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index 4521548cb3d8..c0857bf850f8 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -1297,6 +1297,8 @@ self: super: with self; { inherit (pkgs.ocaml-ng.ocamlPackages) bap; }; + barectf = callPackage ../development/python-modules/barectf { }; + baron = callPackage ../development/python-modules/baron { }; base36 = callPackage ../development/python-modules/base36 { }; From 91fc34e29f430238c20010855c001619843bd150 Mon Sep 17 00:00:00 2001 From: Madoura Date: Mon, 2 Oct 2023 02:00:19 -0500 Subject: [PATCH 14/30] rocmPackages.hsa-amd-aqlprofile-bin: init at 5.7.0 --- pkgs/development/rocm-modules/5/default.nix | 5 +++ .../5/hsa-amd-aqlprofile-bin/default.nix | 45 +++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 pkgs/development/rocm-modules/5/hsa-amd-aqlprofile-bin/default.nix diff --git a/pkgs/development/rocm-modules/5/default.nix b/pkgs/development/rocm-modules/5/default.nix index 8bc496f452f1..c4567410c385 100644 --- a/pkgs/development/rocm-modules/5/default.nix +++ b/pkgs/development/rocm-modules/5/default.nix @@ -59,6 +59,11 @@ in rec { stdenv = llvm.rocmClangStdenv; }; + # Unfree + hsa-amd-aqlprofile-bin = callPackage ./hsa-amd-aqlprofile-bin { + stdenv = llvm.rocmClangStdenv; + }; + # Broken, too many errors rdc = callPackage ./rdc { inherit rocmUpdateScript rocm-smi rocm-runtime; diff --git a/pkgs/development/rocm-modules/5/hsa-amd-aqlprofile-bin/default.nix b/pkgs/development/rocm-modules/5/hsa-amd-aqlprofile-bin/default.nix new file mode 100644 index 000000000000..d13092fd3eef --- /dev/null +++ b/pkgs/development/rocm-modules/5/hsa-amd-aqlprofile-bin/default.nix @@ -0,0 +1,45 @@ +{ lib +, stdenv +, fetchurl +, dpkg +}: + +let + prefix = "hsa-amd-aqlprofile"; + version = "5.7.0"; + major = lib.versions.major version; + minor = lib.versions.minor version; + patch = lib.versions.patch version; + magic = lib.strings.concatStrings (lib.strings.intersperse "0" (lib.versions.splitVersion version)); +in stdenv.mkDerivation (finalAttrs: { + inherit version; + pname = "${prefix}-bin"; + + src = fetchurl { + url = "https://repo.radeon.com/rocm/apt/${major}.${minor}/pool/main/h/${prefix}/${prefix}_1.0.0.${magic}.${magic}-63~22.04_amd64.deb"; + hash = "sha256-FQ25eXkhnvOmcf0sGW3GYu9kZj69bVvZrh0jVx/G/kI="; + }; + + nativeBuildInputs = [ dpkg ]; + dontPatch = true; + dontConfigure = true; + dontBuild = true; + + installPhase = '' + runHook preInstall + + mkdir -p $out + cp -a opt/rocm-${version}/* $out + + runHook postInstall + ''; + + meta = with lib; { + description = "AQLPROFILE library for AMD HSA runtime API extension support"; + homepage = "https://rocm.docs.amd.com/en/latest/"; + license = with licenses; [ unfree ]; + maintainers = teams.rocm.members; + platforms = platforms.linux; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; + }; +}) From ab01e7cd07ee0698400943ba7f15c3d7eb30d3d7 Mon Sep 17 00:00:00 2001 From: Madoura Date: Mon, 2 Oct 2023 06:12:00 -0500 Subject: [PATCH 15/30] rocmPackages.llvm: fixup for 5.7.0 rocmPackages.llvm.openmp: fixup for 5.7.0 rocmPackages.llvm.mlir: fixup for 5.7.0 rocmPackages.llvm.flang: fixup for 5.7.0 --- pkgs/development/rocm-modules/5/default.nix | 2 +- .../rocm-modules/5/llvm/default.nix | 8 +- .../stage-3/1000-openmp-failing-tests.list | 122 ++++++++++++++++++ .../rocm-modules/5/llvm/stage-3/flang.nix | 23 ++-- .../rocm-modules/5/llvm/stage-3/mlir.nix | 12 +- .../rocm-modules/5/llvm/stage-3/openmp.nix | 21 ++- 6 files changed, 160 insertions(+), 28 deletions(-) create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/1000-openmp-failing-tests.list diff --git a/pkgs/development/rocm-modules/5/default.nix b/pkgs/development/rocm-modules/5/default.nix index c4567410c385..be597edfe150 100644 --- a/pkgs/development/rocm-modules/5/default.nix +++ b/pkgs/development/rocm-modules/5/default.nix @@ -10,7 +10,7 @@ let rocmUpdateScript = callPackage ./update.nix { }; in rec { ## RadeonOpenCompute ## - llvm = recurseIntoAttrs (callPackage ./llvm/default.nix { inherit rocmUpdateScript; }); + llvm = recurseIntoAttrs (callPackage ./llvm/default.nix { inherit rocmUpdateScript rocm-device-libs rocm-runtime rocm-thunk clr; }); rocm-core = callPackage ./rocm-core { inherit rocmUpdateScript; diff --git a/pkgs/development/rocm-modules/5/llvm/default.nix b/pkgs/development/rocm-modules/5/llvm/default.nix index 11f8241251e6..9226fb87802c 100644 --- a/pkgs/development/rocm-modules/5/llvm/default.nix +++ b/pkgs/development/rocm-modules/5/llvm/default.nix @@ -3,6 +3,10 @@ , rocmUpdateScript , wrapBintoolsWith , overrideCC +, rocm-device-libs +, rocm-runtime +, rocm-thunk +, clr }: let @@ -43,10 +47,10 @@ in rec { clang-tools-extra = callPackage ./stage-3/clang-tools-extra.nix { inherit rocmUpdateScript llvm clang-unwrapped; stdenv = rocmClangStdenv; }; libclc = callPackage ./stage-3/libclc.nix { inherit rocmUpdateScript llvm clang; stdenv = rocmClangStdenv; }; lldb = callPackage ./stage-3/lldb.nix { inherit rocmUpdateScript clang; stdenv = rocmClangStdenv; }; - mlir = callPackage ./stage-3/mlir.nix { inherit rocmUpdateScript; stdenv = rocmClangStdenv; }; + mlir = callPackage ./stage-3/mlir.nix { inherit rocmUpdateScript clr; stdenv = rocmClangStdenv; }; polly = callPackage ./stage-3/polly.nix { inherit rocmUpdateScript; stdenv = rocmClangStdenv; }; flang = callPackage ./stage-3/flang.nix { inherit rocmUpdateScript clang-unwrapped mlir; stdenv = rocmClangStdenv; }; - openmp = callPackage ./stage-3/openmp.nix { inherit rocmUpdateScript llvm clang-unwrapped clang; stdenv = rocmClangStdenv; }; + openmp = callPackage ./stage-3/openmp.nix { inherit rocmUpdateScript llvm clang-unwrapped clang rocm-device-libs rocm-runtime rocm-thunk; stdenv = rocmClangStdenv; }; # Runtimes pstl = callPackage ./stage-3/pstl.nix { inherit rocmUpdateScript; stdenv = rocmClangStdenv; }; diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/1000-openmp-failing-tests.list b/pkgs/development/rocm-modules/5/llvm/stage-3/1000-openmp-failing-tests.list new file mode 100644 index 000000000000..e53b21b3c535 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/1000-openmp-failing-tests.list @@ -0,0 +1,122 @@ +runtime/test/tasking/hidden_helper_task/gtid.cpp +runtime/test/ompt/parallel/parallel_if0.c +runtime/test/ompt/parallel/serialized.c +runtime/test/ompt/teams/parallel_team.c +runtime/test/ompt/teams/serial_teams.c +runtime/test/ompt/teams/serialized.c +runtime/test/ompt/teams/team.c +libomptarget/test/api/assert.c +libomptarget/test/api/omp_device_managed_memory.c +libomptarget/test/api/omp_device_memory.c +libomptarget/test/api/omp_get_device_num.c +libomptarget/test/api/omp_host_pinned_memory.c +libomptarget/test/api/omp_host_pinned_memory_alloc.c +libomptarget/test/api/omp_target_memcpy_async1.c +libomptarget/test/api/omp_target_memcpy_async2.c +libomptarget/test/api/omp_target_memcpy_rect_async1.c +libomptarget/test/api/omp_target_memcpy_rect_async2.c +libomptarget/test/mapping/array_section_implicit_capture.c +libomptarget/test/mapping/data_absent_at_exit.c +libomptarget/test/mapping/data_member_ref.cpp +libomptarget/test/mapping/declare_mapper_api.cpp +libomptarget/test/mapping/declare_mapper_target.cpp +libomptarget/test/mapping/declare_mapper_target_data.cpp +libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp +libomptarget/test/mapping/firstprivate_aligned.cpp +libomptarget/test/mapping/has_device_addr.cpp +libomptarget/test/mapping/implicit_device_ptr.c +libomptarget/test/mapping/is_device_ptr.cpp +libomptarget/test/mapping/lambda_mapping.cpp +libomptarget/test/mapping/low_alignment.c +libomptarget/test/mapping/map_back_race.cpp +libomptarget/test/mapping/power_of_two_alignment.c +libomptarget/test/mapping/pr38704.c +libomptarget/test/mapping/prelock.cpp +libomptarget/test/mapping/present/target_data_at_exit.c +libomptarget/test/mapping/private_mapping.c +libomptarget/test/mapping/ptr_and_obj_motion.c +libomptarget/test/mapping/reduction_implicit_map.cpp +libomptarget/test/mapping/target_derefence_array_pointrs.cpp +libomptarget/test/mapping/target_map_for_member_data.cpp +libomptarget/test/mapping/target_update_array_extension.c +libomptarget/test/mapping/target_use_device_addr.c +libomptarget/test/offloading/atomic-compare-signedness.c +libomptarget/test/offloading/bug47654.cpp +libomptarget/test/offloading/bug49021.cpp +libomptarget/test/offloading/bug49779.cpp +libomptarget/test/offloading/bug50022.cpp +libomptarget/test/offloading/bug51781.c +libomptarget/test/offloading/bug51982.c +libomptarget/test/offloading/bug53727.cpp +libomptarget/test/offloading/complex_reduction.cpp +libomptarget/test/offloading/cuda_no_devices.c +libomptarget/test/offloading/d2d_memcpy.c +libomptarget/test/offloading/dynamic_module.c +libomptarget/test/offloading/dynamic_module_load.c +libomptarget/test/offloading/global_constructor.cpp +libomptarget/test/offloading/lone_target_exit_data.c +libomptarget/test/offloading/memory_manager.cpp +libomptarget/test/offloading/parallel_offloading_map.cpp +libomptarget/test/offloading/static_linking.c +libomptarget/test/offloading/std_complex_arithmetic.cpp +libomptarget/test/offloading/target-teams-atomic.c +libomptarget/test/offloading/target_constexpr_mapping.cpp +libomptarget/test/offloading/target_critical_region.cpp +libomptarget/test/offloading/target_depend_nowait.cpp +libomptarget/test/offloading/target_nowait_target.cpp +libomptarget/test/offloading/taskloop_offload_nowait.cpp +libomptarget/test/offloading/test_libc.cpp +libomptarget/test/ompt/veccopy.c +libomptarget/test/ompt/veccopy_disallow_both.c +libomptarget/test/ompt/veccopy_emi.c +libomptarget/test/ompt/veccopy_emi_map.c +libomptarget/test/ompt/veccopy_map.c +libomptarget/test/ompt/veccopy_no_device_init.c +libomptarget/test/ompt/veccopy_wrong_return.c +libomptarget/test/api/is_initial_device.c +libomptarget/test/mapping/declare_mapper_nested_default_mappers_array_subscript.cpp +libomptarget/test/mapping/declare_mapper_nested_default_mappers_ptr_subscript.cpp +libomptarget/test/mapping/declare_mapper_nested_default_mappers_var.cpp +libomptarget/test/mapping/target_pointers_members_map.cpp +libomptarget/test/api/omp_dynamic_shared_memory_mixed.c +libomptarget/test/api/omp_env_vars.c +libomptarget/test/api/omp_get_mapped_ptr.c +libomptarget/test/api/omp_get_num_devices.c +libomptarget/test/api/omp_get_num_devices_with_empty_target.c +libomptarget/test/mapping/alloc_fail.c +libomptarget/test/mapping/array_section_use_device_ptr.c +libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp +libomptarget/test/mapping/declare_mapper_nested_mappers.cpp +libomptarget/test/mapping/declare_mapper_target_update.cpp +libomptarget/test/mapping/delete_inf_refcount.c +libomptarget/test/mapping/lambda_by_value.cpp +libomptarget/test/mapping/ompx_hold/omp_target_disassociate_ptr.c +libomptarget/test/mapping/ompx_hold/struct.c +libomptarget/test/mapping/ompx_hold/target-data.c +libomptarget/test/mapping/ompx_hold/target.c +libomptarget/test/mapping/present/target.c +libomptarget/test/mapping/present/target_array_extension.c +libomptarget/test/mapping/present/target_data.c +libomptarget/test/mapping/present/target_data_array_extension.c +libomptarget/test/mapping/present/target_enter_data.c +libomptarget/test/mapping/present/target_exit_data_delete.c +libomptarget/test/mapping/present/target_exit_data_release.c +libomptarget/test/mapping/present/target_update.c +libomptarget/test/mapping/present/target_update_array_extension.c +libomptarget/test/mapping/present/zero_length_array_section.c +libomptarget/test/mapping/present/zero_length_array_section_exit.c +libomptarget/test/mapping/target_data_array_extension_at_exit.c +libomptarget/test/mapping/target_has_device_addr.c +libomptarget/test/mapping/target_implicit_partial_map.c +libomptarget/test/mapping/target_wrong_use_device_addr.c +libomptarget/test/offloading/host_as_target.c +libomptarget/test/offloading/info.c +libomptarget/test/offloading/offloading_success.c +libomptarget/test/offloading/offloading_success.cpp +libomptarget/test/offloading/wtime.c +libomptarget/test/unified_shared_memory/api.c +libomptarget/test/unified_shared_memory/associate_ptr.c +libomptarget/test/unified_shared_memory/close_enter_exit.c +libomptarget/test/unified_shared_memory/close_manual.c +libomptarget/test/unified_shared_memory/close_member.c +libomptarget/test/unified_shared_memory/close_modifier.c diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix index 7289602451db..f5b4649e3f4b 100644 --- a/pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix @@ -3,31 +3,26 @@ , rocmUpdateScript , clang-unwrapped , mlir +, graphviz , python3Packages }: callPackage ../base.nix rec { inherit stdenv rocmUpdateScript; - buildTests = false; # `Executable "flang1" doesn't exist!` targetName = "flang"; targetDir = targetName; - extraNativeBuildInputs = [ python3Packages.sphinx-markdown-tables ]; + + extraNativeBuildInputs = [ + graphviz + python3Packages.sphinx-markdown-tables + ]; + extraBuildInputs = [ mlir ]; extraCMakeFlags = [ - "-DCMAKE_POLICY_DEFAULT_CMP0116=NEW" "-DCLANG_DIR=${clang-unwrapped}/lib/cmake/clang" - "-DFLANG_INCLUDE_TESTS=OFF" "-DMLIR_TABLEGEN_EXE=${mlir}/bin/mlir-tblgen" + "-DCLANG_TABLEGEN_EXE=${clang-unwrapped}/bin/clang-tblgen" + "-DFLANG_INCLUDE_TESTS=OFF" # `The dependency target "Bye" of target ...` ]; - - extraPostPatch = '' - substituteInPlace test/CMakeLists.txt \ - --replace "FileCheck" "" \ - --replace "count" "" \ - --replace "not" "" - - substituteInPlace docs/CMakeLists.txt \ - --replace "CLANG_TABLEGEN_EXE clang-tblgen" "CLANG_TABLEGEN_EXE ${clang-unwrapped}/bin/clang-tblgen" - ''; } diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix index 099622ca7cb8..0e0dcabec60c 100644 --- a/pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix @@ -1,8 +1,7 @@ { stdenv , callPackage , rocmUpdateScript -# , hip -# , rocm-comgr +, clr , vulkan-headers , vulkan-loader , glslang @@ -16,10 +15,9 @@ callPackage ../base.nix rec { buildMan = false; # No man pages to build targetName = "mlir"; targetDir = targetName; - # extraNativeBuildInputs = [ hip ]; + extraNativeBuildInputs = [ clr ]; extraBuildInputs = [ - # rocm-comgr vulkan-headers vulkan-loader glslang @@ -27,7 +25,6 @@ callPackage ../base.nix rec { ]; extraCMakeFlags = [ - "-DCMAKE_POLICY_DEFAULT_CMP0116=NEW" "-DMLIR_INCLUDE_DOCS=ON" "-DMLIR_INCLUDE_TESTS=ON" "-DMLIR_ENABLE_ROCM_RUNNER=ON" @@ -41,6 +38,10 @@ callPackage ../base.nix rec { mkdir -p ../llvm/build/bin ln -s ${lit}/bin/lit ../llvm/build/bin/llvm-lit + # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists` + substituteInPlace CMakeLists.txt \ + --replace "EXISTS \''${UNITTEST_DIR}/googletest/include/gtest/gtest.h" "FALSE" + substituteInPlace test/CMakeLists.txt \ --replace "FileCheck count not" "" \ --replace "list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime)" "" @@ -64,4 +65,5 @@ callPackage ../base.nix rec { checkTargets = [ "check-${targetName}" ]; requiredSystemFeatures = [ "big-parallel" ]; + isBroken = true; # `DebugTranslation.cpp:139:10: error: no matching function for call to 'get'` } diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/openmp.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/openmp.nix index faab6388835e..5fd7b6fd9aa3 100644 --- a/pkgs/development/rocm-modules/5/llvm/stage-3/openmp.nix +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/openmp.nix @@ -5,39 +5,48 @@ , llvm , clang , clang-unwrapped -# , rocm-device-libs -# , rocm-runtime +, rocm-device-libs +, rocm-runtime +, rocm-thunk , perl , elfutils +, libdrm +, numactl , lit }: callPackage ../base.nix rec { inherit stdenv rocmUpdateScript; - buildTests = false; # Too many failures, most pass targetName = "openmp"; targetDir = targetName; extraNativeBuildInputs = [ perl ]; extraBuildInputs = [ - # rocm-device-libs - # rocm-runtime + rocm-device-libs + rocm-runtime + rocm-thunk elfutils + libdrm + numactl ]; extraCMakeFlags = [ "-DCMAKE_MODULE_PATH=/build/source/llvm/cmake/modules" # For docs "-DCLANG_TOOL=${clang}/bin/clang" "-DCLANG_OFFLOAD_BUNDLER_TOOL=${clang-unwrapped}/bin/clang-offload-bundler" + "-DPACKAGER_TOOL=${clang-unwrapped}/bin/clang-offload-packager" "-DOPENMP_LLVM_TOOLS_DIR=${llvm}/bin" "-DOPENMP_LLVM_LIT_EXECUTABLE=${lit}/bin/.lit-wrapped" - # "-DDEVICELIBS_ROOT=${rocm-device-libs.src}" + "-DDEVICELIBS_ROOT=${rocm-device-libs.src}" ]; extraPostPatch = '' # We can't build this target at the moment substituteInPlace libomptarget/DeviceRTL/CMakeLists.txt \ --replace "gfx1010" "" + + # No idea what's going on here... + cat ${./1000-openmp-failing-tests.list} | xargs -d \\n rm ''; checkTargets = [ "check-${targetName}" ]; From 56f1d971abb970ff5f59be78e124ae8b2c35601b Mon Sep 17 00:00:00 2001 From: Madoura Date: Wed, 4 Oct 2023 19:45:17 -0500 Subject: [PATCH 16/30] rocmPackages.llvm.mlir: fix upstream bug --- .../0000-mlir-fix-debugtranslation.patch | 36 +++++++++++++++++++ .../llvm/stage-3/1001-mlir-failing-tests.list | 11 ++++++ .../rocm-modules/5/llvm/stage-3/mlir.nix | 26 ++++---------- 3 files changed, 54 insertions(+), 19 deletions(-) create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/0000-mlir-fix-debugtranslation.patch create mode 100644 pkgs/development/rocm-modules/5/llvm/stage-3/1001-mlir-failing-tests.list diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/0000-mlir-fix-debugtranslation.patch b/pkgs/development/rocm-modules/5/llvm/stage-3/0000-mlir-fix-debugtranslation.patch new file mode 100644 index 000000000000..f4221a088136 --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/0000-mlir-fix-debugtranslation.patch @@ -0,0 +1,36 @@ +From f1d1e10ec7e1061bf0b90abbc1e298d9438a5e74 Mon Sep 17 00:00:00 2001 +From: Scott Linder +Date: Mon, 11 Sep 2023 18:37:37 +0000 +Subject: [PATCH] [HeterogeneousDWARF] Update MLIR DI Metadata handling + +Pass a default DW_MSPACE_LLVM_none to satisfy new API + +Change-Id: I50df461f00b5510a715f55f61107122318102d22 +--- + lib/Target/LLVMIR/DebugTranslation.cpp | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/lib/Target/LLVMIR/DebugTranslation.cpp b/lib/Target/LLVMIR/DebugTranslation.cpp +index 2053f5bcef06aa6..635ee5d7e5fefdc 100644 +--- a/lib/Target/LLVMIR/DebugTranslation.cpp ++++ b/lib/Target/LLVMIR/DebugTranslation.cpp +@@ -148,7 +148,8 @@ llvm::DIDerivedType *DebugTranslation::translateImpl(DIDerivedTypeAttr attr) { + /*File=*/nullptr, /*Line=*/0, + /*Scope=*/nullptr, translate(attr.getBaseType()), attr.getSizeInBits(), + attr.getAlignInBits(), attr.getOffsetInBits(), +- /*DWARFAddressSpace=*/std::nullopt, /*Flags=*/llvm::DINode::FlagZero); ++ /*DWARFAddressSpace=*/std::nullopt, llvm::dwarf::DW_MSPACE_LLVM_none, ++ /*Flags=*/llvm::DINode::FlagZero); + } + + llvm::DIFile *DebugTranslation::translateImpl(DIFileAttr attr) { +@@ -185,7 +186,8 @@ DebugTranslation::translateImpl(DILocalVariableAttr attr) { + llvmCtx, translate(attr.getScope()), getMDStringOrNull(attr.getName()), + translate(attr.getFile()), attr.getLine(), translate(attr.getType()), + attr.getArg(), +- /*Flags=*/llvm::DINode::FlagZero, attr.getAlignInBits(), ++ /*Flags=*/llvm::DINode::FlagZero, llvm::dwarf::DW_MSPACE_LLVM_none, ++ attr.getAlignInBits(), + /*Annotations=*/nullptr); + } + diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/1001-mlir-failing-tests.list b/pkgs/development/rocm-modules/5/llvm/stage-3/1001-mlir-failing-tests.list new file mode 100644 index 000000000000..0b3d2d22592d --- /dev/null +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/1001-mlir-failing-tests.list @@ -0,0 +1,11 @@ +./test/Target/LLVMIR/openmp-llvm.mlir +./test/mlir-spirv-cpu-runner/double.mlir +./test/mlir-spirv-cpu-runner/simple_add.mlir +./test/mlir-vulkan-runner/addf.mlir +./test/mlir-vulkan-runner/addi.mlir +./test/mlir-vulkan-runner/addi8.mlir +./test/mlir-vulkan-runner/mulf.mlir +./test/mlir-vulkan-runner/smul_extended.mlir +./test/mlir-vulkan-runner/subf.mlir +./test/mlir-vulkan-runner/time.mlir +./test/mlir-vulkan-runner/umul_extended.mlir diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix index 0e0dcabec60c..1b0bc29ea62b 100644 --- a/pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/mlir.nix @@ -15,6 +15,11 @@ callPackage ../base.nix rec { buildMan = false; # No man pages to build targetName = "mlir"; targetDir = targetName; + + # Fix `DebugTranslation.cpp:139:10: error: no matching function for call to 'get'` + # We patch at a different source root, so we modify the patch and include it locally + # https://github.com/RadeonOpenCompute/llvm-project/commit/f1d1e10ec7e1061bf0b90abbc1e298d9438a5e74.patch + extraPatches = [ ./0000-mlir-fix-debugtranslation.patch ]; extraNativeBuildInputs = [ clr ]; extraBuildInputs = [ @@ -34,28 +39,12 @@ callPackage ../base.nix rec { ]; extraPostPatch = '' - chmod +w ../llvm - mkdir -p ../llvm/build/bin - ln -s ${lit}/bin/lit ../llvm/build/bin/llvm-lit - # `add_library cannot create target "llvm_gtest" because an imported target with the same name already exists` substituteInPlace CMakeLists.txt \ --replace "EXISTS \''${UNITTEST_DIR}/googletest/include/gtest/gtest.h" "FALSE" - substituteInPlace test/CMakeLists.txt \ - --replace "FileCheck count not" "" \ - --replace "list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime)" "" - - substituteInPlace lib/ExecutionEngine/CMakeLists.txt \ - --replace "return()" "" - - # Remove problematic tests - rm test/CAPI/execution_engine.c - rm test/Target/LLVMIR/llvmir-intrinsics.mlir - rm test/Target/LLVMIR/llvmir.mlir - rm test/Target/LLVMIR/openmp-llvm.mlir - rm test/mlir-cpu-runner/*.mlir - rm test/mlir-vulkan-runner/*.mlir + # Mainly `No such file or directory` + cat ${./1001-mlir-failing-tests.list} | xargs -d \\n rm ''; extraPostInstall = '' @@ -65,5 +54,4 @@ callPackage ../base.nix rec { checkTargets = [ "check-${targetName}" ]; requiredSystemFeatures = [ "big-parallel" ]; - isBroken = true; # `DebugTranslation.cpp:139:10: error: no matching function for call to 'get'` } From 6da31e558ad4d02fa6412f3312e67d13a4003594 Mon Sep 17 00:00:00 2001 From: Madoura Date: Wed, 4 Oct 2023 20:53:51 -0500 Subject: [PATCH 17/30] rocmPackages.llvm.flang: mark broken due to error --- pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix b/pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix index f5b4649e3f4b..421663dcb1b7 100644 --- a/pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix +++ b/pkgs/development/rocm-modules/5/llvm/stage-3/flang.nix @@ -25,4 +25,7 @@ callPackage ../base.nix rec { "-DCLANG_TABLEGEN_EXE=${clang-unwrapped}/bin/clang-tblgen" "-DFLANG_INCLUDE_TESTS=OFF" # `The dependency target "Bye" of target ...` ]; + + # `flang/lib/Semantics/check-omp-structure.cpp:1905:1: error: no member named 'v' in 'Fortran::parser::OmpClause::OmpxDynCgroupMem'` + isBroken = true; } From 0a89aedcea092534a4e0e2e57672dd35d5c8f518 Mon Sep 17 00:00:00 2001 From: Madoura Date: Tue, 3 Oct 2023 20:42:42 -0500 Subject: [PATCH 18/30] rocmPackages: fixup for 5.7.0 rocmPackages.rocprim: fixup for 5.7.0 rocmPackages.rocsparse: fixup for 5.7.0 rocmPackages.rocthrust: fixup for 5.7.0 rocmPackages.rocrand: fixup for 5.7.0 rocmPackages.rocfft: fixup for 5.7.0 rocmPackages.rccl: fixup for 5.7.0 rocmPackages.hipcub: fixup for 5.7.0 rocmPackages.hipsparse: fixup for 5.7.0 rocmPackages.hipfort: fixup for 5.7.0 rocmPackages.hipfft: fixup for 5.7.0 rocmPackages.tensile: fixup for 5.7.0 rocmPackages.rocblas: fixup for 5.7.0 rocmPackages.rocsolver: fixup for 5.7.0 rocmPackages.rocwmma: fixup for 5.7.0 rocmPackages.rocalution: fixup for 5.7.0 rocmPackages.rocmlir: fixup for 5.7.0 rocmPackages.hipsolver: fixup for 5.7.0 rocmPackages.hipblas: fixup for 5.7.0 rocmPackages.miopengemm: fixup for 5.7.0 rocmPackages.miopen: fixup for 5.7.0 rocmPackages.migraphx: fixup for 5.7.0 --- pkgs/development/rocm-modules/5/default.nix | 117 ++++++++------ .../rocm-modules/5/hipblas/default.nix | 7 +- .../rocm-modules/5/hipcub/default.nix | 9 +- .../rocm-modules/5/hipfft/default.nix | 13 +- .../rocm-modules/5/hipsolver/default.nix | 4 +- .../rocm-modules/5/hipsparse/default.nix | 14 +- .../rocm-modules/5/migraphx/default.nix | 28 +++- .../rocm-modules/5/miopen/default.nix | 146 ++++++++++++------ .../rocm-modules/5/miopen/deps.nix | 45 ------ .../rocm-modules/5/miopengemm/default.nix | 21 +-- .../rocm-modules/5/rccl/default.nix | 20 ++- .../rocm-modules/5/rocalution/default.nix | 10 +- .../rocm-modules/5/rocblas/default.nix | 8 +- .../rocm-modules/5/rocfft/default.nix | 94 ++--------- .../5/rocfft/device-install.patch | 15 -- .../5/rocfft/split-kernel-compilation.patch | 124 --------------- .../rocm-modules/5/rocmlir/default.nix | 44 ++++-- .../rocm-modules/5/rocprim/default.nix | 8 +- .../rocm-modules/5/rocrand/default.nix | 9 +- .../rocm-modules/5/rocsolver/default.nix | 7 +- .../rocm-modules/5/rocsparse/default.nix | 7 +- .../rocm-modules/5/rocthrust/default.nix | 9 +- .../rocwmma/0000-dont-fetch-googletest.patch | 12 +- .../rocm-modules/5/rocwmma/default.nix | 55 +------ .../rocm-modules/5/tensile/default.nix | 25 ++- 25 files changed, 357 insertions(+), 494 deletions(-) delete mode 100644 pkgs/development/rocm-modules/5/miopen/deps.nix delete mode 100644 pkgs/development/rocm-modules/5/rocfft/device-install.patch delete mode 100644 pkgs/development/rocm-modules/5/rocfft/split-kernel-compilation.patch diff --git a/pkgs/development/rocm-modules/5/default.nix b/pkgs/development/rocm-modules/5/default.nix index be597edfe150..d3effc777613 100644 --- a/pkgs/development/rocm-modules/5/default.nix +++ b/pkgs/development/rocm-modules/5/default.nix @@ -82,6 +82,7 @@ in rec { stdenv = llvm.rocmClangStdenv; }; + # Replaces hip, opencl-runtime, and rocclr clr = callPackage ./clr { inherit rocmUpdateScript hip-common hipcc rocm-device-libs rocm-comgr rocm-runtime roctracer rocminfo; inherit (llvm) clang; @@ -122,70 +123,92 @@ in rec { stdenv = llvm.rocmClangStdenv; }; + ## ROCmSoftwarePlatform ## + rocprim = callPackage ./rocprim { + inherit rocmUpdateScript rocm-cmake clr; + stdenv = llvm.rocmClangStdenv; + }; + rocsparse = callPackage ./rocsparse { + inherit rocmUpdateScript rocm-cmake rocprim clr; + stdenv = llvm.rocmClangStdenv; + }; + rocthrust = callPackage ./rocthrust { + inherit rocmUpdateScript rocm-cmake rocprim clr; + stdenv = llvm.rocmClangStdenv; + }; + rocrand = callPackage ./rocrand { + inherit rocmUpdateScript rocm-cmake clr; + stdenv = llvm.rocmClangStdenv; + }; + hiprand = rocrand; # rocrand includes hiprand + rocfft = callPackage ./rocfft { + inherit rocmUpdateScript rocm-cmake rocrand rocfft clr; + inherit (llvm) openmp; + stdenv = llvm.rocmClangStdenv; + }; - - - - - - - - composable_kernel = callPackage ./composable_kernel { - inherit (llvm) openmp clang-tools-extra; + rccl = callPackage ./rccl { + inherit rocmUpdateScript rocm-cmake rocm-smi clr hipify; stdenv = llvm.rocmClangStdenv; }; hipcub = callPackage ./hipcub { + inherit rocmUpdateScript rocm-cmake rocprim clr; stdenv = llvm.rocmClangStdenv; }; hipsparse = callPackage ./hipsparse { + inherit rocmUpdateScript rocm-cmake rocsparse clr; inherit (llvm) openmp; stdenv = llvm.rocmClangStdenv; }; hipfort = callPackage ./hipfort { + inherit rocmUpdateScript rocm-cmake; stdenv = llvm.rocmClangStdenv; }; hipfft = callPackage ./hipfft { + inherit rocmUpdateScript rocm-cmake rocfft clr; inherit (llvm) openmp; stdenv = llvm.rocmClangStdenv; }; - hipsolver = callPackage ./hipsolver { + tensile = python3Packages.callPackage ./tensile { + inherit rocmUpdateScript rocminfo; stdenv = llvm.rocmClangStdenv; }; - hipblas = callPackage ./hipblas { - stdenv = llvm.rocmClangStdenv; - }; - - migraphx = callPackage ./migraphx { - inherit (llvm) clang-tools-extra openmp; - stdenv = llvm.rocmClangStdenv; - rocmlir = rocmlir-rock; - }; - - rccl = callPackage ./rccl { - stdenv = llvm.rocmClangStdenv; - }; - - rocalution = callPackage ./rocalution { + rocblas = callPackage ./rocblas { + inherit rocmUpdateScript rocm-cmake clr tensile; inherit (llvm) openmp; stdenv = llvm.rocmClangStdenv; }; rocsolver = callPackage ./rocsolver { + inherit rocmUpdateScript rocm-cmake rocblas rocsparse clr; + stdenv = llvm.rocmClangStdenv; + }; + + rocwmma = callPackage ./rocwmma { + inherit rocmUpdateScript rocm-cmake rocm-smi rocblas clr; + inherit (llvm) openmp; + stdenv = llvm.rocmClangStdenv; + }; + + rocalution = callPackage ./rocalution { + inherit rocmUpdateScript rocm-cmake rocprim rocsparse rocrand rocblas clr; + inherit (llvm) openmp; stdenv = llvm.rocmClangStdenv; }; rocmlir = callPackage ./rocmlir { + inherit rocmUpdateScript rocm-cmake clr; stdenv = llvm.rocmClangStdenv; }; @@ -193,47 +216,32 @@ in rec { buildRockCompiler = true; }; - rocprim = callPackage ./rocprim { + hipsolver = callPackage ./hipsolver { + inherit rocmUpdateScript rocm-cmake rocblas rocsolver clr; stdenv = llvm.rocmClangStdenv; }; - rocsparse = callPackage ./rocsparse { + hipblas = callPackage ./hipblas { + inherit rocmUpdateScript rocm-cmake rocblas rocsolver clr; stdenv = llvm.rocmClangStdenv; }; - rocfft = callPackage ./rocfft { - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rocrand = callPackage ./rocrand { - stdenv = llvm.rocmClangStdenv; - }; - - tensile = python3Packages.callPackage ./tensile { - stdenv = llvm.rocmClangStdenv; - }; - - rocwmma = callPackage ./rocwmma { - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; - - rocblas = callPackage ./rocblas { - inherit (llvm) openmp; - stdenv = llvm.rocmClangStdenv; - }; + # hipBlasLt - Very broken with Tensile at the moment, only supports GFX9 + # hipTensor - Only supports GFX9 miopengemm = callPackage ./miopengemm { + inherit rocmUpdateScript rocm-cmake clr; stdenv = llvm.rocmClangStdenv; }; - rocthrust = callPackage ./rocthrust { + composable_kernel = callPackage ./composable_kernel { + inherit (llvm) openmp clang-tools-extra; stdenv = llvm.rocmClangStdenv; }; miopen = callPackage ./miopen { - inherit (llvm) llvm clang-tools-extra; + inherit rocmUpdateScript rocm-cmake rocblas clang-ocl miopengemm composable_kernel rocm-comgr clr rocm-docs-core half; + inherit (llvm) clang-tools-extra; stdenv = llvm.rocmClangStdenv; rocmlir = rocmlir-rock; boost = boost179.override { enableStatic = true; }; @@ -246,4 +254,11 @@ in rec { miopen-opencl = miopen.override { useOpenCL = true; }; + + migraphx = callPackage ./migraphx { + inherit rocmUpdateScript rocm-cmake rocblas composable_kernel miopengemm miopen clr half rocm-device-libs; + inherit (llvm) openmp clang-tools-extra; + stdenv = llvm.rocmClangStdenv; + rocmlir = rocmlir-rock; + }; } diff --git a/pkgs/development/rocm-modules/5/hipblas/default.nix b/pkgs/development/rocm-modules/5/hipblas/default.nix index cb60e5de6633..b2206c737b00 100644 --- a/pkgs/development/rocm-modules/5/hipblas/default.nix +++ b/pkgs/development/rocm-modules/5/hipblas/default.nix @@ -4,7 +4,7 @@ , rocmUpdateScript , cmake , rocm-cmake -, hip +, clr , gfortran , rocblas , rocsolver @@ -40,7 +40,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr gfortran ]; @@ -94,7 +94,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - # Fixed in develop branch by using C++17 and related refactor - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || buildTests || buildBenchmarks || buildSamples; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/rocm-modules/5/hipcub/default.nix b/pkgs/development/rocm-modules/5/hipcub/default.nix index b3a23241366f..447c2c4174af 100644 --- a/pkgs/development/rocm-modules/5/hipcub/default.nix +++ b/pkgs/development/rocm-modules/5/hipcub/default.nix @@ -5,11 +5,12 @@ , cmake , rocm-cmake , rocprim -, hip +, clr , gtest , gbenchmark , buildTests ? false , buildBenchmarks ? false +, gpuTargets ? [ ] }: # CUB can also be used as a backend instead of rocPRIM. @@ -35,7 +36,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr ]; buildInputs = [ @@ -48,12 +49,14 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ "-DCMAKE_CXX_COMPILER=hipcc" - "-DHIP_ROOT_DIR=${hip}" + "-DHIP_ROOT_DIR=${clr}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DBUILD_TEST=ON" ] ++ lib.optionals buildBenchmarks [ diff --git a/pkgs/development/rocm-modules/5/hipfft/default.nix b/pkgs/development/rocm-modules/5/hipfft/default.nix index 1e959f0ad56a..153a7c8c18cc 100644 --- a/pkgs/development/rocm-modules/5/hipfft/default.nix +++ b/pkgs/development/rocm-modules/5/hipfft/default.nix @@ -4,7 +4,7 @@ , rocmUpdateScript , cmake , rocm-cmake -, hip +, clr , git , rocfft , gtest @@ -15,6 +15,7 @@ , buildTests ? false , buildBenchmarks ? false , buildSamples ? false +, gpuTargets ? [ ] }: # Can also use cuFFT @@ -41,7 +42,7 @@ stdenv.mkDerivation (finalAttrs: { }; nativeBuildInputs = [ - hip + clr git cmake rocm-cmake @@ -60,14 +61,16 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ "-DCMAKE_C_COMPILER=hipcc" "-DCMAKE_CXX_COMPILER=hipcc" - "-DCMAKE_MODULE_PATH=${hip}/lib/cmake/hip" - "-DHIP_ROOT_DIR=${hip}" - "-DHIP_PATH=${hip}" + "-DCMAKE_MODULE_PATH=${clr}/lib/cmake/hip" + "-DHIP_ROOT_DIR=${clr}" + "-DHIP_PATH=${clr}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DBUILD_CLIENTS_TESTS=ON" ] ++ lib.optionals buildBenchmarks [ diff --git a/pkgs/development/rocm-modules/5/hipsolver/default.nix b/pkgs/development/rocm-modules/5/hipsolver/default.nix index 238564d631bc..34592a5bbd96 100644 --- a/pkgs/development/rocm-modules/5/hipsolver/default.nix +++ b/pkgs/development/rocm-modules/5/hipsolver/default.nix @@ -4,7 +4,7 @@ , rocmUpdateScript , cmake , rocm-cmake -, hip +, clr , gfortran , rocblas , rocsolver @@ -40,7 +40,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr gfortran ]; diff --git a/pkgs/development/rocm-modules/5/hipsparse/default.nix b/pkgs/development/rocm-modules/5/hipsparse/default.nix index 6e6197209e4b..79b78f3661d8 100644 --- a/pkgs/development/rocm-modules/5/hipsparse/default.nix +++ b/pkgs/development/rocm-modules/5/hipsparse/default.nix @@ -5,13 +5,14 @@ , cmake , rocm-cmake , rocsparse -, hip +, clr , gfortran , git , gtest , openmp , buildTests ? false , buildSamples ? false +, gpuTargets ? [ ] }: # This can also use cuSPARSE as a backend instead of rocSPARSE @@ -37,7 +38,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr gfortran ]; @@ -59,16 +60,15 @@ stdenv.mkDerivation (finalAttrs: { "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DBUILD_CLIENTS_TESTS=ON" ]; # We have to manually generate the matrices # CMAKE_MATRICES_DIR seems to be reset in clients/tests/CMakeLists.txt - postPatch = '' - substituteInPlace clients/common/utility.cpp \ - --replace "#ifdef __cpp_lib_filesystem" " #if true" - '' + lib.optionalString buildTests '' + postPatch = lib.optionalString buildTests '' mkdir -p matrices ln -s ${rocsparse.passthru.matrices.matrix-01}/*.mtx matrices @@ -116,7 +116,7 @@ stdenv.mkDerivation (finalAttrs: { mkdir -p $sample/bin mv clients/staging/example_* $sample/bin patchelf --set-rpath $out/lib:${lib.makeLibraryPath ( - finalAttrs.buildInputs ++ [ hip gfortran.cc ])} $sample/bin/example_* + finalAttrs.buildInputs ++ [ clr gfortran.cc ])} $sample/bin/example_* ''; passthru.updateScript = rocmUpdateScript { diff --git a/pkgs/development/rocm-modules/5/migraphx/default.nix b/pkgs/development/rocm-modules/5/migraphx/default.nix index fdc97f45da9d..5842cd1695d5 100644 --- a/pkgs/development/rocm-modules/5/migraphx/default.nix +++ b/pkgs/development/rocm-modules/5/migraphx/default.nix @@ -5,11 +5,12 @@ , pkg-config , cmake , rocm-cmake -, hip +, clr , clang-tools-extra , openmp , rocblas , rocmlir +, composable_kernel , miopengemm , miopen , protobuf @@ -19,6 +20,8 @@ , sqlite , oneDNN_2 , blaze +, cppcheck +, rocm-device-libs , texlive , doxygen , sphinx @@ -67,7 +70,7 @@ in stdenv.mkDerivation (finalAttrs: { pkg-config cmake rocm-cmake - hip + clr clang-tools-extra python3Packages.python ] ++ lib.optionals buildDocs [ @@ -84,6 +87,7 @@ in stdenv.mkDerivation (finalAttrs: { openmp rocblas rocmlir + composable_kernel miopengemm miopen protobuf @@ -93,18 +97,16 @@ in stdenv.mkDerivation (finalAttrs: { sqlite oneDNN_2 blaze + cppcheck python3Packages.pybind11 python3Packages.onnx ]; cmakeFlags = [ - "-DCMAKE_POLICY_DEFAULT_CMP0079=NEW" - # "-DCMAKE_C_COMPILER=hipcc" - # "-DCMAKE_CXX_COMPILER=hipcc" - "-DMIGRAPHX_ENABLE_GPU=OFF" # GPU compilation is broken, don't know why + "-DMIGRAPHX_ENABLE_GPU=ON" "-DMIGRAPHX_ENABLE_CPU=ON" "-DMIGRAPHX_ENABLE_FPGA=ON" - "-DMIGRAPHX_ENABLE_MLIR=ON" + "-DMIGRAPHX_ENABLE_MLIR=OFF" # LLVM or rocMLIR mismatch? # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" @@ -113,10 +115,20 @@ in stdenv.mkDerivation (finalAttrs: { ]; postPatch = '' + # We need to not use hipcc and define the CXXFLAGS manually due to `undefined hidden symbol: tensorflow:: ...` + export CXXFLAGS+="--rocm-path=${clr} --rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode" patchShebangs tools + # `error: '__clang_hip_runtime_wrapper.h' file not found [clang-diagnostic-error]` + substituteInPlace CMakeLists.txt \ + --replace "set(MIGRAPHX_TIDY_ERRORS ALL)" "" + + # JIT library was removed from composable_kernel... + # https://github.com/ROCmSoftwarePlatform/composable_kernel/issues/782 substituteInPlace src/targets/gpu/CMakeLists.txt \ - --replace "CMAKE_CXX_COMPILER MATCHES \".*clang\\\+\\\+\$\"" "TRUE" + --replace " COMPONENTS jit_library" "" \ + --replace " composable_kernel::jit_library" "" \ + --replace "if(WIN32)" "if(TRUE)" '' + lib.optionalString (!buildDocs) '' substituteInPlace CMakeLists.txt \ --replace "add_subdirectory(doc)" "" diff --git a/pkgs/development/rocm-modules/5/miopen/default.nix b/pkgs/development/rocm-modules/5/miopen/default.nix index dd7661a09756..d22518aa51c6 100644 --- a/pkgs/development/rocm-modules/5/miopen/default.nix +++ b/pkgs/development/rocm-modules/5/miopen/default.nix @@ -1,23 +1,26 @@ { lib , stdenv , fetchFromGitHub -, fetchurl +, fetchpatch , rocmUpdateScript +, runCommand , pkg-config , cmake , rocm-cmake , rocblas , rocmlir -, hip +, clr , clang-tools-extra , clang-ocl -, llvm , miopengemm , composable_kernel +, frugally-deep +, rocm-docs-core , half , boost , sqlite , bzip2 +, lbzip2 , nlohmann_json , texlive , doxygen @@ -26,13 +29,48 @@ , gtest , rocm-comgr , python3Packages -, buildDocs ? true +, buildDocs ? false # Needs internet because of rocm-docs-core , buildTests ? false -, fetchKDBs ? true , useOpenCL ? false }: let + version = "5.7.0"; + + src = fetchFromGitHub { + owner = "ROCmSoftwarePlatform"; + repo = "MIOpen"; + rev = "rocm-${version}"; + hash = "sha256-xcKmFI8HcRA9bbh6EQGElKykIQ3RJX/q5f4IxXvM1Is="; + fetchLFS = true; + leaveDotGit = true; + + # If you're reading this, it's gonna take a bit of time. + # fetchSubModules doesn't work with postFetch??? + # fetchLFS isn't actually fetching the LFS files... + postFetch = '' + export HOME=$(mktemp -d) + cd $out + + # We need more history to fetch LFS files + git remote add origin $url + git fetch origin + git clean -fdx + git checkout rocm-${version} + + # We need to do this manually since using leaveDotGit and fetchSubmodules errors + git submodule update --init + + # Fetch the LFS files + git lfs install + git lfs fetch --all + git lfs checkout + + # Remove the defunct .git folder + rm -rf .git + ''; + }; + latex = lib.optionalAttrs buildDocs texlive.combine { inherit (texlive) scheme-small latexmk @@ -47,13 +85,40 @@ let titlesec; }; - kdbs = lib.optionalAttrs fetchKDBs import ./deps.nix { - inherit fetchurl; - mirror = "https://repo.radeon.com/rocm/miopen-kernel/rel-5.0"; - }; + gfx900 = runCommand "miopen-gfx900.kdb" { preferLocalBuild = true; } '' + ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx900.kdb.bz2 > $out + ''; + + gfx906 = runCommand "miopen-gfx906.kdb" { preferLocalBuild = true; } '' + ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx906.kdb.bz2 > $out + ''; + + gfx908 = runCommand "miopen-gfx908.kdb" { preferLocalBuild = true; } '' + ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx908.kdb.bz2 > $out + ''; + + gfx90a = runCommand "miopen-gfx90a.kdb" { preferLocalBuild = true; } '' + ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx90a.kdb.bz2 > $out + ''; + + gfx1030 = runCommand "miopen-gfx1030.kdb" { preferLocalBuild = true; } '' + ${lbzip2}/bin/lbzip2 -ckd ${src}/src/kernels/gfx1030.kdb.bz2 > $out + ''; in stdenv.mkDerivation (finalAttrs: { + inherit version src; pname = "miopen"; - version = "5.7.0"; + + # Find zstd and add to target. Mainly for torch. + patches = [ + (fetchpatch { + url = "https://github.com/ROCmSoftwarePlatform/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch"; + hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M="; + }) + (fetchpatch { + url = "https://github.com/ROCmSoftwarePlatform/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch"; + hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs="; + }) + ]; outputs = [ "out" @@ -63,23 +128,15 @@ in stdenv.mkDerivation (finalAttrs: { "test" ]; - src = fetchFromGitHub { - owner = "ROCmSoftwarePlatform"; - repo = "MIOpen"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-6Bz4yDbQtV8XlEpwbH0YsJFaaZqH7BOfZDL7F4JTS1Q="; - }; - nativeBuildInputs = [ pkg-config cmake rocm-cmake - hip + clr clang-tools-extra ]; buildInputs = [ - llvm rocblas rocmlir clang-ocl @@ -90,10 +147,12 @@ in stdenv.mkDerivation (finalAttrs: { sqlite bzip2 nlohmann_json + frugally-deep ] ++ lib.optionals buildDocs [ latex doxygen sphinx + rocm-docs-core python3Packages.sphinx-rtd-theme python3Packages.breathe python3Packages.myst-parser @@ -102,7 +161,9 @@ in stdenv.mkDerivation (finalAttrs: { ]; cmakeFlags = [ + "-DCMAKE_CXX_FLAGS=-Wno-#warnings" # -> "-DMIOPEN_USE_MIOPENGEMM=ON" + "-DUNZIPPER=${bzip2}/bin/bunzip2" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" @@ -117,58 +178,47 @@ in stdenv.mkDerivation (finalAttrs: { ] ++ lib.optionals buildTests [ "-DBUILD_TESTS=ON" "-DMIOPEN_TEST_ALL=ON" - "-DMIOPEN_TEST_GFX900=ON" - "-DMIOPEN_TEST_GFX906=ON" - "-DMIOPEN_TEST_GFX908=ON" - "-DMIOPEN_TEST_GFX90A=ON" - "-DMIOPEN_TEST_GFX103X=ON" - "-DGOOGLETEST_DIR=${gtest.src}" # Custom linker names ]; postPatch = '' + patchShebangs test src/composable_kernel fin utils install_deps.cmake + substituteInPlace CMakeLists.txt \ - --replace "enable_testing()" "" \ + --replace "unpack_db(\"\''${CMAKE_SOURCE_DIR}/src/kernels/\''${FILE_NAME}.kdb.bz2\")" "" \ --replace "MIOPEN_HIP_COMPILER MATCHES \".*clang\\\\+\\\\+$\"" "true" \ --replace "set(MIOPEN_TIDY_ERRORS ALL)" "" # error: missing required key 'key' - '' + lib.optionalString buildTests '' + substituteInPlace test/gtest/CMakeLists.txt \ - --replace "enable_testing()" "" - '' + lib.optionalString (!buildTests) '' - substituteInPlace CMakeLists.txt \ - --replace "add_subdirectory(test)" "" - '' + lib.optionalString fetchKDBs '' - ln -sf ${kdbs.gfx1030_36} src/kernels/gfx1030_36.kdb - ln -sf ${kdbs.gfx900_56} src/kernels/gfx900_56.kdb - ln -sf ${kdbs.gfx900_64} src/kernels/gfx900_64.kdb - ln -sf ${kdbs.gfx906_60} src/kernels/gfx906_60.kdb - ln -sf ${kdbs.gfx906_64} src/kernels/gfx906_64.kdb - ln -sf ${kdbs.gfx90878} src/kernels/gfx90878.kdb - ln -sf ${kdbs.gfx90a68} src/kernels/gfx90a68.kdb - ln -sf ${kdbs.gfx90a6e} src/kernels/gfx90a6e.kdb + --replace "include(googletest)" "" + + ln -sf ${gfx900} src/kernels/gfx900.kdb + ln -sf ${gfx906} src/kernels/gfx906.kdb + ln -sf ${gfx908} src/kernels/gfx908.kdb + ln -sf ${gfx90a} src/kernels/gfx90a.kdb + ln -sf ${gfx1030} src/kernels/gfx1030.kdb ''; # Unfortunately, it seems like we have to call make on these manually postBuild = lib.optionalString buildDocs '' - export HOME=$(mktemp -d) - make -j$NIX_BUILD_CORES doc + python -m sphinx -T -E -b html -d _build/doctrees -D language=en ../docs _build/html '' + lib.optionalString buildTests '' make -j$NIX_BUILD_CORES check ''; postInstall = '' rm $out/bin/install_precompiled_kernels.sh + ln -sf ${gfx900} $out/share/miopen/db/gfx900.kdb + ln -sf ${gfx906} $out/share/miopen/db/gfx906.kdb + ln -sf ${gfx908} $out/share/miopen/db/gfx908.kdb + ln -sf ${gfx90a} $out/share/miopen/db/gfx90a.kdb + ln -sf ${gfx1030} $out/share/miopen/db/gfx1030.kdb '' + lib.optionalString buildDocs '' mv ../doc/html $out/share/doc/miopen-${if useOpenCL then "opencl" else "hip"} - mv ../doc/pdf/miopen.pdf $out/share/doc/miopen-${if useOpenCL then "opencl" else "hip"} '' + lib.optionalString buildTests '' mkdir -p $test/bin mv bin/test_* $test/bin patchelf --set-rpath $out/lib:${lib.makeLibraryPath (finalAttrs.buildInputs ++ - [ hip rocm-comgr ])} $test/bin/* - '' + lib.optionalString fetchKDBs '' - # Apparently gfx1030_40 wasn't generated so the developers suggest just renaming gfx1030_36 to it - # Should be fixed in the next miopen kernel generation batch - ln -s ${kdbs.gfx1030_36} $out/share/miopen/db/gfx1030_40.kdb + [ clr rocm-comgr ])} $test/bin/* ''; requiredSystemFeatures = [ "big-parallel" ]; diff --git a/pkgs/development/rocm-modules/5/miopen/deps.nix b/pkgs/development/rocm-modules/5/miopen/deps.nix deleted file mode 100644 index e88b61ad974c..000000000000 --- a/pkgs/development/rocm-modules/5/miopen/deps.nix +++ /dev/null @@ -1,45 +0,0 @@ -{ fetchurl -, mirror -}: - -{ - gfx1030_36 = fetchurl { - sha256 = "sha256-zEXDLkRWAHS15LDA5IRyqG5rO7HHPBiVgPlQ8JjSqNc="; - url = "${mirror}/gfx1030_36.kdb"; - }; - - gfx900_56 = fetchurl { - sha256 = "sha256-ZTqUPhVKcQzjO6bxykvZMJk1VZh31dRVs+XqcxEtmeI="; - url = "${mirror}/gfx900_56.kdb"; - }; - - gfx900_64 = fetchurl { - sha256 = "sha256-ZTqUPhVKcQzjO6bxykvZMJk1VZh31dRVs+XqcxEtmeI="; - url = "${mirror}/gfx900_64.kdb"; - }; - - gfx906_60 = fetchurl { - sha256 = "sha256-U6pDo8ICfs6fVIEqRziWeE5/4Vzvu41JkcRVn3ou1e4="; - url = "${mirror}/gfx906_60.kdb"; - }; - - gfx906_64 = fetchurl { - sha256 = "sha256-U6pDo8ICfs6fVIEqRziWeE5/4Vzvu41JkcRVn3ou1e4="; - url = "${mirror}/gfx906_64.kdb"; - }; - - gfx90878 = fetchurl { - sha256 = "sha256-r7DRhNH+jHUXAu64b9vWsZzGD4w5oSHnxH0l2RN0qlQ="; - url = "${mirror}/gfx90878.kdb"; - }; - - gfx90a68 = fetchurl { - sha256 = "sha256-NT//zIPTbzsPJyaVycxwU6BcMTzGc/d+Z4Ab9FImDko="; - url = "${mirror}/gfx90a68.kdb"; - }; - - gfx90a6e = fetchurl { - sha256 = "sha256-ENZHbf+/MGYgSTpALKh2meuZPNhH5bG+WrW/jzvGpBs="; - url = "${mirror}/gfx90a6e.kdb"; - }; -} diff --git a/pkgs/development/rocm-modules/5/miopengemm/default.nix b/pkgs/development/rocm-modules/5/miopengemm/default.nix index 92616f79eedc..bda94cee61b3 100644 --- a/pkgs/development/rocm-modules/5/miopengemm/default.nix +++ b/pkgs/development/rocm-modules/5/miopengemm/default.nix @@ -4,7 +4,8 @@ , rocmUpdateScript , cmake , rocm-cmake -, rocm-opencl-runtime +, clr +, clblast , texlive , doxygen , sphinx @@ -53,11 +54,10 @@ in stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake + clr ]; - buildInputs = [ - rocm-opencl-runtime - ] ++ lib.optionals buildDocs [ + buildInputs = lib.optionals buildDocs [ latex doxygen sphinx @@ -65,6 +65,9 @@ in stdenv.mkDerivation (finalAttrs: { python3Packages.breathe ] ++ lib.optionals buildTests [ openblas + ] ++ lib.optionals buildBenchmarks [ + clblast + python3Packages.openai-triton ]; cmakeFlags = [ @@ -77,10 +80,8 @@ in stdenv.mkDerivation (finalAttrs: { "-DOPENBLAS=ON" ] ++ lib.optionals buildBenchmarks [ "-DAPI_BENCH_MIOGEMM=ON" - # Needs https://github.com/CNugteren/CLBlast - # "-DAPI_BENCH_CLBLAST=ON" - # Needs https://github.com/openai/triton - # "-DAPI_BENCH_ISAAC=ON" + "-DAPI_BENCH_CLBLAST=ON" + "-DAPI_BENCH_ISAAC=ON" ]; # Unfortunately, it seems like we have to call make on these manually @@ -118,6 +119,8 @@ in stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; + # They are not making tags or releases, this may break other derivations in the future + # Use version major instead of minor, 6.0 will HOPEFULLY have a release or tag + broken = versions.major finalAttrs.version != versions.major stdenv.cc.version; }; }) diff --git a/pkgs/development/rocm-modules/5/rccl/default.nix b/pkgs/development/rocm-modules/5/rccl/default.nix index 68dafc29c164..d4045252bae4 100644 --- a/pkgs/development/rocm-modules/5/rccl/default.nix +++ b/pkgs/development/rocm-modules/5/rccl/default.nix @@ -5,10 +5,13 @@ , cmake , rocm-cmake , rocm-smi -, hip +, clr +, perl +, hipify , gtest , chrpath , buildTests ? false +, gpuTargets ? [ ] }: stdenv.mkDerivation (finalAttrs: { @@ -31,7 +34,9 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr + perl + hipify ]; buildInputs = [ @@ -42,22 +47,25 @@ stdenv.mkDerivation (finalAttrs: { ]; cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" "-DCMAKE_CXX_COMPILER=hipcc" + "-DBUILD_BFD=OFF" # Can't get it to detect bfd.h # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DBUILD_TESTS=ON" ]; - # Replace the manually set parallel jobs to NIX_BUILD_CORES postPatch = '' + patchShebangs src tools + + # Really strange behavior, `#!/usr/bin/env perl` should work... substituteInPlace CMakeLists.txt \ - --replace "8 P" "$NIX_BUILD_CORES P" \ - --replace "8)" "$NIX_BUILD_CORES)" + --replace "\''$ \''${hipify-perl_executable}" "${perl}/bin/perl ${hipify}/bin/hipify-perl" ''; postInstall = lib.optionalString buildTests '' diff --git a/pkgs/development/rocm-modules/5/rocalution/default.nix b/pkgs/development/rocm-modules/5/rocalution/default.nix index 650e9dc7a1ca..80fd655557df 100644 --- a/pkgs/development/rocm-modules/5/rocalution/default.nix +++ b/pkgs/development/rocm-modules/5/rocalution/default.nix @@ -8,7 +8,7 @@ , rocsparse , rocprim , rocrand -, hip +, clr , git , openmp , openmpi @@ -43,7 +43,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr git ]; @@ -60,8 +60,8 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ "-DCMAKE_CXX_COMPILER=hipcc" - "-DROCM_PATH=${hip}" - "-DHIP_ROOT_DIR=${hip}" + "-DROCM_PATH=${clr}" + "-DHIP_ROOT_DIR=${clr}" "-DSUPPORT_HIP=ON" "-DSUPPORT_OMP=ON" "-DSUPPORT_MPI=ON" @@ -92,7 +92,7 @@ stdenv.mkDerivation (finalAttrs: { rm $sample/bin/rocalution-bench || true patchelf --set-rpath \ - $out/lib:${lib.makeLibraryPath (finalAttrs.buildInputs ++ [ hip ])} \ + $out/lib:${lib.makeLibraryPath (finalAttrs.buildInputs ++ [ clr ])} \ $sample/bin/* '' + lib.optionalString (buildTests || buildBenchmarks) '' rmdir $out/bin diff --git a/pkgs/development/rocm-modules/5/rocblas/default.nix b/pkgs/development/rocm-modules/5/rocblas/default.nix index 59d23ad121da..f1cd81df663f 100644 --- a/pkgs/development/rocm-modules/5/rocblas/default.nix +++ b/pkgs/development/rocm-modules/5/rocblas/default.nix @@ -4,7 +4,7 @@ , rocmUpdateScript , cmake , rocm-cmake -, hip +, clr , python3 , tensile , msgpack @@ -18,13 +18,14 @@ , buildTests ? false , buildBenchmarks ? false , tensileLogic ? "asm_full" -, tensileCOVersion ? "V3" +, tensileCOVersion ? "default" , tensileSepArch ? true , tensileLazyLib ? true , tensileLibFormat ? "msgpack" , gpuTargets ? [ "all" ] }: +# rocBLAS is 3.7GB... I'll have to figure out hydra in another PR stdenv.mkDerivation (finalAttrs: { pname = "rocblas"; version = "5.7.0"; @@ -47,7 +48,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr ]; buildInputs = [ @@ -56,6 +57,7 @@ stdenv.mkDerivation (finalAttrs: { msgpack libxml2 python3Packages.msgpack + python3Packages.joblib ] ++ lib.optionals buildTests [ gtest ] ++ lib.optionals (buildTests || buildBenchmarks) [ diff --git a/pkgs/development/rocm-modules/5/rocfft/default.nix b/pkgs/development/rocm-modules/5/rocfft/default.nix index ee1078eabb28..ac50318622ce 100644 --- a/pkgs/development/rocm-modules/5/rocfft/default.nix +++ b/pkgs/development/rocm-modules/5/rocfft/default.nix @@ -4,7 +4,7 @@ , fetchFromGitHub , rocmUpdateScript , cmake -, hip +, clr , python3 , rocm-cmake , sqlite @@ -14,73 +14,9 @@ , gtest , openmp , rocrand -# NOTE: Update the default GPU targets on every update -, gpuTargets ? [ - "gfx803" - "gfx900" - "gfx906" - "gfx908" - "gfx90a" - "gfx1030" - "gfx1100" - "gfx1102" -] +, gpuTargets ? [ ] }: -let - # To avoid output limit exceeded errors in hydra, we build kernel - # device libs and the kernel RTC cache database in separate derivations - kernelDeviceLibs = map - (target: - (rocfft.overrideAttrs (prevAttrs: { - pname = "rocfft-device-${target}"; - - patches = prevAttrs.patches ++ [ - # Add back install rule for device library - # This workaround is needed because rocm_install_targets - # doesn't support an EXCLUDE_FROM_ALL option - ./device-install.patch - ]; - - buildFlags = [ "rocfft-device-${target}" ]; - - installPhase = '' - runHook preInstall - cmake --install . --component device - runHook postInstall - ''; - - requiredSystemFeatures = [ "big-parallel" ]; - })).override { - gpuTargets = [ target ]; - } - ) - gpuTargets; - - # TODO: Figure out how to also split this by GPU target - # - # It'll be bit more complicated than what we're doing for the kernel - # device libs, because the kernel cache needs to be compiled into - # one sqlite database (whereas the device libs can be linked into - # rocfft as separate libraries for each GPU target). - # - # It's not clear why this needs to even be a db in the first place. - # It would simplify things A LOT if we could just store these - # pre-compiled kernels as files (but that'd need a lot of patching). - kernelRtcCache = rocfft.overrideAttrs (_: { - pname = "rocfft-kernel-cache"; - - buildFlags = [ "rocfft_kernel_cache_target" ]; - - installPhase = '' - runHook preInstall - cmake --install . --component kernel_cache - runHook postInstall - ''; - - requiredSystemFeatures = [ "big-parallel" ]; - }); -in stdenv.mkDerivation (finalAttrs: { pname = "rocfft"; version = "5.7.0"; @@ -92,40 +28,28 @@ stdenv.mkDerivation (finalAttrs: { hash = "sha256-GZSi03geTT+NUztBWhGYyghLqJGsFjUQzVAKQ7d03uA="; }; - patches = [ - # Exclude kernel compilation & installation from "all" target, - # and split device libraries by GPU target - ./split-kernel-compilation.patch - ]; - nativeBuildInputs = [ cmake - hip + clr python3 rocm-cmake ]; - buildInputs = [ - sqlite - ] ++ lib.optionals (finalAttrs.pname == "rocfft") kernelDeviceLibs; + buildInputs = [ sqlite ]; cmakeFlags = [ "-DCMAKE_C_COMPILER=hipcc" "-DCMAKE_CXX_COMPILER=hipcc" - "-DUSE_HIP_CLANG=ON" "-DSQLITE_USE_SYSTEM_PACKAGE=ON" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] ++ lib.optionals (gpuTargets != [ ]) [ "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ]; - postInstall = lib.optionalString (finalAttrs.pname == "rocfft") '' - ln -s ${kernelRtcCache}/lib/rocfft_kernel_cache.db "$out/lib" - ''; - passthru = { test = stdenv.mkDerivation { pname = "${finalAttrs.pname}-test"; @@ -135,7 +59,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake - hip + clr rocm-cmake ]; @@ -168,7 +92,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake - hip + clr rocm-cmake ]; @@ -201,7 +125,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake - hip + clr rocm-cmake ]; @@ -232,6 +156,8 @@ stdenv.mkDerivation (finalAttrs: { }; }; + requiredSystemFeatures = [ "big-parallel" ]; + meta = with lib; { description = "FFT implementation for ROCm"; homepage = "https://github.com/ROCmSoftwarePlatform/rocFFT"; diff --git a/pkgs/development/rocm-modules/5/rocfft/device-install.patch b/pkgs/development/rocm-modules/5/rocfft/device-install.patch deleted file mode 100644 index 355cf30d07ff..000000000000 --- a/pkgs/development/rocm-modules/5/rocfft/device-install.patch +++ /dev/null @@ -1,15 +0,0 @@ -diff --git a/library/src/device/CMakeLists.txt b/library/src/device/CMakeLists.txt -index 73a8ec9..9bfd4b8 100644 ---- a/library/src/device/CMakeLists.txt -+++ b/library/src/device/CMakeLists.txt -@@ -255,4 +255,10 @@ foreach( sub ${AMDGPU_TARGETS} ) - if( NOT BUILD_SHARED_LIBS ) - set_target_properties( rocfft-device-${sub} PROPERTIES PREFIX "lib" ) - endif( ) -+ -+ rocm_install_targets( -+ TARGETS -+ rocfft-device-${sub} -+ COMPONENT device -+ ) - endforeach() diff --git a/pkgs/development/rocm-modules/5/rocfft/split-kernel-compilation.patch b/pkgs/development/rocm-modules/5/rocfft/split-kernel-compilation.patch deleted file mode 100644 index 5d71fe399c1a..000000000000 --- a/pkgs/development/rocm-modules/5/rocfft/split-kernel-compilation.patch +++ /dev/null @@ -1,124 +0,0 @@ -diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt -index 3a16304..606b711 100644 ---- a/library/src/CMakeLists.txt -+++ b/library/src/CMakeLists.txt -@@ -250,12 +250,12 @@ foreach( target - - endforeach() - --add_executable( rocfft_aot_helper -+add_executable( rocfft_aot_helper EXCLUDE_FROM_ALL - rocfft_aot_helper.cpp - rocfft_stub.cpp - ) - --add_executable( rocfft_config_search -+add_executable( rocfft_config_search EXCLUDE_FROM_ALL - rocfft_config_search.cpp - rocfft_stub.cpp - ) -@@ -279,10 +279,10 @@ endif() - - target_link_libraries( rocfft PRIVATE ${ROCFFT_DEVICE_LINK_LIBS} ) - --target_link_libraries( rocfft PRIVATE rocfft-device-0 ) --target_link_libraries( rocfft PRIVATE rocfft-device-1 ) --target_link_libraries( rocfft PRIVATE rocfft-device-2 ) --target_link_libraries( rocfft PRIVATE rocfft-device-3 ) -+foreach( sub ${AMDGPU_TARGETS} ) -+ target_link_libraries( rocfft PRIVATE -lrocfft-device-${sub} ) -+endforeach() -+ - foreach( target rocfft rocfft_aot_helper rocfft_config_search ) - # RTC uses dladdr to find the RTC helper program - if( NOT WIN32 ) -@@ -347,7 +347,7 @@ add_custom_command( - DEPENDS rocfft_aot_helper rocfft_rtc_helper - COMMENT "Compile kernels into shipped cache file" - ) --add_custom_target( rocfft_kernel_cache_target ALL -+add_custom_target( rocfft_kernel_cache_target - DEPENDS rocfft_kernel_cache.db - VERBATIM - ) -@@ -392,7 +392,8 @@ else() - endif() - rocm_install(FILES ${ROCFFT_KERNEL_CACHE_PATH} - DESTINATION "${ROCFFT_KERNEL_CACHE_INSTALL_DIR}" -- COMPONENT runtime -+ COMPONENT kernel_cache -+ EXCLUDE_FROM_ALL - ) - - # PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ -diff --git a/library/src/device/CMakeLists.txt b/library/src/device/CMakeLists.txt -index 9f7b85f..73a8ec9 100644 ---- a/library/src/device/CMakeLists.txt -+++ b/library/src/device/CMakeLists.txt -@@ -170,11 +170,11 @@ list( SORT rocfft_device_source ) - # functions callable by rocFFT and depends on amdhip64, and another - # one usable by AOT RTC that contains no device code - list( FILTER rocfft_device_source EXCLUDE REGEX function_pool.cpp ) --add_library( rocfft-function-pool OBJECT -+add_library( rocfft-function-pool OBJECT EXCLUDE_FROM_ALL - function_pool.cpp - ) - target_compile_definitions( rocfft-function-pool PRIVATE FUNCTION_POOL_STANDALONE_BODY= ) --add_library( rocfft-function-pool-standalone OBJECT -+add_library( rocfft-function-pool-standalone OBJECT EXCLUDE_FROM_ALL - function_pool.cpp - ) - target_compile_definitions( rocfft-function-pool-standalone PRIVATE FUNCTION_POOL_STANDALONE_BODY={} ) -@@ -193,26 +193,15 @@ foreach( pool rocfft-function-pool rocfft-function-pool-standalone ) - add_dependencies(${pool} gen_headers_target) - endforeach() - --list( LENGTH rocfft_device_source rocfft_device_source_len ) --math(EXPR split_len "${rocfft_device_source_len} / 4") --math(EXPR split_idx_2 "${rocfft_device_source_len} / 4 * 2") --math(EXPR split_idx_3 "${rocfft_device_source_len} / 4 * 3") -- --list( SUBLIST rocfft_device_source 0 ${split_len} rocfft_device_source_0 ) --list( SUBLIST rocfft_device_source ${split_len} ${split_len} rocfft_device_source_1 ) --list( SUBLIST rocfft_device_source ${split_idx_2} ${split_len} rocfft_device_source_2 ) --list( SUBLIST rocfft_device_source ${split_idx_3} -1 rocfft_device_source_3 ) -- --foreach( sub RANGE 3 ) -- set( rocfft_device_source_var rocfft_device_source_${sub} ) -+foreach( sub ${AMDGPU_TARGETS} ) - if(NOT SINGLELIB) -- add_library( rocfft-device-${sub} -- ${${rocfft_device_source_var}} ) -+ add_library( rocfft-device-${sub} EXCLUDE_FROM_ALL -+ ${rocfft_device_source} ) - else() - # Compile the device lib as a static library, which is then linked - # into librocfft.so Useful for testing purposes. -- add_library( rocfft-device-${sub} STATIC -- ${${rocfft_device_source_var}} ) -+ add_library( rocfft-device-${sub} STATIC EXCLUDE_FROM_ALL -+ ${rocfft_device_source} ) - - # if we're building singlelib, we don't want to export any of the - # device library symbols to the main library -@@ -241,9 +230,7 @@ foreach( sub RANGE 3 ) - # Set AMD GPU architecture options - - # Enable compilation of desired architectures -- foreach( target ${AMDGPU_TARGETS} ) -- target_compile_options( rocfft-device-${sub} PRIVATE --offload-arch=${target} ) -- endforeach( ) -+ target_compile_options( rocfft-device-${sub} PRIVATE --offload-arch=${sub} ) - - target_include_directories( rocfft-device-${sub} - PRIVATE $ -@@ -268,9 +255,4 @@ foreach( sub RANGE 3 ) - if( NOT BUILD_SHARED_LIBS ) - set_target_properties( rocfft-device-${sub} PROPERTIES PREFIX "lib" ) - endif( ) -- -- rocm_install_targets( -- TARGETS -- rocfft-device-${sub} -- ) - endforeach() diff --git a/pkgs/development/rocm-modules/5/rocmlir/default.nix b/pkgs/development/rocm-modules/5/rocmlir/default.nix index a2a4923148a0..9b24112dce8a 100644 --- a/pkgs/development/rocm-modules/5/rocmlir/default.nix +++ b/pkgs/development/rocm-modules/5/rocmlir/default.nix @@ -3,28 +3,35 @@ , fetchFromGitHub , rocmUpdateScript , cmake +, rocm-cmake , ninja -, hip -, rocminfo +, clr , git , libxml2 , libedit +, zstd , zlib , ncurses -, python3 +, python3Packages , buildRockCompiler ? false +, buildTests ? false # `argument of type 'NoneType' is not iterable` }: # Theoretically, we could have our MLIR have an output # with the source and built objects so that we can just # use it as the external LLVM repo for this let + suffix = + if buildRockCompiler + then "-rock" + else ""; + llvmNativeTarget = if stdenv.isx86_64 then "X86" else if stdenv.isAarch64 then "AArch64" else throw "Unsupported ROCm LLVM platform"; in stdenv.mkDerivation (finalAttrs: { - pname = "rocmlir"; + pname = "rocmlir${suffix}"; version = "5.7.0"; outputs = [ @@ -42,44 +49,61 @@ in stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake + rocm-cmake ninja - ] ++ lib.optionals (!buildRockCompiler) [ - hip + clr + python3Packages.python + python3Packages.tomli ]; buildInputs = [ git libxml2 libedit - python3 ]; propagatedBuildInputs = [ + zstd zlib ncurses ]; cmakeFlags = [ "-DLLVM_TARGETS_TO_BUILD=AMDGPU;${llvmNativeTarget}" + "-DLLVM_ENABLE_ZSTD=ON" "-DLLVM_ENABLE_ZLIB=ON" "-DLLVM_ENABLE_TERMINFO=ON" + "-DROCM_PATH=${clr}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" ] ++ lib.optionals buildRockCompiler [ "-DBUILD_FAT_LIBROCKCOMPILER=ON" ] ++ lib.optionals (!buildRockCompiler) [ - "-DROCM_PATH=${rocminfo}" "-DROCM_TEST_CHIPSET=gfx000" ]; + postPatch = '' + patchShebangs mlir + + substituteInPlace mlir/utils/performance/common/CMakeLists.txt \ + --replace "/opt/rocm" "${clr}" + ''; + dontBuild = true; doCheck = true; # Certain libs aren't being generated, try enabling tests next update checkTarget = if buildRockCompiler then "librockCompiler" - else "check-mlir-miopen-build-only"; + else if buildTests + then "check-rocmlir" + else "check-rocmlir-build-only"; postInstall = let - libPath = lib.makeLibraryPath [ zlib ncurses hip stdenv.cc.cc ]; + libPath = lib.makeLibraryPath [ zstd zlib ncurses clr stdenv.cc.cc ]; in lib.optionals (!buildRockCompiler) '' mkdir -p $external/lib cp -a external/llvm-project/llvm/lib/{*.a*,*.so*} $external/lib diff --git a/pkgs/development/rocm-modules/5/rocprim/default.nix b/pkgs/development/rocm-modules/5/rocprim/default.nix index e8233547664f..1dd2555c6915 100644 --- a/pkgs/development/rocm-modules/5/rocprim/default.nix +++ b/pkgs/development/rocm-modules/5/rocprim/default.nix @@ -4,11 +4,12 @@ , rocmUpdateScript , cmake , rocm-cmake -, hip +, clr , gtest , gbenchmark , buildTests ? false , buildBenchmarks ? false +, gpuTargets ? [ ] }: stdenv.mkDerivation (finalAttrs: { @@ -33,7 +34,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr ]; buildInputs = lib.optionals buildTests [ @@ -49,6 +50,8 @@ stdenv.mkDerivation (finalAttrs: { "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DBUILD_TEST=ON" ] ++ lib.optionals buildBenchmarks [ @@ -58,6 +61,7 @@ stdenv.mkDerivation (finalAttrs: { postInstall = lib.optionalString buildTests '' mkdir -p $test/bin mv $out/bin/test_* $test/bin + mv $out/bin/rocprim $test/bin '' + lib.optionalString buildBenchmarks '' mkdir -p $benchmark/bin mv $out/bin/benchmark_* $benchmark/bin diff --git a/pkgs/development/rocm-modules/5/rocrand/default.nix b/pkgs/development/rocm-modules/5/rocrand/default.nix index daa24b870ceb..954a299e317e 100644 --- a/pkgs/development/rocm-modules/5/rocrand/default.nix +++ b/pkgs/development/rocm-modules/5/rocrand/default.nix @@ -4,11 +4,12 @@ , rocmUpdateScript , cmake , rocm-cmake -, hip +, clr , gtest , gbenchmark , buildTests ? false , buildBenchmarks ? false +, gpuTargets ? [ ] }: stdenv.mkDerivation (finalAttrs: { @@ -34,7 +35,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr ]; buildInputs = lib.optionals buildTests [ @@ -46,12 +47,14 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ "-DCMAKE_C_COMPILER=hipcc" "-DCMAKE_CXX_COMPILER=hipcc" - "-DHIP_ROOT_DIR=${hip}" + "-DHIP_ROOT_DIR=${clr}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DBUILD_TEST=ON" ] ++ lib.optionals buildBenchmarks [ diff --git a/pkgs/development/rocm-modules/5/rocsolver/default.nix b/pkgs/development/rocm-modules/5/rocsolver/default.nix index 3a0858af6335..48bf0950351c 100644 --- a/pkgs/development/rocm-modules/5/rocsolver/default.nix +++ b/pkgs/development/rocm-modules/5/rocsolver/default.nix @@ -5,7 +5,8 @@ , cmake , rocm-cmake , rocblas -, hip +, rocsparse +, clr , fmt , gtest , gfortran @@ -37,13 +38,14 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr ] ++ lib.optionals (buildTests || buildBenchmarks) [ gfortran ]; buildInputs = [ rocblas + rocsparse fmt ] ++ lib.optionals buildTests [ gtest @@ -53,6 +55,7 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ "-DCMAKE_CXX_COMPILER=hipcc" + "-DCMAKE_CXX_FLAGS=-Wno-switch" # Way too many warnings # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" diff --git a/pkgs/development/rocm-modules/5/rocsparse/default.nix b/pkgs/development/rocm-modules/5/rocsparse/default.nix index d97951530119..945e03c0bc8b 100644 --- a/pkgs/development/rocm-modules/5/rocsparse/default.nix +++ b/pkgs/development/rocm-modules/5/rocsparse/default.nix @@ -6,7 +6,7 @@ , cmake , rocm-cmake , rocprim -, hip +, clr , gfortran , git , gtest @@ -14,6 +14,7 @@ , python3Packages , buildTests ? false , buildBenchmarks ? false # Seems to depend on tests +, gpuTargets ? [ ] }: stdenv.mkDerivation (finalAttrs: { @@ -38,7 +39,7 @@ stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr gfortran ]; @@ -59,6 +60,8 @@ stdenv.mkDerivation (finalAttrs: { "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals (buildTests || buildBenchmarks) [ "-DBUILD_CLIENTS_TESTS=ON" "-DCMAKE_MATRICES_DIR=/build/source/matrices" diff --git a/pkgs/development/rocm-modules/5/rocthrust/default.nix b/pkgs/development/rocm-modules/5/rocthrust/default.nix index e441709f89f7..b80b161f5799 100644 --- a/pkgs/development/rocm-modules/5/rocthrust/default.nix +++ b/pkgs/development/rocm-modules/5/rocthrust/default.nix @@ -5,10 +5,11 @@ , cmake , rocm-cmake , rocprim -, hip +, clr , gtest , buildTests ? false , buildBenchmarks ? false +, gpuTargets ? [ ] }: stdenv.mkDerivation (finalAttrs: { @@ -34,7 +35,7 @@ stdenv.mkDerivation (finalAttrs: { cmake rocm-cmake rocprim - hip + clr ]; buildInputs = lib.optionals buildTests [ @@ -43,12 +44,14 @@ stdenv.mkDerivation (finalAttrs: { cmakeFlags = [ "-DCMAKE_CXX_COMPILER=hipcc" - "-DHIP_ROOT_DIR=${hip}" + "-DHIP_ROOT_DIR=${clr}" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" "-DCMAKE_INSTALL_LIBDIR=lib" "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] ++ lib.optionals (gpuTargets != [ ]) [ + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" ] ++ lib.optionals buildTests [ "-DBUILD_TEST=ON" ] ++ lib.optionals buildBenchmarks [ diff --git a/pkgs/development/rocm-modules/5/rocwmma/0000-dont-fetch-googletest.patch b/pkgs/development/rocm-modules/5/rocwmma/0000-dont-fetch-googletest.patch index cee603679758..fa47a3c42249 100644 --- a/pkgs/development/rocm-modules/5/rocwmma/0000-dont-fetch-googletest.patch +++ b/pkgs/development/rocm-modules/5/rocwmma/0000-dont-fetch-googletest.patch @@ -1,8 +1,8 @@ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt -index e1160bb..2a5462e 100644 +index 0d00883..86ce282 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt -@@ -30,26 +30,6 @@ cmake_dependent_option( ROCWMMA_BUILD_VALIDATION_TESTS "Build validation tests" +@@ -30,30 +30,6 @@ cmake_dependent_option( ROCWMMA_BUILD_VALIDATION_TESTS "Build validation tests" cmake_dependent_option( ROCWMMA_BUILD_BENCHMARK_TESTS "Build benchmarking tests" OFF "ROCWMMA_BUILD_TESTS" OFF ) cmake_dependent_option( ROCWMMA_BUILD_EXTENDED_TESTS "Build extended test parameter coverage" OFF "ROCWMMA_BUILD_TESTS" OFF ) @@ -12,20 +12,24 @@ index e1160bb..2a5462e 100644 -FetchContent_Declare( - googletest - GIT_REPOSITORY https://github.com/google/googletest.git -- GIT_TAG 609281088cfefc76f9d0ce82e1ff6c30cc3591e5 +- GIT_TAG release-1.12.1 -) -FetchContent_GetProperties(googletest) -if(NOT googletest_POPULATED) +- - # Fetch the content using default details - FetchContent_Populate(googletest) - # Save the shared libs setting, then force to static libs - set(BUILD_SHARED_LIBS_OLD ${BUILD_SHARED_LIBS}) - set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build SHARED libraries" FORCE) +- - # Add gtest targets as static libs - add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR}) +- - # Restore shared libs setting - set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD} CACHE INTERNAL "Build SHARED libraries" FORCE) -endif() - +- set(ROCWMMA_TEST_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}) set(ROCWMMA_COMMON_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/hip_device.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/rocwmma_gtest_main.cpp) diff --git a/pkgs/development/rocm-modules/5/rocwmma/default.nix b/pkgs/development/rocm-modules/5/rocwmma/default.nix index ef21ed86248a..71d0e3fbe793 100644 --- a/pkgs/development/rocm-modules/5/rocwmma/default.nix +++ b/pkgs/development/rocm-modules/5/rocwmma/default.nix @@ -4,44 +4,24 @@ , rocmUpdateScript , cmake , rocm-cmake -, hip +, rocm-smi +, clr , openmp , gtest , rocblas -, texlive -, doxygen -, sphinx -, python3Packages -, buildDocs ? true -, buildTests ? false +, buildTests ? false # Will likely fail building because wavefront shifts are not supported for certain archs , buildExtendedTests ? false , buildBenchmarks ? false , buildSamples ? false , gpuTargets ? [ ] # gpuTargets = [ "gfx908:xnack-" "gfx90a:xnack-" "gfx90a:xnack+" ... ] }: -let - latex = lib.optionalAttrs buildDocs texlive.combine { - inherit (texlive) scheme-small - latexmk - tex-gyre - fncychap - wrapfig - capt-of - framed - needspace - tabulary - varwidth - titlesec; - }; -in stdenv.mkDerivation (finalAttrs: { +stdenv.mkDerivation (finalAttrs: { pname = "rocwmma"; version = "5.7.0"; outputs = [ "out" - ] ++ lib.optionals buildDocs [ - "doc" ] ++ lib.optionals (buildTests || buildBenchmarks) [ "test" ] ++ lib.optionals buildBenchmarks [ @@ -64,28 +44,21 @@ in stdenv.mkDerivation (finalAttrs: { nativeBuildInputs = [ cmake rocm-cmake - hip + clr ]; buildInputs = [ openmp ] ++ lib.optionals (buildTests || buildBenchmarks) [ + rocm-smi gtest rocblas - ] ++ lib.optionals buildDocs [ - latex - doxygen - sphinx - python3Packages.sphinx-rtd-theme - python3Packages.breathe ]; cmakeFlags = [ "-DCMAKE_CXX_COMPILER=hipcc" "-DROCWMMA_BUILD_TESTS=${if buildTests || buildBenchmarks then "ON" else "OFF"}" - "-DROCWMMA_BUILD_VALIDATION_TESTS=ON" "-DROCWMMA_BUILD_SAMPLES=${if buildSamples then "ON" else "OFF"}" - "-DROCWMMA_VALIDATE_WITH_ROCBLAS=ON" # Manually define CMAKE_INSTALL_ # See: https://github.com/NixOS/nixpkgs/pull/197838 "-DCMAKE_INSTALL_BINDIR=bin" @@ -100,21 +73,7 @@ in stdenv.mkDerivation (finalAttrs: { "-DROCWMMA_BENCHMARK_WITH_ROCBLAS=ON" ]; - postPatch = lib.optionalString buildDocs '' - patchShebangs docs/*.sh - ''; - - # Unfortunately, it seems like we have to call make on this manually - # -DROCWMMA_BUILD_DOCS=ON is invalid, despite being on the README - postBuild = lib.optionalString buildDocs '' - export HOME=$(mktemp -d) - ../docs/run_doc.sh - ''; - - postInstall = lib.optionalString buildDocs '' - mv ../docs/source/_build/html $out/share/doc/rocwmma - mv ../docs/source/_build/latex/rocWMMA.pdf $out/share/doc/rocwmma - '' + lib.optionalString (buildTests || buildBenchmarks) '' + postInstall = lib.optionalString (buildTests || buildBenchmarks) '' mkdir -p $test/bin mv $out/bin/{*_test,*-validate} $test/bin '' + lib.optionalString buildBenchmarks '' diff --git a/pkgs/development/rocm-modules/5/tensile/default.nix b/pkgs/development/rocm-modules/5/tensile/default.nix index 7d0165a42060..86dbaa95e192 100644 --- a/pkgs/development/rocm-modules/5/tensile/default.nix +++ b/pkgs/development/rocm-modules/5/tensile/default.nix @@ -3,14 +3,20 @@ , fetchFromGitHub , rocmUpdateScript , buildPythonPackage +, pytestCheckHook +, setuptools , pyyaml , msgpack , pandas +, joblib +, filelock +, rocminfo }: buildPythonPackage rec { pname = "tensile"; version = "5.7.0"; + format = "pyproject"; src = fetchFromGitHub { owner = "ROCmSoftwarePlatform"; @@ -19,12 +25,29 @@ buildPythonPackage rec { hash = "sha256-CyPGiM/53duJc/oNtOsl6JSsl9uOOYm5R7O6YXaVOm4="; }; - buildInputs = [ + buildInputs = [ setuptools ]; + + propagatedBuildInputs = [ pyyaml msgpack pandas + joblib ]; + doCheck = false; # Too many errors, not sure how to set this up properly + + nativeCheckInputs = [ + pytestCheckHook + filelock + rocminfo + ]; + + preCheck = '' + export ROCM_PATH=${rocminfo} + ''; + + pythonImportsCheck = [ "Tensile" ]; + passthru.updateScript = rocmUpdateScript { name = pname; owner = src.owner; From 05de74ecf3083ec7a4479dc131a24167f0dfd3d0 Mon Sep 17 00:00:00 2001 From: Madoura Date: Thu, 5 Oct 2023 02:35:52 -0500 Subject: [PATCH 19/30] rocmPackages.rocm-docs-core: init at 0.25.0 --- pkgs/development/rocm-modules/5/default.nix | 2 + .../rocm-modules/5/rocm-docs-core/default.nix | 65 +++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 pkgs/development/rocm-modules/5/rocm-docs-core/default.nix diff --git a/pkgs/development/rocm-modules/5/default.nix b/pkgs/development/rocm-modules/5/default.nix index d3effc777613..e0295f08720f 100644 --- a/pkgs/development/rocm-modules/5/default.nix +++ b/pkgs/development/rocm-modules/5/default.nix @@ -70,6 +70,8 @@ in rec { # stdenv = llvm.rocmClangStdenv; }; + rocm-docs-core = python3Packages.callPackage ./rocm-docs-core { }; + ## ROCm-Developer-Tools ## hip-common = callPackage ./hip-common { inherit rocmUpdateScript; diff --git a/pkgs/development/rocm-modules/5/rocm-docs-core/default.nix b/pkgs/development/rocm-modules/5/rocm-docs-core/default.nix new file mode 100644 index 000000000000..65d21daa5b50 --- /dev/null +++ b/pkgs/development/rocm-modules/5/rocm-docs-core/default.nix @@ -0,0 +1,65 @@ +{ lib +, stdenv +, fetchFromGitHub +, gitUpdater +, buildPythonPackage +, setuptools +, beautifulsoup4 +, gitpython +, pydata-sphinx-theme +, pygithub +, sphinx +, breathe +, myst-parser +, sphinx-book-theme +, sphinx-copybutton +, sphinx-design +, sphinx-external-toc +, sphinx-notfound-page +, pyyaml +, fastjsonschema +}: + +buildPythonPackage rec { + pname = "rocm-docs-core"; + version = "0.25.0"; + format = "pyproject"; + + src = fetchFromGitHub { + owner = "RadeonOpenCompute"; + repo = "rocm-docs-core"; + rev = "v${version}"; + hash = "sha256-kOsoIK0vaPT60hGr960s5vc0eloSr5CECtd8Dy24YuM="; + }; + + buildInputs = [ setuptools ]; + + propagatedBuildInputs = [ + beautifulsoup4 + gitpython + pydata-sphinx-theme + pygithub + sphinx + breathe + myst-parser + sphinx-book-theme + sphinx-copybutton + sphinx-design + sphinx-external-toc + sphinx-notfound-page + pyyaml + fastjsonschema + ]; + + pythonImportsCheck = [ "rocm_docs" ]; + + passthru.updateScript = gitUpdater { rev-prefix = "v"; }; + + meta = with lib; { + description = "ROCm Documentation Python package for ReadTheDocs build standardization"; + homepage = "https://github.com/RadeonOpenCompute/rocm-docs-core"; + license = with licenses; [ mit cc-by-40 ]; + maintainers = teams.rocm.members; + platforms = platforms.linux; + }; +} From a3a45bea58e965b90e1abcea08feed78035b32db Mon Sep 17 00:00:00 2001 From: Madoura Date: Thu, 5 Oct 2023 08:39:10 -0500 Subject: [PATCH 20/30] rocmPackages.composable_kernel: unstable-2023-01-16 -> 5.7.0 --- .../5/composable_kernel/default.nix | 24 ++++++++++--------- pkgs/development/rocm-modules/5/default.nix | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/pkgs/development/rocm-modules/5/composable_kernel/default.nix b/pkgs/development/rocm-modules/5/composable_kernel/default.nix index 54bf1251c309..2372b27ebe52 100644 --- a/pkgs/development/rocm-modules/5/composable_kernel/default.nix +++ b/pkgs/development/rocm-modules/5/composable_kernel/default.nix @@ -1,10 +1,10 @@ { lib , stdenv , fetchFromGitHub -, unstableGitUpdater +, rocmUpdateScript , cmake , rocm-cmake -, hip +, clr , openmp , clang-tools-extra , gtest @@ -15,7 +15,7 @@ stdenv.mkDerivation (finalAttrs: { pname = "composable_kernel"; - version = "unstable-2023-01-16"; + version = "5.7.0"; outputs = [ "out" @@ -25,24 +25,21 @@ stdenv.mkDerivation (finalAttrs: { "example" ]; - # ROCm 5.6 should release composable_kernel as stable with a tag in the future src = fetchFromGitHub { owner = "ROCmSoftwarePlatform"; repo = "composable_kernel"; - rev = "80e05267417f948e4f7e63c0fe807106d9a0c0ef"; - hash = "sha256-+c0E2UtlG/abweLwCWWjNHDO5ZvSIVKwwwettT9mqR4="; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-Z9X+S2SijGJ8bhr9ghkkWicBUzLzs9fxPpqZxX6BBM4="; }; nativeBuildInputs = [ cmake rocm-cmake - hip + clr clang-tools-extra ]; - buildInputs = [ - openmp - ]; + buildInputs = [ openmp ]; cmakeFlags = [ "-DCMAKE_C_COMPILER=hipcc" @@ -71,7 +68,11 @@ stdenv.mkDerivation (finalAttrs: { mv $out/bin/example_* $example/bin ''; - passthru.updateScript = unstableGitUpdater { }; + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + owner = finalAttrs.src.owner; + repo = finalAttrs.src.repo; + }; # Times out otherwise requiredSystemFeatures = [ "big-parallel" ]; @@ -82,5 +83,6 @@ stdenv.mkDerivation (finalAttrs: { license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; }; }) diff --git a/pkgs/development/rocm-modules/5/default.nix b/pkgs/development/rocm-modules/5/default.nix index e0295f08720f..838874f398e2 100644 --- a/pkgs/development/rocm-modules/5/default.nix +++ b/pkgs/development/rocm-modules/5/default.nix @@ -237,6 +237,7 @@ in rec { }; composable_kernel = callPackage ./composable_kernel { + inherit rocmUpdateScript rocm-cmake clr; inherit (llvm) openmp clang-tools-extra; stdenv = llvm.rocmClangStdenv; }; From ae91d1330e833c8b20be31317cb6e27a43974ef5 Mon Sep 17 00:00:00 2001 From: Madoura Date: Thu, 5 Oct 2023 11:52:09 -0500 Subject: [PATCH 21/30] rocmPackages.clr: replace rocm-opencl-icd --- .../rocm-modules/5/clr/default.nix | 18 +++++++++++++++ pkgs/development/rocm-modules/5/clr/test.nix | 23 +++++++++++++++++++ pkgs/development/rocm-modules/5/default.nix | 2 +- 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 pkgs/development/rocm-modules/5/clr/test.nix diff --git a/pkgs/development/rocm-modules/5/clr/default.nix b/pkgs/development/rocm-modules/5/clr/default.nix index d3f811dcc422..c0d4de8cbb7c 100644 --- a/pkgs/development/rocm-modules/5/clr/default.nix +++ b/pkgs/development/rocm-modules/5/clr/default.nix @@ -1,5 +1,6 @@ { lib , stdenv +, callPackage , fetchFromGitHub , rocmUpdateScript , makeWrapper @@ -13,6 +14,7 @@ , rocm-runtime , roctracer , rocminfo +, rocm-smi , numactl , libGL , libxml2 @@ -35,6 +37,11 @@ in stdenv.mkDerivation (finalAttrs: { pname = "clr"; version = "5.7.0"; + outputs = [ + "out" + "icd" + ]; + src = fetchFromGitHub { owner = "ROCm-Developer-Tools"; repo = "clr"; @@ -106,6 +113,10 @@ in stdenv.mkDerivation (finalAttrs: { # Just link rocminfo, it's easier ln -s ${rocminfo}/bin/* $out/bin + + # Replace rocm-opencl-icd functionality + mkdir -p $icd/etc/OpenCL/vendors + echo "$out/lib/libamdocl64.so" > $icd/etc/OpenCL/vendors/amdocl64.icd ''; passthru = { @@ -134,6 +145,13 @@ in stdenv.mkDerivation (finalAttrs: { owner = finalAttrs.src.owner; repo = finalAttrs.src.repo; }; + + impureTests = { + clr-icd = callPackage ./test.nix { + inherit rocm-smi; + clr = finalAttrs.finalPackage; + }; + }; }; meta = with lib; { diff --git a/pkgs/development/rocm-modules/5/clr/test.nix b/pkgs/development/rocm-modules/5/clr/test.nix new file mode 100644 index 000000000000..c02bb4da8886 --- /dev/null +++ b/pkgs/development/rocm-modules/5/clr/test.nix @@ -0,0 +1,23 @@ +{ lib +, makeImpureTest +, clinfo +, clr +, rocm-smi +}: + +makeImpureTest { + name = "clr-icd"; + testedPackage = "rocmPackages.clr"; + nativeBuildInputs = [ clinfo rocm-smi ]; + OCL_ICD_VENDORS = "${clr.icd}/etc/OpenCL/vendors"; + + testScript = '' + # Test fails if the number of platforms is 0 + clinfo | grep -E 'Number of platforms * [1-9]' + rocm-smi | grep -A1 GPU + ''; + + meta = with lib; { + maintainers = teams.rocm.members; + }; +} diff --git a/pkgs/development/rocm-modules/5/default.nix b/pkgs/development/rocm-modules/5/default.nix index 838874f398e2..ac8a32ef5176 100644 --- a/pkgs/development/rocm-modules/5/default.nix +++ b/pkgs/development/rocm-modules/5/default.nix @@ -86,7 +86,7 @@ in rec { # Replaces hip, opencl-runtime, and rocclr clr = callPackage ./clr { - inherit rocmUpdateScript hip-common hipcc rocm-device-libs rocm-comgr rocm-runtime roctracer rocminfo; + inherit rocmUpdateScript hip-common hipcc rocm-device-libs rocm-comgr rocm-runtime roctracer rocminfo rocm-smi; inherit (llvm) clang; stdenv = llvm.rocmClangStdenv; }; From fb9321020577ac1ddaabc81488dbb700350eb4e5 Mon Sep 17 00:00:00 2001 From: Madoura Date: Thu, 5 Oct 2023 13:05:33 -0500 Subject: [PATCH 22/30] nixos/doc: note ROCm changes --- nixos/doc/manual/release-notes/rl-2311.section.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/nixos/doc/manual/release-notes/rl-2311.section.md b/nixos/doc/manual/release-notes/rl-2311.section.md index d1bbb5c389a7..39011f392986 100644 --- a/nixos/doc/manual/release-notes/rl-2311.section.md +++ b/nixos/doc/manual/release-notes/rl-2311.section.md @@ -26,6 +26,9 @@ [`sudo-rs`]: https://github.com/memorysafety/sudo-rs/ +- All [ROCm](https://rocm.docs.amd.com/en/latest/) packages have been updated to 5.7.0. + - [ROCm](https://rocm.docs.amd.com/en/latest/) package attribute sets are versioned: `rocmPackages` -> `rocmPackages_5`. + ## New Services {#sec-release-23.11-new-services} - [MCHPRS](https://github.com/MCHPR/MCHPRS), a multithreaded Minecraft server built for redstone. Available as [services.mchprs](#opt-services.mchprs.enable). @@ -148,6 +151,17 @@ - `consul` has been updated to `1.16.0`. See the [release note](https://github.com/hashicorp/consul/releases/tag/v1.16.0) for more details. Once a new Consul version has started and upgraded its data directory, it generally cannot be downgraded to the previous version. +- `llvmPackages_rocm` has been moved to `rocmPackages.llvm`. + +- `hip`, `rocm-opencl-runtime`, `rocm-opencl-icd`, and `rocclr` have been combined into `rocmPackages.clr`. + +- `clang-ocl`, `clr`, `composable_kernel`, `hipblas`, `hipcc`, `hip-common`, `hipcub`, + `hipfft`, `hipfort`, `hipify`, `hipsolver`, `hipsparse`, `migraphx`, `miopen`, `miopengemm`, + `rccl`, `rdc`, `rocalution`, `rocblas`, `rocdgbapi`, `rocfft`, `rocgdb`, `rocm-cmake`, + `rocm-comgr`, `rocm-core`, `rocm-device-libs`, `rocminfo`, `rocmlir`, `rocm-runtime`, + `rocm-smi`, `rocm-thunk`, `rocprim`, `rocprofiler`, `rocrand`, `rocr-debug-agent`, + `rocsolver`, `rocsparse`, `rocthrust`, `roctracer`, `rocwmma`, and `tensile` have been moved to `rocmPackages`. + - `himalaya` has been updated to `0.8.0`, which drops the native TLS support (in favor of Rustls) and add OAuth 2.0 support. See the [release note](https://github.com/soywod/himalaya/releases/tag/v0.8.0) for more details. - `nix-prefetch-git` now ignores global and user git config, to improve reproducibility. From 6f39d63688276f1953fc1ca3afa16d8bbb7a301d Mon Sep 17 00:00:00 2001 From: Madoura Date: Thu, 5 Oct 2023 13:07:45 -0500 Subject: [PATCH 23/30] nixos/doc: rocm-opencl-icd -> rocmPackages.clr.icd --- nixos/doc/manual/configuration/gpu-accel.chapter.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nixos/doc/manual/configuration/gpu-accel.chapter.md b/nixos/doc/manual/configuration/gpu-accel.chapter.md index 40878b5da4b5..dfccdf291b73 100644 --- a/nixos/doc/manual/configuration/gpu-accel.chapter.md +++ b/nixos/doc/manual/configuration/gpu-accel.chapter.md @@ -26,7 +26,7 @@ directory which is scanned by the ICL loader for ICD files. For example: ```ShellSession $ export \ - OCL_ICD_VENDORS=`nix-build '' --no-out-link -A rocm-opencl-icd`/etc/OpenCL/vendors/ + OCL_ICD_VENDORS=`nix-build '' --no-out-link -A rocmPackages.clr.icd`/etc/OpenCL/vendors/ ``` The second mechanism is to add the OpenCL driver package to @@ -50,13 +50,13 @@ Platform Vendor Advanced Micro Devices, Inc. Modern AMD [Graphics Core Next](https://en.wikipedia.org/wiki/Graphics_Core_Next) (GCN) GPUs are -supported through the rocm-opencl-icd package. Adding this package to +supported through the rocmPackages.clr.icd package. Adding this package to [](#opt-hardware.opengl.extraPackages) enables OpenCL support: ```nix hardware.opengl.extraPackages = [ - rocm-opencl-icd + rocmPackages.clr.icd ]; ``` From e6f88a9a824ad8a34bf9d501aa45b0883a4a80c9 Mon Sep 17 00:00:00 2001 From: Madoura Date: Thu, 5 Oct 2023 16:58:02 -0500 Subject: [PATCH 24/30] rocm-related: fixup for ROCm 5.7.0 blender: fixup for ROCm 5.7.0 opensycl: fixup for ROCm 5.7.0 magma: fixup for ROCm 5.7.0 torch: fixup for ROCm 5.7.0 cp2k: fixup for ROCm 5.7.0 sirius: fixup for ROCm 5.7.0 spfft: fixup for ROCm 5.7.0 spla: fixup for ROCm 5.7.0 --- pkgs/applications/misc/blender/default.nix | 6 ++--- .../science/chemistry/cp2k/default.nix | 16 +++++++------ pkgs/by-name/si/sirius/package.nix | 11 +++++---- pkgs/by-name/sp/spfft/package.nix | 13 ++++++----- pkgs/by-name/sp/spla/package.nix | 9 ++++---- .../compilers/opensycl/default.nix | 10 ++++---- .../libraries/science/math/magma/generic.nix | 19 +++++++-------- .../python-modules/torch/default.nix | 23 ++++++++----------- pkgs/top-level/all-packages.nix | 9 ++------ pkgs/top-level/python-packages.nix | 1 - 10 files changed, 54 insertions(+), 63 deletions(-) diff --git a/pkgs/applications/misc/blender/default.nix b/pkgs/applications/misc/blender/default.nix index 0b368ef1b315..00bbcdafff13 100644 --- a/pkgs/applications/misc/blender/default.nix +++ b/pkgs/applications/misc/blender/default.nix @@ -6,7 +6,7 @@ , zlib, zstd, fftw, opensubdiv, freetype, jemalloc, ocl-icd, addOpenGLRunpath , jackaudioSupport ? false, libjack2 , cudaSupport ? config.cudaSupport, cudaPackages ? { } -, hipSupport ? false, hip # comes with a significantly larger closure size +, hipSupport ? false, rocmPackages # comes with a significantly larger closure size , colladaSupport ? true, opencollada , spaceNavSupport ? stdenv.isLinux, libspnav , makeWrapper @@ -103,8 +103,8 @@ stdenv.mkDerivation (finalAttrs: rec { substituteInPlace extern/clew/src/clew.c --replace '"libOpenCL.so"' '"${ocl-icd}/lib/libOpenCL.so"' '') + (lib.optionalString hipSupport '' - substituteInPlace extern/hipew/src/hipew.c --replace '"/opt/rocm/hip/lib/libamdhip64.so"' '"${hip}/lib/libamdhip64.so"' - substituteInPlace extern/hipew/src/hipew.c --replace '"opt/rocm/hip/bin"' '"${hip}/bin"' + substituteInPlace extern/hipew/src/hipew.c --replace '"/opt/rocm/hip/lib/libamdhip64.so"' '"${rocmPackages.clr}/lib/libamdhip64.so"' + substituteInPlace extern/hipew/src/hipew.c --replace '"opt/rocm/hip/bin"' '"${rocmPackages.clr}/bin"' ''); cmakeFlags = diff --git a/pkgs/applications/science/chemistry/cp2k/default.nix b/pkgs/applications/science/chemistry/cp2k/default.nix index 052d791c0bb7..bb306fa322fe 100644 --- a/pkgs/applications/science/chemistry/cp2k/default.nix +++ b/pkgs/applications/science/chemistry/cp2k/default.nix @@ -37,11 +37,7 @@ # and for Nvidia see https://github.com/cp2k/cp2k/blob/master/INSTALL.md#2i-cuda-optional-improved-performance-on-gpu-systems , gpuVersion ? "Mi100" , gpuArch ? "gfx908" -, rocm-core -, hip -, hipblas -, hipfft -, rocblas +, rocmPackages }: assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ]; @@ -86,7 +82,13 @@ stdenv.mkDerivation rec { ] ++ lib.optional enableElpa elpa ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit - ++ lib.optional (gpuBackend == "rocm") [hip rocm-core hipblas hipfft rocblas] + ++ lib.optional (gpuBackend == "rocm") [ + rocmPackages.clr + rocmPackages.rocm-core + rocmPackages.hipblas + rocmPackages.hipfft + rocmPackages.rocblas + ] ; propagatedBuildInputs = [ mpi ]; @@ -126,7 +128,7 @@ stdenv.mkDerivation rec { ${lib.strings.optionalString (gpuBackend == "rocm") '' GPUVER = ${gpuVersion} OFFLOAD_CC = hipcc - OFFLOAD_FLAGS = -fopenmp -m64 -pthread -fPIC -D__GRID_HIP -O2 --offload-arch=${gpuArch} --rocm-path=${rocm-core} + OFFLOAD_FLAGS = -fopenmp -m64 -pthread -fPIC -D__GRID_HIP -O2 --offload-arch=${gpuArch} --rocm-path=${rocmPackages.rocm-core} OFFLOAD_TARGET = hip CXX = mpicxx CXXFLAGS = -std=c++11 -fopenmp -D__HIP_PLATFORM_AMD__ diff --git a/pkgs/by-name/si/sirius/package.nix b/pkgs/by-name/si/sirius/package.nix index 05d049a7d45b..2af3c28de922 100644 --- a/pkgs/by-name/si/sirius/package.nix +++ b/pkgs/by-name/si/sirius/package.nix @@ -23,8 +23,7 @@ , llvmPackages , gpuBackend ? "none" , cudaPackages -, hip -, rocblas +, rocmPackages }: assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ]; @@ -67,8 +66,10 @@ stdenv.mkDerivation rec { libvdwxc ] ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit - ++ lib.optionals (gpuBackend == "rocm") [ hip rocblas ] - ++ lib.optional stdenv.isDarwin llvmPackages.openmp + ++ lib.optionals (gpuBackend == "rocm") [ + rocmPackages.clr + rocmPackages.rocblas + ] ++ lib.optional stdenv.isDarwin llvmPackages.openmp ; propagatedBuildInputs = [ mpi ]; @@ -87,7 +88,7 @@ stdenv.mkDerivation rec { ] ++ lib.optionals (gpuBackend == "rocm") [ "-DUSE_ROCM=ON" - "-DHIP_ROOT_DIR=${hip}" + "-DHIP_ROOT_DIR=${rocmPackages.clr}" ]; doCheck = true; diff --git a/pkgs/by-name/sp/spfft/package.nix b/pkgs/by-name/sp/spfft/package.nix index dcc43ccd2446..72ae473d14a5 100644 --- a/pkgs/by-name/sp/spfft/package.nix +++ b/pkgs/by-name/sp/spfft/package.nix @@ -8,9 +8,7 @@ , llvmPackages , gpuBackend ? "none" , cudaPackages -, hip -, rocfft -, hipfft +, rocmPackages }: assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ]; @@ -35,8 +33,11 @@ stdenv.mkDerivation rec { fftw ] ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit - ++ lib.optionals (gpuBackend == "rocm") [ hip rocfft hipfft ] - ++ lib.optional stdenv.isDarwin llvmPackages.openmp + ++ lib.optionals (gpuBackend == "rocm") [ + rocmPackages.clr + rocmPackages.rocfft + rocmPackages.hipfft + ] ++ lib.optional stdenv.isDarwin llvmPackages.openmp ; propagatedBuildInputs = [ mpi ]; @@ -53,7 +54,7 @@ stdenv.mkDerivation rec { ++ lib.optional (gpuBackend == "cuda") "-DSPFFT_GPU_BACKEND=CUDA" ++ lib.optionals (gpuBackend == "rocm") [ "-DSPFFT_GPU_BACKEND=ROCM" - "-DHIP_ROOT_DIR=${hip}" + "-DHIP_ROOT_DIR=${rocmPackages.clr}" ]; diff --git a/pkgs/by-name/sp/spla/package.nix b/pkgs/by-name/sp/spla/package.nix index 3143fbeb7316..1f8abde4b723 100644 --- a/pkgs/by-name/sp/spla/package.nix +++ b/pkgs/by-name/sp/spla/package.nix @@ -8,8 +8,7 @@ , llvmPackages , gpuBackend ? "none" , cudaPackages -, hip -, rocblas +, rocmPackages }: assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ]; @@ -39,8 +38,10 @@ stdenv.mkDerivation rec { blas ] ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit - ++ lib.optionals (gpuBackend == "rocm") [ hip rocblas rocblas ] - ++ lib.optional stdenv.isDarwin llvmPackages.openmp + ++ lib.optionals (gpuBackend == "rocm") [ + rocmPackages.clr + rocmPackages.rocblas + ] ++ lib.optional stdenv.isDarwin llvmPackages.openmp ; propagatedBuildInputs = [ mpi ]; diff --git a/pkgs/development/compilers/opensycl/default.nix b/pkgs/development/compilers/opensycl/default.nix index d6f11798f199..995b21330a99 100644 --- a/pkgs/development/compilers/opensycl/default.nix +++ b/pkgs/development/compilers/opensycl/default.nix @@ -2,15 +2,13 @@ , fetchFromGitHub , llvmPackages_15 , lld_15 -, rocm-device-libs , python3 -, rocm-runtime , cmake , boost , libxml2 , libffi , makeWrapper -, hip +, rocmPackages , rocmSupport ? false }: let @@ -40,8 +38,8 @@ stdenv.mkDerivation rec { llvmPackages_15.libclang.dev llvmPackages_15.llvm ] ++ lib.optionals rocmSupport [ - hip - rocm-runtime + rocmPackages.clr + rocmPackages.rocm-runtime ]; # opensycl makes use of clangs internal headers. Its cmake does not successfully discover them automatically on nixos, so we supply the path manually @@ -55,7 +53,7 @@ stdenv.mkDerivation rec { --add-flags "-L${llvmPackages_15.openmp}/lib" \ --add-flags "-I${llvmPackages_15.openmp.dev}/include" \ '' + lib.optionalString rocmSupport '' - --add-flags "--rocm-device-lib-path=${rocm-device-libs}/amdgcn/bitcode" + --add-flags "--rocm-device-lib-path=${rocmPackages.rocm-device-libs}/amdgcn/bitcode" ''; meta = with lib; { diff --git a/pkgs/development/libraries/science/math/magma/generic.nix b/pkgs/development/libraries/science/math/magma/generic.nix index b3753a63339a..e9712ffb62ad 100644 --- a/pkgs/development/libraries/science/math/magma/generic.nix +++ b/pkgs/development/libraries/science/math/magma/generic.nix @@ -18,15 +18,12 @@ , gfortran , cudaCapabilities ? cudaPackages.cudaFlags.cudaCapabilities , gpuTargets ? [ ] # Non-CUDA targets, that is HIP -, hip -, hipblas -, hipsparse +, rocmPackages , lapack , lib , libpthreadstubs , magmaRelease , ninja -, openmp , rocmSupport ? false , static ? false , stdenv @@ -47,7 +44,7 @@ let # NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.realArches. # For some reason, Magma's CMakeLists.txt file does not handle the "gfx" prefix, so we must # remove it. - rocmArches = lists.map (x: strings.removePrefix "gfx" x) hip.gpuTargets; + rocmArches = lists.map (x: strings.removePrefix "gfx" x) rocmPackages.clr.gpuTargets; supportedRocmArches = lists.intersectLists rocmArches supportedGpuTargets; unsupportedRocmArches = lists.subtractLists supportedRocmArches rocmArches; @@ -125,10 +122,10 @@ stdenv.mkDerivation { ] ++ lists.optionals (strings.versionAtLeast cudaVersion "11.8") [ cuda_profiler_api.dev # ]) ++ lists.optionals rocmSupport [ - hip - hipblas - hipsparse - openmp + rocmPackages.clr + rocmPackages.hipblas + rocmPackages.hipsparse + rocmPackages.llvm.openmp ]; cmakeFlags = [ @@ -142,8 +139,8 @@ stdenv.mkDerivation { "-DCMAKE_CXX_COMPILER=${backendStdenv.cc}/bin/c++" "-DMAGMA_ENABLE_CUDA=ON" ] ++ lists.optionals rocmSupport [ - "-DCMAKE_C_COMPILER=${hip}/bin/hipcc" - "-DCMAKE_CXX_COMPILER=${hip}/bin/hipcc" + "-DCMAKE_C_COMPILER=${rocmPackages.clr}/bin/hipcc" + "-DCMAKE_CXX_COMPILER=${rocmPackages.clr}/bin/hipcc" "-DMAGMA_ENABLE_HIP=ON" ]; diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index f9f6e377b139..c9c400b57bd5 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -43,11 +43,7 @@ # ROCm dependencies rocmSupport ? false, - gpuTargets ? [ ], - openmp, rocm-core, hip, rccl, miopen, miopengemm, rocrand, rocblas, - rocfft, rocsparse, hipsparse, rocthrust, rocprim, hipcub, roctracer, - rocsolver, hipfft, hipsolver, hipblas, rocminfo, rocm-thunk, rocm-comgr, - rocm-device-libs, rocm-runtime, rocm-opencl-runtime, hipify + gpuTargets ? [ ], rocmPackages }: let @@ -89,7 +85,7 @@ let else if cudaSupport then gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities else if rocmSupport then - hip.gpuTargets + rocmPackages.clr.gpuTargets else throw "No GPU targets specified" ); @@ -97,12 +93,13 @@ let rocmtoolkit_joined = symlinkJoin { name = "rocm-merged"; - paths = [ - rocm-core hip rccl miopen miopengemm rocrand rocblas - rocfft rocsparse hipsparse rocthrust rocprim hipcub - roctracer rocfft rocsolver hipfft hipsolver hipblas + paths = with rocmPackages; [ + rocm-core clr rccl miopen miopengemm rocrand rocblas + rocsparse hipsparse rocthrust rocprim hipcub + roctracer # Unfree at the moment due to hsa-amd-aqlprofile hard dependency in rocprofiler + rocfft rocsolver hipfft hipsolver hipblas rocminfo rocm-thunk rocm-comgr rocm-device-libs - rocm-runtime rocm-opencl-runtime hipify + rocm-runtime clr.icd hipify ]; }; @@ -170,7 +167,7 @@ in buildPythonPackage rec { # Strangely, this is never set in cmake substituteInPlace cmake/public/LoadHIP.cmake \ --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \ - "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." hip.version))})" + "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." rocmPackages.clr.version))})" '' # Detection of NCCL version doesn't work particularly well when using the static binary. + lib.optionalString cudaSupport '' @@ -323,7 +320,7 @@ in buildPythonPackage rec { ] ++ lists.optionals (strings.versionAtLeast cudaVersion "11.8") [ cuda_profiler_api.dev # ]) - ++ lib.optionals rocmSupport [ openmp ] + ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ] ++ lib.optionals (cudaSupport || rocmSupport) [ magma ] ++ lib.optionals stdenv.isLinux [ numactl ] ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ]; diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index d365df0eb0f0..a62783c951af 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -30690,7 +30690,6 @@ with pkgs; # LLVM 11 crashes when compiling GHOST_SystemCocoa.mm stdenv = if stdenv.isDarwin then llvmPackages_10.stdenv else stdenv; inherit (darwin.apple_sdk.frameworks) Cocoa CoreGraphics ForceFeedback OpenAL OpenGL; - inherit (rocmPackages) hip; }; blender-with-packages = callPackage ../applications/misc/blender/wrapper.nix { }; @@ -39262,10 +39261,7 @@ with pkgs; lie = callPackage ../applications/science/math/LiE { }; - inherit (callPackage ../development/libraries/science/math/magma { - inherit (rocmPackages.llvm) openmp; - inherit (rocmPackages) hip hipblas hipsparse; - }) magma magma_2_7_2 magma_2_6_2; + inherit (callPackage ../development/libraries/science/math/magma { }) magma magma_2_7_2 magma_2_6_2; magma-cuda = magma.override { cudaSupport = true; @@ -39276,8 +39272,7 @@ with pkgs; static = true; }; - # TODO:AMD won't compile with anything newer than 2.6.2 -- it fails at the linking stage. - magma-hip = magma_2_6_2.override { + magma-hip = magma.override { cudaSupport = false; rocmSupport = true; }; diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index c0857bf850f8..16751903fe50 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -13904,7 +13904,6 @@ self: super: with self; { else pkgs.magma; inherit (pkgs.darwin.apple_sdk.frameworks) Accelerate CoreServices; inherit (pkgs.darwin) libobjc; - inherit (pkgs.rocmPackages.llvm) openmp; }; torch-bin = callPackage ../development/python-modules/torch/bin.nix { From 691b69fd80e1253e15164c4db8f9cec5a322279a Mon Sep 17 00:00:00 2001 From: Madoura Date: Fri, 6 Oct 2023 18:18:24 -0500 Subject: [PATCH 25/30] python3Packages.openai-triton: Use custom LLVM and refactor This project does not rely on ROCM's LLVM fork! Using it can cause a lot of problems when it upgrades! --- .../python-modules/openai-triton/default.nix | 222 +- .../python-modules/openai-triton/llvm.nix | 112 + .../python-modules/openai-triton/llvm15.patch | 4617 ----------------- pkgs/top-level/python-packages.nix | 2 +- 4 files changed, 188 insertions(+), 4765 deletions(-) create mode 100644 pkgs/development/python-modules/openai-triton/llvm.nix delete mode 100644 pkgs/development/python-modules/openai-triton/llvm15.patch diff --git a/pkgs/development/python-modules/openai-triton/default.nix b/pkgs/development/python-modules/openai-triton/default.nix index 0e10642f0693..68dfc24aafda 100644 --- a/pkgs/development/python-modules/openai-triton/default.nix +++ b/pkgs/development/python-modules/openai-triton/default.nix @@ -1,12 +1,13 @@ { lib +, callPackage , buildPythonPackage -, python -, fetchpatch , fetchFromGitHub , addOpenGLRunpath +, pytestCheckHook +, pythonRelaxDepsHook +, pkgsTargetTarget , cmake -, cudaPackages -, llvmPackages +, ninja , pybind11 , gtest , zlib @@ -15,18 +16,11 @@ , lit , filelock , torchWithRocm -, pytest -, pytestCheckHook -, pythonRelaxDepsHook -, pkgsTargetTarget +, python +, cudaPackages }: let - pname = "triton"; - version = "2.0.0"; - - inherit (cudaPackages) cuda_cudart backendStdenv; - # A time may come we'll want to be cross-friendly # # Short explanation: we need pkgsTargetTarget, because we use string @@ -38,20 +32,12 @@ let # pkgsTargetTarget maybe doesn't matter, because ptxas compiles programs to # be executed on the GPU. # Cf. https://nixos.org/manual/nixpkgs/unstable/#sec-cross-infra - ptxas = "${pkgsTargetTarget.cudaPackages.cuda_nvcc}/bin/ptxas"; - - llvm = (llvmPackages.llvm.override { - llvmTargetsToBuild = [ "NATIVE" "NVPTX" ]; - # Upstream CI sets these too: - # targetProjects = [ "mlir" ]; - extraCMakeFlags = [ - "-DLLVM_INSTALL_UTILS=ON" - ]; - }); + ptxas = "${pkgsTargetTarget.cudaPackages.cuda_nvcc}/bin/ptxas"; # Make sure cudaPackages is the right version each update (See python/setup.py) + llvm = callPackage ./llvm.nix { }; # Use a custom llvm, see llvm.nix for details in -buildPythonPackage { - inherit pname version; - +buildPythonPackage rec { + pname = "triton"; + version = "2.0.0"; format = "setuptools"; src = fetchFromGitHub { @@ -62,21 +48,6 @@ buildPythonPackage { }; patches = [ - # Prerequisite for llvm15 patch - (fetchpatch { - url = "https://github.com/openai/triton/commit/2aba985daaa70234823ea8f1161da938477d3e02.patch"; - hash = "sha256-LGv0+Ut2WYPC4Ksi4803Hwmhi3FyQOF9zElJc/JCobk="; - }) - (fetchpatch { - url = "https://github.com/openai/triton/commit/e3941f9d09cdd31529ba4a41018cfc0096aafea6.patch"; - hash = "sha256-A+Gor6qzFlGQhVVhiaaYOzqqx8yO2MdssnQS6TIfUWg="; - }) - - # Source: https://github.com/openai/triton/commit/fc7a8e35819bda632bdcf1cf75fd9abe4d4e077a.patch - # The original patch adds ptxas binary, so we include our own clean copy - # Drop with the next update - ./llvm15.patch - # TODO: there have been commits upstream aimed at removing the "torch" # circular dependency, but the patches fail to apply on the release # revision. Keeping the link for future reference @@ -88,70 +59,11 @@ buildPythonPackage { # }) ]; - postPatch = '' - substituteInPlace python/setup.py \ - --replace \ - '= get_thirdparty_packages(triton_cache_path)' \ - '= os.environ["cmakeFlags"].split()' - '' - # Wiring triton=2.0.0 with llcmPackages_rocm.llvm=5.4.3 - # Revisit when updating either triton or llvm - + '' - substituteInPlace CMakeLists.txt \ - --replace "nvptx" "NVPTX" \ - --replace "LLVM 11" "LLVM" - sed -i '/AddMLIR/a set(MLIR_TABLEGEN_EXE "${llvmPackages.mlir}/bin/mlir-tblgen")' CMakeLists.txt - sed -i '/AddMLIR/a set(MLIR_INCLUDE_DIR ''${MLIR_INCLUDE_DIRS})' CMakeLists.txt - find -iname '*.td' -exec \ - sed -i \ - -e '\|include "mlir/IR/OpBase.td"|a include "mlir/IR/AttrTypeBase.td"' \ - -e 's|include "mlir/Dialect/StandardOps/IR/Ops.td"|include "mlir/Dialect/Func/IR/FuncOps.td"|' \ - '{}' ';' - substituteInPlace unittest/CMakeLists.txt --replace "include(GoogleTest)" "find_package(GTest REQUIRED)" - sed -i 's/^include.*$//' unittest/CMakeLists.txt - sed -i '/LINK_LIBS/i NVPTXInfo' lib/Target/PTX/CMakeLists.txt - sed -i '/LINK_LIBS/i NVPTXCodeGen' lib/Target/PTX/CMakeLists.txt - '' - # TritonMLIRIR already links MLIRIR. Not transitive? - # + '' - # echo "target_link_libraries(TritonPTX PUBLIC MLIRIR)" >> lib/Target/PTX/CMakeLists.txt - # '' - # Already defined in llvm, when built with -DLLVM_INSTALL_UTILS - + '' - substituteInPlace bin/CMakeLists.txt \ - --replace "add_subdirectory(FileCheck)" "" - - rm cmake/FindLLVM.cmake - '' - + - ( - let - # Bash was getting weird without linting, - # but basically upstream contains [cc, ..., "-lcuda", ...] - # and we replace it with [..., "-lcuda", "-L/run/opengl-driver/lib", "-L$stubs", ...] - old = [ "-lcuda" ]; - new = [ "-lcuda" "-L${addOpenGLRunpath.driverLink}" "-L${cuda_cudart}/lib/stubs/" ]; - - quote = x: ''"${x}"''; - oldStr = lib.concatMapStringsSep ", " quote old; - newStr = lib.concatMapStringsSep ", " quote new; - in - '' - substituteInPlace python/triton/compiler.py \ - --replace '${oldStr}' '${newStr}' - '' - ) - # Triton seems to be looking up cuda.h - + '' - sed -i 's|cu_include_dir = os.path.join.*$|cu_include_dir = "${cuda_cudart}/include"|' python/triton/compiler.py - ''; - nativeBuildInputs = [ - cmake pythonRelaxDepsHook - - # Requires torch (circular dependency) and probably needs GPUs: - # pytestCheckHook + # pytestCheckHook # Requires torch (circular dependency) and probably needs GPUs: + cmake + ninja # Note for future: # These *probably* should go in depsTargetTarget @@ -159,7 +71,6 @@ buildPythonPackage { # because we only support cudaPackages on x86_64-linux atm lit llvm - llvmPackages.mlir ]; buildInputs = [ @@ -170,17 +81,44 @@ buildPythonPackage { zlib ]; - propagatedBuildInputs = [ - filelock - ]; + propagatedBuildInputs = [ filelock ]; + + postPatch = let + # Bash was getting weird without linting, + # but basically upstream contains [cc, ..., "-lcuda", ...] + # and we replace it with [..., "-lcuda", "-L/run/opengl-driver/lib", "-L$stubs", ...] + old = [ "-lcuda" ]; + new = [ "-lcuda" "-L${addOpenGLRunpath.driverLink}" "-L${cudaPackages.cuda_cudart}/lib/stubs/" ]; + + quote = x: ''"${x}"''; + oldStr = lib.concatMapStringsSep ", " quote old; + newStr = lib.concatMapStringsSep ", " quote new; + in '' + # Use our `cmakeFlags` instead and avoid downloading dependencies + substituteInPlace python/setup.py \ + --replace "= get_thirdparty_packages(triton_cache_path)" "= os.environ[\"cmakeFlags\"].split()" + + # Already defined in llvm, when built with -DLLVM_INSTALL_UTILS + substituteInPlace bin/CMakeLists.txt \ + --replace "add_subdirectory(FileCheck)" "" + + # Use our linker flags + substituteInPlace python/triton/compiler.py \ + --replace '${oldStr}' '${newStr}' + + # Don't fetch googletest + substituteInPlace unittest/CMakeLists.txt \ + --replace "include (\''${CMAKE_CURRENT_SOURCE_DIR}/googletest.cmake)" ""\ + --replace "include(GoogleTest)" "find_package(GTest REQUIRED)" + ''; # Avoid GLIBCXX mismatch with other cuda-enabled python packages preConfigure = '' - export CC="${backendStdenv.cc}/bin/cc"; - export CXX="${backendStdenv.cc}/bin/c++"; + export CC=${cudaPackages.backendStdenv.cc}/bin/cc; + export CXX=${cudaPackages.backendStdenv.cc}/bin/c++; # Upstream's setup.py tries to write cache somewhere in ~/ - export HOME=$TMPDIR + export HOME=$(mktemp -d) # Upstream's github actions patch setup.cfg to write base-dir. May be redundant echo " @@ -188,52 +126,41 @@ buildPythonPackage { base-dir=$PWD" >> python/setup.cfg # The rest (including buildPhase) is relative to ./python/ - cd python/ + cd python # Work around download_and_copy_ptxas() - dst_cuda="$PWD/triton/third_party/cuda/bin" - mkdir -p "$dst_cuda" - ln -s "${ptxas}" "$dst_cuda/" + mkdir -p $PWD/triton/third_party/cuda/bin + ln -s ${ptxas} $PWD/triton/third_party/cuda/bin ''; # CMake is run by setup.py instead dontUseCmakeConfigure = true; - cmakeFlags = [ - "-DMLIR_DIR=${llvmPackages.mlir}/lib/cmake/mlir" - ]; - postFixup = - let - ptxasDestination = "$out/${python.sitePackages}/triton/third_party/cuda/bin/ptxas"; - in - # Setuptools (?) strips runpath and +x flags. Let's just restore the symlink - '' - rm -f ${ptxasDestination} - ln -s ${ptxas} ${ptxasDestination} - ''; + # Setuptools (?) strips runpath and +x flags. Let's just restore the symlink + postFixup = '' + rm -f $out/${python.sitePackages}/triton/third_party/cuda/bin/ptxas + ln -s ${ptxas} $out/${python.sitePackages}/triton/third_party/cuda/bin/ptxas + ''; - checkInputs = [ - cmake # ctest - ]; + checkInputs = [ cmake ]; # ctest dontUseSetuptoolsCheck = true; - preCheck = + + preCheck = '' # build/temp* refers to build_ext.build_temp (looked up in the build logs) - '' - (cd /build/source/python/build/temp* ; ctest) - '' # For pytestCheckHook - + '' - cd test/unit - ''; - pythonImportsCheck = [ - # Circular dependency on torch - # "triton" - # "triton.language" - ]; + (cd /build/source/python/build/temp* ; ctest) + + # For pytestCheckHook + cd test/unit + ''; + + # Circular dependency on torch + # pythonImportsCheck = [ + # "triton" + # "triton.language" + # ]; # Ultimately, torch is our test suite: - passthru.tests = { - inherit torchWithRocm; - }; + passthru.tests = { inherit torchWithRocm; }; pythonRemoveDeps = [ # Circular dependency, cf. https://github.com/openai/triton/issues/1374 @@ -243,11 +170,12 @@ buildPythonPackage { "cmake" "lit" ]; + meta = with lib; { - description = "Development repository for the Triton language and compiler"; - homepage = "https://github.com/openai/triton/"; + description = "Language and compiler for writing highly efficient custom Deep-Learning primitives"; + homepage = "https://github.com/openai/triton"; platforms = lib.platforms.unix; license = licenses.mit; - maintainers = with maintainers; [ SomeoneSerge ]; + maintainers = with maintainers; [ SomeoneSerge Madouura ]; }; } diff --git a/pkgs/development/python-modules/openai-triton/llvm.nix b/pkgs/development/python-modules/openai-triton/llvm.nix new file mode 100644 index 000000000000..6ac0d9f5738c --- /dev/null +++ b/pkgs/development/python-modules/openai-triton/llvm.nix @@ -0,0 +1,112 @@ +{ lib +, stdenv +, fetchFromGitHub +, pkg-config +, cmake +, ninja +, git +, doxygen +, sphinx +, libxml2 +, libxcrypt +, libedit +, libffi +, mpfr +, zlib +, ncurses +, python3Packages +, buildDocs ? true +, buildMan ? true +, buildTests ? true +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "triton-llvm"; + version = "14.0.6-f28c006a5895"; + + outputs = [ + "out" + ] ++ lib.optionals buildDocs [ + "doc" + ] ++ lib.optionals buildMan [ + "man" + ]; + + # See https://github.com/openai/triton/blob/main/python/setup.py and https://github.com/ptillet/triton-llvm-releases/releases + src = fetchFromGitHub { + owner = "llvm"; + repo = "llvm-project"; + rev = "f28c006a5895fc0e329fe15fead81e37457cb1d1"; + hash = "sha256-vffu4HilvYwtzwgq+NlS26m65DGbp6OSSne2aje1yJE="; + }; + + nativeBuildInputs = [ + pkg-config + cmake + ninja + git + python3Packages.python + ] ++ lib.optionals (buildDocs || buildMan) [ + doxygen + sphinx + python3Packages.recommonmark + ]; + + buildInputs = [ + libxml2 + libxcrypt + libedit + libffi + mpfr + ]; + + propagatedBuildInputs = [ + zlib + ncurses + ]; + + sourceRoot = "${finalAttrs.src.name}/llvm"; + + cmakeFlags = [ + "-DLLVM_TARGETS_TO_BUILD=X86;AMDGPU;NVPTX" + "-DLLVM_ENABLE_PROJECTS=llvm;mlir" + "-DLLVM_INSTALL_UTILS=ON" + ] ++ lib.optionals (buildDocs || buildMan) [ + "-DLLVM_INCLUDE_DOCS=ON" + "-DMLIR_INCLUDE_DOCS=ON" + "-DLLVM_BUILD_DOCS=ON" + # "-DLLVM_ENABLE_DOXYGEN=ON" Way too slow, only uses one core + "-DLLVM_ENABLE_SPHINX=ON" + "-DSPHINX_OUTPUT_HTML=ON" + "-DSPHINX_OUTPUT_MAN=ON" + "-DSPHINX_WARNINGS_AS_ERRORS=OFF" + ] ++ lib.optionals buildTests [ + "-DLLVM_INCLUDE_TESTS=ON" + "-DMLIR_INCLUDE_TESTS=ON" + "-DLLVM_BUILD_TESTS=ON" + ]; + + postPatch = '' + # `CMake Error: cannot write to file "/build/source/llvm/build/lib/cmake/mlir/MLIRTargets.cmake": Permission denied` + chmod +w -R ../mlir + + # FileSystem permissions tests fail with various special bits + rm test/tools/llvm-objcopy/ELF/mirror-permissions-unix.test + rm unittests/Support/Path.cpp + + substituteInPlace unittests/Support/CMakeLists.txt \ + --replace "Path.cpp" "" + ''; + + doCheck = buildTests; + requiredSystemFeatures = [ "big-parallel" ]; + + meta = with lib; { + description = "Collection of modular and reusable compiler and toolchain technologies"; + homepage = "https://github.com/llvm/llvm-project"; + license = with licenses; [ ncsa ]; + maintainers = with maintainers; [ SomeoneSerge Madouura ]; + platforms = platforms.linux; + broken = stdenv.isAarch64; # https://github.com/RadeonOpenCompute/ROCm/issues/1831#issuecomment-1278205344 + }; +}) diff --git a/pkgs/development/python-modules/openai-triton/llvm15.patch b/pkgs/development/python-modules/openai-triton/llvm15.patch deleted file mode 100644 index 3e20cce23801..000000000000 --- a/pkgs/development/python-modules/openai-triton/llvm15.patch +++ /dev/null @@ -1,4617 +0,0 @@ -From fc7a8e35819bda632bdcf1cf75fd9abe4d4e077a Mon Sep 17 00:00:00 2001 -From: Christian Sigg -Date: Thu, 16 Feb 2023 15:40:53 +0100 -Subject: [PATCH] Rebase Triton to LLVM-15. (#1070) - -This PR rebases Triton from LLVM-14 to LLVM-15. Most changes are -mechanical, except for the analysis framework changes. ---- - CMakeLists.txt | 6 +- - bin/CMakeLists.txt | 2 +- - bin/FileCheck/FileCheck.cpp | 3 + - bin/triton-opt.cpp | 6 +- - bin/triton-translate.cpp | 7 +- - include/triton/Analysis/Alias.h | 21 +- - include/triton/Analysis/Allocation.h | 2 + - include/triton/Analysis/AxisInfo.h | 56 ++- - include/triton/Analysis/Utility.h | 6 +- - include/triton/Conversion/Passes.td | 4 +- - include/triton/Dialect/Triton/IR/Dialect.h | 7 +- - .../triton/Dialect/Triton/IR/TritonDialect.td | 8 +- - include/triton/Dialect/Triton/IR/TritonOps.td | 12 +- - .../triton/Dialect/Triton/IR/TritonTypes.td | 2 + - .../Dialect/Triton/Transforms/Passes.td | 3 +- - include/triton/Dialect/TritonGPU/IR/Dialect.h | 4 +- - .../Dialect/TritonGPU/IR/TritonGPUAttrDefs.td | 7 + - .../Dialect/TritonGPU/IR/TritonGPUDialect.td | 2 +- - .../Dialect/TritonGPU/IR/TritonGPUOps.td | 13 +- - lib/Analysis/Alias.cpp | 14 +- - lib/Analysis/Allocation.cpp | 30 +- - lib/Analysis/AxisInfo.cpp | 79 ++-- - lib/Analysis/CMakeLists.txt | 2 +- - lib/Analysis/Membar.cpp | 2 +- - lib/Analysis/Utility.cpp | 54 +++ - .../TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp | 3 - - lib/Conversion/TritonGPUToLLVM/DotOpHelpers.h | 10 +- - .../TritonGPUToLLVM/DotOpToLLVM.cpp | 5 - - .../TritonGPUToLLVM/ElementwiseOpToLLVM.cpp | 2 - - .../TritonGPUToLLVM/LoadStoreOpToLLVM.cpp | 5 +- - .../TritonGPUToLLVM/ReduceOpToLLVM.cpp | 2 - - .../TritonGPUToLLVM/TritonGPUToLLVM.cpp | 7 +- - .../TritonGPUToLLVM/TritonGPUToLLVMBase.h | 26 +- - .../TritonGPUToLLVM/TritonGPUToLLVMPass.cpp | 52 +-- - lib/Conversion/TritonGPUToLLVM/Utility.h | 5 +- - .../TritonToTritonGPUPass.cpp | 69 ++-- - lib/Dialect/Triton/IR/CMakeLists.txt | 10 +- - lib/Dialect/Triton/IR/Ops.cpp | 34 +- - lib/Dialect/Triton/Transforms/Combine.cpp | 6 +- - lib/Dialect/Triton/Transforms/Combine.td | 2 +- - lib/Dialect/TritonGPU/IR/Dialect.cpp | 27 +- - lib/Dialect/TritonGPU/Transforms/Coalesce.cpp | 20 +- - lib/Dialect/TritonGPU/Transforms/Combine.cpp | 2 +- - lib/Dialect/TritonGPU/Transforms/Combine.td | 1 + - .../Transforms/DecomposeConversions.cpp | 2 +- - lib/Dialect/TritonGPU/Transforms/Pipeline.cpp | 10 +- - .../Transforms/ReorderInstructions.cpp | 2 +- - .../Transforms/TritonGPUConversion.cpp | 12 +- - .../Transforms/UpdateMmaForVolta.cpp | 6 +- - lib/Dialect/TritonGPU/Transforms/Utility.cpp | 2 +- - lib/Target/LLVMIR/CMakeLists.txt | 3 +- - lib/Target/PTX/PTXTranslation.cpp | 3 + - python/setup.py | 15 +- - python/src/triton.cc | 85 +++-- - python/test/unit/language/test_core.py | 2 +- - python/triton/compiler.py | 4 +- - test/Analysis/test-alias.mlir | 24 +- - test/Analysis/test-alignment.mlir | 344 +++++++++--------- - test/Analysis/test-allocation.mlir | 32 +- - test/Analysis/test-membar.mlir | 38 +- - test/Conversion/triton_ops.mlir | 10 +- - test/Conversion/triton_to_tritongpu.mlir | 6 +- - test/Conversion/tritongpu_to_llvm.mlir | 94 ++--- - test/Target/tritongpu_to_llvmir.mlir | 4 +- - test/Target/tritongpu_to_ptx.mlir | 2 +- - test/Triton/combine.mlir | 40 +- - test/Triton/vecadd.mlir | 4 +- - test/TritonGPU/coalesce.mlir | 2 +- - test/TritonGPU/combine.mlir | 38 +- - test/TritonGPU/loop-pipeline.mlir | 22 +- - test/TritonGPU/matmul.mlir | 4 +- - test/TritonGPU/prefetch.mlir | 4 +- - test/TritonGPU/update-mma-for-volta.mlir | 4 +- - test/lib/Analysis/TestAlias.cpp | 29 +- - test/lib/Analysis/TestAllocation.cpp | 5 +- - test/lib/Analysis/TestAxisInfo.cpp | 51 +-- - test/lib/Analysis/TestMembar.cpp | 7 +- - 78 files changed, 808 insertions(+), 742 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index d0d361fc7c..b281a28400 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -1,4 +1,7 @@ - cmake_minimum_required(VERSION 3.6) -+ -+cmake_policy(SET CMP0116 OLD) -+ - include(ExternalProject) - - set(CMAKE_CXX_STANDARD 17) -@@ -155,7 +158,6 @@ if(TRITON_BUILD_PYTHON_MODULE) - endif() - endif() - -- - # # Triton - # file(GLOB_RECURSE LIBTRITON_SRC lib/*.cc) - # if (WIN32 AND TRITON_BUILD_PYTHON_MODULE) -@@ -212,7 +214,7 @@ if(TRITON_BUILD_PYTHON_MODULE) - # optimizations - MLIRPass - MLIRTransforms -- MLIRLLVMIR -+ MLIRLLVMDialect - MLIRSupport - MLIRTargetLLVMIRExport - MLIRExecutionEngine -diff --git a/bin/CMakeLists.txt b/bin/CMakeLists.txt -index 906f635f8b..695b3479fd 100644 ---- a/bin/CMakeLists.txt -+++ b/bin/CMakeLists.txt -@@ -48,7 +48,7 @@ llvm_update_compile_flags(triton-translate) - # MLIR core - MLIROptLib - MLIRIR -- MLIRLLVMIR -+ MLIRLLVMDialect - MLIRPass - MLIRSupport - MLIRTransforms -diff --git a/bin/FileCheck/FileCheck.cpp b/bin/FileCheck/FileCheck.cpp -index 819efc3541..9ac6f1b277 100644 ---- a/bin/FileCheck/FileCheck.cpp -+++ b/bin/FileCheck/FileCheck.cpp -@@ -19,6 +19,7 @@ - #include "llvm/Support/CommandLine.h" - #include "llvm/Support/InitLLVM.h" - #include "llvm/Support/Process.h" -+#include "llvm/Support/SourceMgr.h" - #include "llvm/Support/WithColor.h" - #include "llvm/Support/raw_ostream.h" - #include -@@ -360,6 +361,8 @@ static std::string GetCheckTypeAbbreviation(Check::FileCheckType Ty) { - return "bad-not"; - case Check::CheckBadCount: - return "bad-count"; -+ case Check::CheckMisspelled: -+ return "misspelled"; - case Check::CheckNone: - llvm_unreachable("invalid FileCheckType"); - } -diff --git a/bin/triton-opt.cpp b/bin/triton-opt.cpp -index 9f3b53b7ae..f96232e1b0 100644 ---- a/bin/triton-opt.cpp -+++ b/bin/triton-opt.cpp -@@ -8,7 +8,7 @@ - - #include "mlir/IR/Dialect.h" - #include "mlir/InitAllPasses.h" --#include "mlir/Support/MlirOptMain.h" -+#include "mlir/Tools/mlir-opt/MlirOptMain.h" - - namespace mlir { - namespace test { -@@ -33,8 +33,8 @@ int main(int argc, char **argv) { - // TODO: register Triton & TritonGPU passes - mlir::DialectRegistry registry; - registry.insert(); - - return mlir::asMainReturnCode(mlir::MlirOptMain( -diff --git a/bin/triton-translate.cpp b/bin/triton-translate.cpp -index 05ba15e453..56b5d65857 100644 ---- a/bin/triton-translate.cpp -+++ b/bin/triton-translate.cpp -@@ -3,7 +3,7 @@ - #include "mlir/IR/AsmState.h" - #include "mlir/IR/BuiltinOps.h" - #include "mlir/IR/Dialect.h" --#include "mlir/Parser.h" -+#include "mlir/Parser/Parser.h" - #include "mlir/Pass/Pass.h" - #include "mlir/Pass/PassManager.h" - #include "mlir/Support/FileUtilities.h" -@@ -38,7 +38,7 @@ OwningOpRef loadMLIRModule(llvm::StringRef inputFilename, - mlir::DialectRegistry registry; - registry.insert(); -+ scf::SCFDialect>(); - - context.appendDialectRegistry(registry); - -@@ -50,7 +50,8 @@ OwningOpRef loadMLIRModule(llvm::StringRef inputFilename, - context.loadAllAvailableDialects(); - context.allowUnregisteredDialects(); - -- OwningOpRef module(parseSourceFile(sourceMgr, &context)); -+ OwningOpRef module = -+ parseSourceFile(sourceMgr, &context); - if (!module) { - llvm::errs() << "Parse MLIR file failed."; - return nullptr; -diff --git a/include/triton/Analysis/Alias.h b/include/triton/Analysis/Alias.h -index fa6b906fc9..631df518bc 100644 ---- a/include/triton/Analysis/Alias.h -+++ b/include/triton/Analysis/Alias.h -@@ -2,7 +2,7 @@ - #define TRITON_ANALYSIS_ALIAS_H - - #include "mlir/Analysis/AliasAnalysis.h" --#include "mlir/Analysis/DataFlowAnalysis.h" -+#include "mlir/Analysis/DataFlow/SparseAnalysis.h" - #include "llvm/ADT/DenseSet.h" - - namespace mlir { -@@ -21,7 +21,7 @@ class AliasInfo { - } - - /// The pessimistic value state of a value without alias -- static AliasInfo getPessimisticValueState(MLIRContext *context) { -+ static AliasInfo getPessimisticValueState(MLIRContext *context = nullptr) { - return AliasInfo(); - } - static AliasInfo getPessimisticValueState(Value value) { return AliasInfo(); } -@@ -29,6 +29,10 @@ class AliasInfo { - /// The union of both arguments - static AliasInfo join(const AliasInfo &lhs, const AliasInfo &rhs); - -+ void print(raw_ostream &os) const { -+ llvm::interleaveComma(allocs, os, [&](Value alloc) { alloc.print(os); }); -+ } -+ - private: - /// The set of allocated values that are aliased by this lattice. - /// For now, we only consider aliased value produced by the following -@@ -58,9 +62,13 @@ class AliasInfo { - //===----------------------------------------------------------------------===// - // Shared Memory Alias Analysis - //===----------------------------------------------------------------------===// --class SharedMemoryAliasAnalysis : public ForwardDataFlowAnalysis { -+class SharedMemoryAliasAnalysis -+ : public dataflow::SparseDataFlowAnalysis> { - public: -- using ForwardDataFlowAnalysis::ForwardDataFlowAnalysis; -+ using dataflow::SparseDataFlowAnalysis< -+ dataflow::Lattice>::SparseDataFlowAnalysis; -+ using dataflow::SparseDataFlowAnalysis< -+ dataflow::Lattice>::getLatticeElement; - - /// XXX(Keren): Compatible interface with MLIR AliasAnalysis for future use. - /// Given two values, returns their aliasing behavior. -@@ -70,9 +78,10 @@ class SharedMemoryAliasAnalysis : public ForwardDataFlowAnalysis { - ModRefResult getModRef(Operation *op, Value location); - - /// Computes if the alloc set of the results are changed. -- ChangeResult -+ void - visitOperation(Operation *op, -- ArrayRef *> operands) override; -+ ArrayRef *> operands, -+ ArrayRef *> results) override; - }; - - } // namespace mlir -diff --git a/include/triton/Analysis/Allocation.h b/include/triton/Analysis/Allocation.h -index b7c136d602..89b77034cc 100644 ---- a/include/triton/Analysis/Allocation.h -+++ b/include/triton/Analysis/Allocation.h -@@ -188,6 +188,8 @@ class Allocation { - friend class triton::AllocationAnalysis; - }; - -+template Interval(T, T) -> Interval; -+ - } // namespace mlir - - #endif // TRITON_ANALYSIS_ALLOCATION_H -diff --git a/include/triton/Analysis/AxisInfo.h b/include/triton/Analysis/AxisInfo.h -index fdfbd8fbb3..7083b9c43b 100644 ---- a/include/triton/Analysis/AxisInfo.h -+++ b/include/triton/Analysis/AxisInfo.h -@@ -1,9 +1,10 @@ - #ifndef TRITON_ANALYSIS_AXISINFO_H - #define TRITON_ANALYSIS_AXISINFO_H - --#include "mlir/Analysis/DataFlowAnalysis.h" -+#include "mlir/Analysis/DataFlow/SparseAnalysis.h" - #include "llvm/Support/raw_ostream.h" - -+#include "mlir/Support/LLVM.h" - #include "triton/Analysis/Utility.h" - #include "triton/Dialect/Triton/IR/Dialect.h" - #include "triton/Dialect/TritonGPU/IR/Dialect.h" -@@ -62,7 +63,7 @@ class AxisInfo { - } - - /// The pessimistic value state of the contiguity is unknown. -- static AxisInfo getPessimisticValueState(MLIRContext *context) { -+ static AxisInfo getPessimisticValueState(MLIRContext *context = nullptr) { - return AxisInfo(); - } - static AxisInfo getPessimisticValueState(Value value); -@@ -70,6 +71,22 @@ class AxisInfo { - /// The gcd of both arguments for each dimension - static AxisInfo join(const AxisInfo &lhs, const AxisInfo &rhs); - -+ void print(raw_ostream &os) const { -+ auto print = [&](StringRef name, DimVectorT vec) { -+ os << name << " = ["; -+ llvm::interleaveComma(vec, os); -+ os << "]"; -+ }; -+ print("contiguity", contiguity); -+ print(", divisibility", divisibility); -+ print(", constancy", constancy); -+ os << ", constant_value = "; -+ if (constantValue) -+ os << *constantValue; -+ else -+ os << ""; -+ } -+ - private: - /// The _contiguity_ information maps the `d`-th - /// dimension to the length of the shortest -@@ -147,7 +164,8 @@ class AxisInfoVisitor { - } - - virtual AxisInfo -- getAxisInfo(Operation *op, ArrayRef *> operands) = 0; -+ getAxisInfo(Operation *op, -+ ArrayRef *> operands) = 0; - - virtual bool match(Operation *op) = 0; - }; -@@ -157,15 +175,16 @@ template class AxisInfoVisitorImpl : public AxisInfoVisitor { - public: - using AxisInfoVisitor::AxisInfoVisitor; - -- AxisInfo getAxisInfo(Operation *op, -- ArrayRef *> operands) final { -+ AxisInfo -+ getAxisInfo(Operation *op, -+ ArrayRef *> operands) final { - return getAxisInfo(cast(op), operands); - } - - bool match(Operation *op) final { return isa(op); } - -- virtual AxisInfo getAxisInfo(OpTy op, -- ArrayRef *> operands) { -+ virtual AxisInfo -+ getAxisInfo(OpTy op, ArrayRef *> operands) { - llvm_unreachable("Unimplemented getAxisInfo"); - } - }; -@@ -176,8 +195,9 @@ class BinaryOpVisitorImpl : public AxisInfoVisitorImpl { - public: - using AxisInfoVisitorImpl::AxisInfoVisitorImpl; - -- AxisInfo getAxisInfo(OpTy op, -- ArrayRef *> operands) override { -+ AxisInfo -+ getAxisInfo(OpTy op, -+ ArrayRef *> operands) override { - auto lhsInfo = operands[0]->getValue(); - auto rhsInfo = operands[1]->getValue(); - auto rank = lhsInfo.getRank(); -@@ -230,7 +250,8 @@ class AxisInfoVisitorList { - (visitors.emplace_back(std::make_unique()), ...); - } - -- AxisInfo apply(Operation *op, ArrayRef *> operands) { -+ AxisInfo apply(Operation *op, -+ ArrayRef *> operands) { - for (auto &visitor : visitors) - if (visitor->match(op)) - return visitor->getAxisInfo(op, operands); -@@ -241,16 +262,19 @@ class AxisInfoVisitorList { - std::vector> visitors; - }; - --class AxisInfoAnalysis : public ForwardDataFlowAnalysis { -+class AxisInfoAnalysis -+ : public dataflow::SparseDataFlowAnalysis> { - private: - AxisInfoVisitorList visitors; - - public: -- AxisInfoAnalysis(MLIRContext *context); -+ AxisInfoAnalysis(DataFlowSolver &solver); -+ using dataflow::SparseDataFlowAnalysis< -+ dataflow::Lattice>::getLatticeElement; - -- ChangeResult -- visitOperation(Operation *op, -- ArrayRef *> operands) override; -+ void visitOperation(Operation *op, -+ ArrayRef *> operands, -+ ArrayRef *> results) override; - - unsigned getPtrContiguity(Value ptr); - -@@ -261,4 +285,4 @@ class AxisInfoAnalysis : public ForwardDataFlowAnalysis { - - } // namespace mlir - --#endif -\ No newline at end of file -+#endif -diff --git a/include/triton/Analysis/Utility.h b/include/triton/Analysis/Utility.h -index c5ac137dc1..ee7fadb59d 100644 ---- a/include/triton/Analysis/Utility.h -+++ b/include/triton/Analysis/Utility.h -@@ -1,6 +1,7 @@ - #ifndef TRITON_ANALYSIS_UTILITY_H - #define TRITON_ANALYSIS_UTILITY_H - -+#include "mlir/Analysis/DataFlowFramework.h" - #include "mlir/Analysis/SliceAnalysis.h" - #include "triton/Dialect/TritonGPU/IR/Dialect.h" - #include -@@ -12,7 +13,7 @@ namespace mlir { - class ReduceOpHelper { - public: - explicit ReduceOpHelper(triton::ReduceOp op) : op(op) { -- srcTy = op.operand().getType().cast(); -+ srcTy = op.getOperand().getType().cast(); - } - - ArrayRef getSrcShape() { return srcTy.getShape(); } -@@ -103,6 +104,9 @@ SetVector - multiRootGetSlice(Operation *op, TransitiveFilter backwardFilter = nullptr, - TransitiveFilter forwardFilter = nullptr); - -+// Create a basic DataFlowSolver with constant and dead code analysis included. -+std::unique_ptr createDataFlowSolver(); -+ - } // namespace mlir - - #endif // TRITON_ANALYSIS_UTILITY_H -diff --git a/include/triton/Conversion/Passes.td b/include/triton/Conversion/Passes.td -index 70bb20b78e..be00eb2dac 100644 ---- a/include/triton/Conversion/Passes.td -+++ b/include/triton/Conversion/Passes.td -@@ -12,7 +12,6 @@ def ConvertTritonToTritonGPU: Pass<"convert-triton-to-tritongpu", "mlir::ModuleO - - let dependentDialects = ["mlir::arith::ArithmeticDialect", - "mlir::math::MathDialect", -- "mlir::StandardOpsDialect", - // TODO: Does this pass depend on SCF? - "mlir::scf::SCFDialect", - "mlir::triton::TritonDialect", -@@ -41,8 +40,7 @@ def ConvertTritonGPUToLLVM : Pass<"convert-triton-gpu-to-llvm", "mlir::ModuleOp" - "mlir::tensor::TensorDialect", - "mlir::triton::TritonDialect", - "mlir::triton::gpu::TritonGPUDialect", -- "mlir::NVVM::NVVMDialect", -- "mlir::StandardOpsDialect"]; -+ "mlir::NVVM::NVVMDialect"]; - - let options = [ - Option<"computeCapability", "compute-capability", -diff --git a/include/triton/Dialect/Triton/IR/Dialect.h b/include/triton/Dialect/Triton/IR/Dialect.h -index e8012a51df..15869e262e 100644 ---- a/include/triton/Dialect/Triton/IR/Dialect.h -+++ b/include/triton/Dialect/Triton/IR/Dialect.h -@@ -1,14 +1,15 @@ - #ifndef TRITON_DIALECT_TRITON_IR_DIALECT_H_ - #define TRITON_DIALECT_TRITON_IR_DIALECT_H_ - -+#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" -+#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" -+#include "mlir/Dialect/Func/IR/FuncOps.h" - #include "mlir/Dialect/Math/IR/Math.h" --#include "mlir/Dialect/SCF/SCF.h" --#include "mlir/Dialect/StandardOps/IR/Ops.h" -+#include "mlir/Dialect/SCF/IR/SCF.h" - #include "mlir/Dialect/Tensor/IR/Tensor.h" - #include "mlir/IR/BuiltinOps.h" - #include "mlir/IR/Dialect.h" - #include "mlir/Interfaces/ControlFlowInterfaces.h" -- - #include "triton/Dialect/Triton/IR/Dialect.h.inc" - #include "triton/Dialect/Triton/IR/OpsEnums.h.inc" - #include "triton/Dialect/Triton/IR/Traits.h" -diff --git a/include/triton/Dialect/Triton/IR/TritonDialect.td b/include/triton/Dialect/Triton/IR/TritonDialect.td -index 07b069e14f..d98ce73884 100644 ---- a/include/triton/Dialect/Triton/IR/TritonDialect.td -+++ b/include/triton/Dialect/Triton/IR/TritonDialect.td -@@ -25,12 +25,9 @@ def Triton_Dialect : Dialect { - let dependentDialects = [ - "arith::ArithmeticDialect", - "math::MathDialect", -- "StandardOpsDialect", - "scf::SCFDialect", -- -- // Since LLVM 15 -- // "cf::ControlFlowDialect", -- // "func::FuncDialect" -+ "cf::ControlFlowDialect", -+ "func::FuncDialect" - ]; - - let extraClassDeclaration = [{ -@@ -38,6 +35,7 @@ def Triton_Dialect : Dialect { - }]; - - let hasConstantMaterializer = 1; -+ let useDefaultTypePrinterParser = 1; - } - - include "triton/Dialect/Triton/IR/TritonTypes.td" -diff --git a/include/triton/Dialect/Triton/IR/TritonOps.td b/include/triton/Dialect/Triton/IR/TritonOps.td -index 779e0b648c..0a69211179 100644 ---- a/include/triton/Dialect/Triton/IR/TritonOps.td -+++ b/include/triton/Dialect/Triton/IR/TritonOps.td -@@ -141,11 +141,7 @@ def TT_LoadOp : TT_Op<"load", - "triton::EvictionPolicy":$evict, "bool":$isVolatile)>, - ]; - -- // let assemblyFormat = "operands attr-dict `:` type($result)"; -- let parser = [{ return mlir::triton::parseLoadOp(parser, result); }]; -- -- let printer = [{ return mlir::triton::printLoadOp(p, *this); }]; -- -+ let hasCustomAssemblyFormat = 1; - let hasCanonicalizer = 1; - } - -@@ -170,11 +166,7 @@ def TT_StoreOp : TT_Op<"store", - "triton::EvictionPolicy":$evict)>, - ]; - -- // let assemblyFormat = "operands attr-dict `:` type($value)"; -- let parser = [{ return mlir::triton::parseStoreOp(parser, result); }]; -- -- let printer = [{ return mlir::triton::printStoreOp(p, *this); }]; -- -+ let hasCustomAssemblyFormat = 1; - let hasCanonicalizer = 1; - } - -diff --git a/include/triton/Dialect/Triton/IR/TritonTypes.td b/include/triton/Dialect/Triton/IR/TritonTypes.td -index 66d2a7b9a9..2fe2fd077d 100644 ---- a/include/triton/Dialect/Triton/IR/TritonTypes.td -+++ b/include/triton/Dialect/Triton/IR/TritonTypes.td -@@ -1,6 +1,7 @@ - #ifndef TRITON_TYPES - #define TRITON_TYPES - -+include "mlir/IR/AttrTypeBase.td" - include "triton/Dialect/Triton/IR/TritonDialect.td" - - // -@@ -58,6 +59,7 @@ def TT_Ptr : TritonTypeDef<"Pointer", "ptr"> { - }]> - ]; - -+ let hasCustomAssemblyFormat = 1; - let skipDefaultBuilders = 1; - } - def TT_PtrTensor : TensorOf<[TT_Ptr]>; -diff --git a/include/triton/Dialect/Triton/Transforms/Passes.td b/include/triton/Dialect/Triton/Transforms/Passes.td -index 8f77aed774..a25cdc5680 100644 ---- a/include/triton/Dialect/Triton/Transforms/Passes.td -+++ b/include/triton/Dialect/Triton/Transforms/Passes.td -@@ -16,8 +16,7 @@ def TritonCombineOps : Pass - - let constructor = "mlir::triton::createCombineOpsPass()"; - -- let dependentDialects = ["mlir::arith::ArithmeticDialect", -- /*SelectOp*/"mlir::StandardOpsDialect"]; -+ let dependentDialects = ["mlir::arith::ArithmeticDialect"]; - } - - #endif -diff --git a/include/triton/Dialect/TritonGPU/IR/Dialect.h b/include/triton/Dialect/TritonGPU/IR/Dialect.h -index b4c8daec7b..dfc5f53ab1 100644 ---- a/include/triton/Dialect/TritonGPU/IR/Dialect.h -+++ b/include/triton/Dialect/TritonGPU/IR/Dialect.h -@@ -1,19 +1,17 @@ - #ifndef TRITON_DIALECT_TRITONGPU_IR_DIALECT_H_ - #define TRITON_DIALECT_TRITONGPU_IR_DIALECT_H_ - --#include "mlir/Dialect/GPU/GPUDialect.h" -+#include "mlir/Dialect/GPU/IR/GPUDialect.h" - #include "mlir/Dialect/Tensor/IR/Tensor.h" - #include "mlir/IR/BuiltinOps.h" - #include "mlir/IR/Dialect.h" - - // TritonGPU depends on Triton - #include "triton/Dialect/Triton/IR/Dialect.h" -- - #include "triton/Dialect/TritonGPU/IR/Dialect.h.inc" - #include "triton/Dialect/TritonGPU/IR/Traits.h" - - #define GET_ATTRDEF_CLASSES --#include "triton/Dialect/Triton/IR/AttrInterfaces.h.inc" - #include "triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.h.inc" - - #define GET_OP_CLASSES -diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td b/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td -index 0242c3cc17..af2aeb03a8 100644 ---- a/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td -+++ b/include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td -@@ -1,6 +1,7 @@ - #ifndef TRITONGPU_ATTRDEFS - #define TRITONGPU_ATTRDEFS - -+include "mlir/IR/AttrTypeBase.td" - include "triton/Dialect/TritonGPU/IR/TritonGPUDialect.td" - include "triton/Dialect/Triton/IR/TritonInterfaces.td" - -@@ -136,6 +137,7 @@ A_{3, 2} A_{3, 3} A_{3, 0} A_{3, 1} ... [phase 1] / - ]; - - let extraClassDeclaration = extraBaseClassDeclaration; -+ let hasCustomAssemblyFormat = 1; - } - - //===----------------------------------------------------------------------===// -@@ -273,6 +275,7 @@ for - // ArrayRefParameter<"unsigned">:$sizePerCTA - ); - -+ let hasCustomAssemblyFormat = 1; - } - - //===----------------------------------------------------------------------===// -@@ -422,6 +425,7 @@ For example, the matrix L corresponding to blockTileSize=[32,16] is: - static constexpr int numBitsToHoldMmaV1ID{5}; - }]; - -+ let hasCustomAssemblyFormat = 1; - } - - def SliceEncodingAttr : DistributedEncoding<"SliceEncoding"> { -@@ -456,6 +460,8 @@ def SliceEncodingAttr : DistributedEncoding<"SliceEncoding"> { - template - SmallVector paddedShape(ArrayRef shape) const; - }]; -+ -+ let hasCustomAssemblyFormat = 1; - } - - def DotOperandEncodingAttr : DistributedEncoding<"DotOperandEncoding"> { -@@ -492,6 +498,7 @@ section 9.7.13.4.1 for more details. - - ]; - -+ let hasCustomAssemblyFormat = 1; - let extraClassDeclaration = extraBaseClassDeclaration; - } - -diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUDialect.td b/include/triton/Dialect/TritonGPU/IR/TritonGPUDialect.td -index 87ec1d36c6..6489a721b4 100644 ---- a/include/triton/Dialect/TritonGPU/IR/TritonGPUDialect.td -+++ b/include/triton/Dialect/TritonGPU/IR/TritonGPUDialect.td -@@ -30,7 +30,7 @@ def TritonGPU_Dialect : Dialect { - } - }]; - -- -+ let useDefaultAttributePrinterParser = 1; - } - - #endif -diff --git a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td b/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td -index 510f8d0183..7aba11dc75 100644 ---- a/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td -+++ b/include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td -@@ -59,7 +59,7 @@ def TTG_AsyncCommitGroupOp : TTG_Op<"async_commit_group"> { - // This is needed because these ops don't - // handle encodings - // e.g., https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td#L111 --def TTG_CmpIOp : TTG_Op<"cmpi", [NoSideEffect, Elementwise, -+def TTG_CmpIOp : TTG_Op<"cmpi", [NoSideEffect, Elementwise, - SameOperandsAndResultShape, - SameOperandsAndResultEncoding]> { - let summary = "integer comparison operation"; -@@ -73,7 +73,7 @@ def TTG_CmpIOp : TTG_Op<"cmpi", [NoSideEffect, Elementwise, - let results = (outs TT_BoolLike:$result); - } - --def TTG_CmpFOp : TTG_Op<"cmpf", [NoSideEffect, Elementwise, -+def TTG_CmpFOp : TTG_Op<"cmpf", [NoSideEffect, Elementwise, - SameOperandsAndResultShape, - SameOperandsAndResultEncoding]> { - let summary = "floating-point comparison operation"; -@@ -88,8 +88,8 @@ def TTG_CmpFOp : TTG_Op<"cmpf", [NoSideEffect, Elementwise, - } - - // TODO: migrate to arith::SelectOp on LLVM16 --def TTG_SelectOp : TTG_Op<"select", [NoSideEffect, Elementwise, -- SameOperandsAndResultShape, -+def TTG_SelectOp : TTG_Op<"select", [NoSideEffect, Elementwise, -+ SameOperandsAndResultShape, - SameOperandsAndResultEncoding]> { - let summary = "select operation"; - -@@ -188,10 +188,7 @@ def TTG_InsertSliceAsyncOp : TTG_Op<"insert_slice_async", - } - }]; - -- // The custom parser could be replaced with oilist in LLVM-16 -- let parser = [{ return parseInsertSliceAsyncOp(parser, result); }]; -- -- let printer = [{ return printInsertSliceAsyncOp(p, *this); }]; -+ let hasCustomAssemblyFormat = 1; - } - - def TTG_AllocTensorOp : TTG_Op<"alloc_tensor", [MemoryEffects<[MemAlloc]>, // Allocate shared memory -diff --git a/lib/Analysis/Alias.cpp b/lib/Analysis/Alias.cpp -index a39e4de9aa..208fdd4afc 100644 ---- a/lib/Analysis/Alias.cpp -+++ b/lib/Analysis/Alias.cpp -@@ -18,8 +18,9 @@ AliasInfo AliasInfo::join(const AliasInfo &lhs, const AliasInfo &rhs) { - return ret; - } - --ChangeResult SharedMemoryAliasAnalysis::visitOperation( -- Operation *op, ArrayRef *> operands) { -+void SharedMemoryAliasAnalysis::visitOperation( -+ Operation *op, ArrayRef *> operands, -+ ArrayRef *> results) { - AliasInfo aliasInfo; - bool pessimistic = true; - if (maybeSharedAllocationOp(op)) { -@@ -44,14 +45,11 @@ ChangeResult SharedMemoryAliasAnalysis::visitOperation( - } - - if (pessimistic) { -- return markAllPessimisticFixpoint(op->getResults()); -+ return markAllPessimisticFixpoint(results); - } - // Join all lattice elements -- ChangeResult result = ChangeResult::NoChange; -- for (Value value : op->getResults()) { -- result |= getLatticeElement(value).join(aliasInfo); -- } -- return result; -+ for (auto *result : results) -+ propagateIfChanged(result, result->join(aliasInfo)); - } - - AliasResult SharedMemoryAliasAnalysis::alias(Value lhs, Value rhs) { -diff --git a/lib/Analysis/Allocation.cpp b/lib/Analysis/Allocation.cpp -index 712c08c475..b4de8dcd9d 100644 ---- a/lib/Analysis/Allocation.cpp -+++ b/lib/Analysis/Allocation.cpp -@@ -1,4 +1,5 @@ - #include "triton/Analysis/Allocation.h" -+#include "mlir/Analysis/DataFlowFramework.h" - #include "mlir/Analysis/Liveness.h" - #include "mlir/Analysis/SliceAnalysis.h" - #include "mlir/Dialect/Tensor/IR/Tensor.h" -@@ -33,10 +34,8 @@ constexpr int kPtrBitWidth = 64; - - static std::pair, SmallVector> - getCvtOrder(const Attribute &srcLayout, const Attribute &dstLayout) { -- auto srcBlockedLayout = srcLayout.dyn_cast(); - auto srcMmaLayout = srcLayout.dyn_cast(); - auto srcDotLayout = srcLayout.dyn_cast(); -- auto dstBlockedLayout = dstLayout.dyn_cast(); - auto dstMmaLayout = dstLayout.dyn_cast(); - auto dstDotLayout = dstLayout.dyn_cast(); - assert(!(srcMmaLayout && dstMmaLayout) && -@@ -224,14 +223,12 @@ class AllocationAnalysis { - } - - void getValueAlias(Value value, SharedMemoryAliasAnalysis &analysis) { -- LatticeElement *latticeElement = -- analysis.lookupLatticeElement(value); -- if (latticeElement) { -- auto &info = latticeElement->getValue(); -- if (!info.getAllocs().empty()) { -- for (auto alloc : info.getAllocs()) { -- allocation->addAlias(value, alloc); -- } -+ dataflow::Lattice *latticeElement = -+ analysis.getLatticeElement(value); -+ if (latticeElement && !latticeElement->isUninitialized()) { -+ AliasInfo &info = latticeElement->getValue(); -+ for (auto alloc : info.getAllocs()) { -+ allocation->addAlias(value, alloc); - } - } - } -@@ -244,14 +241,19 @@ class AllocationAnalysis { - getScratchValueSize(op); - }); - // Get the alias values -- SharedMemoryAliasAnalysis aliasAnalysis(operation->getContext()); -- aliasAnalysis.run(operation); -+ std::unique_ptr solver = createDataFlowSolver(); -+ SharedMemoryAliasAnalysis *aliasAnalysis = -+ solver->load(); -+ if (failed(solver->initializeAndRun(operation))) { -+ // TODO: return error instead of bailing out.. -+ llvm_unreachable("failed to run SharedMemoryAliasAnalysis"); -+ } - operation->walk([&](Operation *op) { - for (auto operand : op->getOperands()) { -- getValueAlias(operand, aliasAnalysis); -+ getValueAlias(operand, *aliasAnalysis); - } - for (auto value : op->getResults()) { -- getValueAlias(value, aliasAnalysis); -+ getValueAlias(value, *aliasAnalysis); - } - }); - } -diff --git a/lib/Analysis/AxisInfo.cpp b/lib/Analysis/AxisInfo.cpp -index 0b7142b04d..4af46c3fbb 100644 ---- a/lib/Analysis/AxisInfo.cpp -+++ b/lib/Analysis/AxisInfo.cpp -@@ -1,4 +1,4 @@ --#include "mlir/Analysis/DataFlowAnalysis.h" -+#include "mlir/Analysis/DataFlowFramework.h" - #include "mlir/Dialect/LLVMIR/LLVMDialect.h" - #include "llvm/Support/raw_ostream.h" - -@@ -52,7 +52,7 @@ AxisInfo AxisInfo::getPessimisticValueState(Value value) { - BlockArgument blockArg = value.dyn_cast(); - if (blockArg && blockArg.getOwner()->isEntryBlock()) { - Operation *op = blockArg.getOwner()->getParentOp(); -- if (FuncOp fun = dyn_cast(op)) { -+ if (func::FuncOp fun = dyn_cast(op)) { - Attribute attr = - fun.getArgAttr(blockArg.getArgNumber(), "tt.divisibility"); - if (attr) -@@ -136,8 +136,9 @@ class CastOpAxisInfoVisitor final : public AxisInfoVisitorImpl { - public: - using AxisInfoVisitorImpl::AxisInfoVisitorImpl; - -- AxisInfo getAxisInfo(OpTy op, -- ArrayRef *> operands) override { -+ AxisInfo -+ getAxisInfo(OpTy op, -+ ArrayRef *> operands) override { - return operands[0]->getValue(); - } - }; -@@ -147,8 +148,9 @@ class MakeRangeOpAxisInfoVisitor final - public: - using AxisInfoVisitorImpl::AxisInfoVisitorImpl; - -- AxisInfo getAxisInfo(triton::MakeRangeOp op, -- ArrayRef *> operands) override { -+ AxisInfo -+ getAxisInfo(triton::MakeRangeOp op, -+ ArrayRef *> operands) override { - auto start = op.start(); - auto end = op.end(); - return AxisInfo(/*contiguity=*/{end - start}, -@@ -162,8 +164,9 @@ class ConstantOpAxisInfoVisitor final - public: - using AxisInfoVisitorImpl::AxisInfoVisitorImpl; - -- AxisInfo getAxisInfo(arith::ConstantOp op, -- ArrayRef *> operands) override { -+ AxisInfo -+ getAxisInfo(arith::ConstantOp op, -+ ArrayRef *> operands) override { - auto intAttr = op.getValue().dyn_cast(); - auto boolAttr = op.getValue().dyn_cast(); - if (intAttr || boolAttr) { -@@ -416,8 +419,9 @@ class SplatOpAxisInfoVisitor final - public: - using AxisInfoVisitorImpl::AxisInfoVisitorImpl; - -- AxisInfo getAxisInfo(triton::SplatOp op, -- ArrayRef *> operands) override { -+ AxisInfo -+ getAxisInfo(triton::SplatOp op, -+ ArrayRef *> operands) override { - Type _retTy = *op->result_type_begin(); - TensorType retTy = _retTy.cast(); - AxisInfo opInfo = operands[0]->getValue(); -@@ -439,8 +443,9 @@ class ExpandDimsOpAxisInfoVisitor final - public: - using AxisInfoVisitorImpl::AxisInfoVisitorImpl; - -- AxisInfo getAxisInfo(triton::ExpandDimsOp op, -- ArrayRef *> operands) override { -+ AxisInfo -+ getAxisInfo(triton::ExpandDimsOp op, -+ ArrayRef *> operands) override { - AxisInfo opInfo = operands[0]->getValue(); - AxisInfo::DimVectorT contiguity = opInfo.getContiguity(); - AxisInfo::DimVectorT divisibility = opInfo.getDivisibility(); -@@ -458,8 +463,9 @@ class BroadcastOpAxisInfoVisitor final - public: - using AxisInfoVisitorImpl::AxisInfoVisitorImpl; - -- AxisInfo getAxisInfo(triton::BroadcastOp op, -- ArrayRef *> operands) override { -+ AxisInfo -+ getAxisInfo(triton::BroadcastOp op, -+ ArrayRef *> operands) override { - Type _retTy = *op->result_type_begin(); - Type _opTy = *op->operand_type_begin(); - TensorType retTy = _retTy.cast(); -@@ -486,8 +492,9 @@ class CmpOpAxisInfoVisitor final : public AxisInfoVisitorImpl { - public: - using AxisInfoVisitorImpl::AxisInfoVisitorImpl; - -- AxisInfo getAxisInfo(OpTy op, -- ArrayRef *> operands) override { -+ AxisInfo -+ getAxisInfo(OpTy op, -+ ArrayRef *> operands) override { - auto resTy = op.getResult().getType().template dyn_cast(); - if (!resTy) - return AxisInfo(); -@@ -596,8 +603,9 @@ class SelectOpAxisInfoVisitor final : public AxisInfoVisitorImpl { - public: - using AxisInfoVisitorImpl::AxisInfoVisitorImpl; - -- AxisInfo getAxisInfo(OpTy op, -- ArrayRef *> operands) override { -+ AxisInfo -+ getAxisInfo(OpTy op, -+ ArrayRef *> operands) override { - auto resTy = op.getResult().getType().template dyn_cast(); - if (!resTy) - return AxisInfo(); -@@ -757,8 +765,9 @@ class MaxMinOpAxisInfoVisitor final : public AxisInfoVisitorImpl { - public: - using AxisInfoVisitorImpl::AxisInfoVisitorImpl; - -- AxisInfo getAxisInfo(OpTy op, -- ArrayRef *> operands) override { -+ AxisInfo -+ getAxisInfo(OpTy op, -+ ArrayRef *> operands) override { - auto lhsInfo = operands[0]->getValue(); - auto rhsInfo = operands[1]->getValue(); - std::optional constantValue; -@@ -786,8 +795,8 @@ class MaxMinOpAxisInfoVisitor final : public AxisInfoVisitorImpl { - // AxisInfoAnalysis - //===----------------------------------------------------------------------===// - --AxisInfoAnalysis::AxisInfoAnalysis(MLIRContext *context) -- : ForwardDataFlowAnalysis(context) { -+AxisInfoAnalysis::AxisInfoAnalysis(DataFlowSolver &solver) -+ : dataflow::SparseDataFlowAnalysis>(solver) { - // UnrealizedConversionCast: - // This is needed by TritonGPUToLLVM, to get AxisInfo when the graph is - // in the process of a PartialConversion, where UnrealizedConversionCast -@@ -819,7 +828,7 @@ AxisInfoAnalysis::AxisInfoAnalysis(MLIRContext *context) - visitors.append, - LogicalOpAxisInfoVisitor, - LogicalOpAxisInfoVisitor>(); -- visitors.append, -+ visitors.append, - SelectOpAxisInfoVisitor>(); - visitors.append, - ShROpAxisInfoVisitor>(); -@@ -829,11 +838,12 @@ AxisInfoAnalysis::AxisInfoAnalysis(MLIRContext *context) - MaxMinOpAxisInfoVisitor>(); - } - --ChangeResult AxisInfoAnalysis::visitOperation( -- Operation *op, ArrayRef *> operands) { -+void AxisInfoAnalysis::visitOperation( -+ Operation *op, ArrayRef *> operands, -+ ArrayRef *> results) { - AxisInfo curr = visitors.apply(op, operands); - if (curr.getRank() == 0) { -- return markAllPessimisticFixpoint(op->getResults()); -+ return markAllPessimisticFixpoint(results); - } - // override with hint - auto newContiguity = curr.getContiguity(); -@@ -854,11 +864,8 @@ ChangeResult AxisInfoAnalysis::visitOperation( - curr = mlir::AxisInfo(newContiguity, newDivisibility, newConstancy, - curr.getConstantValue()); - // join all lattice elements -- ChangeResult result = ChangeResult::NoChange; -- for (Value value : op->getResults()) { -- result |= getLatticeElement(value).join(curr); -- } -- return result; -+ for (auto *result : results) -+ propagateIfChanged(result, result->join(curr)); - } - - unsigned AxisInfoAnalysis::getPtrContiguity(Value ptr) { -@@ -884,7 +891,10 @@ unsigned AxisInfoAnalysis::getPtrAlignment(Value ptr) { - auto tensorTy = ptr.getType().dyn_cast(); - if (!tensorTy) - return 1; -- auto axisInfo = lookupLatticeElement(ptr)->getValue(); -+ dataflow::Lattice *latticeElement = getLatticeElement(ptr); -+ if (!latticeElement || latticeElement->isUninitialized()) -+ return 1; -+ auto axisInfo = latticeElement->getValue(); - auto layout = tensorTy.getEncoding(); - auto order = triton::gpu::getOrder(layout); - auto maxMultipleBytes = axisInfo.getDivisibility(order[0]); -@@ -900,8 +910,11 @@ unsigned AxisInfoAnalysis::getMaskAlignment(Value mask) { - auto tensorTy = mask.getType().dyn_cast(); - if (!tensorTy) - return 1; -+ dataflow::Lattice *latticeElement = getLatticeElement(mask); -+ if (!latticeElement || latticeElement->isUninitialized()) -+ return 1; -+ auto maskAxis = latticeElement->getValue(); - auto maskOrder = triton::gpu::getOrder(tensorTy.getEncoding()); -- auto maskAxis = lookupLatticeElement(mask)->getValue(); - auto alignment = std::max(maskAxis.getConstancy(maskOrder[0]), 1); - return alignment; - } -diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt -index afbc692510..1f761f845c 100644 ---- a/lib/Analysis/CMakeLists.txt -+++ b/lib/Analysis/CMakeLists.txt -@@ -8,7 +8,7 @@ add_mlir_library(TritonAnalysis - DEPENDS - TritonTableGen - TritonGPUAttrDefsIncGen -- -+ - LINK_LIBS PUBLIC - MLIRAnalysis - ) -diff --git a/lib/Analysis/Membar.cpp b/lib/Analysis/Membar.cpp -index acc885e827..910274b2ac 100644 ---- a/lib/Analysis/Membar.cpp -+++ b/lib/Analysis/Membar.cpp -@@ -2,7 +2,7 @@ - #include "triton/Analysis/Alias.h" - #include "triton/Dialect/TritonGPU/IR/Dialect.h" - --#include "mlir/Dialect/GPU/GPUDialect.h" -+#include "mlir/Dialect/GPU/IR/GPUDialect.h" - #include "mlir/Dialect/Tensor/IR/Tensor.h" - - namespace mlir { -diff --git a/lib/Analysis/Utility.cpp b/lib/Analysis/Utility.cpp -index d9e917e731..6ea52df272 100644 ---- a/lib/Analysis/Utility.cpp -+++ b/lib/Analysis/Utility.cpp -@@ -1,5 +1,8 @@ - #include "triton/Analysis/Utility.h" -+#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h" -+#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h" - #include "mlir/IR/Dialect.h" -+#include "mlir/IR/Matchers.h" - #include "triton/Dialect/Triton/IR/Dialect.h" - #include "triton/Dialect/TritonGPU/IR/Dialect.h" - #include -@@ -325,4 +328,55 @@ SetVector multiRootGetSlice(Operation *op, - return multiRootTopologicalSort(slice); - } - -+namespace { -+// Copied from TestDeadCodeAnalysis.cpp, because some dead code analysis -+// interacts with constant propagation, but SparseConstantPropagation -+// doesn't seem to be sufficient. -+struct ConstantAnalysis : public DataFlowAnalysis { -+ using DataFlowAnalysis::DataFlowAnalysis; -+ -+ LogicalResult initialize(Operation *top) override { -+ WalkResult result = top->walk([&](Operation *op) { -+ if (failed(visit(op))) -+ return WalkResult::interrupt(); -+ return WalkResult::advance(); -+ }); -+ return success(!result.wasInterrupted()); -+ } -+ -+ LogicalResult visit(ProgramPoint point) override { -+ Operation *op = point.get(); -+ Attribute value; -+ if (matchPattern(op, m_Constant(&value))) { -+ auto *constant = getOrCreate>( -+ op->getResult(0)); -+ propagateIfChanged(constant, constant->join(dataflow::ConstantValue( -+ value, op->getDialect()))); -+ return success(); -+ } -+ setAllToUnknownConstants(op->getResults()); -+ for (Region ®ion : op->getRegions()) -+ setAllToUnknownConstants(region.getArguments()); -+ return success(); -+ } -+ -+ /// Set all given values as not constants. -+ void setAllToUnknownConstants(ValueRange values) { -+ dataflow::ConstantValue unknownConstant(nullptr, nullptr); -+ for (Value value : values) { -+ auto *constant = -+ getOrCreate>(value); -+ propagateIfChanged(constant, constant->join(unknownConstant)); -+ } -+ } -+}; -+} // namespace -+ -+std::unique_ptr createDataFlowSolver() { -+ auto solver = std::make_unique(); -+ solver->load(); -+ solver->load(); -+ return solver; -+} -+ - } // namespace mlir -diff --git a/lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp -index 6a46265bd7..e352eb3698 100644 ---- a/lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp -+++ b/lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp -@@ -159,9 +159,6 @@ struct ConvertLayoutOpConversion - Value smemBase) const { - auto accumNumCTAsEachRep = product(numCTAsEachRep); - auto layout = type.getEncoding(); -- auto blockedLayout = layout.dyn_cast(); -- auto sliceLayout = layout.dyn_cast(); -- auto mmaLayout = layout.dyn_cast(); - auto rank = type.getRank(); - auto sizePerThread = getSizePerThread(layout); - auto accumSizePerThread = product(sizePerThread); -diff --git a/lib/Conversion/TritonGPUToLLVM/DotOpHelpers.h b/lib/Conversion/TritonGPUToLLVM/DotOpHelpers.h -index 4b89965aa9..1d9e00519b 100644 ---- a/lib/Conversion/TritonGPUToLLVM/DotOpHelpers.h -+++ b/lib/Conversion/TritonGPUToLLVM/DotOpHelpers.h -@@ -7,10 +7,8 @@ - #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" - #include "mlir/Conversion/LLVMCommon/Pattern.h" - #include "mlir/Conversion/MathToLLVM/MathToLLVM.h" --#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" --#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" - #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" --#include "mlir/Dialect/GPU/GPUDialect.h" -+#include "mlir/Dialect/GPU/IR/GPUDialect.h" - #include "mlir/Dialect/LLVMIR/LLVMDialect.h" - #include "mlir/Dialect/Tensor/IR/Tensor.h" - #include "mlir/IR/Matchers.h" -@@ -422,9 +420,9 @@ struct MMA16816ConversionHelper { - MMA16816ConversionHelper(Type dotOperand, MmaEncodingAttr mmaLayout, - Value thread, ConversionPatternRewriter &rewriter, - TypeConverter *typeConverter, Location loc) -- : mmaLayout(mmaLayout), thread(thread), helper(mmaLayout), -- rewriter(rewriter), typeConverter(typeConverter), loc(loc), -- ctx(mmaLayout.getContext()), wpt(mmaLayout.getWarpsPerCTA()) { -+ : mmaLayout(mmaLayout), wpt(mmaLayout.getWarpsPerCTA()), thread(thread), -+ helper(mmaLayout), rewriter(rewriter), typeConverter(typeConverter), -+ loc(loc), ctx(mmaLayout.getContext()) { - helper.deduceMmaType(dotOperand); - - Value _32 = i32_val(32); -diff --git a/lib/Conversion/TritonGPUToLLVM/DotOpToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/DotOpToLLVM.cpp -index 0f8070ca9f..e4bd47c411 100644 ---- a/lib/Conversion/TritonGPUToLLVM/DotOpToLLVM.cpp -+++ b/lib/Conversion/TritonGPUToLLVM/DotOpToLLVM.cpp -@@ -115,8 +115,6 @@ struct DotOpConversion : public ConvertTritonGPUOpToLLVMPattern { - auto DTensorTy = D.getType().cast(); - auto AShape = ATensorTy.getShape(); - auto BShape = BTensorTy.getShape(); -- auto DShape = DTensorTy.getShape(); -- auto wpt = mmaLayout.getWarpsPerCTA(); - - bool isARow = ALayout.getIsMMAv1Row().cast().getValue(); - bool isBRow = BLayout.getIsMMAv1Row().cast().getValue(); -@@ -221,7 +219,6 @@ struct DotOpConversion : public ConvertTritonGPUOpToLLVMPattern { - ConversionPatternRewriter &rewriter) const { - auto *ctx = rewriter.getContext(); - auto loc = op.getLoc(); -- auto threadId = getThreadId(rewriter, loc); - - auto A = op.a(); - auto B = op.b(); -@@ -230,12 +227,10 @@ struct DotOpConversion : public ConvertTritonGPUOpToLLVMPattern { - - auto aTensorTy = A.getType().cast(); - auto bTensorTy = B.getType().cast(); -- auto cTensorTy = C.getType().cast(); - auto dTensorTy = D.getType().cast(); - - auto aShape = aTensorTy.getShape(); - auto bShape = bTensorTy.getShape(); -- auto cShape = cTensorTy.getShape(); - - BlockedEncodingAttr dLayout = - dTensorTy.getEncoding().cast(); -diff --git a/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp -index deb71b9597..0b9e67674b 100644 ---- a/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp -+++ b/lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp -@@ -61,7 +61,6 @@ struct FpToFpOpConversion - convertFp16x4ToFp8x4(Location loc, ConversionPatternRewriter &rewriter, - const Value &v0, const Value &v1, const Value &v2, - const Value &v3) { -- auto ctx = rewriter.getContext(); - auto fp16x2VecTy = vec_ty(f16_ty, 2); - Value fp16x2Vec0 = undef(fp16x2VecTy); - Value fp16x2Vec1 = undef(fp16x2VecTy); -@@ -153,7 +152,6 @@ struct FpToFpOpConversion - convertBf16x4ToFp8x4(Location loc, ConversionPatternRewriter &rewriter, - const Value &v0, const Value &v1, const Value &v2, - const Value &v3) { -- auto ctx = rewriter.getContext(); - auto bf16x2VecTy = vec_ty(i16_ty, 2); - Value bf16x2Vec0 = undef(bf16x2VecTy); - Value bf16x2Vec1 = undef(bf16x2VecTy); -diff --git a/lib/Conversion/TritonGPUToLLVM/LoadStoreOpToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/LoadStoreOpToLLVM.cpp -index 9a8b4702bc..bae675f0cb 100644 ---- a/lib/Conversion/TritonGPUToLLVM/LoadStoreOpToLLVM.cpp -+++ b/lib/Conversion/TritonGPUToLLVM/LoadStoreOpToLLVM.cpp -@@ -109,7 +109,8 @@ struct LoadOpConversion - DenseElementsAttr constAttr; - int64_t splatVal = 0; - if (other && valueElemTy.isa() && -- matchPattern(other, m_Constant(&constAttr)) && constAttr.isSplat()) { -+ matchPattern(other, m_Constant(&constAttr)) && constAttr.isSplat() && -+ constAttr.getElementType().isa()) { - otherIsSplatConstInt = true; - splatVal = constAttr.getSplatValue().getSExtValue(); - } -@@ -333,7 +334,6 @@ struct StoreOpConversion - elem = rewriter.create(loc, type::i8Ty(ctx), elem); - elem = bitcast(elem, valueElemTy); - -- Type u32Ty = typeConverter->convertType(type::u32Ty(ctx)); - llWord = insert_element(wordTy, llWord, elem, i32_val(elemIdx)); - } - llWord = bitcast(llWord, valArgTy); -@@ -387,7 +387,6 @@ struct AtomicCASOpConversion - ConversionPatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - MLIRContext *ctx = rewriter.getContext(); -- Value ptr = op.ptr(); - - Value llPtr = adaptor.ptr(); - Value llCmp = adaptor.cmp(); -diff --git a/lib/Conversion/TritonGPUToLLVM/ReduceOpToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/ReduceOpToLLVM.cpp -index 69abd889be..1c973dc196 100644 ---- a/lib/Conversion/TritonGPUToLLVM/ReduceOpToLLVM.cpp -+++ b/lib/Conversion/TritonGPUToLLVM/ReduceOpToLLVM.cpp -@@ -286,7 +286,6 @@ struct ReduceOpConversion - auto srcTy = op.operand().getType().cast(); - auto srcLayout = srcTy.getEncoding(); - auto srcShape = srcTy.getShape(); -- auto srcRank = srcTy.getRank(); - auto order = getOrder(srcLayout); - - auto threadsPerWarp = triton::gpu::getThreadsPerWarp(srcLayout); -@@ -351,7 +350,6 @@ struct ReduceOpConversion - - Value zero = i32_val(0); - Value laneZero = icmp_eq(laneIdAxis, zero); -- Value warpZero = icmp_eq(warpIdAxis, zero); - - for (auto it : accs) { - const SmallVector &key = it.first; -diff --git a/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVM.cpp b/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVM.cpp -index 5b77150b1a..78cfa076bd 100644 ---- a/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVM.cpp -+++ b/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVM.cpp -@@ -11,11 +11,11 @@ using ::mlir::LLVM::getStructFromElements; - using ::mlir::triton::gpu::getElemsPerThread; - using ::mlir::triton::gpu::SharedEncodingAttr; - --struct ReturnOpConversion : public ConvertOpToLLVMPattern<::mlir::ReturnOp> { -- using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; -+struct ReturnOpConversion : public ConvertOpToLLVMPattern { -+ using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; - - LogicalResult -- matchAndRewrite(ReturnOp op, OpAdaptor adaptor, -+ matchAndRewrite(func::ReturnOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - unsigned numArguments = op.getNumOperands(); - -@@ -476,7 +476,6 @@ struct ExtractSliceOpConversion - - auto llvmElemTy = getTypeConverter()->convertType(srcTy.getElementType()); - auto elemPtrTy = ptr_ty(llvmElemTy, 3); -- auto resTy = op.getType().dyn_cast(); - smemObj = SharedMemoryObject(gep(elemPtrTy, smemObj.base, offset), - strideVals, offsetVals); - auto retVal = getStructFromSharedMemoryObject(loc, smemObj, rewriter); -diff --git a/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMBase.h b/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMBase.h -index bb10d5b24a..00e399f848 100644 ---- a/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMBase.h -+++ b/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMBase.h -@@ -4,6 +4,7 @@ - // TODO: refactor so that it doesn't fail if Allocation.h - // is included after utility.h (due to conflict in `store` macro - // and -+#include "mlir/Dialect/Func/IR/FuncOps.h" - #include "triton/Analysis/Allocation.h" - - // -@@ -39,15 +40,15 @@ void vprintf_array(Value thread, ArrayRef arr, std::string info, - // TODO(Superjomn): remove the code when MLIR v15.0 is included. - // All the rights are reserved by the LLVM community. - --struct FuncOpConversionBase : public ConvertOpToLLVMPattern { -+struct FuncOpConversionBase : public ConvertOpToLLVMPattern { - private: - /// Only retain those attributes that are not constructed by - /// `LLVMFuncOp::build`. If `filterArgAttrs` is set, also filter out argument - /// attributes. -- static void filterFuncAttributes(ArrayRef attrs, -- bool filterArgAttrs, -+ static void filterFuncAttributes(func::FuncOp op, bool filterArgAttrs, - SmallVectorImpl &result) { -- for (const auto &attr : attrs) { -+ -+ for (const auto &attr : op->getAttrs()) { - if (attr.getName() == SymbolTable::getSymbolAttrName() || - attr.getName() == FunctionOpInterface::getTypeAttrName() || - attr.getName() == "std.varargs" || -@@ -65,27 +66,27 @@ struct FuncOpConversionBase : public ConvertOpToLLVMPattern { - } - - protected: -- using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; -+ using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; - - // Convert input FuncOp to LLVMFuncOp by using the LLVMTypeConverter provided - // to this legalization pattern. - LLVM::LLVMFuncOp -- convertFuncOpToLLVMFuncOp(FuncOp funcOp, -+ convertFuncOpToLLVMFuncOp(func::FuncOp funcOp, - ConversionPatternRewriter &rewriter) const { - // Convert the original function arguments. They are converted using the - // LLVMTypeConverter provided to this legalization pattern. - auto varargsAttr = funcOp->getAttrOfType("func.varargs"); - TypeConverter::SignatureConversion result(funcOp.getNumArguments()); - auto llvmType = getTypeConverter()->convertFunctionSignature( -- funcOp.getType(), varargsAttr && varargsAttr.getValue(), result); -+ funcOp.getFunctionType(), varargsAttr && varargsAttr.getValue(), -+ result); - if (!llvmType) - return nullptr; - - // Propagate argument/result attributes to all converted arguments/result - // obtained after converting a given original argument/result. - SmallVector attributes; -- filterFuncAttributes(funcOp->getAttrs(), /*filterArgAttrs=*/true, -- attributes); -+ filterFuncAttributes(funcOp, /*filterArgAttrs=*/true, attributes); - if (ArrayAttr resAttrDicts = funcOp.getAllResultAttrs()) { - assert(!resAttrDicts.empty() && "expected array to be non-empty"); - auto newResAttrDicts = -@@ -131,7 +132,7 @@ struct FuncOpConversionBase : public ConvertOpToLLVMPattern { - } - auto newFuncOp = rewriter.create( - funcOp.getLoc(), funcOp.getName(), llvmType, linkage, -- /*dsoLocal*/ false, attributes); -+ /*dsoLocal*/ false, LLVM::CConv::C, attributes); - rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(), - newFuncOp.end()); - if (failed(rewriter.convertRegionTypes(&newFuncOp.getBody(), *typeConverter, -@@ -191,8 +192,8 @@ class ConvertTritonGPUOpToLLVMPatternBase { - const Allocation *allocation, - Value smem, - IndexCacheInfo indexCacheInfo) -- : converter(&typeConverter), indexCacheInfo(indexCacheInfo), -- allocation(allocation), smem(smem) {} -+ : converter(&typeConverter), allocation(allocation), smem(smem), -+ indexCacheInfo(indexCacheInfo) {} - - LLVMTypeConverter *getTypeConverter() const { return converter; } - -@@ -861,7 +862,6 @@ class ConvertTritonGPUOpToLLVMPatternBase { - ArrayRef shape) const { - auto parent = sliceLayout.getParent(); - unsigned dim = sliceLayout.getDim(); -- size_t rank = shape.size(); - auto parentIndices = - emitIndices(loc, rewriter, parent, sliceLayout.paddedShape(shape)); - unsigned numIndices = parentIndices.size(); -diff --git a/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMPass.cpp b/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMPass.cpp -index ff1af09835..6f66af4e34 100644 ---- a/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMPass.cpp -+++ b/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMPass.cpp -@@ -1,10 +1,11 @@ - #include "triton/Conversion/TritonGPUToLLVM/TritonGPUToLLVMPass.h" - -+#include "mlir/Analysis/DataFlowFramework.h" - #include "mlir/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.h" -+#include "mlir/Conversion/ControlFlowToLLVM//ControlFlowToLLVM.h" - #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" - #include "mlir/Conversion/MathToLLVM/MathToLLVM.h" --#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" --#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" -+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" - #include "mlir/Dialect/LLVMIR/LLVMDialect.h" - #include "mlir/Dialect/LLVMIR/NVVMDialect.h" - #include "mlir/Pass/Pass.h" -@@ -40,7 +41,6 @@ class TritonLLVMConversionTarget : public ConversionTarget { - addIllegalDialect(); - addIllegalDialect(); - addIllegalDialect(); -- addIllegalDialect(); - addLegalOp(); - } - }; -@@ -51,7 +51,7 @@ class TritonLLVMFunctionConversionTarget : public ConversionTarget { - : ConversionTarget(ctx) { - addLegalDialect(); - addLegalDialect(); -- addIllegalOp(); -+ addIllegalOp(); - addLegalOp(); - } - }; -@@ -69,7 +69,7 @@ struct FuncOpConversion : public FuncOpConversionBase { - : FuncOpConversionBase(converter, benefit), numWarps(numWarps) {} - - LogicalResult -- matchAndRewrite(FuncOp funcOp, OpAdaptor adaptor, -+ matchAndRewrite(func::FuncOp funcOp, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - auto newFuncOp = convertFuncOpToLLVMFuncOp(funcOp, rewriter); - if (!newFuncOp) -@@ -133,7 +133,8 @@ class ConvertTritonGPUToLLVM - decomposeBlockedToDotOperand(mod); - - // Step 2 -- decomposeInsertSliceAsyncOp(mod); -+ if (failed(decomposeInsertSliceAsyncOp(mod))) -+ return signalPassFailure(); - - // Step 3 - Allocation allocation(mod); -@@ -142,7 +143,7 @@ class ConvertTritonGPUToLLVM - - // Step 4 - RewritePatternSet scf_patterns(context); -- mlir::populateLoopToStdConversionPatterns(scf_patterns); -+ mlir::populateSCFToControlFlowConversionPatterns(scf_patterns); - mlir::ConversionTarget scf_target(*context); - scf_target.addIllegalOp(); -@@ -159,8 +160,10 @@ class ConvertTritonGPUToLLVM - return signalPassFailure(); - - // Step 6 - get axis and shared memory info -- AxisInfoAnalysis axisInfoAnalysis(mod.getContext()); -- axisInfoAnalysis.run(mod); -+ std::unique_ptr solver = createDataFlowSolver(); -+ AxisInfoAnalysis *axisInfoAnalysis = solver->load(); -+ if (failed(solver->initializeAndRun(mod))) -+ return signalPassFailure(); - initSharedMemory(allocation.getSharedMemorySize(), typeConverter); - mod->setAttr("triton_gpu.shared", - mlir::IntegerAttr::get(mlir::IntegerType::get(context, 32), -@@ -178,38 +181,39 @@ class ConvertTritonGPUToLLVM - - // Normal conversions - populateTritonGPUToLLVMPatterns(typeConverter, patterns, numWarps, -- axisInfoAnalysis, &allocation, smem, -+ *axisInfoAnalysis, &allocation, smem, - indexCacheInfo, /*benefit=*/10); - // ConvertLayoutOp - populateConvertLayoutOpToLLVMPatterns(typeConverter, patterns, numWarps, -- axisInfoAnalysis, &allocation, smem, -+ *axisInfoAnalysis, &allocation, smem, - indexCacheInfo, /*benefit=*/10); - // DotOp - populateDotOpToLLVMPatterns(typeConverter, patterns, numWarps, -- axisInfoAnalysis, &allocation, smem, -+ *axisInfoAnalysis, &allocation, smem, - /*benefit=*/10); - // ElementwiseOp - populateElementwiseOpToLLVMPatterns(typeConverter, patterns, numWarps, -- axisInfoAnalysis, &allocation, smem, -+ *axisInfoAnalysis, &allocation, smem, - /*benefit=*/10); - // LoadStoreOp - populateLoadStoreOpToLLVMPatterns(typeConverter, patterns, numWarps, -- axisInfoAnalysis, &allocation, smem, -+ *axisInfoAnalysis, &allocation, smem, - indexCacheInfo, /*benefit=*/10); - // ReduceOp - populateReduceOpToLLVMPatterns(typeConverter, patterns, numWarps, -- axisInfoAnalysis, &allocation, smem, -+ *axisInfoAnalysis, &allocation, smem, - indexCacheInfo, /*benefit=*/10); - // ViewOp - populateViewOpToLLVMPatterns(typeConverter, patterns, numWarps, -- axisInfoAnalysis, &allocation, smem, -+ *axisInfoAnalysis, &allocation, smem, - /*benefit=*/10); - - // Add arith/math's patterns to help convert scalar expression to LLVM. - mlir::arith::populateArithmeticToLLVMConversionPatterns(typeConverter, - patterns); - mlir::populateMathToLLVMConversionPatterns(typeConverter, patterns); -- mlir::populateStdToLLVMConversionPatterns(typeConverter, patterns); -+ mlir::cf::populateControlFlowToLLVMConversionPatterns(typeConverter, -+ patterns); - mlir::populateGpuToNVVMConversionPatterns(typeConverter, patterns); - - if (failed(applyPartialConversion(mod, target, std::move(patterns)))) -@@ -306,9 +310,11 @@ class ConvertTritonGPUToLLVM - }); - } - -- void decomposeInsertSliceAsyncOp(ModuleOp mod) const { -- AxisInfoAnalysis axisInfoAnalysis(mod.getContext()); -- axisInfoAnalysis.run(mod); -+ LogicalResult decomposeInsertSliceAsyncOp(ModuleOp mod) const { -+ std::unique_ptr solver = createDataFlowSolver(); -+ AxisInfoAnalysis *axisInfoAnalysis = solver->load(); -+ if (failed(solver->initializeAndRun(mod))) -+ return failure(); - // TODO(Keren): This is a hacky knob that may cause performance regression - // when decomposition has been performed. We should remove this knob once we - // have thorough analysis on async wait. Currently, we decompose -@@ -342,7 +348,7 @@ class ConvertTritonGPUToLLVM - auto resSharedLayout = - dstTy.getEncoding().dyn_cast(); - auto resElemTy = dstTy.getElementType(); -- unsigned inVec = axisInfoAnalysis.getPtrContiguity(src); -+ unsigned inVec = axisInfoAnalysis->getPtrContiguity(src); - unsigned outVec = resSharedLayout.getVec(); - unsigned minVec = std::min(outVec, inVec); - auto maxBitWidth = -@@ -400,11 +406,11 @@ class ConvertTritonGPUToLLVM - } else if (decomposed) { - // Wait for all previous async ops - OpBuilder builder(asyncWaitOp); -- auto newAsyncWaitOp = -- builder.create(asyncWaitOp.getLoc(), 0); -+ builder.create(asyncWaitOp.getLoc(), 0); - asyncWaitOp.erase(); - } - }); -+ return success(); - } - }; - -diff --git a/lib/Conversion/TritonGPUToLLVM/Utility.h b/lib/Conversion/TritonGPUToLLVM/Utility.h -index d35dac28c5..11976908cf 100644 ---- a/lib/Conversion/TritonGPUToLLVM/Utility.h -+++ b/lib/Conversion/TritonGPUToLLVM/Utility.h -@@ -220,10 +220,7 @@ struct SharedMemoryObject { - ConversionPatternRewriter &rewriter) - : base(base) { - strides = getStridesFromShapeAndOrder(shape, order, loc, rewriter); -- -- for (auto idx : order) { -- offsets.emplace_back(i32_val(0)); -- } -+ offsets.append(order.size(), i32_val(0)); - } - - SmallVector getElems() const { -diff --git a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp b/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp -index fe42202c34..5f230f787f 100644 ---- a/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp -+++ b/lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp -@@ -1,10 +1,10 @@ - #include "triton/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.h" - - #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" --#include "mlir/Dialect/GPU/GPUDialect.h" -+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" -+#include "mlir/Dialect/GPU/IR/GPUDialect.h" - #include "mlir/Dialect/LLVMIR/LLVMDialect.h" - #include "mlir/Dialect/LLVMIR/NVVMDialect.h" --#include "mlir/Dialect/StandardOps/IR/Ops.h" - #include "mlir/Pass/Pass.h" - #include "mlir/Transforms/DialectConversion.h" - #include "triton/Dialect/Triton/IR/Dialect.h" -@@ -59,10 +59,13 @@ class ArithConstantPattern : public OpConversionPattern { - Type retType = getTypeConverter()->convertType(op.getType()); - auto value = adaptor.getValue().dyn_cast(); - assert(value); -- rewriter.replaceOpWithNewOp( -- op, retType, -- value.reshape(retType) // This is a hack. We just want to add encoding -- ); -+ if (value.getElementType().isInteger(1) && value.isSplat()) -+ // Workaround until https://reviews.llvm.org/D133743 is included. -+ value = DenseElementsAttr::get(retType, value.getSplatValue()); -+ else -+ // This is a hack. We just want to add encoding -+ value = value.reshape(retType); -+ rewriter.replaceOpWithNewOp(op, retType, value); - return success(); - } - }; -@@ -127,12 +130,12 @@ void populateArithmeticPatternsAndLegality( - } - - // this shouldn't exist if mlir's SelectOp checked encodings properly --class StdSelectPattern : public OpConversionPattern { -+class StdSelectPattern : public OpConversionPattern { - public: -- using OpConversionPattern::OpConversionPattern; -+ using OpConversionPattern::OpConversionPattern; - - LogicalResult -- matchAndRewrite(SelectOp op, typename SelectOp::Adaptor adaptor, -+ matchAndRewrite(arith::SelectOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - Type retType = this->getTypeConverter()->convertType(op.getType()); - rewriter.replaceOpWithNewOp( -@@ -148,8 +151,8 @@ void populateStdPatternsAndLegality(TritonGPUTypeConverter &typeConverter, - MLIRContext *context = patterns.getContext(); - // Rewrite rule - patterns.add(typeConverter, context); -- target.addLegalOp(); // this is ok because all functions are inlined -- // by the frontend -+ target.addLegalOp(); // this is ok because all functions are -+ // inlined by the frontend - } - - void populateMathPatternsAndLegality(TritonGPUTypeConverter &typeConverter, -@@ -455,18 +458,19 @@ struct TritonPrintfPattern : public OpConversionPattern { - void populateTritonPatterns(TritonGPUTypeConverter &typeConverter, - RewritePatternSet &patterns) { - MLIRContext *context = patterns.getContext(); -- patterns.add< // TODO: view should have custom pattern that views the layout -- TritonGenericPattern, -- TritonGenericPattern, -- TritonGenericPattern, -- TritonGenericPattern, -- TritonGenericPattern, -- TritonGenericPattern, TritonBroadcastPattern, -- TritonGenericPattern, TritonCatPattern, -- TritonReducePattern, TritonTransPattern, TritonExpandDimsPattern, -- TritonMakeRangePattern, TritonDotPattern, TritonLoadPattern, -- TritonStorePattern, TritonExtElemwisePattern, TritonPrintfPattern, -- TritonAtomicRMWPattern>(typeConverter, context); -+ patterns -+ .insert< // TODO: view should have custom pattern that views the layout -+ TritonGenericPattern, -+ TritonGenericPattern, -+ TritonGenericPattern, -+ TritonGenericPattern, -+ TritonGenericPattern, -+ TritonGenericPattern, TritonBroadcastPattern, -+ TritonGenericPattern, TritonCatPattern, -+ TritonReducePattern, TritonTransPattern, TritonExpandDimsPattern, -+ TritonMakeRangePattern, TritonDotPattern, TritonLoadPattern, -+ TritonStorePattern, TritonExtElemwisePattern, TritonPrintfPattern, -+ TritonAtomicRMWPattern>(typeConverter, context); - } - - // -@@ -623,29 +627,28 @@ void populateSCFPatterns(TritonGPUTypeConverter &typeConverter, - - // CF - --class CFBranchPattern : public OpConversionPattern { -+class CFBranchPattern : public OpConversionPattern { - public: -- using OpConversionPattern::OpConversionPattern; -+ using OpConversionPattern::OpConversionPattern; - - LogicalResult -- matchAndRewrite(BranchOp op, BranchOp::Adaptor adaptor, -+ matchAndRewrite(cf::BranchOp op, cf::BranchOp::Adaptor adaptor, - ConversionPatternRewriter &rewriter) const override { -- auto converter = getTypeConverter(); -- auto newOp = rewriter.replaceOpWithNewOp(op, op.getSuccessor(), -- adaptor.getOperands()); -+ auto newOp = rewriter.replaceOpWithNewOp( -+ op, op.getSuccessor(), adaptor.getOperands()); - return success(); - } - }; - --class CFCondBranchPattern : public OpConversionPattern { -+class CFCondBranchPattern : public OpConversionPattern { - public: -- using OpConversionPattern::OpConversionPattern; -+ using OpConversionPattern::OpConversionPattern; - - LogicalResult -- matchAndRewrite(CondBranchOp op, CondBranchOp::Adaptor adaptor, -+ matchAndRewrite(cf::CondBranchOp op, cf::CondBranchOp::Adaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - auto converter = getTypeConverter(); -- auto newOp = rewriter.replaceOpWithNewOp( -+ auto newOp = rewriter.replaceOpWithNewOp( - op, adaptor.getCondition(), op.getTrueDest(), - adaptor.getTrueDestOperands(), op.getFalseDest(), - adaptor.getFalseDestOperands()); -diff --git a/lib/Dialect/Triton/IR/CMakeLists.txt b/lib/Dialect/Triton/IR/CMakeLists.txt -index 2d679b21fd..705554ba6b 100644 ---- a/lib/Dialect/Triton/IR/CMakeLists.txt -+++ b/lib/Dialect/Triton/IR/CMakeLists.txt -@@ -10,11 +10,7 @@ add_mlir_dialect_library(TritonIR - - LINK_LIBS PUBLIC - MLIRIR -- MLIRArithmetic -- MLIRSCF -- -- # Since LLVM 15 -- # MLIRFunc -- # else -- MLIRStandard -+ MLIRArithmeticDialect -+ MLIRSCFDialect -+ MLIRFuncDialect - ) -diff --git a/lib/Dialect/Triton/IR/Ops.cpp b/lib/Dialect/Triton/IR/Ops.cpp -index 3aadbfa0c0..86570359c5 100644 ---- a/lib/Dialect/Triton/IR/Ops.cpp -+++ b/lib/Dialect/Triton/IR/Ops.cpp -@@ -1,10 +1,9 @@ --#include "triton/Dialect/Triton/IR/Dialect.h" --#include "triton/Dialect/Triton/IR/Types.h" -- - #include "mlir/IR/Builders.h" - #include "mlir/IR/BuiltinAttributes.h" - #include "mlir/IR/BuiltinTypes.h" - #include "mlir/IR/OperationSupport.h" -+#include "triton/Dialect/Triton/IR/Dialect.h" -+#include "triton/Dialect/Triton/IR/Types.h" - - namespace mlir { - namespace triton { -@@ -38,8 +37,8 @@ static Type getPointerTypeSameShape(Type type) { - } - - // Parser & printer for assembly forms --ParseResult parseLoadOp(OpAsmParser &parser, OperationState &result) { -- SmallVector allOperands; -+ParseResult LoadOp::parse(OpAsmParser &parser, OperationState &result) { -+ SmallVector allOperands; - Type resultTypes[1]; - SMLoc allOperandLoc = parser.getCurrentLocation(); - if (parser.parseOperandList(allOperands) || -@@ -73,18 +72,18 @@ ParseResult parseLoadOp(OpAsmParser &parser, OperationState &result) { - return success(); - } - --void printLoadOp(OpAsmPrinter &printer, LoadOp loadOp) { -+void LoadOp::print(OpAsmPrinter &printer) { - printer << " "; -- printer << loadOp.getOperation()->getOperands(); -+ printer << getOperation()->getOperands(); - // "operand_segment_sizes" can be deduced, so we don't print it. -- printer.printOptionalAttrDict(loadOp->getAttrs(), -- {loadOp.operand_segment_sizesAttrName()}); -+ printer.printOptionalAttrDict(getOperation()->getAttrs(), -+ {operand_segment_sizesAttrName()}); - printer << " : "; -- printer.printStrippedAttrOrType(loadOp.result().getType()); -+ printer.printStrippedAttrOrType(getResult().getType()); - } - --ParseResult parseStoreOp(OpAsmParser &parser, OperationState &result) { -- SmallVector allOperands; -+ParseResult StoreOp::parse(OpAsmParser &parser, OperationState &result) { -+ SmallVector allOperands; - Type valueType; - SMLoc allOperandLoc = parser.getCurrentLocation(); - if (parser.parseOperandList(allOperands) || -@@ -104,12 +103,12 @@ ParseResult parseStoreOp(OpAsmParser &parser, OperationState &result) { - return success(); - } - --void printStoreOp(OpAsmPrinter &printer, StoreOp storeOp) { -+void StoreOp::print(OpAsmPrinter &printer) { - printer << " "; -- printer << storeOp.getOperation()->getOperands(); -- printer.printOptionalAttrDict(storeOp->getAttrs(), /*elidedAttrs=*/{}); -+ printer << getOperation()->getOperands(); -+ printer.printOptionalAttrDict(getOperation()->getAttrs(), /*elidedAttrs=*/{}); - printer << " : "; -- printer.printStrippedAttrOrType(storeOp.value().getType()); -+ printer.printStrippedAttrOrType(value().getType()); - } - - } // namespace triton -@@ -319,7 +318,8 @@ OpFoldResult SplatOp::fold(ArrayRef operands) { - if (!constOperand) - return {}; - auto shapedType = getType().cast(); -- auto ret = SplatElementsAttr::get(shapedType, {constOperand.getValue()}); -+ auto ret = SplatElementsAttr::get( -+ shapedType, ArrayRef(constOperand.getValue())); - return ret; - } - -diff --git a/lib/Dialect/Triton/Transforms/Combine.cpp b/lib/Dialect/Triton/Transforms/Combine.cpp -index 2261472170..11570283d6 100644 ---- a/lib/Dialect/Triton/Transforms/Combine.cpp -+++ b/lib/Dialect/Triton/Transforms/Combine.cpp -@@ -57,13 +57,13 @@ DenseElementsAttr getConstantValue(Builder &builder, Attribute value, - class CombineSelectMaskedLoadPattern : public mlir::RewritePattern { - public: - CombineSelectMaskedLoadPattern(mlir::MLIRContext *context) -- : mlir::RewritePattern(mlir::SelectOp::getOperationName(), 3, context, -- {triton::LoadOp::getOperationName()}) {} -+ : mlir::RewritePattern(mlir::arith::SelectOp::getOperationName(), 3, -+ context, {triton::LoadOp::getOperationName()}) {} - - mlir::LogicalResult - matchAndRewrite(mlir::Operation *op, - mlir::PatternRewriter &rewriter) const override { -- auto selectOp = llvm::dyn_cast(op); -+ auto selectOp = llvm::dyn_cast(op); - if (!selectOp) - return mlir::failure(); - -diff --git a/lib/Dialect/Triton/Transforms/Combine.td b/lib/Dialect/Triton/Transforms/Combine.td -index 14f286b26e..ded0e346e6 100644 ---- a/lib/Dialect/Triton/Transforms/Combine.td -+++ b/lib/Dialect/Triton/Transforms/Combine.td -@@ -1,9 +1,9 @@ - #ifndef TRITON_PATTERNS - #define TRITON_PATTERNS - --include "mlir/Dialect/StandardOps/IR/Ops.td" - include "mlir/Dialect/Arithmetic/IR/ArithmeticOps.td" - include "triton/Dialect/Triton/IR/TritonOps.td" -+include "mlir/IR/PatternBase.td" - - - // AddIOp(DotOp(a, b, c), d) and c==0 => DotOp(a, b, d) -diff --git a/lib/Dialect/TritonGPU/IR/Dialect.cpp b/lib/Dialect/TritonGPU/IR/Dialect.cpp -index 1fbc609e88..bfc3f3d3da 100644 ---- a/lib/Dialect/TritonGPU/IR/Dialect.cpp -+++ b/lib/Dialect/TritonGPU/IR/Dialect.cpp -@@ -1,14 +1,14 @@ -+#include "triton/Dialect/Triton/IR/Dialect.h" -+ - #include - - #include "mlir/IR/DialectImplementation.h" - #include "mlir/IR/OpImplementation.h" - #include "triton/Analysis/Utility.h" --#include "triton/Dialect/Triton/IR/Dialect.h" -+#include "triton/Dialect/TritonGPU/IR/Dialect.cpp.inc" - #include "triton/Dialect/TritonGPU/IR/Dialect.h" - #include "llvm/ADT/TypeSwitch.h" - --#include "triton/Dialect/TritonGPU/IR/Dialect.cpp.inc" -- - using namespace mlir; - using namespace mlir::triton::gpu; - -@@ -366,7 +366,6 @@ template SmallVector - SliceEncodingAttr::paddedShape(ArrayRef shape) const; - - unsigned SliceEncodingAttr::getElemsPerThread(ArrayRef shape) const { -- size_t rank = shape.size(); - auto parent = getParent(); - return ::getElemsPerThread(parent, paddedShape(shape)); - } -@@ -655,9 +654,9 @@ void DotOperandEncodingAttr::print(mlir::AsmPrinter &printer) const { - // InsertSliceAsyncOp - //===----------------------------------------------------------------------===// - --ParseResult parseInsertSliceAsyncOp(OpAsmParser &parser, -- OperationState &result) { -- SmallVector allOperands; -+ParseResult InsertSliceAsyncOp::parse(OpAsmParser &parser, -+ OperationState &result) { -+ SmallVector allOperands; - Type srcType, dstType; - SMLoc allOperandLoc = parser.getCurrentLocation(); - if (parser.parseOperandList(allOperands) || -@@ -696,18 +695,16 @@ ParseResult parseInsertSliceAsyncOp(OpAsmParser &parser, - return success(); - } - --void printInsertSliceAsyncOp(OpAsmPrinter &printer, -- InsertSliceAsyncOp insertSliceAsyncOp) { -+void InsertSliceAsyncOp::print(OpAsmPrinter &printer) { - printer << " "; -- printer << insertSliceAsyncOp.getOperation()->getOperands(); -+ printer << getOperation()->getOperands(); - // "operand_segment_sizes" can be deduced, so we don't print it. -- printer.printOptionalAttrDict( -- insertSliceAsyncOp->getAttrs(), -- {insertSliceAsyncOp.operand_segment_sizesAttrName()}); -+ printer.printOptionalAttrDict(getOperation()->getAttrs(), -+ {operand_segment_sizesAttrName()}); - printer << " : "; -- printer.printStrippedAttrOrType(insertSliceAsyncOp.src().getType()); -+ printer.printStrippedAttrOrType(src().getType()); - printer << " -> "; -- printer.printStrippedAttrOrType(insertSliceAsyncOp.result().getType()); -+ printer.printStrippedAttrOrType(result().getType()); - } - - //===----------------------------------------------------------------------===// -diff --git a/lib/Dialect/TritonGPU/Transforms/Coalesce.cpp b/lib/Dialect/TritonGPU/Transforms/Coalesce.cpp -index 82407980d3..ee6009f44a 100644 ---- a/lib/Dialect/TritonGPU/Transforms/Coalesce.cpp -+++ b/lib/Dialect/TritonGPU/Transforms/Coalesce.cpp -@@ -27,7 +27,11 @@ struct CoalescePass : public TritonGPUCoalesceBase { - auto origType = ptr.getType().cast(); - // Get the shape of the tensor. - size_t rank = origType.getRank(); -- AxisInfo info = axisInfo.lookupLatticeElement(ptr)->getValue(); -+ dataflow::Lattice *latticeElement = -+ axisInfo.getLatticeElement(ptr); -+ AxisInfo info = latticeElement && !latticeElement->isUninitialized() -+ ? latticeElement->getValue() -+ : AxisInfo(); - // Get the contiguity order of `ptr` - auto order = argSort(info.getContiguity()); - // The desired divisibility is the maximum divisibility -@@ -40,7 +44,7 @@ struct CoalescePass : public TritonGPUCoalesceBase { - for (Value val : op->getResults()) { - if (val.getType() != origType) - continue; -- auto valInfo = axisInfo.lookupLatticeElement(val); -+ auto valInfo = axisInfo.getLatticeElement(val); - auto currOrder = argSort(valInfo->getValue().getContiguity()); - if (order == currOrder) - withSameOrder.insert(val); -@@ -55,7 +59,7 @@ struct CoalescePass : public TritonGPUCoalesceBase { - unsigned elemNumBytes = std::max(elemNumBits / 8, 1u); - unsigned perThread = 1; - for (Value val : withSameOrder) { -- AxisInfo info = axisInfo.lookupLatticeElement(val)->getValue(); -+ AxisInfo info = axisInfo.getLatticeElement(val)->getValue(); - unsigned maxMultipleBytes = info.getDivisibility(order[0]); - unsigned maxMultiple = std::max(maxMultipleBytes / elemNumBytes, 1u); - unsigned maxContig = info.getContiguity(order[0]); -@@ -123,8 +127,10 @@ struct CoalescePass : public TritonGPUCoalesceBase { - void runOnOperation() override { - Operation *op = getOperation(); - // Run axis info analysis -- AxisInfoAnalysis axisInfo(&getContext()); -- axisInfo.run(op); -+ std::unique_ptr solver = createDataFlowSolver(); -+ AxisInfoAnalysis *axisInfo = solver->load(); -+ if (failed(solver->initializeAndRun(op))) -+ return signalPassFailure(); - - // For each i/o operation, we determine what layout - // the pointers should have for best memory coalescing -@@ -146,10 +152,10 @@ struct CoalescePass : public TritonGPUCoalesceBase { - RankedTensorType ty = ptr.getType().template dyn_cast(); - if (!ty || !ty.getElementType().isa()) - return; -- AxisInfo info = axisInfo.lookupLatticeElement(ptr)->getValue(); -+ AxisInfo info = axisInfo->getLatticeElement(ptr)->getValue(); - auto mod = curr->getParentOfType(); - int numWarps = triton::gpu::TritonGPUDialect::getNumWarps(mod); -- auto convertType = getTypeConverter(axisInfo, ptr, numWarps); -+ auto convertType = getTypeConverter(*axisInfo, ptr, numWarps); - layoutMap[ptr] = convertType; - }); - -diff --git a/lib/Dialect/TritonGPU/Transforms/Combine.cpp b/lib/Dialect/TritonGPU/Transforms/Combine.cpp -index efa37ff2dc..089ce3996c 100644 ---- a/lib/Dialect/TritonGPU/Transforms/Combine.cpp -+++ b/lib/Dialect/TritonGPU/Transforms/Combine.cpp -@@ -1,6 +1,6 @@ - #include "Utility.h" - #include "mlir/Analysis/SliceAnalysis.h" --#include "mlir/Dialect/SCF/SCF.h" -+#include "mlir/Dialect/SCF/IR/SCF.h" - #include "mlir/IR/BlockAndValueMapping.h" - #include "mlir/IR/BuiltinAttributes.h" - #include "mlir/IR/Matchers.h" -diff --git a/lib/Dialect/TritonGPU/Transforms/Combine.td b/lib/Dialect/TritonGPU/Transforms/Combine.td -index 6bf1b14866..6a7b10dbcb 100644 ---- a/lib/Dialect/TritonGPU/Transforms/Combine.td -+++ b/lib/Dialect/TritonGPU/Transforms/Combine.td -@@ -3,5 +3,6 @@ - - include "triton/Dialect/TritonGPU/IR/TritonGPUOps.td" - include "triton/Dialect/Triton/IR/TritonOps.td" -+include "mlir/IR/PatternBase.td" - - #endif -diff --git a/lib/Dialect/TritonGPU/Transforms/DecomposeConversions.cpp b/lib/Dialect/TritonGPU/Transforms/DecomposeConversions.cpp -index 4bd3bc76bf..b2f8defd81 100644 ---- a/lib/Dialect/TritonGPU/Transforms/DecomposeConversions.cpp -+++ b/lib/Dialect/TritonGPU/Transforms/DecomposeConversions.cpp -@@ -1,5 +1,5 @@ - #include "mlir/Analysis/SliceAnalysis.h" --#include "mlir/Dialect/SCF/SCF.h" -+#include "mlir/Dialect/SCF/IR/SCF.h" - #include "mlir/IR/BlockAndValueMapping.h" - #include "mlir/IR/BuiltinAttributes.h" - #include "mlir/IR/Matchers.h" -diff --git a/lib/Dialect/TritonGPU/Transforms/Pipeline.cpp b/lib/Dialect/TritonGPU/Transforms/Pipeline.cpp -index 9b2f42231e..85f746c1dc 100644 ---- a/lib/Dialect/TritonGPU/Transforms/Pipeline.cpp -+++ b/lib/Dialect/TritonGPU/Transforms/Pipeline.cpp -@@ -2,6 +2,7 @@ - #include "mlir/IR/BlockAndValueMapping.h" - #include "mlir/IR/TypeUtilities.h" - #include "triton/Analysis/AxisInfo.h" -+#include "triton/Analysis/Utility.h" - #include "triton/Dialect/TritonGPU/IR/Dialect.h" - #include "triton/Dialect/TritonGPU/Transforms/Passes.h" - -@@ -160,15 +161,18 @@ ttg::AllocTensorOp LoopPipeliner::allocateEmptyBuffer(Operation *op, - LogicalResult LoopPipeliner::initialize() { - Block *loop = forOp.getBody(); - -- AxisInfoAnalysis axisInfoAnalysis(forOp.getContext()); -- axisInfoAnalysis.run(forOp->getParentOfType()); -+ std::unique_ptr solver = createDataFlowSolver(); -+ AxisInfoAnalysis *axisInfoAnalysis = solver->load(); -+ if (failed(solver->initializeAndRun(forOp->getParentOfType()))) { -+ return failure(); -+ } - - // can we use forOp.walk(...) here? - SmallVector allLoads; - for (Operation &op : *loop) - if (auto loadOp = dyn_cast(&op)) { - auto ptr = loadOp.ptr(); -- unsigned vec = axisInfoAnalysis.getPtrContiguity(ptr); -+ unsigned vec = axisInfoAnalysis->getPtrContiguity(ptr); - auto tensorTy = ptr.getType().dyn_cast(); - if (!tensorTy) - continue; -diff --git a/lib/Dialect/TritonGPU/Transforms/ReorderInstructions.cpp b/lib/Dialect/TritonGPU/Transforms/ReorderInstructions.cpp -index 0e7dbe5264..b95a4f50a6 100644 ---- a/lib/Dialect/TritonGPU/Transforms/ReorderInstructions.cpp -+++ b/lib/Dialect/TritonGPU/Transforms/ReorderInstructions.cpp -@@ -1,5 +1,5 @@ - #include "mlir/Analysis/SliceAnalysis.h" --#include "mlir/Dialect/SCF/SCF.h" -+#include "mlir/Dialect/SCF/IR/SCF.h" - #include "mlir/IR/BlockAndValueMapping.h" - #include "mlir/IR/BuiltinAttributes.h" - #include "mlir/IR/Matchers.h" -diff --git a/lib/Dialect/TritonGPU/Transforms/TritonGPUConversion.cpp b/lib/Dialect/TritonGPU/Transforms/TritonGPUConversion.cpp -index 37ac710995..762e887f36 100644 ---- a/lib/Dialect/TritonGPU/Transforms/TritonGPUConversion.cpp -+++ b/lib/Dialect/TritonGPU/Transforms/TritonGPUConversion.cpp -@@ -82,12 +82,12 @@ TritonGPUConversionTarget::TritonGPUConversionTarget( - scf::ReduceReturnOp>(); - - addDynamicallyLegalDialect([&](Operation *op) { -- if (typeConverter.isLegal(op)) -- return true; -- return false; -- }); -+ triton::TritonDialect, scf::SCFDialect>( -+ [&](Operation *op) { -+ if (typeConverter.isLegal(op)) -+ return true; -+ return false; -+ }); - - // We have requirements for the data layouts - addDynamicallyLegalOp([](triton::DotOp dotOp) -> bool { -diff --git a/lib/Dialect/TritonGPU/Transforms/UpdateMmaForVolta.cpp b/lib/Dialect/TritonGPU/Transforms/UpdateMmaForVolta.cpp -index c229104286..c911fd4a5c 100644 ---- a/lib/Dialect/TritonGPU/Transforms/UpdateMmaForVolta.cpp -+++ b/lib/Dialect/TritonGPU/Transforms/UpdateMmaForVolta.cpp -@@ -1,5 +1,5 @@ - #include "Utility.h" --#include "mlir/Dialect/SCF/SCF.h" -+#include "mlir/Dialect/SCF/IR/SCF.h" - #include "mlir/IR/Matchers.h" - #include "mlir/IR/PatternMatch.h" - #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -@@ -118,8 +118,8 @@ void setOpResultType(Operation *op, ArrayRef newTypes) { - .get("value") - .dyn_cast(); - if (attr) { -- auto newAttr = mlir::DenseElementsAttr::getFromRawBuffer( -- newType, attr.getRawData(), true); -+ auto newAttr = -+ mlir::DenseElementsAttr::getFromRawBuffer(newType, attr.getRawData()); - op->setAttr("value", newAttr); - } - } -diff --git a/lib/Dialect/TritonGPU/Transforms/Utility.cpp b/lib/Dialect/TritonGPU/Transforms/Utility.cpp -index ed15f02f67..6400f1633a 100644 ---- a/lib/Dialect/TritonGPU/Transforms/Utility.cpp -+++ b/lib/Dialect/TritonGPU/Transforms/Utility.cpp -@@ -1,5 +1,5 @@ - #include "Utility.h" --#include "mlir/Dialect/SCF/SCF.h" -+#include "mlir/Dialect/SCF/IR/SCF.h" - #include "mlir/IR/BlockAndValueMapping.h" - #include "mlir/Transforms/GreedyPatternRewriteDriver.h" - -diff --git a/lib/Target/LLVMIR/CMakeLists.txt b/lib/Target/LLVMIR/CMakeLists.txt -index f1bbd0bf4e..ac8973ad19 100644 ---- a/lib/Target/LLVMIR/CMakeLists.txt -+++ b/lib/Target/LLVMIR/CMakeLists.txt -@@ -6,8 +6,7 @@ add_mlir_translation_library(TritonLLVMIR - - LINK_LIBS PUBLIC - MLIRIR -- MLIRLLVMIR -- MLIRSCFToStandard -+ MLIRLLVMDialect - MLIRSupport - MLIRTargetLLVMIRExport - ) -diff --git a/lib/Target/PTX/PTXTranslation.cpp b/lib/Target/PTX/PTXTranslation.cpp -index 4cb0d8193c..6a5453a6e7 100644 ---- a/lib/Target/PTX/PTXTranslation.cpp -+++ b/lib/Target/PTX/PTXTranslation.cpp -@@ -1,11 +1,14 @@ - #include "triton/Target/PTX/PTXTranslation.h" - #include "triton/Target/LLVMIR/LLVMIRTranslation.h" -+#include - - #include "llvm/IR/IRBuilder.h" - #include "llvm/IR/LegacyPassManager.h" - #include "llvm/IR/Module.h" - #include "llvm/IR/Verifier.h" - #include "llvm/MC/TargetRegistry.h" -+#include "llvm/Pass.h" -+#include "llvm/Support/CommandLine.h" - #include "llvm/Support/TargetSelect.h" - #include "llvm/Target/TargetMachine.h" - -diff --git a/python/setup.py b/python/setup.py -index 2ac3accd25..4530b36714 100644 ---- a/python/setup.py -+++ b/python/setup.py -@@ -57,19 +57,10 @@ def get_pybind11_package_info(): - def get_llvm_package_info(): - # download if nothing is installed - system = platform.system() -- if system == "Darwin": -- system_suffix = "apple-darwin" -- elif system == "Linux": -- vglibc = tuple(map(int, platform.libc_ver()[1].split('.'))) -- vglibc = vglibc[0] * 100 + vglibc[1] -- linux_suffix = 'ubuntu-18.04' if vglibc > 217 else 'centos-7' -- system_suffix = f"linux-gnu-{linux_suffix}" -- else: -- raise RuntimeError(f"unsupported system: {system}") -+ system_suffix = {"Linux": "linux-gnu-ubuntu-18.04", "Darwin": "apple-darwin"}[system] - use_assert_enabled_llvm = check_env_flag("TRITON_USE_ASSERT_ENABLED_LLVM", "False") -- release_suffix = "assert" if use_assert_enabled_llvm else "release" -- name = f'llvm+mlir-14.0.6-x86_64-{system_suffix}-{release_suffix}' -- url = f"https://github.com/ptillet/triton-llvm-releases/releases/download/llvm-14.0.6-f28c006a5895/{name}.tar.xz" -+ name = 'llvm+mlir-15.0.7-x86_64-{}-{}'.format(system_suffix, "assert" if use_assert_enabled_llvm else "release") -+ url = "https://github.com/ptillet/triton-llvm-releases/releases/download/llvm-15.0.7-8dfdcc7b7bf6/{}.tar.xz".format(name) - return Package("llvm", name, url, "lib", "LLVM_INCLUDE_DIRS", "LLVM_LIBRARY_DIR", "LLVM_SYSPATH") - - -diff --git a/python/src/triton.cc b/python/src/triton.cc -index c40b117a55..f190eacc34 100644 ---- a/python/src/triton.cc -+++ b/python/src/triton.cc -@@ -8,9 +8,10 @@ - #include "mlir/Pass/PassManager.h" - #include "mlir/Transforms/Passes.h" - --#include "mlir/Parser.h" -+#include "mlir/Parser/Parser.h" - #include "mlir/Support/FileUtilities.h" - -+#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" - #include "mlir/Dialect/LLVMIR/LLVMDialect.h" - #include "triton/Analysis/Allocation.h" - #include "triton/Conversion/TritonGPUToLLVM/TritonGPUToLLVMPass.h" -@@ -195,7 +196,7 @@ void init_triton_ir(py::module &&m) { - std::string attrName = name + "_arg" + std::to_string(id); - mlir::Block *owner = arg.getOwner(); - if (owner->isEntryBlock() && -- !mlir::isa(owner->getParentOp())) { -+ !mlir::isa(owner->getParentOp())) { - owner->getParentOp()->setAttr(attrName, attr); - } - } -@@ -348,7 +349,7 @@ void init_triton_ir(py::module &&m) { - return str; - }) - .def("push_back", -- [](mlir::ModuleOp &self, mlir::FuncOp &funcOp) -> void { -+ [](mlir::ModuleOp &self, mlir::func::FuncOp &funcOp) -> void { - self.push_back(funcOp); - }) - .def("has_function", -@@ -358,16 +359,18 @@ void init_triton_ir(py::module &&m) { - return false; - }) - .def("get_function", -- [](mlir::ModuleOp &self, std::string &funcName) -> mlir::FuncOp { -- return self.lookupSymbol(funcName); -- }) -- .def("get_single_function", [](mlir::ModuleOp &self) -> mlir::FuncOp { -- llvm::SmallVector funcs; -- self.walk([&](mlir::FuncOp func) { funcs.push_back(func); }); -- if (funcs.size() != 1) -- throw std::runtime_error("Expected a single function"); -- return funcs[0]; -- }); -+ [](mlir::ModuleOp &self, -+ std::string &funcName) -> mlir::func::FuncOp { -+ return self.lookupSymbol(funcName); -+ }) -+ .def("get_single_function", -+ [](mlir::ModuleOp &self) -> mlir::func::FuncOp { -+ llvm::SmallVector funcs; -+ self.walk([&](mlir::func::FuncOp func) { funcs.push_back(func); }); -+ if (funcs.size() != 1) -+ throw std::runtime_error("Expected a single function"); -+ return funcs[0]; -+ }); - - m.def("make_attr", - [](const std::vector &values, mlir::MLIRContext &context) { -@@ -388,47 +391,48 @@ void init_triton_ir(py::module &&m) { - registry.insert(); -+ mlir::func::FuncDialect, mlir::scf::SCFDialect>(); - context.appendDialectRegistry(registry); - context.loadAllAvailableDialects(); - - // parse module -- mlir::OwningOpRef module( -- mlir::parseSourceFile(inputFilename, &context)); -+ mlir::OwningOpRef module = -+ mlir::parseSourceFile(inputFilename, &context); -+ if (!module) -+ throw std::runtime_error("Parse MLIR file failed."); - // locations are incompatible with ptx < 7.5 ! - module->walk([](mlir::Operation *op) { - op->setLoc(mlir::UnknownLoc::get(op->getContext())); - }); -- if (!module) -- throw std::runtime_error("Parse MLIR file failed."); - - return module->clone(); - }, - ret::take_ownership); - -- py::class_(m, "function") -+ py::class_(m, "function") - // .def_property_readonly("attrs", &ir::function::attrs) - // .def("add_attr", &ir::function::add_attr); - .def("args", -- [](mlir::FuncOp &self, unsigned idx) -> mlir::BlockArgument { -+ [](mlir::func::FuncOp &self, unsigned idx) -> mlir::BlockArgument { - return self.getArgument(idx); - }) - .def( - "add_entry_block", -- [](mlir::FuncOp &self) -> mlir::Block * { -+ [](mlir::func::FuncOp &self) -> mlir::Block * { - return self.addEntryBlock(); - }, - ret::reference) - .def( - "set_arg_attr", -- [](mlir::FuncOp &self, int arg_no, const std::string &name, int val) { -+ [](mlir::func::FuncOp &self, int arg_no, const std::string &name, -+ int val) { - // set arg attributes "name" to value "val" - auto attrTy = mlir::IntegerType::get(self.getContext(), 32); - self.setArgAttr(arg_no, name, mlir::IntegerAttr::get(attrTy, val)); - }, - ret::reference) -- .def_property_readonly("type", &mlir::FuncOp::getType) -- .def("reset_type", &mlir::FuncOp::setType); -+ .def_property_readonly("type", &mlir::func::FuncOp::getFunctionType) -+ .def("reset_type", &mlir::func::FuncOp::setType); - - py::class_(m, "InsertPoint"); - -@@ -445,13 +449,13 @@ void init_triton_ir(py::module &&m) { - .def("ret", - [](mlir::OpBuilder &self, std::vector &vals) -> void { - auto loc = self.getUnknownLoc(); -- self.create(loc, vals); -+ self.create(loc, vals); - }) - .def("call", -- [](mlir::OpBuilder &self, mlir::FuncOp &func, -+ [](mlir::OpBuilder &self, mlir::func::FuncOp &func, - std::vector &args) -> mlir::OpState { - auto loc = self.getUnknownLoc(); -- return self.create(loc, func, args); -+ return self.create(loc, func, args); - }) - // insertion block/point - .def("set_insertion_point_to_start", -@@ -618,15 +622,16 @@ void init_triton_ir(py::module &&m) { - .def("get_or_insert_function", - [](mlir::OpBuilder &self, mlir::ModuleOp &module, - std::string &funcName, mlir::Type &funcType, -- std::string &visibility) -> mlir::FuncOp { -+ std::string &visibility) -> mlir::func::FuncOp { - if (mlir::Operation *funcOperation = module.lookupSymbol(funcName)) -- return llvm::dyn_cast(funcOperation); -+ return llvm::dyn_cast(funcOperation); - auto loc = self.getUnknownLoc(); - if (auto funcTy = funcType.dyn_cast()) { - llvm::SmallVector attrs = { - mlir::NamedAttribute(self.getStringAttr("sym_visibility"), - self.getStringAttr(visibility))}; -- return self.create(loc, funcName, funcTy, attrs); -+ return self.create(loc, funcName, funcTy, -+ attrs); - } - throw std::runtime_error("invalid function type"); - }) -@@ -658,15 +663,15 @@ void init_triton_ir(py::module &&m) { - [](mlir::OpBuilder &self, mlir::Value condition, - mlir::Block *trueDest, mlir::Block *falseDest) { - auto loc = self.getUnknownLoc(); -- self.create(loc, condition, trueDest, -- falseDest); -+ self.create(loc, condition, trueDest, -+ falseDest); - return; - }) - .def("create_branch", - [](mlir::OpBuilder &self, mlir::Block *dest, - std::vector &args) { - auto loc = self.getUnknownLoc(); -- self.create(loc, dest, args); -+ self.create(loc, dest, args); - return; - }) - // Structured control flow -@@ -792,14 +797,14 @@ void init_triton_ir(py::module &&m) { - .def("create_to_index", - [](mlir::OpBuilder &self, mlir::Value &input) -> mlir::Value { - auto loc = self.getUnknownLoc(); -- return self.create(loc, input, -- self.getIndexType()); -+ return self.create( -+ loc, self.getIndexType(), input); - }) - .def("create_index_to_si", - [](mlir::OpBuilder &self, mlir::Value &input) -> mlir::Value { - auto loc = self.getUnknownLoc(); -- return self.create(loc, input, -- self.getI32Type()); -+ return self.create( -+ loc, self.getI32Type(), input); - }) - .def("create_fmul", - [](mlir::OpBuilder &self, mlir::Value &lhs, -@@ -1316,8 +1321,8 @@ void init_triton_ir(py::module &&m) { - [](mlir::OpBuilder &self, mlir::Value &condition, - mlir::Value &trueValue, mlir::Value &falseValue) -> mlir::Value { - auto loc = self.getUnknownLoc(); -- return self.create(loc, condition, trueValue, -- falseValue); -+ return self.create(loc, condition, -+ trueValue, falseValue); - }) - .def("create_printf", - [](mlir::OpBuilder &self, const std::string &prefix, -@@ -1429,7 +1434,7 @@ void init_triton_ir(py::module &&m) { - self.addPass(mlir::triton::createConvertTritonGPUToLLVMPass()); - }) - .def("add_scf_to_cfg", [](mlir::PassManager &self) { -- self.addPass(mlir::createLowerToCFGPass()); -+ self.addPass(mlir::createConvertSCFToCFPass()); - }); - } - -diff --git a/python/test/unit/language/test_core.py b/python/test/unit/language/test_core.py -index 432544a8a4..018f544714 100644 ---- a/python/test/unit/language/test_core.py -+++ b/python/test/unit/language/test_core.py -@@ -1918,7 +1918,7 @@ def test_convert2d(dtype, shape, src_layout, dst_layout, device='cuda'): - #dst = {dst_layout} - """ + """ - module attributes {"triton_gpu.num-warps" = 4 : i32} { -- func public @kernel_0d1d(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}) { -+ func.func public @kernel_0d1d(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}) { - %cst = arith.constant dense<128> : tensor<128x1xi32, #src> - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32, #triton_gpu.slice<{dim = 1, parent = #src}>> - %1 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32, #triton_gpu.slice<{dim = 0, parent = #src}>> -diff --git a/python/triton/compiler.py b/python/triton/compiler.py -index 5d167634df..c36589037c 100644 ---- a/python/triton/compiler.py -+++ b/python/triton/compiler.py -@@ -1514,14 +1514,14 @@ def make_hash(fn, **kwargs): - return hashlib.md5((Path(fn).read_text() + triton.runtime.jit.version_key()).encode("utf-8")).hexdigest() - - --# - ^\s*func\s+ : match the start of the string, any leading whitespace, the keyword func, -+# - ^\s*func\.func\s+ : match the start of the string, any leading whitespace, the keyword func, - # and any following whitespace - # - (public\s+)? : optionally match the keyword public and any following whitespace - # - (@\w+) : match an @ symbol followed by one or more word characters - # (letters, digits, or underscores), and capture it as group 1 (the function name) - # - (\((?:%\w+: \S+(?: \{\S+ = \S+ : \S+\})?(?:, )?)*\)) : match a pair of parentheses enclosing - # zero or more arguments separated by commas, and capture it as group 2 (the argument list) --mlir_prototype_pattern = r'^\s*func\s+(?:public\s+)?(@\w+)(\((?:%\w+: \S+(?: \{\S+ = \S+ : \S+\})?(?:, )?)*\))\s*\{\s*$' -+mlir_prototype_pattern = r'^\s*func\.func\s+(?:public\s+)?(@\w+)(\((?:%\w+: \S+(?: \{\S+ = \S+ : \S+\})?(?:, )?)*\))\s*\{\s*$' - ptx_prototype_pattern = r"\.(?:visible|extern)\s+\.(?:entry|func)\s+(\w+)\s*\(([^)]*)\)" - prototype_pattern = { - "ttir": mlir_prototype_pattern, -diff --git a/test/Analysis/test-alias.mlir b/test/Analysis/test-alias.mlir -index b3d5673f85..bb21615e68 100644 ---- a/test/Analysis/test-alias.mlir -+++ b/test/Analysis/test-alias.mlir -@@ -11,7 +11,7 @@ - - // CHECK-LABEL: matmul_loop - // There shouldn't be any aliasing with the dot op encoding. --func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { -+func.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { - %a_ptr_init = tt.broadcast %A : (!tt.ptr) -> tensor<128x32x!tt.ptr, #AL> - %b_ptr_init = tt.broadcast %B : (!tt.ptr) -> tensor<32x128x!tt.ptr, #BL> - %a_mask = arith.constant dense : tensor<128x32xi1, #AL> -@@ -36,7 +36,7 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B - } - - // CHECK-LABEL: alloc --func @alloc(%A : !tt.ptr) { -+func.func @alloc(%A : !tt.ptr) { - // CHECK: %cst -> %cst - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - %cst1 = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #AL> -@@ -46,7 +46,7 @@ func @alloc(%A : !tt.ptr) { - } - - // CHECK-LABEL: convert --func @convert(%A : !tt.ptr) { -+func.func @convert(%A : !tt.ptr) { - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL> - // CHECK: %0 -> %0 - %cst1 = triton_gpu.convert_layout %cst0 : (tensor<16x16xf16, #AL>) -> tensor<16x16xf16, #A_SHARED> -@@ -54,7 +54,7 @@ func @convert(%A : !tt.ptr) { - } - - // CHECK-LABEL: trans --func @trans(%A : !tt.ptr) { -+func.func @trans(%A : !tt.ptr) { - // CHECK: %cst -> %cst - %tensor = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #A_SHARED> - // CHECK: %0 -> %cst -@@ -63,7 +63,7 @@ func @trans(%A : !tt.ptr) { - } - - // CHECK-LABEL: insert_slice_async --func @insert_slice_async(%A : !tt.ptr, %i1 : i1) { -+func.func @insert_slice_async(%A : !tt.ptr, %i1 : i1) { - %a_ptr = tt.broadcast %A : (!tt.ptr) -> tensor<16x16x!tt.ptr, #AL> - %mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL> - %other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL> -@@ -76,7 +76,7 @@ func @insert_slice_async(%A : !tt.ptr, %i1 : i1) { - } - - // CHECK-LABEL: insert_slice --func @insert_slice(%A : !tt.ptr, %i1 : i1) { -+func.func @insert_slice(%A : !tt.ptr, %i1 : i1) { - %a_ptr = tt.broadcast %A : (!tt.ptr) -> tensor<16x16x!tt.ptr, #AL> - %mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL> - %other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL> -@@ -90,7 +90,7 @@ func @insert_slice(%A : !tt.ptr, %i1 : i1) { - } - - // CHECK-LABEL: extract_slice --func @extract_slice(%A : !tt.ptr) { -+func.func @extract_slice(%A : !tt.ptr) { - // CHECK: %cst -> %cst - %cst0 = arith.constant dense<0.000000e+00> : tensor<1x16x16xf16, #A_SHARED> - %index = arith.constant 0 : index -@@ -100,7 +100,7 @@ func @extract_slice(%A : !tt.ptr) { - } - - // CHECK-LABEL: if_cat --func @if_cat(%i1 : i1) { -+func.func @if_cat(%i1 : i1) { - // CHECK: %cst -> %cst - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - // CHECK: %cst_0 -> %cst_0 -@@ -119,7 +119,7 @@ func @if_cat(%i1 : i1) { - } - - // CHECK-LABEL: if_alias --func @if_alias(%i1 : i1) { -+func.func @if_alias(%i1 : i1) { - // CHECK: %cst -> %cst - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - // CHECK-NEXT: %cst_0 -> %cst_0 -@@ -134,7 +134,7 @@ func @if_alias(%i1 : i1) { - } - - // CHECK-LABEL: for --func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { -+func.func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { - // CHECK: %cst -> %cst - %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - // CHECK-NEXT: %cst_0 -> %cst_0 -@@ -154,7 +154,7 @@ func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.p - } - - // CHECK-LABEL: for_if --func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr, %i1 : i1) { -+func.func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr, %i1 : i1) { - // CHECK: %cst -> %cst - %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - // CHECK-NEXT: %cst_0 -> %cst_0 -@@ -180,7 +180,7 @@ func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !t - } - - // CHECK-LABEL: for_if_for --func @for_if_for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr, %i1 : i1) { -+func.func @for_if_for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr, %i1 : i1) { - // CHECK: %cst -> %cst - %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - // CHECK-NEXT: %cst_0 -> %cst_0 -diff --git a/test/Analysis/test-alignment.mlir b/test/Analysis/test-alignment.mlir -index 0ab34c7a78..af8ea6f856 100644 ---- a/test/Analysis/test-alignment.mlir -+++ b/test/Analysis/test-alignment.mlir -@@ -1,288 +1,288 @@ --// RUN: triton-opt %s -test-print-alignment -split-input-file 2>&1 | FileCheck %s -+// RUN: triton-opt %s -test-print-alignment -split-input-file -o %t 2>&1 | FileCheck %s - --// CHECK-LABEL: cast --func @cast() { -- // CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [1] -+// CHECK-LABEL: @cast -+func.func @cast() { -+ // CHECK: contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1 - %cst = arith.constant 1 : i32 -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [1] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = 1 - %0 = arith.extsi %cst : i32 to i64 -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1 - %cst_tensor = arith.constant dense<1> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1 - %1 = tt.bitcast %cst_tensor : tensor<128xi32> -> tensor<128xi64> - return - } - - // ----- - --// CHECK-LABEL: add --func @add() { -- // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+// CHECK-LABEL: @add -+func.func @add() { -+ // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1 - %1 = arith.constant dense<1> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128], divisibility = [1], constancy = [1], constant_value = - %2 = arith.addi %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [127] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 127 - %3 = arith.constant dense<127> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128] -+ // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128 - %4 = arith.addi %1, %3 : tensor<128xi32> - return - } - - // ----- - --// CHECK-LABEL: sub --func @sub() { -- // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+// CHECK-LABEL: @sub -+func.func @sub() { -+ // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1 - %1 = arith.constant dense<1> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128], divisibility = [1], constancy = [1], constant_value = - %2 = arith.subi %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [129] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 129 - %3 = arith.constant dense<129> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128] -+ // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128 - %4 = arith.subi %3, %1 : tensor<128xi32> - return - } - - // ----- - --// CHECK-LABEL: mul --func @mul() { -- // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+// CHECK-LABEL: @mul -+func.func @mul() { -+ // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1 - %1 = arith.constant dense<1> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %2 = arith.muli %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128] -+ // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128 - %3 = arith.constant dense<128> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128] -+ // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128 - %4 = arith.muli %3, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [2] ; Constancy: [128] ; ConstantValue: [2] -+ // CHECK-NEXT: contiguity = [1], divisibility = [2], constancy = [128], constant_value = 2 - %5 = arith.constant dense<2> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [256] ; Constancy: [128] ; ConstantValue: [256] -+ // CHECK-NEXT: contiguity = [1], divisibility = [256], constancy = [128], constant_value = 256 - %6 = arith.muli %4, %5 : tensor<128xi32> - return - } - - // ----- - --// CHECK-LABEL: div --func @div() { -- // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+// CHECK-LABEL: @div -+func.func @div() { -+ // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1 - %1 = arith.constant dense<1> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %2 = arith.divsi %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %3 = arith.divui %1, %0 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64] -+ // CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64 - %4 = arith.constant dense<64> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16777216] ; Constancy: [64] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [16777216], constancy = [64], constant_value = - %5 = arith.divsi %0, %4 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %6 = arith.divsi %4, %0 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64] -+ // CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64 - %7 = arith.divsi %4, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [2] ; Constancy: [128] ; ConstantValue: [66] -+ // CHECK-NEXT: contiguity = [1], divisibility = [2], constancy = [128], constant_value = 66 - %8 = arith.constant dense<66> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [2] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [2], constant_value = - %9 = arith.divui %0, %8 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [8192] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128], divisibility = [8192], constancy = [1], constant_value = - %10 = tt.make_range {end = 8320 : i32, start = 8192 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [64] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [64], constant_value = - %11 = arith.divsi %10, %4 : tensor<128xi32> -- return -+ return - } - - // ----- - --// CHECK-LABEL: rem --func @rem() { -- // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+// CHECK-LABEL: @rem -+func.func @rem() { -+ // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [1] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 1 - %1 = arith.constant dense<1> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0] -+ // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0 - %2 = arith.remsi %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %3 = arith.remui %1, %0 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64] -+ // CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64 - %4 = arith.constant dense<64> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [64] ; Divisibility: [64] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [64], divisibility = [64], constancy = [1], constant_value = - %5 = arith.remsi %0, %4 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [1], constant_value = - %6 = arith.remsi %4, %0 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [2] ; Constancy: [128] ; ConstantValue: [66] -+ // CHECK-NEXT: contiguity = [1], divisibility = [2], constancy = [128], constant_value = 66 - %7 = arith.constant dense<66> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [2] ; Divisibility: [2] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [2], divisibility = [2], constancy = [1], constant_value = - %8 = arith.remui %0, %7 : tensor<128xi32> -- return -+ return - } - - // ----- - --// CHECK-LABEL: broadcast --func @broadcast() { -- // CHECK: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64] -+// CHECK-LABEL: @broadcast -+func.func @broadcast() { -+ // CHECK: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64 - %0 = arith.constant dense<64> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [64, 1] ; Constancy: [128, 1] ; ConstantValue: [64] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [64, 1], constancy = [128, 1], constant_value = 64 - %1 = tt.expand_dims %0 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [64, 1] ; Constancy: [128, 128] ; ConstantValue: [64] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [64, 1], constancy = [128, 128], constant_value = 64 - %2 = tt.broadcast %1 : (tensor<128x1xi32>) -> tensor<128x128xi32> - return - } - - // ----- - --// CHECK-LABEL: splat --func @splat(%arg0: !tt.ptr {tt.divisibility = 16 : i32}) { -- // CHECK: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 128] ; ConstantValue: [None] -+// CHECK-LABEL: @splat -+func.func @splat(%arg0: !tt.ptr {tt.divisibility = 16 : i32}) { -+ // CHECK: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 128], constant_value = - %0 = tt.splat %arg0 : (!tt.ptr) -> tensor<128x128x!tt.ptr> - return - } - - // ----- - --// CHECK-LABEL: cmp --func @cmp() { -- // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+// CHECK-LABEL: @cmp -+func.func @cmp() { -+ // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0] -+ // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0 - %1 = arith.constant dense<0> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = - %2 = arith.cmpi eq, %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = - %3 = arith.cmpi slt, %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %4 = arith.cmpi sle, %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = - %5 = arith.cmpi sge, %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8] -+ // CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8 - %6 = arith.constant dense<8> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = - %7 = arith.cmpi sgt, %0, %6 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [0] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = 0 - %8 = arith.cmpi sgt, %1, %6 : tensor<128xi32> - return - } - - // ----- - --// CHECK-LABEL: logic --func @logic() { -- // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+// CHECK-LABEL: @logic -+func.func @logic() { -+ // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [64] ; Constancy: [128] ; ConstantValue: [64] -+ // CHECK-NEXT: contiguity = [1], divisibility = [64], constancy = [128], constant_value = 64 - %1 = arith.constant dense<64> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16777216] ; Constancy: [64] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [16777216], constancy = [64], constant_value = - %2 = arith.divsi %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8] -+ // CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8 - %3 = arith.constant dense<8> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [134217728] ; Constancy: [8] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [134217728], constancy = [8], constant_value = - %4 = arith.divsi %0, %3 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %5 = arith.andi %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %6 = arith.ori %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %7 = arith.xori %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = - %8 = arith.andi %2, %4 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = - %9 = arith.ori %2, %4 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [8] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [8], constant_value = - %10 = arith.xori %2, %4 : tensor<128xi32> - return - } - - // ----- - --// CHECK-LABEL: select --func @select() { -- // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+// CHECK-LABEL: @select -+func.func @select() { -+ // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0] -+ // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0 - %1 = arith.constant dense<0> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = - %2 = arith.cmpi eq, %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = - %3 = arith.cmpi slt, %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [1] ; ConstantValue: [0] -+ // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0 - %4 = arith.constant 0 : i1 -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0] -+ // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0 - %7 = tt.splat %4 : (i1) -> tensor<128xi1> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [128] ; ConstantValue: [0] -- %5 = select %4, %3, %7 : tensor<128xi1> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [128], constant_value = 0 -+ %5 = arith.select %4, %3, %7 : tensor<128xi1> -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [128], constant_value = - %8 = "triton_gpu.select"(%7, %3, %2) : (tensor<128xi1>, tensor<128xi1>, tensor<128xi1>) -> tensor<128xi1> - return - } - - // ----- - --func @shift() { -- // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+func.func @shift() { -+ // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8] -+ // CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8 - %1 = arith.constant dense<8> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4] ; Constancy: [128] ; ConstantValue: [4] -+ // CHECK-NEXT: contiguity = [1], divisibility = [4], constancy = [128], constant_value = 4 - %2 = arith.constant dense<4> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [274877906944] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [274877906944], constancy = [1], constant_value = - %3 = arith.shli %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [67108864] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [67108864], constancy = [1], constant_value = - %4 = arith.shrsi %0, %2 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [128] -+ // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = 128 - %5 = arith.shli %1, %2 : tensor<128xi32> - return - } - - // ----- - --func @max_min() { -- // CHECK: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+func.func @max_min() { -+ // CHECK: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [64] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128], divisibility = [64], constancy = [1], constant_value = - %1 = tt.make_range {end = 192 : i32, start = 64 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %2 = arith.maxsi %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %3 = arith.minsi %0, %1 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [8] ; Constancy: [128] ; ConstantValue: [8] -+ // CHECK-NEXT: contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8 - %4 = arith.constant dense<8> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4] ; Constancy: [128] ; ConstantValue: [4] -+ // CHECK-NEXT: contiguity = [1], divisibility = [4], constancy = [128], constant_value = 4 - %5 = arith.constant dense<4> : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [8] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = 8 - %6 = arith.maxsi %4, %5 : tensor<128xi32> - return - } - - // ----- - --// CHECK-LABEL: for --func @for() { -- // CHECK: Contiguity: [1, 1] ; Divisibility: [4611686018427387904, 4611686018427387904] ; Constancy: [128, 32] ; ConstantValue: [0] -+// CHECK-LABEL: @for -+func.func @for() { -+ // CHECK: contiguity = [1, 1], divisibility = [4611686018427387904, 4611686018427387904], constancy = [128, 32], constant_value = 0 - %a_init = arith.constant dense<0> : tensor<128x32xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 32] ; ConstantValue: [1] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = 1 - %b_init = arith.constant dense<1> : tensor<128x32xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [4, 4] ; Constancy: [128, 32] ; ConstantValue: [4] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [4, 4], constancy = [128, 32], constant_value = 4 - %c_init = arith.constant dense<4> : tensor<128x32xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [128] -+ // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [1], constant_value = 128 - %ub = arith.constant 128 : index -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [4611686018427387904] ; Constancy: [1] ; ConstantValue: [0] -+ // CHECK-NEXT: contiguity = [1], divisibility = [4611686018427387904], constancy = [1], constant_value = 0 - %lb = arith.constant 0 : index -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [16] -+ // CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [1], constant_value = 16 - %step = arith.constant 16 : index - %a, %b, %c = scf.for %iv = %lb to %ub step %step iter_args(%a = %a_init, %b = %b_init, %c = %c_init) -> (tensor<128x32xi32>, tensor<128x32xi32>, tensor<128x32xi32>) { -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [1], constant_value = - %t = arith.index_cast %iv : index to i32 -- // CHECK: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 32] ; ConstantValue: [None] -- // CHECK: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 32] ; ConstantValue: [None] -- // CHECK: Contiguity: [1, 1] ; Divisibility: [4, 4] ; Constancy: [128, 32] ; ConstantValue: [4] -+ // CHECK: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = -+ // CHECK: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 32], constant_value = -+ // CHECK: contiguity = [1, 1], divisibility = [4, 4], constancy = [128, 32], constant_value = 4 - scf.yield %b, %a, %c : tensor<128x32xi32>, tensor<128x32xi32>, tensor<128x32xi32> - } - return -@@ -290,53 +290,53 @@ func @for() { - - // ----- - --// CHECK-LABEL: permute_2d --func @permute_2d(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32 {tt.divisibility = 16 : i32}) { -- // CHECK: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [128, 128] ; ConstantValue: [1] -+// CHECK-LABEL: @permute_2d -+func.func @permute_2d(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32 {tt.divisibility = 16 : i32}) { -+ // CHECK: contiguity = [1, 1], divisibility = [1, 1], constancy = [128, 128], constant_value = 1 - %cst = arith.constant dense : tensor<128x128xi1> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = - %cst_0 = arith.constant dense<0.000000e+00> : tensor<128x128xf32> -- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %0 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %1 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [1073741824, 1] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128, 1], divisibility = [1073741824, 1], constancy = [1, 1], constant_value = - %2 = tt.expand_dims %0 {axis = 1 : i32} : (tensor<128xi32>) -> tensor<128x1xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = - %3 = tt.splat %arg1 : (i32) -> tensor<128x1xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [17179869184, 16] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [17179869184, 16], constancy = [1, 1], constant_value = - %4 = arith.muli %2, %3 : tensor<128x1xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = - %5 = tt.splat %arg0 : (!tt.ptr) -> tensor<128x1x!tt.ptr> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 1], constant_value = - %6 = tt.addptr %5, %4 : tensor<128x1x!tt.ptr>, tensor<128x1xi32> -- // CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 1073741824] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [1, 1], constant_value = - %7 = tt.expand_dims %1 {axis = 0 : i32}: (tensor<128xi32>) -> tensor<1x128xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [1, 128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 128], constant_value = - %8 = tt.broadcast %6 : (tensor<128x1x!tt.ptr>) -> tensor<128x128x!tt.ptr> -- // CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 1073741824] ; Constancy: [128, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [128, 1], constant_value = - %9 = tt.broadcast %7 : (tensor<1x128xi32>) -> tensor<128x128xi32> -- // CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 16] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 16], constancy = [1, 1], constant_value = - %10 = tt.addptr %8, %9 : tensor<128x128x!tt.ptr>, tensor<128x128xi32> -- // CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [1073741824, 1] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128, 1], divisibility = [1073741824, 1], constancy = [1, 1], constant_value = - %11 = tt.expand_dims %0 {axis = 1 : i32}: (tensor<128xi32>) -> tensor<128x1xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [128, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [128, 1], constant_value = - %12 = tt.splat %arg2 : (!tt.ptr) -> tensor<128x1x!tt.ptr> -- // CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [16, 1] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128, 1], divisibility = [16, 1], constancy = [1, 1], constant_value = - %13 = tt.addptr %12, %11 : tensor<128x1x!tt.ptr>, tensor<128x1xi32> -- // CHECK-NEXT: Contiguity: [1, 128] ; Divisibility: [1, 1073741824] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 128], divisibility = [1, 1073741824], constancy = [1, 1], constant_value = - %14 = tt.expand_dims %1 {axis = 0 : i32} : (tensor<128xi32>) -> tensor<1x128xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 16] ; Constancy: [1, 128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 16], constancy = [1, 128], constant_value = - %15 = tt.splat %arg3 : (i32) -> tensor<1x128xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 17179869184] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 17179869184], constancy = [1, 1], constant_value = - %16 = arith.muli %14, %15 : tensor<1x128xi32> -- // CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [16, 1] ; Constancy: [1, 128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128, 1], divisibility = [16, 1], constancy = [1, 128], constant_value = - %17 = tt.broadcast %13 : (tensor<128x1x!tt.ptr>) -> tensor<128x128x!tt.ptr> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [16, 17179869184] ; Constancy: [128, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [16, 17179869184], constancy = [128, 1], constant_value = - %18 = tt.broadcast %16 : (tensor<1x128xi32>) -> tensor<128x128xi32> -- // CHECK-NEXT: Contiguity: [128, 1] ; Divisibility: [16, 1] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128, 1], divisibility = [16, 1], constancy = [1, 1], constant_value = - %19 = tt.addptr %17, %18 : tensor<128x128x!tt.ptr>, tensor<128x128xi32> -- // CHECK-NEXT: Contiguity: [1, 1] ; Divisibility: [1, 1] ; Constancy: [1, 1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1, 1], divisibility = [1, 1], constancy = [1, 1], constant_value = - %20 = tt.load %10, %cst, %cst_0 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<128x128xf32> - tt.store %19, %20, %cst : tensor<128x128xf32> - return -@@ -347,29 +347,29 @@ func @permute_2d(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: i32 {t - module { - - // This is a tiny test for verifying StoreOp-related alignment, It simply store a constant to a buffer. --// CHECK-LABEL: store_constant_align --func @store_constant_align(%addr: !tt.ptr {tt.divisibility = 16 : i32}, %n: i32 {tt.divisibility = 16 : i32}) { -- // CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+// CHECK-LABEL: @store_constant_align -+func.func @store_constant_align(%addr: !tt.ptr {tt.divisibility = 16 : i32}, %n: i32 {tt.divisibility = 16 : i32}) { -+ // CHECK: contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %pid = tt.get_program_id {axis = 0 : i32} : i32 -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [128] -+ // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [1], constant_value = 128 - %c128_i32 = arith.constant 128 : i32 -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [1], constant_value = - %1 = arith.muli %pid, %c128_i32 : i32 -- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [1073741824] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128], divisibility = [1073741824], constancy = [1], constant_value = - %2 = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [128] ; Constancy: [128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [128], constancy = [128], constant_value = - %3 = tt.splat %1 : (i32) -> tensor<128xi32> -- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [128] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128], divisibility = [128], constancy = [1], constant_value = - %4 = arith.addi %3, %2 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [128], constant_value = - %5 = tt.splat %addr : (!tt.ptr) -> tensor<128x!tt.ptr> -- // CHECK-NEXT: Contiguity: [128] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [128], divisibility = [16], constancy = [1], constant_value = - %6 = tt.addptr %5, %4 : tensor<128x!tt.ptr>, tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [16] ; Constancy: [128] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [16], constancy = [128], constant_value = - %9 = tt.splat %n : (i32) -> tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [16] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [16], constant_value = - %mask = arith.cmpi slt, %4, %9 : tensor<128xi32> -- // CHECK-NEXT: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] -+ // CHECK-NEXT: contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %cst = arith.constant dense<0.0> : tensor<128xf32> - tt.store %5, %cst, %mask : tensor<128xf32> - return -@@ -381,8 +381,8 @@ func @store_constant_align(%addr: !tt.ptr {tt.divisibility = 16 : i32}, %n: - - // This IR is dumped from vecadd test. - // Note, the hint {tt.divisibility = 16 : i32} for %n_elements affects the alignment of mask. --// CHECK-LABEL: vecadd_mask_align_16 --func @vecadd_mask_align_16(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %n_elements: i32 {tt.divisibility = 16 : i32}) { -+// CHECK-LABEL: @vecadd_mask_align_16 -+func.func @vecadd_mask_align_16(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %n_elements: i32 {tt.divisibility = 16 : i32}) { - %c64_i32 = arith.constant 64 : i32 - %0 = tt.get_program_id {axis = 0 : i32} : i32 - %1 = arith.muli %0, %c64_i32 : i32 -@@ -394,13 +394,13 @@ func @vecadd_mask_align_16(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %ar - %7 = tt.splat %arg1 : (!tt.ptr) -> tensor<64x!tt.ptr> - %8 = tt.addptr %7, %4 : tensor<64x!tt.ptr>, tensor<64xi32> - %9 = tt.splat %n_elements : (i32) -> tensor<64xi32> -- // CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [16] ; ConstantValue: [None] ( %{{.*}} = arith.cmpi slt, %{{.*}}, %{{.*}} : tensor<64xi32> ) -+ // CHECK: arith.cmpi slt, %{{.*}} => contiguity = [1], divisibility = [1], constancy = [16], constant_value = - %mask = arith.cmpi slt, %4, %9 : tensor<64xi32> - %11 = tt.load %6, %mask {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32> - %12 = tt.load %8, %mask {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32> - %13 = arith.addf %11, %12 : tensor<64xf32> - %14 = tt.splat %arg2 : (!tt.ptr) -> tensor<64x!tt.ptr> -- // CHECK: Contiguity: [64] ; Divisibility: [16] ; Constancy: [1] ; ConstantValue: [None] ( %{{.*}} = tt.addptr %{{.*}}, %{{.*}} : tensor<64x!tt.ptr>, tensor<64xi32> ) -+ // CHECK: tt.addptr %{{.*}} => contiguity = [64], divisibility = [16], constancy = [1], constant_value = - %15 = tt.addptr %14, %4 : tensor<64x!tt.ptr>, tensor<64xi32> - tt.store %15, %13, %mask : tensor<64xf32> - return -@@ -410,8 +410,8 @@ func @vecadd_mask_align_16(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %ar - - // This IR is dumped from vecadd test. - // Note, there is no divisibility hint for %n_elements, Triton should assume its divisibility to be 1 by default. --// CHECK-LABEL: vecadd_mask_align_1 --func @vecadd_mask_align_1(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %n_elements: i32) { -+// CHECK-LABEL: @vecadd_mask_align_1 -+func.func @vecadd_mask_align_1(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %n_elements: i32) { - %c64_i32 = arith.constant 64 : i32 - %0 = tt.get_program_id {axis = 0 : i32} : i32 - %1 = arith.muli %0, %c64_i32 : i32 -@@ -423,7 +423,7 @@ func @vecadd_mask_align_1(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg - %7 = tt.splat %arg1 : (!tt.ptr) -> tensor<64x!tt.ptr> - %8 = tt.addptr %7, %4 : tensor<64x!tt.ptr>, tensor<64xi32> - %9 = tt.splat %n_elements : (i32) -> tensor<64xi32> -- // CHECK: Contiguity: [1] ; Divisibility: [1] ; Constancy: [1] ; ConstantValue: [None] ( %{{.*}} = arith.cmpi slt, %{{.*}}, %{{.*}} : tensor<64xi32> ) -+ // CHECK: arith.cmpi slt, %{{.*}} => contiguity = [1], divisibility = [1], constancy = [1], constant_value = - %10 = arith.cmpi slt, %4, %9 : tensor<64xi32> - %11 = tt.load %6, %10 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32> - %12 = tt.load %8, %10 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64xf32> -diff --git a/test/Analysis/test-allocation.mlir b/test/Analysis/test-allocation.mlir -index efb00c404d..f79222aa7b 100644 ---- a/test/Analysis/test-allocation.mlir -+++ b/test/Analysis/test-allocation.mlir -@@ -13,7 +13,7 @@ - module attributes {"triton_gpu.num-warps" = 4 : i32} { - - // CHECK-LABEL: matmul_loop --func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { -+func.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { - %a_ptr_init = tt.broadcast %A : (!tt.ptr) -> tensor<128x32x!tt.ptr, #AL> - %b_ptr_init = tt.broadcast %B : (!tt.ptr) -> tensor<32x128x!tt.ptr, #BL> - -@@ -46,7 +46,7 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B - - // Shared memory is available after a tensor's liveness range ends - // CHECK-LABEL: reusable --func @reusable(%A : !tt.ptr) { -+func.func @reusable(%A : !tt.ptr) { - %cst1 = arith.constant dense : tensor<128x32xi1, #AL> - %cst2 = arith.constant dense<0.000000e+00> : tensor<128x32xf16, #AL> - %cst3 = arith.constant dense : tensor<32x128xi1, #AL> -@@ -78,7 +78,7 @@ func @reusable(%A : !tt.ptr) { - // %cst1->%cst4 - // %cst3->%g->%h->%i - // CHECK-LABEL: preallocate --func @preallocate(%A : !tt.ptr) { -+func.func @preallocate(%A : !tt.ptr) { - // CHECK: offset = 0, size = 512 - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - // CHECK-NEXT: offset = 1024, size = 512 -@@ -113,7 +113,7 @@ func @preallocate(%A : !tt.ptr) { - - // Unused tensors are immediately released - // CHECK-LABEL: unused --func @unused(%A : !tt.ptr) { -+func.func @unused(%A : !tt.ptr) { - // CHECK: offset = 0, size = 1024 - %cst0 = arith.constant dense<0.000000e+00> : tensor<32x16xf16, #A_SHARED> - // CHECK-NEXT: offset = 0, size = 512 -@@ -128,7 +128,7 @@ func @unused(%A : !tt.ptr) { - - // cst0 is alive through the entire function, it cannot be released before the end of the function - // CHECK-LABEL: longlive --func @longlive(%A : !tt.ptr) { -+func.func @longlive(%A : !tt.ptr) { - // CHECK: offset = 0, size = 512 - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - // CHECK-NEXT: offset = 512, size = 512 -@@ -156,7 +156,7 @@ func @longlive(%A : !tt.ptr) { - } - - // CHECK-LABEL: alloc --func @alloc(%A : !tt.ptr) { -+func.func @alloc(%A : !tt.ptr) { - // CHECK: offset = 0, size = 512 - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - %cst1 = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #AL> -@@ -167,7 +167,7 @@ func @alloc(%A : !tt.ptr) { - } - - // CHECK-LABEL: scratch --func @scratch() { -+func.func @scratch() { - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL> - // CHECK: scratch offset = 0, size = 512 - %b = tt.reduce %cst0 {redOp = 1 : i32, axis = 0 : i32} : tensor<16x16xf16, #AL> -> tensor<16xf16, #sliceAd0> -@@ -176,7 +176,7 @@ func @scratch() { - } - - // CHECK-LABEL: trans --func @trans(%A : !tt.ptr) { -+func.func @trans(%A : !tt.ptr) { - // CHECK: offset = 0, size = 1024 - %tensor = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #A_SHARED> - %b = tt.trans %tensor : (tensor<16x32xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED_T> -@@ -184,7 +184,7 @@ func @trans(%A : !tt.ptr) { - } - - // CHECK-LABEL: insert_slice_async --func @insert_slice_async(%A : !tt.ptr, %i1 : i1) { -+func.func @insert_slice_async(%A : !tt.ptr, %i1 : i1) { - %a_ptr = tt.broadcast %A : (!tt.ptr) -> tensor<16x16x!tt.ptr, #AL> - %mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL> - %other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL> -@@ -197,7 +197,7 @@ func @insert_slice_async(%A : !tt.ptr, %i1 : i1) { - } - - // CHECK-LABEL: extract_slice --func @extract_slice(%A : !tt.ptr) { -+func.func @extract_slice(%A : !tt.ptr) { - // CHECK: offset = 0, size = 512 - %cst0 = arith.constant dense<0.000000e+00> : tensor<1x16x16xf16, #A_SHARED> - %index = arith.constant 0 : index -@@ -209,7 +209,7 @@ func @extract_slice(%A : !tt.ptr) { - // B0 -> (B1) -> B0 - // Memory used by B1 can be reused by B0. - // CHECK-LABEL: if --func @if(%i1 : i1) { -+func.func @if(%i1 : i1) { - // CHECK: offset = 0, size = 512 - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - // CHECK-NEXT: offset = 512, size = 512 -@@ -233,7 +233,7 @@ func @if(%i1 : i1) { - // B0 -> (B1) -> (B2) -> B0 - // Memory used by B0 cannot be reused by B1 or B2. - // CHECK-LABEL: if_else --func @if_else(%i1 : i1) { -+func.func @if_else(%i1 : i1) { - // CHECK: offset = 0, size = 512 - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - // CHECK-NEXT: offset = 512, size = 512 -@@ -260,7 +260,7 @@ func @if_else(%i1 : i1) { - // Block arguments and yields are memory aliases that do not trigger a new - // allocation. - // CHECK-LABEL: for --func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { -+func.func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { - // CHECK: offset = 0, size = 8192 - %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - // CHECK-NEXT: offset = 8192, size = 8192 -@@ -275,7 +275,7 @@ func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.p - } - - // CHECK-LABEL: for_if_slice --func @for_if_slice(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr, %i1 : i1) { -+func.func @for_if_slice(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr, %i1 : i1) { - // CHECK: offset = 0, size = 8192 - %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - // CHECK-NEXT: offset = 8192, size = 8192 -@@ -296,7 +296,7 @@ func @for_if_slice(%lb : index, %ub : index, %step : index, %A : !tt.ptr, % - - // c0 cannot be released in the loop - // CHECK-LABEL: for_use_ancestor --func @for_use_ancestor(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr, %i1 : i1) { -+func.func @for_use_ancestor(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr, %i1 : i1) { - // CHECK: offset = 0, size = 8192 - %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - // CHECK-NEXT: offset = 8192, size = 8192 -@@ -316,7 +316,7 @@ func @for_use_ancestor(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr, %i1 : i1) { -+func.func @for_if_for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr, %i1 : i1) { - // CHECK: offset = 0, size = 8192 - %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - // CHECK-NEXT: offset = 8192, size = 8192 -diff --git a/test/Analysis/test-membar.mlir b/test/Analysis/test-membar.mlir -index 7199e5f53d..17880b2094 100644 ---- a/test/Analysis/test-membar.mlir -+++ b/test/Analysis/test-membar.mlir -@@ -14,7 +14,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - - // CHECK-LABEL: matmul_loop - // There shouldn't be any membar with the dot op encoding. --func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { -+func.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { - %a_ptr_init = tt.broadcast %A : (!tt.ptr) -> tensor<128x32x!tt.ptr, #AL> - %b_ptr_init = tt.broadcast %B : (!tt.ptr) -> tensor<32x128x!tt.ptr, #BL> - -@@ -42,7 +42,7 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B - } - - // CHECK-LABEL: raw_single_block --func @raw_single_block(%A : !tt.ptr) { -+func.func @raw_single_block(%A : !tt.ptr) { - %cst1 = arith.constant dense : tensor<128x32xi1, #AL> - %cst2 = arith.constant dense<0.000000e+00> : tensor<128x32xf16, #AL> - %a_ptr = tt.broadcast %A : (!tt.ptr) -> tensor<128x32x!tt.ptr, #AL> -@@ -54,7 +54,7 @@ func @raw_single_block(%A : !tt.ptr) { - } - - // CHECK-LABEL: war_single_block --func @war_single_block(%A : !tt.ptr) { -+func.func @war_single_block(%A : !tt.ptr) { - %cst1 = arith.constant dense : tensor<128x32xi1, #AL> - %cst2 = arith.constant dense<0.000000e+00> : tensor<128x32xf16, #AL> - %a_ptr = tt.broadcast %A : (!tt.ptr) -> tensor<128x32x!tt.ptr, #AL> -@@ -70,7 +70,7 @@ func @war_single_block(%A : !tt.ptr) { - } - - // CHECK-LABEL: scratch --func @scratch() { -+func.func @scratch() { - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - // CHECK: Membar 1 - %a = tt.cat %cst0, %cst0 {axis = 0} : (tensor<16x16xf16, #A_SHARED>, tensor<16x16xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED> -@@ -81,7 +81,7 @@ func @scratch() { - } - - // CHECK-LABEL: async_wait --func @async_wait() { -+func.func @async_wait() { - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - // CHECK: Membar 1 - %a = tt.cat %cst0, %cst0 {axis = 0} : (tensor<16x16xf16, #A_SHARED>, tensor<16x16xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED> -@@ -92,7 +92,7 @@ func @async_wait() { - } - - // CHECK-LABEL: alloc --func @alloc() { -+func.func @alloc() { - %cst0 = triton_gpu.alloc_tensor : tensor<16x16xf16, #A_SHARED> - %a = tt.cat %cst0, %cst0 {axis = 0} : (tensor<16x16xf16, #A_SHARED>, tensor<16x16xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED> - // CHECK: Membar 2 -@@ -101,7 +101,7 @@ func @alloc() { - } - - // CHECK-LABEL: extract_slice --func @extract_slice() { -+func.func @extract_slice() { - %cst0 = arith.constant dense<0.000000e+00> : tensor<1x16x16xf16, #A_SHARED> - %index = arith.constant 0 : index - %cst1 = tensor.extract_slice %cst0[%index, 0, 0][1, 16, 16][1, 1, 1] : tensor<1x16x16xf16, #A_SHARED> to tensor<16x16xf16, #A_SHARED> -@@ -113,14 +113,14 @@ func @extract_slice() { - } - - // CHECK-LABEL: trans --func @trans() { -+func.func @trans() { - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x32xf16, #A_SHARED> - %b = tt.trans %cst0 : (tensor<16x32xf16, #A_SHARED>) -> tensor<32x16xf16, #A_SHARED_T> - return - } - - // CHECK-LABEL: insert_slice_async --func @insert_slice_async(%A : !tt.ptr, %i1 : i1) { -+func.func @insert_slice_async(%A : !tt.ptr, %i1 : i1) { - %a_ptr = tt.broadcast %A : (!tt.ptr) -> tensor<16x16x!tt.ptr, #AL> - %mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL> - %other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL> -@@ -135,7 +135,7 @@ func @insert_slice_async(%A : !tt.ptr, %i1 : i1) { - } - - // CHECK-LABEL: insert_slice --func @insert_slice(%A : !tt.ptr, %i1 : i1) { -+func.func @insert_slice(%A : !tt.ptr, %i1 : i1) { - %a_ptr = tt.broadcast %A : (!tt.ptr) -> tensor<16x16x!tt.ptr, #AL> - %mask = tt.splat %i1 : (i1) -> tensor<16x16xi1, #AL> - %other = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL> -@@ -153,7 +153,7 @@ func @insert_slice(%A : !tt.ptr, %i1 : i1) { - - // If branch inserted a barrier for %cst0 and %cst1, but else didn't, then the barrier should be inserted in the parent region - // CHECK-LABEL: multi_blocks --func @multi_blocks(%i1 : i1) { -+func.func @multi_blocks(%i1 : i1) { - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - %cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - scf.if %i1 { -@@ -174,7 +174,7 @@ func @multi_blocks(%i1 : i1) { - - // Both branches inserted a barrier for %cst0 and %cst1, then the barrier doesn't need to be inserted in the parent region - // CHECK-LABEL: multi_blocks_join_barrier --func @multi_blocks_join_barrier(%i1 : i1) { -+func.func @multi_blocks_join_barrier(%i1 : i1) { - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - %cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - scf.if %i1 { -@@ -192,7 +192,7 @@ func @multi_blocks_join_barrier(%i1 : i1) { - - // Read yielded tensor requires a barrier - // CHECK-LABEL: multi_blocks_yield --func @multi_blocks_yield(%i1 : i1) { -+func.func @multi_blocks_yield(%i1 : i1) { - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - %cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - %a = scf.if %i1 -> (tensor<32x16xf16, #A_SHARED>) { -@@ -212,7 +212,7 @@ func @multi_blocks_yield(%i1 : i1) { - - // Conservatively add a barrier as if the branch (%i1) is never taken - // CHECK-LABEL: multi_blocks_noelse --func @multi_blocks_noelse(%i1 : i1) { -+func.func @multi_blocks_noelse(%i1 : i1) { - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - %cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - scf.if %i1 { -@@ -226,7 +226,7 @@ func @multi_blocks_noelse(%i1 : i1) { - - // Conservatively add a barrier as if the branch (%i2) is never taken - // CHECK-LABEL: multi_blocks_nested_scf --func @multi_blocks_nested_scf(%i1 : i1, %i2 : i1) { -+func.func @multi_blocks_nested_scf(%i1 : i1, %i2 : i1) { - %cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - %cst1 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #A_SHARED> - scf.if %i1 { -@@ -247,7 +247,7 @@ func @multi_blocks_nested_scf(%i1 : i1, %i2 : i1) { - } - - // CHECK-LABEL: for --func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { -+func.func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { - %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - %b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - %c_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> -@@ -262,7 +262,7 @@ func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.p - // Although a_shared and b_shared are synced before entering the loop, - // they are reassociated with aliases (c_shared) and thus require a barrier. - // CHECK-LABEL: for_alias --func @for_alias(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { -+func.func @for_alias(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { - %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - %b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - // CHECK-NEXT: Membar 2 -@@ -282,7 +282,7 @@ func @for_alias(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : - // Although cst2 is not an argument of scf.yield, its memory is reused by cst1. - // So we need a barrier both before and after cst1 - // CHECK-LABEL: for_reuse --func @for_reuse(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { -+func.func @for_reuse(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { - %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - %b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - // CHECK-NEXT: Membar 2 -@@ -302,7 +302,7 @@ func @for_reuse(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : - - - // CHECK-LABEL: for_reuse_nested --func @for_reuse_nested(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { -+func.func @for_reuse_nested(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { - %a_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - %b_shared_init = arith.constant dense<0.00e+00> : tensor<128x32xf16, #A_SHARED> - // CHECK-NEXT: Membar 2 -diff --git a/test/Conversion/triton_ops.mlir b/test/Conversion/triton_ops.mlir -index e9ee502435..0e979b148d 100644 ---- a/test/Conversion/triton_ops.mlir -+++ b/test/Conversion/triton_ops.mlir -@@ -1,6 +1,6 @@ - // RUN: triton-opt %s | FileCheck %s - --func @cast_ops(%scalar_ptr: !tt.ptr, %scalar_f32: f32, %scalar_i64: i64) { -+func.func @cast_ops(%scalar_ptr: !tt.ptr, %scalar_f32: f32, %scalar_i64: i64) { - // scalar -> scalar - // CHECK: i64 -> !tt.ptr - %0 = tt.int_to_ptr %scalar_i64 : i64 -> !tt.ptr -@@ -35,7 +35,7 @@ func @cast_ops(%scalar_ptr: !tt.ptr, %scalar_f32: f32, %scalar_i64: i64) { - return - } - --func @addptr_ops(%scalar_ptr: !tt.ptr, %scalar_i32: i32) { -+func.func @addptr_ops(%scalar_ptr: !tt.ptr, %scalar_i32: i32) { - // scalar -> scalar - // CHECK: !tt.ptr - %0 = tt.addptr %scalar_ptr, %scalar_i32 : !tt.ptr, i32 -@@ -54,7 +54,7 @@ func @addptr_ops(%scalar_ptr: !tt.ptr, %scalar_i32: i32) { - return - } - --func @load_store_ops_scalar(%ptr: !tt.ptr {tt.divisibility = 16 : i32}, %mask : i1) { -+func.func @load_store_ops_scalar(%ptr: !tt.ptr {tt.divisibility = 16 : i32}, %mask : i1) { - // Test if Load/Store ops can handle scalar values - %other = arith.constant 0.0e+0 : f32 - -@@ -76,7 +76,7 @@ func @load_store_ops_scalar(%ptr: !tt.ptr {tt.divisibility = 16 : i32}, %ma - return - } - --func @reduce_ops_infer(%ptr: !tt.ptr, %v : tensor<1x2x4xf32>) { -+func.func @reduce_ops_infer(%ptr: !tt.ptr, %v : tensor<1x2x4xf32>) { - // Test if reduce ops infer types correctly - - // CHECK: %{{.*}} = tt.reduce %{{.*}} -> tensor<2x4xf32> -@@ -101,7 +101,7 @@ func @reduce_ops_infer(%ptr: !tt.ptr, %v : tensor<1x2x4xf32>) { - return - } - --func @dot_ops_infer(%ptr: !tt.ptr, %v : f32) { -+func.func @dot_ops_infer(%ptr: !tt.ptr, %v : f32) { - // Test if reduce ops infer types correctly - %v128x32 = tt.splat %v : (f32) -> tensor<128x32xf32> - %v32x128 = tt.splat %v : (f32) -> tensor<32x128xf32> -diff --git a/test/Conversion/triton_to_tritongpu.mlir b/test/Conversion/triton_to_tritongpu.mlir -index a160bc8815..b461ca542f 100644 ---- a/test/Conversion/triton_to_tritongpu.mlir -+++ b/test/Conversion/triton_to_tritongpu.mlir -@@ -1,6 +1,6 @@ - // RUN: triton-opt %s -split-input-file -convert-triton-to-tritongpu=num-warps=2 | FileCheck %s - --func @ops() { -+func.func @ops() { - // CHECK: module attributes {"triton_gpu.num-warps" = 2 : i32} {{.*}} - %a = arith.constant dense<1.00e+00> : tensor<128x32xf16> - %b = arith.constant dense<2.00e+00> : tensor<32x128xf16> -@@ -11,7 +11,7 @@ func @ops() { - - // ----- - --func @load_ops(%ptr: !tt.ptr {tt.divisibility = 16 : i32}) { -+func.func @load_ops(%ptr: !tt.ptr {tt.divisibility = 16 : i32}) { - // Test if LoadOp is lowered properly (see #771) - %ptrs = tt.splat %ptr : (!tt.ptr) -> tensor<128x!tt.ptr> - %mask = arith.constant dense : tensor<128xi1> -@@ -30,7 +30,7 @@ func @load_ops(%ptr: !tt.ptr {tt.divisibility = 16 : i32}) { - - // ----- - --func @reduce_ops(%ptr: !tt.ptr {tt.divisibility = 16 : i32}) { -+func.func @reduce_ops(%ptr: !tt.ptr {tt.divisibility = 16 : i32}) { - // Test if the total number of threadsPerWarp is 32 - // Test if the total number of warps is 2 - // CHECK: #blocked0 = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [4, 8], warpsPerCTA = [1, 2], order = [0, 1]}> -diff --git a/test/Conversion/tritongpu_to_llvm.mlir b/test/Conversion/tritongpu_to_llvm.mlir -index e9e7d5a340..507b362c99 100644 ---- a/test/Conversion/tritongpu_to_llvm.mlir -+++ b/test/Conversion/tritongpu_to_llvm.mlir -@@ -4,7 +4,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK: llvm.func @test_empty_kernel(%arg0: i32, %arg1: !llvm.ptr) - // Here the 128 comes from the 4 in module attribute multiples 32 - // CHECK: attributes {nvvm.kernel = 1 : ui1, nvvm.maxntid = 128 : i32} {{.*}} -- func @test_empty_kernel(%lb : index, %A : !tt.ptr) { -+ func.func @test_empty_kernel(%lb : index, %A : !tt.ptr) { - // CHECK: llvm.return - return - } -@@ -15,7 +15,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_load -- func @basic_load(%a_ptr_init : tensor<256x!tt.ptr, #blocked0>, %cst : tensor<256xi1, #blocked0>, %cst_0 : tensor<256xf32, #blocked0>) { -+ func.func @basic_load(%a_ptr_init : tensor<256x!tt.ptr, #blocked0>, %cst : tensor<256xi1, #blocked0>, %cst_0 : tensor<256xf32, #blocked0>) { - // CHECK: llvm.inline_asm - // CHECK: llvm.inline_asm - %1 = tt.load %a_ptr_init, %cst, %cst_0 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<256xf32, #blocked0> -@@ -28,7 +28,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [2], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: vectorized_load -- func @vectorized_load(%a_ptr_init : tensor<256x!tt.ptr, #blocked0>, %cst : tensor<256xi1, #blocked0>, %cst_0 : tensor<256xf32, #blocked0>) { -+ func.func @vectorized_load(%a_ptr_init : tensor<256x!tt.ptr, #blocked0>, %cst : tensor<256xi1, #blocked0>, %cst_0 : tensor<256xf32, #blocked0>) { - // CHECK: llvm.inline_asm - // CHECK-SAME: ld.global.b32 - // CHECK: llvm.inline_asm -@@ -43,7 +43,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [8], threadsPerWarp = [32], warpsPerCTA = [1], order = [0]}> - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK-LABEL: vectorized_load_f16 -- func @vectorized_load_f16(%a_ptr_init: tensor<256x!tt.ptr, #blocked0>, %cst : tensor<256xi1, #blocked0>, %cst_0 : tensor<256xf16, #blocked0>) { -+ func.func @vectorized_load_f16(%a_ptr_init: tensor<256x!tt.ptr, #blocked0>, %cst : tensor<256xi1, #blocked0>, %cst_0 : tensor<256xf16, #blocked0>) { - // CHECK: llvm.inline_asm - // CHECK-SAME: ld.global.b16 - // CHECK: llvm.inline_asm -@@ -59,7 +59,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [8], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: masked_load_const_other -- func @masked_load_const_other(%a_ptr_init : tensor<256x!tt.ptr, #blocked0>, %cst : tensor<256xi1, #blocked0>) { -+ func.func @masked_load_const_other(%a_ptr_init : tensor<256x!tt.ptr, #blocked0>, %cst : tensor<256xi1, #blocked0>) { - %cst_0 = arith.constant dense<0.000000e+00> : tensor<256xf32, #blocked0> - %1 = tt.load %a_ptr_init, %cst, %cst_0 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<256xf32, #blocked0> - return -@@ -72,7 +72,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [4], threadsPerWarp = [32], warpsPerCTA = [8], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: masked_load_const_other_vec -- func @masked_load_const_other_vec(%a_ptr_init : tensor<256x!tt.ptr, #blocked0>, %cst : tensor<256xi1, #blocked0>) { -+ func.func @masked_load_const_other_vec(%a_ptr_init : tensor<256x!tt.ptr, #blocked0>, %cst : tensor<256xi1, #blocked0>) { - %cst_0 = arith.constant dense<0.000000e+00> : tensor<256xf32, #blocked0> - %1 = tt.load %a_ptr_init, %cst, %cst_0 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<256xf32, #blocked0> - return -@@ -84,7 +84,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [2], order = [0]}> - module attributes {"triton_gpu.num-warps" = 2 : i32} { - // CHECK-LABEL: global_load_store_no_vec -- func @global_load_store_no_vec(%arg0: !tt.ptr {tt.divisibility = 4 : i32}, %arg1: !tt.ptr {tt.divisibility = 4 : i32}, %arg2: !tt.ptr {tt.divisibility = 4 : i32}, %arg3: i32) { -+ func.func @global_load_store_no_vec(%arg0: !tt.ptr {tt.divisibility = 4 : i32}, %arg1: !tt.ptr {tt.divisibility = 4 : i32}, %arg2: !tt.ptr {tt.divisibility = 4 : i32}, %arg3: i32) { - %c256_i32 = arith.constant 256 : i32 - %0 = tt.get_program_id {axis = 0 : i32} : i32 - %1 = arith.muli %0, %c256_i32 : i32 -@@ -128,7 +128,7 @@ module attributes {"triton_gpu.num-warps" = 2 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [4], threadsPerWarp = [32], warpsPerCTA = [2], order = [0]}> - module attributes {"triton_gpu.num-warps" = 2 : i32} { - // CHECK-LABEL: global_load_store_vec4 -- func @global_load_store_vec4(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32) { -+ func.func @global_load_store_vec4(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32) { - %c256_i32 = arith.constant 256 : i32 - %0 = tt.get_program_id {axis = 0 : i32} : i32 - %1 = arith.muli %0, %c256_i32 : i32 -@@ -165,7 +165,7 @@ module attributes {"triton_gpu.num-warps" = 2 : i32} { - #blocked = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [2], order = [0]}> - // Note, the %n_elements doesn't have a "tt.divisibility" hint, so Triton assumes it's divisibility is 1, this should effect the mask's alignment and further restrict the load/store ops' vector width to be 1. - module attributes {"triton_gpu.num-warps" = 2 : i32} { -- func @vecadd_masked_vec1(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %n_elements: i32) { -+ func.func @vecadd_masked_vec1(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %n_elements: i32) { - %c64_i32 = arith.constant 64 : i32 - %0 = tt.get_program_id {axis = 0 : i32} : i32 - %1 = arith.muli %0, %c64_i32 : i32 -@@ -195,7 +195,7 @@ module attributes {"triton_gpu.num-warps" = 2 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [8], threadsPerWarp = [32], warpsPerCTA = [1], order = [0]}> - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK-LABEL: global_load_store_vec2 -- func @global_load_store_vec2(%arg0: !tt.ptr {tt.divisibility = 8 : i32}, %arg1: !tt.ptr {tt.divisibility = 8 : i32}, %arg2: !tt.ptr {tt.divisibility = 8 : i32}, %arg3: i32) { -+ func.func @global_load_store_vec2(%arg0: !tt.ptr {tt.divisibility = 8 : i32}, %arg1: !tt.ptr {tt.divisibility = 8 : i32}, %arg2: !tt.ptr {tt.divisibility = 8 : i32}, %arg3: i32) { - %c256_i32 = arith.constant 256 : i32 - %0 = tt.get_program_id {axis = 0 : i32} : i32 - %1 = arith.muli %0, %c256_i32 : i32 -@@ -240,7 +240,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [8], threadsPerWarp = [32], warpsPerCTA = [1], order = [0]}> - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK-LABEL: global_load_store_vec8 -- func @global_load_store_vec8(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32) { -+ func.func @global_load_store_vec8(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32) { - %c256_i32 = arith.constant 256 : i32 - %0 = tt.get_program_id {axis = 0 : i32} : i32 - %1 = arith.muli %0, %c256_i32 : i32 -@@ -283,7 +283,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - #blocked2 = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [32, 1], warpsPerCTA = [4, 1], order = [0, 1]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_view_broadcast -- func @basic_view_broadcast(%arg : tensor<256xf32,#blocked0>) { -+ func.func @basic_view_broadcast(%arg : tensor<256xf32,#blocked0>) { - // CHECK: llvm.mlir.undef - // CHECK: %[[T0:.*]] = llvm.extractvalue - // CHECK: %[[T1:.*]] = llvm.extractvalue -@@ -307,7 +307,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [2], threadsPerWarp = [32], warpsPerCTA = [1], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_make_range -- func @basic_make_range() { -+ func.func @basic_make_range() { - // CHECK: nvvm.read.ptx.sreg.tid.x - // CHECK: llvm.mlir.undef - // CHECK: llvm.insertvalue -@@ -322,7 +322,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_addf -- func @basic_addf(%arg0 : tensor<256xf32,#blocked0>, %arg1 : tensor<256xf32,#blocked0>) { -+ func.func @basic_addf(%arg0 : tensor<256xf32,#blocked0>, %arg1 : tensor<256xf32,#blocked0>) { - // CHECK: llvm.fadd - // CHECK: llvm.fadd - %1 = arith.addf %arg0, %arg1 : tensor<256xf32,#blocked0> -@@ -335,7 +335,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_addi -- func @basic_addi(%arg0 : tensor<256xi32,#blocked0>, %arg1 : tensor<256xi32,#blocked0>) { -+ func.func @basic_addi(%arg0 : tensor<256xi32,#blocked0>, %arg1 : tensor<256xi32,#blocked0>) { - // CHECK: llvm.add - // CHECK: llvm.add - %1 = arith.addi %arg0, %arg1 : tensor<256xi32,#blocked0> -@@ -347,7 +347,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_program_id -- func @basic_program_id() { -+ func.func @basic_program_id() { - // CHECK: nvvm.read.ptx.sreg.ctaid.x : i32 - %0 = tt.get_program_id {axis = 0 : i32} : i32 - return -@@ -359,7 +359,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_addptr -- func @basic_addptr(%arg0 : tensor<256x!tt.ptr,#blocked0>, %arg1 : tensor<256xi32,#blocked0>) { -+ func.func @basic_addptr(%arg0 : tensor<256x!tt.ptr,#blocked0>, %arg1 : tensor<256xi32,#blocked0>) { - // CHECK: llvm.getelementptr - // CHECK: llvm.getelementptr - %0 = tt.addptr %arg0, %arg1 : tensor<256x!tt.ptr, #blocked0>, tensor<256xi32, #blocked0> -@@ -373,7 +373,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK: llvm.mlir.global external @global_smem - // CHECK-LABEL: basic_alloc_tensor -- func @basic_alloc_tensor() { -+ func.func @basic_alloc_tensor() { - // CHECK: llvm.mlir.addressof @global_smem - // CHECK-NEXT: llvm.bitcast - // CHECK-NEXT: llvm.mlir.constant -@@ -390,7 +390,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK: llvm.mlir.global external @global_smem - // CHECK-LABEL: basic_extract_slice -- func @basic_extract_slice() { -+ func.func @basic_extract_slice() { - // CHECK: llvm.mlir.addressof @global_smem - // CHECK: llvm.extractvalue - // CHECK-NEXT: llvm.extractvalue -@@ -423,7 +423,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_async_wait -- func @basic_async_wait() { -+ func.func @basic_async_wait() { - // CHECK: cp.async.wait_group 0x4 - triton_gpu.async_wait {num = 4: i32} - return -@@ -442,7 +442,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #A = #triton_gpu.shared<{vec = 8, perPhase = 1, maxPhase = 4, order = [1, 0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_insert_slice_async_fallback -- func @basic_insert_slice_async_fallback(%arg0: !tt.ptr {tt.divisibility = 1 : i32}) { -+ func.func @basic_insert_slice_async_fallback(%arg0: !tt.ptr {tt.divisibility = 1 : i32}) { - %off0_ = tt.make_range {end = 16 : i32, start = 0 : i32} : tensor<16xi32, #slice2d1> - %off1_ = tt.make_range {end = 32 : i32, start = 0 : i32} : tensor<64xi32, #slice3d0> - %off0 = tt.expand_dims %off0_ {axis = 1 : i32} : (tensor<16xi32, #slice2d1>) -> tensor<16x1xi32, #block2> -@@ -481,7 +481,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #A = #triton_gpu.shared<{vec = 8, perPhase = 1, maxPhase = 4, order = [1, 0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_insert_slice_async_v4 -- func @basic_insert_slice_async_v4(%arg0: !tt.ptr {tt.divisibility = 32 : i32}) { -+ func.func @basic_insert_slice_async_v4(%arg0: !tt.ptr {tt.divisibility = 32 : i32}) { - %off0_ = tt.make_range {end = 16 : i32, start = 0 : i32} : tensor<16xi32, #slice2d1> - %off1_ = tt.make_range {end = 32 : i32, start = 0 : i32} : tensor<64xi32, #slice3d0> - %off0 = tt.expand_dims %off0_ {axis = 1 : i32} : (tensor<16xi32, #slice2d1>) -> tensor<16x1xi32, #block2> -@@ -523,7 +523,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #A = #triton_gpu.shared<{vec = 1, perPhase = 1, maxPhase = 4, order = [1, 0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_insert_slice_async_v1 -- func @basic_insert_slice_async_v1(%arg0: !tt.ptr {tt.divisibility = 4 : i32}) { -+ func.func @basic_insert_slice_async_v1(%arg0: !tt.ptr {tt.divisibility = 4 : i32}) { - %off0_ = tt.make_range {end = 16 : i32, start = 0 : i32} : tensor<16xi32, #slice2d1> - %off1_ = tt.make_range {end = 32 : i32, start = 0 : i32} : tensor<32xi32, #slice3d0> - %off0 = tt.expand_dims %off0_ {axis = 1 : i32} : (tensor<16xi32, #slice2d1>) -> tensor<16x1xi32, #block2> -@@ -568,7 +568,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #A = #triton_gpu.shared<{vec = 1, perPhase = 1, maxPhase = 4, order = [1, 0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_insert_slice_async_v1_multictas -- func @basic_insert_slice_async_v1_multictas(%arg0: !tt.ptr {tt.divisibility = 4 : i32}) { -+ func.func @basic_insert_slice_async_v1_multictas(%arg0: !tt.ptr {tt.divisibility = 4 : i32}) { - %off0_ = tt.make_range {end = 32 : i32, start = 0 : i32} : tensor<32xi32, #slice2d1> - %off1_ = tt.make_range {end = 32 : i32, start = 0 : i32} : tensor<32xi32, #slice3d0> - %off0 = tt.expand_dims %off0_ {axis = 1 : i32} : (tensor<32xi32, #slice2d1>) -> tensor<32x1xi32, #block2> -@@ -619,7 +619,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK: basic_splat -- func @basic_splat(%ptr: !tt.ptr) { -+ func.func @basic_splat(%ptr: !tt.ptr) { - // CHECK: llvm.mlir.undef - // CHECK: llvm.insertvalue - // CHECK: llvm.insertvalue -@@ -633,7 +633,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: basic_store -- func @basic_store(%ptrs: tensor<256x!tt.ptr, #blocked0>, %vals: tensor<256xf32, #blocked0>, %mask: tensor<256xi1, #blocked0>) { -+ func.func @basic_store(%ptrs: tensor<256x!tt.ptr, #blocked0>, %vals: tensor<256xf32, #blocked0>, %mask: tensor<256xi1, #blocked0>) { - // CHECK: llvm.inline_asm - // CHECK-SAME: st.global.b32 [ ${{.*}} + 0 ], { ${{.*}} }; - // CHECK: llvm.inline_asm -@@ -650,7 +650,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK: llvm.mlir.global external @global_smem() {addr_space = 3 : i32} : !llvm.array<0 x i8> - // CHECK-LABEL: convert_layout_blocked_blocked -- func @convert_layout_blocked_blocked(%arg0: tensor<16x16xf32, #blocked0>) { -+ func.func @convert_layout_blocked_blocked(%arg0: tensor<16x16xf32, #blocked0>) { - // CHECK: llvm.mlir.addressof @global_smem - // CHECK: llvm.store - // CHECK-SAME: !llvm.ptr, 3> -@@ -697,7 +697,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK: llvm.mlir.global external @global_smem() {addr_space = 3 : i32} : !llvm.array<0 x i8> - // CHECK-LABEL: convert_layout_blocked_blocked_vec -- func @convert_layout_blocked_blocked_vec(%arg0: tensor<16x16xf32, #blocked0>) { -+ func.func @convert_layout_blocked_blocked_vec(%arg0: tensor<16x16xf32, #blocked0>) { - // CHECK: llvm.mlir.addressof @global_smem - // CHECK: llvm.store - // CHECK-SAME: !llvm.ptr, 3> -@@ -720,7 +720,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK: llvm.mlir.global external @global_smem() {addr_space = 3 : i32} : !llvm.array<0 x i8> - // CHECK-LABEL: convert_layout_blocked_blocked_multi_rep -- func @convert_layout_blocked_blocked_multi_rep(%arg0: tensor<16x16xf32, #blocked0>) { -+ func.func @convert_layout_blocked_blocked_multi_rep(%arg0: tensor<16x16xf32, #blocked0>) { - // CHECK: llvm.mlir.addressof @global_smem - // CHECK: llvm.store - // CHECK-SAME: !llvm.ptr, 3> -@@ -751,7 +751,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - #dot_operand_b = #triton_gpu.dot_op<{opIdx=1, parent=#mma0}> - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK-LABEL: convert_dot -- func @convert_dot(%A: tensor<16x16xf16, #blocked0>, %B: tensor<16x16xf16, #blocked0>) { -+ func.func @convert_dot(%A: tensor<16x16xf16, #blocked0>, %B: tensor<16x16xf16, #blocked0>) { - %AA = triton_gpu.convert_layout %A : (tensor<16x16xf16, #blocked0>) -> tensor<16x16xf16, #shared0> - %BB = triton_gpu.convert_layout %B : (tensor<16x16xf16, #blocked0>) -> tensor<16x16xf16, #shared0> - // CHECK: llvm.inline_asm -@@ -775,7 +775,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - // TODO: problems in MLIR's parser on slice layout - // #blocked0 = #triton_gpu.blocked<{sizePerThread = [1, 4], threadsPerWarp = [8, 4], warpsPerCTA = [1, 1], order = [1, 0]}> - // module attributes {"triton_gpu.num-warps" = 1 : i32} { --// func @make_range_sliced_layout() { -+// func.func @make_range_sliced_layout() { - // %0 = tt.make_range {end = 16 : i32, start = 0 : i32} : tensor<16xi32, #triton_gpu.slice<{dim = 0, parent = #blocked0}>> - // return - // } -@@ -788,7 +788,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK: llvm.mlir.global external @global_smem() {addr_space = 3 : i32} : !llvm.array<0 x i8> - // CHECK-LABEL: convert_layout_mmav2_block -- func @convert_layout_mmav2_blocked(%arg0: tensor<32x16xf32, #mma>) { -+ func.func @convert_layout_mmav2_blocked(%arg0: tensor<32x16xf32, #mma>) { - // CHECK: llvm.store - // CHECK-SAME: !llvm.ptr, 3> - // CHECK: llvm.store -@@ -808,7 +808,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK: llvm.mlir.global external @global_smem() {addr_space = 3 : i32} : !llvm.array<0 x i8> - // CHECK-LABEL: convert_layout_mmav1_block -- func @convert_layout_mmav1_blocked(%arg0: tensor<32x64xf32, #mma>) { -+ func.func @convert_layout_mmav1_blocked(%arg0: tensor<32x64xf32, #mma>) { - // CHECK: llvm.store - // CHECK-SAME: !llvm.ptr, 3> - // CHECK: llvm.store -@@ -831,7 +831,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK: llvm.mlir.global external @global_smem() {addr_space = 3 : i32} : !llvm.array<0 x i8> - // CHECK-LABEL: convert_layout_blocked_shared -- func @convert_layout_blocked_shared(%arg0: tensor<128x32xf32, #blocked0>) { -+ func.func @convert_layout_blocked_shared(%arg0: tensor<128x32xf32, #blocked0>) { - // CHECK: llvm.store - // CHECK-SAME: !llvm.ptr, 3> - // CHECK: llvm.store -@@ -847,7 +847,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - #blocked1 = #triton_gpu.blocked<{sizePerThread = [1, 4], threadsPerWarp = [4, 8], warpsPerCTA = [1, 1], order = [1, 0]}> - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK-LABEL: convert_blocked1d_to_slice0 -- func @convert_blocked1d_to_slice0(%src:tensor<32xi32, #blocked0>) { -+ func.func @convert_blocked1d_to_slice0(%src:tensor<32xi32, #blocked0>) { - // CHECK-COUNT-4: llvm.load {{.*}} : !llvm.ptr, 3> - %cvt = triton_gpu.convert_layout %src : (tensor<32xi32, #blocked0>) -> tensor<32xi32, #triton_gpu.slice<{dim = 0, parent = #blocked1}>> - return -@@ -860,7 +860,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - #blocked1 = #triton_gpu.blocked<{sizePerThread = [1, 4], threadsPerWarp = [4, 8], warpsPerCTA = [1, 1], order = [1, 0]}> - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK-LABEL: convert_blocked1d_to_slice1 -- func @convert_blocked1d_to_slice1(%src:tensor<32xi32, #blocked0>) { -+ func.func @convert_blocked1d_to_slice1(%src:tensor<32xi32, #blocked0>) { - // CHECK-COUNT-32: llvm.load {{.*}} : !llvm.ptr, 3> - %cvt = triton_gpu.convert_layout %src : (tensor<32xi32, #blocked0>) -> tensor<32xi32, #triton_gpu.slice<{dim = 1, parent = #blocked1}>> - return -@@ -873,7 +873,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - #blocked1 = #triton_gpu.blocked<{sizePerThread = [4], threadsPerWarp = [32], warpsPerCTA = [1], order = [0]}> - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK-LABEL: convert_blocked_to_blocked_ptr -- func @convert_blocked_to_blocked_ptr(%src:tensor<32x!tt.ptr, #blocked0>) { -+ func.func @convert_blocked_to_blocked_ptr(%src:tensor<32x!tt.ptr, #blocked0>) { - // CHECK: llvm.ptrtoint - // CHECK: llvm.store - // CHECK: nvvm.barrier0 -@@ -892,7 +892,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - #dot_operand_a = #triton_gpu.dot_op<{opIdx=0, parent=#mma}> - #dot_operand_b = #triton_gpu.dot_op<{opIdx=1, parent=#mma}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { -- func @matmul_kernel_dot_operand_layout(%ptr:!tt.ptr {tt.divisibility = 16 : i32}, -+ func.func @matmul_kernel_dot_operand_layout(%ptr:!tt.ptr {tt.divisibility = 16 : i32}, - %a:tensor<128x32xf16, #shared>, %b:tensor<32x256xf16, #shared>) { - %cst = arith.constant dense<0.000000e+00> : tensor<128x256xf32, #mma> - // CHECK: ldmatrix.sync.aligned.m8n8.x4.shared.b16 -@@ -918,7 +918,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #dot_operand_a = #triton_gpu.dot_op<{opIdx=0, parent=#mma, isMMAv1Row=true}> - #dot_operand_b = #triton_gpu.dot_op<{opIdx=1, parent=#mma, isMMAv1Row=true}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { -- func @matmul884_kernel_dot_operand_layout(%ptr:!tt.ptr {tt.divisibility = 16 : i32}, -+ func.func @matmul884_kernel_dot_operand_layout(%ptr:!tt.ptr {tt.divisibility = 16 : i32}, - %a:tensor<32x64xf16, #shared0>, %b:tensor<64x64xf16, #shared1>) { - %cst = arith.constant dense<0.000000e+00> : tensor<32x64xf32, #mma> - // CHECK: ldmatrix.sync.aligned.m8n8.x4.shared.b16 -@@ -941,7 +941,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #dot_operand_a = #triton_gpu.dot_op<{opIdx=0, parent=#blocked}> - #dot_operand_b = #triton_gpu.dot_op<{opIdx=1, parent=#blocked}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { -- func @matmul_fmadot(%ptr:!tt.ptr {tt.divisibility = 16 : i32}, -+ func.func @matmul_fmadot(%ptr:!tt.ptr {tt.divisibility = 16 : i32}, - %a:tensor<32x16xf32, #shared>, %b:tensor<16x32xf32, #shared>) { - %cst = arith.constant dense<0.000000e+00> : tensor<32x32xf32, #blocked> - // CHECK: llvm.intr.fmuladd -@@ -965,7 +965,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #dot_operand_b = #triton_gpu.dot_op<{opIdx=1, parent=#mma}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: matmul_tf32dot -- func @matmul_tf32dot(%ptr:!tt.ptr {tt.divisibility = 16 : i32}, -+ func.func @matmul_tf32dot(%ptr:!tt.ptr {tt.divisibility = 16 : i32}, - %a:tensor<32x16xf32, #shared>, %b:tensor<16x32xf32, #shared>) { - %cst = arith.constant dense<0.000000e+00> : tensor<32x32xf32, #mma> - // CHECK: llvm.inline_asm -@@ -1000,7 +1000,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: atomic_add_f32 -- func @atomic_add_f32(%arg0 : tensor<256x!tt.ptr, #blocked0>, %arg1 : tensor<256xi1, #blocked0>, %arg2 : tensor<256xf32, #blocked0>) { -+ func.func @atomic_add_f32(%arg0 : tensor<256x!tt.ptr, #blocked0>, %arg1 : tensor<256xi1, #blocked0>, %arg2 : tensor<256xf32, #blocked0>) { - // CHECK: llvm.inline_asm - // CHECK-SAME: atom.global.gpu.add.f32 - %0 = "tt.atomic_rmw" (%arg0, %arg2, %arg1) {atomic_rmw_op = 5 : i32} : (tensor<256x!tt.ptr, #blocked0>, tensor<256xf32, #blocked0>, tensor<256xi1, #blocked0>) -> tensor<256xf32, #blocked0> -@@ -1012,7 +1012,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - --func @test_get_program_id(%a: tensor<32x!tt.ptr, #blocked0>) { -+func.func @test_get_program_id(%a: tensor<32x!tt.ptr, #blocked0>) { - %blockidx = tt.get_program_id {axis=0:i32} : i32 - %blockidy = tt.get_program_id {axis=1:i32} : i32 - %blockidz = tt.get_program_id {axis=2:i32} : i32 -@@ -1032,7 +1032,7 @@ func @test_get_program_id(%a: tensor<32x!tt.ptr, #blocked0>) { - // ----- - #blocked0 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { -- func @test_get_num_program(%a: tensor<32x!tt.ptr, #blocked0>) { -+ func.func @test_get_num_program(%a: tensor<32x!tt.ptr, #blocked0>) { - // CHECK: nvvm.read.ptx.sreg.nctaid.x - // CHECK: nvvm.read.ptx.sreg.nctaid.y - // CHECK: nvvm.read.ptx.sreg.nctaid.z -@@ -1052,7 +1052,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #blocked0 = #triton_gpu.blocked<{sizePerThread = [2], threadsPerWarp = [32], warpsPerCTA = [1], order = [0]}> - module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK-LABEL: test_index_cache -- func @test_index_cache() { -+ func.func @test_index_cache() { - // CHECK: nvvm.read.ptx.sreg.tid.x - %0 = tt.make_range {end = 256 : i32, start = 0 : i32} : tensor<256xi32, #blocked0> - // CHECK-NOT: nvvm.read.ptx.sreg.tid.x -@@ -1066,7 +1066,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - #shared0 = #triton_gpu.shared<{vec = 8, perPhase = 2, maxPhase = 4, order = [1, 0]}> - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK-LABEL: test_base_index_cache -- func @test_base_index_cache(%arg0: tensor<128x32xf32, #blocked0>) { -+ func.func @test_base_index_cache(%arg0: tensor<128x32xf32, #blocked0>) { - // CHECK: nvvm.read.ptx.sreg.tid.x - %0 = triton_gpu.convert_layout %arg0 : (tensor<128x32xf32, #blocked0>) -> tensor<128x32xf32, #shared0> - // CHECK-NOT: nvvm.read.ptx.sreg.tid.x -@@ -1080,7 +1080,7 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} { - #shared0 = #triton_gpu.shared<{vec = 8, perPhase = 2, maxPhase = 4, order = [1, 0]}> - module attributes {"triton_gpu.num-warps" = 1 : i32} { - // CHECK-LABEL: test_index_cache_different_block -- func @test_index_cache_different_block(%arg0: tensor<128x32xf32, #blocked0>, %arg1: i1) { -+ func.func @test_index_cache_different_block(%arg0: tensor<128x32xf32, #blocked0>, %arg1: i1) { - // CHECK: nvvm.read.ptx.sreg.tid.x - %0 = triton_gpu.convert_layout %arg0 : (tensor<128x32xf32, #blocked0>) -> tensor<128x32xf32, #shared0> - scf.if %arg1 { -diff --git a/test/Target/tritongpu_to_llvmir.mlir b/test/Target/tritongpu_to_llvmir.mlir -index cafff3ca60..114d3a9eb2 100644 ---- a/test/Target/tritongpu_to_llvmir.mlir -+++ b/test/Target/tritongpu_to_llvmir.mlir -@@ -4,11 +4,11 @@ - // CHECK-LABEL: ; ModuleID = 'LLVMDialectModule' - // CHECK: define void @test_empty_kernel - // CHECK: !nvvm.annotations --// CHECK: !{void (i32, half addrspace(1)*)* @test_empty_kernel, !"maxntidx", i32 128} -+// CHECK: !{ptr @test_empty_kernel, !"maxntidx", i32 128} - - module attributes {"triton_gpu.num-warps" = 4 : i32} { - --func @test_empty_kernel(%lb : index, %A : !tt.ptr) { -+func.func @test_empty_kernel(%lb : index, %A : !tt.ptr) { - - return - } -diff --git a/test/Target/tritongpu_to_ptx.mlir b/test/Target/tritongpu_to_ptx.mlir -index 404e970a29..12742ad9e2 100644 ---- a/test/Target/tritongpu_to_ptx.mlir -+++ b/test/Target/tritongpu_to_ptx.mlir -@@ -6,7 +6,7 @@ - - module attributes {"triton_gpu.num-warps" = 4 : i32} { - --func @test_empty_kernel(%lb : index, %A : !tt.ptr) { -+func.func @test_empty_kernel(%lb : index, %A : !tt.ptr) { - - return - } -diff --git a/test/Triton/combine.mlir b/test/Triton/combine.mlir -index 050a3f7565..5ef6790e69 100644 ---- a/test/Triton/combine.mlir -+++ b/test/Triton/combine.mlir -@@ -2,10 +2,10 @@ - // RUN: triton-opt %s -split-input-file -canonicalize -triton-combine | FileCheck %s - - // CHECK-LABEL: @test_combine_dot_add_pattern --func @test_combine_dot_add_pattern() -> (tensor<128x128xf32>, tensor<128x128xf32>) { -- // CHECK: %[[d:.*]] = arith.constant dense<3.000000e+00> : tensor<128x128xf32> -- // CHECK: %[[b:.*]] = arith.constant dense<2.000000e+00> : tensor<128x128xf32> -- // CHECK: %[[a:.*]] = arith.constant dense<1.000000e+00> : tensor<128x128xf32> -+func.func @test_combine_dot_add_pattern() -> (tensor<128x128xf32>, tensor<128x128xf32>) { -+ // CHECK-DAG: %[[d:.*]] = arith.constant dense<3.000000e+00> : tensor<128x128xf32> -+ // CHECK-DAG: %[[b:.*]] = arith.constant dense<2.000000e+00> : tensor<128x128xf32> -+ // CHECK-DAG: %[[a:.*]] = arith.constant dense<1.000000e+00> : tensor<128x128xf32> - %a = arith.constant dense<1.0> : tensor<128x128xf32> - %b = arith.constant dense<2.0> : tensor<128x128xf32> - %zero = arith.constant dense<0.0> : tensor<128x128xf32> -@@ -24,7 +24,7 @@ func @test_combine_dot_add_pattern() -> (tensor<128x128xf32>, tensor<128x128xf32 - - - // COM: CHECK-LABEL: @test_combine_addptr_pattern --func @test_combine_addptr_pattern(%base: !tt.ptr) -> tensor<8x!tt.ptr> { -+func.func @test_combine_addptr_pattern(%base: !tt.ptr) -> tensor<8x!tt.ptr> { - %off0 = arith.constant 10 : i32 - %off1 = arith.constant 15 : i32 - -@@ -47,46 +47,46 @@ func @test_combine_addptr_pattern(%base: !tt.ptr) -> tensor<8x!tt.ptr> - - - // CHECK-LABEL: @test_combine_select_masked_load_pattern --func @test_combine_select_masked_load_pattern(%ptr: tensor<8x!tt.ptr>, %cond: i1) -> (tensor<8xf32>, tensor<8xf32>) { -+func.func @test_combine_select_masked_load_pattern(%ptr: tensor<8x!tt.ptr>, %cond: i1) -> (tensor<8xf32>, tensor<8xf32>) { - %mask = tt.broadcast %cond : (i1) -> tensor<8xi1> - %false_val = arith.constant dense<0.0> : tensor<8xf32> - - // CHECK: %[[res1:.*]] = tt.load %{{.*}}, %{{.*}}, %{{.*}} {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<8xf32> - %x = tt.load %ptr, %mask, %false_val {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<8xf32> -- %0 = select %cond, %x, %false_val : tensor<8xf32> -+ %0 = arith.select %cond, %x, %false_val : tensor<8xf32> - - // CHECK: %[[res2:.*]] = tt.load %{{.*}}, %{{.*}}, %{{.*}} {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<8xf32> - %y = tt.load %ptr, %mask, %false_val {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<8xf32> -- %1 = select %cond, %y, %false_val : tensor<8xf32> -+ %1 = arith.select %cond, %y, %false_val : tensor<8xf32> - - // CHECK: return %[[res1]], %[[res2]] : tensor<8xf32>, tensor<8xf32> - return %0, %1 : tensor<8xf32>, tensor<8xf32> - } - - // CHECK-LABEL: @test_combine_select_masked_load_fail_pattern --func @test_combine_select_masked_load_fail_pattern(%ptr: tensor<8x!tt.ptr>, %dummy_load: tensor<8xf32>, %dummy_broadcast: tensor<8xi1>, %cond0: i1, %cond1: i1) -> (tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) { -+func.func @test_combine_select_masked_load_fail_pattern(%ptr: tensor<8x!tt.ptr>, %dummy_load: tensor<8xf32>, %dummy_broadcast: tensor<8xi1>, %cond0: i1, %cond1: i1) -> (tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) { - %false_val = arith.constant dense<0.0> : tensor<8xf32> - - // Case 1: value at the "load" position is not an "op". Select should not be canonicalized. -- // CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : tensor<8xf32> -- %0 = select %cond0, %dummy_load, %false_val : tensor<8xf32> -+ // CHECK: %{{.*}} = arith.select %{{.*}}, %{{.*}}, %{{.*}} : tensor<8xf32> -+ %0 = arith.select %cond0, %dummy_load, %false_val : tensor<8xf32> - - // Case 2: value at the "broadcast" position is not an "op". Select should not be canonicalized. - %real_load0 = tt.load %ptr, %dummy_broadcast, %false_val {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<8xf32> -- // CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : tensor<8xf32> -- %1 = select %cond0, %real_load0, %false_val : tensor<8xf32> -+ // CHECK: %{{.*}} = arith.select %{{.*}}, %{{.*}}, %{{.*}} : tensor<8xf32> -+ %1 = arith.select %cond0, %real_load0, %false_val : tensor<8xf32> - - // Case 3: condition of "broadcast" is not the same as the condition of "select". Select should not be canonicalized. - %cond0_ = tt.broadcast %cond0 : (i1) -> tensor<8xi1> - %real_load1 = tt.load %ptr, %cond0_, %false_val {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<8xf32> -- // CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : tensor<8xf32> -- %2 = select %cond1, %real_load1, %false_val : tensor<8xf32> -+ // CHECK: %{{.*}} = arith.select %{{.*}}, %{{.*}}, %{{.*}} : tensor<8xf32> -+ %2 = arith.select %cond1, %real_load1, %false_val : tensor<8xf32> - - return %0, %1, %2 : tensor<8xf32>, tensor<8xf32>, tensor<8xf32> - } - - // CHECK-LABEL: @test_combine_broadcast_constant_pattern --func @test_combine_broadcast_constant_pattern(%cst : f32) -> tensor<8x2xf32> { -+func.func @test_combine_broadcast_constant_pattern(%cst : f32) -> tensor<8x2xf32> { - // CHECK: %[[cst:.*]] = arith.constant dense<1.000000e+00> : tensor<8x2xf32> - %const = arith.constant dense<1.0> : tensor<8xf32> - %bst_out = tt.broadcast %const : (tensor<8xf32>) -> tensor<8x2xf32> -@@ -96,7 +96,7 @@ func @test_combine_broadcast_constant_pattern(%cst : f32) -> tensor<8x2xf32> { - } - - // CHECK-LABEL: @test_canonicalize_masked_load_pattern --func @test_canonicalize_masked_load_pattern(%ptr: tensor<8x!tt.ptr>) -> (tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) { -+func.func @test_canonicalize_masked_load_pattern(%ptr: tensor<8x!tt.ptr>) -> (tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) { - %true_mask = arith.constant dense : tensor<8xi1> - %false_mask = arith.constant dense : tensor<8xi1> - %other_val = arith.constant dense<0.0> : tensor<8xf32> -@@ -117,7 +117,7 @@ func @test_canonicalize_masked_load_pattern(%ptr: tensor<8x!tt.ptr>) -> (te - } - - // CHECK-LABEL: @test_canonicalize_masked_load_fail_pattern --func @test_canonicalize_masked_load_fail_pattern(%ptr: tensor<8x!tt.ptr>, %mask: tensor<8xi1>) -> (tensor<8xf32>, tensor<8xf32>) { -+func.func @test_canonicalize_masked_load_fail_pattern(%ptr: tensor<8x!tt.ptr>, %mask: tensor<8xi1>) -> (tensor<8xf32>, tensor<8xf32>) { - %other_val = arith.constant dense<0.0> : tensor<8xf32> - - // Case: value at the "mask" position is not an "op". Load should not be canonicalized. -@@ -130,7 +130,7 @@ func @test_canonicalize_masked_load_fail_pattern(%ptr: tensor<8x!tt.ptr>, % - } - - // CHECK-LABEL: @test_canonicalize_masked_store_pattern --func @test_canonicalize_masked_store_pattern(%ptr: tensor<8x!tt.ptr>, %val: tensor<8xf32>) { -+func.func @test_canonicalize_masked_store_pattern(%ptr: tensor<8x!tt.ptr>, %val: tensor<8xf32>) { - %true_mask = arith.constant dense : tensor<8xi1> - %false_mask = arith.constant dense : tensor<8xi1> - -@@ -144,7 +144,7 @@ func @test_canonicalize_masked_store_pattern(%ptr: tensor<8x!tt.ptr>, %val: - } - - // CHECK-LABEL: @test_canonicalize_masked_store_fail_pattern --func @test_canonicalize_masked_store_fail_pattern(%ptr: tensor<8x!tt.ptr>, %val: tensor<8xf32>, %mask: tensor<8xi1>) { -+func.func @test_canonicalize_masked_store_fail_pattern(%ptr: tensor<8x!tt.ptr>, %val: tensor<8xf32>, %mask: tensor<8xi1>) { - // Case: value at the "mask" position is not an "op". Store should not be canonicalized. - // CHECK: tt.store %{{.*}}, %{{.*}}, %{{.*}} : tensor<8xf32> - tt.store %ptr, %val, %mask : tensor<8xf32> -diff --git a/test/Triton/vecadd.mlir b/test/Triton/vecadd.mlir -index 0b69ef3054..f5019b1cdd 100644 ---- a/test/Triton/vecadd.mlir -+++ b/test/Triton/vecadd.mlir -@@ -1,7 +1,7 @@ - // RUN: triton-opt %s -verify-diagnostics - - module { -- func @add_kernel__Pfp32_Pfp32_Pfp32_i32_i32_i32__(%arg0: !tt.ptr, %arg1: !tt.ptr, %arg2: !tt.ptr, %arg3: i32, %arg4: i32, %arg5: i32) { -+ func.func @add_kernel__Pfp32_Pfp32_Pfp32_i32_i32_i32__(%arg0: !tt.ptr, %arg1: !tt.ptr, %arg2: !tt.ptr, %arg3: i32, %arg4: i32, %arg5: i32) { - %0 = tt.get_program_id {axis = 0 : i32} : i32 - %c256_i32 = arith.constant 256 : i32 - %1 = arith.muli %0, %c256_i32 : i32 -@@ -43,7 +43,7 @@ module { - } - } - // module { --// func @add_kernel__Pfp32_Pfp32_Pfp32_i32_i32_i32__(%arg0: !tt.ptr, %arg1: !tt.ptr, %arg2: !tt.ptr, %arg3: i32, %arg4: i32, %arg5: i32) { -+// func.func @add_kernel__Pfp32_Pfp32_Pfp32_i32_i32_i32__(%arg0: !tt.ptr, %arg1: !tt.ptr, %arg2: !tt.ptr, %arg3: i32, %arg4: i32, %arg5: i32) { - // %c64 = arith.constant 64 : index - // %c32 = arith.constant 32 : index - // %c0 = arith.constant 0 : index -diff --git a/test/TritonGPU/coalesce.mlir b/test/TritonGPU/coalesce.mlir -index 60e359f527..51cccccfbd 100644 ---- a/test/TritonGPU/coalesce.mlir -+++ b/test/TritonGPU/coalesce.mlir -@@ -19,7 +19,7 @@ module attributes {"triton_gpu.num-warps" = 4 : i32} { - // CHECK: [[store_val:%.*]] = triton_gpu.convert_layout {{.*}} -> tensor<64x64xf32, [[col_layout]]> - // CHECK: [[store_mask:%.*]] = triton_gpu.convert_layout {{.*}} -> tensor<64x64xi1, [[col_layout]]> - // CHECK: tt.store [[store_ptr]], [[store_val]], [[store_mask]] --func @transpose(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, -+func.func @transpose(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, - %arg1: i32 {tt.divisibility = 16 : i32}, - %arg2: !tt.ptr {tt.divisibility = 16 : i32}, - %arg3: i32 {tt.divisibility = 16 : i32}) { -diff --git a/test/TritonGPU/combine.mlir b/test/TritonGPU/combine.mlir -index 2c009ffa48..7e9cb9d504 100644 ---- a/test/TritonGPU/combine.mlir -+++ b/test/TritonGPU/combine.mlir -@@ -9,7 +9,7 @@ - // CHECK: [[col_layout:#.*]] = #triton_gpu.blocked<{sizePerThread = [4, 1], threadsPerWarp = [16, 2], warpsPerCTA = [4, 1], order = [0, 1]}> - // CHECK: [[col_layout_novec:#.*]] = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [32, 1], warpsPerCTA = [4, 1], order = [0, 1]}> - // CHECK-LABEL: cst --func @cst() -> tensor<1024xi32, #layout1> { -+func.func @cst() -> tensor<1024xi32, #layout1> { - %cst = arith.constant dense<0> : tensor<1024xi32, #layout0> - %1 = triton_gpu.convert_layout %cst : (tensor<1024xi32, #layout0>) -> tensor<1024xi32, #layout1> - // CHECK-NOT: triton_gpu.convert_layout -@@ -18,7 +18,7 @@ func @cst() -> tensor<1024xi32, #layout1> { - } - - // CHECK-LABEL: range --func @range() -> tensor<1024xi32, #layout1> { -+func.func @range() -> tensor<1024xi32, #layout1> { - %0 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32, #layout0> - %1 = triton_gpu.convert_layout %0 : (tensor<1024xi32, #layout0>) -> tensor<1024xi32, #layout1> - // CHECK-NOT: triton_gpu.convert_layout -@@ -27,7 +27,7 @@ func @range() -> tensor<1024xi32, #layout1> { - } - - // CHECK-LABEL: splat --func @splat(%arg0: i32) -> tensor<1024xi32, #layout1> { -+func.func @splat(%arg0: i32) -> tensor<1024xi32, #layout1> { - %0 = tt.splat %arg0 : (i32) -> tensor<1024xi32, #layout0> - %1 = triton_gpu.convert_layout %0 : (tensor<1024xi32, #layout0>) -> tensor<1024xi32, #layout1> - // CHECK-NOT: triton_gpu.convert_layout -@@ -36,7 +36,7 @@ func @splat(%arg0: i32) -> tensor<1024xi32, #layout1> { - } - - // CHECK-LABEL: remat --func @remat(%arg0: i32) -> tensor<1024xi32, #layout1> { -+func.func @remat(%arg0: i32) -> tensor<1024xi32, #layout1> { - %0 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32, #layout0> - %1 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32, #layout0> - %2 = arith.muli %0, %1 : tensor<1024xi32, #layout0> -@@ -56,7 +56,7 @@ func @remat(%arg0: i32) -> tensor<1024xi32, #layout1> { - } - - // CHECK-LABEL: remat_load_store --func @remat_load_store(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { -+func.func @remat_load_store(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { - %0 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32, #layout0> - %1 = tt.splat %arg : (!tt.ptr) -> tensor<64x!tt.ptr, #layout0> - %2 = tt.addptr %1, %0 : tensor<64x!tt.ptr, #layout0>, tensor<64xi32, #layout0> -@@ -70,7 +70,7 @@ func @remat_load_store(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { - - // Don't rematerialize vectorized loads - // CHECK-LABEL: remat_expensive --func @remat_expensive(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { -+func.func @remat_expensive(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { - %0 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32, #layout1> - %1 = tt.splat %arg : (!tt.ptr) -> tensor<64x!tt.ptr, #layout1> - %2 = tt.addptr %1, %0 : tensor<64x!tt.ptr, #layout1>, tensor<64xi32, #layout1> -@@ -85,7 +85,7 @@ func @remat_expensive(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { - - // Don't rematerialize loads when original and target layouts are different - // CHECK-LABEL: remat_multi_layout --func @remat_multi_layout(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { -+func.func @remat_multi_layout(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { - %0 = tt.make_range {end = 64 : i32, start = 0 : i32} : tensor<64xi32, #layout0> - %1 = tt.splat %arg : (!tt.ptr) -> tensor<64x!tt.ptr, #layout0> - %2 = tt.addptr %1, %0 : tensor<64x!tt.ptr, #layout0>, tensor<64xi32, #layout0> -@@ -100,7 +100,7 @@ func @remat_multi_layout(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { - - // Always rematerialize single value loads - // CHECK-LABEL: remat_single_value --func @remat_single_value(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { -+func.func @remat_single_value(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { - %0 = tt.splat %arg : (!tt.ptr) -> tensor<1x!tt.ptr, #layout1> - %1 = tt.load %0 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<1xi32, #layout1> - // CHECK-NOT: triton_gpu.convert_layout -@@ -111,7 +111,7 @@ func @remat_single_value(%arg: !tt.ptr {tt.divisibility = 16 : i32}) { - } - - // CHECK-LABEL: if --func @if(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 : i32}) { -+func.func @if(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 : i32}) { - // CHECK-NOT: triton_gpu.convert_layout - %c32_i32 = arith.constant dense<32> : tensor<1024xi32, #layout1> - %0 = tt.get_program_id {axis = 0 : i32} : i32 -@@ -128,7 +128,7 @@ func @if(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 : i32}) { - } - - // CHECK-LABEL: if_convert_else_not --func @if_convert_else_not(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 : i32}) { -+func.func @if_convert_else_not(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 : i32}) { - %c32_i32 = arith.constant dense<32> : tensor<1024xi32, #layout0> - %0 = tt.get_program_id {axis = 0 : i32} : i32 - %1 = tt.splat %0 : (i32) -> tensor<1024xi32, #layout0> -@@ -149,7 +149,7 @@ func @if_convert_else_not(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 - } - - // CHECK-LABEL: if_not_else_convert --func @if_not_else_convert(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 : i32}) { -+func.func @if_not_else_convert(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 : i32}) { - %c32_i32 = arith.constant dense<32> : tensor<1024xi32, #layout0> - %0 = tt.get_program_id {axis = 0 : i32} : i32 - %1 = tt.splat %0 : (i32) -> tensor<1024xi32, #layout0> -@@ -170,7 +170,7 @@ func @if_not_else_convert(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 - } - - // CHECK-LABEL: if_else_both_convert --func @if_else_both_convert(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 : i32}) { -+func.func @if_else_both_convert(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 : i32}) { - %c32_i32 = arith.constant dense<32> : tensor<1024xi32, #layout0> - %0 = tt.get_program_id {axis = 0 : i32} : i32 - %1 = tt.splat %0 : (i32) -> tensor<1024xi32, #layout0> -@@ -200,7 +200,7 @@ func @if_else_both_convert(%arg0: i32, %arg1: !tt.ptr {tt.divisibility = 16 - #blocked4 = #triton_gpu.blocked<{sizePerThread = [4, 1], threadsPerWarp = [16, 2], warpsPerCTA = [4, 1], order = [0, 1]}> - - // CHECK-LABEL: transpose --func @transpose(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32 {tt.divisibility = 16 : i32}) { -+func.func @transpose(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: i32 {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32 {tt.divisibility = 16 : i32}) { - // CHECK-NOT: triton_gpu.convert_layout - // CHECK: [[loaded_val:%.*]] = tt.load {{.*}}, {{%cst.*}}, {{%cst.*}} {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<64x64xf32, [[row_layout]]> - // CHECK: [[cvt_val:%.*]] = triton_gpu.convert_layout [[loaded_val]] : (tensor<64x64xf32, [[row_layout]]>) -> tensor<64x64xf32, [[col_layout]]> -@@ -241,7 +241,7 @@ func @transpose(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: i32 {tt - } - - // CHECK-LABEL: loop --func @loop(%arg0: !tt.ptr, %arg1: i32, %arg2: !tt.ptr, %arg3: i32, %arg4: i32) { -+func.func @loop(%arg0: !tt.ptr, %arg1: i32, %arg2: !tt.ptr, %arg3: i32, %arg4: i32) { - // CHECK-NOT: triton_gpu.convert_layout - // CHECK: [[loop_ret:%.*]]:2 = scf.for {{.*}} -> (tensor<64x64xf32, [[row_layout]]>, tensor<64x64x!tt.ptr, [[row_layout]]>) - // CHECK-NEXT: {{.*}} = tt.load {{.*}} : tensor<64x64xf32, [[row_layout]]> -@@ -295,7 +295,7 @@ func @loop(%arg0: !tt.ptr, %arg1: i32, %arg2: !tt.ptr, %arg3: i32, %ar - } - - // CHECK-LABEL: vecadd --func @vecadd(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32) { -+func.func @vecadd(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32) { - // CHECK-NOT: triton_gpu.convert_layout - %c256_i32 = arith.constant 256 : i32 - %0 = tt.get_program_id {axis = 0 : i32} : i32 -@@ -327,7 +327,7 @@ func @vecadd(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: i32 {tt.divisibility = 16 : i32}) { -+func.func @select(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: i32 {tt.divisibility = 16 : i32}) { - // CHECK-NOT: triton_gpu.convert_layout - %cst = arith.constant dense<30000> : tensor<1x1xi32, #blocked2> - %cst_0 = arith.constant dense<30000> : tensor<1x512xi32, #blocked2> -@@ -378,7 +378,7 @@ func @select(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: !tt.ptr {tt.divisibility = 16 : i32}, %arg4: !tt.ptr {tt.divisibility = 16 : i32}, %arg5: !tt.ptr {tt.divisibility = 16 : i32}, %arg6: !tt.ptr {tt.divisibility = 16 : i32}, %arg7: !tt.ptr {tt.divisibility = 16 : i32}, %arg8: !tt.ptr {tt.divisibility = 16 : i32}, %arg9: !tt.ptr {tt.divisibility = 16 : i32}, %arg10: !tt.ptr {tt.divisibility = 16 : i32}, %arg11: !tt.ptr {tt.divisibility = 16 : i32}, %arg12: !tt.ptr {tt.divisibility = 16 : i32}, %arg13: !tt.ptr {tt.divisibility = 16 : i32}, %arg14: !tt.ptr {tt.divisibility = 16 : i32}, %arg15: !tt.ptr {tt.divisibility = 16 : i32}, %arg16: i32 {tt.divisibility = 16 : i32}) { -+func.func public @long_func(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: !tt.ptr {tt.divisibility = 16 : i32}, %arg4: !tt.ptr {tt.divisibility = 16 : i32}, %arg5: !tt.ptr {tt.divisibility = 16 : i32}, %arg6: !tt.ptr {tt.divisibility = 16 : i32}, %arg7: !tt.ptr {tt.divisibility = 16 : i32}, %arg8: !tt.ptr {tt.divisibility = 16 : i32}, %arg9: !tt.ptr {tt.divisibility = 16 : i32}, %arg10: !tt.ptr {tt.divisibility = 16 : i32}, %arg11: !tt.ptr {tt.divisibility = 16 : i32}, %arg12: !tt.ptr {tt.divisibility = 16 : i32}, %arg13: !tt.ptr {tt.divisibility = 16 : i32}, %arg14: !tt.ptr {tt.divisibility = 16 : i32}, %arg15: !tt.ptr {tt.divisibility = 16 : i32}, %arg16: i32 {tt.divisibility = 16 : i32}) { - %cst = arith.constant dense<1.000000e+00> : tensor<1024xf32, #blocked0> - %cst_0 = arith.constant dense<5.000000e-04> : tensor<1024xf32, #blocked0> - %cst_1 = arith.constant dense<0.999499976> : tensor<1024xf32, #blocked0> -@@ -775,7 +775,7 @@ func public @long_func(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: - // A mnist model from torch inductor. - // Check if topological sort is working correct and there's no unnecessary convert - // CHECK-LABEL: mnist --func public @mnist(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: i32 {tt.divisibility = 16 : i32}, %arg3: i32) { -+func.func public @mnist(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: i32 {tt.divisibility = 16 : i32}, %arg3: i32) { - // CHECK-NOT: triton_gpu.convert_layout - %cst = arith.constant dense<10> : tensor<16x1xi32, #blocked2> - %cst_0 = arith.constant dense<10> : tensor<1x16xi32, #blocked3> -@@ -862,7 +862,7 @@ func public @mnist(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt. - #blocked5 = #triton_gpu.blocked<{sizePerThread = [1, 4], threadsPerWarp = [2, 16], warpsPerCTA = [8, 1], order = [1, 0]}> - // cmpf and cmpi have different operands and result types - // CHECK-LABEL: cmp --func public @cmp(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: !tt.ptr {tt.divisibility = 16 : i32}, %arg4: i32 {tt.divisibility = 16 : i32}, %arg5: i32 {tt.divisibility = 16 : i32}) { -+func.func public @cmp(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: !tt.ptr {tt.divisibility = 16 : i32}, %arg4: i32 {tt.divisibility = 16 : i32}, %arg5: i32 {tt.divisibility = 16 : i32}) { - %c64 = arith.constant 64 : index - %c2048 = arith.constant 2048 : index - %c0 = arith.constant 0 : index -diff --git a/test/TritonGPU/loop-pipeline.mlir b/test/TritonGPU/loop-pipeline.mlir -index 6ee3b15fbc..663f2da7b0 100644 ---- a/test/TritonGPU/loop-pipeline.mlir -+++ b/test/TritonGPU/loop-pipeline.mlir -@@ -10,7 +10,7 @@ - #A = #triton_gpu.dot_op<{opIdx = 0, parent = #C}> - #B = #triton_gpu.dot_op<{opIdx = 1, parent = #C}> - --// CHECK: func @matmul_loop -+// CHECK: func.func @matmul_loop - // CHECK-DAG: %[[CONSTANT_0:.*]] = arith.constant 0 : i32 - // CHECK-DAG: %[[CONSTANT_1:.*]] = arith.constant 1 : i32 - // CHECK-DAG: %[[CONSTANT_2:.*]] = arith.constant 2 : i32 -@@ -46,8 +46,8 @@ - // CHECK-DAG: %[[NEXT_PIPELINE_IDX:.*]] = arith.addi %[[PIPELINE_IDX]], %[[CONSTANT_1]] - // CHECK-DAG: %[[NEXT_LOOP_IDX:.*]] = arith.addi %[[LOOP_IDX]], %[[CONSTANT_1]] - // CHECK: scf.yield {{.*}}, {{.*}}, {{.*}}, %[[NEXT_A_BUFFER]], %[[NEXT_B_BUFFER]], %[[NEXT_A]], %[[NEXT_B]], {{.*}}, {{.*}}, {{.*}}, %[[NEXT_PIPELINE_IDX]], %[[NEXT_LOOP_IDX]] --func @matmul_loop(%lb : index, %ub : index, %step : index, -- %A : !tt.ptr {tt.divisibility = 16 : i32}, -+func.func @matmul_loop(%lb : index, %ub : index, %step : index, -+ %A : !tt.ptr {tt.divisibility = 16 : i32}, - %B : !tt.ptr {tt.divisibility = 16 : i32}) { - // A ptrs - %a_ptr_splat = tt.splat %A : (!tt.ptr) -> tensor<128x32x!tt.ptr, #AL> -@@ -61,7 +61,7 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, - %b_tmp1 = tt.expand_dims %b_tmp0 {axis = 0 : i32} : (tensor<128xi32, #BLs0>) -> tensor<1x128xi32, #BL> - %b_offs = tt.broadcast %b_tmp1 : (tensor<1x128xi32, #BL>) -> tensor<32x128xi32, #BL> - %b_ptr_init = tt.addptr %b_ptr_splat, %b_offs : tensor<32x128x!tt.ptr, #BL>, tensor<32x128xi32, #BL> -- -+ - - %a_mask = arith.constant dense : tensor<128x32xi1, #AL> - %a_other = arith.constant dense<0.00e+00> : tensor<128x32xf16, #AL> -@@ -88,7 +88,7 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, - } - - --// CHECK: func @matmul_loop_nested -+// CHECK: func.func @matmul_loop_nested - // CHECK-DAG: %[[CONSTANT_0:.*]] = arith.constant 0 : i32 - // CHECK-DAG: %[[CONSTANT_1:.*]] = arith.constant 1 : i32 - // CHECK-DAG: %[[CONSTANT_2:.*]] = arith.constant 2 : i32 -@@ -118,8 +118,8 @@ func @matmul_loop(%lb : index, %ub : index, %step : index, - // CHECK-DAG: %[[NEXT_PIPELINE_IDX:.*]] = arith.addi %[[PIPELINE_IDX]], %[[CONSTANT_1]] - // CHECK-DAG: %[[NEXT_LOOP_IDX:.*]] = arith.addi %[[LOOP_IDX]], %[[CONSTANT_1]] - // CHECK: scf.yield {{.*}}, {{.*}}, {{.*}}, %[[NEXT_A_BUFFER]], %[[NEXT_B_BUFFER]], %[[NEXT_A]], %[[NEXT_B]], {{.*}}, {{.*}}, {{.*}}, %[[NEXT_PIPELINE_IDX]], %[[NEXT_LOOP_IDX]] --func @matmul_loop_nested(%lb : index, %ub : index, %step : index, -- %A : !tt.ptr {tt.divisibility = 16 : i32}, -+func.func @matmul_loop_nested(%lb : index, %ub : index, %step : index, -+ %A : !tt.ptr {tt.divisibility = 16 : i32}, - %B : !tt.ptr {tt.divisibility = 16 : i32}) { - scf.for %iv0 = %lb to %ub step %step { - // A ptrs -@@ -134,7 +134,7 @@ func @matmul_loop_nested(%lb : index, %ub : index, %step : index, - %b_tmp1 = tt.expand_dims %b_tmp0 {axis = 0 : i32} : (tensor<128xi32, #BLs0>) -> tensor<1x128xi32, #BL> - %b_offs = tt.broadcast %b_tmp1 : (tensor<1x128xi32, #BL>) -> tensor<32x128xi32, #BL> - %b_ptr_init = tt.addptr %b_ptr_splat, %b_offs : tensor<32x128x!tt.ptr, #BL>, tensor<32x128xi32, #BL> -- -+ - %a_mask = arith.constant dense : tensor<128x32xi1, #AL> - %a_other = arith.constant dense<0.00e+00> : tensor<128x32xf16, #AL> - %b_mask = arith.constant dense : tensor<32x128xi1, #BL> -@@ -161,7 +161,7 @@ func @matmul_loop_nested(%lb : index, %ub : index, %step : index, - } - - --// CHECK: func @matmul_loop_single_pipeline -+// CHECK: func.func @matmul_loop_single_pipeline - // CHECK-DAG: %[[CONSTANT_0:.*]] = arith.constant 0 : i32 - // CHECK-DAG: %[[CONSTANT_1:.*]] = arith.constant 1 : i32 - // CHECK-DAG: %[[CONSTANT_2:.*]] = arith.constant 2 : i32 -@@ -183,8 +183,8 @@ func @matmul_loop_nested(%lb : index, %ub : index, %step : index, - // CHECK-DAG: %[[NEXT_PIPELINE_IDX:.*]] = arith.addi %[[PIPELINE_IDX]], %[[CONSTANT_1]] - // CHECK-DAG: %[[NEXT_LOOP_IDX:.*]] = arith.addi %[[LOOP_IDX]], %[[CONSTANT_1]] - // CHECK: scf.yield {{.*}}, {{.*}}, %[[NEXT_B_BUFFER]], %[[NEXT_B]], {{.*}}, {{.*}}, %[[NEXT_PIPELINE_IDX]], %[[NEXT_LOOP_IDX]] --func @matmul_loop_single_pipeline(%lb : index, %ub : index, %step : index, -- %A : !tt.ptr {tt.divisibility = 16 : i32}, -+func.func @matmul_loop_single_pipeline(%lb : index, %ub : index, %step : index, -+ %A : !tt.ptr {tt.divisibility = 16 : i32}, - %B : !tt.ptr {tt.divisibility = 16 : i32}) { - // A ptrs - %a_ptr_splat = tt.splat %A : (!tt.ptr) -> tensor<128x32x!tt.ptr, #AL> -diff --git a/test/TritonGPU/matmul.mlir b/test/TritonGPU/matmul.mlir -index 9bd5318e1e..01dc3f0ab1 100644 ---- a/test/TritonGPU/matmul.mlir -+++ b/test/TritonGPU/matmul.mlir -@@ -4,7 +4,7 @@ - // CHECK: offset = 49152, size = 49152 - // CHECK: size = 98304 - module { --func @matmul_kernel__Pfp32_Pfp32_Pfp32_i32_i32_i32_i32_i32_i32_i32_i32_i32__12c64_13c64_14c64_15c8(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32 {tt.divisibility = 16 : i32}, %arg7: i32, %arg8: i32 {tt.divisibility = 16 : i32}, %arg9: i32, %arg10: i32 {tt.divisibility = 16 : i32}, %arg11: i32) { -+func.func @matmul_kernel__Pfp32_Pfp32_Pfp32_i32_i32_i32_i32_i32_i32_i32_i32_i32__12c64_13c64_14c64_15c8(%arg0: !tt.ptr {tt.divisibility = 16 : i32}, %arg1: !tt.ptr {tt.divisibility = 16 : i32}, %arg2: !tt.ptr {tt.divisibility = 16 : i32}, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32 {tt.divisibility = 16 : i32}, %arg7: i32, %arg8: i32 {tt.divisibility = 16 : i32}, %arg9: i32, %arg10: i32 {tt.divisibility = 16 : i32}, %arg11: i32) { - %cst = arith.constant dense : tensor<64x64xi1> - %c64 = arith.constant 64 : index - %c0 = arith.constant 0 : index -@@ -22,7 +22,7 @@ func @matmul_kernel__Pfp32_Pfp32_Pfp32_i32_i32_i32_i32_i32_i32_i32_i32_i32__12c6 - %7 = arith.muli %6, %c8_i32 : i32 - %8 = arith.subi %2, %7 : i32 - %9 = arith.cmpi slt, %8, %c8_i32 : i32 -- %10 = select %9, %8, %c8_i32 : i32 -+ %10 = arith.select %9, %8, %c8_i32 : i32 - %11 = arith.remsi %0, %10 : i32 - %12 = arith.addi %7, %11 : i32 - %13 = arith.remsi %0, %5 : i32 -diff --git a/test/TritonGPU/prefetch.mlir b/test/TritonGPU/prefetch.mlir -index 52b4dddec1..b427547890 100644 ---- a/test/TritonGPU/prefetch.mlir -+++ b/test/TritonGPU/prefetch.mlir -@@ -11,7 +11,7 @@ - #B_OP = #triton_gpu.dot_op<{opIdx = 1, parent = #C}> - - --// CHECK: func @matmul_loop -+// CHECK: func.func @matmul_loop - // CHECK-DAG: %[[A0_PREFETCH_SMEM:.*]] = tensor.extract_slice %[[A0:.*]][0, 0] [128, 16] - // CHECK-DAG: %[[A0_PREFETCH:.*]] = triton_gpu.convert_layout %[[A0_PREFETCH_SMEM]] - // CHECK-DAG: %[[B0_PREFETCH_SMEM:.*]] = tensor.extract_slice %[[B0:.*]][0, 0] [16, 128] -@@ -28,7 +28,7 @@ - // CHECK-DAG: %[[NEXT_B_PREFETCH_SMEM:.*]] = tensor.extract_slice {{.*}}[0, 0] [16, 128] - // CHECK-DAG: %[[NEXT_B_PREFETCH:.*]] = triton_gpu.convert_layout %[[NEXT_B_PREFETCH_SMEM]] - // CHECK: scf.yield {{.*}}, {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[NEXT_A_PREFETCH]], %[[NEXT_B_PREFETCH]] --func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { -+func.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr, %B : !tt.ptr) { - %a_ptr_init = tt.broadcast %A : (!tt.ptr) -> tensor<128x32x!tt.ptr, #AL> - %b_ptr_init = tt.broadcast %B : (!tt.ptr) -> tensor<32x128x!tt.ptr, #BL> - -diff --git a/test/TritonGPU/update-mma-for-volta.mlir b/test/TritonGPU/update-mma-for-volta.mlir -index d587fffcca..7571ec6185 100644 ---- a/test/TritonGPU/update-mma-for-volta.mlir -+++ b/test/TritonGPU/update-mma-for-volta.mlir -@@ -15,7 +15,7 @@ - // CHECK: [[new_mma:#mma.*]] = #triton_gpu.mma<{versionMajor = 1, versionMinor = 3, warpsPerCTA = [4, 2]}> - module attributes {"triton_gpu.num-warps" = 16 : i32} { - // CHECK-LABEL: dot_mmav1 -- func @dot_mmav1(%A: tensor<64x64xf16, #blocked0>, %B: tensor<64x64xf16, #blocked0>) -> tensor<64x64xf32, #blocked0> { -+ func.func @dot_mmav1(%A: tensor<64x64xf16, #blocked0>, %B: tensor<64x64xf16, #blocked0>) -> tensor<64x64xf32, #blocked0> { - %C = arith.constant dense<0.000000e+00> : tensor<64x64xf32, #blocked0> - %AA = triton_gpu.convert_layout %A : (tensor<64x64xf16, #blocked0>) -> tensor<64x64xf16, #dot_operand_a> - %BB = triton_gpu.convert_layout %B : (tensor<64x64xf16, #blocked0>) -> tensor<64x64xf16, #dot_operand_b> -@@ -50,7 +50,7 @@ module attributes {"triton_gpu.num-warps" = 16 : i32} { - - module attributes {"triton_gpu.num-warps" = 16 : i32} { - // CHECK-LABEL: dot_mmav1 -- func @dot_mmav1(%A: tensor<64x64xf16, #blocked0>, %B: tensor<64x64xf16, #blocked0>) -> tensor<64x64xf32, #blocked0> { -+ func.func @dot_mmav1(%A: tensor<64x64xf16, #blocked0>, %B: tensor<64x64xf16, #blocked0>) -> tensor<64x64xf32, #blocked0> { - %C = arith.constant dense<0.000000e+00> : tensor<64x64xf32, #blocked0> - %AA = triton_gpu.convert_layout %A : (tensor<64x64xf16, #blocked0>) -> tensor<64x64xf16, #dot_operand_a> - %BB = triton_gpu.convert_layout %B : (tensor<64x64xf16, #blocked0>) -> tensor<64x64xf16, #dot_operand_b> -diff --git a/test/lib/Analysis/TestAlias.cpp b/test/lib/Analysis/TestAlias.cpp -index 88a4118fe9..3fd0cfd0d3 100644 ---- a/test/lib/Analysis/TestAlias.cpp -+++ b/test/lib/Analysis/TestAlias.cpp -@@ -9,10 +9,10 @@ using namespace mlir; - namespace { - - struct TestAliasPass -- : public PassWrapper> { -+ : public PassWrapper> { -+ -+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAliasPass); - -- // LLVM15+ -- // MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAliasPass); - static void print(StringRef name, SmallVector &vals, - raw_ostream &os) { - if (vals.empty()) -@@ -39,23 +39,24 @@ struct TestAliasPass - auto opName = SymbolTable::getSymbolName(operation).getValue().str(); - os << opName << "\n"; - -- SharedMemoryAliasAnalysis analysis(&getContext()); -- analysis.run(operation); -+ std::unique_ptr solver = createDataFlowSolver(); -+ SharedMemoryAliasAnalysis *analysis = -+ solver->load(); -+ if (failed(solver->initializeAndRun(operation))) -+ return signalPassFailure(); - - AsmState state(operation->getParentOfType()); - // Get operation ids of value's aliases - auto getAllocOpNames = [&](Value value) { -- LatticeElement *latticeElement = -- analysis.lookupLatticeElement(value); -+ dataflow::Lattice *latticeElement = -+ analysis->getLatticeElement(value); - SmallVector opNames; -- if (latticeElement) { -+ if (latticeElement && !latticeElement->isUninitialized()) { - auto &info = latticeElement->getValue(); -- if (!info.getAllocs().empty()) { -- for (auto &alias : info.getAllocs()) { -- auto opName = -- getValueOperandName(alias.getDefiningOp()->getResult(0), state); -- opNames.push_back(std::move(opName)); -- } -+ for (auto &alias : info.getAllocs()) { -+ auto opName = -+ getValueOperandName(alias.getDefiningOp()->getResult(0), state); -+ opNames.push_back(std::move(opName)); - } - } - // Ensure deterministic output -diff --git a/test/lib/Analysis/TestAllocation.cpp b/test/lib/Analysis/TestAllocation.cpp -index 84108c4d36..35e42242bd 100644 ---- a/test/lib/Analysis/TestAllocation.cpp -+++ b/test/lib/Analysis/TestAllocation.cpp -@@ -6,10 +6,9 @@ using namespace mlir; - namespace { - - struct TestAllocationPass -- : public PassWrapper> { -+ : public PassWrapper> { - -- // LLVM15+ -- // MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAllocationPass); -+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAllocationPass); - - StringRef getArgument() const final { return "test-print-allocation"; } - StringRef getDescription() const final { -diff --git a/test/lib/Analysis/TestAxisInfo.cpp b/test/lib/Analysis/TestAxisInfo.cpp -index a5205bb0a0..22347c32f0 100644 ---- a/test/lib/Analysis/TestAxisInfo.cpp -+++ b/test/lib/Analysis/TestAxisInfo.cpp -@@ -1,25 +1,15 @@ - #include "mlir/Pass/Pass.h" - #include "triton/Analysis/AxisInfo.h" -+#include "triton/Analysis/Utility.h" - - using namespace mlir; - - namespace { - - struct TestAxisInfoPass -- : public PassWrapper> { -+ : public PassWrapper> { - -- // LLVM15+ -- // MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAlignmentPass); -- -- void print(const std::string &name, raw_ostream &os, ArrayRef vals) { -- os << name << ": ["; -- for (size_t d = 0; d < vals.size(); d++) { -- if (d != 0) -- os << ", "; -- os << vals[d]; -- } -- os << "]"; -- } -+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAxisInfoPass); - - StringRef getArgument() const final { return "test-print-alignment"; } - StringRef getDescription() const final { -@@ -30,38 +20,19 @@ struct TestAxisInfoPass - Operation *operation = getOperation(); - auto &os = llvm::errs(); - auto opName = SymbolTable::getSymbolName(operation).getValue().str(); -- os << opName << "\n"; -- AxisInfoAnalysis analysis(&getContext()); -- analysis.run(operation); -+ os << "@" << opName << "\n"; -+ -+ std::unique_ptr solver = createDataFlowSolver(); -+ AxisInfoAnalysis *analysis = solver->load(); -+ if (failed(solver->initializeAndRun(operation))) -+ return signalPassFailure(); - operation->walk([&](Operation *op) { - if (op->getNumResults() < 1) - return; - for (Value result : op->getResults()) { -- // std::ostringstream oss; -- // result.print(oss); -- // os << " => "; -- LatticeElement *latticeElement = -- analysis.lookupLatticeElement(result); -- if (!latticeElement) { -- os << "None\n"; -- return; -- } -- AxisInfo &info = latticeElement->getValue(); -- print("Contiguity", os, info.getContiguity()); -- os << " ; "; -- print("Divisibility", os, info.getDivisibility()); -- os << " ; "; -- print("Constancy", os, info.getConstancy()); -- os << " ; "; -- auto constantValue = info.getConstantValue(); -- os << "ConstantValue: ["; -- if (constantValue.has_value()) -- os << constantValue.value(); -- else -- os << "None"; -- os << "] ( "; - result.print(os); -- os << " ) "; -+ os << " => "; -+ analysis->getLatticeElement(result)->getValue().print(os); - os << "\n"; - } - }); -diff --git a/test/lib/Analysis/TestMembar.cpp b/test/lib/Analysis/TestMembar.cpp -index df4279fe24..ab9b9f3fb7 100644 ---- a/test/lib/Analysis/TestMembar.cpp -+++ b/test/lib/Analysis/TestMembar.cpp -@@ -1,4 +1,4 @@ --#include "mlir/Dialect/GPU/GPUDialect.h" -+#include "mlir/Dialect/GPU/IR/GPUDialect.h" - #include "mlir/IR/Dialect.h" - #include "mlir/Pass/Pass.h" - #include "triton/Analysis/Allocation.h" -@@ -9,10 +9,9 @@ using namespace mlir; - namespace { - - struct TestMembarPass -- : public PassWrapper> { -+ : public PassWrapper> { - -- // LLVM15+ -- // MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestMembarPass); -+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestMembarPass); - - StringRef getArgument() const final { return "test-print-membar"; } - StringRef getDescription() const final { diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index 16751903fe50..e2d512318111 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -8351,7 +8351,7 @@ self: super: with self; { open-meteo = callPackage ../development/python-modules/open-meteo { }; - openai-triton = callPackage ../development/python-modules/openai-triton { llvmPackages = pkgs.rocmPackages.llvm; }; + openai-triton = callPackage ../development/python-modules/openai-triton { cudaPackages = pkgs.cudaPackages_12_0; }; openai-triton-bin = callPackage ../development/python-modules/openai-triton/bin.nix { }; From 68c237239bf1b10f512a53308ef22c68b8d2feb3 Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 7 Oct 2023 00:33:21 -0500 Subject: [PATCH 26/30] frugally_deep: init at 0.15.24-p0 --- .../libraries/frugally-deep/default.nix | 53 +++++++++++++++++++ pkgs/top-level/all-packages.nix | 2 + 2 files changed, 55 insertions(+) create mode 100644 pkgs/development/libraries/frugally-deep/default.nix diff --git a/pkgs/development/libraries/frugally-deep/default.nix b/pkgs/development/libraries/frugally-deep/default.nix new file mode 100644 index 000000000000..f275ec5f02ba --- /dev/null +++ b/pkgs/development/libraries/frugally-deep/default.nix @@ -0,0 +1,53 @@ +{ lib +, stdenv +, fetchFromGitHub +, gitUpdater +, cmake +, functionalplus +, eigen +, nlohmann_json +, doctest +, python3Packages +, buildTests ? false # Needs tensorflow +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "frugally-deep"; + version = "0.15.24-p0"; + + src = fetchFromGitHub { + owner = "Dobiasd"; + repo = "frugally-deep"; + rev = "v${finalAttrs.version}"; + hash = "sha256-yg2SMsYOOSOgsdwIH1bU3iPM45z6c7WeIrgOddt3um4="; + }; + + nativeBuildInputs = [ + cmake + ] ++ lib.optionals buildTests [ + python3Packages.python + python3Packages.numpy + ]; + + buildInputs = lib.optionals buildTests [ + doctest + python3Packages.tensorflow + ]; + + propagatedBuildInputs = [ + functionalplus + eigen + nlohmann_json + ]; + + cmakeFlags = lib.optionals buildTests [ "-DFDEEP_BUILD_UNITTEST=ON" ]; + passthru.updateScript = gitUpdater; + + meta = with lib; { + description = "Header-only library for using Keras (TensorFlow) models in C++"; + homepage = "https://github.com/Dobiasd/frugally-deep"; + license = with licenses; [ mit ]; + maintainers = with maintainers; [ Madouura ]; + platforms = platforms.linux; + }; +}) diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index a62783c951af..6f54e51f0f95 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -685,6 +685,8 @@ with pkgs; frugal = callPackage ../development/tools/frugal { }; + frugally-deep = callPackage ../development/libraries/frugally-deep { }; + functiontrace-server = callPackage ../development/tools/functiontrace-server { }; gendef = callPackage ../development/tools/gendef { }; From 1abbe92d900f117b8a537d7be59e5383303c11f2 Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 7 Oct 2023 05:39:37 -0500 Subject: [PATCH 27/30] rocmPackages.rocblas: split up output for hydra caching --- .../rocm-modules/5/rocblas/default.nix | 259 +++++++++++------- 1 file changed, 159 insertions(+), 100 deletions(-) diff --git a/pkgs/development/rocm-modules/5/rocblas/default.nix b/pkgs/development/rocm-modules/5/rocblas/default.nix index f1cd81df663f..76dc38850d57 100644 --- a/pkgs/development/rocm-modules/5/rocblas/default.nix +++ b/pkgs/development/rocm-modules/5/rocblas/default.nix @@ -2,6 +2,7 @@ , stdenv , fetchFromGitHub , rocmUpdateScript +, runCommand , cmake , rocm-cmake , clr @@ -24,11 +25,147 @@ , tensileLibFormat ? "msgpack" , gpuTargets ? [ "all" ] }: +let + rocblas = stdenv.mkDerivation (finalAttrs: { + pname = "rocblas"; + version = "5.7.0"; -# rocBLAS is 3.7GB... I'll have to figure out hydra in another PR -stdenv.mkDerivation (finalAttrs: { - pname = "rocblas"; - version = "5.7.0"; + outputs = [ + "out" + ] ++ lib.optionals buildTests [ + "test" + ] ++ lib.optionals buildBenchmarks [ + "benchmark" + ]; + + src = fetchFromGitHub { + owner = "ROCmSoftwarePlatform"; + repo = "rocBLAS"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-3wKnwvAra8u9xqlC05wUD+gSoBILTVJFU2cIV6xv3Lk="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + clr + ]; + + buildInputs = [ + python3 + ] ++ lib.optionals buildTensile [ + msgpack + libxml2 + python3Packages.msgpack + python3Packages.joblib + ] ++ lib.optionals buildTests [ + gtest + ] ++ lib.optionals (buildTests || buildBenchmarks) [ + gfortran + openmp + amd-blis + ] ++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [ + python3Packages.pyyaml + ]; + + cmakeFlags = [ + "-DCMAKE_C_COMPILER=hipcc" + "-DCMAKE_CXX_COMPILER=hipcc" + "-Dpython=python3" + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + "-DBUILD_WITH_TENSILE=${if buildTensile then "ON" else "OFF"}" + # Manually define CMAKE_INSTALL_ + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + ] ++ lib.optionals buildTensile [ + "-DVIRTUALENV_HOME_DIR=/build/source/tensile" + "-DTensile_TEST_LOCAL_PATH=/build/source/tensile" + "-DTensile_ROOT=/build/source/tensile/lib/python${python3.pythonVersion}/site-packages/Tensile" + "-DTensile_LOGIC=${tensileLogic}" + "-DTensile_CODE_OBJECT_VERSION=${tensileCOVersion}" + "-DTensile_SEPARATE_ARCHITECTURES=${if tensileSepArch then "ON" else "OFF"}" + "-DTensile_LAZY_LIBRARY_LOADING=${if tensileLazyLib then "ON" else "OFF"}" + "-DTensile_LIBRARY_FORMAT=${tensileLibFormat}" + ] ++ lib.optionals buildTests [ + "-DBUILD_CLIENTS_TESTS=ON" + ] ++ lib.optionals buildBenchmarks [ + "-DBUILD_CLIENTS_BENCHMARKS=ON" + ] ++ lib.optionals (buildTests || buildBenchmarks) [ + "-DCMAKE_CXX_FLAGS=-I${amd-blis}/include/blis" + ]; + + # Tensile REALLY wants to write to the nix directory if we include it normally + postPatch = lib.optionalString buildTensile '' + cp -a ${tensile} tensile + chmod +w -R tensile + + # Rewrap Tensile + substituteInPlace tensile/bin/{.t*,.T*,*} \ + --replace "${tensile}" "/build/source/tensile" + + substituteInPlace CMakeLists.txt \ + --replace "include(virtualenv)" "" \ + --replace "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" + ''; + + postInstall = lib.optionalString buildTests '' + mkdir -p $test/bin + cp -a $out/bin/* $test/bin + rm $test/bin/*-bench || true + '' + lib.optionalString buildBenchmarks '' + mkdir -p $benchmark/bin + cp -a $out/bin/* $benchmark/bin + rm $benchmark/bin/*-test || true + '' + lib.optionalString (buildTests || buildBenchmarks ) '' + rm -rf $out/bin + ''; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + owner = finalAttrs.src.owner; + repo = finalAttrs.src.repo; + }; + + requiredSystemFeatures = [ "big-parallel" ]; + + meta = with lib; { + description = "BLAS implementation for ROCm platform"; + homepage = "https://github.com/ROCmSoftwarePlatform/rocBLAS"; + license = with licenses; [ mit ]; + maintainers = teams.rocm.members; + platforms = platforms.linux; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; + }; + }); + + gfx80 = runCommand "rocblas-gfx80" { preferLocalBuild = true; } '' + mkdir -p $out/lib/rocblas/library + cp -a ${rocblas}/lib/rocblas/library/*gfx80* $out/lib/rocblas/library + ''; + + gfx90 = runCommand "rocblas-gfx90" { preferLocalBuild = true; } '' + mkdir -p $out/lib/rocblas/library + cp -a ${rocblas}/lib/rocblas/library/*gfx90* $out/lib/rocblas/library + ''; + + gfx94 = runCommand "rocblas-gfx94" { preferLocalBuild = true; } '' + mkdir -p $out/lib/rocblas/library + cp -a ${rocblas}/lib/rocblas/library/*gfx94* $out/lib/rocblas/library + ''; + + gfx10 = runCommand "rocblas-gfx10" { preferLocalBuild = true; } '' + mkdir -p $out/lib/rocblas/library + cp -a ${rocblas}/lib/rocblas/library/*gfx10* $out/lib/rocblas/library + ''; + + gfx11 = runCommand "rocblas-gfx11" { preferLocalBuild = true; } '' + mkdir -p $out/lib/rocblas/library + cp -a ${rocblas}/lib/rocblas/library/*gfx11* $out/lib/rocblas/library + ''; +in stdenv.mkDerivation (finalAttrs: { + inherit (rocblas) pname version src passthru meta; outputs = [ "out" @@ -38,104 +175,26 @@ stdenv.mkDerivation (finalAttrs: { "benchmark" ]; - src = fetchFromGitHub { - owner = "ROCmSoftwarePlatform"; - repo = "rocBLAS"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-3wKnwvAra8u9xqlC05wUD+gSoBILTVJFU2cIV6xv3Lk="; - }; + dontUnpack = true; + dontPatch = true; + dontConfigure = true; + dontBuild = true; - nativeBuildInputs = [ - cmake - rocm-cmake - clr - ]; + installPhase = '' + runHook preInstall - buildInputs = [ - python3 - ] ++ lib.optionals buildTensile [ - msgpack - libxml2 - python3Packages.msgpack - python3Packages.joblib - ] ++ lib.optionals buildTests [ - gtest - ] ++ lib.optionals (buildTests || buildBenchmarks) [ - gfortran - openmp - amd-blis - ] ++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [ - python3Packages.pyyaml - ]; - - cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" - "-Dpython=python3" - "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - "-DBUILD_WITH_TENSILE=${if buildTensile then "ON" else "OFF"}" - # Manually define CMAKE_INSTALL_ - # See: https://github.com/NixOS/nixpkgs/pull/197838 - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_LIBDIR=lib" - "-DCMAKE_INSTALL_INCLUDEDIR=include" - ] ++ lib.optionals buildTensile [ - "-DVIRTUALENV_HOME_DIR=/build/source/tensile" - "-DTensile_TEST_LOCAL_PATH=/build/source/tensile" - "-DTensile_ROOT=/build/source/tensile/lib/python${python3.pythonVersion}/site-packages/Tensile" - "-DTensile_LOGIC=${tensileLogic}" - "-DTensile_CODE_OBJECT_VERSION=${tensileCOVersion}" - "-DTensile_SEPARATE_ARCHITECTURES=${if tensileSepArch then "ON" else "OFF"}" - "-DTensile_LAZY_LIBRARY_LOADING=${if tensileLazyLib then "ON" else "OFF"}" - "-DTensile_LIBRARY_FORMAT=${tensileLibFormat}" - ] ++ lib.optionals buildTests [ - "-DBUILD_CLIENTS_TESTS=ON" - ] ++ lib.optionals buildBenchmarks [ - "-DBUILD_CLIENTS_BENCHMARKS=ON" - ] ++ lib.optionals (buildTests || buildBenchmarks) [ - "-DCMAKE_CXX_FLAGS=-I${amd-blis}/include/blis" - ]; - - # Tensile REALLY wants to write to the nix directory if we include it normally - postPatch = lib.optionalString buildTensile '' - cp -a ${tensile} tensile - chmod +w -R tensile - - # Rewrap Tensile - substituteInPlace tensile/bin/{.t*,.T*,*} \ - --replace "${tensile}" "/build/source/tensile" - - substituteInPlace CMakeLists.txt \ - --replace "include(virtualenv)" "" \ - --replace "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" - ''; - - postInstall = lib.optionalString buildTests '' - mkdir -p $test/bin - cp -a $out/bin/* $test/bin - rm $test/bin/*-bench || true + mkdir -p $out + cp -a --no-preserve=mode ${rocblas}/* $out + ln -sf ${gfx80}/lib/rocblas/library/* $out/lib/rocblas/library + ln -sf ${gfx90}/lib/rocblas/library/* $out/lib/rocblas/library + ln -sf ${gfx94}/lib/rocblas/library/* $out/lib/rocblas/library + ln -sf ${gfx10}/lib/rocblas/library/* $out/lib/rocblas/library + ln -sf ${gfx11}/lib/rocblas/library/* $out/lib/rocblas/library + '' + lib.optionalString buildTests '' + cp -a ${rocblas.test} $test '' + lib.optionalString buildBenchmarks '' - mkdir -p $benchmark/bin - cp -a $out/bin/* $benchmark/bin - rm $benchmark/bin/*-test || true - '' + lib.optionalString (buildTests || buildBenchmarks ) '' - rm -rf $out/bin + cp -a ${rocblas.benchmark} $benchmark + '' + '' + runHook postInstall ''; - - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - - requiredSystemFeatures = [ "big-parallel" ]; - - meta = with lib; { - description = "BLAS implementation for ROCm platform"; - homepage = "https://github.com/ROCmSoftwarePlatform/rocBLAS"; - license = with licenses; [ mit ]; - maintainers = teams.rocm.members; - platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; - }; }) From 8f7b7b0b4ce3343239d8c7c920b6101d8aee3f6b Mon Sep 17 00:00:00 2001 From: Madoura Date: Sat, 7 Oct 2023 07:20:04 -0500 Subject: [PATCH 28/30] rocmPackages.half: init at 5.7.0 --- pkgs/development/rocm-modules/5/default.nix | 5 +++ .../rocm-modules/5/half/default.nix | 39 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 pkgs/development/rocm-modules/5/half/default.nix diff --git a/pkgs/development/rocm-modules/5/default.nix b/pkgs/development/rocm-modules/5/default.nix index ac8a32ef5176..1e1aa35fa328 100644 --- a/pkgs/development/rocm-modules/5/default.nix +++ b/pkgs/development/rocm-modules/5/default.nix @@ -242,6 +242,11 @@ in rec { stdenv = llvm.rocmClangStdenv; }; + half = callPackage ./half { + inherit rocmUpdateScript rocm-cmake; + stdenv = llvm.rocmClangStdenv; + }; + miopen = callPackage ./miopen { inherit rocmUpdateScript rocm-cmake rocblas clang-ocl miopengemm composable_kernel rocm-comgr clr rocm-docs-core half; inherit (llvm) clang-tools-extra; diff --git a/pkgs/development/rocm-modules/5/half/default.nix b/pkgs/development/rocm-modules/5/half/default.nix new file mode 100644 index 000000000000..08c645848fa2 --- /dev/null +++ b/pkgs/development/rocm-modules/5/half/default.nix @@ -0,0 +1,39 @@ +{ lib +, stdenv +, fetchFromGitHub +, rocmUpdateScript +, cmake +, rocm-cmake +}: + +stdenv.mkDerivation (finalAttrs: { + pname = "half"; + version = "5.7.0"; + + src = fetchFromGitHub { + owner = "ROCmSoftwarePlatform"; + repo = "half"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-82It+/wm8+umBdQYn7lz/fS69h+f0mzwPdGxoJNYUq0="; + }; + + nativeBuildInputs = [ + cmake + rocm-cmake + ]; + + passthru.updateScript = rocmUpdateScript { + name = finalAttrs.pname; + owner = finalAttrs.src.owner; + repo = finalAttrs.src.repo; + }; + + meta = with lib; { + description = "C++ library for half precision floating point arithmetics"; + homepage = "https://github.com/ROCmSoftwarePlatform/half"; + license = with licenses; [ mit ]; + maintainers = teams.rocm.members; + platforms = platforms.unix; + broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version; + }; +}) From 58f70713eba42a8fe73c6c373e7e9b7525a2a69f Mon Sep 17 00:00:00 2001 From: Madoura Date: Sun, 8 Oct 2023 18:35:31 -0500 Subject: [PATCH 29/30] top-level/aliases: alias all old ROCm packages --- pkgs/top-level/aliases.nix | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/pkgs/top-level/aliases.nix b/pkgs/top-level/aliases.nix index 4053a661cb11..7f1225a54476 100644 --- a/pkgs/top-level/aliases.nix +++ b/pkgs/top-level/aliases.nix @@ -130,7 +130,9 @@ mapAliases ({ chocolateDoom = chocolate-doom; # Added 2023-05-01 chrome-gnome-shell = gnome-browser-connector; # Added 2022-07-27 citra = citra-nightly; # added 2022-05-17 + clang-ocl = throw "'clang-ocl' has been replaced with 'rocmPackages.clang-ocl'"; # Added 2023-10-08 inherit (libsForQt5.mauiPackages) clip; # added 2022-05-17 + composable_kernel = throw "'composable_kernel' has been replaced with 'rocmPackages.composable_kernel'"; # Added 2023-10-08 cpp-ipfs-api = cpp-ipfs-http-client; # Project has been renamed. Added 2022-05-15 crispyDoom = crispy-doom; # Added 2023-05-01 clasp = clingo; # added 2022-12-22 @@ -331,6 +333,18 @@ mapAliases ({ haxe_3_2 = throw "'haxe_3_2' has been removed because it is old and no longer used by any packages in nixpkgs"; # Added 2023-03-15 haxe_3_4 = throw "'haxe_3_4' has been removed because it is old and no longer used by any packages in nixpkgs"; # Added 2023-03-15 hepmc = throw "'hepmc' has been renamed to/replaced by 'hepmc2'"; # Converted to throw 2023-09-10 + hip = throw "'hip' has been removed in favor of 'rocmPackages.clr'"; # Added 2023-10-08 + hipcc = throw "'hipcc' has been replaced with 'rocmPackages.hipcc'"; # Added 2023-10-08 + hipify = throw "'hipify' has been replaced with 'rocmPackages.hipify'"; # Added 2023-10-08 + hipcub = throw "'hipcub' has been replaced with 'rocmPackages.hipcub'"; # Added 2023-10-08 + hipsparse = throw "'hipsparse' has been replaced with 'rocmPackages.hipsparse'"; # Added 2023-10-08 + hipfort = throw "'hipfort' has been replaced with 'rocmPackages.hipfort'"; # Added 2023-10-08 + hipfft = throw "'hipfft' has been replaced with 'rocmPackages.hipfft'"; # Added 2023-10-08 + hipsolver = throw "'hipsolver' has been replaced with 'rocmPackages.hipsolver'"; # Added 2023-10-08 + hipblas = throw "'hipblas' has been replaced with 'rocmPackages.hipblas'"; # Added 2023-10-08 + hip-amd = throw "'hip-amd' has been removed in favor of 'rocmPackages.clr'"; # Added 2023-10-08 + hip-common = throw "'hip-common' has been replaced with 'rocmPackages.hip-common'"; # Added 2023-10-08 + hip-nvidia = throw "'hip-nvidia' has been removed in favor of 'rocmPackages.clr'"; # Added 2023-10-08 ht-rust = xh; # Added 2021-02-13 hydra-unstable = hydra_unstable; # added 2022-05-10 @@ -403,6 +417,7 @@ mapAliases ({ latinmodern-math = lmmath; ldgallery = throw "'ldgallery' has been removed from nixpkgs. Use the Flake provided by ldgallery instead"; # Added 2023-07-26 lfs = dysk; # Added 2023-07-03 + llvmPackages_rocm = throw "'llvmPackages_rocm' has been replaced with 'rocmPackages.llvm'"; # Added 2023-10-08 libayatana-indicator-gtk3 = libayatana-indicator; # Added 2022-10-18 libayatana-appindicator-gtk3 = libayatana-appindicator; # Added 2022-10-18 libbencodetools = bencodetools; # Added 2022-07-30 @@ -526,6 +541,11 @@ mapAliases ({ meme = meme-image-generator; # Added 2021-04-21 mess = throw "'mess' has been renamed to/replaced by 'mame'"; # Converted to throw 2023-09-10 microsoft_gsl = microsoft-gsl; # Added 2023-05-26 + migraphx = throw "'migraphx' has been replaced with 'rocmPackages.migraphx'"; # Added 2023-10-08 + miopen = throw "'miopen' has been replaced with 'rocmPackages.miopen'"; # Added 2023-10-08 + miopengemm = throw "'miopengemm' has been replaced with 'rocmPackages.miopengemm'"; # Added 2023-10-08 + miopen-hip = throw "'miopen-hip' has been replaced with 'rocmPackages.miopen-hip'"; # Added 2023-10-08 + miopen-opencl = throw "'miopen-opencl' has been replaced with 'rocmPackages.miopen-opencl'"; # Added 2023-10-08 mime-types = mailcap; # Added 2022-01-21 minizip2 = pkgs.minizip-ng; # Added 2022-12-28 monero = monero-cli; # Added 2021-11-28 @@ -703,10 +723,41 @@ mapAliases ({ radare2-cutter = cutter; # Added 2021-03-30 rambox-pro = rambox; # Added 2022-12-12 rarian = throw "rarian has been removed as unused"; # Added 2023-07-05 + rccl = throw "'rccl' has been replaced with 'rocmPackages.rccl'"; # Added 2023-10-08 + rdc = throw "'rdc' has been replaced with 'rocmPackages.rdc'"; # Added 2023-10-08 retroshare06 = retroshare; rigsofrods = rigsofrods-bin; # Added 2023-03-22 ring-daemon = jami-daemon; # Added 2021-10-26 rockbox_utility = rockbox-utility; # Added 2022-03-17 + rocalution = throw "'rocalution' has been replaced with 'rocmPackages.rocalution'"; # Added 2023-10-08 + rocblas = throw "'rocblas' has been replaced with 'rocmPackages.rocblas'"; # Added 2023-10-08 + rocfft = throw "'rocfft' has been replaced with 'rocmPackages.rocfft'"; # Added 2023-10-08 + rocprim = throw "'rocprim' has been replaced with 'rocmPackages.rocprim'"; # Added 2023-10-08 + rocrand = throw "'rocrand' has been replaced with 'rocmPackages.rocrand'"; # Added 2023-10-08 + rocsparse = throw "'rocsparse' has been replaced with 'rocmPackages.rocsparse'"; # Added 2023-10-08 + rocthrust = throw "'rocthrust' has been replaced with 'rocmPackages.rocthrust'"; # Added 2023-10-08 + roctracer = throw "'roctracer' has been replaced with 'rocmPackages.roctracer'"; # Added 2023-10-08 + rocwmma = throw "'rocwmma' has been replaced with 'rocmPackages.rocwmma'"; # Added 2023-10-08 + rocclr = throw "'rocclr' has been removed in favor of 'rocmPackages.clr'"; # Added 2023-10-08 + rocdbgapi = throw "'rocdbgapi' has been replaced with 'rocmPackages.rocdbgapi'"; # Added 2023-10-08 + rocgdb = throw "'rocgdb' has been replaced with 'rocmPackages.rocgdb'"; # Added 2023-10-08 + rocprofiler = throw "'rocprofiler' has been replaced with 'rocmPackages.rocprofiler'"; # Added 2023-10-08 + rocsolver = throw "'rocsolver' has been replaced with 'rocmPackages.rocsolver'"; # Added 2023-10-08 + rocmClangStdenv = throw "'rocmClangStdenv' has been moved to 'rocmPackages' and is no longer public"; # Added 2023-10-08 + rocmUpdateScript = throw "'rocmUpdateScript' has been moved to 'rocmPackages' and is no longer public"; # Added 2023-10-08 + rocminfo = throw "'rocminfo' has been replaced with 'rocmPackages.rocminfo'"; # Added 2023-10-08 + rocmlir = throw "'rocmlir' has been replaced with 'rocmPackages.rocmlir'"; # Added 2023-10-08 + rocmlir-rock = throw "'rocmlir-rock' has been replaced with 'rocmPackages.rocmlir-rock'"; # Added 2023-10-08 + rocm-cmake = throw "'rocm-cmake' has been replaced with 'rocmPackages.rocm-cmake'"; # Added 2023-10-08 + rocm-comgr = throw "'rocm-comgr' has been replaced with 'rocmPackages.rocm-comgr'"; # Added 2023-10-08 + rocm-core = throw "'rocm-core' has been replaced with 'rocmPackages.rocm-core'"; # Added 2023-10-08 + rocm-device-libs = throw "'rccl' has been replaced with 'rocmPackages.rocm-device-libs'"; # Added 2023-10-08 + rocm-opencl-icd = lib.warn "'rocm-opencl-icd' has been replaced with 'rocmPackages.clr.icd'" rocmPackages.clr.icd; # Added 2023-10-08 + rocm-opencl-runtime = lib.warn "'rocm-opencl-runtime' has been replaced with 'rocmPackages.clr'" rocmPackages.clr; # Added 2023-10-08 + rocm-runtime = throw "'rocm-runtime' has been replaced with 'rocmPackages.rocm-runtime'"; # Added 2023-10-08 + rocm-smi = throw "'rocm-smi' has been replaced with 'rocmPackages.rocm-smi'"; # Added 2023-10-08 + rocm-thunk = throw "'rocm-thunk' has been replaced with 'rocmPackages.rocm-thunk'"; # Added 2023-10-08 + rocr-debug-agent = throw "'rocr-debug-agent' has been replaced with 'rocmPackages.rocr-debug-agent'"; # Added 2023-10-08 rome = throw "rome is no longer maintained, consider using biome instead"; # Added 2023-09-12 rpiboot-unstable = rpiboot; # Added 2021-07-30 rr-unstable = rr; # Added 2022-09-17 @@ -795,6 +846,7 @@ mapAliases ({ taro = taproot-assets; # Added 2023-07-04 tdesktop = telegram-desktop; # Added 2023-04-07 telegram-cli = throw "telegram-cli was removed because it was broken and abandoned upstream"; # Added 2023-07-28 + tensile = throw "'tensile' has been replaced with 'rocmPackages.tensile'"; # Added 2023-10-08 testVersion = testers.testVersion; # Added 2022-04-20 invalidateFetcherByDrvHash = testers.invalidateFetcherByDrvHash; # Added 2022-05-05 timescale-prometheus = promscale; # Added 2020-09-29 From aeccee810aabf22bc1bdda305e5611e9e61c406e Mon Sep 17 00:00:00 2001 From: Madoura Date: Tue, 10 Oct 2023 01:43:09 -0500 Subject: [PATCH 30/30] python3Packages.torch: Fix 'setuptools' not being found with ROCm build --- pkgs/development/python-modules/torch/default.nix | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index c9c400b57bd5..217c43de2327 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -101,6 +101,11 @@ let rocminfo rocm-thunk rocm-comgr rocm-device-libs rocm-runtime clr.icd hipify ]; + + # Fix `setuptools` not being found + postBuild = '' + rm -rf $out/nix-support + ''; }; brokenConditions = attrsets.filterAttrs (_: cond: cond) {