Merge pull request #220402 from ConnorBaker/fix/cuda-nvcc-compress-fatbins
cudaPackages: fix #220357; use -Xfatbin=-compress-all; prune default cudaCapabilities
This commit is contained in:
commit
13939e25a3
@ -151,6 +151,10 @@ backendStdenv.mkDerivation rec {
|
||||
# Refer to comments in the overrides for cuda_nvcc for explanation
|
||||
# CUDA_TOOLKIT_ROOT_DIR is legacy,
|
||||
# Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
|
||||
# NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the compiled
|
||||
# binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as
|
||||
# the default set of CUDA capabilities we build can regularly cause this to occur (for
|
||||
# example, with Magma).
|
||||
''
|
||||
mkdir -p $out/nix-support
|
||||
cat <<EOF >> $out/nix-support/setup-hook
|
||||
@ -160,7 +164,7 @@ backendStdenv.mkDerivation rec {
|
||||
if [ -z "\''${CUDAHOSTCXX-}" ]; then
|
||||
export CUDAHOSTCXX=${backendStdenv.cc}/bin;
|
||||
fi
|
||||
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin'
|
||||
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin -Xfatbin=-compress-all'
|
||||
EOF
|
||||
|
||||
# Move some libraries to the lib output so that programs that
|
||||
|
@ -4,12 +4,8 @@
|
||||
}:
|
||||
|
||||
# Type aliases
|
||||
# Gpu = {
|
||||
# archName: String, # e.g., "Hopper"
|
||||
# computeCapability: String, # e.g., "9.0"
|
||||
# minCudaVersion: String, # e.g., "11.8"
|
||||
# maxCudaVersion: String, # e.g., "12.0"
|
||||
# }
|
||||
# Gpu :: AttrSet
|
||||
# - See the documentation in ./gpus.nix.
|
||||
|
||||
let
|
||||
inherit (lib) attrsets lists strings trivial versions;
|
||||
@ -34,22 +30,40 @@ let
|
||||
# gpus :: List Gpu
|
||||
gpus = builtins.import ./gpus.nix;
|
||||
|
||||
# isVersionIn :: Gpu -> Bool
|
||||
# isSupported :: Gpu -> Bool
|
||||
isSupported = gpu:
|
||||
let
|
||||
inherit (gpu) minCudaVersion maxCudaVersion;
|
||||
lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion;
|
||||
upperBoundSatisfied = !(strings.versionOlder maxCudaVersion cudaVersion);
|
||||
upperBoundSatisfied = (maxCudaVersion == null)
|
||||
|| !(strings.versionOlder maxCudaVersion cudaVersion);
|
||||
in
|
||||
lowerBoundSatisfied && upperBoundSatisfied;
|
||||
|
||||
# isDefault :: Gpu -> Bool
|
||||
isDefault = gpu:
|
||||
let
|
||||
inherit (gpu) dontDefaultAfter;
|
||||
newGpu = dontDefaultAfter == null;
|
||||
recentGpu = newGpu || strings.versionAtLeast dontDefaultAfter cudaVersion;
|
||||
in
|
||||
recentGpu;
|
||||
|
||||
# supportedGpus :: List Gpu
|
||||
# GPUs which are supported by the provided CUDA version.
|
||||
supportedGpus = builtins.filter isSupported gpus;
|
||||
|
||||
# defaultGpus :: List Gpu
|
||||
# GPUs which are supported by the provided CUDA version and we want to build for by default.
|
||||
defaultGpus = builtins.filter isDefault supportedGpus;
|
||||
|
||||
# supportedCapabilities :: List Capability
|
||||
supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;
|
||||
|
||||
# defaultCapabilities :: List Capability
|
||||
# The default capabilities to target, if not overridden by the user.
|
||||
defaultCapabilities = lists.map (gpu: gpu.computeCapability) defaultGpus;
|
||||
|
||||
# cudaArchNameToVersions :: AttrSet String (List String)
|
||||
# Maps the name of a GPU architecture to different versions of that architecture.
|
||||
# For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
|
||||
@ -151,6 +165,6 @@ assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == {
|
||||
# dropDot :: String -> String
|
||||
inherit dropDot;
|
||||
} // formatCapabilities {
|
||||
cudaCapabilities = config.cudaCapabilities or supportedCapabilities;
|
||||
cudaCapabilities = config.cudaCapabilities or defaultCapabilities;
|
||||
enableForwardCompat = config.cudaForwardCompat or true;
|
||||
}
|
||||
|
@ -1,110 +1,148 @@
|
||||
[
|
||||
# Type alias
|
||||
# Gpu = {
|
||||
# archName: String
|
||||
# - The name of the microarchitecture.
|
||||
# computeCapability: String
|
||||
# - The compute capability of the GPU.
|
||||
# minCudaVersion: String
|
||||
# - The minimum (inclusive) CUDA version that supports this GPU.
|
||||
# dontDefaultAfter: null | String
|
||||
# - The CUDA version after which to exclude this GPU from the list of default capabilities
|
||||
# we build. null means we always include this GPU in the default capabilities if it is
|
||||
# supported.
|
||||
# maxCudaVersion: null | String
|
||||
# - The maximum (inclusive) CUDA version that supports this GPU. null means there is no
|
||||
# maximum.
|
||||
# }
|
||||
{
|
||||
archName = "Kepler";
|
||||
computeCapability = "3.0";
|
||||
minCudaVersion = "10.0";
|
||||
dontDefaultAfter = "10.2";
|
||||
maxCudaVersion = "10.2";
|
||||
}
|
||||
{
|
||||
archName = "Kepler";
|
||||
computeCapability = "3.2";
|
||||
minCudaVersion = "10.0";
|
||||
dontDefaultAfter = "10.2";
|
||||
maxCudaVersion = "10.2";
|
||||
}
|
||||
{
|
||||
archName = "Kepler";
|
||||
computeCapability = "3.5";
|
||||
minCudaVersion = "10.0";
|
||||
dontDefaultAfter = "11.0";
|
||||
maxCudaVersion = "11.8";
|
||||
}
|
||||
{
|
||||
archName = "Kepler";
|
||||
computeCapability = "3.7";
|
||||
minCudaVersion = "10.0";
|
||||
dontDefaultAfter = "11.0";
|
||||
maxCudaVersion = "11.8";
|
||||
}
|
||||
{
|
||||
archName = "Maxwell";
|
||||
computeCapability = "5.0";
|
||||
minCudaVersion = "10.0";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = "11.0";
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Maxwell";
|
||||
computeCapability = "5.2";
|
||||
minCudaVersion = "10.0";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = "11.0";
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Maxwell";
|
||||
computeCapability = "5.3";
|
||||
minCudaVersion = "10.0";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = "11.0";
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Pascal";
|
||||
computeCapability = "6.0";
|
||||
minCudaVersion = "10.0";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = null;
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Pascal";
|
||||
computeCapability = "6.1";
|
||||
minCudaVersion = "10.0";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = null;
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Pascal";
|
||||
computeCapability = "6.2";
|
||||
minCudaVersion = "10.0";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = null;
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Volta";
|
||||
computeCapability = "7.0";
|
||||
minCudaVersion = "10.0";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = null;
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Volta";
|
||||
computeCapability = "7.2";
|
||||
minCudaVersion = "10.0";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = null;
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Turing";
|
||||
computeCapability = "7.5";
|
||||
minCudaVersion = "10.0";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = null;
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Ampere";
|
||||
computeCapability = "8.0";
|
||||
minCudaVersion = "11.2";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = null;
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Ampere";
|
||||
computeCapability = "8.6";
|
||||
minCudaVersion = "11.2";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = null;
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Ampere";
|
||||
computeCapability = "8.7";
|
||||
minCudaVersion = "11.5";
|
||||
maxCudaVersion = "12.0";
|
||||
# NOTE: dontDefaultAfter is purposefully below minCudaVersion (11.5) so this is never a capability we target by
|
||||
# default. 8.7 is the Jetson Orin series of devices which are a very specific platform.
|
||||
# We keep this entry here in case we ever want to target it explicitly, but we don't
|
||||
# want to target it by default.
|
||||
dontDefaultAfter = "11.4";
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Ada";
|
||||
computeCapability = "8.9";
|
||||
minCudaVersion = "11.8";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = null;
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
{
|
||||
archName = "Hopper";
|
||||
computeCapability = "9.0";
|
||||
minCudaVersion = "11.8";
|
||||
maxCudaVersion = "12.0";
|
||||
dontDefaultAfter = null;
|
||||
maxCudaVersion = null;
|
||||
}
|
||||
]
|
||||
|
@ -41,6 +41,10 @@ in
|
||||
# uses the last --compiler-bindir it gets on the command line.
|
||||
# FIXME: this results in "incompatible redefinition" warnings.
|
||||
# https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
|
||||
# NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the
|
||||
# compiled binaries. If binaries grow over 2GB, they will fail to link. This is a problem
|
||||
# for us, as the default set of CUDA capabilities we build can regularly cause this to
|
||||
# occur (for example, with Magma).
|
||||
postInstall = (oldAttrs.postInstall or "") + ''
|
||||
mkdir -p $out/nix-support
|
||||
cat <<EOF >> $out/nix-support/setup-hook
|
||||
@ -49,7 +53,7 @@ in
|
||||
if [ -z "\''${CUDAHOSTCXX-}" ]; then
|
||||
export CUDAHOSTCXX=${cc}/bin;
|
||||
fi
|
||||
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin'
|
||||
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin -Xfatbin=-compress-all'
|
||||
EOF
|
||||
'';
|
||||
});
|
||||
|
Loading…
Reference in New Issue
Block a user