Merge pull request #47411 from graham-at-target/multi-layered-images-crafted

Multi-Layered Docker Images
This commit is contained in:
lewo 2018-10-01 09:48:24 +02:00 committed by GitHub
commit 56b4db9710
No known key found for this signature in database
8 changed files with 926 additions and 6 deletions

View File

@ -682,6 +682,177 @@ hello latest de2bf4786de6 About a minute ago 25.2MB
<section xml:id="ssec-pkgs-dockerTools-buildLayeredImage">
Create a Docker image with many of the store paths being on their own layer
to improve sharing between images.
The name of the resulting image.
<varname>tag</varname> <emphasis>optional</emphasis>
Tag of the generated image.
<emphasis>Default:</emphasis> the output path's hash
<varname>contents</varname> <emphasis>optional</emphasis>
Top level paths in the container. Either a single derivation, or a list
of derivations.
<emphasis>Default:</emphasis> <literal>[]</literal>
<varname>config</varname> <emphasis>optional</emphasis>
Run-time configuration of the container. A full list of the options is
available in the
<link xlink:href="">
Docker Image Specification v1.2.0 </link>.
<emphasis>Default:</emphasis> <literal>{}</literal>
<varname>created</varname> <emphasis>optional</emphasis>
Date and time the layers were created. Follows the same
<literal>now</literal> exception supported by <literal>buildImage</literal>.
<emphasis>Default:</emphasis> <literal>1970-01-01T00:00:01Z</literal>
<varname>maxLayers</varname> <emphasis>optional</emphasis>
Maximum number of layers to create.
<emphasis>Default:</emphasis> <literal>24</literal>
<section xml:id="dockerTools-buildLayeredImage-arg-contents">
<title>Behavior of <varname>contents</varname> in the final image</title>
Each path directly listed in <varname>contents</varname> will have a
symlink in the root of the image.
For example:
pkgs.dockerTools.buildLayeredImage {
name = "hello";
contents = [ pkgs.hello ];
will create symlinks for all the paths in the <literal>hello</literal> package:
/bin/hello -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/bin/hello
/share/info/ -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/info/
/share/locale/bg/LC_MESSAGES/ -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/locale/bg/LC_MESSAGES/
<section xml:id="dockerTools-buildLayeredImage-arg-config">
<title>Automatic inclusion of <varname>config</varname> references</title>
The closure of <varname>config</varname> is automatically included in the
closure of the final image.
This allows you to make very simple Docker images with very little code.
This container will start up and run <command>hello</command>:
pkgs.dockerTools.buildLayeredImage {
name = "hello";
config.Cmd = [ "${pkgs.hello}/bin/hello" ];
<section xml:id="dockerTools-buildLayeredImage-arg-maxLayers">
<title>Adjusting <varname>maxLayers</varname></title>
Increasing the <varname>maxLayers</varname> increases the number of layers
which have a chance to be shared between different images.
Modern Docker installations support up to 128 layers, however older
versions support as few as 42.
If the produced image will not be extended by other Docker builds, it is
safe to set <varname>maxLayers</varname> to <literal>128</literal>.
However it will be impossible to extend the image further.
The first (<literal>maxLayers-2</literal>) most "popular" paths will have
their own individual layers, then layer #<literal>maxLayers-1</literal>
will contain all the remaining "unpopular" paths, and finally layer
#<literal>maxLayers</literal> will contain the Image configuration.
Docker's Layers are not inherently ordered: they are content-addressable
and are not explicitly layered until they are composed into an Image.
<section xml:id="ssec-pkgs-dockerTools-fetchFromRegistry">

View File

@ -58,5 +58,9 @@ import ./make-test.nix ({ pkgs, ... }: {
# Ensure Docker images can use an unstable date
$docker->succeed("docker load --input='${pkgs.dockerTools.examples.bash}'");
$docker->succeed("[ '1970-01-01T00:00:01Z' != \"\$(docker inspect ${pkgs.dockerTools.examples.unstableDate.imageName} | ${pkgs.jq}/bin/jq -r .[].Created)\" ]");
# Ensure Layered Docker images work
$docker->succeed("docker load --input='${pkgs.dockerTools.examples.layered-image}'");
$docker->succeed("docker run --rm ${pkgs.dockerTools.examples.layered-image.imageName}");

View File

@ -1,4 +1,5 @@
@ -19,6 +20,7 @@
@ -77,7 +79,9 @@ rec {
ln -sT ${docker.src}/components/engine/pkg/tarsum src/
go build
cp tarsum $out
mkdir -p $out/bin
cp tarsum $out/bin/
# buildEnv creates symlinks to dirs, which is hard to edit inside the overlay VM
@ -270,6 +274,81 @@ rec {
perl ${pkgs.pathsFromGraph} closure-* > $out/storePaths
# Create $maxLayers worth of Docker Layers, one layer per store path
# unless there are more paths than $maxLayers. In that case, create
# $maxLayers-1 for the most popular layers, and smush the remaining
# store paths into one final layer.
mkManyPureLayers = {
# Files to add to the layer.
# Docker has a 42-layer maximum, we pick 24 to ensure there is plenty
# of room for extension
maxLayers ? 24
runCommand "${name}-granular-docker-layers" {
inherit maxLayers;
paths = referencesByPopularity closure;
buildInputs = [ jshon rsync tarsum ];
enableParallelBuilding = true;
# Delete impurities for store path layers, so they don't get
# shared and taint other projects.
cat ${configJson} \
| jshon -d config \
| jshon -s "1970-01-01T00:00:01Z" -i created > generic.json
# The following code is fiddly w.r.t. ensuring every layer is
# created, and that no paths are missed. If you change the
# following head and tail call lines, double-check that your
# code behaves properly when the number of layers equals:
# maxLayers-1, maxLayers, and maxLayers+1
head -n $((maxLayers - 1)) $paths | cat -n | xargs -P$NIX_BUILD_CORES -n2 ${./}
if [ $(cat $paths | wc -l) -ge $maxLayers ]; then
tail -n+$maxLayers $paths | xargs ${./} $maxLayers
echo "Finished building layer '$name'"
mv ./layers $out
# Create a "Customisation" layer which adds symlinks at the root of
# the image to the root paths of the closure. Also add the config
# data like what command to run and the environment to run it in.
mkCustomisationLayer = {
# Files to add to the layer.
uid ? 0, gid ? 0,
runCommand "${name}-customisation-layer" {
buildInputs = [ jshon rsync tarsum ];
cp -r ${contents}/ ./layer
# Tar up the layer and throw it into 'layer.tar'.
echo "Packing layer..."
mkdir $out
tar -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf $out/layer.tar .
# Compute a checksum of the tarball.
echo "Computing layer checksum..."
tarhash=$(tarsum < $out/layer.tar)
# Add a 'checksum' field to the JSON, with the value set to the
# checksum of the tarball.
cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
# Indicate to docker that we're using schema version 1.0.
echo -n "1.0" > $out/VERSION
# Create a "layer" (set of files).
mkPureLayer = {
# Name of the layer
@ -287,7 +366,7 @@ rec {
runCommand "docker-layer-${name}" {
inherit baseJson contents extraCommands;
buildInputs = [ jshon rsync ];
buildInputs = [ jshon rsync tarsum ];
mkdir layer
@ -314,11 +393,11 @@ rec {
# Compute a checksum of the tarball.
echo "Computing layer checksum..."
tarsum=$(${tarsum} < $out/layer.tar)
tarhash=$(tarsum < $out/layer.tar)
# Add a 'checksum' field to the JSON, with the value set to the
# checksum of the tarball.
cat ${baseJson} | jshon -s "$tarsum" -i checksum > $out/json
cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
# Indicate to docker that we're using schema version 1.0.
echo -n "1.0" > $out/VERSION
@ -402,8 +481,8 @@ rec {
# Compute the tar checksum and add it to the output json.
echo "Computing checksum..."
ts=$(${tarsum} < $out/layer.tar)
cat ${baseJson} | jshon -s "$ts" -i checksum > $out/json
tarhash=$(${tarsum}/bin/tarsum < $out/layer.tar)
cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
# Indicate to docker that we're using schema version 1.0.
echo -n "1.0" > $out/VERSION
@ -411,6 +490,104 @@ rec {
buildLayeredImage = {
# Image Name
# Image tag; the hash of the Nix output path will be used if null
tag ? null,
# Files to put on the image (a nix store path or list of paths).
contents ? [],
# Docker config; e.g. what command to run on the container.
config ? {},
# Time of creation of the image. Passing "now" will make the
# created date be the time of building.
created ? "1970-01-01T00:00:01Z",
# Docker's lowest maximum layer limit is 42-layers for an old
# version of the AUFS graph driver. We pick 24 to ensure there is
# plenty of room for extension. I believe the actual maximum is
# 128.
maxLayers ? 24
uid = 0;
gid = 0;
baseName = baseNameOf name;
contentsEnv = symlinkJoin { name = "bulk-layers"; paths = (if builtins.isList contents then contents else [ contents ]); };
configJson = let
pure = writeText "${baseName}-config.json" (builtins.toJSON {
inherit created config;
architecture = "amd64";
os = "linux";
impure = runCommand "${baseName}-standard-dynamic-date.json"
{ buildInputs = [ jq ]; }
jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out
in if created == "now" then impure else pure;
bulkLayers = mkManyPureLayers {
name = baseName;
closure = writeText "closure" "${contentsEnv} ${configJson}";
# One layer will be taken up by the customisationLayer, so
# take up one less.
maxLayers = maxLayers - 1;
inherit configJson;
customisationLayer = mkCustomisationLayer {
name = baseName;
contents = contentsEnv;
baseJson = configJson;
inherit uid gid;
result = runCommand "docker-image-${baseName}.tar.gz" {
buildInputs = [ jshon pigz coreutils findutils jq ];
# Image name and tag must be lowercase
imageName = lib.toLower name;
imageTag = if tag == null then "" else lib.toLower tag;
baseJson = configJson;
} ''
${lib.optionalString (tag == null) ''
outName="$(basename "$out")"
outHash=$(echo "$outName" | cut -d - -f 1)
find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list
echo ${customisationLayer} >> layer-list
mkdir image
imageJson=$(cat ${configJson} | jq ". + {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}")
manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]")
for layer in $(cat layer-list); do
layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1)
layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1)
ln -s "$layer" "./image/$layerID"
manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= [\"$layerID/layer.tar\"] + .")
imageJson=$(echo "$imageJson" | jq ".history |= [{\"created\": \"$(jq -r .created ${configJson})\"}] + .")
imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= [\"sha256:$layerChecksum\"] + .")
imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1)
echo "$imageJson" > "image/$imageJsonChecksum.json"
manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"")
echo "$manifestJson" > image/manifest.json
jshon -n object \
-n object -s "$layerID" -i "$imageTag" \
-i "$imageName" > image/repositories
echo "Cooking the image..."
tar -C image --dereference --hard-dereference --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0 --mode=a-w --xform s:'^./':: -c . | pigz -nT > $out
echo "Finished."
# 1. extract the base image
# 2. create the layer
# 3. add layer deps to the layer itself, diffing with the base image

View File

@ -150,4 +150,11 @@ rec {
contents = [ pkgs.coreutils ];
created = "now";
# 10. Create a layered image
layered-image = pkgs.dockerTools.buildLayeredImage {
name = "layered-image";
tag = "latest";
config.Cmd = [ "${pkgs.hello}/bin/hello" ];

View File

@ -0,0 +1,24 @@
set -eu
echo "Creating layer #$layerNumber for $@"
mkdir -p "$layerPath"
tar -rpf "$layerPath/layer.tar" --hard-dereference --sort=name \
--mtime="@$SOURCE_DATE_EPOCH" \
--owner=0 --group=0 "$@"
# Compute a checksum of the tarball.
tarhash=$(tarsum < $layerPath/layer.tar)
# Add a 'checksum' field to the JSON, with the value set to the
# checksum of the tarball.
cat ./generic.json | jshon -s "$tarhash" -i checksum > $layerPath/json
# Indicate to docker that we're using schema version 1.0.
echo -n "1.0" > $layerPath/VERSION

View File

@ -0,0 +1,520 @@
# IMPORTANT: Making changes?
# Validate your changes with python3 ./ --test
# Using a simple algorithm, convert the references to a path in to a
# sorted list of dependent paths based on how often they're referenced
# and how deep in the tree they live. Equally-"popular" paths are then
# sorted by name.
# The existing writeReferencesToFile prints the paths in a simple
# ascii-based sorting of the paths.
# Sorting the paths by graph improves the chances that the difference
# between two builds appear near the end of the list, instead of near
# the beginning. This makes a difference for Nix builds which export a
# closure for another program to consume, if that program implements its
# own level of binary diffing.
# For an example, Docker Images. If each store path is a separate layer
# then Docker Images can be very efficiently transferred between systems,
# and we get very good cache reuse between images built with the same
# version of Nixpkgs. However, since Docker only reliably supports a
# small number of layers (42) it is important to pick the individual
# layers carefully. By storing very popular store paths in the first 40
# layers, we improve the chances that the next Docker image will share
# many of those layers.*
# Given the dependency tree:
# A - B - C - D -\
# \ \ \ \
# \ \ \ \
# \ \ - E ---- F
# \- G
# Nodes which have multiple references are duplicated:
# A - B - C - D - F
# \ \ \
# \ \ \- E - F
# \ \
# \ \- E - F
# \
# \- G
# Each leaf node is now replaced by a counter defaulted to 1:
# A - B - C - D - (F:1)
# \ \ \
# \ \ \- E - (F:1)
# \ \
# \ \- E - (F:1)
# \
# \- (G:1)
# Then each leaf counter is merged with its parent node, replacing the
# parent node with a counter of 1, and each existing counter being
# incremented by 1. That is to say `- D - (F:1)` becomes `- (D:1, F:2)`:
# A - B - C - (D:1, F:2)
# \ \ \
# \ \ \- (E:1, F:2)
# \ \
# \ \- (E:1, F:2)
# \
# \- (G:1)
# Then each leaf counter is merged with its parent node again, merging
# any counters, then incrementing each:
# A - B - (C:1, D:2, E:2, F:5)
# \ \
# \ \- (E:1, F:2)
# \
# \- (G:1)
# And again:
# A - (B:1, C:2, D:3, E:4, F:8)
# \
# \- (G:1)
# And again:
# (A:1, B:2, C:3, D:4, E:5, F:9, G:2)
# and then paths have the following "popularity":
# A 1
# B 2
# C 3
# D 4
# E 5
# F 9
# G 2
# and the popularity contest would result in the paths being printed as:
# F
# E
# D
# C
# B
# G
# A
# * Note: People who have used a Dockerfile before assume Docker's
# Layers are inherently ordered. However, this is not true -- Docker
# layers are content-addressable and are not explicitly layered until
# they are composed in to an Image.
import sys
import json
import unittest
from pprint import pprint
from collections import defaultdict
# Find paths in the original dataset which are never referenced by
# any other paths
def find_roots(closures):
    """Return the paths in ``closures`` that no other path references.

    ``closures`` is a list of ``{'path': ..., 'references': [...]}``
    dicts. A path's reference to itself is ignored, so a self-referring
    path can still be a root. Roots are returned in the order in which
    they appear in ``closures``.
    """
    # Every path that is referenced by some *other* path. Building this
    # once avoids rescanning the whole closure list for each candidate.
    referenced = {
        ref
        for closure in closures
        for ref in closure['references']
        if ref != closure['path']
    }
    return [
        closure['path']
        for closure in closures
        if closure['path'] not in referenced
    ]
class TestFindRoots(unittest.TestCase):
def test_find_roots(self):
"path": "/nix/store/foo",
"references": [
"path": "/nix/store/bar",
"references": [
"path": "/nix/store/hello",
"references": [
["/nix/store/foo", "/nix/store/hello"]
def any_refer_to(path, closures):
    """Tell whether any *other* entry in ``closures`` references ``path``.

    Entries whose own 'path' equals ``path`` are skipped, so a
    self-reference never counts.
    """
    return any(
        path in entry['references']
        for entry in closures
        if entry['path'] != path
    )
class TestAnyReferTo(unittest.TestCase):
def test_has_references(self):
"path": "/nix/store/foo",
"references": [
def test_no_references(self):
"path": "/nix/store/foo",
"references": [
def all_paths(closures):
    """Return every distinct store path mentioned anywhere in ``closures``.

    Both each entry's own 'path' and all of its 'references' are
    included, so a path that only ever appears as a reference is still
    reported. Duplicates are removed; the order of the returned list is
    unspecified.
    """
    paths = set()
    for closure in closures:
        paths.add(closure['path'])
        paths.update(closure['references'])
    return list(paths)
class TestAllPaths(unittest.TestCase):
def test_returns_all_paths(self):
"path": "/nix/store/foo",
"references": [
"path": "/nix/store/bar",
"references": [
"path": "/nix/store/hello",
"references": [
["/nix/store/foo", "/nix/store/bar", "/nix/store/hello", "/nix/store/tux",]
def test_no_references(self):
"path": "/nix/store/foo",
"references": [
# Convert:
# [
# { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },
# { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },
# { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },
# { path: /nix/store/tux, references: [ /nix/store/tux ] }
# ]
# To:
# {
# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
# /nix/store/bar: [ /nix/store/baz ],
# /nix/store/baz: [ /nix/store/tux ] },
# /nix/store/tux: [ ]
# }
# Note that it drops self-references to avoid loops.
def make_lookup(closures):
    """Map each closure's path to its references, minus itself.

    Store paths commonly list themselves among their own references;
    those self-references are filtered out so that later graph walks
    do not loop.
    """
    return {
        entry['path']: [
            dependency
            for dependency in entry['references']
            if dependency != entry['path']
        ]
        for entry in closures
    }
class TestMakeLookup(unittest.TestCase):
def test_returns_lookp(self):
"path": "/nix/store/foo",
"references": [
"path": "/nix/store/bar",
"references": [
"path": "/nix/store/hello",
"references": [
"/nix/store/foo": [ "/nix/store/bar" ],
"/nix/store/bar": [ "/nix/store/tux" ],
"/nix/store/hello": [ ],
# Convert:
# /nix/store/foo with
# {
# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
# /nix/store/bar: [ /nix/store/baz ],
# /nix/store/baz: [ /nix/store/tux ] },
# /nix/store/tux: [ ]
# }
# To:
# {
# /nix/store/bar: {
# /nix/store/baz: {
# /nix/store/tux: {}
# }
# },
# /nix/store/baz: {
# /nix/store/tux: {}
# }
# }
def make_graph_segment_from_root(root, lookup, _cache=None):
    """Build the nested dependency tree hanging off ``root``.

    ``lookup`` maps each path to the list of paths it references (with
    self-references already removed). The result maps each direct
    reference of ``root`` to that reference's own tree, recursively.

    ``_cache`` is an internal memo of already-built subtrees; callers
    should leave it unset. The subtree for a given path depends only on
    that path and ``lookup``, so it is built once and the same dict
    object is reused wherever the path appears. Without this, a
    dependency reachable through many routes is re-expanded once per
    route, which is exponential on diamond-shaped graphs. Because
    subtrees are shared, treat the returned structure as read-only.
    """
    if _cache is None:
        _cache = {}
    children = {}
    for ref in lookup[root]:
        if ref not in _cache:
            _cache[ref] = make_graph_segment_from_root(ref, lookup, _cache)
        children[ref] = _cache[ref]
    return children
class TestMakeGraphSegmentFromRoot(unittest.TestCase):
def test_returns_graph(self):
make_graph_segment_from_root("/nix/store/foo", {
"/nix/store/foo": [ "/nix/store/bar" ],
"/nix/store/bar": [ "/nix/store/tux" ],
"/nix/store/tux": [ ],
"/nix/store/hello": [ ],
"/nix/store/bar": {
"/nix/store/tux": {}
def test_returns_graph_tiny(self):
make_graph_segment_from_root("/nix/store/tux", {
"/nix/store/foo": [ "/nix/store/bar" ],
"/nix/store/bar": [ "/nix/store/tux" ],
"/nix/store/tux": [ ],
# Convert a graph segment in to a popularity-counted dictionary:
# From:
# {
# /nix/store/foo: {
# /nix/store/bar: {
# /nix/store/baz: {
# /nix/store/tux: {}
# }
# }
# /nix/store/baz: {
# /nix/store/tux: {}
# }
# }
# }
# to:
# [
# /nix/store/foo: 1
# /nix/store/bar: 2
# /nix/store/baz: 4
# /nix/store/tux: 6
# ]
def graph_popularity_contest(full_graph):
    """Score every path in a nested dependency graph.

    Each key of ``full_graph`` earns one point per appearance at this
    level. Every path scored inside a key's subgraph is carried up with
    its subgraph score plus one, so paths that sit deeper and are
    reached more often accumulate higher totals.
    """
    scores = defaultdict(int)
    for node in full_graph:
        scores[node] += 1
        nested_scores = graph_popularity_contest(full_graph[node])
        for dependency in nested_scores:
            scores[dependency] += nested_scores[dependency] + 1
    return scores
class TestGraphPopularityContest(unittest.TestCase):
def test_counts_popularity(self):
"/nix/store/foo": {
"/nix/store/bar": {
"/nix/store/baz": {
"/nix/store/tux": {}
"/nix/store/baz": {
"/nix/store/tux": {}
"/nix/store/foo": 1,
"/nix/store/bar": 2,
"/nix/store/baz": 4,
"/nix/store/tux": 6,
# Emit a list of packages by popularity, most first:
# From:
# [
# /nix/store/foo: 1
# /nix/store/bar: 1
# /nix/store/baz: 2
# /nix/store/tux: 2
# ]
# To:
# [ /nix/store/baz /nix/store/tux /nix/store/bar /nix/store/foo ]
# Turn a {path: popularity} mapping into a flat list of paths; the final
# list is the reverse of whatever was accumulated in flat_ordered.
# NOTE(review): this extract appears to be missing statements -- nothing
# visible here ever adds to popularities, paths_by_popularity or
# flat_ordered, and both loop bodies are incomplete. Restore the missing
# lines from the upstream file before relying on this function.
def order_by_popularity(paths):
# popularity value -> list of paths (filled by the first loop, presumably)
paths_by_popularity = defaultdict(list)
popularities = []
for path, popularity in paths.items():
# de-duplicate the popularity values seen
popularities = list(set(popularities))
flat_ordered = []
for popularity in popularities:
# NOTE: rebinds the `paths` parameter to the list for this popularity
paths = paths_by_popularity[popularity]
return list(reversed(flat_ordered))
class TestOrderByPopularity(unittest.TestCase):
def test_returns_in_order(self):
"/nix/store/foo": 1,
"/nix/store/bar": 1,
"/nix/store/baz": 2,
"/nix/store/tux": 2,
def package_name(path):
    """Return everything in ``path`` after its first '-'.

    Only the leading segment (up to the first dash) is dropped; the
    remainder is kept whole so that ordering stays stable even when
    nothing but that leading hash segment differs. A path with no dash
    yields an empty string.
    """
    _leading, _dash, remainder = path.partition('-')
    return remainder
# Entry point: read a JSON file (argv[1]), take the closure list stored
# under the given key (argv[2]), and order its paths by popularity.
# NOTE(review): the tail of this function (handling of `missing` and
# emitting the result) is not visible in this extract.
def main():
filename = sys.argv[1]
key = sys.argv[2]
with open(filename) as f:
data = json.load(f)
# Data comes in as:
# [
# { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },
# { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },
# { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },
# { path: /nix/store/tux, references: [ /nix/store/tux ] }
# ]
# and we want to get out a list of paths ordered by how universally
# important they are, i.e. tux is referenced by every path, transitively,
# so it should be #1
# [
# /nix/store/tux,
# /nix/store/baz,
# /nix/store/bar,
# /nix/store/foo,
# ]
graph = data[key]
# Paths that nothing else references are the starting points of the walk.
roots = find_roots(graph);
lookup = make_lookup(graph)
# One nested dependency tree per root.
full_graph = {}
for root in roots:
full_graph[root] = make_graph_segment_from_root(root, lookup)
ordered = order_by_popularity(graph_popularity_contest(full_graph))
# Check that every path mentioned in the input made it into the ordering.
missing = []
for path in all_paths(graph):
if path not in ordered:
if "--test" in sys.argv:
# Don't pass --test otherwise unittest gets mad
unittest.main(argv = [f for f in sys.argv if f != "--test" ])

View File

@ -0,0 +1,15 @@
{ runCommand, python3, coreutils }:
# Write the references of `path' to a file, in order of how "popular" each
# reference is. Nix 2 only.
path: runCommand "closure-paths"
exportReferencesGraph.graph = path;
__structuredAttrs = true;
PATH = "${coreutils}/bin:${python3}/bin";
builder = builtins.toFile "builder"
python3 ${./} .attrs.json graph > ''${outputs[out]}

View File

@ -365,6 +365,8 @@ with pkgs;
nukeReferences = callPackage ../build-support/nuke-references { };
referencesByPopularity = callPackage ../build-support/references-by-popularity { };
removeReferencesTo = callPackage ../build-support/remove-references-to { };
vmTools = callPackage ../build-support/vm { };