pkgs.dockertools.buildLayeredImage: customisable layering strategy (#122608)
This commit is contained in:
commit
4703b8d2c7
@ -919,10 +919,19 @@ rec {
|
||||
, includeStorePaths ? true
|
||||
, includeNixDB ? false
|
||||
, passthru ? {}
|
||||
,
|
||||
, # Pipeline used to produce docker layers. If not set, popularity contest
|
||||
# algorithm is used. If set, maxLayers is ignored as the author of the
|
||||
# pipeline can use one of the available functions (like "limit_layers")
|
||||
# to control the amount of layers.
|
||||
# See: pkgs/build-support/flatten-references-graph/src/flatten_references_graph/pipe.py
|
||||
# for available functions, and it's test for how to use them.
|
||||
# WARNING!! this interface is highly experimental and subject to change.
|
||||
layeringPipeline ? null
|
||||
, # Enables debug logging for the layering pipeline.
|
||||
debug ? false
|
||||
}:
|
||||
assert
|
||||
(lib.assertMsg (maxLayers > 1)
|
||||
(lib.assertMsg (layeringPipeline == null -> maxLayers > 1)
|
||||
"the maxLayers argument of dockerTools.buildLayeredImage function must be greather than 1 (current value: ${toString maxLayers})");
|
||||
assert
|
||||
(lib.assertMsg (enableFakechroot -> !stdenv.hostPlatform.isDarwin) ''
|
||||
@ -999,18 +1008,23 @@ rec {
|
||||
'';
|
||||
};
|
||||
|
||||
closureRoots = lib.optionals includeStorePaths /* normally true */ (
|
||||
[ baseJson customisationLayer ]
|
||||
);
|
||||
overallClosure = writeText "closure" (lib.concatStringsSep " " closureRoots);
|
||||
|
||||
# These derivations are only created as implementation details of docker-tools,
|
||||
# so they'll be excluded from the created images.
|
||||
unnecessaryDrvs = [ baseJson overallClosure customisationLayer ];
|
||||
layersJsonFile = buildPackages.dockerMakeLayers {
|
||||
inherit debug;
|
||||
closureRoots = optionals includeStorePaths [ baseJson customisationLayer ];
|
||||
excludePaths = [ baseJson customisationLayer ];
|
||||
pipeline =
|
||||
if layeringPipeline != null
|
||||
then layeringPipeline
|
||||
else import
|
||||
./popularity-contest-layering-pipeline.nix
|
||||
{ inherit lib jq runCommand; }
|
||||
{ inherit fromImage maxLayers; }
|
||||
;
|
||||
};
|
||||
|
||||
conf = runCommand "${baseName}-conf.json"
|
||||
{
|
||||
inherit fromImage maxLayers created mtime uid gid uname gname;
|
||||
inherit fromImage created mtime uid gid uname gname layersJsonFile;
|
||||
imageName = lib.toLower name;
|
||||
preferLocalBuild = true;
|
||||
passthru.imageTag =
|
||||
@ -1018,7 +1032,6 @@ rec {
|
||||
then tag
|
||||
else
|
||||
lib.head (lib.strings.splitString "-" (baseNameOf (builtins.unsafeDiscardStringContext conf.outPath)));
|
||||
paths = buildPackages.referencesByPopularity overallClosure;
|
||||
nativeBuildInputs = [ jq ];
|
||||
} ''
|
||||
${if (tag == null) then ''
|
||||
@ -1038,54 +1051,7 @@ rec {
|
||||
mtime="$(date -Iseconds -d "$mtime")"
|
||||
fi
|
||||
|
||||
paths() {
|
||||
cat $paths ${lib.concatMapStringsSep " "
|
||||
(path: "| (grep -v ${path} || true)")
|
||||
unnecessaryDrvs}
|
||||
}
|
||||
|
||||
# Compute the number of layers that are already used by a potential
|
||||
# 'fromImage' as well as the customization layer. Ensure that there is
|
||||
# still at least one layer available to store the image contents.
|
||||
usedLayers=0
|
||||
|
||||
# subtract number of base image layers
|
||||
if [[ -n "$fromImage" ]]; then
|
||||
(( usedLayers += $(tar -xOf "$fromImage" manifest.json | jq '.[0].Layers | length') ))
|
||||
fi
|
||||
|
||||
# one layer will be taken up by the customisation layer
|
||||
(( usedLayers += 1 ))
|
||||
|
||||
if ! (( $usedLayers < $maxLayers )); then
|
||||
echo >&2 "Error: usedLayers $usedLayers layers to store 'fromImage' and" \
|
||||
"'extraCommands', but only maxLayers=$maxLayers were" \
|
||||
"allowed. At least 1 layer is required to store contents."
|
||||
exit 1
|
||||
fi
|
||||
availableLayers=$(( maxLayers - usedLayers ))
|
||||
|
||||
# Create $maxLayers worth of Docker Layers, one layer per store path
|
||||
# unless there are more paths than $maxLayers. In that case, create
|
||||
# $maxLayers-1 for the most popular layers, and smush the remainaing
|
||||
# store paths in to one final layer.
|
||||
#
|
||||
# The following code is fiddly w.r.t. ensuring every layer is
|
||||
# created, and that no paths are missed. If you change the
|
||||
# following lines, double-check that your code behaves properly
|
||||
# when the number of layers equals:
|
||||
# maxLayers-1, maxLayers, and maxLayers+1, 0
|
||||
paths |
|
||||
jq -sR '
|
||||
rtrimstr("\n") | split("\n")
|
||||
| (.[:$maxLayers-1] | map([.])) + [ .[$maxLayers-1:] ]
|
||||
| map(select(length > 0))
|
||||
' \
|
||||
--argjson maxLayers "$availableLayers" > store_layers.json
|
||||
|
||||
# The index on $store_layers is necessary because the --slurpfile
|
||||
# automatically reads the file as an array.
|
||||
cat ${baseJson} | jq '
|
||||
jq '
|
||||
. + {
|
||||
"store_dir": $store_dir,
|
||||
"from_image": $from_image,
|
||||
@ -1101,7 +1067,7 @@ rec {
|
||||
}
|
||||
' --arg store_dir "${storeDir}" \
|
||||
--argjson from_image ${if fromImage == null then "null" else "'\"${fromImage}\"'"} \
|
||||
--slurpfile store_layers store_layers.json \
|
||||
--slurpfile store_layers "$layersJsonFile" \
|
||||
--arg customisation_layer ${customisationLayer} \
|
||||
--arg repo_tag "$imageName:$imageTag" \
|
||||
--arg created "$created" \
|
||||
@ -1109,8 +1075,9 @@ rec {
|
||||
--arg uid "$uid" \
|
||||
--arg gid "$gid" \
|
||||
--arg uname "$uname" \
|
||||
--arg gname "$gname" |
|
||||
tee $out
|
||||
--arg gname "$gname" \
|
||||
${baseJson} \
|
||||
| tee $out
|
||||
'';
|
||||
|
||||
result = runCommand "stream-${baseName}"
|
||||
|
50
pkgs/build-support/docker/make-layers.nix
Normal file
50
pkgs/build-support/docker/make-layers.nix
Normal file
@ -0,0 +1,50 @@
|
||||
{
|
||||
coreutils,
|
||||
flattenReferencesGraph,
|
||||
lib,
|
||||
jq,
|
||||
runCommand,
|
||||
}:
|
||||
{
|
||||
closureRoots,
|
||||
excludePaths ? [ ],
|
||||
# This could be a path to (or a derivation producing a path to)
|
||||
# a json file containing the pipeline
|
||||
pipeline ? [ ],
|
||||
debug ? false,
|
||||
}:
|
||||
if closureRoots == [ ] then
|
||||
builtins.toFile "docker-layers-empty" "[]"
|
||||
else
|
||||
runCommand "docker-layers"
|
||||
{
|
||||
__structuredAttrs = true;
|
||||
# graph, exclude_paths and pipeline are expected by the
|
||||
# flatten_references_graph executable.
|
||||
exportReferencesGraph.graph = closureRoots;
|
||||
exclude_paths = excludePaths;
|
||||
inherit pipeline;
|
||||
nativeBuildInputs = [
|
||||
coreutils
|
||||
flattenReferencesGraph
|
||||
jq
|
||||
];
|
||||
}
|
||||
''
|
||||
. .attrs.sh
|
||||
|
||||
flatten_references_graph_arg=.attrs.json
|
||||
|
||||
echo "pipeline: $pipeline"
|
||||
|
||||
if jq -e '.pipeline | type == "string"' .attrs.json; then
|
||||
jq '. + { "pipeline": $pipeline[0] }' \
|
||||
--slurpfile pipeline "$pipeline" \
|
||||
.attrs.json > flatten_references_graph_arg.json
|
||||
|
||||
flatten_references_graph_arg=flatten_references_graph_arg.json
|
||||
fi
|
||||
|
||||
${lib.optionalString debug "export DEBUG=True"}
|
||||
flatten_references_graph "$flatten_references_graph_arg" > ''${outputs[out]}
|
||||
''
|
@ -0,0 +1,34 @@
|
||||
{
|
||||
lib,
|
||||
runCommand,
|
||||
jq,
|
||||
}:
|
||||
{
|
||||
maxLayers,
|
||||
fromImage ? null,
|
||||
}:
|
||||
runCommand "popularity-contest-layering-pipeline.json" { inherit maxLayers; } ''
|
||||
# Compute the number of layers that are already used by a potential
|
||||
# 'fromImage' as well as the customization layer. Ensure that there is
|
||||
# still at least one layer available to store the image contents.
|
||||
# one layer will be taken up by the customisation layer
|
||||
usedLayers=1
|
||||
|
||||
${lib.optionalString (fromImage != null) ''
|
||||
# subtract number of base image layers
|
||||
baseImageLayersCount=$(tar -xOf "${fromImage}" manifest.json | ${lib.getExe jq} '.[0].Layers | length')
|
||||
|
||||
(( usedLayers += baseImageLayersCount ))
|
||||
''}
|
||||
|
||||
if ! (( $usedLayers < $maxLayers )); then
|
||||
echo >&2 "Error: usedLayers $usedLayers layers to store 'fromImage' and" \
|
||||
"'extraCommands', but only maxLayers=$maxLayers were" \
|
||||
"allowed. At least 1 layer is required to store contents."
|
||||
exit 1
|
||||
fi
|
||||
availableLayers=$(( maxLayers - usedLayers ))
|
||||
|
||||
# Produce pipeline which uses popularity_contest algo.
|
||||
echo '[["popularity_contest"],["limit_layers",'$availableLayers']]' > $out
|
||||
''
|
54
pkgs/by-name/fl/flattenReferencesGraph/dev-shell.nix
Normal file
54
pkgs/by-name/fl/flattenReferencesGraph/dev-shell.nix
Normal file
@ -0,0 +1,54 @@
|
||||
# Start this shell with:
|
||||
# nix-shell path/to/root/of/nixpkgs -A flattenReferencesGraph.dev-shell
|
||||
{
|
||||
mkShell,
|
||||
callPackage,
|
||||
python3Packages,
|
||||
}:
|
||||
let
|
||||
helpers = callPackage (import ./helpers.nix) { };
|
||||
in
|
||||
mkShell {
|
||||
inputsFrom = [ (callPackage (import ./package.nix) { }) ];
|
||||
buildInputs = [
|
||||
helpers.format
|
||||
helpers.lint
|
||||
helpers.unittest
|
||||
# This is needed to plot graphs when DEBUG_PLOT is set to True.
|
||||
python3Packages.pycairo
|
||||
# This can be used on linux to display the graphs.
|
||||
# On other platforms the image viewer needs to be set with
|
||||
# DEBUG_PLOT_IMAGE_VIEWER env var.
|
||||
# pkgs.gwenview
|
||||
];
|
||||
shellHook = ''
|
||||
echo '
|
||||
**********************************************************************
|
||||
**********************************************************************
|
||||
|
||||
Commands useful for development (should be executed from scr dir):
|
||||
|
||||
|
||||
format
|
||||
* formats all files in place using autopep8
|
||||
|
||||
lint
|
||||
* lints all files using flake8
|
||||
|
||||
unittest
|
||||
* runs all unit tests
|
||||
|
||||
following env vars can be set to enable extra output in tests:
|
||||
- DEBUG=True - enable debug logging
|
||||
- DEBUG_PLOT=True - plot graphs processed by split_paths.py and
|
||||
subcomponent.py
|
||||
- DEBUG_PLOT_IMAGE_VIEWER=$PATH_OF_IMAGE_VIEWER_APP - app used to
|
||||
display plots (default: gwenview)
|
||||
- DEBUG_PLOT_SAVE_BASE_NAME=$SOME_NAME - if set, plots will be saved
|
||||
to files instead of displayed with image viewer
|
||||
|
||||
**********************************************************************
|
||||
**********************************************************************
|
||||
'
|
||||
'';
|
||||
}
|
36
pkgs/by-name/fl/flattenReferencesGraph/helpers.nix
Normal file
36
pkgs/by-name/fl/flattenReferencesGraph/helpers.nix
Normal file
@ -0,0 +1,36 @@
|
||||
{
|
||||
bash,
|
||||
writers,
|
||||
python3Packages,
|
||||
}:
|
||||
let
|
||||
writeCheckedBashBin =
|
||||
name:
|
||||
let
|
||||
interpreter = "${bash}/bin/bash";
|
||||
in
|
||||
writers.makeScriptWriter {
|
||||
inherit interpreter;
|
||||
check = "${interpreter} -n $1";
|
||||
} "/bin/${name}";
|
||||
|
||||
# Helpers used during build/development.
|
||||
lint = writeCheckedBashBin "lint" ''
|
||||
${python3Packages.flake8}/bin/flake8 --show-source ''${@}
|
||||
'';
|
||||
|
||||
unittest = writeCheckedBashBin "unittest" ''
|
||||
if [ "$#" -eq 0 ]; then
|
||||
set -- discover -p '*_test.py'
|
||||
fi
|
||||
|
||||
${python3Packages.python}/bin/python -m unittest "''${@}"
|
||||
'';
|
||||
|
||||
format = writeCheckedBashBin "format" ''
|
||||
${python3Packages.autopep8}/bin/autopep8 -r -i . "''${@}"
|
||||
'';
|
||||
in
|
||||
{
|
||||
inherit format lint unittest;
|
||||
}
|
38
pkgs/by-name/fl/flattenReferencesGraph/package.nix
Normal file
38
pkgs/by-name/fl/flattenReferencesGraph/package.nix
Normal file
@ -0,0 +1,38 @@
|
||||
{
|
||||
callPackage,
|
||||
lib,
|
||||
python3Packages,
|
||||
}:
|
||||
let
|
||||
inherit (lib) fileset;
|
||||
helpers = callPackage ./helpers.nix { };
|
||||
pythonPackages = python3Packages;
|
||||
in
|
||||
pythonPackages.buildPythonApplication {
|
||||
version = "0.1.0";
|
||||
pname = "flatten-references-graph";
|
||||
|
||||
src = fileset.toSource {
|
||||
root = ./src;
|
||||
fileset = fileset.unions [
|
||||
./src/.flake8
|
||||
./src/flatten_references_graph
|
||||
./src/setup.py
|
||||
];
|
||||
};
|
||||
|
||||
propagatedBuildInputs = with pythonPackages; [
|
||||
igraph
|
||||
toolz
|
||||
];
|
||||
|
||||
doCheck = true;
|
||||
|
||||
checkPhase = ''
|
||||
${helpers.unittest}/bin/unittest
|
||||
'';
|
||||
|
||||
passthru = {
|
||||
dev-shell = callPackage ./dev-shell.nix { };
|
||||
};
|
||||
}
|
4
pkgs/by-name/fl/flattenReferencesGraph/src/.flake8
Normal file
4
pkgs/by-name/fl/flattenReferencesGraph/src/.flake8
Normal file
@ -0,0 +1,4 @@
|
||||
[flake8]
|
||||
max-line-length = 80
|
||||
[pep8]
|
||||
aggressive = 1
|
1
pkgs/by-name/fl/flattenReferencesGraph/src/.gitignore
vendored
Normal file
1
pkgs/by-name/fl/flattenReferencesGraph/src/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
__pycache__
|
@ -0,0 +1,48 @@
|
||||
import json as json
|
||||
import sys as sys
|
||||
|
||||
from .lib import debug, load_json
|
||||
from .flatten_references_graph import flatten_references_graph
|
||||
|
||||
|
||||
def main_impl(file_path):
|
||||
debug(f"loading json from {file_path}")
|
||||
|
||||
data = load_json(file_path)
|
||||
|
||||
# These are required
|
||||
references_graph = data["graph"]
|
||||
pipeline = data["pipeline"]
|
||||
|
||||
# This is optional
|
||||
exclude_paths = data.get("exclude_paths")
|
||||
|
||||
debug("references_graph", references_graph)
|
||||
debug("pipeline", pipeline)
|
||||
debug("exclude_paths", exclude_paths)
|
||||
|
||||
result = flatten_references_graph(
|
||||
references_graph,
|
||||
pipeline,
|
||||
exclude_paths=exclude_paths
|
||||
)
|
||||
|
||||
debug("result", result)
|
||||
|
||||
return json.dumps(
|
||||
result,
|
||||
# For reproducibility.
|
||||
sort_keys=True,
|
||||
indent=2,
|
||||
# Avoid tailing whitespaces.
|
||||
separators=(",", ": ")
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
file_path = sys.argv[1]
|
||||
print(main_impl(file_path))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -0,0 +1,52 @@
|
||||
import unittest
|
||||
import inspect as inspect
|
||||
|
||||
from .__main__ import main_impl
|
||||
from .lib import path_relative_to_file
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
|
||||
class TestMain(unittest.TestCase):
|
||||
|
||||
def test_main_impl(self):
|
||||
|
||||
file_path = path_relative_to_file(
|
||||
__file__,
|
||||
"__test_fixtures/flatten-references-graph-main-input.json"
|
||||
)
|
||||
|
||||
result = main_impl(file_path)
|
||||
|
||||
self.assertEqual(
|
||||
result,
|
||||
inspect.cleandoc(
|
||||
"""
|
||||
[
|
||||
[
|
||||
"B"
|
||||
],
|
||||
[
|
||||
"C"
|
||||
],
|
||||
[
|
||||
"A"
|
||||
]
|
||||
]
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
def test_main_impl2(self):
|
||||
file_path = path_relative_to_file(
|
||||
__file__,
|
||||
"__test_fixtures/flatten-references-graph-main-input-no-paths.json"
|
||||
)
|
||||
|
||||
result = main_impl(file_path)
|
||||
|
||||
self.assertEqual(
|
||||
result,
|
||||
inspect.cleandoc("[]")
|
||||
)
|
@ -0,0 +1,31 @@
|
||||
[
|
||||
{
|
||||
"closureSize": 1,
|
||||
"narHash": "sha256:a",
|
||||
"narSize": 2,
|
||||
"path": "A",
|
||||
"references": [
|
||||
"A",
|
||||
"B",
|
||||
"C"
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 3,
|
||||
"narHash": "sha256:b",
|
||||
"narSize": 4,
|
||||
"path": "B",
|
||||
"references": [
|
||||
"C"
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 5,
|
||||
"narHash": "sha256:c",
|
||||
"narSize": 6,
|
||||
"path": "C",
|
||||
"references": [
|
||||
"C"
|
||||
]
|
||||
}
|
||||
]
|
@ -0,0 +1,23 @@
|
||||
{
|
||||
"graph": [
|
||||
{
|
||||
"closureSize": 168,
|
||||
"narHash": "sha256:0dl4kfhb493yz8a5wgh0d2z3kr61z65gp85vx33rqwa1m1lnymy8",
|
||||
"narSize": 168,
|
||||
"path": "/nix/store/fakehash000000000000000000000000-no-store-paths-base.json",
|
||||
"references": []
|
||||
}
|
||||
],
|
||||
"pipeline": [
|
||||
[
|
||||
"popularity_contest"
|
||||
],
|
||||
[
|
||||
"limit_layers",
|
||||
99
|
||||
]
|
||||
],
|
||||
"exclude_paths": [
|
||||
"/nix/store/fakehash000000000000000000000000-no-store-paths-base.json"
|
||||
]
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
{
|
||||
"graph": [
|
||||
{
|
||||
"closureSize": 1,
|
||||
"narHash": "sha256:a",
|
||||
"narSize": 2,
|
||||
"path": "A",
|
||||
"references": [
|
||||
"A",
|
||||
"B",
|
||||
"C"
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 3,
|
||||
"narHash": "sha256:b",
|
||||
"narSize": 4,
|
||||
"path": "B",
|
||||
"references": [
|
||||
"C"
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 5,
|
||||
"narHash": "sha256:c",
|
||||
"narSize": 6,
|
||||
"path": "C",
|
||||
"references": [
|
||||
"C"
|
||||
]
|
||||
}
|
||||
],
|
||||
"pipeline": [
|
||||
["split_paths", ["B"]]
|
||||
]
|
||||
}
|
@ -0,0 +1,45 @@
|
||||
from toolz import curried as tlz
|
||||
|
||||
from .lib import (
|
||||
flatten,
|
||||
over,
|
||||
references_graph_to_igraph
|
||||
)
|
||||
|
||||
from .pipe import pipe
|
||||
|
||||
MAX_LAYERS = 127
|
||||
|
||||
|
||||
def create_list_of_lists_of_strings(deeply_nested_lists_or_dicts_of_graphs):
|
||||
list_of_graphs = flatten(deeply_nested_lists_or_dicts_of_graphs)
|
||||
|
||||
return list(
|
||||
filter(
|
||||
# remove empty layers
|
||||
lambda xs: len(xs) > 0,
|
||||
tlz.map(
|
||||
lambda g: g.vs["name"],
|
||||
list_of_graphs
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def flatten_references_graph(references_graph, pipeline, exclude_paths=None):
|
||||
if exclude_paths is not None:
|
||||
exclude_paths = frozenset(exclude_paths)
|
||||
references_graph = tlz.compose(
|
||||
tlz.map(over(
|
||||
"references",
|
||||
lambda xs: frozenset(xs).difference(exclude_paths)
|
||||
)),
|
||||
tlz.remove(lambda node: node["path"] in exclude_paths)
|
||||
)(references_graph)
|
||||
|
||||
igraph_graph = references_graph_to_igraph(references_graph)
|
||||
|
||||
return create_list_of_lists_of_strings(pipe(
|
||||
pipeline,
|
||||
igraph_graph
|
||||
))
|
@ -0,0 +1,121 @@
|
||||
import unittest
|
||||
from .flatten_references_graph import flatten_references_graph
|
||||
# from .lib import path_relative_to_file, load_json
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
references_graph = [
|
||||
{
|
||||
"closureSize": 1,
|
||||
"narHash": "sha256:a",
|
||||
"narSize": 2,
|
||||
"path": "A",
|
||||
"references": [
|
||||
"A",
|
||||
"C",
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 3,
|
||||
"narHash": "sha256:b",
|
||||
"narSize": 4,
|
||||
"path": "B",
|
||||
"references": [
|
||||
"C",
|
||||
"D"
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 5,
|
||||
"narHash": "sha256:c",
|
||||
"narSize": 6,
|
||||
"path": "C",
|
||||
"references": [
|
||||
"C"
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 7,
|
||||
"narHash": "sha256:d",
|
||||
"narSize": 8,
|
||||
"path": "D",
|
||||
"references": [
|
||||
"D"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
class Test(unittest.TestCase):
|
||||
|
||||
def test_flatten_references_graph(self):
|
||||
pipeline = [
|
||||
["split_paths", ["B"]],
|
||||
]
|
||||
|
||||
result = flatten_references_graph(references_graph, pipeline)
|
||||
|
||||
self.assertEqual(
|
||||
result,
|
||||
[
|
||||
# B and it's exclusive deps
|
||||
["B", "D"],
|
||||
# Common deps
|
||||
["C"],
|
||||
# Rest (without common deps)
|
||||
["A"]
|
||||
]
|
||||
)
|
||||
|
||||
pipeline = [
|
||||
["split_paths", ["B"]],
|
||||
["over", "main", ["subcomponent_in", ["B"]]],
|
||||
]
|
||||
|
||||
result = flatten_references_graph(references_graph, pipeline)
|
||||
|
||||
self.assertEqual(
|
||||
result,
|
||||
[
|
||||
["B"],
|
||||
["D"],
|
||||
["C"],
|
||||
["A"]
|
||||
]
|
||||
)
|
||||
|
||||
def test_flatten_references_graph_exclude_paths(self):
|
||||
pipeline = [
|
||||
["split_paths", ["B"]],
|
||||
]
|
||||
|
||||
result = flatten_references_graph(
|
||||
references_graph,
|
||||
pipeline,
|
||||
exclude_paths=["A"]
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
result,
|
||||
[
|
||||
# A was excluded so there is no "rest" or "common" layer
|
||||
["B", "C", "D"]
|
||||
]
|
||||
)
|
||||
|
||||
result = flatten_references_graph(
|
||||
references_graph,
|
||||
pipeline,
|
||||
exclude_paths=["D"]
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
result,
|
||||
[
|
||||
# D removed from this layer
|
||||
["B"],
|
||||
["C"],
|
||||
["A"]
|
||||
]
|
||||
)
|
@ -0,0 +1,329 @@
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from toolz import curried as tlz
|
||||
from toolz import curry
|
||||
import igraph as igraph
|
||||
import itertools as itertools
|
||||
import json as json
|
||||
import os as os
|
||||
import re as re
|
||||
import sys
|
||||
|
||||
DEBUG = os.environ.get("DEBUG", False) == "True"
|
||||
DEBUG_PLOT = os.environ.get("DEBUG_PLOT", False) == "True"
|
||||
# If this is set, the plots will be saved to files instead of being displayed
|
||||
# with default image viewer.
|
||||
DEBUG_PLOT_SAVE_BASE_NAME = os.environ.get("DEBUG_PLOT_SAVE_BASE_NAME")
|
||||
|
||||
c = igraph.configuration.init()
|
||||
# App used to open the plots when DEBUG_PLOT_SAVE_BASE_NAME is not set.
|
||||
c["apps.image_viewer"] = os.environ.get("DEBUG_PLOT_IMAGE_VIEWER", "gwenview")
|
||||
|
||||
|
||||
def debug(*args, **kwargs):
|
||||
if DEBUG:
|
||||
print(*args, file=sys.stderr, **kwargs)
|
||||
|
||||
|
||||
def debug_plot(graph, name, **kwargs):
|
||||
if not DEBUG_PLOT:
|
||||
return
|
||||
|
||||
vertex_label = [
|
||||
# remove /nix/store/HASH- prefix from labels
|
||||
re.split("^/nix/store/[a-z0-9]{32}-", name)[-1]
|
||||
for name in graph.vs["name"]
|
||||
]
|
||||
|
||||
save_as = (
|
||||
None if DEBUG_PLOT_SAVE_BASE_NAME is None
|
||||
else DEBUG_PLOT_SAVE_BASE_NAME + name + ".png"
|
||||
)
|
||||
|
||||
igraph.plot(
|
||||
graph,
|
||||
save_as,
|
||||
vertex_label=vertex_label,
|
||||
**(tlz.merge(
|
||||
{
|
||||
# "bbox": (3840, 2160),
|
||||
"bbox": (800, 600),
|
||||
"margin": 100,
|
||||
"vertex_label_dist": -5,
|
||||
"edge_color": "orange",
|
||||
"vertex_size": 20,
|
||||
"vertex_label_size": 30,
|
||||
"edge_arrow_size": 2
|
||||
},
|
||||
kwargs
|
||||
)),
|
||||
)
|
||||
|
||||
|
||||
def debug_plot_with_highligth(g, vs, layout):
|
||||
debug_plot(
|
||||
g,
|
||||
layout=layout,
|
||||
# layout=Layout(new_coords),
|
||||
vertex_color=[
|
||||
"green" if v.index in vs else "red"
|
||||
for v in g.vs
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@curry
|
||||
def pick_keys(keys, d):
|
||||
return {
|
||||
key: d[key] for key in keys if key in d
|
||||
}
|
||||
|
||||
|
||||
def unnest_iterable(xs):
|
||||
return itertools.chain.from_iterable(xs)
|
||||
|
||||
|
||||
def load_json(file_path):
|
||||
with open(file_path) as f:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
@curry
|
||||
def sorted_by(key, xs):
|
||||
return sorted(xs, key=lambda x: x[key])
|
||||
|
||||
|
||||
@curry
|
||||
def find_vertex_by_name_or_none(graph, name):
|
||||
try:
|
||||
# NOTE: find by name is constant time.
|
||||
return graph.vs.find(name)
|
||||
# This will be thrown if vertex with given name is not found.
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def subcomponent_multi(graph, vertices, mode="out"):
|
||||
"""Return concatenated subcomponents generated by the given list of
|
||||
vertices.
|
||||
"""
|
||||
return tlz.mapcat(
|
||||
lambda vertex: graph.subcomponent(vertex, mode=mode),
|
||||
vertices
|
||||
)
|
||||
|
||||
|
||||
@curry
|
||||
def edges_for_reference_graph_node(path_to_size_dict, reference_graph_node):
|
||||
source = reference_graph_node["path"]
|
||||
return map(
|
||||
lambda x: {"source": source, "target": x},
|
||||
sorted(
|
||||
filter(
|
||||
# references might contain source
|
||||
lambda x: x != source,
|
||||
reference_graph_node["references"]
|
||||
),
|
||||
key=lambda x: 1 * path_to_size_dict[x]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
reference_graph_node_keys_to_keep = [
|
||||
"closureSize",
|
||||
"narSize"
|
||||
]
|
||||
|
||||
pick_reference_graph_node_keys = pick_keys(reference_graph_node_keys_to_keep)
|
||||
|
||||
|
||||
def vertex_from_reference_graph_node(reference_graph_node):
|
||||
return tlz.merge(
|
||||
{"name": reference_graph_node["path"]},
|
||||
pick_reference_graph_node_keys(reference_graph_node)
|
||||
)
|
||||
|
||||
|
||||
def references_graph_to_igraph(references_graph):
|
||||
"""
|
||||
Converts result of exportReferencesGraph into an igraph directed graph.
|
||||
Uses paths as igraph node names, and sets closureSize and narSize as
|
||||
properties of igraph nodes.
|
||||
"""
|
||||
debug('references_graph', references_graph)
|
||||
references_graph = sorted(references_graph, key=lambda x: 1 * x["narSize"])
|
||||
|
||||
# Short circuit since DictList throws an error if first argument (vertices)
|
||||
# contains no elements.
|
||||
# The error is: KeyError: 'name'
|
||||
# here: https://github.com/igraph/python-igraph/blob/da7484807f5152a2c18c55dd4154653de2c7f5f7/src/igraph/__init__.py#L3091 # noqa: E501
|
||||
# This looks like a bug.
|
||||
if len(references_graph) == 0:
|
||||
return empty_directed_graph()
|
||||
|
||||
path_to_size_dict = {
|
||||
node["path"]: node["narSize"] for node in references_graph
|
||||
}
|
||||
|
||||
debug('path_to_size_dict', path_to_size_dict)
|
||||
|
||||
return igraph.Graph.DictList(
|
||||
map(vertex_from_reference_graph_node, references_graph),
|
||||
unnest_iterable(map(
|
||||
edges_for_reference_graph_node(path_to_size_dict),
|
||||
references_graph
|
||||
)),
|
||||
directed=True
|
||||
)
|
||||
|
||||
|
||||
@curry
|
||||
def graph_vertex_index_to_name(graph, index):
|
||||
return graph.vs[index]["name"]
|
||||
|
||||
|
||||
def igraph_to_reference_graph(igraph_instance):
|
||||
return [
|
||||
tlz.merge(
|
||||
{
|
||||
"path": v["name"],
|
||||
"references": list(map(
|
||||
graph_vertex_index_to_name(igraph_instance),
|
||||
igraph_instance.successors(v.index)
|
||||
))
|
||||
},
|
||||
pick_reference_graph_node_keys(v.attributes())
|
||||
)
|
||||
for v in igraph_instance.vs
|
||||
]
|
||||
|
||||
|
||||
def load_closure_graph(file_path):
|
||||
return references_graph_to_igraph(load_json(file_path))
|
||||
|
||||
|
||||
def path_relative_to_file(file_path_from, file_path):
|
||||
dir_path = Path(file_path_from).parent
|
||||
return dir_path / file_path
|
||||
|
||||
|
||||
def is_None(x):
|
||||
return x is None
|
||||
|
||||
|
||||
def not_None(x):
|
||||
return x is not None
|
||||
|
||||
|
||||
def print_layers(layers):
|
||||
debug("\n::::LAYERS:::::")
|
||||
for index, layer in enumerate(layers):
|
||||
debug("")
|
||||
debug("layer index:", index)
|
||||
debug("[")
|
||||
for v in layer.vs["name"]:
|
||||
debug(" ", v)
|
||||
debug("]")
|
||||
|
||||
|
||||
def print_vs(graph):
|
||||
for v in graph.vs:
|
||||
debug(v)
|
||||
|
||||
|
||||
def directed_graph(edges, vertices=None, vertex_attrs=[]):
|
||||
graph = igraph.Graph.TupleList(edges, directed=True)
|
||||
|
||||
# Add detached vertices (without edges) if any.
|
||||
if vertices is not None:
|
||||
graph = graph + vertices
|
||||
|
||||
# Add vertex attributes if any.
|
||||
for (name, attrs_dict) in vertex_attrs:
|
||||
vertex = graph.vs.find(name)
|
||||
|
||||
for (k, v) in attrs_dict.items():
|
||||
vertex[k] = v
|
||||
|
||||
return graph
|
||||
|
||||
|
||||
def empty_directed_graph():
|
||||
return directed_graph([])
|
||||
|
||||
|
||||
def graph_is_empty(graph):
|
||||
return len(graph.vs) == 0
|
||||
|
||||
|
||||
def pick_attrs(attrs, x):
|
||||
return {attr: getattr(x, attr) for attr in attrs}
|
||||
|
||||
|
||||
def merge_graphs(graphs):
|
||||
return tlz.reduce(lambda acc, g: acc + g, graphs, empty_directed_graph())
|
||||
|
||||
|
||||
# Functions below can be used in user defined pipeline (see pipe.py).
|
||||
# All functions need to be curried, and the user needs to be able to
|
||||
# provide values for all arguments apart from the last one from nix code.
|
||||
@curry
|
||||
def over(prop_name, func, dictionary):
|
||||
value = dictionary[prop_name]
|
||||
return tlz.assoc(dictionary, prop_name, func(value))
|
||||
|
||||
|
||||
# One argument functions also need to be curried to simplify processing of the
|
||||
# pipeline.
|
||||
@curry
|
||||
def flatten(xs):
|
||||
xs = xs.values() if isinstance(xs, dict) else xs
|
||||
for x in xs:
|
||||
if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
|
||||
yield from flatten(x)
|
||||
else:
|
||||
yield x
|
||||
|
||||
|
||||
@curry
|
||||
def split_every(count, graph):
|
||||
vs = graph.vs
|
||||
return [
|
||||
graph.induced_subgraph(vs[x:x + count])
|
||||
for x in range(0, len(vs), count)
|
||||
]
|
||||
|
||||
|
||||
@curry
|
||||
def limit_layers(max_count, graphs):
|
||||
assert max_count > 0, "max count needs to > 0"
|
||||
|
||||
graphs_iterator = iter(graphs)
|
||||
|
||||
return tlz.concat([
|
||||
tlz.take(max_count - 1, graphs_iterator),
|
||||
# Merges all graphs remaining in the iterator, after initial
|
||||
# max_count - 1 have been taken.
|
||||
(lambda: (yield merge_graphs(graphs_iterator)))()
|
||||
])
|
||||
|
||||
|
||||
@curry
|
||||
def remove_paths(paths, graph):
|
||||
# Allow passing a single path.
|
||||
if isinstance(paths, str):
|
||||
paths = [paths]
|
||||
|
||||
indices_to_remove = tlz.compose(
|
||||
list,
|
||||
tlz.map(lambda v: v.index),
|
||||
tlz.remove(is_None),
|
||||
tlz.map(find_vertex_by_name_or_none(graph))
|
||||
)(paths)
|
||||
|
||||
return graph - indices_to_remove if len(indices_to_remove) > 0 else graph
|
||||
|
||||
|
||||
@curry
|
||||
def reverse(iterator):
|
||||
return reversed(list(iterator))
|
@ -0,0 +1,199 @@
|
||||
import unittest
|
||||
|
||||
from toolz import curried as tlz
|
||||
|
||||
from . import test_helpers as th
|
||||
|
||||
from .lib import (
|
||||
directed_graph,
|
||||
igraph_to_reference_graph,
|
||||
limit_layers,
|
||||
pick_keys,
|
||||
references_graph_to_igraph,
|
||||
reference_graph_node_keys_to_keep
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
|
||||
references_graph = [
|
||||
{
|
||||
"closureSize": 3,
|
||||
"narHash": "sha256:d",
|
||||
"narSize": 0,
|
||||
"path": "D",
|
||||
"references": [
|
||||
"D"
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 3,
|
||||
"narHash": "sha256:b",
|
||||
"narSize": 4,
|
||||
"path": "B",
|
||||
"references": [
|
||||
"B"
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 3,
|
||||
"narHash": "sha256:e",
|
||||
"narSize": 5,
|
||||
"path": "E",
|
||||
"references": [
|
||||
"E"
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 1,
|
||||
"narHash": "sha256:a",
|
||||
"narSize": 10,
|
||||
"path": "A",
|
||||
"references": [
|
||||
# most of the time references contain self path, but not always.
|
||||
"C",
|
||||
"B",
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 5,
|
||||
"narHash": "sha256:c",
|
||||
"narSize": 6,
|
||||
"path": "C",
|
||||
"references": [
|
||||
"C",
|
||||
"E",
|
||||
"D"
|
||||
]
|
||||
},
|
||||
{
|
||||
"closureSize": 5,
|
||||
"narHash": "sha256:f",
|
||||
"narSize": 2,
|
||||
"path": "F",
|
||||
"references": [
|
||||
"F"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
class TestLib(unittest.TestCase, th.CustomAssertions):
|
||||
|
||||
def test_references_graph_to_igraph(self):
|
||||
|
||||
graph = references_graph_to_igraph(references_graph)
|
||||
|
||||
pick_preserved_keys = pick_keys(reference_graph_node_keys_to_keep)
|
||||
|
||||
self.assertGraphEqual(
|
||||
graph,
|
||||
directed_graph(
|
||||
[
|
||||
("A", "B"),
|
||||
("A", "C"),
|
||||
("C", "E"),
|
||||
("C", "D"),
|
||||
],
|
||||
["F"],
|
||||
# Add "narSize" and "closureSize" attributes to each node.
|
||||
map(
|
||||
lambda node: (node["path"], pick_preserved_keys(node)),
|
||||
references_graph
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
def test_references_graph_to_igraph_one_node(self):
|
||||
|
||||
references_graph = [
|
||||
{
|
||||
'closureSize': 168,
|
||||
'narHash': 'sha256:0dl4',
|
||||
'narSize': 168,
|
||||
'path': 'A',
|
||||
'references': []
|
||||
}
|
||||
]
|
||||
|
||||
graph = references_graph_to_igraph(references_graph)
|
||||
|
||||
pick_preserved_keys = pick_keys(reference_graph_node_keys_to_keep)
|
||||
|
||||
self.assertGraphEqual(
|
||||
graph,
|
||||
directed_graph(
|
||||
[],
|
||||
["A"],
|
||||
# Add "narSize" and "closureSize" attributes to each node.
|
||||
map(
|
||||
lambda node: (node["path"], pick_preserved_keys(node)),
|
||||
references_graph
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
def test_references_graph_to_igraph_zero_nodes(self):
|
||||
|
||||
references_graph = []
|
||||
|
||||
graph = references_graph_to_igraph(references_graph)
|
||||
|
||||
self.assertGraphEqual(
|
||||
graph,
|
||||
directed_graph(
|
||||
[],
|
||||
[],
|
||||
[]
|
||||
)
|
||||
)
|
||||
|
||||
def test_igraph_to_reference_graph(self):
|
||||
|
||||
graph = references_graph_to_igraph(references_graph)
|
||||
|
||||
nodes_by_path = {
|
||||
node["path"]: node for node in references_graph
|
||||
}
|
||||
|
||||
result = igraph_to_reference_graph(graph)
|
||||
|
||||
self.assertEqual(
|
||||
len(result),
|
||||
len(references_graph)
|
||||
)
|
||||
|
||||
pick_preserved_keys = pick_keys([
|
||||
"path",
|
||||
*reference_graph_node_keys_to_keep
|
||||
])
|
||||
|
||||
for node in result:
|
||||
original_node = nodes_by_path[node["path"]]
|
||||
|
||||
self.assertDictEqual(
|
||||
pick_preserved_keys(original_node),
|
||||
pick_preserved_keys(node)
|
||||
)
|
||||
|
||||
revove_self_ref = tlz.remove(lambda a: a == node["path"])
|
||||
|
||||
self.assertListEqual(
|
||||
sorted(node["references"]),
|
||||
sorted(revove_self_ref(original_node["references"]))
|
||||
)
|
||||
|
||||
def test_limit_layers_nothing_to_do(self):
|
||||
graph = references_graph_to_igraph(references_graph)
|
||||
|
||||
layers = [graph]
|
||||
result = limit_layers(1, layers)
|
||||
result_list = list(result)
|
||||
|
||||
self.assertEqual(
|
||||
len(result_list),
|
||||
1
|
||||
)
|
||||
|
||||
self.assertGraphEqual(graph, result_list[0])
|
@ -0,0 +1,80 @@
|
||||
from toolz import curried as tlz
|
||||
from toolz import curry
|
||||
|
||||
from . import lib as lib
|
||||
from . import subcomponent as subcomponent
|
||||
from .popularity_contest import popularity_contest
|
||||
from .split_paths import split_paths
|
||||
|
||||
from .lib import (
|
||||
# references_graph_to_igraph
|
||||
debug,
|
||||
pick_attrs
|
||||
)
|
||||
|
||||
# Registry of pipeline-stage functions addressable by name from a layering
# pipeline specification.  Merges graph helpers re-exported from `lib`,
# the subcomponent selectors, and the two top-level layering strategies
# (split_paths, popularity_contest) plus toolz's `map`.
funcs = tlz.merge(
    pick_attrs(
        [
            "flatten",
            "over",
            "split_every",
            "limit_layers",
            "remove_paths",
            "reverse"
        ],
        lib
    ),
    pick_attrs(
        [
            "subcomponent_in",
            "subcomponent_out",
        ],
        subcomponent
    ),
    {
        "split_paths": split_paths,
        "popularity_contest": popularity_contest,
        "map": tlz.map
    }
)
|
||||
|
||||
|
||||
@curry
def nth_or_none(index, xs):
    """Return ``xs[index]``, or ``None`` when the index is out of range."""
    try:
        return xs[index]
    except IndexError:
        return None
|
||||
|
||||
|
||||
def preapply_func(func_call_data):
    """Turn a ``[func_name, *args]`` stage spec into a callable.

    Looks ``func_name`` up in the ``funcs`` registry and applies ``args``
    to it.  For "over" and "map", the function-valued argument is itself a
    spec and is recursively pre-applied first.
    """
    [func_name, *args] = func_call_data
    debug("func_name", func_name)
    debug("args", args)
    debug('func_name in ["over"]', func_name in ["over"])

    # TODO: these could be handled in more generic way by defining, for each
    # function, which of the args are expected to be functions which need
    # pre-applying.
    if func_name == "over":
        # "over" takes (key, func): only the second argument is a spec.
        [first_arg, second_arg] = args
        args = [first_arg, preapply_func(second_arg)]

    elif func_name == "map":
        # "map" takes a single function-valued argument.
        args = [preapply_func(args[0])]

    return funcs[func_name](*args)
|
||||
|
||||
|
||||
@curry
def pipe(pipeline, data):
    """Thread ``data`` through ``pipeline`` (a list of stage specs).

    Each spec is converted to a callable with ``preapply_func``, then the
    callables are composed left-to-right with ``tlz.pipe``.
    """
    debug("pipeline", pipeline)
    partial_funcs = list(tlz.map(preapply_func, pipeline))
    debug('partial_funcs', partial_funcs)
    return tlz.pipe(
        data,
        *partial_funcs
    )
|
||||
|
||||
|
||||
funcs["pipe"] = pipe
|
@ -0,0 +1,153 @@
|
||||
import unittest
|
||||
from .pipe import pipe
|
||||
|
||||
from . import test_helpers as th
|
||||
|
||||
from .lib import (
|
||||
directed_graph,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
|
||||
def make_test_graph():
    """Build the fixture graph shared by the pipe tests.

    Three roots (Root1..Root3); "B" is reachable from both Root1 and
    Root2, and "C" from both Root1 and Root3.
    """
    edges = [
        ("Root1", "A"),
        ("A", "B"),
        ("A", "C"),
        ("B", "D"),
        ("B", "E"),
        ("E", "F"),
        ("B", "G"),
        ("Root2", "B"),
        ("Root3", "C"),
    ]

    return directed_graph(edges)
|
||||
|
||||
|
||||
class CustomAssertions:
    """Mixin assertion for comparing a pipeline's output graph list."""

    def runAndAssertResult(self, graph, pipeline, expected_graph_args):
        """Run ``pipeline`` on ``graph`` and compare each resulting graph.

        ``expected_graph_args`` is a list of argument tuples for
        ``directed_graph``, one per expected output graph, in order.
        """
        result = list(pipe(pipeline, graph))

        for (index, expected_graph_arg) in enumerate(expected_graph_args):

            self.assertGraphEqual(
                directed_graph(*expected_graph_arg),
                result[index]
            )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
|
||||
class Test(
|
||||
unittest.TestCase,
|
||||
CustomAssertions,
|
||||
th.CustomAssertions
|
||||
):
|
||||
|
||||
def test_1(self):
|
||||
pipeline = [
|
||||
["split_paths", ["B"]],
|
||||
[
|
||||
"over",
|
||||
"main",
|
||||
[
|
||||
"pipe",
|
||||
[
|
||||
["subcomponent_in", ["B"]],
|
||||
[
|
||||
"over",
|
||||
"rest",
|
||||
["popularity_contest"]
|
||||
]
|
||||
]
|
||||
]
|
||||
],
|
||||
["flatten"],
|
||||
["map", ["remove_paths", "Root3"]],
|
||||
["limit_layers", 5],
|
||||
]
|
||||
|
||||
expected_graph_args = [
|
||||
# "B"" separated from the rest by "split_paths" and
|
||||
# "subcomponent_in' stages.
|
||||
([], ["B"]),
|
||||
# Deps of "B", split into individual layers by "popularity_contest",
|
||||
# with "F" being most popular
|
||||
([], ["F"]),
|
||||
([], ["D"]),
|
||||
([], ["E"]),
|
||||
# "rest" output of "split_paths" stage with "G" merged into it by
|
||||
# "limit_layers" stage.
|
||||
(
|
||||
[
|
||||
("Root1", "A"),
|
||||
("A", "C")
|
||||
],
|
||||
["Root2", "G"]
|
||||
)
|
||||
]
|
||||
|
||||
self.runAndAssertResult(
|
||||
make_test_graph(),
|
||||
pipeline,
|
||||
expected_graph_args
|
||||
)
|
||||
|
||||
def test_2(self):
|
||||
graph = directed_graph(
|
||||
[
|
||||
("Root1", "A"),
|
||||
("A", "B"),
|
||||
],
|
||||
["Root2"]
|
||||
)
|
||||
self.runAndAssertResult(
|
||||
graph,
|
||||
[
|
||||
["popularity_contest"],
|
||||
],
|
||||
[
|
||||
# Ordered from most to least popular
|
||||
([], ["B"]),
|
||||
([], ["A"]),
|
||||
([], ["Root1"]),
|
||||
([], ["Root2"])
|
||||
]
|
||||
)
|
||||
|
||||
self.runAndAssertResult(
|
||||
graph,
|
||||
[
|
||||
["popularity_contest"],
|
||||
["limit_layers", 3],
|
||||
],
|
||||
[
|
||||
# Most popular first
|
||||
([], ["B"]),
|
||||
([], ["A"]),
|
||||
# Least popular combined
|
||||
([], ["Root1", "Root2"]),
|
||||
]
|
||||
)
|
||||
|
||||
self.runAndAssertResult(
|
||||
graph,
|
||||
[
|
||||
["popularity_contest"],
|
||||
["reverse"],
|
||||
["limit_layers", 3],
|
||||
],
|
||||
[
|
||||
# Least popular first
|
||||
([], ["Root2"]),
|
||||
([], ["Root1"]),
|
||||
# Most popular first
|
||||
([], ["A", "B"])
|
||||
]
|
||||
)
|
@ -0,0 +1,398 @@
|
||||
# Using a simple algorithm, convert the references to a path in to a
|
||||
# sorted list of dependent paths based on how often they're referenced
|
||||
# and how deep in the tree they live. Equally-"popular" paths are then
|
||||
# sorted by name.
|
||||
#
|
||||
# The existing writeReferencesToFile prints the paths in a simple
|
||||
# ascii-based sorting of the paths.
|
||||
#
|
||||
# Sorting the paths by graph improves the chances that the difference
|
||||
# between two builds appear near the end of the list, instead of near
|
||||
# the beginning. This makes a difference for Nix builds which export a
|
||||
# closure for another program to consume, if that program implements its
|
||||
# own level of binary diffing.
|
||||
#
|
||||
# For an example, Docker Images. If each store path is a separate layer
|
||||
# then Docker Images can be very efficiently transfered between systems,
|
||||
# and we get very good cache reuse between images built with the same
|
||||
# version of Nixpkgs. However, since Docker only reliably supports a
|
||||
# small number of layers (42) it is important to pick the individual
|
||||
# layers carefully. By storing very popular store paths in the first 40
|
||||
# layers, we improve the chances that the next Docker image will share
|
||||
# many of those layers.*
|
||||
#
|
||||
# Given the dependency tree:
|
||||
#
|
||||
# A - B - C - D -\
|
||||
# \ \ \ \
|
||||
# \ \ \ \
|
||||
# \ \ - E ---- F
|
||||
# \- G
|
||||
#
|
||||
# Nodes which have multiple references are duplicated:
|
||||
#
|
||||
# A - B - C - D - F
|
||||
# \ \ \
|
||||
# \ \ \- E - F
|
||||
# \ \
|
||||
# \ \- E - F
|
||||
# \
|
||||
# \- G
|
||||
#
|
||||
# Each leaf node is now replaced by a counter defaulted to 1:
|
||||
#
|
||||
# A - B - C - D - (F:1)
|
||||
# \ \ \
|
||||
# \ \ \- E - (F:1)
|
||||
# \ \
|
||||
# \ \- E - (F:1)
|
||||
# \
|
||||
# \- (G:1)
|
||||
#
|
||||
# Then each leaf counter is merged with its parent node, replacing the
|
||||
# parent node with a counter of 1, and each existing counter being
|
||||
# incremented by 1. That is to say `- D - (F:1)` becomes `- (D:1, F:2)`:
|
||||
#
|
||||
# A - B - C - (D:1, F:2)
|
||||
# \ \ \
|
||||
# \ \ \- (E:1, F:2)
|
||||
# \ \
|
||||
# \ \- (E:1, F:2)
|
||||
# \
|
||||
# \- (G:1)
|
||||
#
|
||||
# Then each leaf counter is merged with its parent node again, merging
|
||||
# any counters, then incrementing each:
|
||||
#
|
||||
# A - B - (C:1, D:2, E:2, F:5)
|
||||
# \ \
|
||||
# \ \- (E:1, F:2)
|
||||
# \
|
||||
# \- (G:1)
|
||||
#
|
||||
# And again:
|
||||
#
|
||||
# A - (B:1, C:2, D:3, E:4, F:8)
|
||||
# \
|
||||
# \- (G:1)
|
||||
#
|
||||
# And again:
|
||||
#
|
||||
# (A:1, B:2, C:3, D:4, E:5, F:9, G:2)
|
||||
#
|
||||
# and then paths have the following "popularity":
|
||||
#
|
||||
# A 1
|
||||
# B 2
|
||||
# C 3
|
||||
# D 4
|
||||
# E 5
|
||||
# F 9
|
||||
# G 2
|
||||
#
|
||||
# and the popularity contest would result in the paths being printed as:
|
||||
#
|
||||
# F
|
||||
# E
|
||||
# D
|
||||
# C
|
||||
# B
|
||||
# G
|
||||
# A
|
||||
#
|
||||
# * Note: People who have used a Dockerfile before assume Docker's
|
||||
# Layers are inherently ordered. However, this is not true -- Docker
|
||||
# layers are content-addressable and are not explicitly layered until
|
||||
# they are composed in to an Image.
|
||||
|
||||
import igraph as igraph
|
||||
|
||||
from collections import defaultdict
|
||||
from operator import eq
|
||||
from toolz import curried as tlz
|
||||
from toolz import curry
|
||||
|
||||
from .lib import (
|
||||
debug,
|
||||
directed_graph,
|
||||
igraph_to_reference_graph,
|
||||
over,
|
||||
pick_keys,
|
||||
reference_graph_node_keys_to_keep
|
||||
)
|
||||
|
||||
eq = curry(eq)
|
||||
|
||||
pick_keys_to_keep = pick_keys(reference_graph_node_keys_to_keep)
|
||||
|
||||
|
||||
# Find paths in the original dataset which are never referenced by
# any other paths
def find_roots(closures):
    """Return the paths of closures no other closure refers to."""
    debug('closures', closures)
    return [
        closure['path']
        for closure in closures
        if not any_refer_to(closure['path'], closures)
    ]
|
||||
|
||||
|
||||
def any_refer_to(path, closures):
    """Return True when any *other* closure lists ``path`` in its references.

    A closure's reference to itself does not count.
    """
    return any(
        path in other['references']
        for other in closures
        if other['path'] != path
    )
|
||||
|
||||
|
||||
def all_paths(closures):
    """Return every store path mentioned in ``closures``, deduplicated.

    Collects each closure's own path plus everything it references.

    Fix: the original sorted the accumulator *before* converting it to a
    set, so the sort was dead code and the returned order was arbitrary
    (set iteration order).  Deduplicating first and sorting last makes
    the output deterministic, which is what the original ``paths.sort()``
    evidently intended.  Callers only test membership, so the order
    change is safe.
    """
    paths = set()
    for closure in closures:
        paths.add(closure['path'])
        paths.update(closure['references'])
    return sorted(paths)
|
||||
|
||||
|
||||
# Convert:
|
||||
#
|
||||
# [
|
||||
# { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] }, # noqa: E501
|
||||
# { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },
|
||||
# { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },
|
||||
# { path: /nix/store/tux, references: [ /nix/store/tux ] }
|
||||
# ]
|
||||
#
|
||||
# To:
|
||||
# {
|
||||
# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
|
||||
# /nix/store/bar: [ /nix/store/baz ],
|
||||
# /nix/store/baz: [ /nix/store/tux ] },
|
||||
# /nix/store/tux: [ ]
|
||||
# }
|
||||
#
|
||||
# Note that it drops self-references to avoid loops.
|
||||
|
||||
|
||||
def make_lookup(closures):
    """Index closures by path, dropping each node's self-reference.

    NOTE(review): ``tlz.remove`` yields a lazy, one-shot iterator for
    "references" — each entry's references can only be iterated once;
    verify callers do not consume them twice.
    """
    return {
        # remove self reference
        node["path"]: over("references", tlz.remove(eq(node["path"])), node)
        for node in closures
    }
|
||||
|
||||
|
||||
# Convert:
|
||||
#
|
||||
# /nix/store/foo with
|
||||
# {
|
||||
# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
|
||||
# /nix/store/bar: [ /nix/store/baz ],
|
||||
# /nix/store/baz: [ /nix/store/tux ] },
|
||||
# /nix/store/tux: [ ]
|
||||
# }
|
||||
#
|
||||
# To:
|
||||
#
|
||||
# {
|
||||
# /nix/store/bar: {
|
||||
# /nix/store/baz: {
|
||||
# /nix/store/tux: {}
|
||||
# }
|
||||
# },
|
||||
# /nix/store/baz: {
|
||||
# /nix/store/tux: {}
|
||||
# }
|
||||
# }
|
||||
|
||||
|
||||
def make_graph_segment_from_root(subgraphs_cache, root, lookup):
    """Expand ``root`` into a nested dict of its transitive references.

    ``lookup`` maps path -> iterable of referenced paths; the result maps
    each direct reference of ``root`` to its own recursively-built
    segment.  Sub-results are memoized in ``subgraphs_cache`` (shared
    across calls) so repeated subtrees are computed once and shared by
    reference.
    """
    children = {}
    for ref in lookup[root]:
        # make_graph_segment_from_root is a pure function, and will
        # always return the same result based on a given input. Thus,
        # cache computation.
        #
        # Python's assignment will use a pointer, preventing memory
        # bloat for large graphs.
        if ref not in subgraphs_cache:
            debug("Subgraph Cache miss on {}".format(ref))
            subgraphs_cache[ref] = make_graph_segment_from_root(
                subgraphs_cache, ref, lookup
            )
        else:
            debug("Subgraph Cache hit on {}".format(ref))
        children[ref] = subgraphs_cache[ref]
    return children
|
||||
|
||||
|
||||
# Convert a graph segment in to a popularity-counted dictionary:
|
||||
#
|
||||
# From:
|
||||
# {
|
||||
# /nix/store/foo: {
|
||||
# /nix/store/bar: {
|
||||
# /nix/store/baz: {
|
||||
# /nix/store/tux: {}
|
||||
# }
|
||||
# }
|
||||
# /nix/store/baz: {
|
||||
# /nix/store/tux: {}
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# to:
|
||||
# [
|
||||
# /nix/store/foo: 1
|
||||
# /nix/store/bar: 2
|
||||
# /nix/store/baz: 4
|
||||
# /nix/store/tux: 6
|
||||
# ]
|
||||
|
||||
def graph_popularity_contest(popularity_cache, full_graph):
    """Fold a nested reference graph into ``{path: popularity}`` counts.

    Each node scores 1 for appearing, and each node's subtree re-scores
    its descendants (``subpopularity + 1``), so deeper and more widely
    referenced paths accumulate higher counts.  Sub-results are memoized
    in ``popularity_cache``, keyed by path.
    """
    popularity = defaultdict(int)
    for path, subgraph in full_graph.items():
        popularity[path] += 1
        # graph_popularity_contest is a pure function, and will
        # always return the same result based on a given input. Thus,
        # cache computation.
        #
        # Python's assignment will use a pointer, preventing memory
        # bloat for large graphs.
        if path not in popularity_cache:
            debug("Popularity Cache miss on", path)
            popularity_cache[path] = graph_popularity_contest(
                popularity_cache, subgraph
            )
        else:
            debug("Popularity Cache hit on", path)

        subcontest = popularity_cache[path]
        for subpath, subpopularity in subcontest.items():
            debug("Calculating popularity for", subpath)
            popularity[subpath] += subpopularity + 1

    return popularity
|
||||
|
||||
# Emit a list of packages by popularity, most first:
|
||||
#
|
||||
# From:
|
||||
# [
|
||||
# /nix/store/foo: 1
|
||||
# /nix/store/bar: 1
|
||||
# /nix/store/baz: 2
|
||||
# /nix/store/tux: 2
|
||||
# ]
|
||||
#
|
||||
# To:
|
||||
# [ /nix/store/baz /nix/store/tux /nix/store/bar /nix/store/foo ]
|
||||
|
||||
|
||||
def order_by_popularity(paths):
    """Flatten a ``{path: popularity}`` mapping into a list of paths.

    Most popular paths come first; within a popularity tie, paths are
    ordered by ``package_name`` ascending (same ordering the original
    group-then-double-reverse implementation produced).
    """
    ranked = sorted(
        paths.items(),
        key=lambda item: (-item[1], package_name(item[0]))
    )
    return [path for path, _popularity in ranked]
|
||||
|
||||
|
||||
def package_name(path):
    """Rotate the leading (hash) segment of ``path`` to the end.

    ``"<hash>-name-version"`` becomes ``"name-version-<hash>"`` so that
    sorting by the result groups by package name rather than by hash.
    No data is thrown away, so the order is always the same — even when
    only the hash at the start has changed.
    """
    head, sep, rest = path.partition('-')
    return f"{rest}-{head}" if sep else head
|
||||
|
||||
|
||||
@curry
def popularity_contest(graph):
    """Order store paths by popularity, most-referenced first.

    Data comes in as an igraph directed graph or in the format produced
    by nix's exportReferencesGraph:
    [
     { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] }, # noqa: E501
     { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] }, # noqa: E501
     { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] }, # noqa: E501
     { path: /nix/store/tux, references: [ /nix/store/tux ] }
    ]

    We want to get out a list of paths ordered by how universally
    important they are, ie: tux is referenced by every path, transitively
    so it should be #1:

    [ /nix/store/tux, /nix/store/baz, /nix/store/bar, /nix/store/foo ]

    NOTE: the output is actually a list of igraph graphs with a single
    vertex with v["name"] == path, and some properties (defined in
    reference_graph_node_keys_to_keep) from the nodes of the input graph
    copied as vertex attributes.
    """
    debug('graph', graph)

    if isinstance(graph, igraph.Graph):
        graph = igraph_to_reference_graph(graph)

    debug("Finding roots")
    roots = find_roots(graph)

    debug("Making lookup")
    lookup = make_lookup(graph)

    full_graph = {}
    subgraphs_cache = {}
    for root in roots:
        debug("Making full graph for", root)
        full_graph[root] = make_graph_segment_from_root(
            subgraphs_cache,
            root,
            tlz.valmap(
                tlz.get("references"),
                lookup
            )
        )

    debug("Running contest")
    contest = graph_popularity_contest({}, full_graph)

    debug("Ordering by popularity")
    ordered = order_by_popularity(contest)

    debug("Checking for missing paths")
    # Perf fix: membership tests against the `ordered` *list* inside the
    # loop were O(len(ordered)) each, making this pass accidentally
    # quadratic on large closures.  A set gives O(1) lookups; the
    # resulting `missing` order (all_paths order) is unchanged.
    ordered_set = set(ordered)
    missing = [
        path for path in all_paths(graph)
        if path not in ordered_set
    ]

    ordered.extend(missing)

    return map(
        # Turn each path into a graph with 1 vertex.
        lambda path: directed_graph(
            # No edges
            [],
            # One vertex, with name=path
            [path],
            # Setting desired attributes on the vertex.
            [(path, pick_keys_to_keep(lookup[path]))]
        ),
        ordered
    )
|
@ -0,0 +1,335 @@
|
||||
import unittest
|
||||
from toolz import curry
|
||||
from toolz import curried as tlz
|
||||
|
||||
from . import test_helpers as th
|
||||
|
||||
from .popularity_contest import (
|
||||
all_paths,
|
||||
any_refer_to,
|
||||
find_roots,
|
||||
graph_popularity_contest,
|
||||
make_graph_segment_from_root,
|
||||
make_lookup,
|
||||
popularity_contest,
|
||||
order_by_popularity
|
||||
)
|
||||
|
||||
from .lib import (
|
||||
directed_graph,
|
||||
igraph_to_reference_graph,
|
||||
over
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
|
||||
class CustomAssertions:
|
||||
@curry
|
||||
def assertResultKeys(self, keys, result):
|
||||
self.assertListEqual(
|
||||
list(result.keys()),
|
||||
keys
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class Test(
|
||||
unittest.TestCase,
|
||||
CustomAssertions,
|
||||
th.CustomAssertions
|
||||
):
|
||||
|
||||
def test_empty_graph(self):
|
||||
def test_empty(graph):
|
||||
self.assertListEqual(
|
||||
list(popularity_contest(graph)),
|
||||
[]
|
||||
)
|
||||
|
||||
# popularity_contest works with igraph graph or refurence_graph in
|
||||
# form a list of dicts (as returned by nix's exportReferencesGraph)
|
||||
test_empty(directed_graph([]))
|
||||
test_empty([])
|
||||
|
||||
def test_popularity_contest(self):
|
||||
# Making sure vertex attrs are preserved.
|
||||
vertex_props_dict = {
|
||||
"Root1": {"narSize": 1, "closureSize": 2},
|
||||
"B": {"narSize": 3, "closureSize": 4},
|
||||
"X": {"narSize": 5, "closureSize": 6},
|
||||
}
|
||||
edges = [
|
||||
("Root1", "A"),
|
||||
("A", "B"),
|
||||
("A", "D"),
|
||||
("D", "E"),
|
||||
("B", "D"),
|
||||
("B", "F"),
|
||||
("Root2", "B"),
|
||||
("Root3", "C")
|
||||
]
|
||||
detached_vertices = ["X"]
|
||||
vertex_props = vertex_props_dict.items()
|
||||
|
||||
def test(graph):
|
||||
result = list(popularity_contest(graph))
|
||||
|
||||
expected_paths = [
|
||||
'E',
|
||||
'D',
|
||||
'F',
|
||||
'B',
|
||||
'A',
|
||||
'C',
|
||||
'Root1',
|
||||
'Root2',
|
||||
'Root3',
|
||||
'X'
|
||||
]
|
||||
|
||||
self.assertEqual(
|
||||
len(result),
|
||||
len(expected_paths)
|
||||
)
|
||||
|
||||
for (index, path) in enumerate(expected_paths):
|
||||
path_props = vertex_props_dict.get(path) or {}
|
||||
|
||||
self.assertGraphEqual(
|
||||
result[index],
|
||||
directed_graph([], [path], [(path, path_props)])
|
||||
)
|
||||
|
||||
graph = directed_graph(edges, detached_vertices, vertex_props)
|
||||
|
||||
test(graph)
|
||||
test(igraph_to_reference_graph(graph))
|
||||
|
||||
|
||||
class TestFindRoots(unittest.TestCase):
|
||||
def test_find_roots(self):
|
||||
self.assertCountEqual(
|
||||
find_roots([
|
||||
{
|
||||
"path": "/nix/store/foo",
|
||||
"references": [
|
||||
"/nix/store/foo",
|
||||
"/nix/store/bar"
|
||||
]
|
||||
},
|
||||
{
|
||||
"path": "/nix/store/bar",
|
||||
"references": [
|
||||
"/nix/store/bar",
|
||||
"/nix/store/tux"
|
||||
]
|
||||
},
|
||||
{
|
||||
"path": "/nix/store/hello",
|
||||
"references": [
|
||||
]
|
||||
}
|
||||
]),
|
||||
["/nix/store/foo", "/nix/store/hello"]
|
||||
)
|
||||
|
||||
|
||||
class TestAnyReferTo(unittest.TestCase):
|
||||
def test_has_references(self):
|
||||
self.assertTrue(
|
||||
any_refer_to(
|
||||
"/nix/store/bar",
|
||||
[
|
||||
{
|
||||
"path": "/nix/store/foo",
|
||||
"references": [
|
||||
"/nix/store/bar"
|
||||
]
|
||||
},
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
def test_no_references(self):
|
||||
self.assertFalse(
|
||||
any_refer_to(
|
||||
"/nix/store/foo",
|
||||
[
|
||||
{
|
||||
"path": "/nix/store/foo",
|
||||
"references": [
|
||||
"/nix/store/foo",
|
||||
"/nix/store/bar"
|
||||
]
|
||||
},
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class TestAllPaths(unittest.TestCase):
|
||||
def test_returns_all_paths(self):
|
||||
self.assertCountEqual(
|
||||
all_paths([
|
||||
{
|
||||
"path": "/nix/store/foo",
|
||||
"references": [
|
||||
"/nix/store/foo",
|
||||
"/nix/store/bar"
|
||||
]
|
||||
},
|
||||
{
|
||||
"path": "/nix/store/bar",
|
||||
"references": [
|
||||
"/nix/store/bar",
|
||||
"/nix/store/tux"
|
||||
]
|
||||
},
|
||||
{
|
||||
"path": "/nix/store/hello",
|
||||
"references": [
|
||||
]
|
||||
}
|
||||
]),
|
||||
["/nix/store/foo", "/nix/store/bar",
|
||||
"/nix/store/hello", "/nix/store/tux", ]
|
||||
)
|
||||
|
||||
def test_no_references(self):
|
||||
self.assertFalse(
|
||||
any_refer_to(
|
||||
"/nix/store/foo",
|
||||
[
|
||||
{
|
||||
"path": "/nix/store/foo",
|
||||
"references": [
|
||||
"/nix/store/foo",
|
||||
"/nix/store/bar"
|
||||
]
|
||||
},
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class TestMakeLookup(unittest.TestCase):
|
||||
def test_returns_lookp(self):
|
||||
self.assertDictEqual(
|
||||
# "references" in the result are iterators so we need
|
||||
# to convert them to a list before asserting.
|
||||
tlz.valmap(over("references", list), make_lookup([
|
||||
{
|
||||
"path": "/nix/store/foo",
|
||||
"references": [
|
||||
"/nix/store/foo",
|
||||
"/nix/store/bar",
|
||||
"/nix/store/hello"
|
||||
]
|
||||
},
|
||||
{
|
||||
"path": "/nix/store/bar",
|
||||
"references": [
|
||||
"/nix/store/bar",
|
||||
"/nix/store/tux"
|
||||
]
|
||||
},
|
||||
{
|
||||
"path": "/nix/store/hello",
|
||||
"references": [
|
||||
]
|
||||
}
|
||||
])),
|
||||
{
|
||||
"/nix/store/foo": {
|
||||
"path": "/nix/store/foo",
|
||||
"references": [
|
||||
"/nix/store/bar",
|
||||
"/nix/store/hello"
|
||||
]
|
||||
},
|
||||
"/nix/store/bar": {
|
||||
"path": "/nix/store/bar",
|
||||
"references": [
|
||||
"/nix/store/tux"
|
||||
]
|
||||
},
|
||||
"/nix/store/hello": {
|
||||
"path": "/nix/store/hello",
|
||||
"references": [
|
||||
]
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class TestMakeGraphSegmentFromRoot(unittest.TestCase):
|
||||
def test_returns_graph(self):
|
||||
self.assertDictEqual(
|
||||
make_graph_segment_from_root({}, "/nix/store/foo", {
|
||||
"/nix/store/foo": ["/nix/store/bar"],
|
||||
"/nix/store/bar": ["/nix/store/tux"],
|
||||
"/nix/store/tux": [],
|
||||
"/nix/store/hello": [],
|
||||
}),
|
||||
{
|
||||
"/nix/store/bar": {
|
||||
"/nix/store/tux": {}
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
def test_returns_graph_tiny(self):
|
||||
self.assertDictEqual(
|
||||
make_graph_segment_from_root({}, "/nix/store/tux", {
|
||||
"/nix/store/foo": ["/nix/store/bar"],
|
||||
"/nix/store/bar": ["/nix/store/tux"],
|
||||
"/nix/store/tux": [],
|
||||
}),
|
||||
{}
|
||||
)
|
||||
|
||||
|
||||
class TestGraphPopularityContest(unittest.TestCase):
|
||||
def test_counts_popularity(self):
|
||||
self.assertDictEqual(
|
||||
graph_popularity_contest({}, {
|
||||
"/nix/store/foo": {
|
||||
"/nix/store/bar": {
|
||||
"/nix/store/baz": {
|
||||
"/nix/store/tux": {}
|
||||
}
|
||||
},
|
||||
"/nix/store/baz": {
|
||||
"/nix/store/tux": {}
|
||||
}
|
||||
}
|
||||
}),
|
||||
{
|
||||
"/nix/store/foo": 1,
|
||||
"/nix/store/bar": 2,
|
||||
"/nix/store/baz": 4,
|
||||
"/nix/store/tux": 6,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class TestOrderByPopularity(unittest.TestCase):
|
||||
def test_returns_in_order(self):
|
||||
self.assertEqual(
|
||||
order_by_popularity({
|
||||
"/nix/store/foo": 1,
|
||||
"/nix/store/bar": 1,
|
||||
"/nix/store/baz": 2,
|
||||
"/nix/store/tux": 2,
|
||||
}),
|
||||
[
|
||||
"/nix/store/baz",
|
||||
"/nix/store/tux",
|
||||
"/nix/store/bar",
|
||||
"/nix/store/foo"
|
||||
]
|
||||
)
|
@ -0,0 +1,227 @@
|
||||
from toolz import curried as tlz
|
||||
from toolz import curry
|
||||
|
||||
from .lib import (
|
||||
debug,
|
||||
debug_plot,
|
||||
DEBUG_PLOT,
|
||||
find_vertex_by_name_or_none,
|
||||
graph_is_empty,
|
||||
is_None,
|
||||
subcomponent_multi,
|
||||
unnest_iterable
|
||||
)
|
||||
|
||||
|
||||
@curry
def coerce_to_singly_rooted_graph(fake_root_name, graph):
    """Add single root to the graph connected to all existing roots.

    If graph has only one root, return the graph unchanged and the name
    of the root vertex.

    Otherwise return a modified graph (copy) and a name of the added root
    vertex.
    """
    # Roots are vertices with no predecessors (in-degree 0).
    roots = graph.vs.select(lambda v: len(graph.predecessors(v)) == 0)
    root_names = roots["name"]

    if len(root_names) == 1:
        return graph, root_names[0]
    else:
        edges = [(fake_root_name, v) for v in root_names]
        # igraph's `+` operator builds a NEW graph with the extra vertex
        # and edges; the input graph is not mutated (callers rely on
        # this to know whether a copy was made).
        graph_with_root = graph + fake_root_name + edges
        return graph_with_root, fake_root_name
|
||||
|
||||
|
||||
@curry
def remove_vertex(vertex_name, graph):
    """Remove vertex with given name, returning copy of input graph if vertex
    with given name is found in the graph; otherwise the graph unchanged.
    """
    if not find_vertex_by_name_or_none(graph)(vertex_name):
        return graph
    return graph - vertex_name
|
||||
|
||||
|
||||
def get_children_of(graph, vertex_names):
    """Yield the direct successors (vertex indices) of the named vertices.

    Names that do not match any vertex in ``graph`` are silently skipped.
    """
    return unnest_iterable(map(
        graph.successors,
        tlz.remove(
            is_None,
            map(
                find_vertex_by_name_or_none(graph),
                vertex_names
            )
        )
    ))
|
||||
|
||||
|
||||
def as_list(x):
    """Wrap ``x`` in a single-element list unless it already is a list."""
    if isinstance(x, list):
        return x
    return [x]
|
||||
|
||||
|
||||
@curry
def split_path_spec_to_indices(graph, split_path_spec):
    """Resolve one split-path spec to a list of vertex indices in ``graph``.

    A spec is either a vertex name (resolved to at most one index, or []
    when absent from the graph) or a dict ``{"children_of": name_or_list}``
    which resolves to the direct successors of the named vertices.

    Raises ValueError for a dict spec with unrecognized keys.
    """
    debug("split_path_spec", split_path_spec)
    if isinstance(split_path_spec, dict):
        if "children_of" in split_path_spec:
            children_of = split_path_spec["children_of"]

            return get_children_of(graph, as_list(children_of))
        else:
            # Fix: the two adjacent string literals previously
            # concatenated without a space ("invalid keys.Valid:").
            # ValueError is a subclass of Exception, so existing
            # `except Exception` handlers still catch it.
            raise ValueError(
                "Unexpected split path spec: dict with invalid keys. "
                "Valid: [\"children_of\"]"
            )
    else:
        vertex = find_vertex_by_name_or_none(graph)(split_path_spec)
        return [] if is_None(vertex) else [vertex.index]
|
||||
|
||||
|
||||
call_count = 0
|
||||
|
||||
|
||||
@curry
def split_paths(split_paths, graph_in):
    """Split ``graph_in`` into up to three subgraphs around ``split_paths``.

    Returns a dict containing whichever of these subgraphs are non-empty:
      - "main":   the split paths plus the deps only they can reach
      - "common": deps shared by the split paths and the rest of the graph
      - "rest":   everything not reachable from the split paths

    Short circuits: if no spec matches a vertex, returns
    ``{"rest": graph_in}``; if the (possibly synthetic) root itself is
    selected, returns ``{"main": graph_in}``.  ``graph_in`` is never
    mutated — a copy is made before edges are deleted.
    """
    debug("____")
    debug("split_paths:", split_paths)
    debug("graph_in:", graph_in)

    if DEBUG_PLOT:
        global call_count
        graph_name_prefix = f"split_paths_{call_count}_"
        call_count += 1

    # Convert list of split_paths into list of vertex indices. Ignores
    # split_paths which don"t match any vertices in the graph.
    # All edges pointing at the indices will be deleted from the graph.
    split_path_indices = list(unnest_iterable(map(
        split_path_spec_to_indices(graph_in),
        split_paths
    )))

    debug("split_path_indices:", split_path_indices)

    # Short circuit if there is nothing to do (split_paths didn"t match any
    # vertices in the graph).
    if len(split_path_indices) == 0:
        if DEBUG_PLOT:
            layout = graph_in.layout('tree')
            debug_plot(graph_in, f"{graph_name_prefix}input", layout=layout)
            debug_plot(graph_in, f"{graph_name_prefix}result", layout=layout)

        return {"rest": graph_in}

    # If graph has multiple roots, add a single one connecting all existing
    # roots to make it easy to split the graph into 2 sets of vertices after
    # deleting edges pointing at split_path_indices.
    fake_root_name = "__root__"
    graph, root_name = coerce_to_singly_rooted_graph(fake_root_name, graph_in)

    debug("root_name", root_name)

    if (
        find_vertex_by_name_or_none(graph)(root_name).index
        in split_path_indices
    ):
        if DEBUG_PLOT:
            layout = graph_in.layout('tree')
            debug_plot(graph_in, f"{graph_name_prefix}input", layout=layout)
            debug_plot(
                graph_in,
                f"{graph_name_prefix}result",
                layout=layout,
                vertex_color="green"
            )

        return {"main": graph_in}

    # Copy graph if coerce_to_singly_rooted_graph has not already created
    # a copy, since we are going to mutate the graph and don"t want to
    # mutate a function argument.
    graph = graph if graph is not graph_in else graph.copy()

    if DEBUG_PLOT:
        layout = graph.layout('tree')
        debug_plot(graph, f"{graph_name_prefix}input", layout=layout)

    # Get incidences of all vertices which can be reached split_path_indices
    # (including split_path_indices). This is a set of all split_paths and their
    # dependencies.
    split_off_vertex_indices = frozenset(
        subcomponent_multi(graph, split_path_indices))
    debug("split_off_vertex_indices", split_off_vertex_indices)

    # Delete edges which point at any of the vertices in split_path_indices.
    graph.delete_edges(_target_in=split_path_indices)

    if DEBUG_PLOT:
        debug_plot(graph, f"{graph_name_prefix}deleted_edges", layout=layout)

    # Get incidences of all vertices which can be reached from the root. Since
    # edges pointing at split_path_indices have been deleted, none of the
    # split_path_indices will be included. Dependencies of rest_with_common will
    # only be included if they can be reached from any vertex which is itself
    # not in split_off_vertex_indices.
    rest_with_common = frozenset(graph.subcomponent(root_name, mode="out"))
    debug("rest_with_common", rest_with_common)

    # Get a set of all dependencies common to split_path_indices and the rest
    # of the graph.
    common = split_off_vertex_indices.intersection(rest_with_common)
    debug("common", common)

    # Get a set of vertices which cannot be reached from split_path_indices.
    rest_without_common = rest_with_common.difference(common)
    debug("rest_without_common", rest_without_common)

    # Get a set of split_path_indices and their dependencies which cannot be
    # reached from the rest of the graph.
    split_off_without_common = split_off_vertex_indices.difference(common)
    debug("split_off_without_common", split_off_without_common)

    if DEBUG_PLOT:
        def choose_color(index):
            if (index in split_off_without_common):
                return "green"
            elif (index in rest_without_common):
                return "red"
            else:
                return "purple"

        vertex_color = [choose_color(v.index) for v in graph.vs]

        debug_plot(
            graph,
            f"{graph_name_prefix}result",
            layout=layout,
            vertex_color=vertex_color
        )

    # Return subgraphs based on calculated sets of vertices.

    result_keys = ["main", "common", "rest"]
    result_values = [
        # Split paths and their deps (unreachable from rest of the graph).
        graph.induced_subgraph(split_off_without_common),
        # Dependencies of split paths which can be reached from the rest of the
        # graph.
        graph.induced_subgraph(common),
        # Rest of the graph (without dependencies common with split paths).
        graph.induced_subgraph(rest_without_common),
    ]

    debug('result_values', result_values[0].vs["name"])

    return tlz.valfilter(
        tlz.complement(graph_is_empty),
        dict(zip(
            result_keys,
            (
                result_values if root_name != fake_root_name
                # If root was added, remove it
                else tlz.map(remove_vertex(fake_root_name), result_values)
            )
        ))
    )
|
@ -0,0 +1,184 @@
|
||||
import unittest
|
||||
from toolz import curry
|
||||
|
||||
from . import test_helpers as th
|
||||
|
||||
from .split_paths import (
|
||||
split_paths
|
||||
)
|
||||
|
||||
from .lib import (
|
||||
directed_graph,
|
||||
pick_keys
|
||||
)
|
||||
|
||||
|
||||
# Allow running this test module directly.
# NOTE(review): this guard sits at the top of the file, before the Test
# class is defined; unittest.main() discovers tests in the already-partially-
# initialized __main__ module, so a direct run may find no tests and exit
# immediately — confirm the intended entry point (pytest/discover vs direct).
if __name__ == "__main__":
    unittest.main()
|
||||
|
||||
|
||||
# Making sure vertex attrs are preserved.
|
||||
vertex_props_dict = {
|
||||
"Root1": {"a": 1, "b": 1},
|
||||
"B": {"b": 2},
|
||||
"X": {"x": 3}
|
||||
}
|
||||
|
||||
|
||||
def make_test_graph():
    """Build the directed graph shared by the tests in this module.

    Three roots (Root1..Root3), a diamond around B/D, one detached
    vertex "X", and vertex attributes taken from vertex_props_dict.
    """
    edge_list = [
        ("Root1", "A"),
        ("A", "B"),
        ("A", "D"),
        ("D", "E"),
        ("B", "D"),
        ("B", "F"),
        ("Root2", "B"),
        ("Root3", "C"),
    ]
    return directed_graph(edge_list, ["X"], vertex_props_dict.items())
|
||||
|
||||
|
||||
class CustomAssertions:
    """Assertion mix-in: check a result dict's keys and pass it through."""

    @curry
    def assertResultKeys(self, keys, result):
        """Assert that *result* has exactly *keys* (in order); return *result*."""
        actual_keys = list(result.keys())
        self.assertListEqual(actual_keys, keys)
        return result
|
||||
|
||||
|
||||
class Test(
    unittest.TestCase,
    CustomAssertions,
    th.CustomAssertions
):
    """Tests for split_paths, which partitions a graph into the split-off
    paths ("main"), dependencies shared with the rest ("common"), and the
    remainder ("rest"); empty parts are dropped from the result dict."""

    def test_empty_paths(self):
        # Splitting on no paths leaves the whole graph in "rest".
        input_graph = make_test_graph()

        result = self.assertResultKeys(
            ["rest"],
            split_paths([], input_graph)
        )

        self.assertGraphEqual(
            result["rest"],
            input_graph
        )

    def test_empty_graph(self):
        # An empty graph yields only an (empty) "rest", whether or not
        # any paths are requested.
        empty_graph = directed_graph([])

        def test_empty(paths):
            result = self.assertResultKeys(
                ["rest"],
                split_paths(paths, empty_graph)
            )

            self.assertGraphEqual(
                result["rest"],
                empty_graph
            )

        test_empty([])
        test_empty(["B"])

    def test_split_paths_single(self):
        # Splitting off "B": B and its exclusive dependency F go to "main";
        # dependencies shared with the rest (D -> E) go to "common".
        result = self.assertResultKeys(
            ["main", "common", "rest"],
            split_paths(["B"], make_test_graph())
        )

        self.assertGraphEqual(
            result["main"],
            directed_graph(
                [
                    ("B", "F")
                ],
                None,
                pick_keys(["B"], vertex_props_dict).items()
            )
        )

        # Root2 loses its only edge (to B) and becomes a detached vertex.
        self.assertGraphEqual(
            result["rest"],
            directed_graph(
                [
                    ("Root1", "A"),
                    ("Root3", "C")
                ],
                ["Root2", "X"],
                pick_keys(["Root1", "X"], vertex_props_dict).items()
            )
        )

        self.assertGraphEqual(
            result["common"],
            directed_graph([("D", "E")])
        )

    def test_split_paths_multi(self):
        # Splitting off several paths at once merges them into one "main".
        result = self.assertResultKeys(
            ["main", "common", "rest"],
            split_paths(["B", "Root3"], make_test_graph())
        )

        self.assertGraphEqual(
            result["main"],
            directed_graph(
                [
                    ("B", "F"),
                    ("Root3", "C")
                ],
                None,
                pick_keys(["B"], vertex_props_dict).items()
            )
        )

        self.assertGraphEqual(
            result["rest"],
            directed_graph(
                [("Root1", "A")],
                ["Root2", "X"],
                pick_keys(["Root1", "X"], vertex_props_dict).items()
            )
        )

        self.assertGraphEqual(
            result["common"],
            directed_graph([("D", "E")])
        )

    def test_split_no_common(self):
        # When the split-off subtree shares nothing with the rest, the
        # "common" key is absent from the result.
        result = self.assertResultKeys(
            ["main", "rest"],
            split_paths(["D"], make_test_graph())
        )

        self.assertGraphEqual(
            result["main"],
            directed_graph([("D", "E")])
        )

        self.assertGraphEqual(
            result["rest"],
            directed_graph(
                [
                    ("Root1", "A"),
                    ("A", "B"),
                    ("B", "F"),
                    ("Root2", "B"),
                    ("Root3", "C"),
                ],
                ["X"],
                pick_keys(["Root1", "B", "X"], vertex_props_dict).items()
            )
        )
|
@ -0,0 +1,67 @@
|
||||
from toolz import curry
|
||||
from toolz import curried as tlz
|
||||
from operator import attrgetter
|
||||
|
||||
from .lib import (
|
||||
debug,
|
||||
debug_plot,
|
||||
DEBUG_PLOT,
|
||||
find_vertex_by_name_or_none,
|
||||
is_None,
|
||||
subcomponent_multi
|
||||
)
|
||||
|
||||
|
||||
# Per-mode invocation counters, used only to generate unique names for the
# debug plots emitted when DEBUG_PLOT is enabled.
call_counts = {
    "in": 0,
    "out": 0
}
|
||||
|
||||
|
||||
@curry
def subcomponent(mode, paths, graph):
    """Split *graph* into the part reachable from *paths* and the rest.

    mode: traversal direction passed to subcomponent_multi — "out" follows
    edges away from the given paths, "in" follows edges towards them.
    paths: vertex names; names not present in the graph are ignored.
    Returns {"main": induced subgraph of reachable vertices,
             "rest": the graph with those vertices removed}.
    """
    if DEBUG_PLOT:
        global call_counts
        # Unique prefix per call so successive debug plots don't overwrite
        # each other.
        graph_name_prefix = f"subcomponent_{mode}_{call_counts[mode]}_"
        call_counts[mode] += 1

        layout = graph.layout('tree')
        debug_plot(graph, f"{graph_name_prefix}input", layout=layout)

    # Map path names to vertex indices, silently dropping names that are
    # not present in the graph.
    # NOTE(review): this is a lazy iterator; it is passed to debug() before
    # being consumed below — assumes debug() does not exhaust it. TODO confirm.
    path_indices = tlz.compose(
        tlz.map(attrgetter('index')),
        tlz.remove(is_None),
        tlz.map(find_vertex_by_name_or_none(graph))
    )(paths)

    debug("path_indices", path_indices)

    # All vertices reachable from path_indices in the given direction
    # (including the path vertices themselves).
    main_indices = list(subcomponent_multi(graph, path_indices, mode))

    debug('main_indices', main_indices)

    if DEBUG_PLOT:
        # Color reachable ("main") vertices green, everything else red.
        def choose_color(index):
            if (index in main_indices):
                return "green"
            else:
                return "red"

        vertex_color = [choose_color(v.index) for v in graph.vs]

        debug_plot(
            graph,
            f"{graph_name_prefix}result",
            layout=layout,
            vertex_color=vertex_color
        )

    return {
        "main": graph.induced_subgraph(main_indices),
        # Presumably igraph's Graph subtraction: deletes the listed vertex
        # indices, leaving the complement of "main" — verify against the
        # igraph API.
        "rest": graph - main_indices
    }
|
||||
|
||||
|
||||
# Partial applications of the curried subcomponent, fixing the traversal
# direction; each still expects (paths, graph).
subcomponent_in = subcomponent("in")

subcomponent_out = subcomponent("out")
|
@ -0,0 +1,219 @@
|
||||
import unittest
|
||||
|
||||
from . import test_helpers as th
|
||||
|
||||
from .subcomponent import (
|
||||
subcomponent_out,
|
||||
subcomponent_in
|
||||
)
|
||||
|
||||
from .lib import (
|
||||
pick_keys,
|
||||
directed_graph,
|
||||
empty_directed_graph
|
||||
)
|
||||
|
||||
|
||||
# Allow running this test module directly.
# NOTE(review): this guard sits at the top of the file, before the Test
# class is defined; unittest.main() discovers tests in the already-partially-
# initialized __main__ module, so a direct run may find no tests and exit
# immediately — confirm the intended entry point (pytest/discover vs direct).
if __name__ == "__main__":
    unittest.main()
|
||||
|
||||
|
||||
# Making sure vertex attrs are preserved.
|
||||
vertex_props_dict = {
|
||||
"Root1": {"a": 1, "b": 1},
|
||||
"B": {"b": 2},
|
||||
"X": {"x": 3}
|
||||
}
|
||||
|
||||
|
||||
def make_test_graph():
    """Build the directed graph shared by the tests in this module.

    Three roots (Root1..Root3), a fan-out under B, one detached vertex
    "X", and vertex attributes taken from vertex_props_dict.
    """
    return directed_graph(
        [
            ("Root1", "A"),
            ("A", "B"),
            ("A", "C"),
            ("B", "D"),
            ("B", "E"),
            ("Root2", "B"),
            ("Root3", "C"),
        ],
        ["X"],
        vertex_props_dict.items(),
    )
|
||||
|
||||
|
||||
class CustomAssertions:
    """Assertion mix-in for the subcomponent tests."""

    def assertResultKeys(self, result, keys=("main", "rest")):
        """Assert that *result* has exactly the expected keys, in order.

        keys defaults to ("main", "rest") — the shape returned by
        subcomponent_in/subcomponent_out — but can be overridden for other
        result shapes, mirroring the parameterized helper used by the
        split_paths tests.  Returns *result* unchanged so calls can be
        inlined around the expression under test.
        """
        self.assertListEqual(
            list(result.keys()),
            list(keys)
        )

        return result
|
||||
|
||||
|
||||
class Test(
    unittest.TestCase,
    CustomAssertions,
    th.CustomAssertions
):
    """Tests for subcomponent_out/subcomponent_in, which split a graph
    into the vertices reachable from given paths ("main") and everything
    else ("rest")."""

    def test_empty_paths(self):
        # With no paths, nothing is reachable: "main" is empty and
        # "rest" is the whole input graph — in both directions.
        def test(func):
            input_graph = make_test_graph()

            result = self.assertResultKeys(
                func([], input_graph)
            )

            self.assertGraphEqual(
                result["main"],
                empty_directed_graph()
            )

            self.assertGraphEqual(
                result["rest"],
                input_graph
            )

        test(subcomponent_out)
        test(subcomponent_in)

    def test_empty_graph(self):
        # An empty graph yields empty "main" and "rest", whether or not
        # any paths are requested — in both directions.
        def test(func):
            empty_graph = empty_directed_graph()

            def test_empty(paths):
                result = self.assertResultKeys(
                    func(paths, empty_graph)
                )

                self.assertGraphEqual(
                    result["main"],
                    empty_graph
                )

                self.assertGraphEqual(
                    result["rest"],
                    empty_graph
                )

            test_empty([])
            test_empty(["B"])

        test(subcomponent_out)
        test(subcomponent_in)

    def test_subcomponent_out(self):
        # "out" from B reaches B and its children D, E.
        result = self.assertResultKeys(
            subcomponent_out(["B"], make_test_graph())
        )

        self.assertGraphEqual(
            result["main"],
            directed_graph(
                [
                    ("B", "D"),
                    ("B", "E")
                ],
                None,
                pick_keys(["B"], vertex_props_dict).items()
            )
        )

        # Root2 loses its only edge (to B) and becomes a detached vertex.
        self.assertGraphEqual(
            result["rest"],
            directed_graph(
                [
                    ("Root1", "A"),
                    ("A", "C"),
                    ("Root3", "C")
                ],
                ["Root2", "X"],
                pick_keys(["Root1", "X"], vertex_props_dict).items()
            )
        )

    def test_subcomponent_out_multi(self):
        # Multiple starting paths are merged into a single "main".
        result = self.assertResultKeys(
            subcomponent_out(["B", "Root3"], make_test_graph())
        )

        self.assertGraphEqual(
            result["main"],
            directed_graph(
                [
                    ("B", "D"),
                    ("B", "E"),
                    ("Root3", "C")
                ],
                None,
                pick_keys(["B"], vertex_props_dict).items()
            )
        )

        self.assertGraphEqual(
            result["rest"],
            directed_graph(
                [("Root1", "A")],
                ["Root2", "X"],
                pick_keys(["Root1", "X"], vertex_props_dict).items()
            )
        )

    def test_subcomponent_in(self):
        # "in" towards B reaches B and everything that can reach it
        # (A, Root1, Root2).
        result = self.assertResultKeys(
            subcomponent_in(["B"], make_test_graph())
        )

        self.assertGraphEqual(
            result["main"],
            directed_graph(
                [
                    ("Root1", "A"),
                    ("A", "B"),
                    ("Root2", "B")
                ],
                None,
                pick_keys(["Root1", "B"], vertex_props_dict).items()
            )
        )

        # B's former children D, E are left behind as detached vertices.
        self.assertGraphEqual(
            result["rest"],
            directed_graph(
                [("Root3", "C")],
                ["D", "E", "X"],
                pick_keys(["X"], vertex_props_dict).items()
            )
        )

    def test_subcomponent_in_multi(self):
        # Root3 has no ancestors, so it joins "main" as a detached vertex.
        result = self.assertResultKeys(
            subcomponent_in(["B", "Root3"], make_test_graph())
        )

        self.assertGraphEqual(
            result["main"],
            directed_graph(
                [
                    ("Root1", "A"),
                    ("A", "B"),
                    ("Root2", "B"),
                ],
                ["Root3"],
                pick_keys(["Root1", "B"], vertex_props_dict).items()

            )
        )

        self.assertGraphEqual(
            result["rest"],
            directed_graph(
                [],
                ["C", "D", "E", "X"],
                pick_keys(["X"], vertex_props_dict).items()
            )
        )
|
@ -0,0 +1,37 @@
|
||||
from toolz import curried as tlz
|
||||
|
||||
from .lib import (
|
||||
not_None,
|
||||
graph_vertex_index_to_name
|
||||
)
|
||||
|
||||
|
||||
def edges_as_set(graph):
    """Return the edges of *graph* as a frozenset of (source, target) name pairs."""
    def endpoint_names(edge):
        return (
            graph_vertex_index_to_name(graph, edge.source),
            graph_vertex_index_to_name(graph, edge.target)
        )

    return frozenset(map(endpoint_names, graph.es))
|
||||
|
||||
|
||||
class CustomAssertions:
    """Assertion mix-in for comparing graphs structurally in tests."""

    def assertGraphEqual(self, g1, g2):
        """Assert that g1 and g2 have the same vertex names, the same
        edges, and the same non-None attributes on every vertex."""
        def attrs_of(graph, vertex_name):
            # Drop attributes whose value is None (unset on this vertex).
            return tlz.valfilter(
                not_None,
                graph.vs.find(vertex_name).attributes()
            )

        self.assertSetEqual(
            frozenset(g1.vs["name"]),
            frozenset(g2.vs["name"])
        )

        self.assertSetEqual(
            edges_as_set(g1),
            edges_as_set(g2)
        )

        for vertex_name in g1.vs["name"]:
            self.assertDictEqual(
                attrs_of(g1, vertex_name),
                attrs_of(g2, vertex_name),
            )
|
17
pkgs/by-name/fl/flattenReferencesGraph/src/setup.py
Normal file
17
pkgs/by-name/fl/flattenReferencesGraph/src/setup.py
Normal file
@ -0,0 +1,17 @@
|
||||
from setuptools import setup

# Packaging metadata for flatten_references_graph (per the surrounding
# change, used by nixpkgs dockerTools to compute docker layers from a
# references graph).
setup(
    name="flatten_references_graph",
    version="0.1.0",
    author="Adrian Gierakowski",
    packages=["flatten_references_graph"],
    # igraph provides the graph algorithms, toolz the functional helpers.
    install_requires=[
        "igraph",
        "toolz"
    ],
    entry_points={
        # Expose the package's __main__:main as a CLI command.
        "console_scripts": [
            "flatten_references_graph=flatten_references_graph.__main__:main"
        ]
    }
)
|
@ -779,6 +779,8 @@ with pkgs;
|
||||
|
||||
referencesByPopularity = callPackage ../build-support/references-by-popularity { };
|
||||
|
||||
dockerMakeLayers = callPackage ../build-support/docker/make-layers.nix { };
|
||||
|
||||
removeReferencesTo = callPackage ../build-support/remove-references-to {
|
||||
inherit (darwin) signingUtils;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user