nixos/hadoop: add module options for commonly used service configs
This commit is contained in:
parent
bef71d7c53
commit
e1017adb32
@ -1,6 +1,6 @@
|
||||
{ cfg, pkgs, lib }:
|
||||
let
|
||||
propertyXml = name: value: ''
|
||||
propertyXml = name: value: lib.optionalString (value != null) ''
|
||||
<property>
|
||||
<name>${name}</name>
|
||||
<value>${builtins.toString value}</value>
|
||||
@ -29,16 +29,16 @@ let
|
||||
export HADOOP_LOG_DIR=/tmp/hadoop/$USER
|
||||
'';
|
||||
in
|
||||
pkgs.runCommand "hadoop-conf" {} ''
|
||||
pkgs.runCommand "hadoop-conf" {} (with cfg; ''
|
||||
mkdir -p $out/
|
||||
cp ${siteXml "core-site.xml" cfg.coreSite}/* $out/
|
||||
cp ${siteXml "hdfs-site.xml" cfg.hdfsSite}/* $out/
|
||||
cp ${siteXml "mapred-site.xml" cfg.mapredSite}/* $out/
|
||||
cp ${siteXml "yarn-site.xml" cfg.yarnSite}/* $out/
|
||||
cp ${siteXml "httpfs-site.xml" cfg.httpfsSite}/* $out/
|
||||
cp ${cfgFile "container-executor.cfg" cfg.containerExecutorCfg}/* $out/
|
||||
cp ${siteXml "core-site.xml" (coreSite // coreSiteInternal)}/* $out/
|
||||
cp ${siteXml "hdfs-site.xml" (hdfsSiteDefault // hdfsSite // hdfsSiteInternal)}/* $out/
|
||||
cp ${siteXml "mapred-site.xml" (mapredSiteDefault // mapredSite)}/* $out/
|
||||
cp ${siteXml "yarn-site.xml" (yarnSiteDefault // yarnSite // yarnSiteInternal)}/* $out/
|
||||
cp ${siteXml "httpfs-site.xml" httpfsSite}/* $out/
|
||||
cp ${cfgFile "container-executor.cfg" containerExecutorCfg}/* $out/
|
||||
cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/
|
||||
cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/
|
||||
cp ${cfg.log4jProperties} $out/log4j.properties
|
||||
${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") cfg.extraConfDirs}
|
||||
''
|
||||
cp ${log4jProperties} $out/log4j.properties
|
||||
${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") extraConfDirs}
|
||||
'')
|
||||
|
@ -21,25 +21,50 @@ with lib;
|
||||
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml"/>
|
||||
'';
|
||||
};
|
||||
coreSiteInternal = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
internal = true;
|
||||
description = ''
|
||||
Internal option to add configs to core-site.xml based on module options
|
||||
'';
|
||||
};
|
||||
|
||||
hdfsSite = mkOption {
|
||||
hdfsSiteDefault = mkOption {
|
||||
default = {
|
||||
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
|
||||
"dfs.namenode.http-address" = "0.0.0.0:9870";
|
||||
"dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
|
||||
"dfs.namenode.http-bind-host" = "0.0.0.0";
|
||||
};
|
||||
type = types.attrsOf types.anything;
|
||||
description = ''
|
||||
Default options for hdfs-site.xml
|
||||
'';
|
||||
};
|
||||
hdfsSite = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
example = literalExpression ''
|
||||
{
|
||||
"dfs.nameservices" = "namenode1";
|
||||
}
|
||||
'';
|
||||
description = ''
|
||||
Hadoop hdfs-site.xml definition
|
||||
Additional options and overrides for hdfs-site.xml
|
||||
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml"/>
|
||||
'';
|
||||
};
|
||||
hdfsSiteInternal = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
internal = true;
|
||||
description = ''
|
||||
Internal option to add configs to hdfs-site.xml based on module options
|
||||
'';
|
||||
};
|
||||
|
||||
mapredSite = mkOption {
|
||||
mapredSiteDefault = mkOption {
|
||||
default = {
|
||||
"mapreduce.framework.name" = "yarn";
|
||||
"yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
|
||||
@ -55,18 +80,25 @@ with lib;
|
||||
}
|
||||
'';
|
||||
type = types.attrsOf types.anything;
|
||||
description = ''
|
||||
Default options for mapred-site.xml
|
||||
'';
|
||||
};
|
||||
mapredSite = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
example = literalExpression ''
|
||||
options.services.hadoop.mapredSite.default // {
|
||||
{
|
||||
"mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC";
|
||||
}
|
||||
'';
|
||||
description = ''
|
||||
Hadoop mapred-site.xml definition
|
||||
Additional options and overrides for mapred-site.xml
|
||||
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml"/>
|
||||
'';
|
||||
};
|
||||
|
||||
yarnSite = mkOption {
|
||||
yarnSiteDefault = mkOption {
|
||||
default = {
|
||||
"yarn.nodemanager.admin-env" = "PATH=$PATH";
|
||||
"yarn.nodemanager.aux-services" = "mapreduce_shuffle";
|
||||
@ -79,22 +111,33 @@ with lib;
|
||||
"yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
|
||||
"yarn.resourcemanager.bind-host" = "0.0.0.0";
|
||||
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler";
|
||||
"yarn.nodemanager.linux-container-executor.cgroups.hierarchy" = "/hadoop-yarn";
|
||||
"yarn.nodemanager.linux-container-executor.resources-handler.class" = "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler";
|
||||
"yarn.nodemanager.linux-container-executor.cgroups.mount" = "true";
|
||||
"yarn.nodemanager.linux-container-executor.cgroups.mount-path" = "/run/wrappers/yarn-nodemanager/cgroup";
|
||||
};
|
||||
type = types.attrsOf types.anything;
|
||||
description = ''
|
||||
Default options for yarn-site.xml
|
||||
'';
|
||||
};
|
||||
yarnSite = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
example = literalExpression ''
|
||||
options.services.hadoop.yarnSite.default // {
|
||||
{
|
||||
"yarn.resourcemanager.hostname" = "''${config.networking.hostName}";
|
||||
}
|
||||
'';
|
||||
description = ''
|
||||
Hadoop yarn-site.xml definition
|
||||
Additional options and overrides for yarn-site.xml
|
||||
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-common/yarn-default.xml"/>
|
||||
'';
|
||||
};
|
||||
yarnSiteInternal = mkOption {
|
||||
default = {};
|
||||
type = types.attrsOf types.anything;
|
||||
internal = true;
|
||||
description = ''
|
||||
Internal option to add configs to yarn-site.xml based on module options
|
||||
'';
|
||||
};
|
||||
|
||||
httpfsSite = mkOption {
|
||||
default = { };
|
||||
|
@ -7,7 +7,7 @@ let
|
||||
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
|
||||
|
||||
# Generator for HDFS service options
|
||||
hadoopServiceOption = { serviceName, firewallOption ? true }: {
|
||||
hadoopServiceOption = { serviceName, firewallOption ? true, extraOpts ? null }: {
|
||||
enable = mkEnableOption serviceName;
|
||||
restartIfChanged = mkOption {
|
||||
type = types.bool;
|
||||
@ -19,13 +19,27 @@ let
|
||||
'';
|
||||
default = false;
|
||||
};
|
||||
extraFlags = mkOption{
|
||||
type = with types; listOf str;
|
||||
default = [];
|
||||
description = "Extra command line flags to pass to ${serviceName}";
|
||||
example = [
|
||||
"-Dcom.sun.management.jmxremote"
|
||||
"-Dcom.sun.management.jmxremote.port=8010"
|
||||
];
|
||||
};
|
||||
extraEnv = mkOption{
|
||||
type = with types; attrsOf str;
|
||||
default = {};
|
||||
description = "Extra environment variables for ${serviceName}";
|
||||
};
|
||||
} // (optionalAttrs firewallOption {
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Open firewall ports for ${serviceName}.";
|
||||
};
|
||||
});
|
||||
}) // (optionalAttrs (extraOpts != null) extraOpts);
|
||||
|
||||
# Generator for HDFS service configs
|
||||
hadoopServiceConfig =
|
||||
@ -36,17 +50,19 @@ let
|
||||
, allowedTCPPorts ? [ ]
|
||||
, preStart ? ""
|
||||
, environment ? { }
|
||||
, extraConfig ? { }
|
||||
}: (
|
||||
|
||||
mkIf serviceOptions.enable {
|
||||
mkIf serviceOptions.enable ( mkMerge [{
|
||||
systemd.services."hdfs-${toLower name}" = {
|
||||
inherit description preStart environment;
|
||||
inherit description preStart;
|
||||
environment = environment // serviceOptions.extraEnv;
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (serviceOptions) restartIfChanged;
|
||||
serviceConfig = {
|
||||
inherit User;
|
||||
SyslogIdentifier = "hdfs-${toLower name}";
|
||||
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${toLower name}";
|
||||
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${toLower name} ${escapeShellArgs serviceOptions.extraFlags}";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
@ -56,7 +72,7 @@ let
|
||||
networking.firewall.allowedTCPPorts = mkIf
|
||||
((builtins.hasAttr "openFirewall" serviceOptions) && serviceOptions.openFirewall)
|
||||
allowedTCPPorts;
|
||||
}
|
||||
} extraConfig])
|
||||
);
|
||||
|
||||
in
|
||||
@ -77,7 +93,27 @@ in
|
||||
};
|
||||
};
|
||||
|
||||
datanode = hadoopServiceOption { serviceName = "HDFS DataNode"; };
|
||||
datanode = hadoopServiceOption { serviceName = "HDFS DataNode"; } // {
|
||||
dataDirs = mkOption {
|
||||
default = null;
|
||||
description = "Tier and path definitions for datanode storage.";
|
||||
type = with types; nullOr (listOf (submodule {
|
||||
options = {
|
||||
type = mkOption {
|
||||
type = enum [ "SSD" "DISK" "ARCHIVE" "RAM_DISK" ];
|
||||
description = ''
|
||||
Storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]) for HDFS storage policies.
|
||||
'';
|
||||
};
|
||||
path = mkOption {
|
||||
type = path;
|
||||
example = [ "/var/lib/hadoop/hdfs/dn" ];
|
||||
description = "Determines where on the local filesystem a data node should store its blocks.";
|
||||
};
|
||||
};
|
||||
}));
|
||||
};
|
||||
};
|
||||
|
||||
journalnode = hadoopServiceOption { serviceName = "HDFS JournalNode"; };
|
||||
|
||||
@ -122,6 +158,8 @@ in
|
||||
50010 # datanode.address
|
||||
50020 # datanode.ipc.address
|
||||
];
|
||||
extraConfig.services.hadoop.hdfsSiteInternal."dfs.datanode.data.dir" = let d = cfg.hdfs.datanode.dataDirs; in
|
||||
if (d!= null) then (concatMapStringsSep "," (x: "["+x.type+"]file://"+x.path) cfg.hdfs.datanode.dataDirs) else d;
|
||||
})
|
||||
|
||||
(hadoopServiceConfig {
|
||||
|
@ -13,12 +13,27 @@ let
|
||||
'';
|
||||
default = false;
|
||||
};
|
||||
extraFlags = mkOption{
|
||||
type = with types; listOf str;
|
||||
default = [];
|
||||
description = "Extra command line flags to pass to the service";
|
||||
example = [
|
||||
"-Dcom.sun.management.jmxremote"
|
||||
"-Dcom.sun.management.jmxremote.port=8010"
|
||||
];
|
||||
};
|
||||
extraEnv = mkOption{
|
||||
type = with types; attrsOf str;
|
||||
default = {};
|
||||
description = "Extra environment variables";
|
||||
};
|
||||
in
|
||||
{
|
||||
options.services.hadoop.yarn = {
|
||||
resourcemanager = {
|
||||
enable = mkEnableOption "Hadoop YARN ResourceManager";
|
||||
inherit restartIfChanged;
|
||||
inherit restartIfChanged extraFlags extraEnv;
|
||||
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
@ -29,7 +44,46 @@ in
|
||||
};
|
||||
nodemanager = {
|
||||
enable = mkEnableOption "Hadoop YARN NodeManager";
|
||||
inherit restartIfChanged;
|
||||
inherit restartIfChanged extraFlags extraEnv;
|
||||
|
||||
resource = {
|
||||
cpuVCores = mkOption {
|
||||
description = "Number of vcores that can be allocated for containers.";
|
||||
type = with types; nullOr ints.positive;
|
||||
default = null;
|
||||
};
|
||||
maximumAllocationVCores = mkOption {
|
||||
description = "The maximum virtual CPU cores any container can be allocated.";
|
||||
type = with types; nullOr ints.positive;
|
||||
default = null;
|
||||
};
|
||||
memoryMB = mkOption {
|
||||
description = "Amount of physical memory, in MB, that can be allocated for containers.";
|
||||
type = with types; nullOr ints.positive;
|
||||
default = null;
|
||||
};
|
||||
maximumAllocationMB = mkOption {
|
||||
description = "The maximum physical memory any container can be allocated.";
|
||||
type = with types; nullOr ints.positive;
|
||||
default = null;
|
||||
};
|
||||
};
|
||||
|
||||
useCGroups = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Use cgroups to enforce resource limits on containers
|
||||
'';
|
||||
};
|
||||
|
||||
localDir = mkOption {
|
||||
description = "List of directories to store localized files in.";
|
||||
type = with types; nullOr (listOf path);
|
||||
example = [ "/var/lib/hadoop/yarn/nm" ];
|
||||
default = null;
|
||||
};
|
||||
|
||||
addBinBash = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
@ -62,12 +116,13 @@ in
|
||||
description = "Hadoop YARN ResourceManager";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (cfg.yarn.resourcemanager) restartIfChanged;
|
||||
environment = cfg.yarn.resourcemanager.extraEnv;
|
||||
|
||||
serviceConfig = {
|
||||
User = "yarn";
|
||||
SyslogIdentifier = "yarn-resourcemanager";
|
||||
ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
|
||||
" resourcemanager";
|
||||
" resourcemanager ${escapeShellArgs cfg.yarn.resourcemanager.extraFlags}";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
@ -94,6 +149,7 @@ in
|
||||
description = "Hadoop YARN NodeManager";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (cfg.yarn.nodemanager) restartIfChanged;
|
||||
environment = cfg.yarn.nodemanager.extraEnv;
|
||||
|
||||
preStart = ''
|
||||
# create log dir
|
||||
@ -115,13 +171,26 @@ in
|
||||
SyslogIdentifier = "yarn-nodemanager";
|
||||
PermissionsStartOnly = true;
|
||||
ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
|
||||
" nodemanager";
|
||||
" nodemanager ${escapeShellArgs cfg.yarn.nodemanager.extraFlags}";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
|
||||
services.hadoop.gatewayRole.enable = true;
|
||||
|
||||
services.hadoop.yarnSiteInternal = with cfg.yarn.nodemanager; {
|
||||
"yarn.nodemanager.local-dirs" = localDir;
|
||||
"yarn.scheduler.maximum-allocation-vcores" = resource.maximumAllocationVCores;
|
||||
"yarn.scheduler.maximum-allocation-mb" = resource.maximumAllocationMB;
|
||||
"yarn.nodemanager.resource.cpu-vcores" = resource.cpuVCores;
|
||||
"yarn.nodemanager.resource.memory-mb" = resource.memoryMB;
|
||||
} // mkIf useCGroups {
|
||||
"yarn.nodemanager.linux-container-executor.cgroups.hierarchy" = "/hadoop-yarn";
|
||||
"yarn.nodemanager.linux-container-executor.resources-handler.class" = "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler";
|
||||
"yarn.nodemanager.linux-container-executor.cgroups.mount" = "true";
|
||||
"yarn.nodemanager.linux-container-executor.cgroups.mount-path" = "/run/wrappers/yarn-nodemanager/cgroup";
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPortRanges = [
|
||||
(mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;})
|
||||
];
|
||||
|
@ -10,15 +10,10 @@ import ../make-test-python.nix ({ package, ... }: {
|
||||
"fs.defaultFS" = "hdfs://ns1";
|
||||
};
|
||||
hdfsSite = {
|
||||
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
|
||||
"dfs.namenode.http-bind-host" = "0.0.0.0";
|
||||
"dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
|
||||
|
||||
# HA Quorum Journal Manager configuration
|
||||
"dfs.nameservices" = "ns1";
|
||||
"dfs.ha.namenodes.ns1" = "nn1,nn2";
|
||||
"dfs.namenode.shared.edits.dir.ns1.nn1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
|
||||
"dfs.namenode.shared.edits.dir.ns1.nn2" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
|
||||
"dfs.namenode.shared.edits.dir.ns1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
|
||||
"dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
|
||||
"dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
|
||||
"dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
|
||||
@ -32,7 +27,7 @@ import ../make-test-python.nix ({ package, ... }: {
|
||||
"dfs.ha.fencing.methods" = "shell(true)";
|
||||
"ha.zookeeper.quorum" = "zk1:2181";
|
||||
};
|
||||
yarnSiteHA = {
|
||||
yarnSite = {
|
||||
"yarn.resourcemanager.zk-address" = "zk1:2181";
|
||||
"yarn.resourcemanager.ha.enabled" = "true";
|
||||
"yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
|
||||
@ -116,8 +111,7 @@ import ../make-test-python.nix ({ package, ... }: {
|
||||
# YARN cluster
|
||||
rm1 = { options, ... }: {
|
||||
services.hadoop = {
|
||||
inherit package coreSite hdfsSite;
|
||||
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
|
||||
inherit package coreSite hdfsSite yarnSite;
|
||||
yarn.resourcemanager = {
|
||||
enable = true;
|
||||
openFirewall = true;
|
||||
@ -126,8 +120,7 @@ import ../make-test-python.nix ({ package, ... }: {
|
||||
};
|
||||
rm2 = { options, ... }: {
|
||||
services.hadoop = {
|
||||
inherit package coreSite hdfsSite;
|
||||
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
|
||||
inherit package coreSite hdfsSite yarnSite;
|
||||
yarn.resourcemanager = {
|
||||
enable = true;
|
||||
openFirewall = true;
|
||||
@ -137,8 +130,7 @@ import ../make-test-python.nix ({ package, ... }: {
|
||||
nm1 = { options, ... }: {
|
||||
virtualisation.memorySize = 2048;
|
||||
services.hadoop = {
|
||||
inherit package coreSite hdfsSite;
|
||||
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
|
||||
inherit package coreSite hdfsSite yarnSite;
|
||||
yarn.nodemanager = {
|
||||
enable = true;
|
||||
openFirewall = true;
|
||||
@ -148,8 +140,7 @@ import ../make-test-python.nix ({ package, ... }: {
|
||||
client = { options, ... }: {
|
||||
services.hadoop = {
|
||||
gatewayRole.enable = true;
|
||||
inherit package coreSite hdfsSite;
|
||||
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
|
||||
inherit package coreSite hdfsSite yarnSite;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
@ -35,6 +35,10 @@ with lib;
|
||||
hdfs.datanode = {
|
||||
enable = true;
|
||||
openFirewall = true;
|
||||
dataDirs = [{
|
||||
type = "DISK";
|
||||
path = "/tmp/dn1";
|
||||
}];
|
||||
};
|
||||
inherit coreSite;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user