# Package expression for SentencePiece (github.com/google/sentencepiece).
#
# Arguments:
#   lib, stdenv, fetchFromGitHub, cmake, gperftools
#     Supplied by the caller (normally via callPackage).
#   withGPerfTools (default: true)
#     When true, gperftools is added to buildInputs; when false the package
#     is built without it.
{ lib
, fetchFromGitHub
, stdenv
, cmake
, gperftools
, withGPerfTools ? true
}:

stdenv.mkDerivation rec {
  pname = "sentencepiece";
  version = "0.2.0";

  src = fetchFromGitHub {
    owner = "google";
    # Spelled out rather than reusing `pname`, so that overriding or renaming
    # the package does not silently change where the source is fetched from.
    repo = "sentencepiece";
    rev = "refs/tags/v${version}";
    # SRI-format hashes belong in `hash`; `sha256` is the legacy attribute.
    hash = "sha256-tMt6UBDqpdjAhxAJlVOFFlE3RC36/t8K0gBAzbesnsg=";
  };

  nativeBuildInputs = [ cmake ];

  # gperftools is only pulled in when the caller leaves withGPerfTools enabled.
  buildInputs = lib.optionals withGPerfTools [ gperftools ];

  outputs = [ "bin" "dev" "out" ];

  # https://github.com/google/sentencepiece/issues/754
  #
  # Rewrites CMakeLists.txt so that the library and include directories are
  # taken from CMake's CMAKE_INSTALL_FULL_* variables instead of being built
  # as "<prefix>/<relative dir>".
  #
  # Quoting note: inside a Nix '' string, a dollar sign directly followed by
  # an opening brace would start a Nix interpolation. Each dollar sign is
  # therefore wrapped in its own pair of shell single quotes, which the shell
  # strips again, so substituteInPlace receives the literal CMake text.
  postPatch = ''
    substituteInPlace CMakeLists.txt \
      --replace '\$'{exec_prefix}/'$'{CMAKE_INSTALL_LIBDIR} '$'{CMAKE_INSTALL_FULL_LIBDIR} \
      --replace '\$'{prefix}/'$'{CMAKE_INSTALL_INCLUDEDIR} '$'{CMAKE_INSTALL_FULL_INCLUDEDIR}
  '';

  # Attribute paths are written out in full (no `with lib;`) so that every
  # name's origin is visible.
  meta = {
    homepage = "https://github.com/google/sentencepiece";
    description = "Unsupervised text tokenizer for Neural Network-based text generation";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = [ lib.maintainers.pashashocky ];
  };
}