diff --git a/pkgs/build-support/node/fetch-npm-deps/src/main.rs b/pkgs/build-support/node/fetch-npm-deps/src/main.rs index 57725a922dfd..2ce03cfd2aad 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/main.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/main.rs @@ -105,7 +105,7 @@ fn main() -> anyhow::Result<()> { eprintln!("{}", package.name); let tarball = package.tarball()?; - let integrity = package.integrity(); + let integrity = package.integrity().map(ToString::to_string); cache .put( diff --git a/pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs b/pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs index 99bd3020b523..572510bd82d2 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs @@ -1,7 +1,14 @@ -use anyhow::{bail, Context}; +use anyhow::{anyhow, bail, Context}; use rayon::slice::ParallelSliceMut; -use serde::Deserialize; -use std::{collections::HashMap, fmt}; +use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, +}; +use std::{ + cmp::Ordering, + collections::{HashMap, HashSet}, + fmt, +}; use url::Url; pub(super) fn packages(content: &str) -> anyhow::Result> { @@ -33,6 +40,13 @@ pub(super) fn packages(content: &str) -> anyhow::Result> { x.resolved .partial_cmp(&y.resolved) .expect("resolved should be comparable") + .then( + // v1 lockfiles can contain multiple references to the same version of a package, with + // different integrity values (e.g. a SHA-1 and a SHA-512 in one, but just a SHA-512 in another) + y.integrity + .partial_cmp(&x.integrity) + .expect("integrity should be comparable"), + ) }); packages.dedup_by(|x, y| x.resolved == y.resolved); @@ -54,7 +68,7 @@ struct OldPackage { #[serde(default)] bundled: bool, resolved: Option, - integrity: Option, + integrity: Option, dependencies: Option>, } @@ -63,7 +77,7 @@ pub(super) struct Package { #[serde(default)] pub(super) name: Option, pub(super) resolved: Option, - pub(super) integrity: Option, + pub(super) integrity: Option, } #[derive(Debug, Deserialize, PartialEq, Eq, PartialOrd, Ord)] @@ -82,6 +96,102 @@ impl fmt::Display for UrlOrString { } } +#[derive(Debug, PartialEq, Eq)] +pub(super) struct HashCollection(HashSet); + +impl HashCollection { + pub(super) fn into_best(self) -> Option { + self.0.into_iter().max() + } +} + +impl PartialOrd for HashCollection { + fn partial_cmp(&self, other: &Self) -> Option { + let lhs = self.0.iter().max()?; + let rhs = other.0.iter().max()?; + + lhs.partial_cmp(rhs) + } +} + +impl<'de> Deserialize<'de> for HashCollection { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_string(HashCollectionVisitor) + } +} + +struct HashCollectionVisitor; + +impl<'de> Visitor<'de> for HashCollectionVisitor { + type Value = HashCollection; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a single SRI hash or a collection of them (separated by spaces)") + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + let hashes = value + .split_ascii_whitespace() + .map(Hash::new) + .collect::>() + .map_err(E::custom)?; + + Ok(HashCollection(hashes)) + } +} + +#[derive(Debug, Deserialize, PartialEq, Eq, Hash)] +pub struct Hash(String); + +// Hash algorithms, in ascending preference. +const ALGOS: &[&str] = &["sha1", "sha512"]; + +impl Hash { + fn new(s: impl AsRef) -> anyhow::Result { + let algo = s + .as_ref() + .split_once('-') + .ok_or_else(|| anyhow!("expected SRI hash, got {:?}", s.as_ref()))? + .0; + + if ALGOS.iter().any(|&a| algo == a) { + Ok(Hash(s.as_ref().to_string())) + } else { + Err(anyhow!("unknown hash algorithm {algo:?}")) + } + } +} + +impl fmt::Display for Hash { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl PartialOrd for Hash { + fn partial_cmp(&self, other: &Hash) -> Option { + let lhs = self.0.split_once('-')?.0; + let rhs = other.0.split_once('-')?.0; + + ALGOS + .iter() + .position(|&s| lhs == s)? + .partial_cmp(&ALGOS.iter().position(|&s| rhs == s)?) + } +} + +impl Ord for Hash { + fn cmp(&self, other: &Hash) -> Ordering { + self.partial_cmp(other).unwrap() + } +} + #[allow(clippy::case_sensitive_file_extension_comparisons)] fn to_new_packages( old_packages: HashMap, @@ -149,8 +259,13 @@ fn get_initial_url() -> anyhow::Result { #[cfg(test)] mod tests { - use super::{get_initial_url, to_new_packages, OldPackage, Package, UrlOrString}; - use std::collections::HashMap; + use super::{ + get_initial_url, to_new_packages, Hash, HashCollection, OldPackage, Package, UrlOrString, + }; + use std::{ + cmp::Ordering, + collections::{HashMap, HashSet}, + }; use url::Url; #[test] @@ -188,4 +303,23 @@ mod tests { Ok(()) } + + #[test] + fn hash_preference() { + assert_eq!( + Hash(String::from("sha1-foo")).partial_cmp(&Hash(String::from("sha512-foo"))), + Some(Ordering::Less) + ); + + assert_eq!( + HashCollection({ + let mut set = HashSet::new(); + set.insert(Hash(String::from("sha512-foo"))); + set.insert(Hash(String::from("sha1-bar"))); + set + }) + .into_best(), + Some(Hash(String::from("sha512-foo"))) + ); + } } diff --git a/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs b/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs index 387b3add7ec9..d96f7d878796 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs @@ -87,7 +87,7 @@ pub struct Package { #[derive(Debug)] enum Specifics { - Registry { integrity: String }, + Registry { integrity: lock::Hash }, Git { workdir: TempDir }, } @@ -134,11 +134,11 @@ impl Package { Specifics::Git { workdir } } None => Specifics::Registry { - integrity: get_ideal_hash( - &pkg.integrity - .expect("non-git dependencies should have assosciated integrity"), - )? - .to_string(), + integrity: pkg + .integrity + .expect("non-git dependencies should have assosciated integrity") + .into_best() + .expect("non-git dependencies should have non-empty assosciated integrity"), }, }; @@ -181,9 +181,9 @@ impl Package { } } - pub fn integrity(&self) -> Option { + pub fn integrity(&self) -> Option<&lock::Hash> { match &self.specifics { - Specifics::Registry { integrity } => Some(integrity.clone()), + Specifics::Registry { integrity } => Some(integrity), Specifics::Git { .. } => None, } } @@ -304,25 +304,9 @@ fn get_hosted_git_url(url: &Url) -> anyhow::Result> { } } -fn get_ideal_hash(integrity: &str) -> anyhow::Result<&str> { - let split: Vec<_> = integrity.split_ascii_whitespace().collect(); - - if split.len() == 1 { - Ok(split[0]) - } else { - for hash in ["sha512-", "sha1-"] { - if let Some(h) = split.iter().find(|s| s.starts_with(hash)) { - return Ok(h); - } - } - - Err(anyhow!("not sure which hash to select out of {split:?}")) - } -} - #[cfg(test)] mod tests { - use super::{get_hosted_git_url, get_ideal_hash}; + use super::get_hosted_git_url; use url::Url; #[test] @@ -353,18 +337,4 @@ mod tests { "GitLab URLs should be marked as invalid (lol)" ); } - - #[test] - fn ideal_hashes() { - for (input, expected) in [ - ("sha512-foo sha1-bar", Some("sha512-foo")), - ("sha1-bar md5-foo", Some("sha1-bar")), - ("sha1-bar", Some("sha1-bar")), - ("sha512-foo", Some("sha512-foo")), - ("foo-bar sha1-bar", Some("sha1-bar")), - ("foo-bar baz-foo", None), - ] { - assert_eq!(get_ideal_hash(input).ok(), expected); - } - } }