From 838ba827907404b2724351495c934803abbe84c2 Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Sat, 18 Apr 2026 22:58:31 +0300 Subject: [PATCH] sync: batch file identification via hash lookup Signed-off-by: NotAShelf Change-Id: I85d3f1265cad1996340ac98ac9ee1f7e6a6a6964 --- src/cli/commands/sync.rs | 92 ++++++++++++++++++++++++++++++++++- src/platform.rs | 12 +++++ src/platform/curseforge.rs | 72 +++++++++++++++++++++++++++ src/platform/modrinth.rs | 59 ++++++++++++++++++++++ src/platform/multiplatform.rs | 27 ++++++++++ 5 files changed, 261 insertions(+), 1 deletion(-) diff --git a/src/cli/commands/sync.rs b/src/cli/commands/sync.rs index e806cfc..715268d 100644 --- a/src/cli/commands/sync.rs +++ b/src/cli/commands/sync.rs @@ -63,6 +63,8 @@ pub async fn execute( ); if no_filter || args.additions { + let mut file_hashes = Vec::new(); + for (file_path, _) in &additions { spinner .set_message(format!("Processing addition: {}", file_path.display())); @@ -71,7 +73,34 @@ pub async fn execute( false, global_yes, )? 
{ - add_file_to_lockfile(&mut lockfile, file_path, &config).await?; + if let Ok(file_data) = fs::read(file_path) { + use sha1::Digest; + let mut hasher = sha1::Sha1::new(); + hasher.update(&file_data); + let hash = format!("{:x}", hasher.finalize()); + file_hashes.push(FileHash { + path: file_path.clone(), + hash, + }); + } + } + } + + if !file_hashes.is_empty() { + let fallback_hashes = file_hashes.clone(); + let result = add_files_batch(&mut lockfile, file_hashes).await; + if let Err(e) = result { + log::warn!( + "Batch lookup failed, falling back to individual lookups: {}", + e + ); + for fh in fallback_hashes { + if let Err(e) = + add_file_to_lockfile(&mut lockfile, &fh.path, &config).await + { + log::warn!("Failed to add {}: {}", fh.path.display(), e); + } + } } } } @@ -210,3 +239,64 @@ async fn add_file_to_lockfile( println!("⚠ Could not identify {}, skipping", file_path.display()); Ok(()) } + +#[derive(Clone)] +struct FileHash { + path: PathBuf, + hash: String, +} + +async fn add_files_batch( + lockfile: &mut LockFile, + file_hashes: Vec<FileHash>, +) -> Result<()> { + if file_hashes.is_empty() { + return Ok(()); + } + + let modrinth = ModrinthPlatform::new(); + + let hashes: Vec<String> = + file_hashes.iter().map(|fh| fh.hash.clone()).collect(); + + let projects = modrinth + .request_projects_from_hashes(&hashes, "sha1") + .await?; + + let mut matched_indices: std::collections::HashSet<usize> = + std::collections::HashSet::new(); + let mut added_pakku_ids: std::collections::HashSet<String> = + std::collections::HashSet::new(); + + for project in &projects { + let pakku_id = match &project.pakku_id { + Some(id) => id.clone(), + None => continue, + }; + if added_pakku_ids.contains(&pakku_id) { + continue; + } + for file_info in &project.files { + for (idx, fh) in file_hashes.iter().enumerate() { + if !matched_indices.contains(&idx) + && file_info.hashes.get("sha1").map(|s| s.as_str()) == Some(&fh.hash) + { + lockfile.add_project(project.clone()); + added_pakku_ids.insert(pakku_id.clone()); + 
matched_indices.insert(idx); + println!("✓ Added {} (from Modrinth)", fh.path.display()); + break; + } + } + } + } + + for (idx, fh) in file_hashes.iter().enumerate() { + if matched_indices.contains(&idx) { + continue; + } + println!("⚠ Could not identify {}, skipping", fh.path.display()); + } + + Ok(()) +} diff --git a/src/platform.rs b/src/platform.rs index da04627..3c26574 100644 --- a/src/platform.rs +++ b/src/platform.rs @@ -135,4 +135,16 @@ impl PlatformClient for RateLimitedPlatform { self.rate_limiter.wait_for(&self.platform_name).await; self.platform.request_project_from_slug(slug).await } + + async fn request_projects_from_hashes( + &self, + hashes: &[String], + algorithm: &str, + ) -> Result<Vec<Project>> { + self.rate_limiter.wait_for(&self.platform_name).await; + self + .platform + .request_projects_from_hashes(hashes, algorithm) + .await + } } diff --git a/src/platform/curseforge.rs b/src/platform/curseforge.rs index 4587fc6..f1f8008 100644 --- a/src/platform/curseforge.rs +++ b/src/platform/curseforge.rs @@ -403,6 +403,78 @@ impl PlatformClient for CurseForgePlatform { Err(e) => Err(e), } } + + async fn request_projects_from_hashes( + &self, + hashes: &[String], + _algorithm: &str, + ) -> Result<Vec<Project>> { + if hashes.is_empty() { + return Ok(Vec::new()); + } + + let fingerprints: Vec<u64> = hashes + .iter() + .filter_map(|h| h.parse::<u64>().ok()) + .collect(); + + if fingerprints.is_empty() { + return Ok(Vec::new()); + } + + #[derive(Serialize)] + struct FingerprintRequest { + fingerprints: Vec<u64>, + } + + let url = format!("{CURSEFORGE_API_BASE}/fingerprints/432"); + let response = self + .client + .post(&url) + .headers(self.get_headers()?) 
+ .json(&FingerprintRequest { + fingerprints: fingerprints.clone(), + }) + .send() + .await?; + + if !response.status().is_success() { + return Err(PakkerError::PlatformApiError(format!( + "CurseForge batch API error: {}", + response.status() + ))); + } + + let response_data: serde_json::Value = response.json().await?; + + let matches = response_data["data"]["exactMatches"] + .as_array() + .cloned() + .unwrap_or_default(); + + let mut projects = Vec::new(); + let mut seen_ids = std::collections::HashSet::new(); + + for m in matches { + if let Some(file) = m["file"].as_object() { + if let Some(mod_id) = file["modId"].as_u64() { + let mod_id_str = mod_id.to_string(); + if seen_ids.contains(&mod_id_str) { + continue; + } + seen_ids.insert(mod_id_str.clone()); + + if let Ok(project) = + self.request_project_with_files(&mod_id_str, &[], &[]).await + { + projects.push(project); + } + } + } + } + + Ok(projects) + } } // CurseForge API models diff --git a/src/platform/modrinth.rs b/src/platform/modrinth.rs index 5663165..906a264 100644 --- a/src/platform/modrinth.rs +++ b/src/platform/modrinth.rs @@ -276,6 +276,65 @@ impl PlatformClient for ModrinthPlatform { let mr_project: ModrinthProject = response.json().await?; Ok(Some(self.convert_project(mr_project))) } + + async fn request_projects_from_hashes( + &self, + hashes: &[String], + algorithm: &str, + ) -> Result<Vec<Project>> { + if hashes.is_empty() { + return Ok(Vec::new()); + } + + #[derive(Serialize)] + struct HashBatchRequest<'a> { + hashes: &'a [String], + algorithm: &'a str, + } + + #[derive(Debug, Deserialize)] + struct HashBatchResponse { + project_id: String, + } + + let url = format!("{MODRINTH_API_BASE}/version_files"); + let response = self + .client + .post(&url) + .json(&HashBatchRequest { hashes, algorithm }) + .send() + .await?; + + if !response.status().is_success() { + return Err(PakkerError::PlatformApiError(format!( + "Modrinth batch API error: {}", + response.status() + ))); + } + + let versions_map: 
std::collections::HashMap<String, HashBatchResponse> = + response.json().await?; + + let mut projects = Vec::new(); + let mut seen_project_ids = std::collections::HashSet::new(); + + for version in versions_map.values() { + if seen_project_ids.contains(&version.project_id) { + continue; + } + seen_project_ids.insert(version.project_id.clone()); + + match self + .request_project_with_files(&version.project_id, &[], &[]) + .await + { + Ok(project) => projects.push(project), + Err(_) => continue, + } + } + + Ok(projects) + } } // Modrinth API models diff --git a/src/platform/multiplatform.rs b/src/platform/multiplatform.rs index 21a76ee..5ff8a8b 100644 --- a/src/platform/multiplatform.rs +++ b/src/platform/multiplatform.rs @@ -202,4 +202,31 @@ impl PlatformClient for MultiplatformPlatform { (Err(e), _) | (_, Err(e)) => Err(e), } } + + async fn request_projects_from_hashes( + &self, + hashes: &[String], + algorithm: &str, + ) -> Result<Vec<Project>> { + let cf_future = self + .curseforge + .request_projects_from_hashes(hashes, algorithm); + let mr_future = self + .modrinth + .request_projects_from_hashes(hashes, algorithm); + + let (cf_projects, mr_projects) = tokio::join!(cf_future, mr_future); + + let mut all_projects = cf_projects?; + for mr_project in mr_projects? { + if !all_projects.iter().any(|p| { + p.id.get("modrinth") == mr_project.id.get("modrinth") + || p.id.get("curseforge") == mr_project.id.get("curseforge") + }) { + all_projects.push(mr_project); + } + } + + Ok(all_projects) + } }