sync: batch file identification via hash lookup

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I85d3f1265cad1996340ac98ac9ee1f7e6a6a6964
This commit is contained in:
raf 2026-04-18 22:58:31 +03:00
commit 838ba82790
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
5 changed files with 261 additions and 1 deletion

View file

@ -63,6 +63,8 @@ pub async fn execute(
);
if no_filter || args.additions {
let mut file_hashes = Vec::new();
for (file_path, _) in &additions {
spinner
.set_message(format!("Processing addition: {}", file_path.display()));
@ -71,7 +73,34 @@ pub async fn execute(
false,
global_yes,
)? {
add_file_to_lockfile(&mut lockfile, file_path, &config).await?;
if let Ok(file_data) = fs::read(file_path) {
use sha1::Digest;
let mut hasher = sha1::Sha1::new();
hasher.update(&file_data);
let hash = format!("{:x}", hasher.finalize());
file_hashes.push(FileHash {
path: file_path.clone(),
hash,
});
}
}
}
if !file_hashes.is_empty() {
let fallback_hashes = file_hashes.clone();
let result = add_files_batch(&mut lockfile, file_hashes).await;
if let Err(e) = result {
log::warn!(
"Batch lookup failed, falling back to individual lookups: {}",
e
);
for fh in fallback_hashes {
if let Err(e) =
add_file_to_lockfile(&mut lockfile, &fh.path, &config).await
{
log::warn!("Failed to add {}: {}", fh.path.display(), e);
}
}
}
}
}
@ -210,3 +239,64 @@ async fn add_file_to_lockfile(
println!("⚠ Could not identify {}, skipping", file_path.display());
Ok(())
}
/// A local file path paired with the lowercase sha1 hex digest of its
/// contents, used to batch-identify files against remote platforms.
#[derive(Clone, Debug)]
struct FileHash {
    // Path of the file on disk; used for display and fallback lookups.
    path: PathBuf,
    // Lowercase sha1 hex digest of the file contents.
    hash: String,
}
/// Identifies a batch of local files by their sha1 hashes via a single
/// Modrinth batch lookup and records each matching project in `lockfile`.
///
/// Every file that no returned project claims is reported and skipped.
/// Returns an error only when the batch request itself fails; the caller
/// falls back to per-file lookups in that case.
async fn add_files_batch(
    lockfile: &mut LockFile,
    file_hashes: Vec<FileHash>,
) -> Result<()> {
    if file_hashes.is_empty() {
        return Ok(());
    }

    let modrinth = ModrinthPlatform::new();
    let hashes: Vec<String> =
        file_hashes.iter().map(|fh| fh.hash.clone()).collect();
    let projects = modrinth
        .request_projects_from_hashes(&hashes, "sha1")
        .await?;

    // Indices into `file_hashes` already claimed by some project file.
    let mut matched_indices: std::collections::HashSet<usize> =
        std::collections::HashSet::new();
    // Pakku ids already written to the lockfile, to avoid duplicates.
    let mut added_pakku_ids: std::collections::HashSet<String> =
        std::collections::HashSet::new();

    for project in &projects {
        let pakku_id = match &project.pakku_id {
            Some(id) => id,
            None => continue,
        };
        for file_info in &project.files {
            // Hoist the sha1 lookup out of the inner scan.
            let sha1 = file_info.hashes.get("sha1").map(String::as_str);
            for (idx, fh) in file_hashes.iter().enumerate() {
                if matched_indices.contains(&idx)
                    || sha1 != Some(fh.hash.as_str())
                {
                    continue;
                }
                // Add the project only on its first match. Previously a
                // project matching several local files was added once per
                // match, because `break` only exited the inner loop and the
                // `added_pakku_ids` guard sat at the top of the project loop.
                if added_pakku_ids.insert(pakku_id.clone()) {
                    lockfile.add_project(project.clone());
                }
                matched_indices.insert(idx);
                println!("✓ Added {} (from Modrinth)", fh.path.display());
                break;
            }
        }
    }

    // Report the files no project claimed.
    for (idx, fh) in file_hashes.iter().enumerate() {
        if !matched_indices.contains(&idx) {
            println!("⚠ Could not identify {}, skipping", fh.path.display());
        }
    }
    Ok(())
}

View file

@ -135,4 +135,16 @@ impl PlatformClient for RateLimitedPlatform {
self.rate_limiter.wait_for(&self.platform_name).await;
self.platform.request_project_from_slug(slug).await
}
/// Batch hash lookup, throttled by this platform's rate limiter before
/// delegating to the wrapped client.
async fn request_projects_from_hashes(
    &self,
    hashes: &[String],
    algorithm: &str,
) -> Result<Vec<crate::model::Project>> {
    // Wait our turn first so the wrapped call never exceeds the limit.
    self.rate_limiter.wait_for(&self.platform_name).await;
    let inner = &self.platform;
    inner.request_projects_from_hashes(hashes, algorithm).await
}
}

View file

@ -403,6 +403,78 @@ impl PlatformClient for CurseForgePlatform {
Err(e) => Err(e),
}
}
/// Batch-identifies files on CurseForge. CurseForge matches files by
/// murmur2 fingerprints (decimal `u32` strings), not cryptographic hashes,
/// so `_algorithm` is ignored and any hash that does not parse as a `u32`
/// fingerprint is silently skipped.
///
/// Returns one `Project` per distinct matched mod id; mods that fail to
/// resolve are dropped rather than failing the whole batch.
async fn request_projects_from_hashes(
    &self,
    hashes: &[String],
    _algorithm: &str,
) -> Result<Vec<Project>> {
    if hashes.is_empty() {
        return Ok(Vec::new());
    }

    // Keep only strings that are valid murmur2 fingerprints.
    let fingerprints: Vec<u32> = hashes
        .iter()
        .filter_map(|h| h.parse::<u32>().ok())
        .collect();
    if fingerprints.is_empty() {
        return Ok(Vec::new());
    }

    #[derive(Serialize)]
    struct FingerprintRequest {
        fingerprints: Vec<u32>,
    }

    // 432 is the CurseForge game id for Minecraft.
    let url = format!("{CURSEFORGE_API_BASE}/fingerprints/432");
    let response = self
        .client
        .post(&url)
        .headers(self.get_headers()?)
        // Move the vec into the request body; it is not used afterwards,
        // so no clone is needed.
        .json(&FingerprintRequest { fingerprints })
        .send()
        .await?;
    if !response.status().is_success() {
        return Err(PakkerError::PlatformApiError(format!(
            "CurseForge batch API error: {}",
            response.status()
        )));
    }

    let response_data: serde_json::Value = response.json().await?;
    let matches = response_data["data"]["exactMatches"]
        .as_array()
        .cloned()
        .unwrap_or_default();

    let mut projects = Vec::new();
    let mut seen_ids = std::collections::HashSet::new();
    for m in &matches {
        // Index through `Value` (yields Null for missing keys) rather than
        // through a `serde_json::Map`, whose `Index` impl panics when the
        // key is absent.
        let mod_id = match m["file"]["modId"].as_u64() {
            Some(id) => id,
            None => continue,
        };
        let mod_id_str = mod_id.to_string();
        // `insert` returns false for ids already handled — one lookup
        // instead of contains-then-insert.
        if !seen_ids.insert(mod_id_str.clone()) {
            continue;
        }
        // Best effort: skip projects that fail to resolve.
        if let Ok(project) =
            self.request_project_with_files(&mod_id_str, &[], &[]).await
        {
            projects.push(project);
        }
    }
    Ok(projects)
}
}
// CurseForge API models

View file

@ -276,6 +276,65 @@ impl PlatformClient for ModrinthPlatform {
let mr_project: ModrinthProject = response.json().await?;
Ok(Some(self.convert_project(mr_project)))
}
/// Batch-identifies files on Modrinth via the `POST /version_files`
/// endpoint, which maps each input hash to the version containing it.
///
/// Returns one `Project` per distinct matched project id; projects that
/// fail to resolve individually are skipped rather than failing the batch.
async fn request_projects_from_hashes(
    &self,
    hashes: &[String],
    algorithm: &str,
) -> Result<Vec<Project>> {
    if hashes.is_empty() {
        return Ok(Vec::new());
    }

    #[derive(Serialize)]
    struct HashBatchRequest<'a> {
        hashes: &'a [String],
        algorithm: &'a str,
    }

    // Only the project id is needed; serde ignores the other version fields.
    #[derive(Debug, Deserialize)]
    struct HashBatchResponse {
        project_id: String,
    }

    let url = format!("{MODRINTH_API_BASE}/version_files");
    let response = self
        .client
        .post(&url)
        .json(&HashBatchRequest { hashes, algorithm })
        .send()
        .await?;
    if !response.status().is_success() {
        return Err(PakkerError::PlatformApiError(format!(
            "Modrinth batch API error: {}",
            response.status()
        )));
    }

    // Keyed by input hash; several hashes may map to the same project.
    let versions_map: std::collections::HashMap<String, HashBatchResponse> =
        response.json().await?;

    let mut projects = Vec::new();
    let mut seen_project_ids = std::collections::HashSet::new();
    for version in versions_map.values() {
        // `insert` returns false when the id was already handled — one
        // lookup instead of contains-then-insert.
        if !seen_project_ids.insert(version.project_id.clone()) {
            continue;
        }
        // Best effort: skip projects that fail to resolve instead of
        // failing the whole batch.
        if let Ok(project) = self
            .request_project_with_files(&version.project_id, &[], &[])
            .await
        {
            projects.push(project);
        }
    }
    Ok(projects)
}
}
// Modrinth API models

View file

@ -202,4 +202,31 @@ impl PlatformClient for MultiplatformPlatform {
(Err(e), _) | (_, Err(e)) => Err(e),
}
}
/// Batch-identifies files on both platforms concurrently and merges the
/// results, preferring the CurseForge entry when both platforms return the
/// same project. Propagates the first error from either platform, matching
/// the other methods of this impl.
async fn request_projects_from_hashes(
    &self,
    hashes: &[String],
    algorithm: &str,
) -> Result<Vec<Project>> {
    let cf_future = self
        .curseforge
        .request_projects_from_hashes(hashes, algorithm);
    let mr_future = self
        .modrinth
        .request_projects_from_hashes(hashes, algorithm);
    let (cf_projects, mr_projects) = tokio::join!(cf_future, mr_future);

    let mut all_projects = cf_projects?;
    for mr_project in mr_projects? {
        // A project is a duplicate only when both sides carry the SAME
        // present id. Comparing the `Option`s directly treated two missing
        // ids (`None == None`) as equal, so once one Modrinth-only project
        // was pushed, every later one matched it on the absent "curseforge"
        // key and was wrongly dropped.
        let is_duplicate = all_projects.iter().any(|existing| {
            let same_id = |key: &str| match (
                existing.id.get(key),
                mr_project.id.get(key),
            ) {
                (Some(a), Some(b)) => a == b,
                _ => false,
            };
            same_id("modrinth") || same_id("curseforge")
        });
        if !is_duplicate {
            all_projects.push(mr_project);
        }
    }
    Ok(all_projects)
}
}