pinakes-core: improve media management features; various configuration improvements
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I2d1f04f13970d21c36067f30bc04a9176a6a6964
This commit is contained in:
parent
cfdc3d0622
commit
e02c15490e
31 changed files with 1167 additions and 197 deletions
|
|
@ -196,6 +196,7 @@ pub trait StorageBackend: Send + Sync + 'static {
|
|||
|
||||
// Duplicates
|
||||
async fn find_duplicates(&self) -> Result<Vec<Vec<MediaItem>>>;
|
||||
async fn find_perceptual_duplicates(&self, threshold: u32) -> Result<Vec<Vec<MediaItem>>>;
|
||||
|
||||
// Database management
|
||||
async fn database_stats(&self) -> Result<DatabaseStats>;
|
||||
|
|
|
|||
|
|
@ -170,6 +170,16 @@ fn row_to_media_item(row: &Row) -> Result<MediaItem> {
|
|||
.map(PathBuf::from),
|
||||
custom_fields: HashMap::new(),
|
||||
file_mtime: row.get("file_mtime"),
|
||||
|
||||
// Photo-specific fields
|
||||
date_taken: row.get("date_taken"),
|
||||
latitude: row.get("latitude"),
|
||||
longitude: row.get("longitude"),
|
||||
camera_make: row.get("camera_make"),
|
||||
camera_model: row.get("camera_model"),
|
||||
rating: row.get("rating"),
|
||||
perceptual_hash: row.get("perceptual_hash"),
|
||||
|
||||
created_at: row.get("created_at"),
|
||||
updated_at: row.get("updated_at"),
|
||||
})
|
||||
|
|
@ -589,9 +599,10 @@ impl StorageBackend for PostgresBackend {
|
|||
"INSERT INTO media_items (
|
||||
id, path, file_name, media_type, content_hash, file_size,
|
||||
title, artist, album, genre, year, duration_secs, description,
|
||||
thumbnail_path, created_at, updated_at
|
||||
thumbnail_path, date_taken, latitude, longitude, camera_make,
|
||||
camera_model, rating, perceptual_hash, created_at, updated_at
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16
|
||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23
|
||||
)",
|
||||
&[
|
||||
&item.id.0,
|
||||
|
|
@ -611,6 +622,13 @@ impl StorageBackend for PostgresBackend {
|
|||
.thumbnail_path
|
||||
.as_ref()
|
||||
.map(|p| p.to_string_lossy().to_string()),
|
||||
&item.date_taken,
|
||||
&item.latitude,
|
||||
&item.longitude,
|
||||
&item.camera_make,
|
||||
&item.camera_model,
|
||||
&item.rating,
|
||||
&item.perceptual_hash,
|
||||
&item.created_at,
|
||||
&item.updated_at,
|
||||
],
|
||||
|
|
@ -658,7 +676,8 @@ impl StorageBackend for PostgresBackend {
|
|||
.query_opt(
|
||||
"SELECT id, path, file_name, media_type, content_hash, file_size,
|
||||
title, artist, album, genre, year, duration_secs, description,
|
||||
thumbnail_path, created_at, updated_at
|
||||
thumbnail_path, file_mtime, date_taken, latitude, longitude,
|
||||
camera_make, camera_model, rating, perceptual_hash, created_at, updated_at
|
||||
FROM media_items WHERE id = $1",
|
||||
&[&id.0],
|
||||
)
|
||||
|
|
@ -681,7 +700,8 @@ impl StorageBackend for PostgresBackend {
|
|||
.query_opt(
|
||||
"SELECT id, path, file_name, media_type, content_hash, file_size,
|
||||
title, artist, album, genre, year, duration_secs, description,
|
||||
thumbnail_path, file_mtime, created_at, updated_at
|
||||
thumbnail_path, file_mtime, date_taken, latitude, longitude,
|
||||
camera_make, camera_model, rating, perceptual_hash, created_at, updated_at
|
||||
FROM media_items WHERE content_hash = $1",
|
||||
&[&hash.0],
|
||||
)
|
||||
|
|
@ -709,7 +729,8 @@ impl StorageBackend for PostgresBackend {
|
|||
.query_opt(
|
||||
"SELECT id, path, file_name, media_type, content_hash, file_size,
|
||||
title, artist, album, genre, year, duration_secs, description,
|
||||
thumbnail_path, file_mtime, created_at, updated_at
|
||||
thumbnail_path, file_mtime, date_taken, latitude, longitude,
|
||||
camera_make, camera_model, rating, perceptual_hash, created_at, updated_at
|
||||
FROM media_items WHERE path = $1",
|
||||
&[&path_str],
|
||||
)
|
||||
|
|
@ -746,7 +767,8 @@ impl StorageBackend for PostgresBackend {
|
|||
let sql = format!(
|
||||
"SELECT id, path, file_name, media_type, content_hash, file_size,
|
||||
title, artist, album, genre, year, duration_secs, description,
|
||||
thumbnail_path, created_at, updated_at
|
||||
thumbnail_path, file_mtime, date_taken, latitude, longitude,
|
||||
camera_make, camera_model, rating, perceptual_hash, created_at, updated_at
|
||||
FROM media_items
|
||||
ORDER BY {order_by}
|
||||
LIMIT $1 OFFSET $2"
|
||||
|
|
@ -816,7 +838,8 @@ impl StorageBackend for PostgresBackend {
|
|||
path = $2, file_name = $3, media_type = $4, content_hash = $5,
|
||||
file_size = $6, title = $7, artist = $8, album = $9, genre = $10,
|
||||
year = $11, duration_secs = $12, description = $13,
|
||||
thumbnail_path = $14, updated_at = $15
|
||||
thumbnail_path = $14, date_taken = $15, latitude = $16, longitude = $17,
|
||||
camera_make = $18, camera_model = $19, rating = $20, perceptual_hash = $21, updated_at = $22
|
||||
WHERE id = $1",
|
||||
&[
|
||||
&item.id.0,
|
||||
|
|
@ -836,6 +859,13 @@ impl StorageBackend for PostgresBackend {
|
|||
.thumbnail_path
|
||||
.as_ref()
|
||||
.map(|p| p.to_string_lossy().to_string()),
|
||||
&item.date_taken,
|
||||
&item.latitude,
|
||||
&item.longitude,
|
||||
&item.camera_make,
|
||||
&item.camera_model,
|
||||
&item.rating,
|
||||
&item.perceptual_hash,
|
||||
&item.updated_at,
|
||||
],
|
||||
)
|
||||
|
|
@ -1390,7 +1420,9 @@ impl StorageBackend for PostgresBackend {
|
|||
let select = format!(
|
||||
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size,
|
||||
m.title, m.artist, m.album, m.genre, m.year, m.duration_secs,
|
||||
m.description, m.thumbnail_path, m.created_at, m.updated_at,
|
||||
m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude,
|
||||
m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash,
|
||||
m.created_at, m.updated_at,
|
||||
ts_rank(m.search_vector, plainto_tsquery('english', ${fts_param_idx})) AS rank
|
||||
FROM media_items m
|
||||
WHERE {full_where}
|
||||
|
|
@ -1405,7 +1437,9 @@ impl StorageBackend for PostgresBackend {
|
|||
let select = format!(
|
||||
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size,
|
||||
m.title, m.artist, m.album, m.genre, m.year, m.duration_secs,
|
||||
m.description, m.thumbnail_path, m.created_at, m.updated_at
|
||||
m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude,
|
||||
m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash,
|
||||
m.created_at, m.updated_at
|
||||
FROM media_items m
|
||||
WHERE {full_where}
|
||||
ORDER BY {order_by}
|
||||
|
|
@ -1694,6 +1728,112 @@ impl StorageBackend for PostgresBackend {
|
|||
Ok(groups)
|
||||
}
|
||||
|
||||
async fn find_perceptual_duplicates(&self, threshold: u32) -> Result<Vec<Vec<MediaItem>>> {
|
||||
let client = self
|
||||
.pool
|
||||
.get()
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
|
||||
// Get all images with perceptual hashes
|
||||
let rows = client
|
||||
.query(
|
||||
"SELECT id, path, file_name, media_type, content_hash, file_size,
|
||||
title, artist, album, genre, year, duration_secs, description,
|
||||
thumbnail_path, file_mtime, date_taken, latitude, longitude,
|
||||
camera_make, camera_model, rating, perceptual_hash, created_at, updated_at
|
||||
FROM media_items WHERE perceptual_hash IS NOT NULL ORDER BY id",
|
||||
&[],
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut items = Vec::with_capacity(rows.len());
|
||||
for row in &rows {
|
||||
items.push(row_to_media_item(row)?);
|
||||
}
|
||||
|
||||
// Batch-load custom fields
|
||||
if !items.is_empty() {
|
||||
let ids: Vec<Uuid> = items.iter().map(|i| i.id.0).collect();
|
||||
let cf_rows = client
|
||||
.query(
|
||||
"SELECT media_id, field_name, field_type, field_value
|
||||
FROM custom_fields WHERE media_id = ANY($1)",
|
||||
&[&ids],
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut cf_map: HashMap<Uuid, HashMap<String, CustomField>> = HashMap::new();
|
||||
for row in &cf_rows {
|
||||
let mid: Uuid = row.get("media_id");
|
||||
let name: String = row.get("field_name");
|
||||
let ft_str: String = row.get("field_type");
|
||||
let value: String = row.get("field_value");
|
||||
let field_type = custom_field_type_from_string(&ft_str)?;
|
||||
cf_map
|
||||
.entry(mid)
|
||||
.or_default()
|
||||
.insert(name, CustomField { field_type, value });
|
||||
}
|
||||
|
||||
for item in &mut items {
|
||||
if let Some(fields) = cf_map.remove(&item.id.0) {
|
||||
item.custom_fields = fields;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compare each pair and build groups
|
||||
use image_hasher::ImageHash;
|
||||
let mut groups: Vec<Vec<MediaItem>> = Vec::new();
|
||||
let mut grouped_indices: std::collections::HashSet<usize> =
|
||||
std::collections::HashSet::new();
|
||||
|
||||
for i in 0..items.len() {
|
||||
if grouped_indices.contains(&i) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let hash_a = match &items[i].perceptual_hash {
|
||||
Some(h) => match ImageHash::<Vec<u8>>::from_base64(h) {
|
||||
Ok(hash) => hash,
|
||||
Err(_) => continue,
|
||||
},
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let mut group = vec![items[i].clone()];
|
||||
grouped_indices.insert(i);
|
||||
|
||||
for (j, item_j) in items.iter().enumerate().skip(i + 1) {
|
||||
if grouped_indices.contains(&j) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let hash_b = match &item_j.perceptual_hash {
|
||||
Some(h) => match ImageHash::<Vec<u8>>::from_base64(h) {
|
||||
Ok(hash) => hash,
|
||||
Err(_) => continue,
|
||||
},
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let distance = hash_a.dist(&hash_b);
|
||||
if distance <= threshold {
|
||||
group.push(item_j.clone());
|
||||
grouped_indices.insert(j);
|
||||
}
|
||||
}
|
||||
|
||||
// Only add groups with more than one item (actual duplicates)
|
||||
if group.len() > 1 {
|
||||
groups.push(group);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(groups)
|
||||
}
|
||||
|
||||
// ---- Database management ----
|
||||
|
||||
async fn database_stats(&self) -> Result<crate::storage::DatabaseStats> {
|
||||
|
|
@ -2359,7 +2499,7 @@ impl StorageBackend for PostgresBackend {
|
|||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
let rows = client.query(
|
||||
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at FROM media_items m JOIN favorites f ON m.id = f.media_id WHERE f.user_id = $1 ORDER BY f.created_at DESC LIMIT $2 OFFSET $3",
|
||||
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at FROM media_items m JOIN favorites f ON m.id = f.media_id WHERE f.user_id = $1 ORDER BY f.created_at DESC LIMIT $2 OFFSET $3",
|
||||
&[&user_id.0, &(pagination.limit as i64), &(pagination.offset as i64)],
|
||||
).await?;
|
||||
let mut items: Vec<MediaItem> = rows
|
||||
|
|
@ -2694,7 +2834,7 @@ impl StorageBackend for PostgresBackend {
|
|||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
let rows = client.query(
|
||||
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at FROM media_items m JOIN playlist_items pi ON m.id = pi.media_id WHERE pi.playlist_id = $1 ORDER BY pi.position ASC",
|
||||
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at FROM media_items m JOIN playlist_items pi ON m.id = pi.media_id WHERE pi.playlist_id = $1 ORDER BY pi.position ASC",
|
||||
&[&playlist_id],
|
||||
).await?;
|
||||
let mut items: Vec<MediaItem> = rows
|
||||
|
|
@ -2843,13 +2983,13 @@ impl StorageBackend for PostgresBackend {
|
|||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
let rows = client.query(
|
||||
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at, COUNT(ue.id) as view_count FROM media_items m JOIN usage_events ue ON m.id = ue.media_id WHERE ue.event_type IN ('view', 'play') GROUP BY m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at ORDER BY view_count DESC LIMIT $1",
|
||||
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at, COUNT(ue.id) as view_count FROM media_items m JOIN usage_events ue ON m.id = ue.media_id WHERE ue.event_type IN ('view', 'play') GROUP BY m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at ORDER BY view_count DESC LIMIT $1",
|
||||
&[&(limit as i64)],
|
||||
).await?;
|
||||
let mut results = Vec::new();
|
||||
for row in &rows {
|
||||
let item = row_to_media_item(row)?;
|
||||
let count: i64 = row.get(16);
|
||||
let count: i64 = row.get(24);
|
||||
results.push((item, count as u64));
|
||||
}
|
||||
|
||||
|
|
@ -2896,7 +3036,7 @@ impl StorageBackend for PostgresBackend {
|
|||
.await
|
||||
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
|
||||
let rows = client.query(
|
||||
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at FROM media_items m JOIN usage_events ue ON m.id = ue.media_id WHERE ue.user_id = $1 AND ue.event_type IN ('view', 'play') GROUP BY m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at ORDER BY MAX(ue.timestamp) DESC LIMIT $2",
|
||||
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at FROM media_items m JOIN usage_events ue ON m.id = ue.media_id WHERE ue.user_id = $1 AND ue.event_type IN ('view', 'play') GROUP BY m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at ORDER BY MAX(ue.timestamp) DESC LIMIT $2",
|
||||
&[&user_id.0, &(limit as i64)],
|
||||
).await?;
|
||||
let mut items: Vec<MediaItem> = rows
|
||||
|
|
|
|||
|
|
@ -113,6 +113,24 @@ fn row_to_media_item(row: &Row) -> rusqlite::Result<MediaItem> {
|
|||
custom_fields: HashMap::new(), // loaded separately
|
||||
// file_mtime may not be present in all queries, so handle gracefully
|
||||
file_mtime: row.get::<_, Option<i64>>("file_mtime").unwrap_or(None),
|
||||
|
||||
// Photo-specific fields (may not be present in all queries)
|
||||
date_taken: row
|
||||
.get::<_, Option<String>>("date_taken")
|
||||
.ok()
|
||||
.flatten()
|
||||
.and_then(|s| DateTime::parse_from_rfc3339(&s).ok())
|
||||
.map(|dt| dt.with_timezone(&Utc)),
|
||||
latitude: row.get::<_, Option<f64>>("latitude").ok().flatten(),
|
||||
longitude: row.get::<_, Option<f64>>("longitude").ok().flatten(),
|
||||
camera_make: row.get::<_, Option<String>>("camera_make").ok().flatten(),
|
||||
camera_model: row.get::<_, Option<String>>("camera_model").ok().flatten(),
|
||||
rating: row.get::<_, Option<i32>>("rating").ok().flatten(),
|
||||
perceptual_hash: row
|
||||
.get::<_, Option<String>>("perceptual_hash")
|
||||
.ok()
|
||||
.flatten(),
|
||||
|
||||
created_at: parse_datetime(&created_str),
|
||||
updated_at: parse_datetime(&updated_str),
|
||||
})
|
||||
|
|
@ -610,8 +628,9 @@ impl StorageBackend for SqliteBackend {
|
|||
db.execute(
|
||||
"INSERT INTO media_items (id, path, file_name, media_type, content_hash, \
|
||||
file_size, title, artist, album, genre, year, duration_secs, description, \
|
||||
thumbnail_path, file_mtime, created_at, updated_at) \
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17)",
|
||||
thumbnail_path, file_mtime, date_taken, latitude, longitude, camera_make, \
|
||||
camera_model, rating, perceptual_hash, created_at, updated_at) \
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24)",
|
||||
params![
|
||||
item.id.0.to_string(),
|
||||
item.path.to_string_lossy().as_ref(),
|
||||
|
|
@ -630,6 +649,13 @@ impl StorageBackend for SqliteBackend {
|
|||
.as_ref()
|
||||
.map(|p| p.to_string_lossy().to_string()),
|
||||
item.file_mtime,
|
||||
item.date_taken.as_ref().map(|d| d.to_rfc3339()),
|
||||
item.latitude,
|
||||
item.longitude,
|
||||
item.camera_make,
|
||||
item.camera_model,
|
||||
item.rating,
|
||||
item.perceptual_hash,
|
||||
item.created_at.to_rfc3339(),
|
||||
item.updated_at.to_rfc3339(),
|
||||
],
|
||||
|
|
@ -781,7 +807,9 @@ impl StorageBackend for SqliteBackend {
|
|||
"UPDATE media_items SET path = ?2, file_name = ?3, media_type = ?4, \
|
||||
content_hash = ?5, file_size = ?6, title = ?7, artist = ?8, album = ?9, \
|
||||
genre = ?10, year = ?11, duration_secs = ?12, description = ?13, \
|
||||
thumbnail_path = ?14, file_mtime = ?15, updated_at = ?16 WHERE id = ?1",
|
||||
thumbnail_path = ?14, file_mtime = ?15, date_taken = ?16, latitude = ?17, \
|
||||
longitude = ?18, camera_make = ?19, camera_model = ?20, rating = ?21, \
|
||||
perceptual_hash = ?22, updated_at = ?23 WHERE id = ?1",
|
||||
params![
|
||||
item.id.0.to_string(),
|
||||
item.path.to_string_lossy().as_ref(),
|
||||
|
|
@ -800,6 +828,13 @@ impl StorageBackend for SqliteBackend {
|
|||
.as_ref()
|
||||
.map(|p| p.to_string_lossy().to_string()),
|
||||
item.file_mtime,
|
||||
item.date_taken.as_ref().map(|d| d.to_rfc3339()),
|
||||
item.latitude,
|
||||
item.longitude,
|
||||
item.camera_make,
|
||||
item.camera_model,
|
||||
item.rating,
|
||||
item.perceptual_hash,
|
||||
item.updated_at.to_rfc3339(),
|
||||
],
|
||||
)?;
|
||||
|
|
@ -1534,6 +1569,77 @@ impl StorageBackend for SqliteBackend {
|
|||
.map_err(|e| PinakesError::Database(e.to_string()))?
|
||||
}
|
||||
|
||||
async fn find_perceptual_duplicates(&self, threshold: u32) -> Result<Vec<Vec<MediaItem>>> {
|
||||
let conn = Arc::clone(&self.conn);
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let db = conn
|
||||
.lock()
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?;
|
||||
|
||||
// Get all images with perceptual hashes
|
||||
let mut stmt = db.prepare(
|
||||
"SELECT * FROM media_items WHERE perceptual_hash IS NOT NULL ORDER BY id",
|
||||
)?;
|
||||
let mut items: Vec<MediaItem> = stmt
|
||||
.query_map([], row_to_media_item)?
|
||||
.collect::<rusqlite::Result<Vec<_>>>()?;
|
||||
|
||||
load_custom_fields_batch(&db, &mut items)?;
|
||||
|
||||
// Compare each pair and build groups
|
||||
use image_hasher::ImageHash;
|
||||
let mut groups: Vec<Vec<MediaItem>> = Vec::new();
|
||||
let mut grouped_indices: std::collections::HashSet<usize> =
|
||||
std::collections::HashSet::new();
|
||||
|
||||
for i in 0..items.len() {
|
||||
if grouped_indices.contains(&i) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let hash_a = match &items[i].perceptual_hash {
|
||||
Some(h) => match ImageHash::<Vec<u8>>::from_base64(h) {
|
||||
Ok(hash) => hash,
|
||||
Err(_) => continue,
|
||||
},
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let mut group = vec![items[i].clone()];
|
||||
grouped_indices.insert(i);
|
||||
|
||||
for (j, item_j) in items.iter().enumerate().skip(i + 1) {
|
||||
if grouped_indices.contains(&j) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let hash_b = match &item_j.perceptual_hash {
|
||||
Some(h) => match ImageHash::<Vec<u8>>::from_base64(h) {
|
||||
Ok(hash) => hash,
|
||||
Err(_) => continue,
|
||||
},
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let distance = hash_a.dist(&hash_b);
|
||||
if distance <= threshold {
|
||||
group.push(item_j.clone());
|
||||
grouped_indices.insert(j);
|
||||
}
|
||||
}
|
||||
|
||||
// Only add groups with more than one item (actual duplicates)
|
||||
if group.len() > 1 {
|
||||
groups.push(group);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(groups)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::Database(e.to_string()))?
|
||||
}
|
||||
|
||||
// -- Database management -----------------------------------------------
|
||||
|
||||
async fn database_stats(&self) -> Result<crate::storage::DatabaseStats> {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue