pinakes-core: improve media management features; various configuration improvements

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I2d1f04f13970d21c36067f30bc04a9176a6a6964
This commit is contained in:
raf 2026-02-05 00:54:10 +03:00
commit e02c15490e
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
31 changed files with 1167 additions and 197 deletions

View file

@ -196,6 +196,7 @@ pub trait StorageBackend: Send + Sync + 'static {
// Duplicates
async fn find_duplicates(&self) -> Result<Vec<Vec<MediaItem>>>;
async fn find_perceptual_duplicates(&self, threshold: u32) -> Result<Vec<Vec<MediaItem>>>;
// Database management
async fn database_stats(&self) -> Result<DatabaseStats>;

View file

@ -170,6 +170,16 @@ fn row_to_media_item(row: &Row) -> Result<MediaItem> {
.map(PathBuf::from),
custom_fields: HashMap::new(),
file_mtime: row.get("file_mtime"),
// Photo-specific fields
date_taken: row.get("date_taken"),
latitude: row.get("latitude"),
longitude: row.get("longitude"),
camera_make: row.get("camera_make"),
camera_model: row.get("camera_model"),
rating: row.get("rating"),
perceptual_hash: row.get("perceptual_hash"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
})
@ -589,9 +599,10 @@ impl StorageBackend for PostgresBackend {
"INSERT INTO media_items (
id, path, file_name, media_type, content_hash, file_size,
title, artist, album, genre, year, duration_secs, description,
thumbnail_path, created_at, updated_at
thumbnail_path, date_taken, latitude, longitude, camera_make,
camera_model, rating, perceptual_hash, created_at, updated_at
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23
)",
&[
&item.id.0,
@ -611,6 +622,13 @@ impl StorageBackend for PostgresBackend {
.thumbnail_path
.as_ref()
.map(|p| p.to_string_lossy().to_string()),
&item.date_taken,
&item.latitude,
&item.longitude,
&item.camera_make,
&item.camera_model,
&item.rating,
&item.perceptual_hash,
&item.created_at,
&item.updated_at,
],
@ -658,7 +676,8 @@ impl StorageBackend for PostgresBackend {
.query_opt(
"SELECT id, path, file_name, media_type, content_hash, file_size,
title, artist, album, genre, year, duration_secs, description,
thumbnail_path, created_at, updated_at
thumbnail_path, file_mtime, date_taken, latitude, longitude,
camera_make, camera_model, rating, perceptual_hash, created_at, updated_at
FROM media_items WHERE id = $1",
&[&id.0],
)
@ -681,7 +700,8 @@ impl StorageBackend for PostgresBackend {
.query_opt(
"SELECT id, path, file_name, media_type, content_hash, file_size,
title, artist, album, genre, year, duration_secs, description,
thumbnail_path, file_mtime, created_at, updated_at
thumbnail_path, file_mtime, date_taken, latitude, longitude,
camera_make, camera_model, rating, perceptual_hash, created_at, updated_at
FROM media_items WHERE content_hash = $1",
&[&hash.0],
)
@ -709,7 +729,8 @@ impl StorageBackend for PostgresBackend {
.query_opt(
"SELECT id, path, file_name, media_type, content_hash, file_size,
title, artist, album, genre, year, duration_secs, description,
thumbnail_path, file_mtime, created_at, updated_at
thumbnail_path, file_mtime, date_taken, latitude, longitude,
camera_make, camera_model, rating, perceptual_hash, created_at, updated_at
FROM media_items WHERE path = $1",
&[&path_str],
)
@ -746,7 +767,8 @@ impl StorageBackend for PostgresBackend {
let sql = format!(
"SELECT id, path, file_name, media_type, content_hash, file_size,
title, artist, album, genre, year, duration_secs, description,
thumbnail_path, created_at, updated_at
thumbnail_path, file_mtime, date_taken, latitude, longitude,
camera_make, camera_model, rating, perceptual_hash, created_at, updated_at
FROM media_items
ORDER BY {order_by}
LIMIT $1 OFFSET $2"
@ -816,7 +838,8 @@ impl StorageBackend for PostgresBackend {
path = $2, file_name = $3, media_type = $4, content_hash = $5,
file_size = $6, title = $7, artist = $8, album = $9, genre = $10,
year = $11, duration_secs = $12, description = $13,
thumbnail_path = $14, updated_at = $15
thumbnail_path = $14, date_taken = $15, latitude = $16, longitude = $17,
camera_make = $18, camera_model = $19, rating = $20, perceptual_hash = $21, updated_at = $22
WHERE id = $1",
&[
&item.id.0,
@ -836,6 +859,13 @@ impl StorageBackend for PostgresBackend {
.thumbnail_path
.as_ref()
.map(|p| p.to_string_lossy().to_string()),
&item.date_taken,
&item.latitude,
&item.longitude,
&item.camera_make,
&item.camera_model,
&item.rating,
&item.perceptual_hash,
&item.updated_at,
],
)
@ -1390,7 +1420,9 @@ impl StorageBackend for PostgresBackend {
let select = format!(
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size,
m.title, m.artist, m.album, m.genre, m.year, m.duration_secs,
m.description, m.thumbnail_path, m.created_at, m.updated_at,
m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude,
m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash,
m.created_at, m.updated_at,
ts_rank(m.search_vector, plainto_tsquery('english', ${fts_param_idx})) AS rank
FROM media_items m
WHERE {full_where}
@ -1405,7 +1437,9 @@ impl StorageBackend for PostgresBackend {
let select = format!(
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size,
m.title, m.artist, m.album, m.genre, m.year, m.duration_secs,
m.description, m.thumbnail_path, m.created_at, m.updated_at
m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude,
m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash,
m.created_at, m.updated_at
FROM media_items m
WHERE {full_where}
ORDER BY {order_by}
@ -1694,6 +1728,112 @@ impl StorageBackend for PostgresBackend {
Ok(groups)
}
/// Find groups of visually similar images by comparing stored perceptual hashes.
///
/// Two items are grouped together when the distance between their decoded
/// perceptual hashes is `<= threshold`. Items whose stored hash fails to
/// decode from base64 are silently skipped (they can never be grouped).
/// Only groups with more than one member — actual duplicates — are returned.
async fn find_perceptual_duplicates(&self, threshold: u32) -> Result<Vec<Vec<MediaItem>>> {
    let client = self
        .pool
        .get()
        .await
        .map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
    // Get all images with perceptual hashes; ORDER BY id keeps grouping deterministic.
    let rows = client
        .query(
            "SELECT id, path, file_name, media_type, content_hash, file_size,
            title, artist, album, genre, year, duration_secs, description,
            thumbnail_path, file_mtime, date_taken, latitude, longitude,
            camera_make, camera_model, rating, perceptual_hash, created_at, updated_at
            FROM media_items WHERE perceptual_hash IS NOT NULL ORDER BY id",
            &[],
        )
        .await?;
    let mut items = Vec::with_capacity(rows.len());
    for row in &rows {
        items.push(row_to_media_item(row)?);
    }
    // Batch-load custom fields for all fetched items in a single query.
    if !items.is_empty() {
        let ids: Vec<Uuid> = items.iter().map(|i| i.id.0).collect();
        let cf_rows = client
            .query(
                "SELECT media_id, field_name, field_type, field_value
                FROM custom_fields WHERE media_id = ANY($1)",
                &[&ids],
            )
            .await?;
        let mut cf_map: HashMap<Uuid, HashMap<String, CustomField>> = HashMap::new();
        for row in &cf_rows {
            let mid: Uuid = row.get("media_id");
            let name: String = row.get("field_name");
            let ft_str: String = row.get("field_type");
            let value: String = row.get("field_value");
            let field_type = custom_field_type_from_string(&ft_str)?;
            cf_map
                .entry(mid)
                .or_default()
                .insert(name, CustomField { field_type, value });
        }
        for item in &mut items {
            if let Some(fields) = cf_map.remove(&item.id.0) {
                item.custom_fields = fields;
            }
        }
    }
    use image_hasher::ImageHash;
    // Decode every hash exactly once up front. The previous version re-decoded
    // hash_b from base64 inside the pair loop, costing O(n^2) decodes; this
    // keeps the (unavoidable) O(n^2) distance comparisons but only O(n) decodes.
    let decoded: Vec<Option<ImageHash<Vec<u8>>>> = items
        .iter()
        .map(|item| {
            item.perceptual_hash
                .as_deref()
                .and_then(|h| ImageHash::<Vec<u8>>::from_base64(h).ok())
        })
        .collect();
    // Greedy grouping: each ungrouped item seeds a group and absorbs all
    // later ungrouped items within `threshold` distance of the seed.
    let mut groups: Vec<Vec<MediaItem>> = Vec::new();
    let mut grouped_indices: std::collections::HashSet<usize> =
        std::collections::HashSet::new();
    for i in 0..items.len() {
        if grouped_indices.contains(&i) {
            continue;
        }
        let hash_a = match decoded[i].as_ref() {
            Some(h) => h,
            None => continue, // undecodable hash: never grouped
        };
        let mut group = vec![items[i].clone()];
        grouped_indices.insert(i);
        for j in (i + 1)..items.len() {
            if grouped_indices.contains(&j) {
                continue;
            }
            let hash_b = match decoded[j].as_ref() {
                Some(h) => h,
                None => continue,
            };
            if hash_a.dist(hash_b) <= threshold {
                group.push(items[j].clone());
                grouped_indices.insert(j);
            }
        }
        // Only add groups with more than one item (actual duplicates)
        if group.len() > 1 {
            groups.push(group);
        }
    }
    Ok(groups)
}
// ---- Database management ----
async fn database_stats(&self) -> Result<crate::storage::DatabaseStats> {
@ -2359,7 +2499,7 @@ impl StorageBackend for PostgresBackend {
.await
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
let rows = client.query(
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at FROM media_items m JOIN favorites f ON m.id = f.media_id WHERE f.user_id = $1 ORDER BY f.created_at DESC LIMIT $2 OFFSET $3",
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at FROM media_items m JOIN favorites f ON m.id = f.media_id WHERE f.user_id = $1 ORDER BY f.created_at DESC LIMIT $2 OFFSET $3",
&[&user_id.0, &(pagination.limit as i64), &(pagination.offset as i64)],
).await?;
let mut items: Vec<MediaItem> = rows
@ -2694,7 +2834,7 @@ impl StorageBackend for PostgresBackend {
.await
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
let rows = client.query(
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at FROM media_items m JOIN playlist_items pi ON m.id = pi.media_id WHERE pi.playlist_id = $1 ORDER BY pi.position ASC",
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at FROM media_items m JOIN playlist_items pi ON m.id = pi.media_id WHERE pi.playlist_id = $1 ORDER BY pi.position ASC",
&[&playlist_id],
).await?;
let mut items: Vec<MediaItem> = rows
@ -2843,13 +2983,13 @@ impl StorageBackend for PostgresBackend {
.await
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
let rows = client.query(
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at, COUNT(ue.id) as view_count FROM media_items m JOIN usage_events ue ON m.id = ue.media_id WHERE ue.event_type IN ('view', 'play') GROUP BY m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at ORDER BY view_count DESC LIMIT $1",
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at, COUNT(ue.id) as view_count FROM media_items m JOIN usage_events ue ON m.id = ue.media_id WHERE ue.event_type IN ('view', 'play') GROUP BY m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at ORDER BY view_count DESC LIMIT $1",
&[&(limit as i64)],
).await?;
let mut results = Vec::new();
for row in &rows {
let item = row_to_media_item(row)?;
let count: i64 = row.get(16);
let count: i64 = row.get(24);
results.push((item, count as u64));
}
@ -2896,7 +3036,7 @@ impl StorageBackend for PostgresBackend {
.await
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
let rows = client.query(
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at FROM media_items m JOIN usage_events ue ON m.id = ue.media_id WHERE ue.user_id = $1 AND ue.event_type IN ('view', 'play') GROUP BY m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.created_at, m.updated_at ORDER BY MAX(ue.timestamp) DESC LIMIT $2",
"SELECT m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at FROM media_items m JOIN usage_events ue ON m.id = ue.media_id WHERE ue.user_id = $1 AND ue.event_type IN ('view', 'play') GROUP BY m.id, m.path, m.file_name, m.media_type, m.content_hash, m.file_size, m.title, m.artist, m.album, m.genre, m.year, m.duration_secs, m.description, m.thumbnail_path, m.file_mtime, m.date_taken, m.latitude, m.longitude, m.camera_make, m.camera_model, m.rating, m.perceptual_hash, m.created_at, m.updated_at ORDER BY MAX(ue.timestamp) DESC LIMIT $2",
&[&user_id.0, &(limit as i64)],
).await?;
let mut items: Vec<MediaItem> = rows

View file

@ -113,6 +113,24 @@ fn row_to_media_item(row: &Row) -> rusqlite::Result<MediaItem> {
custom_fields: HashMap::new(), // loaded separately
// file_mtime may not be present in all queries, so handle gracefully
file_mtime: row.get::<_, Option<i64>>("file_mtime").unwrap_or(None),
// Photo-specific fields (may not be present in all queries)
date_taken: row
.get::<_, Option<String>>("date_taken")
.ok()
.flatten()
.and_then(|s| DateTime::parse_from_rfc3339(&s).ok())
.map(|dt| dt.with_timezone(&Utc)),
latitude: row.get::<_, Option<f64>>("latitude").ok().flatten(),
longitude: row.get::<_, Option<f64>>("longitude").ok().flatten(),
camera_make: row.get::<_, Option<String>>("camera_make").ok().flatten(),
camera_model: row.get::<_, Option<String>>("camera_model").ok().flatten(),
rating: row.get::<_, Option<i32>>("rating").ok().flatten(),
perceptual_hash: row
.get::<_, Option<String>>("perceptual_hash")
.ok()
.flatten(),
created_at: parse_datetime(&created_str),
updated_at: parse_datetime(&updated_str),
})
@ -610,8 +628,9 @@ impl StorageBackend for SqliteBackend {
db.execute(
"INSERT INTO media_items (id, path, file_name, media_type, content_hash, \
file_size, title, artist, album, genre, year, duration_secs, description, \
thumbnail_path, file_mtime, created_at, updated_at) \
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17)",
thumbnail_path, file_mtime, date_taken, latitude, longitude, camera_make, \
camera_model, rating, perceptual_hash, created_at, updated_at) \
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24)",
params![
item.id.0.to_string(),
item.path.to_string_lossy().as_ref(),
@ -630,6 +649,13 @@ impl StorageBackend for SqliteBackend {
.as_ref()
.map(|p| p.to_string_lossy().to_string()),
item.file_mtime,
item.date_taken.as_ref().map(|d| d.to_rfc3339()),
item.latitude,
item.longitude,
item.camera_make,
item.camera_model,
item.rating,
item.perceptual_hash,
item.created_at.to_rfc3339(),
item.updated_at.to_rfc3339(),
],
@ -781,7 +807,9 @@ impl StorageBackend for SqliteBackend {
"UPDATE media_items SET path = ?2, file_name = ?3, media_type = ?4, \
content_hash = ?5, file_size = ?6, title = ?7, artist = ?8, album = ?9, \
genre = ?10, year = ?11, duration_secs = ?12, description = ?13, \
thumbnail_path = ?14, file_mtime = ?15, updated_at = ?16 WHERE id = ?1",
thumbnail_path = ?14, file_mtime = ?15, date_taken = ?16, latitude = ?17, \
longitude = ?18, camera_make = ?19, camera_model = ?20, rating = ?21, \
perceptual_hash = ?22, updated_at = ?23 WHERE id = ?1",
params![
item.id.0.to_string(),
item.path.to_string_lossy().as_ref(),
@ -800,6 +828,13 @@ impl StorageBackend for SqliteBackend {
.as_ref()
.map(|p| p.to_string_lossy().to_string()),
item.file_mtime,
item.date_taken.as_ref().map(|d| d.to_rfc3339()),
item.latitude,
item.longitude,
item.camera_make,
item.camera_model,
item.rating,
item.perceptual_hash,
item.updated_at.to_rfc3339(),
],
)?;
@ -1534,6 +1569,77 @@ impl StorageBackend for SqliteBackend {
.map_err(|e| PinakesError::Database(e.to_string()))?
}
/// Find groups of visually similar images by comparing stored perceptual hashes.
///
/// Runs on the blocking thread pool because rusqlite is synchronous. Two items
/// are grouped when the distance between their decoded perceptual hashes is
/// `<= threshold`; items whose hash fails base64 decoding are skipped. Only
/// groups with more than one member (actual duplicates) are returned.
async fn find_perceptual_duplicates(&self, threshold: u32) -> Result<Vec<Vec<MediaItem>>> {
    let conn = Arc::clone(&self.conn);
    tokio::task::spawn_blocking(move || {
        let db = conn
            .lock()
            .map_err(|e| PinakesError::Database(e.to_string()))?;
        // Get all images with perceptual hashes; ORDER BY id keeps grouping deterministic.
        let mut stmt = db.prepare(
            "SELECT * FROM media_items WHERE perceptual_hash IS NOT NULL ORDER BY id",
        )?;
        let mut items: Vec<MediaItem> = stmt
            .query_map([], row_to_media_item)?
            .collect::<rusqlite::Result<Vec<_>>>()?;
        load_custom_fields_batch(&db, &mut items)?;
        use image_hasher::ImageHash;
        // Decode every hash exactly once up front. The previous version
        // re-decoded hash_b from base64 inside the pair loop, costing O(n^2)
        // decodes; this keeps the O(n^2) distance comparisons but O(n) decodes.
        let decoded: Vec<Option<ImageHash<Vec<u8>>>> = items
            .iter()
            .map(|item| {
                item.perceptual_hash
                    .as_deref()
                    .and_then(|h| ImageHash::<Vec<u8>>::from_base64(h).ok())
            })
            .collect();
        // Greedy grouping: each ungrouped item seeds a group and absorbs all
        // later ungrouped items within `threshold` distance of the seed.
        let mut groups: Vec<Vec<MediaItem>> = Vec::new();
        let mut grouped_indices: std::collections::HashSet<usize> =
            std::collections::HashSet::new();
        for i in 0..items.len() {
            if grouped_indices.contains(&i) {
                continue;
            }
            let hash_a = match decoded[i].as_ref() {
                Some(h) => h,
                None => continue, // undecodable hash: never grouped
            };
            let mut group = vec![items[i].clone()];
            grouped_indices.insert(i);
            for j in (i + 1)..items.len() {
                if grouped_indices.contains(&j) {
                    continue;
                }
                let hash_b = match decoded[j].as_ref() {
                    Some(h) => h,
                    None => continue,
                };
                if hash_a.dist(hash_b) <= threshold {
                    group.push(items[j].clone());
                    grouped_indices.insert(j);
                }
            }
            // Only add groups with more than one item (actual duplicates)
            if group.len() > 1 {
                groups.push(group);
            }
        }
        Ok(groups)
    })
    .await
    .map_err(|e| PinakesError::Database(e.to_string()))?
}
// -- Database management -----------------------------------------------
async fn database_stats(&self) -> Result<crate::storage::DatabaseStats> {