pinakes: import in parallel; various UI improvements

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I1eb47cd79cd4145c56af966f6756fe1d6a6a6964
This commit is contained in:
raf 2026-02-03 10:31:20 +03:00
commit 116fe7b059
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
42 changed files with 4316 additions and 316 deletions

View file

@ -114,6 +114,7 @@ fn row_to_media_item(row: &Row) -> Result<MediaItem> {
.get::<_, Option<String>>("thumbnail_path")
.map(PathBuf::from),
custom_fields: HashMap::new(),
file_mtime: row.get("file_mtime"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
})
@ -198,11 +199,61 @@ fn build_search_inner(
if text.is_empty() {
return Ok("TRUE".to_string());
}
let idx = *offset;
// Combine FTS with trigram similarity and ILIKE for comprehensive fuzzy matching
// This allows partial matches like "mus" -> "music"
let idx_fts = *offset;
*offset += 1;
let idx_prefix = *offset;
*offset += 1;
let idx_ilike = *offset;
*offset += 1;
let idx_sim_title = *offset;
*offset += 1;
let idx_sim_artist = *offset;
*offset += 1;
let idx_sim_album = *offset;
*offset += 1;
let idx_sim_filename = *offset;
*offset += 1;
// Sanitize for tsquery prefix matching
let sanitized = text.replace(['&', '|', '!', '(', ')', ':', '*', '\\', '\''], "");
let prefix_query = if sanitized.contains(' ') {
// For multi-word, join with & and add :* to last word
let words: Vec<&str> = sanitized.split_whitespace().collect();
if let Some((last, rest)) = words.split_last() {
let prefix_parts: Vec<String> = rest.iter().map(|w| w.to_string()).collect();
if prefix_parts.is_empty() {
format!("{}:*", last)
} else {
format!("{} & {}:*", prefix_parts.join(" & "), last)
}
} else {
format!("{}:*", sanitized)
}
} else {
format!("{}:*", sanitized)
};
params.push(Box::new(text.clone()));
params.push(Box::new(prefix_query));
params.push(Box::new(format!("%{}%", text)));
params.push(Box::new(text.clone()));
params.push(Box::new(text.clone()));
params.push(Box::new(text.clone()));
params.push(Box::new(text.clone()));
Ok(format!(
"search_vector @@ plainto_tsquery('english', ${idx})"
"(\
search_vector @@ plainto_tsquery('english', ${idx_fts}) OR \
search_vector @@ to_tsquery('english', ${idx_prefix}) OR \
LOWER(COALESCE(title, '')) LIKE LOWER(${idx_ilike}) OR \
LOWER(COALESCE(file_name, '')) LIKE LOWER(${idx_ilike}) OR \
similarity(COALESCE(title, ''), ${idx_sim_title}) > 0.3 OR \
similarity(COALESCE(artist, ''), ${idx_sim_artist}) > 0.3 OR \
similarity(COALESCE(album, ''), ${idx_sim_album}) > 0.3 OR \
similarity(COALESCE(file_name, ''), ${idx_sim_filename}) > 0.25\
)"
))
}
SearchQuery::Prefix(term) => {
@ -214,14 +265,31 @@ fn build_search_inner(
Ok(format!("search_vector @@ to_tsquery('english', ${idx})"))
}
SearchQuery::Fuzzy(term) => {
// Use trigram similarity on multiple fields
let idx_title = *offset;
*offset += 1;
let idx_artist = *offset;
*offset += 1;
let idx_album = *offset;
*offset += 1;
let idx_filename = *offset;
*offset += 1;
let idx_ilike = *offset;
*offset += 1;
params.push(Box::new(term.clone()));
params.push(Box::new(term.clone()));
params.push(Box::new(term.clone()));
params.push(Box::new(term.clone()));
params.push(Box::new(format!("%{}%", term)));
Ok(format!(
"(similarity(COALESCE(title, ''), ${idx_title}) > 0.3 OR similarity(COALESCE(artist, ''), ${idx_artist}) > 0.3)"
"(\
similarity(COALESCE(title, ''), ${idx_title}) > 0.3 OR \
similarity(COALESCE(artist, ''), ${idx_artist}) > 0.3 OR \
similarity(COALESCE(album, ''), ${idx_album}) > 0.3 OR \
similarity(COALESCE(file_name, ''), ${idx_filename}) > 0.25 OR \
LOWER(COALESCE(title, '')) LIKE LOWER(${idx_ilike}) OR \
LOWER(COALESCE(file_name, '')) LIKE LOWER(${idx_ilike})\
)"
))
}
SearchQuery::FieldMatch { field, value } => {
@ -277,6 +345,86 @@ fn build_search_inner(
let frag = build_search_inner(inner, offset, params, type_filters, tag_filters)?;
Ok(format!("NOT ({frag})"))
}
SearchQuery::RangeQuery { field, start, end } => {
let col = match field.as_str() {
"year" => "year",
"size" | "file_size" => "file_size",
"duration" => "duration_secs",
_ => return Ok("TRUE".to_string()), // Unknown field, ignore
};
match (start, end) {
(Some(s), Some(e)) => {
let idx_start = *offset;
*offset += 1;
let idx_end = *offset;
*offset += 1;
params.push(Box::new(*s));
params.push(Box::new(*e));
Ok(format!("({col} >= ${idx_start} AND {col} <= ${idx_end})"))
}
(Some(s), None) => {
let idx = *offset;
*offset += 1;
params.push(Box::new(*s));
Ok(format!("{col} >= ${idx}"))
}
(None, Some(e)) => {
let idx = *offset;
*offset += 1;
params.push(Box::new(*e));
Ok(format!("{col} <= ${idx}"))
}
(None, None) => Ok("TRUE".to_string()),
}
}
SearchQuery::CompareQuery { field, op, value } => {
let col = match field.as_str() {
"year" => "year",
"size" | "file_size" => "file_size",
"duration" => "duration_secs",
_ => return Ok("TRUE".to_string()), // Unknown field, ignore
};
let op_sql = match op {
crate::search::CompareOp::GreaterThan => ">",
crate::search::CompareOp::GreaterOrEqual => ">=",
crate::search::CompareOp::LessThan => "<",
crate::search::CompareOp::LessOrEqual => "<=",
};
let idx = *offset;
*offset += 1;
params.push(Box::new(*value));
Ok(format!("{col} {op_sql} ${idx}"))
}
SearchQuery::DateQuery { field, value } => {
let col = match field.as_str() {
"created" => "created_at",
"modified" | "updated" => "updated_at",
_ => return Ok("TRUE".to_string()),
};
Ok(date_value_to_postgres_expr(col, value))
}
}
}
/// Convert a DateValue to a PostgreSQL datetime comparison expression
fn date_value_to_postgres_expr(col: &str, value: &crate::search::DateValue) -> String {
use crate::search::DateValue;
match value {
DateValue::Today => format!("{col}::date = CURRENT_DATE"),
DateValue::Yesterday => format!("{col}::date = CURRENT_DATE - INTERVAL '1 day'"),
DateValue::ThisWeek => format!("{col} >= date_trunc('week', CURRENT_DATE)"),
DateValue::LastWeek => format!(
"{col} >= date_trunc('week', CURRENT_DATE) - INTERVAL '7 days' AND {col} < date_trunc('week', CURRENT_DATE)"
),
DateValue::ThisMonth => format!("{col} >= date_trunc('month', CURRENT_DATE)"),
DateValue::LastMonth => format!(
"{col} >= date_trunc('month', CURRENT_DATE) - INTERVAL '1 month' AND {col} < date_trunc('month', CURRENT_DATE)"
),
DateValue::ThisYear => format!("{col} >= date_trunc('year', CURRENT_DATE)"),
DateValue::LastYear => format!(
"{col} >= date_trunc('year', CURRENT_DATE) - INTERVAL '1 year' AND {col} < date_trunc('year', CURRENT_DATE)"
),
DateValue::DaysAgo(days) => format!("{col} >= CURRENT_DATE - INTERVAL '{days} days'"),
}
}
@ -478,7 +626,7 @@ impl StorageBackend for PostgresBackend {
.query_opt(
"SELECT id, path, file_name, media_type, content_hash, file_size,
title, artist, album, genre, year, duration_secs, description,
thumbnail_path, created_at, updated_at
thumbnail_path, file_mtime, created_at, updated_at
FROM media_items WHERE content_hash = $1",
&[&hash.0],
)
@ -494,6 +642,34 @@ impl StorageBackend for PostgresBackend {
}
}
async fn get_media_by_path(&self, path: &std::path::Path) -> Result<Option<MediaItem>> {
let path_str = path.to_string_lossy().to_string();
let client = self
.pool
.get()
.await
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
let row = client
.query_opt(
"SELECT id, path, file_name, media_type, content_hash, file_size,
title, artist, album, genre, year, duration_secs, description,
thumbnail_path, file_mtime, created_at, updated_at
FROM media_items WHERE path = $1",
&[&path_str],
)
.await?;
match row {
Some(r) => {
let mut item = row_to_media_item(&r)?;
item.custom_fields = self.get_custom_fields(item.id).await?;
Ok(Some(item))
}
None => Ok(None),
}
}
async fn list_media(&self, pagination: &Pagination) -> Result<Vec<MediaItem>> {
let client = self
.pool
@ -671,6 +847,59 @@ impl StorageBackend for PostgresBackend {
Ok(count as u64)
}
// ---- Batch Operations ----
async fn batch_delete_media(&self, ids: &[MediaId]) -> Result<u64> {
if ids.is_empty() {
return Ok(0);
}
let client = self
.pool
.get()
.await
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
// Use ANY with array for efficient batch delete
let uuids: Vec<Uuid> = ids.iter().map(|id| id.0).collect();
let rows = client
.execute("DELETE FROM media_items WHERE id = ANY($1)", &[&uuids])
.await?;
Ok(rows)
}
async fn batch_tag_media(&self, media_ids: &[MediaId], tag_ids: &[Uuid]) -> Result<u64> {
if media_ids.is_empty() || tag_ids.is_empty() {
return Ok(0);
}
let client = self
.pool
.get()
.await
.map_err(|e| PinakesError::Database(format!("pool error: {e}")))?;
// Use UNNEST for efficient batch insert
let mut media_uuids = Vec::new();
let mut tag_uuids = Vec::new();
for mid in media_ids {
for tid in tag_ids {
media_uuids.push(mid.0);
tag_uuids.push(*tid);
}
}
let rows = client
.execute(
"INSERT INTO media_tags (media_id, tag_id)
SELECT * FROM UNNEST($1::uuid[], $2::uuid[])
ON CONFLICT DO NOTHING",
&[&media_uuids, &tag_uuids],
)
.await?;
Ok(rows)
}
// ---- Tags ----
async fn create_tag(&self, name: &str, parent_id: Option<Uuid>) -> Result<Tag> {
@ -3155,6 +3384,9 @@ fn query_has_fts(query: &SearchQuery) -> bool {
SearchQuery::FieldMatch { .. } => false,
SearchQuery::TypeFilter(_) => false,
SearchQuery::TagFilter(_) => false,
SearchQuery::RangeQuery { .. } => false,
SearchQuery::CompareQuery { .. } => false,
SearchQuery::DateQuery { .. } => false,
SearchQuery::And(children) | SearchQuery::Or(children) => {
children.iter().any(query_has_fts)
}
@ -3173,7 +3405,7 @@ fn find_first_fts_param(query: &SearchQuery) -> i32 {
None
} else {
let idx = *offset;
*offset += 1;
*offset += 7; // FullText now uses 7 params (fts, prefix, ilike, sim_title, sim_artist, sim_album, sim_filename)
Some(idx)
}
}
@ -3183,7 +3415,7 @@ fn find_first_fts_param(query: &SearchQuery) -> i32 {
Some(idx)
}
SearchQuery::Fuzzy(_) => {
*offset += 2; // fuzzy uses two params
*offset += 5; // Fuzzy now uses 5 params (sim_title, sim_artist, sim_album, sim_filename, ilike)
None
}
SearchQuery::FieldMatch { .. } => {
@ -3191,6 +3423,21 @@ fn find_first_fts_param(query: &SearchQuery) -> i32 {
None
}
SearchQuery::TypeFilter(_) | SearchQuery::TagFilter(_) => None,
SearchQuery::RangeQuery { start, end, .. } => {
// Range queries use 0-2 params depending on bounds
if start.is_some() {
*offset += 1;
}
if end.is_some() {
*offset += 1;
}
None
}
SearchQuery::CompareQuery { .. } => {
*offset += 1;
None
}
SearchQuery::DateQuery { .. } => None, // No params, uses inline SQL
SearchQuery::And(children) | SearchQuery::Or(children) => {
for child in children {
if let Some(idx) = find_inner(child, offset) {
@ -3255,10 +3502,15 @@ mod tests {
let mut offset = 1;
let mut params: Vec<Box<dyn ToSql + Sync + Send>> = Vec::new();
let (clause, types, tags) = build_search_clause(&query, &mut offset, &mut params).unwrap();
assert_eq!(clause, "search_vector @@ plainto_tsquery('english', $1)");
// Fuzzy search combines FTS, prefix, ILIKE, and trigram similarity
assert!(clause.contains("plainto_tsquery"));
assert!(clause.contains("to_tsquery"));
assert!(clause.contains("LIKE"));
assert!(clause.contains("similarity"));
assert!(types.is_empty());
assert!(tags.is_empty());
assert_eq!(offset, 2);
// FullText now uses 7 parameters
assert_eq!(offset, 8);
}
#[test]