treewide: fix various UI bugs; optimize crypto dependencies & format

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: If8fe8b38c1d9c4fecd40ff71f88d2ae06a6a6964
This commit is contained in:
raf 2026-02-10 12:56:05 +03:00
commit 3ccddce7fd
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
178 changed files with 58342 additions and 54241 deletions

View file

@ -4,66 +4,65 @@ use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::model::MediaId;
use crate::users::UserId;
use crate::{model::MediaId, users::UserId};
/// A tracked usage event for a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UsageEvent {
pub id: Uuid,
pub media_id: Option<MediaId>,
pub user_id: Option<UserId>,
pub event_type: UsageEventType,
pub timestamp: DateTime<Utc>,
pub duration_secs: Option<f64>,
pub context_json: Option<String>,
pub id: Uuid,
pub media_id: Option<MediaId>,
pub user_id: Option<UserId>,
pub event_type: UsageEventType,
pub timestamp: DateTime<Utc>,
pub duration_secs: Option<f64>,
pub context_json: Option<String>,
}
/// Types of usage events that can be tracked.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum UsageEventType {
View,
Play,
Export,
Share,
Search,
View,
Play,
Export,
Share,
Search,
}
impl std::fmt::Display for UsageEventType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
Self::View => "view",
Self::Play => "play",
Self::Export => "export",
Self::Share => "share",
Self::Search => "search",
};
write!(f, "{s}")
}
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
Self::View => "view",
Self::Play => "play",
Self::Export => "export",
Self::Share => "share",
Self::Search => "search",
};
write!(f, "{s}")
}
}
impl std::str::FromStr for UsageEventType {
type Err = String;
type Err = String;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"view" => Ok(Self::View),
"play" => Ok(Self::Play),
"export" => Ok(Self::Export),
"share" => Ok(Self::Share),
"search" => Ok(Self::Search),
_ => Err(format!("unknown usage event type: {s}")),
}
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"view" => Ok(Self::View),
"play" => Ok(Self::Play),
"export" => Ok(Self::Export),
"share" => Ok(Self::Share),
"search" => Ok(Self::Search),
_ => Err(format!("unknown usage event type: {s}")),
}
}
}
/// Watch history entry tracking progress through media.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WatchHistory {
pub id: Uuid,
pub user_id: UserId,
pub media_id: MediaId,
pub progress_secs: f64,
pub last_watched: DateTime<Utc>,
pub id: Uuid,
pub user_id: UserId,
pub media_id: MediaId,
pub progress_secs: f64,
pub last_watched: DateTime<Utc>,
}

View file

@ -1,21 +1,23 @@
use uuid::Uuid;
use crate::error::Result;
use crate::model::{AuditAction, AuditEntry, MediaId};
use crate::storage::DynStorageBackend;
use crate::{
error::Result,
model::{AuditAction, AuditEntry, MediaId},
storage::DynStorageBackend,
};
pub async fn record_action(
storage: &DynStorageBackend,
media_id: Option<MediaId>,
action: AuditAction,
details: Option<String>,
storage: &DynStorageBackend,
media_id: Option<MediaId>,
action: AuditAction,
details: Option<String>,
) -> Result<()> {
let entry = AuditEntry {
id: Uuid::now_v7(),
media_id,
action,
details,
timestamp: chrono::Utc::now(),
};
storage.record_audit(&entry).await
let entry = AuditEntry {
id: Uuid::now_v7(),
media_id,
action,
details,
timestamp: chrono::Utc::now(),
};
storage.record_audit(&entry).await
}

View file

@ -2,184 +2,192 @@ use crate::error::{PinakesError, Result};
/// Normalize ISBN to ISBN-13 format
pub fn normalize_isbn(isbn: &str) -> Result<String> {
// Remove hyphens, spaces, and any non-numeric characters (except X for ISBN-10)
let clean: String = isbn
.chars()
.filter(|c| c.is_ascii_digit() || *c == 'X' || *c == 'x')
.collect();
// Remove hyphens, spaces, and any non-numeric characters (except X for
// ISBN-10)
let clean: String = isbn
.chars()
.filter(|c| c.is_ascii_digit() || *c == 'X' || *c == 'x')
.collect();
match clean.len() {
10 => isbn10_to_isbn13(&clean),
13 => {
if is_valid_isbn13(&clean) {
Ok(clean)
} else {
Err(PinakesError::InvalidData(format!(
"Invalid ISBN-13 checksum: {}",
isbn
)))
}
}
_ => Err(PinakesError::InvalidData(format!(
"Invalid ISBN length: {}",
isbn
))),
}
match clean.len() {
10 => isbn10_to_isbn13(&clean),
13 => {
if is_valid_isbn13(&clean) {
Ok(clean)
} else {
Err(PinakesError::InvalidData(format!(
"Invalid ISBN-13 checksum: {}",
isbn
)))
}
},
_ => {
Err(PinakesError::InvalidData(format!(
"Invalid ISBN length: {}",
isbn
)))
},
}
}
/// Convert ISBN-10 to ISBN-13
fn isbn10_to_isbn13(isbn10: &str) -> Result<String> {
if isbn10.len() != 10 {
return Err(PinakesError::InvalidData(format!(
"ISBN-10 must be 10 characters: {}",
isbn10
)));
}
if isbn10.len() != 10 {
return Err(PinakesError::InvalidData(format!(
"ISBN-10 must be 10 characters: {}",
isbn10
)));
}
// Add 978 prefix
let mut isbn13 = format!("978{}", &isbn10[..9]);
// Add 978 prefix
let mut isbn13 = format!("978{}", &isbn10[..9]);
// Calculate check digit
let check_digit = calculate_isbn13_check_digit(&isbn13)?;
isbn13.push_str(&check_digit.to_string());
// Calculate check digit
let check_digit = calculate_isbn13_check_digit(&isbn13)?;
isbn13.push_str(&check_digit.to_string());
Ok(isbn13)
Ok(isbn13)
}
/// Calculate ISBN-13 check digit
fn calculate_isbn13_check_digit(isbn_without_check: &str) -> Result<u32> {
if isbn_without_check.len() != 12 {
return Err(PinakesError::InvalidData(
"ISBN-13 without check digit must be 12 digits".to_string(),
));
}
if isbn_without_check.len() != 12 {
return Err(PinakesError::InvalidData(
"ISBN-13 without check digit must be 12 digits".to_string(),
));
}
let sum: u32 = isbn_without_check
.chars()
.enumerate()
.filter_map(|(i, c)| c.to_digit(10).map(|d| if i % 2 == 0 { d } else { d * 3 }))
.sum();
let sum: u32 = isbn_without_check
.chars()
.enumerate()
.filter_map(|(i, c)| {
c.to_digit(10).map(|d| if i % 2 == 0 { d } else { d * 3 })
})
.sum();
let check_digit = (10 - (sum % 10)) % 10;
Ok(check_digit)
let check_digit = (10 - (sum % 10)) % 10;
Ok(check_digit)
}
/// Validate ISBN-13 checksum
fn is_valid_isbn13(isbn13: &str) -> bool {
if isbn13.len() != 13 {
return false;
}
if isbn13.len() != 13 {
return false;
}
let sum: u32 = isbn13
.chars()
.enumerate()
.filter_map(|(i, c)| c.to_digit(10).map(|d| if i % 2 == 0 { d } else { d * 3 }))
.sum();
let sum: u32 = isbn13
.chars()
.enumerate()
.filter_map(|(i, c)| {
c.to_digit(10).map(|d| if i % 2 == 0 { d } else { d * 3 })
})
.sum();
sum.is_multiple_of(10)
sum.is_multiple_of(10)
}
/// Extract ISBN from text (searches for ISBN-10 or ISBN-13 patterns)
pub fn extract_isbn_from_text(text: &str) -> Option<String> {
use regex::Regex;
use regex::Regex;
// Try different patterns in order of specificity
let patterns = vec![
// ISBN followed by colon or "is" with hyphens (most specific)
r"ISBN(?:-13)?(?:\s+is|:)?\s*(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)",
r"ISBN(?:-10)?(?:\s+is|:)?\s*(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])",
// ISBN with just whitespace
r"ISBN(?:-13)?\s+(\d{13})",
r"ISBN(?:-10)?\s+(\d{9}[\dXx])",
// Bare ISBN-13 with hyphens (in case "ISBN" is missing)
r"\b(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)\b",
// Bare ISBN-10 with hyphens
r"\b(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])\b",
];
// Try different patterns in order of specificity
let patterns = vec![
// ISBN followed by colon or "is" with hyphens (most specific)
r"ISBN(?:-13)?(?:\s+is|:)?\s*(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)",
r"ISBN(?:-10)?(?:\s+is|:)?\s*(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])",
// ISBN with just whitespace
r"ISBN(?:-13)?\s+(\d{13})",
r"ISBN(?:-10)?\s+(\d{9}[\dXx])",
// Bare ISBN-13 with hyphens (in case "ISBN" is missing)
r"\b(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)\b",
// Bare ISBN-10 with hyphens
r"\b(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])\b",
];
for pattern_str in patterns {
if let Ok(pattern) = Regex::new(pattern_str)
&& let Some(captures) = pattern.captures(text)
&& let Some(isbn) = captures.get(1)
&& let Ok(normalized) = normalize_isbn(isbn.as_str())
{
return Some(normalized);
}
for pattern_str in patterns {
if let Ok(pattern) = Regex::new(pattern_str)
&& let Some(captures) = pattern.captures(text)
&& let Some(isbn) = captures.get(1)
&& let Ok(normalized) = normalize_isbn(isbn.as_str())
{
return Some(normalized);
}
}
None
None
}
/// Parse author name into "Last, First" format for sorting
pub fn parse_author_file_as(name: &str) -> String {
// Simple heuristic: if already contains comma, use as-is
if name.contains(',') {
return name.to_string();
}
// Simple heuristic: if already contains comma, use as-is
if name.contains(',') {
return name.to_string();
}
// Split by whitespace
let parts: Vec<&str> = name.split_whitespace().collect();
// Split by whitespace
let parts: Vec<&str> = name.split_whitespace().collect();
match parts.len() {
0 => String::new(),
1 => parts[0].to_string(),
_ => {
// Last part is surname, rest is given names
let surname = parts.last().unwrap();
let given_names = parts[..parts.len() - 1].join(" ");
format!("{}, {}", surname, given_names)
}
}
match parts.len() {
0 => String::new(),
1 => parts[0].to_string(),
_ => {
// Last part is surname, rest is given names
let surname = parts.last().unwrap();
let given_names = parts[..parts.len() - 1].join(" ");
format!("{}, {}", surname, given_names)
},
}
}
#[cfg(test)]
mod tests {
use super::*;
use super::*;
#[test]
fn test_normalize_isbn10() {
assert_eq!(normalize_isbn("0-306-40615-2").unwrap(), "9780306406157");
assert_eq!(normalize_isbn("0306406152").unwrap(), "9780306406157");
}
#[test]
fn test_normalize_isbn10() {
assert_eq!(normalize_isbn("0-306-40615-2").unwrap(), "9780306406157");
assert_eq!(normalize_isbn("0306406152").unwrap(), "9780306406157");
}
#[test]
fn test_normalize_isbn13() {
assert_eq!(
normalize_isbn("978-0-306-40615-7").unwrap(),
"9780306406157"
);
assert_eq!(normalize_isbn("9780306406157").unwrap(), "9780306406157");
}
#[test]
fn test_normalize_isbn13() {
assert_eq!(
normalize_isbn("978-0-306-40615-7").unwrap(),
"9780306406157"
);
assert_eq!(normalize_isbn("9780306406157").unwrap(), "9780306406157");
}
#[test]
fn test_invalid_isbn() {
assert!(normalize_isbn("123").is_err());
assert!(normalize_isbn("123456789012345").is_err());
}
#[test]
fn test_invalid_isbn() {
assert!(normalize_isbn("123").is_err());
assert!(normalize_isbn("123456789012345").is_err());
}
#[test]
fn test_extract_isbn() {
let text = "This book's ISBN is 978-0-306-40615-7 and was published in 2020.";
assert_eq!(
extract_isbn_from_text(text),
Some("9780306406157".to_string())
);
#[test]
fn test_extract_isbn() {
let text =
"This book's ISBN is 978-0-306-40615-7 and was published in 2020.";
assert_eq!(
extract_isbn_from_text(text),
Some("9780306406157".to_string())
);
let text2 = "ISBN-10: 0-306-40615-2";
assert_eq!(
extract_isbn_from_text(text2),
Some("9780306406157".to_string())
);
let text2 = "ISBN-10: 0-306-40615-2";
assert_eq!(
extract_isbn_from_text(text2),
Some("9780306406157".to_string())
);
let text3 = "No ISBN here";
assert_eq!(extract_isbn_from_text(text3), None);
}
let text3 = "No ISBN here";
assert_eq!(extract_isbn_from_text(text3), None);
}
#[test]
fn test_parse_author_file_as() {
assert_eq!(parse_author_file_as("J.K. Rowling"), "Rowling, J.K.");
assert_eq!(parse_author_file_as("Neil Gaiman"), "Gaiman, Neil");
assert_eq!(parse_author_file_as("Rowling, J.K."), "Rowling, J.K.");
assert_eq!(parse_author_file_as("Prince"), "Prince");
}
#[test]
fn test_parse_author_file_as() {
assert_eq!(parse_author_file_as("J.K. Rowling"), "Rowling, J.K.");
assert_eq!(parse_author_file_as("Neil Gaiman"), "Gaiman, Neil");
assert_eq!(parse_author_file_as("Rowling, J.K."), "Rowling, J.K.");
assert_eq!(parse_author_file_as("Prince"), "Prince");
}
}

View file

@ -7,10 +7,14 @@
//! - Metrics tracking (hit rate, size, evictions)
//! - Specialized caches for different data types
use std::hash::Hash;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;
use std::{
hash::Hash,
sync::{
Arc,
atomic::{AtomicU64, Ordering},
},
time::Duration,
};
use moka::future::Cache as MokaCache;
@ -19,483 +23,499 @@ use crate::model::MediaId;
/// Cache statistics for monitoring and debugging.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
pub hits: u64,
pub misses: u64,
pub evictions: u64,
pub size: u64,
pub hits: u64,
pub misses: u64,
pub evictions: u64,
pub size: u64,
}
impl CacheStats {
pub fn hit_rate(&self) -> f64 {
let total = self.hits + self.misses;
if total == 0 {
0.0
} else {
self.hits as f64 / total as f64
}
pub fn hit_rate(&self) -> f64 {
let total = self.hits + self.misses;
if total == 0 {
0.0
} else {
self.hits as f64 / total as f64
}
}
}
/// Atomic counters for cache metrics.
struct CacheMetrics {
hits: AtomicU64,
misses: AtomicU64,
hits: AtomicU64,
misses: AtomicU64,
}
impl Default for CacheMetrics {
fn default() -> Self {
Self {
hits: AtomicU64::new(0),
misses: AtomicU64::new(0),
}
fn default() -> Self {
Self {
hits: AtomicU64::new(0),
misses: AtomicU64::new(0),
}
}
}
impl CacheMetrics {
fn record_hit(&self) {
self.hits.fetch_add(1, Ordering::Relaxed);
}
fn record_hit(&self) {
self.hits.fetch_add(1, Ordering::Relaxed);
}
fn record_miss(&self) {
self.misses.fetch_add(1, Ordering::Relaxed);
}
fn record_miss(&self) {
self.misses.fetch_add(1, Ordering::Relaxed);
}
fn stats(&self) -> (u64, u64) {
(
self.hits.load(Ordering::Relaxed),
self.misses.load(Ordering::Relaxed),
)
}
fn stats(&self) -> (u64, u64) {
(
self.hits.load(Ordering::Relaxed),
self.misses.load(Ordering::Relaxed),
)
}
}
/// A high-performance cache with LRU eviction and TTL support.
pub struct Cache<K, V>
where
K: Hash + Eq + Send + Sync + 'static,
V: Clone + Send + Sync + 'static,
K: Hash + Eq + Send + Sync + 'static,
V: Clone + Send + Sync + 'static,
{
inner: MokaCache<K, V>,
metrics: Arc<CacheMetrics>,
inner: MokaCache<K, V>,
metrics: Arc<CacheMetrics>,
}
impl<K, V> Cache<K, V>
where
K: Hash + Eq + Send + Sync + 'static,
V: Clone + Send + Sync + 'static,
K: Hash + Eq + Send + Sync + 'static,
V: Clone + Send + Sync + 'static,
{
/// Create a new cache with the specified TTL and maximum capacity.
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
let inner = MokaCache::builder()
.time_to_live(ttl)
.max_capacity(max_capacity)
.build();
/// Create a new cache with the specified TTL and maximum capacity.
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
let inner = MokaCache::builder()
.time_to_live(ttl)
.max_capacity(max_capacity)
.build();
Self {
inner,
metrics: Arc::new(CacheMetrics::default()),
}
Self {
inner,
metrics: Arc::new(CacheMetrics::default()),
}
}
/// Create a new cache with TTL, max capacity, and time-to-idle.
pub fn new_with_idle(ttl: Duration, tti: Duration, max_capacity: u64) -> Self {
let inner = MokaCache::builder()
.time_to_live(ttl)
.time_to_idle(tti)
.max_capacity(max_capacity)
.build();
/// Create a new cache with TTL, max capacity, and time-to-idle.
pub fn new_with_idle(
ttl: Duration,
tti: Duration,
max_capacity: u64,
) -> Self {
let inner = MokaCache::builder()
.time_to_live(ttl)
.time_to_idle(tti)
.max_capacity(max_capacity)
.build();
Self {
inner,
metrics: Arc::new(CacheMetrics::default()),
}
Self {
inner,
metrics: Arc::new(CacheMetrics::default()),
}
}
/// Get a value from the cache.
pub async fn get(&self, key: &K) -> Option<V> {
match self.inner.get(key).await {
Some(value) => {
self.metrics.record_hit();
Some(value)
}
None => {
self.metrics.record_miss();
None
}
}
/// Get a value from the cache.
pub async fn get(&self, key: &K) -> Option<V> {
match self.inner.get(key).await {
Some(value) => {
self.metrics.record_hit();
Some(value)
},
None => {
self.metrics.record_miss();
None
},
}
}
/// Insert a value into the cache.
pub async fn insert(&self, key: K, value: V) {
self.inner.insert(key, value).await;
}
/// Insert a value into the cache.
pub async fn insert(&self, key: K, value: V) {
self.inner.insert(key, value).await;
}
/// Remove a specific key from the cache.
pub async fn invalidate(&self, key: &K) {
self.inner.invalidate(key).await;
}
/// Remove a specific key from the cache.
pub async fn invalidate(&self, key: &K) {
self.inner.invalidate(key).await;
}
/// Clear all entries from the cache.
pub async fn invalidate_all(&self) {
self.inner.invalidate_all();
// Run pending tasks to ensure immediate invalidation
self.inner.run_pending_tasks().await;
}
/// Clear all entries from the cache.
pub async fn invalidate_all(&self) {
self.inner.invalidate_all();
// Run pending tasks to ensure immediate invalidation
self.inner.run_pending_tasks().await;
}
/// Get the current number of entries in the cache.
pub fn entry_count(&self) -> u64 {
self.inner.entry_count()
}
/// Get the current number of entries in the cache.
pub fn entry_count(&self) -> u64 {
self.inner.entry_count()
}
/// Get cache statistics.
pub fn stats(&self) -> CacheStats {
let (hits, misses) = self.metrics.stats();
CacheStats {
hits,
misses,
evictions: 0, // Moka doesn't expose this directly
size: self.entry_count(),
}
/// Get cache statistics.
pub fn stats(&self) -> CacheStats {
let (hits, misses) = self.metrics.stats();
CacheStats {
hits,
misses,
evictions: 0, // Moka doesn't expose this directly
size: self.entry_count(),
}
}
}
/// Specialized cache for search query results.
pub struct QueryCache {
/// Cache keyed by (query_hash, offset, limit)
inner: Cache<String, String>,
/// Cache keyed by (query_hash, offset, limit)
inner: Cache<String, String>,
}
impl QueryCache {
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
Self {
inner: Cache::new(ttl, max_capacity),
}
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
Self {
inner: Cache::new(ttl, max_capacity),
}
}
/// Generate a cache key from query parameters.
fn make_key(query: &str, offset: u64, limit: u64, sort: Option<&str>) -> String {
use std::hash::{DefaultHasher, Hasher};
let mut hasher = DefaultHasher::new();
hasher.write(query.as_bytes());
hasher.write(&offset.to_le_bytes());
hasher.write(&limit.to_le_bytes());
if let Some(s) = sort {
hasher.write(s.as_bytes());
}
format!("q:{:016x}", hasher.finish())
/// Generate a cache key from query parameters.
fn make_key(
query: &str,
offset: u64,
limit: u64,
sort: Option<&str>,
) -> String {
use std::hash::{DefaultHasher, Hasher};
let mut hasher = DefaultHasher::new();
hasher.write(query.as_bytes());
hasher.write(&offset.to_le_bytes());
hasher.write(&limit.to_le_bytes());
if let Some(s) = sort {
hasher.write(s.as_bytes());
}
format!("q:{:016x}", hasher.finish())
}
pub async fn get(
&self,
query: &str,
offset: u64,
limit: u64,
sort: Option<&str>,
) -> Option<String> {
let key = Self::make_key(query, offset, limit, sort);
self.inner.get(&key).await
}
pub async fn get(
&self,
query: &str,
offset: u64,
limit: u64,
sort: Option<&str>,
) -> Option<String> {
let key = Self::make_key(query, offset, limit, sort);
self.inner.get(&key).await
}
pub async fn insert(
&self,
query: &str,
offset: u64,
limit: u64,
sort: Option<&str>,
result: String,
) {
let key = Self::make_key(query, offset, limit, sort);
self.inner.insert(key, result).await;
}
pub async fn insert(
&self,
query: &str,
offset: u64,
limit: u64,
sort: Option<&str>,
result: String,
) {
let key = Self::make_key(query, offset, limit, sort);
self.inner.insert(key, result).await;
}
pub async fn invalidate_all(&self) {
self.inner.invalidate_all().await;
}
pub async fn invalidate_all(&self) {
self.inner.invalidate_all().await;
}
pub fn stats(&self) -> CacheStats {
self.inner.stats()
}
pub fn stats(&self) -> CacheStats {
self.inner.stats()
}
}
/// Specialized cache for metadata extraction results.
pub struct MetadataCache {
/// Cache keyed by content hash
inner: Cache<String, String>,
/// Cache keyed by content hash
inner: Cache<String, String>,
}
impl MetadataCache {
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
Self {
inner: Cache::new(ttl, max_capacity),
}
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
Self {
inner: Cache::new(ttl, max_capacity),
}
}
pub async fn get(&self, content_hash: &str) -> Option<String> {
self.inner.get(&content_hash.to_string()).await
}
pub async fn get(&self, content_hash: &str) -> Option<String> {
self.inner.get(&content_hash.to_string()).await
}
pub async fn insert(&self, content_hash: &str, metadata_json: String) {
self.inner
.insert(content_hash.to_string(), metadata_json)
.await;
}
pub async fn insert(&self, content_hash: &str, metadata_json: String) {
self
.inner
.insert(content_hash.to_string(), metadata_json)
.await;
}
pub async fn invalidate(&self, content_hash: &str) {
self.inner.invalidate(&content_hash.to_string()).await;
}
pub async fn invalidate(&self, content_hash: &str) {
self.inner.invalidate(&content_hash.to_string()).await;
}
pub fn stats(&self) -> CacheStats {
self.inner.stats()
}
pub fn stats(&self) -> CacheStats {
self.inner.stats()
}
}
/// Specialized cache for media item data.
pub struct MediaCache {
inner: Cache<String, String>,
inner: Cache<String, String>,
}
impl MediaCache {
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
Self {
inner: Cache::new(ttl, max_capacity),
}
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
Self {
inner: Cache::new(ttl, max_capacity),
}
}
pub async fn get(&self, media_id: MediaId) -> Option<String> {
self.inner.get(&media_id.to_string()).await
}
pub async fn get(&self, media_id: MediaId) -> Option<String> {
self.inner.get(&media_id.to_string()).await
}
pub async fn insert(&self, media_id: MediaId, item_json: String) {
self.inner.insert(media_id.to_string(), item_json).await;
}
pub async fn insert(&self, media_id: MediaId, item_json: String) {
self.inner.insert(media_id.to_string(), item_json).await;
}
pub async fn invalidate(&self, media_id: MediaId) {
self.inner.invalidate(&media_id.to_string()).await;
}
pub async fn invalidate(&self, media_id: MediaId) {
self.inner.invalidate(&media_id.to_string()).await;
}
pub async fn invalidate_all(&self) {
self.inner.invalidate_all().await;
}
pub async fn invalidate_all(&self) {
self.inner.invalidate_all().await;
}
pub fn stats(&self) -> CacheStats {
self.inner.stats()
}
pub fn stats(&self) -> CacheStats {
self.inner.stats()
}
}
/// Configuration for the cache layer.
#[derive(Debug, Clone)]
pub struct CacheConfig {
/// TTL for response cache in seconds
pub response_ttl_secs: u64,
/// Maximum number of cached responses
pub response_max_entries: u64,
/// TTL for query cache in seconds
pub query_ttl_secs: u64,
/// Maximum number of cached query results
pub query_max_entries: u64,
/// TTL for metadata cache in seconds
pub metadata_ttl_secs: u64,
/// Maximum number of cached metadata entries
pub metadata_max_entries: u64,
/// TTL for media cache in seconds
pub media_ttl_secs: u64,
/// Maximum number of cached media items
pub media_max_entries: u64,
/// TTL for response cache in seconds
pub response_ttl_secs: u64,
/// Maximum number of cached responses
pub response_max_entries: u64,
/// TTL for query cache in seconds
pub query_ttl_secs: u64,
/// Maximum number of cached query results
pub query_max_entries: u64,
/// TTL for metadata cache in seconds
pub metadata_ttl_secs: u64,
/// Maximum number of cached metadata entries
pub metadata_max_entries: u64,
/// TTL for media cache in seconds
pub media_ttl_secs: u64,
/// Maximum number of cached media items
pub media_max_entries: u64,
}
impl Default for CacheConfig {
fn default() -> Self {
Self {
response_ttl_secs: 60,
response_max_entries: 1000,
query_ttl_secs: 300,
query_max_entries: 500,
metadata_ttl_secs: 3600,
metadata_max_entries: 10000,
media_ttl_secs: 300,
media_max_entries: 5000,
}
fn default() -> Self {
Self {
response_ttl_secs: 60,
response_max_entries: 1000,
query_ttl_secs: 300,
query_max_entries: 500,
metadata_ttl_secs: 3600,
metadata_max_entries: 10000,
media_ttl_secs: 300,
media_max_entries: 5000,
}
}
}
/// Application-level cache layer wrapping multiple specialized caches.
pub struct CacheLayer {
/// Cache for serialized API responses
pub responses: Cache<String, String>,
/// Cache for search query results
pub queries: QueryCache,
/// Cache for metadata extraction results
pub metadata: MetadataCache,
/// Cache for individual media items
pub media: MediaCache,
/// Configuration
config: CacheConfig,
/// Cache for serialized API responses
pub responses: Cache<String, String>,
/// Cache for search query results
pub queries: QueryCache,
/// Cache for metadata extraction results
pub metadata: MetadataCache,
/// Cache for individual media items
pub media: MediaCache,
/// Configuration
config: CacheConfig,
}
impl CacheLayer {
/// Create a new cache layer with the specified TTL (using defaults for other settings).
pub fn new(ttl_secs: u64) -> Self {
let config = CacheConfig {
response_ttl_secs: ttl_secs,
..Default::default()
};
Self::with_config(config)
}
/// Create a new cache layer with the specified TTL (using defaults for other
/// settings).
pub fn new(ttl_secs: u64) -> Self {
let config = CacheConfig {
response_ttl_secs: ttl_secs,
..Default::default()
};
Self::with_config(config)
}
/// Create a new cache layer with full configuration.
pub fn with_config(config: CacheConfig) -> Self {
Self {
responses: Cache::new(
Duration::from_secs(config.response_ttl_secs),
config.response_max_entries,
),
queries: QueryCache::new(
Duration::from_secs(config.query_ttl_secs),
config.query_max_entries,
),
metadata: MetadataCache::new(
Duration::from_secs(config.metadata_ttl_secs),
config.metadata_max_entries,
),
media: MediaCache::new(
Duration::from_secs(config.media_ttl_secs),
config.media_max_entries,
),
config,
}
/// Create a new cache layer with full configuration.
pub fn with_config(config: CacheConfig) -> Self {
Self {
responses: Cache::new(
Duration::from_secs(config.response_ttl_secs),
config.response_max_entries,
),
queries: QueryCache::new(
Duration::from_secs(config.query_ttl_secs),
config.query_max_entries,
),
metadata: MetadataCache::new(
Duration::from_secs(config.metadata_ttl_secs),
config.metadata_max_entries,
),
media: MediaCache::new(
Duration::from_secs(config.media_ttl_secs),
config.media_max_entries,
),
config,
}
}
/// Invalidate all caches related to a media item update.
pub async fn invalidate_for_media_update(&self, media_id: MediaId) {
self.media.invalidate(media_id).await;
// Query cache should be invalidated as search results may change
self.queries.invalidate_all().await;
}
/// Invalidate all caches related to a media item update.
pub async fn invalidate_for_media_update(&self, media_id: MediaId) {
self.media.invalidate(media_id).await;
// Query cache should be invalidated as search results may change
self.queries.invalidate_all().await;
}
/// Invalidate all caches related to a media item deletion.
pub async fn invalidate_for_media_delete(&self, media_id: MediaId) {
self.media.invalidate(media_id).await;
self.queries.invalidate_all().await;
}
/// Invalidate all caches related to a media item deletion.
pub async fn invalidate_for_media_delete(&self, media_id: MediaId) {
self.media.invalidate(media_id).await;
self.queries.invalidate_all().await;
}
/// Invalidate all caches (useful after bulk imports or major changes).
pub async fn invalidate_all(&self) {
self.responses.invalidate_all().await;
self.queries.invalidate_all().await;
self.media.invalidate_all().await;
// Keep metadata cache as it's keyed by content hash which doesn't change
}
/// Invalidate all caches (useful after bulk imports or major changes).
pub async fn invalidate_all(&self) {
self.responses.invalidate_all().await;
self.queries.invalidate_all().await;
self.media.invalidate_all().await;
// Keep metadata cache as it's keyed by content hash which doesn't change
}
/// Get aggregated statistics for all caches.
pub fn stats(&self) -> CacheLayerStats {
CacheLayerStats {
responses: self.responses.stats(),
queries: self.queries.stats(),
metadata: self.metadata.stats(),
media: self.media.stats(),
}
/// Get aggregated statistics for all caches.
pub fn stats(&self) -> CacheLayerStats {
CacheLayerStats {
responses: self.responses.stats(),
queries: self.queries.stats(),
metadata: self.metadata.stats(),
media: self.media.stats(),
}
}
/// Get the current configuration.
pub fn config(&self) -> &CacheConfig {
&self.config
}
/// Get the current configuration.
pub fn config(&self) -> &CacheConfig {
&self.config
}
}
/// Aggregated statistics for the entire cache layer.
#[derive(Debug, Clone)]
pub struct CacheLayerStats {
pub responses: CacheStats,
pub queries: CacheStats,
pub metadata: CacheStats,
pub media: CacheStats,
pub responses: CacheStats,
pub queries: CacheStats,
pub metadata: CacheStats,
pub media: CacheStats,
}
impl CacheLayerStats {
/// Get the overall hit rate across all caches.
pub fn overall_hit_rate(&self) -> f64 {
let total_hits =
self.responses.hits + self.queries.hits + self.metadata.hits + self.media.hits;
let total_requests = total_hits
+ self.responses.misses
+ self.queries.misses
+ self.metadata.misses
+ self.media.misses;
/// Get the overall hit rate across all caches.
pub fn overall_hit_rate(&self) -> f64 {
let total_hits = self.responses.hits
+ self.queries.hits
+ self.metadata.hits
+ self.media.hits;
let total_requests = total_hits
+ self.responses.misses
+ self.queries.misses
+ self.metadata.misses
+ self.media.misses;
if total_requests == 0 {
0.0
} else {
total_hits as f64 / total_requests as f64
}
if total_requests == 0 {
0.0
} else {
total_hits as f64 / total_requests as f64
}
}
/// Get the total number of entries across all caches.
pub fn total_entries(&self) -> u64 {
self.responses.size + self.queries.size + self.metadata.size + self.media.size
}
/// Get the total number of entries across all caches.
pub fn total_entries(&self) -> u64 {
self.responses.size
+ self.queries.size
+ self.metadata.size
+ self.media.size
}
}
#[cfg(test)]
mod tests {
use super::*;
use super::*;
#[tokio::test]
async fn test_cache_basic_operations() {
let cache: Cache<String, String> = Cache::new(Duration::from_secs(60), 100);
#[tokio::test]
async fn test_cache_basic_operations() {
let cache: Cache<String, String> = Cache::new(Duration::from_secs(60), 100);
// Insert and get
cache.insert("key1".to_string(), "value1".to_string()).await;
assert_eq!(
cache.get(&"key1".to_string()).await,
Some("value1".to_string())
);
// Insert and get
cache.insert("key1".to_string(), "value1".to_string()).await;
assert_eq!(
cache.get(&"key1".to_string()).await,
Some("value1".to_string())
);
// Miss
assert_eq!(cache.get(&"key2".to_string()).await, None);
// Miss
assert_eq!(cache.get(&"key2".to_string()).await, None);
// Invalidate
cache.invalidate(&"key1".to_string()).await;
assert_eq!(cache.get(&"key1".to_string()).await, None);
}
// Invalidate
cache.invalidate(&"key1".to_string()).await;
assert_eq!(cache.get(&"key1".to_string()).await, None);
}
#[tokio::test]
async fn test_cache_stats() {
let cache: Cache<String, String> = Cache::new(Duration::from_secs(60), 100);
#[tokio::test]
async fn test_cache_stats() {
let cache: Cache<String, String> = Cache::new(Duration::from_secs(60), 100);
cache.insert("key1".to_string(), "value1".to_string()).await;
let _ = cache.get(&"key1".to_string()).await; // hit
let _ = cache.get(&"key2".to_string()).await; // miss
cache.insert("key1".to_string(), "value1".to_string()).await;
let _ = cache.get(&"key1".to_string()).await; // hit
let _ = cache.get(&"key2".to_string()).await; // miss
let stats = cache.stats();
assert_eq!(stats.hits, 1);
assert_eq!(stats.misses, 1);
assert!((stats.hit_rate() - 0.5).abs() < 0.01);
}
let stats = cache.stats();
assert_eq!(stats.hits, 1);
assert_eq!(stats.misses, 1);
assert!((stats.hit_rate() - 0.5).abs() < 0.01);
}
#[tokio::test]
async fn test_query_cache() {
let cache = QueryCache::new(Duration::from_secs(60), 100);
#[tokio::test]
async fn test_query_cache() {
let cache = QueryCache::new(Duration::from_secs(60), 100);
cache
.insert("test query", 0, 10, Some("name"), "results".to_string())
.await;
assert_eq!(
cache.get("test query", 0, 10, Some("name")).await,
Some("results".to_string())
);
cache
.insert("test query", 0, 10, Some("name"), "results".to_string())
.await;
assert_eq!(
cache.get("test query", 0, 10, Some("name")).await,
Some("results".to_string())
);
// Different parameters should miss
assert_eq!(cache.get("test query", 10, 10, Some("name")).await, None);
}
// Different parameters should miss
assert_eq!(cache.get("test query", 10, 10, Some("name")).await, None);
}
#[tokio::test]
async fn test_cache_layer() {
let layer = CacheLayer::new(60);
#[tokio::test]
async fn test_cache_layer() {
let layer = CacheLayer::new(60);
let media_id = MediaId::new();
layer.media.insert(media_id, "{}".to_string()).await;
assert!(layer.media.get(media_id).await.is_some());
let media_id = MediaId::new();
layer.media.insert(media_id, "{}".to_string()).await;
assert!(layer.media.get(media_id).await.is_some());
layer.invalidate_for_media_delete(media_id).await;
assert!(layer.media.get(media_id).await.is_none());
}
layer.invalidate_for_media_delete(media_id).await;
assert!(layer.media.get(media_id).await.is_none());
}
}

View file

@ -1,78 +1,78 @@
use uuid::Uuid;
use crate::error::Result;
use crate::model::*;
use crate::storage::DynStorageBackend;
use crate::{error::Result, model::*, storage::DynStorageBackend};
pub async fn create_collection(
storage: &DynStorageBackend,
name: &str,
kind: CollectionKind,
description: Option<&str>,
filter_query: Option<&str>,
storage: &DynStorageBackend,
name: &str,
kind: CollectionKind,
description: Option<&str>,
filter_query: Option<&str>,
) -> Result<Collection> {
storage
.create_collection(name, kind, description, filter_query)
.await
storage
.create_collection(name, kind, description, filter_query)
.await
}
pub async fn add_member(
storage: &DynStorageBackend,
collection_id: Uuid,
media_id: MediaId,
position: i32,
storage: &DynStorageBackend,
collection_id: Uuid,
media_id: MediaId,
position: i32,
) -> Result<()> {
storage
.add_to_collection(collection_id, media_id, position)
.await?;
crate::audit::record_action(
storage,
Some(media_id),
AuditAction::AddedToCollection,
Some(format!("collection_id={collection_id}")),
)
.await
storage
.add_to_collection(collection_id, media_id, position)
.await?;
crate::audit::record_action(
storage,
Some(media_id),
AuditAction::AddedToCollection,
Some(format!("collection_id={collection_id}")),
)
.await
}
pub async fn remove_member(
storage: &DynStorageBackend,
collection_id: Uuid,
media_id: MediaId,
storage: &DynStorageBackend,
collection_id: Uuid,
media_id: MediaId,
) -> Result<()> {
storage
.remove_from_collection(collection_id, media_id)
.await?;
crate::audit::record_action(
storage,
Some(media_id),
AuditAction::RemovedFromCollection,
Some(format!("collection_id={collection_id}")),
)
.await
storage
.remove_from_collection(collection_id, media_id)
.await?;
crate::audit::record_action(
storage,
Some(media_id),
AuditAction::RemovedFromCollection,
Some(format!("collection_id={collection_id}")),
)
.await
}
pub async fn get_members(
storage: &DynStorageBackend,
collection_id: Uuid,
storage: &DynStorageBackend,
collection_id: Uuid,
) -> Result<Vec<MediaItem>> {
let collection = storage.get_collection(collection_id).await?;
let collection = storage.get_collection(collection_id).await?;
match collection.kind {
CollectionKind::Virtual => {
// Virtual collections evaluate their filter_query dynamically
if let Some(ref query_str) = collection.filter_query {
let query = crate::search::parse_search_query(query_str)?;
let request = crate::search::SearchRequest {
query,
sort: crate::search::SortOrder::DateDesc,
pagination: Pagination::new(0, 10000, None),
};
let results = storage.search(&request).await?;
Ok(results.items)
} else {
Ok(Vec::new())
}
}
CollectionKind::Manual => storage.get_collection_members(collection_id).await,
}
match collection.kind {
CollectionKind::Virtual => {
// Virtual collections evaluate their filter_query dynamically
if let Some(ref query_str) = collection.filter_query {
let query = crate::search::parse_search_query(query_str)?;
let request = crate::search::SearchRequest {
query,
sort: crate::search::SortOrder::DateDesc,
pagination: Pagination::new(0, 10000, None),
};
let results = storage.search(&request).await?;
Ok(results.items)
} else {
Ok(Vec::new())
}
},
CollectionKind::Manual => {
storage.get_collection_members(collection_id).await
},
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,237 +1,253 @@
use chrono::Utc;
use uuid::Uuid;
use crate::error::{PinakesError, Result};
use crate::model::MediaItem;
use super::googlebooks::GoogleBooksClient;
use super::openlibrary::OpenLibraryClient;
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
use super::{
EnrichmentSourceType,
ExternalMetadata,
MetadataEnricher,
googlebooks::GoogleBooksClient,
openlibrary::OpenLibraryClient,
};
use crate::{
error::{PinakesError, Result},
model::MediaItem,
};
/// Book enricher that tries OpenLibrary first, then falls back to Google Books
pub struct BookEnricher {
openlibrary: OpenLibraryClient,
googlebooks: GoogleBooksClient,
openlibrary: OpenLibraryClient,
googlebooks: GoogleBooksClient,
}
impl BookEnricher {
pub fn new(google_api_key: Option<String>) -> Self {
Self {
openlibrary: OpenLibraryClient::new(),
googlebooks: GoogleBooksClient::new(google_api_key),
}
pub fn new(google_api_key: Option<String>) -> Self {
Self {
openlibrary: OpenLibraryClient::new(),
googlebooks: GoogleBooksClient::new(google_api_key),
}
}
/// Try to enrich from OpenLibrary first
pub async fn try_openlibrary(
&self,
isbn: &str,
) -> Result<Option<ExternalMetadata>> {
match self.openlibrary.fetch_by_isbn(isbn).await {
Ok(book) => {
let metadata_json = serde_json::to_string(&book).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {}", e))
})?;
Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()), // Will be set by caller
source: EnrichmentSourceType::OpenLibrary,
external_id: None,
metadata_json,
confidence: calculate_openlibrary_confidence(&book),
last_updated: Utc::now(),
}))
},
Err(_) => Ok(None),
}
}
/// Try to enrich from Google Books
pub async fn try_googlebooks(
&self,
isbn: &str,
) -> Result<Option<ExternalMetadata>> {
match self.googlebooks.fetch_by_isbn(isbn).await {
Ok(books) if !books.is_empty() => {
let book = &books[0];
let metadata_json = serde_json::to_string(book).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {}", e))
})?;
Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()), // Will be set by caller
source: EnrichmentSourceType::GoogleBooks,
external_id: Some(book.id.clone()),
metadata_json,
confidence: calculate_googlebooks_confidence(&book.volume_info),
last_updated: Utc::now(),
}))
},
_ => Ok(None),
}
}
/// Try to enrich by searching with title and author
pub async fn enrich_by_search(
&self,
title: &str,
author: Option<&str>,
) -> Result<Option<ExternalMetadata>> {
// Try OpenLibrary search first
if let Ok(results) = self.openlibrary.search(title, author).await
&& let Some(result) = results.first()
{
let metadata_json = serde_json::to_string(result).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {}", e))
})?;
return Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()),
source: EnrichmentSourceType::OpenLibrary,
external_id: result.key.clone(),
metadata_json,
confidence: 0.6, // Lower confidence for search results
last_updated: Utc::now(),
}));
}
/// Try to enrich from OpenLibrary first
pub async fn try_openlibrary(&self, isbn: &str) -> Result<Option<ExternalMetadata>> {
match self.openlibrary.fetch_by_isbn(isbn).await {
Ok(book) => {
let metadata_json = serde_json::to_string(&book).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {}", e))
})?;
// Fall back to Google Books
if let Ok(results) = self.googlebooks.search(title, author).await
&& let Some(book) = results.first()
{
let metadata_json = serde_json::to_string(book).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {}", e))
})?;
Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()), // Will be set by caller
source: EnrichmentSourceType::OpenLibrary,
external_id: None,
metadata_json,
confidence: calculate_openlibrary_confidence(&book),
last_updated: Utc::now(),
}))
}
Err(_) => Ok(None),
}
return Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()),
source: EnrichmentSourceType::GoogleBooks,
external_id: Some(book.id.clone()),
metadata_json,
confidence: 0.6,
last_updated: Utc::now(),
}));
}
/// Try to enrich from Google Books
pub async fn try_googlebooks(&self, isbn: &str) -> Result<Option<ExternalMetadata>> {
match self.googlebooks.fetch_by_isbn(isbn).await {
Ok(books) if !books.is_empty() => {
let book = &books[0];
let metadata_json = serde_json::to_string(book).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {}", e))
})?;
Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()), // Will be set by caller
source: EnrichmentSourceType::GoogleBooks,
external_id: Some(book.id.clone()),
metadata_json,
confidence: calculate_googlebooks_confidence(&book.volume_info),
last_updated: Utc::now(),
}))
}
_ => Ok(None),
}
}
/// Try to enrich by searching with title and author
pub async fn enrich_by_search(
&self,
title: &str,
author: Option<&str>,
) -> Result<Option<ExternalMetadata>> {
// Try OpenLibrary search first
if let Ok(results) = self.openlibrary.search(title, author).await
&& let Some(result) = results.first()
{
let metadata_json = serde_json::to_string(result).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {}", e))
})?;
return Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()),
source: EnrichmentSourceType::OpenLibrary,
external_id: result.key.clone(),
metadata_json,
confidence: 0.6, // Lower confidence for search results
last_updated: Utc::now(),
}));
}
// Fall back to Google Books
if let Ok(results) = self.googlebooks.search(title, author).await
&& let Some(book) = results.first()
{
let metadata_json = serde_json::to_string(book).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {}", e))
})?;
return Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()),
source: EnrichmentSourceType::GoogleBooks,
external_id: Some(book.id.clone()),
metadata_json,
confidence: 0.6,
last_updated: Utc::now(),
}));
}
Ok(None)
}
Ok(None)
}
}
#[async_trait::async_trait]
impl MetadataEnricher for BookEnricher {
fn source(&self) -> EnrichmentSourceType {
// Returns the preferred source
EnrichmentSourceType::OpenLibrary
}
fn source(&self) -> EnrichmentSourceType {
// Returns the preferred source
EnrichmentSourceType::OpenLibrary
}
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
// Try ISBN-based enrichment first by checking title/description for ISBN patterns
if let Some(ref title) = item.title {
if let Some(isbn) = crate::books::extract_isbn_from_text(title) {
if let Some(mut metadata) = self.try_openlibrary(&isbn).await? {
metadata.media_id = item.id;
return Ok(Some(metadata));
}
if let Some(mut metadata) = self.try_googlebooks(&isbn).await? {
metadata.media_id = item.id;
return Ok(Some(metadata));
}
}
// Fall back to title/author search
let author = item.artist.as_deref();
return self.enrich_by_search(title, author).await;
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
// Try ISBN-based enrichment first by checking title/description for ISBN
// patterns
if let Some(ref title) = item.title {
if let Some(isbn) = crate::books::extract_isbn_from_text(title) {
if let Some(mut metadata) = self.try_openlibrary(&isbn).await? {
metadata.media_id = item.id;
return Ok(Some(metadata));
}
if let Some(mut metadata) = self.try_googlebooks(&isbn).await? {
metadata.media_id = item.id;
return Ok(Some(metadata));
}
}
// No title available
Ok(None)
// Fall back to title/author search
let author = item.artist.as_deref();
return self.enrich_by_search(title, author).await;
}
// No title available
Ok(None)
}
}
/// Calculate confidence score for OpenLibrary metadata
pub fn calculate_openlibrary_confidence(book: &super::openlibrary::OpenLibraryBook) -> f64 {
let mut score: f64 = 0.5; // Base score
pub fn calculate_openlibrary_confidence(
book: &super::openlibrary::OpenLibraryBook,
) -> f64 {
let mut score: f64 = 0.5; // Base score
if book.title.is_some() {
score += 0.1;
}
if !book.authors.is_empty() {
score += 0.1;
}
if !book.publishers.is_empty() {
score += 0.05;
}
if book.publish_date.is_some() {
score += 0.05;
}
if book.description.is_some() {
score += 0.1;
}
if !book.covers.is_empty() {
score += 0.1;
}
if book.title.is_some() {
score += 0.1;
}
if !book.authors.is_empty() {
score += 0.1;
}
if !book.publishers.is_empty() {
score += 0.05;
}
if book.publish_date.is_some() {
score += 0.05;
}
if book.description.is_some() {
score += 0.1;
}
if !book.covers.is_empty() {
score += 0.1;
}
score.min(1.0)
score.min(1.0)
}
/// Calculate confidence score for Google Books metadata
pub fn calculate_googlebooks_confidence(info: &super::googlebooks::VolumeInfo) -> f64 {
let mut score: f64 = 0.5; // Base score
pub fn calculate_googlebooks_confidence(
info: &super::googlebooks::VolumeInfo,
) -> f64 {
let mut score: f64 = 0.5; // Base score
if info.title.is_some() {
score += 0.1;
}
if !info.authors.is_empty() {
score += 0.1;
}
if info.publisher.is_some() {
score += 0.05;
}
if info.published_date.is_some() {
score += 0.05;
}
if info.description.is_some() {
score += 0.1;
}
if info.image_links.is_some() {
score += 0.1;
}
if info.title.is_some() {
score += 0.1;
}
if !info.authors.is_empty() {
score += 0.1;
}
if info.publisher.is_some() {
score += 0.05;
}
if info.published_date.is_some() {
score += 0.05;
}
if info.description.is_some() {
score += 0.1;
}
if info.image_links.is_some() {
score += 0.1;
}
score.min(1.0)
score.min(1.0)
}
#[cfg(test)]
mod tests {
use super::*;
use super::*;
#[test]
fn test_openlibrary_confidence_calculation() {
let book = super::super::openlibrary::OpenLibraryBook {
title: Some("Test Book".to_string()),
subtitle: None,
authors: vec![],
publishers: vec![],
publish_date: None,
number_of_pages: None,
subjects: vec![],
covers: vec![],
isbn_10: vec![],
isbn_13: vec![],
series: vec![],
description: None,
languages: vec![],
};
#[test]
fn test_openlibrary_confidence_calculation() {
let book = super::super::openlibrary::OpenLibraryBook {
title: Some("Test Book".to_string()),
subtitle: None,
authors: vec![],
publishers: vec![],
publish_date: None,
number_of_pages: None,
subjects: vec![],
covers: vec![],
isbn_10: vec![],
isbn_13: vec![],
series: vec![],
description: None,
languages: vec![],
};
let confidence = calculate_openlibrary_confidence(&book);
assert_eq!(confidence, 0.6); // 0.5 base + 0.1 for title
}
let confidence = calculate_openlibrary_confidence(&book);
assert_eq!(confidence, 0.6); // 0.5 base + 0.1 for title
}
#[test]
fn test_googlebooks_confidence_calculation() {
let info = super::super::googlebooks::VolumeInfo {
title: Some("Test Book".to_string()),
..Default::default()
};
#[test]
fn test_googlebooks_confidence_calculation() {
let info = super::super::googlebooks::VolumeInfo {
title: Some("Test Book".to_string()),
..Default::default()
};
let confidence = calculate_googlebooks_confidence(&info);
assert_eq!(confidence, 0.6); // 0.5 base + 0.1 for title
}
let confidence = calculate_googlebooks_confidence(&info);
assert_eq!(confidence, 0.6); // 0.5 base + 0.1 for title
}
}

View file

@ -4,274 +4,276 @@ use crate::error::{PinakesError, Result};
/// Google Books API client for book metadata enrichment
pub struct GoogleBooksClient {
client: reqwest::Client,
api_key: Option<String>,
client: reqwest::Client,
api_key: Option<String>,
}
impl GoogleBooksClient {
pub fn new(api_key: Option<String>) -> Self {
Self {
client: reqwest::Client::builder()
.user_agent("Pinakes/1.0")
.timeout(std::time::Duration::from_secs(10))
.build()
.expect("Failed to build HTTP client"),
api_key,
}
pub fn new(api_key: Option<String>) -> Self {
Self {
client: reqwest::Client::builder()
.user_agent("Pinakes/1.0")
.timeout(std::time::Duration::from_secs(10))
.build()
.expect("Failed to build HTTP client"),
api_key,
}
}
/// Fetch book metadata by ISBN
pub async fn fetch_by_isbn(&self, isbn: &str) -> Result<Vec<GoogleBook>> {
let mut url = format!(
"https://www.googleapis.com/books/v1/volumes?q=isbn:{}",
isbn
);
if let Some(ref key) = self.api_key {
url.push_str(&format!("&key={}", key));
}
/// Fetch book metadata by ISBN
pub async fn fetch_by_isbn(&self, isbn: &str) -> Result<Vec<GoogleBook>> {
let mut url = format!(
"https://www.googleapis.com/books/v1/volumes?q=isbn:{}",
isbn
);
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("Google Books request failed: {}", e))
})?;
if let Some(ref key) = self.api_key {
url.push_str(&format!("&key={}", key));
}
let response =
self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("Google Books request failed: {}", e))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Google Books returned status: {}",
response.status()
)));
}
let volumes: GoogleBooksResponse = response.json().await.map_err(|e| {
PinakesError::External(format!("Failed to parse Google Books response: {}", e))
})?;
Ok(volumes.items)
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Google Books returned status: {}",
response.status()
)));
}
/// Search for books by title and author
pub async fn search(&self, title: &str, author: Option<&str>) -> Result<Vec<GoogleBook>> {
let mut query = format!("intitle:{}", urlencoding::encode(title));
let volumes: GoogleBooksResponse = response.json().await.map_err(|e| {
PinakesError::External(format!(
"Failed to parse Google Books response: {}",
e
))
})?;
if let Some(author) = author {
query.push_str(&format!("+inauthor:{}", urlencoding::encode(author)));
}
Ok(volumes.items)
}
let mut url = format!(
"https://www.googleapis.com/books/v1/volumes?q={}&maxResults=5",
query
);
/// Search for books by title and author
pub async fn search(
&self,
title: &str,
author: Option<&str>,
) -> Result<Vec<GoogleBook>> {
let mut query = format!("intitle:{}", urlencoding::encode(title));
if let Some(ref key) = self.api_key {
url.push_str(&format!("&key={}", key));
}
let response =
self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("Google Books search failed: {}", e))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Google Books search returned status: {}",
response.status()
)));
}
let volumes: GoogleBooksResponse = response.json().await.map_err(|e| {
PinakesError::External(format!("Failed to parse search results: {}", e))
})?;
Ok(volumes.items)
if let Some(author) = author {
query.push_str(&format!("+inauthor:{}", urlencoding::encode(author)));
}
/// Download cover image from Google Books
pub async fn fetch_cover(&self, image_link: &str) -> Result<Vec<u8>> {
// Replace thumbnail link with higher resolution if possible
let high_res_link = image_link
.replace("&zoom=1", "&zoom=2")
.replace("&edge=curl", "");
let mut url = format!(
"https://www.googleapis.com/books/v1/volumes?q={}&maxResults=5",
query
);
let response = self
.client
.get(&high_res_link)
.send()
.await
.map_err(|e| PinakesError::External(format!("Cover download failed: {}", e)))?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Cover download returned status: {}",
response.status()
)));
}
response
.bytes()
.await
.map(|b| b.to_vec())
.map_err(|e| PinakesError::External(format!("Failed to read cover data: {}", e)))
if let Some(ref key) = self.api_key {
url.push_str(&format!("&key={}", key));
}
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("Google Books search failed: {}", e))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Google Books search returned status: {}",
response.status()
)));
}
let volumes: GoogleBooksResponse = response.json().await.map_err(|e| {
PinakesError::External(format!("Failed to parse search results: {}", e))
})?;
Ok(volumes.items)
}
/// Download cover image from Google Books
pub async fn fetch_cover(&self, image_link: &str) -> Result<Vec<u8>> {
// Replace thumbnail link with higher resolution if possible
let high_res_link = image_link
.replace("&zoom=1", "&zoom=2")
.replace("&edge=curl", "");
let response =
self.client.get(&high_res_link).send().await.map_err(|e| {
PinakesError::External(format!("Cover download failed: {}", e))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Cover download returned status: {}",
response.status()
)));
}
response.bytes().await.map(|b| b.to_vec()).map_err(|e| {
PinakesError::External(format!("Failed to read cover data: {}", e))
})
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GoogleBooksResponse {
#[serde(default)]
pub items: Vec<GoogleBook>,
#[serde(default)]
pub items: Vec<GoogleBook>,
#[serde(default)]
pub total_items: i32,
#[serde(default)]
pub total_items: i32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GoogleBook {
pub id: String,
pub id: String,
#[serde(default)]
pub volume_info: VolumeInfo,
#[serde(default)]
pub volume_info: VolumeInfo,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct VolumeInfo {
#[serde(default)]
pub title: Option<String>,
#[serde(default)]
pub title: Option<String>,
#[serde(default)]
pub subtitle: Option<String>,
#[serde(default)]
pub subtitle: Option<String>,
#[serde(default)]
pub authors: Vec<String>,
#[serde(default)]
pub authors: Vec<String>,
#[serde(default)]
pub publisher: Option<String>,
#[serde(default)]
pub publisher: Option<String>,
#[serde(default)]
pub published_date: Option<String>,
#[serde(default)]
pub published_date: Option<String>,
#[serde(default)]
pub description: Option<String>,
#[serde(default)]
pub description: Option<String>,
#[serde(default)]
pub page_count: Option<i32>,
#[serde(default)]
pub page_count: Option<i32>,
#[serde(default)]
pub categories: Vec<String>,
#[serde(default)]
pub categories: Vec<String>,
#[serde(default)]
pub average_rating: Option<f64>,
#[serde(default)]
pub average_rating: Option<f64>,
#[serde(default)]
pub ratings_count: Option<i32>,
#[serde(default)]
pub ratings_count: Option<i32>,
#[serde(default)]
pub image_links: Option<ImageLinks>,
#[serde(default)]
pub image_links: Option<ImageLinks>,
#[serde(default)]
pub language: Option<String>,
#[serde(default)]
pub language: Option<String>,
#[serde(default)]
pub industry_identifiers: Vec<IndustryIdentifier>,
#[serde(default)]
pub industry_identifiers: Vec<IndustryIdentifier>,
#[serde(default)]
pub main_category: Option<String>,
#[serde(default)]
pub main_category: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageLinks {
#[serde(default)]
pub small_thumbnail: Option<String>,
#[serde(default)]
pub small_thumbnail: Option<String>,
#[serde(default)]
pub thumbnail: Option<String>,
#[serde(default)]
pub thumbnail: Option<String>,
#[serde(default)]
pub small: Option<String>,
#[serde(default)]
pub small: Option<String>,
#[serde(default)]
pub medium: Option<String>,
#[serde(default)]
pub medium: Option<String>,
#[serde(default)]
pub large: Option<String>,
#[serde(default)]
pub large: Option<String>,
#[serde(default)]
pub extra_large: Option<String>,
#[serde(default)]
pub extra_large: Option<String>,
}
impl ImageLinks {
/// Get the best available image link (highest resolution)
pub fn best_link(&self) -> Option<&String> {
self.extra_large
.as_ref()
.or(self.large.as_ref())
.or(self.medium.as_ref())
.or(self.small.as_ref())
.or(self.thumbnail.as_ref())
.or(self.small_thumbnail.as_ref())
}
/// Get the best available image link (highest resolution)
pub fn best_link(&self) -> Option<&String> {
self
.extra_large
.as_ref()
.or(self.large.as_ref())
.or(self.medium.as_ref())
.or(self.small.as_ref())
.or(self.thumbnail.as_ref())
.or(self.small_thumbnail.as_ref())
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndustryIdentifier {
#[serde(rename = "type")]
pub identifier_type: String,
#[serde(rename = "type")]
pub identifier_type: String,
pub identifier: String,
pub identifier: String,
}
impl IndustryIdentifier {
/// Check if this is an ISBN-13
pub fn is_isbn13(&self) -> bool {
self.identifier_type == "ISBN_13"
}
/// Check if this is an ISBN-13
pub fn is_isbn13(&self) -> bool {
self.identifier_type == "ISBN_13"
}
/// Check if this is an ISBN-10
pub fn is_isbn10(&self) -> bool {
self.identifier_type == "ISBN_10"
}
/// Check if this is an ISBN-10
pub fn is_isbn10(&self) -> bool {
self.identifier_type == "ISBN_10"
}
}
#[cfg(test)]
mod tests {
use super::*;
use super::*;
#[test]
fn test_googlebooks_client_creation() {
let client = GoogleBooksClient::new(None);
assert!(client.api_key.is_none());
#[test]
fn test_googlebooks_client_creation() {
let client = GoogleBooksClient::new(None);
assert!(client.api_key.is_none());
let client_with_key = GoogleBooksClient::new(Some("test-key".to_string()));
assert_eq!(client_with_key.api_key, Some("test-key".to_string()));
}
let client_with_key = GoogleBooksClient::new(Some("test-key".to_string()));
assert_eq!(client_with_key.api_key, Some("test-key".to_string()));
}
#[test]
fn test_image_links_best_link() {
let links = ImageLinks {
small_thumbnail: Some("small.jpg".to_string()),
thumbnail: Some("thumb.jpg".to_string()),
small: None,
medium: Some("medium.jpg".to_string()),
large: Some("large.jpg".to_string()),
extra_large: None,
};
#[test]
fn test_image_links_best_link() {
let links = ImageLinks {
small_thumbnail: Some("small.jpg".to_string()),
thumbnail: Some("thumb.jpg".to_string()),
small: None,
medium: Some("medium.jpg".to_string()),
large: Some("large.jpg".to_string()),
extra_large: None,
};
assert_eq!(links.best_link(), Some(&"large.jpg".to_string()));
}
assert_eq!(links.best_link(), Some(&"large.jpg".to_string()));
}
#[test]
fn test_industry_identifier_type_checks() {
let isbn13 = IndustryIdentifier {
identifier_type: "ISBN_13".to_string(),
identifier: "9780123456789".to_string(),
};
assert!(isbn13.is_isbn13());
assert!(!isbn13.is_isbn10());
#[test]
fn test_industry_identifier_type_checks() {
let isbn13 = IndustryIdentifier {
identifier_type: "ISBN_13".to_string(),
identifier: "9780123456789".to_string(),
};
assert!(isbn13.is_isbn13());
assert!(!isbn13.is_isbn10());
let isbn10 = IndustryIdentifier {
identifier_type: "ISBN_10".to_string(),
identifier: "0123456789".to_string(),
};
assert!(!isbn10.is_isbn13());
assert!(isbn10.is_isbn10());
}
let isbn10 = IndustryIdentifier {
identifier_type: "ISBN_10".to_string(),
identifier: "0123456789".to_string(),
};
assert!(!isbn10.is_isbn13());
assert!(isbn10.is_isbn10());
}
}

View file

@ -5,105 +5,110 @@ use std::time::Duration;
use chrono::Utc;
use uuid::Uuid;
use crate::error::{PinakesError, Result};
use crate::model::MediaItem;
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
use crate::{
error::{PinakesError, Result},
model::MediaItem,
};
pub struct LastFmEnricher {
client: reqwest::Client,
api_key: String,
base_url: String,
client: reqwest::Client,
api_key: String,
base_url: String,
}
impl LastFmEnricher {
pub fn new(api_key: String) -> Self {
Self {
client: reqwest::Client::builder()
.timeout(Duration::from_secs(10))
.connect_timeout(Duration::from_secs(5))
.build()
.expect("failed to build HTTP client with configured timeouts"),
api_key,
base_url: "https://ws.audioscrobbler.com/2.0".to_string(),
}
pub fn new(api_key: String) -> Self {
Self {
client: reqwest::Client::builder()
.timeout(Duration::from_secs(10))
.connect_timeout(Duration::from_secs(5))
.build()
.expect("failed to build HTTP client with configured timeouts"),
api_key,
base_url: "https://ws.audioscrobbler.com/2.0".to_string(),
}
}
}
#[async_trait::async_trait]
impl MetadataEnricher for LastFmEnricher {
fn source(&self) -> EnrichmentSourceType {
EnrichmentSourceType::LastFm
fn source(&self) -> EnrichmentSourceType {
EnrichmentSourceType::LastFm
}
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
let artist = match &item.artist {
Some(a) if !a.is_empty() => a,
_ => return Ok(None),
};
let title = match &item.title {
Some(t) if !t.is_empty() => t,
_ => return Ok(None),
};
let url = format!("{}/", self.base_url);
let resp = self
.client
.get(&url)
.query(&[
("method", "track.getInfo"),
("api_key", self.api_key.as_str()),
("artist", artist.as_str()),
("track", title.as_str()),
("format", "json"),
])
.send()
.await
.map_err(|e| {
PinakesError::MetadataExtraction(format!("Last.fm request failed: {e}"))
})?;
if !resp.status().is_success() {
return Ok(None);
}
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
let artist = match &item.artist {
Some(a) if !a.is_empty() => a,
_ => return Ok(None),
};
let body = resp.text().await.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"Last.fm response read failed: {e}"
))
})?;
let title = match &item.title {
Some(t) if !t.is_empty() => t,
_ => return Ok(None),
};
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
PinakesError::MetadataExtraction(format!(
"Last.fm JSON parse failed: {e}"
))
})?;
let url = format!("{}/", self.base_url);
let resp = self
.client
.get(&url)
.query(&[
("method", "track.getInfo"),
("api_key", self.api_key.as_str()),
("artist", artist.as_str()),
("track", title.as_str()),
("format", "json"),
])
.send()
.await
.map_err(|e| {
PinakesError::MetadataExtraction(format!("Last.fm request failed: {e}"))
})?;
if !resp.status().is_success() {
return Ok(None);
}
let body = resp.text().await.map_err(|e| {
PinakesError::MetadataExtraction(format!("Last.fm response read failed: {e}"))
})?;
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
PinakesError::MetadataExtraction(format!("Last.fm JSON parse failed: {e}"))
})?;
// Check for error response
if json.get("error").is_some() {
return Ok(None);
}
let track = match json.get("track") {
Some(t) => t,
None => return Ok(None),
};
let mbid = track.get("mbid").and_then(|m| m.as_str()).map(String::from);
let listeners = track
.get("listeners")
.and_then(|l| l.as_str())
.and_then(|l| l.parse::<f64>().ok())
.unwrap_or(0.0);
// Normalize listeners to confidence (arbitrary scale)
let confidence = (listeners / 1_000_000.0).min(1.0);
Ok(Some(ExternalMetadata {
id: Uuid::now_v7(),
media_id: item.id,
source: EnrichmentSourceType::LastFm,
external_id: mbid,
metadata_json: body,
confidence,
last_updated: Utc::now(),
}))
// Check for error response
if json.get("error").is_some() {
return Ok(None);
}
let track = match json.get("track") {
Some(t) => t,
None => return Ok(None),
};
let mbid = track.get("mbid").and_then(|m| m.as_str()).map(String::from);
let listeners = track
.get("listeners")
.and_then(|l| l.as_str())
.and_then(|l| l.parse::<f64>().ok())
.unwrap_or(0.0);
// Normalize listeners to confidence (arbitrary scale)
let confidence = (listeners / 1_000_000.0).min(1.0);
Ok(Some(ExternalMetadata {
id: Uuid::now_v7(),
media_id: item.id,
source: EnrichmentSourceType::LastFm,
external_id: mbid,
metadata_json: body,
confidence,
last_updated: Utc::now(),
}))
}
}

View file

@ -11,67 +11,69 @@ use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::error::Result;
use crate::model::{MediaId, MediaItem};
use crate::{
error::Result,
model::{MediaId, MediaItem},
};
/// Externally-sourced metadata for a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExternalMetadata {
pub id: Uuid,
pub media_id: MediaId,
pub source: EnrichmentSourceType,
pub external_id: Option<String>,
pub metadata_json: String,
pub confidence: f64,
pub last_updated: DateTime<Utc>,
pub id: Uuid,
pub media_id: MediaId,
pub source: EnrichmentSourceType,
pub external_id: Option<String>,
pub metadata_json: String,
pub confidence: f64,
pub last_updated: DateTime<Utc>,
}
/// Supported enrichment data sources.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum EnrichmentSourceType {
#[serde(rename = "musicbrainz")]
MusicBrainz,
#[serde(rename = "tmdb")]
Tmdb,
#[serde(rename = "lastfm")]
LastFm,
#[serde(rename = "openlibrary")]
OpenLibrary,
#[serde(rename = "googlebooks")]
GoogleBooks,
#[serde(rename = "musicbrainz")]
MusicBrainz,
#[serde(rename = "tmdb")]
Tmdb,
#[serde(rename = "lastfm")]
LastFm,
#[serde(rename = "openlibrary")]
OpenLibrary,
#[serde(rename = "googlebooks")]
GoogleBooks,
}
impl std::fmt::Display for EnrichmentSourceType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
Self::MusicBrainz => "musicbrainz",
Self::Tmdb => "tmdb",
Self::LastFm => "lastfm",
Self::OpenLibrary => "openlibrary",
Self::GoogleBooks => "googlebooks",
};
write!(f, "{s}")
}
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
Self::MusicBrainz => "musicbrainz",
Self::Tmdb => "tmdb",
Self::LastFm => "lastfm",
Self::OpenLibrary => "openlibrary",
Self::GoogleBooks => "googlebooks",
};
write!(f, "{s}")
}
}
impl std::str::FromStr for EnrichmentSourceType {
type Err = String;
type Err = String;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"musicbrainz" => Ok(Self::MusicBrainz),
"tmdb" => Ok(Self::Tmdb),
"lastfm" => Ok(Self::LastFm),
"openlibrary" => Ok(Self::OpenLibrary),
"googlebooks" => Ok(Self::GoogleBooks),
_ => Err(format!("unknown enrichment source: {s}")),
}
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"musicbrainz" => Ok(Self::MusicBrainz),
"tmdb" => Ok(Self::Tmdb),
"lastfm" => Ok(Self::LastFm),
"openlibrary" => Ok(Self::OpenLibrary),
"googlebooks" => Ok(Self::GoogleBooks),
_ => Err(format!("unknown enrichment source: {s}")),
}
}
}
/// Trait for metadata enrichment providers.
#[async_trait::async_trait]
pub trait MetadataEnricher: Send + Sync {
fn source(&self) -> EnrichmentSourceType;
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>>;
fn source(&self) -> EnrichmentSourceType;
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>>;
}

View file

@ -5,130 +5,137 @@ use std::time::Duration;
use chrono::Utc;
use uuid::Uuid;
use crate::error::{PinakesError, Result};
use crate::model::MediaItem;
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
use crate::{
error::{PinakesError, Result},
model::MediaItem,
};
pub struct MusicBrainzEnricher {
client: reqwest::Client,
base_url: String,
client: reqwest::Client,
base_url: String,
}
impl Default for MusicBrainzEnricher {
fn default() -> Self {
Self::new()
}
fn default() -> Self {
Self::new()
}
}
impl MusicBrainzEnricher {
pub fn new() -> Self {
Self {
client: reqwest::Client::builder()
.user_agent("Pinakes/0.1 (https://github.com/notashelf/pinakes)")
.timeout(Duration::from_secs(10))
.connect_timeout(Duration::from_secs(5))
.build()
.expect("failed to build HTTP client with configured timeouts"),
base_url: "https://musicbrainz.org/ws/2".to_string(),
}
pub fn new() -> Self {
Self {
client: reqwest::Client::builder()
.user_agent("Pinakes/0.1 (https://github.com/notashelf/pinakes)")
.timeout(Duration::from_secs(10))
.connect_timeout(Duration::from_secs(5))
.build()
.expect("failed to build HTTP client with configured timeouts"),
base_url: "https://musicbrainz.org/ws/2".to_string(),
}
}
}
fn escape_lucene_query(s: &str) -> String {
let special_chars = [
'+', '-', '&', '|', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '\\',
'/',
];
let mut escaped = String::with_capacity(s.len() * 2);
for c in s.chars() {
if special_chars.contains(&c) {
escaped.push('\\');
}
escaped.push(c);
let special_chars = [
'+', '-', '&', '|', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*',
'?', ':', '\\', '/',
];
let mut escaped = String::with_capacity(s.len() * 2);
for c in s.chars() {
if special_chars.contains(&c) {
escaped.push('\\');
}
escaped
escaped.push(c);
}
escaped
}
#[async_trait::async_trait]
impl MetadataEnricher for MusicBrainzEnricher {
fn source(&self) -> EnrichmentSourceType {
EnrichmentSourceType::MusicBrainz
fn source(&self) -> EnrichmentSourceType {
EnrichmentSourceType::MusicBrainz
}
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
let title = match &item.title {
Some(t) if !t.is_empty() => t,
_ => return Ok(None),
};
let mut query = format!("recording:{}", escape_lucene_query(title));
if let Some(ref artist) = item.artist {
query.push_str(&format!(" AND artist:{}", escape_lucene_query(artist)));
}
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
let title = match &item.title {
Some(t) if !t.is_empty() => t,
_ => return Ok(None),
};
let url = format!("{}/recording/", self.base_url);
let mut query = format!("recording:{}", escape_lucene_query(title));
if let Some(ref artist) = item.artist {
query.push_str(&format!(" AND artist:{}", escape_lucene_query(artist)));
}
let resp = self
.client
.get(&url)
.query(&[
("query", &query),
("fmt", &"json".to_string()),
("limit", &"1".to_string()),
])
.send()
.await
.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"MusicBrainz request failed: {e}"
))
})?;
let url = format!("{}/recording/", self.base_url);
let resp = self
.client
.get(&url)
.query(&[
("query", &query),
("fmt", &"json".to_string()),
("limit", &"1".to_string()),
])
.send()
.await
.map_err(|e| {
PinakesError::MetadataExtraction(format!("MusicBrainz request failed: {e}"))
})?;
if !resp.status().is_success() {
let status = resp.status();
if status == reqwest::StatusCode::TOO_MANY_REQUESTS
|| status == reqwest::StatusCode::SERVICE_UNAVAILABLE
{
return Err(PinakesError::MetadataExtraction(format!(
"MusicBrainz rate limited (HTTP {})",
status.as_u16()
)));
}
return Ok(None);
}
let body = resp.text().await.map_err(|e| {
PinakesError::MetadataExtraction(format!("MusicBrainz response read failed: {e}"))
})?;
// Parse to check if we got results
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
PinakesError::MetadataExtraction(format!("MusicBrainz JSON parse failed: {e}"))
})?;
let recordings = json.get("recordings").and_then(|r| r.as_array());
if recordings.is_none_or(|r| r.is_empty()) {
return Ok(None);
}
let recording = &recordings.unwrap()[0];
let external_id = recording
.get("id")
.and_then(|id| id.as_str())
.map(String::from);
let score = recording
.get("score")
.and_then(|s| s.as_f64())
.unwrap_or(0.0)
/ 100.0;
Ok(Some(ExternalMetadata {
id: Uuid::now_v7(),
media_id: item.id,
source: EnrichmentSourceType::MusicBrainz,
external_id,
metadata_json: body,
confidence: score,
last_updated: Utc::now(),
}))
if !resp.status().is_success() {
let status = resp.status();
if status == reqwest::StatusCode::TOO_MANY_REQUESTS
|| status == reqwest::StatusCode::SERVICE_UNAVAILABLE
{
return Err(PinakesError::MetadataExtraction(format!(
"MusicBrainz rate limited (HTTP {})",
status.as_u16()
)));
}
return Ok(None);
}
let body = resp.text().await.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"MusicBrainz response read failed: {e}"
))
})?;
// Parse to check if we got results
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
PinakesError::MetadataExtraction(format!(
"MusicBrainz JSON parse failed: {e}"
))
})?;
let recordings = json.get("recordings").and_then(|r| r.as_array());
if recordings.is_none_or(|r| r.is_empty()) {
return Ok(None);
}
let recording = &recordings.unwrap()[0];
let external_id = recording
.get("id")
.and_then(|id| id.as_str())
.map(String::from);
let score = recording
.get("score")
.and_then(|s| s.as_f64())
.unwrap_or(0.0)
/ 100.0;
Ok(Some(ExternalMetadata {
id: Uuid::now_v7(),
media_id: item.id,
source: EnrichmentSourceType::MusicBrainz,
external_id,
metadata_json: body,
confidence: score,
last_updated: Utc::now(),
}))
}
}

View file

@ -4,285 +4,284 @@ use crate::error::{PinakesError, Result};
/// OpenLibrary API client for book metadata enrichment
pub struct OpenLibraryClient {
client: reqwest::Client,
base_url: String,
client: reqwest::Client,
base_url: String,
}
impl Default for OpenLibraryClient {
fn default() -> Self {
Self::new()
}
fn default() -> Self {
Self::new()
}
}
impl OpenLibraryClient {
pub fn new() -> Self {
Self {
client: reqwest::Client::builder()
.user_agent("Pinakes/1.0")
.timeout(std::time::Duration::from_secs(10))
.build()
.expect("Failed to build HTTP client"),
base_url: "https://openlibrary.org".to_string(),
}
pub fn new() -> Self {
Self {
client: reqwest::Client::builder()
.user_agent("Pinakes/1.0")
.timeout(std::time::Duration::from_secs(10))
.build()
.expect("Failed to build HTTP client"),
base_url: "https://openlibrary.org".to_string(),
}
}
/// Fetch book metadata by ISBN
pub async fn fetch_by_isbn(&self, isbn: &str) -> Result<OpenLibraryBook> {
let url = format!("{}/isbn/{}.json", self.base_url, isbn);
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("OpenLibrary request failed: {}", e))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"OpenLibrary returned status: {}",
response.status()
)));
}
/// Fetch book metadata by ISBN
pub async fn fetch_by_isbn(&self, isbn: &str) -> Result<OpenLibraryBook> {
let url = format!("{}/isbn/{}.json", self.base_url, isbn);
response.json::<OpenLibraryBook>().await.map_err(|e| {
PinakesError::External(format!(
"Failed to parse OpenLibrary response: {}",
e
))
})
}
let response =
self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("OpenLibrary request failed: {}", e))
})?;
/// Search for books by title and author
pub async fn search(
&self,
title: &str,
author: Option<&str>,
) -> Result<Vec<OpenLibrarySearchResult>> {
let mut url = format!(
"{}/search.json?title={}",
self.base_url,
urlencoding::encode(title)
);
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"OpenLibrary returned status: {}",
response.status()
)));
}
response.json::<OpenLibraryBook>().await.map_err(|e| {
PinakesError::External(format!("Failed to parse OpenLibrary response: {}", e))
})
if let Some(author) = author {
url.push_str(&format!("&author={}", urlencoding::encode(author)));
}
/// Search for books by title and author
pub async fn search(
&self,
title: &str,
author: Option<&str>,
) -> Result<Vec<OpenLibrarySearchResult>> {
let mut url = format!(
"{}/search.json?title={}",
self.base_url,
urlencoding::encode(title)
);
url.push_str("&limit=5");
if let Some(author) = author {
url.push_str(&format!("&author={}", urlencoding::encode(author)));
}
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("OpenLibrary search failed: {}", e))
})?;
url.push_str("&limit=5");
let response = self
.client
.get(&url)
.send()
.await
.map_err(|e| PinakesError::External(format!("OpenLibrary search failed: {}", e)))?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"OpenLibrary search returned status: {}",
response.status()
)));
}
let search_response: OpenLibrarySearchResponse = response.json().await.map_err(|e| {
PinakesError::External(format!("Failed to parse search results: {}", e))
})?;
Ok(search_response.docs)
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"OpenLibrary search returned status: {}",
response.status()
)));
}
/// Fetch cover image by cover ID
pub async fn fetch_cover(&self, cover_id: i64, size: CoverSize) -> Result<Vec<u8>> {
let size_str = match size {
CoverSize::Small => "S",
CoverSize::Medium => "M",
CoverSize::Large => "L",
};
let search_response: OpenLibrarySearchResponse =
response.json().await.map_err(|e| {
PinakesError::External(format!("Failed to parse search results: {}", e))
})?;
let url = format!(
"https://covers.openlibrary.org/b/id/{}-{}.jpg",
cover_id, size_str
);
Ok(search_response.docs)
}
let response = self
.client
.get(&url)
.send()
.await
.map_err(|e| PinakesError::External(format!("Cover download failed: {}", e)))?;
/// Fetch cover image by cover ID
pub async fn fetch_cover(
&self,
cover_id: i64,
size: CoverSize,
) -> Result<Vec<u8>> {
let size_str = match size {
CoverSize::Small => "S",
CoverSize::Medium => "M",
CoverSize::Large => "L",
};
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Cover download returned status: {}",
response.status()
)));
}
let url = format!(
"https://covers.openlibrary.org/b/id/{}-{}.jpg",
cover_id, size_str
);
response
.bytes()
.await
.map(|b| b.to_vec())
.map_err(|e| PinakesError::External(format!("Failed to read cover data: {}", e)))
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("Cover download failed: {}", e))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Cover download returned status: {}",
response.status()
)));
}
/// Fetch cover by ISBN
pub async fn fetch_cover_by_isbn(&self, isbn: &str, size: CoverSize) -> Result<Vec<u8>> {
let size_str = match size {
CoverSize::Small => "S",
CoverSize::Medium => "M",
CoverSize::Large => "L",
};
response.bytes().await.map(|b| b.to_vec()).map_err(|e| {
PinakesError::External(format!("Failed to read cover data: {}", e))
})
}
let url = format!(
"https://covers.openlibrary.org/b/isbn/{}-{}.jpg",
isbn, size_str
);
/// Fetch cover by ISBN
pub async fn fetch_cover_by_isbn(
&self,
isbn: &str,
size: CoverSize,
) -> Result<Vec<u8>> {
let size_str = match size {
CoverSize::Small => "S",
CoverSize::Medium => "M",
CoverSize::Large => "L",
};
let response = self
.client
.get(&url)
.send()
.await
.map_err(|e| PinakesError::External(format!("Cover download failed: {}", e)))?;
let url = format!(
"https://covers.openlibrary.org/b/isbn/{}-{}.jpg",
isbn, size_str
);
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Cover download returned status: {}",
response.status()
)));
}
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("Cover download failed: {}", e))
})?;
response
.bytes()
.await
.map(|b| b.to_vec())
.map_err(|e| PinakesError::External(format!("Failed to read cover data: {}", e)))
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Cover download returned status: {}",
response.status()
)));
}
response.bytes().await.map(|b| b.to_vec()).map_err(|e| {
PinakesError::External(format!("Failed to read cover data: {}", e))
})
}
}
#[derive(Debug, Clone, Copy)]
pub enum CoverSize {
Small, // 256x256
Medium, // 600x800
Large, // Original
Small, // 256x256
Medium, // 600x800
Large, // Original
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenLibraryBook {
#[serde(default)]
pub title: Option<String>,
#[serde(default)]
pub title: Option<String>,
#[serde(default)]
pub subtitle: Option<String>,
#[serde(default)]
pub subtitle: Option<String>,
#[serde(default)]
pub authors: Vec<AuthorRef>,
#[serde(default)]
pub authors: Vec<AuthorRef>,
#[serde(default)]
pub publishers: Vec<String>,
#[serde(default)]
pub publishers: Vec<String>,
#[serde(default)]
pub publish_date: Option<String>,
#[serde(default)]
pub publish_date: Option<String>,
#[serde(default)]
pub number_of_pages: Option<i32>,
#[serde(default)]
pub number_of_pages: Option<i32>,
#[serde(default)]
pub subjects: Vec<String>,
#[serde(default)]
pub subjects: Vec<String>,
#[serde(default)]
pub covers: Vec<i64>,
#[serde(default)]
pub covers: Vec<i64>,
#[serde(default)]
pub isbn_10: Vec<String>,
#[serde(default)]
pub isbn_10: Vec<String>,
#[serde(default)]
pub isbn_13: Vec<String>,
#[serde(default)]
pub isbn_13: Vec<String>,
#[serde(default)]
pub series: Vec<String>,
#[serde(default)]
pub series: Vec<String>,
#[serde(default)]
pub description: Option<StringOrObject>,
#[serde(default)]
pub description: Option<StringOrObject>,
#[serde(default)]
pub languages: Vec<LanguageRef>,
#[serde(default)]
pub languages: Vec<LanguageRef>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorRef {
pub key: String,
pub key: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageRef {
pub key: String,
pub key: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrObject {
String(String),
Object { value: String },
String(String),
Object { value: String },
}
impl StringOrObject {
pub fn as_str(&self) -> &str {
match self {
Self::String(s) => s,
Self::Object { value } => value,
}
pub fn as_str(&self) -> &str {
match self {
Self::String(s) => s,
Self::Object { value } => value,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenLibrarySearchResponse {
#[serde(default)]
pub docs: Vec<OpenLibrarySearchResult>,
#[serde(default)]
pub docs: Vec<OpenLibrarySearchResult>,
#[serde(default)]
pub num_found: i32,
#[serde(default)]
pub num_found: i32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenLibrarySearchResult {
#[serde(default)]
pub key: Option<String>,
#[serde(default)]
pub key: Option<String>,
#[serde(default)]
pub title: Option<String>,
#[serde(default)]
pub title: Option<String>,
#[serde(default)]
pub author_name: Vec<String>,
#[serde(default)]
pub author_name: Vec<String>,
#[serde(default)]
pub first_publish_year: Option<i32>,
#[serde(default)]
pub first_publish_year: Option<i32>,
#[serde(default)]
pub publisher: Vec<String>,
#[serde(default)]
pub publisher: Vec<String>,
#[serde(default)]
pub isbn: Vec<String>,
#[serde(default)]
pub isbn: Vec<String>,
#[serde(default)]
pub cover_i: Option<i64>,
#[serde(default)]
pub cover_i: Option<i64>,
#[serde(default)]
pub subject: Vec<String>,
#[serde(default)]
pub subject: Vec<String>,
}
#[cfg(test)]
mod tests {
use super::*;
use super::*;
#[tokio::test]
async fn test_openlibrary_client_creation() {
let client = OpenLibraryClient::new();
assert_eq!(client.base_url, "https://openlibrary.org");
}
#[tokio::test]
async fn test_openlibrary_client_creation() {
let client = OpenLibraryClient::new();
assert_eq!(client.base_url, "https://openlibrary.org");
}
#[test]
fn test_string_or_object_parsing() {
let string_desc: StringOrObject = serde_json::from_str(r#""Simple description""#).unwrap();
assert_eq!(string_desc.as_str(), "Simple description");
#[test]
fn test_string_or_object_parsing() {
let string_desc: StringOrObject =
serde_json::from_str(r#""Simple description""#).unwrap();
assert_eq!(string_desc.as_str(), "Simple description");
let object_desc: StringOrObject =
serde_json::from_str(r#"{"value": "Object description"}"#).unwrap();
assert_eq!(object_desc.as_str(), "Object description");
}
let object_desc: StringOrObject =
serde_json::from_str(r#"{"value": "Object description"}"#).unwrap();
assert_eq!(object_desc.as_str(), "Object description");
}
}

View file

@ -5,105 +5,110 @@ use std::time::Duration;
use chrono::Utc;
use uuid::Uuid;
use crate::error::{PinakesError, Result};
use crate::model::MediaItem;
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
use crate::{
error::{PinakesError, Result},
model::MediaItem,
};
pub struct TmdbEnricher {
client: reqwest::Client,
api_key: String,
base_url: String,
client: reqwest::Client,
api_key: String,
base_url: String,
}
impl TmdbEnricher {
pub fn new(api_key: String) -> Self {
Self {
client: reqwest::Client::builder()
.timeout(Duration::from_secs(10))
.connect_timeout(Duration::from_secs(5))
.build()
.expect("failed to build HTTP client with configured timeouts"),
api_key,
base_url: "https://api.themoviedb.org/3".to_string(),
}
pub fn new(api_key: String) -> Self {
Self {
client: reqwest::Client::builder()
.timeout(Duration::from_secs(10))
.connect_timeout(Duration::from_secs(5))
.build()
.expect("failed to build HTTP client with configured timeouts"),
api_key,
base_url: "https://api.themoviedb.org/3".to_string(),
}
}
}
#[async_trait::async_trait]
impl MetadataEnricher for TmdbEnricher {
fn source(&self) -> EnrichmentSourceType {
EnrichmentSourceType::Tmdb
fn source(&self) -> EnrichmentSourceType {
EnrichmentSourceType::Tmdb
}
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
let title = match &item.title {
Some(t) if !t.is_empty() => t,
_ => return Ok(None),
};
let url = format!("{}/search/movie", self.base_url);
let resp = self
.client
.get(&url)
.query(&[
("api_key", &self.api_key),
("query", &title.to_string()),
("page", &"1".to_string()),
])
.send()
.await
.map_err(|e| {
PinakesError::MetadataExtraction(format!("TMDB request failed: {e}"))
})?;
if !resp.status().is_success() {
let status = resp.status();
if status == reqwest::StatusCode::UNAUTHORIZED {
return Err(PinakesError::MetadataExtraction(
"TMDB API key is invalid (401)".into(),
));
}
if status == reqwest::StatusCode::TOO_MANY_REQUESTS {
tracing::warn!("TMDB rate limit exceeded (429)");
return Ok(None);
}
tracing::debug!(status = %status, "TMDB search returned non-success status");
return Ok(None);
}
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
let title = match &item.title {
Some(t) if !t.is_empty() => t,
_ => return Ok(None),
};
let body = resp.text().await.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"TMDB response read failed: {e}"
))
})?;
let url = format!("{}/search/movie", self.base_url);
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
PinakesError::MetadataExtraction(format!("TMDB JSON parse failed: {e}"))
})?;
let resp = self
.client
.get(&url)
.query(&[
("api_key", &self.api_key),
("query", &title.to_string()),
("page", &"1".to_string()),
])
.send()
.await
.map_err(|e| PinakesError::MetadataExtraction(format!("TMDB request failed: {e}")))?;
if !resp.status().is_success() {
let status = resp.status();
if status == reqwest::StatusCode::UNAUTHORIZED {
return Err(PinakesError::MetadataExtraction(
"TMDB API key is invalid (401)".into(),
));
}
if status == reqwest::StatusCode::TOO_MANY_REQUESTS {
tracing::warn!("TMDB rate limit exceeded (429)");
return Ok(None);
}
tracing::debug!(status = %status, "TMDB search returned non-success status");
return Ok(None);
}
let body = resp.text().await.map_err(|e| {
PinakesError::MetadataExtraction(format!("TMDB response read failed: {e}"))
})?;
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
PinakesError::MetadataExtraction(format!("TMDB JSON parse failed: {e}"))
})?;
let results = json.get("results").and_then(|r| r.as_array());
if results.is_none_or(|r| r.is_empty()) {
return Ok(None);
}
let movie = &results.unwrap()[0];
let external_id = match movie.get("id").and_then(|id| id.as_i64()) {
Some(id) => id.to_string(),
None => return Ok(None),
};
let popularity = movie
.get("popularity")
.and_then(|p| p.as_f64())
.unwrap_or(0.0);
// Normalize popularity to 0-1 range (TMDB popularity can be very high)
let confidence = (popularity / 100.0).min(1.0);
Ok(Some(ExternalMetadata {
id: Uuid::now_v7(),
media_id: item.id,
source: EnrichmentSourceType::Tmdb,
external_id: Some(external_id),
metadata_json: body,
confidence,
last_updated: Utc::now(),
}))
let results = json.get("results").and_then(|r| r.as_array());
if results.is_none_or(|r| r.is_empty()) {
return Ok(None);
}
let movie = &results.unwrap()[0];
let external_id = match movie.get("id").and_then(|id| id.as_i64()) {
Some(id) => id.to_string(),
None => return Ok(None),
};
let popularity = movie
.get("popularity")
.and_then(|p| p.as_f64())
.unwrap_or(0.0);
// Normalize popularity to 0-1 range (TMDB popularity can be very high)
let confidence = (popularity / 100.0).min(1.0);
Ok(Some(ExternalMetadata {
id: Uuid::now_v7(),
media_id: item.id,
source: EnrichmentSourceType::Tmdb,
external_id: Some(external_id),
metadata_json: body,
confidence,
last_updated: Utc::now(),
}))
}
}

View file

@ -4,125 +4,125 @@ use thiserror::Error;
#[derive(Debug, Error)]
pub enum PinakesError {
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
#[error("database error: {0}")]
Database(String),
#[error("database error: {0}")]
Database(String),
#[error("migration error: {0}")]
Migration(String),
#[error("migration error: {0}")]
Migration(String),
#[error("configuration error: {0}")]
Config(String),
#[error("configuration error: {0}")]
Config(String),
#[error("media item not found: {0}")]
NotFound(String),
#[error("media item not found: {0}")]
NotFound(String),
#[error("duplicate content hash: {0}")]
DuplicateHash(String),
#[error("duplicate content hash: {0}")]
DuplicateHash(String),
#[error("unsupported media type for path: {0}")]
UnsupportedMediaType(PathBuf),
#[error("unsupported media type for path: {0}")]
UnsupportedMediaType(PathBuf),
#[error("metadata extraction failed: {0}")]
MetadataExtraction(String),
#[error("metadata extraction failed: {0}")]
MetadataExtraction(String),
#[error("search query parse error: {0}")]
SearchParse(String),
#[error("search query parse error: {0}")]
SearchParse(String),
#[error("file not found at path: {0}")]
FileNotFound(PathBuf),
#[error("file not found at path: {0}")]
FileNotFound(PathBuf),
#[error("tag not found: {0}")]
TagNotFound(String),
#[error("tag not found: {0}")]
TagNotFound(String),
#[error("collection not found: {0}")]
CollectionNotFound(String),
#[error("collection not found: {0}")]
CollectionNotFound(String),
#[error("invalid operation: {0}")]
InvalidOperation(String),
#[error("invalid operation: {0}")]
InvalidOperation(String),
#[error("invalid data: {0}")]
InvalidData(String),
#[error("invalid data: {0}")]
InvalidData(String),
#[error("authentication error: {0}")]
Authentication(String),
#[error("authentication error: {0}")]
Authentication(String),
#[error("authorization error: {0}")]
Authorization(String),
#[error("authorization error: {0}")]
Authorization(String),
#[error("path not allowed: {0}")]
PathNotAllowed(String),
#[error("path not allowed: {0}")]
PathNotAllowed(String),
#[error("external API error: {0}")]
External(String),
#[error("external API error: {0}")]
External(String),
// Managed Storage errors
#[error("managed storage not enabled")]
ManagedStorageDisabled,
// Managed Storage errors
#[error("managed storage not enabled")]
ManagedStorageDisabled,
#[error("upload too large: {0} bytes exceeds limit")]
UploadTooLarge(u64),
#[error("upload too large: {0} bytes exceeds limit")]
UploadTooLarge(u64),
#[error("blob not found: {0}")]
BlobNotFound(String),
#[error("blob not found: {0}")]
BlobNotFound(String),
#[error("storage integrity error: {0}")]
StorageIntegrity(String),
#[error("storage integrity error: {0}")]
StorageIntegrity(String),
// Sync errors
#[error("sync not enabled")]
SyncDisabled,
// Sync errors
#[error("sync not enabled")]
SyncDisabled,
#[error("device not found: {0}")]
DeviceNotFound(String),
#[error("device not found: {0}")]
DeviceNotFound(String),
#[error("sync conflict: {0}")]
SyncConflict(String),
#[error("sync conflict: {0}")]
SyncConflict(String),
#[error("upload session expired: {0}")]
UploadSessionExpired(String),
#[error("upload session expired: {0}")]
UploadSessionExpired(String),
#[error("upload session not found: {0}")]
UploadSessionNotFound(String),
#[error("upload session not found: {0}")]
UploadSessionNotFound(String),
#[error("chunk out of order: expected {expected}, got {actual}")]
ChunkOutOfOrder { expected: u64, actual: u64 },
#[error("chunk out of order: expected {expected}, got {actual}")]
ChunkOutOfOrder { expected: u64, actual: u64 },
// Sharing errors
#[error("share not found: {0}")]
ShareNotFound(String),
// Sharing errors
#[error("share not found: {0}")]
ShareNotFound(String),
#[error("share expired: {0}")]
ShareExpired(String),
#[error("share expired: {0}")]
ShareExpired(String),
#[error("share password required")]
SharePasswordRequired,
#[error("share password required")]
SharePasswordRequired,
#[error("share password invalid")]
SharePasswordInvalid,
#[error("share password invalid")]
SharePasswordInvalid,
#[error("insufficient share permissions")]
InsufficientSharePermissions,
#[error("insufficient share permissions")]
InsufficientSharePermissions,
}
impl From<rusqlite::Error> for PinakesError {
fn from(e: rusqlite::Error) -> Self {
PinakesError::Database(e.to_string())
}
fn from(e: rusqlite::Error) -> Self {
PinakesError::Database(e.to_string())
}
}
impl From<tokio_postgres::Error> for PinakesError {
fn from(e: tokio_postgres::Error) -> Self {
PinakesError::Database(e.to_string())
}
fn from(e: tokio_postgres::Error) -> Self {
PinakesError::Database(e.to_string())
}
}
impl From<serde_json::Error> for PinakesError {
fn from(e: serde_json::Error) -> Self {
PinakesError::Database(format!("JSON serialization error: {}", e))
}
fn from(e: serde_json::Error) -> Self {
PinakesError::Database(format!("JSON serialization error: {}", e))
}
}
pub type Result<T> = std::result::Result<T, PinakesError>;

View file

@ -1,205 +1,212 @@
//! Auto-detection of photo events and albums based on time and location proximity
//! Auto-detection of photo events and albums based on time and location
//! proximity
use chrono::{DateTime, Utc};
use crate::error::Result;
use crate::model::{MediaId, MediaItem};
use crate::{
error::Result,
model::{MediaId, MediaItem},
};
/// Configuration for event detection
#[derive(Debug, Clone)]
pub struct EventDetectionConfig {
/// Maximum time gap between photos in the same event (in seconds)
pub max_time_gap_secs: i64,
/// Minimum number of photos to form an event
pub min_photos: usize,
/// Maximum distance between photos in the same event (in kilometers)
/// None means location is not considered
pub max_distance_km: Option<f64>,
/// Consider photos on the same day as potentially the same event
pub same_day_threshold: bool,
/// Maximum time gap between photos in the same event (in seconds)
pub max_time_gap_secs: i64,
/// Minimum number of photos to form an event
pub min_photos: usize,
/// Maximum distance between photos in the same event (in kilometers)
/// None means location is not considered
pub max_distance_km: Option<f64>,
/// Consider photos on the same day as potentially the same event
pub same_day_threshold: bool,
}
impl Default for EventDetectionConfig {
fn default() -> Self {
Self {
max_time_gap_secs: 2 * 60 * 60, // 2 hours
min_photos: 5,
max_distance_km: Some(1.0), // 1km
same_day_threshold: true,
}
fn default() -> Self {
Self {
max_time_gap_secs: 2 * 60 * 60, // 2 hours
min_photos: 5,
max_distance_km: Some(1.0), // 1km
same_day_threshold: true,
}
}
}
/// A detected photo event/album
#[derive(Debug, Clone)]
pub struct DetectedEvent {
/// Suggested name for the event (e.g., "Photos from 2024-01-15")
pub suggested_name: String,
/// Start time of the event
pub start_time: DateTime<Utc>,
/// End time of the event
pub end_time: DateTime<Utc>,
/// Media items in this event
pub items: Vec<MediaId>,
/// Representative location (if available)
pub location: Option<(f64, f64)>, // (latitude, longitude)
/// Suggested name for the event (e.g., "Photos from 2024-01-15")
pub suggested_name: String,
/// Start time of the event
pub start_time: DateTime<Utc>,
/// End time of the event
pub end_time: DateTime<Utc>,
/// Media items in this event
pub items: Vec<MediaId>,
/// Representative location (if available)
pub location: Option<(f64, f64)>, // (latitude, longitude)
}
/// Calculate Haversine distance between two GPS coordinates in kilometers.
///
/// Inputs are degrees of latitude/longitude; the result is the great-circle
/// distance assuming a spherical Earth of mean radius 6371 km.
fn haversine_distance(lat1: f64, lon1: f64, lat2: f64, lon2: f64) -> f64 {
    const EARTH_RADIUS_KM: f64 = 6371.0;

    let dlat = (lat2 - lat1).to_radians();
    let dlon = (lon2 - lon1).to_radians();

    // Standard haversine: a = sin²(Δφ/2) + cos φ1 · cos φ2 · sin²(Δλ/2)
    let a = (dlat / 2.0).sin().powi(2)
        + lat1.to_radians().cos() * lat2.to_radians().cos() * (dlon / 2.0).sin().powi(2);
    // atan2 form is numerically stable for small and antipodal distances
    let c = 2.0 * a.sqrt().atan2((1.0 - a).sqrt());

    EARTH_RADIUS_KM * c
}
/// Detect photo events from a list of media items.
///
/// Items without `date_taken` are dropped; the remainder is sorted
/// chronologically and greedily grouped: a photo joins the current group
/// when it is close enough in time to the previous photo (same calendar
/// day counts when `config.same_day_threshold` is set) and, when GPS data
/// is available on both sides, close enough in space. Groups with fewer
/// than `config.min_photos` items are discarded.
pub fn detect_events(
    mut items: Vec<MediaItem>,
    config: &EventDetectionConfig,
) -> Result<Vec<DetectedEvent>> {
    // Filter to only photos with date_taken
    items.retain(|item| item.date_taken.is_some());

    if items.is_empty() {
        return Ok(Vec::new());
    }

    // Sort by date_taken; unwrap is safe because of the retain above
    items.sort_by(|a, b| a.date_taken.unwrap().cmp(&b.date_taken.unwrap()));

    let mut events: Vec<DetectedEvent> = Vec::new();
    let mut current_event_items: Vec<MediaId> = vec![items[0].id];
    let mut current_start_time = items[0].date_taken.unwrap();
    let mut current_last_time = items[0].date_taken.unwrap();
    let mut current_location = items[0].latitude.zip(items[0].longitude);

    for item in items.iter().skip(1) {
        let item_time = item.date_taken.unwrap();
        let time_gap = (item_time - current_last_time).num_seconds();

        // Check time gap
        let time_ok = if config.same_day_threshold {
            // Same day or within time gap
            item_time.date_naive() == current_last_time.date_naive()
                || time_gap <= config.max_time_gap_secs
        } else {
            time_gap <= config.max_time_gap_secs
        };

        // Check location proximity if both have GPS data
        let location_ok = match (
            config.max_distance_km,
            current_location,
            item.latitude.zip(item.longitude),
        ) {
            (Some(max_dist), Some((lat1, lon1)), Some((lat2, lon2))) => {
                let dist = haversine_distance(lat1, lon1, lat2, lon2);
                dist <= max_dist
            }
            // If no location constraint or missing GPS, consider location OK
            _ => true,
        };

        if time_ok && location_ok {
            // Add to current event
            current_event_items.push(item.id);
            current_last_time = item_time;

            // Update the representative location to a running midpoint when
            // both the group and the item have GPS; otherwise adopt the
            // item's location if it has one.
            if let (Some((lat1, lon1)), Some((lat2, lon2))) =
                (current_location, item.latitude.zip(item.longitude))
            {
                current_location = Some(((lat1 + lat2) / 2.0, (lon1 + lon2) / 2.0));
            } else if item.latitude.is_some() && item.longitude.is_some() {
                current_location = item.latitude.zip(item.longitude);
            }
        } else {
            // Close out the current group if it has enough photos
            if current_event_items.len() >= config.min_photos {
                let event_name = format!("Event on {}", current_start_time.format("%Y-%m-%d"));
                events.push(DetectedEvent {
                    suggested_name: event_name,
                    start_time: current_start_time,
                    end_time: current_last_time,
                    items: current_event_items.clone(),
                    location: current_location,
                });
            }
            // Reset for new event
            current_event_items = vec![item.id];
            current_start_time = item_time;
            current_last_time = item_time;
            current_location = item.latitude.zip(item.longitude);
        }
    }

    // Don't forget the last event
    if current_event_items.len() >= config.min_photos {
        let event_name = format!("Event on {}", current_start_time.format("%Y-%m-%d"));
        events.push(DetectedEvent {
            suggested_name: event_name,
            start_time: current_start_time,
            end_time: current_last_time,
            items: current_event_items,
            location: current_location,
        });
    }

    Ok(events)
}
/// Detect photo bursts (rapid sequences of photos)
/// Returns groups of media IDs that are likely burst sequences
pub fn detect_bursts(
mut items: Vec<MediaItem>,
max_gap_secs: i64,
min_burst_size: usize,
mut items: Vec<MediaItem>,
max_gap_secs: i64,
min_burst_size: usize,
) -> Result<Vec<Vec<MediaId>>> {
// Filter to only photos with date_taken
items.retain(|item| item.date_taken.is_some());
// Filter to only photos with date_taken
items.retain(|item| item.date_taken.is_some());
if items.is_empty() {
return Ok(Vec::new());
if items.is_empty() {
return Ok(Vec::new());
}
// Sort by date_taken
items.sort_by(|a, b| a.date_taken.unwrap().cmp(&b.date_taken.unwrap()));
let mut bursts: Vec<Vec<MediaId>> = Vec::new();
let mut current_burst: Vec<MediaId> = vec![items[0].id];
let mut last_time = items[0].date_taken.unwrap();
for item in items.iter().skip(1) {
let item_time = item.date_taken.unwrap();
let gap = (item_time - last_time).num_seconds();
if gap <= max_gap_secs {
current_burst.push(item.id);
} else {
if current_burst.len() >= min_burst_size {
bursts.push(current_burst.clone());
}
current_burst = vec![item.id];
}
// Sort by date_taken
items.sort_by(|a, b| a.date_taken.unwrap().cmp(&b.date_taken.unwrap()));
last_time = item_time;
}
let mut bursts: Vec<Vec<MediaId>> = Vec::new();
let mut current_burst: Vec<MediaId> = vec![items[0].id];
let mut last_time = items[0].date_taken.unwrap();
// Don't forget the last burst
if current_burst.len() >= min_burst_size {
bursts.push(current_burst);
}
for item in items.iter().skip(1) {
let item_time = item.date_taken.unwrap();
let gap = (item_time - last_time).num_seconds();
if gap <= max_gap_secs {
current_burst.push(item.id);
} else {
if current_burst.len() >= min_burst_size {
bursts.push(current_burst.clone());
}
current_burst = vec![item.id];
}
last_time = item_time;
}
// Don't forget the last burst
if current_burst.len() >= min_burst_size {
bursts.push(current_burst);
}
Ok(bursts)
Ok(bursts)
}

View file

@ -2,67 +2,70 @@ use std::path::Path;
use serde::{Deserialize, Serialize};
use crate::error::Result;
use crate::jobs::ExportFormat;
use crate::storage::DynStorageBackend;
use crate::{error::Result, jobs::ExportFormat, storage::DynStorageBackend};
/// Summary of a completed library export.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportResult {
    /// Number of media items written to the export file.
    pub items_exported: usize,
    /// Path of the file that was written.
    pub output_path: String,
}
/// Export library data to the specified format.
///
/// Lists every media item from `storage` (unbounded pagination) and writes
/// it to `destination` as either pretty-printed JSON or a flat CSV table.
///
/// NOTE(review): the CSV writer does not quote/escape field values, so
/// titles containing commas or newlines will break the row layout —
/// confirm whether a proper CSV escaper is wanted here.
pub async fn export_library(
    storage: &DynStorageBackend,
    format: &ExportFormat,
    destination: &Path,
) -> Result<ExportResult> {
    // Fetch everything in one page; u64::MAX effectively disables the limit.
    let pagination = crate::model::Pagination {
        offset: 0,
        limit: u64::MAX,
        sort: None,
    };
    let items = storage.list_media(&pagination).await?;
    let count = items.len();

    match format {
        ExportFormat::Json => {
            let json = serde_json::to_string_pretty(&items)
                .map_err(|e| crate::error::PinakesError::Config(format!("json serialize: {e}")))?;
            std::fs::write(destination, json)?;
        }
        ExportFormat::Csv => {
            let mut csv = String::new();
            csv.push_str("id,path,file_name,media_type,content_hash,file_size,title,artist,album,genre,year,duration_secs,description,created_at,updated_at\n");
            for item in &items {
                csv.push_str(&format!(
                    "{},{},{},{:?},{},{},{},{},{},{},{},{},{},{},{}\n",
                    item.id,
                    item.path.display(),
                    item.file_name,
                    item.media_type,
                    item.content_hash,
                    item.file_size,
                    item.title.as_deref().unwrap_or(""),
                    item.artist.as_deref().unwrap_or(""),
                    item.album.as_deref().unwrap_or(""),
                    item.genre.as_deref().unwrap_or(""),
                    item.year.map(|y| y.to_string()).unwrap_or_default(),
                    item.duration_secs
                        .map(|d| d.to_string())
                        .unwrap_or_default(),
                    item.description.as_deref().unwrap_or(""),
                    item.created_at,
                    item.updated_at,
                ));
            }
            std::fs::write(destination, csv)?;
        }
    }

    Ok(ExportResult {
        items_exported: count,
        output_path: destination.to_string_lossy().to_string(),
    })
}

View file

@ -1,31 +1,30 @@
use std::path::Path;
use crate::error::Result;
use crate::model::ContentHash;
use crate::{error::Result, model::ContentHash};
/// Read buffer size for streaming file hashing (64 KiB).
const BUFFER_SIZE: usize = 65536;

/// Compute the BLAKE3 content hash of a file.
///
/// The read + hash loop runs on a blocking thread via `spawn_blocking` so
/// the async runtime is not stalled by file I/O. Returns the hash as a
/// lowercase hex string wrapped in [`ContentHash`].
pub async fn compute_file_hash(path: &Path) -> Result<ContentHash> {
    let path = path.to_path_buf();
    let hash = tokio::task::spawn_blocking(move || -> Result<ContentHash> {
        let mut hasher = blake3::Hasher::new();
        let mut file = std::fs::File::open(&path)?;
        let mut buf = vec![0u8; BUFFER_SIZE];
        loop {
            let n = std::io::Read::read(&mut file, &mut buf)?;
            if n == 0 {
                break; // EOF
            }
            hasher.update(&buf[..n]);
        }
        Ok(ContentHash::new(hasher.finalize().to_hex().to_string()))
    })
    .await
    // spawn_blocking join errors (panic/cancel) are surfaced as I/O errors
    .map_err(|e| crate::error::PinakesError::Io(std::io::Error::other(e)))??;
    Ok(hash)
}
/// Compute the BLAKE3 hash of an in-memory byte slice synchronously.
pub fn compute_hash_sync(data: &[u8]) -> ContentHash {
    let hash = blake3::hash(data);
    ContentHash::new(hash.to_hex().to_string())
}

View file

@ -1,436 +1,457 @@
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use std::{
path::{Path, PathBuf},
time::SystemTime,
};
use tracing::info;
use crate::audit;
use crate::error::{PinakesError, Result};
use crate::hash::compute_file_hash;
use crate::links;
use crate::media_type::{BuiltinMediaType, MediaType};
use crate::metadata;
use crate::model::*;
use crate::storage::DynStorageBackend;
use crate::thumbnail;
use crate::{
audit,
error::{PinakesError, Result},
hash::compute_file_hash,
links,
media_type::{BuiltinMediaType, MediaType},
metadata,
model::*,
storage::DynStorageBackend,
thumbnail,
};
pub struct ImportResult {
pub media_id: MediaId,
pub was_duplicate: bool,
/// True if the file was skipped because it hasn't changed since last scan
pub was_skipped: bool,
pub path: PathBuf,
pub media_id: MediaId,
pub was_duplicate: bool,
/// True if the file was skipped because it hasn't changed since last scan
pub was_skipped: bool,
pub path: PathBuf,
}
/// Options for import operations
#[derive(Debug, Clone)]
pub struct ImportOptions {
/// Skip files that haven't changed since last scan (based on mtime)
pub incremental: bool,
/// Force re-import even if mtime hasn't changed
pub force: bool,
/// Photo configuration for toggleable features
pub photo_config: crate::config::PhotoConfig,
/// Skip files that haven't changed since last scan (based on mtime)
pub incremental: bool,
/// Force re-import even if mtime hasn't changed
pub force: bool,
/// Photo configuration for toggleable features
pub photo_config: crate::config::PhotoConfig,
}
impl Default for ImportOptions {
fn default() -> Self {
Self {
incremental: false,
force: false,
photo_config: crate::config::PhotoConfig::default(),
}
fn default() -> Self {
Self {
incremental: false,
force: false,
photo_config: crate::config::PhotoConfig::default(),
}
}
}
/// Get the modification time of a file as a Unix timestamp.
///
/// Returns `None` if the metadata is unavailable, the platform does not
/// report mtimes, or the mtime predates the Unix epoch.
fn get_file_mtime(path: &Path) -> Option<i64> {
    std::fs::metadata(path)
        .ok()
        .and_then(|m| m.modified().ok())
        .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
        .map(|d| d.as_secs() as i64)
}
/// Check that a canonicalized path falls under at least one configured root directory.
/// If no roots are configured, all paths are allowed (for ad-hoc imports).
pub async fn validate_path_in_roots(storage: &DynStorageBackend, path: &Path) -> Result<()> {
let roots = storage.list_root_dirs().await?;
if roots.is_empty() {
return Ok(());
/// Check that a canonicalized path falls under at least one configured root
/// directory. If no roots are configured, all paths are allowed (for ad-hoc
/// imports).
pub async fn validate_path_in_roots(
storage: &DynStorageBackend,
path: &Path,
) -> Result<()> {
let roots = storage.list_root_dirs().await?;
if roots.is_empty() {
return Ok(());
}
for root in &roots {
if let Ok(canonical_root) = root.canonicalize()
&& path.starts_with(&canonical_root)
{
return Ok(());
}
for root in &roots {
if let Ok(canonical_root) = root.canonicalize()
&& path.starts_with(&canonical_root)
{
return Ok(());
}
}
Err(PinakesError::InvalidOperation(format!(
"path {} is not within any configured root directory",
path.display()
)))
}
Err(PinakesError::InvalidOperation(format!(
"path {} is not within any configured root directory",
path.display()
)))
}
pub async fn import_file(storage: &DynStorageBackend, path: &Path) -> Result<ImportResult> {
import_file_with_options(storage, path, &ImportOptions::default()).await
pub async fn import_file(
storage: &DynStorageBackend,
path: &Path,
) -> Result<ImportResult> {
import_file_with_options(storage, path, &ImportOptions::default()).await
}
/// Import a file with configurable options for incremental scanning
pub async fn import_file_with_options(
storage: &DynStorageBackend,
path: &Path,
options: &ImportOptions,
storage: &DynStorageBackend,
path: &Path,
options: &ImportOptions,
) -> Result<ImportResult> {
let path = path.canonicalize()?;
let path = path.canonicalize()?;
if !path.exists() {
return Err(PinakesError::FileNotFound(path));
if !path.exists() {
return Err(PinakesError::FileNotFound(path));
}
validate_path_in_roots(storage, &path).await?;
let media_type = MediaType::from_path(&path)
.ok_or_else(|| PinakesError::UnsupportedMediaType(path.clone()))?;
let current_mtime = get_file_mtime(&path);
// Check for incremental scan: skip if file hasn't changed
if options.incremental
&& !options.force
&& let Some(existing) = storage.get_media_by_path(&path).await?
&& let (Some(stored_mtime), Some(curr_mtime)) =
(existing.file_mtime, current_mtime)
&& stored_mtime == curr_mtime
{
return Ok(ImportResult {
media_id: existing.id,
was_duplicate: false,
was_skipped: true,
path: path.clone(),
});
}
let content_hash = compute_file_hash(&path).await?;
if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
// Update the mtime even for duplicates so incremental scan works
if current_mtime.is_some() && existing.file_mtime != current_mtime {
let mut updated = existing.clone();
updated.file_mtime = current_mtime;
let _ = storage.update_media(&updated).await;
}
return Ok(ImportResult {
media_id: existing.id,
was_duplicate: true,
was_skipped: false,
path: path.clone(),
});
}
validate_path_in_roots(storage, &path).await?;
let file_meta = std::fs::metadata(&path)?;
let file_size = file_meta.len();
let media_type = MediaType::from_path(&path)
.ok_or_else(|| PinakesError::UnsupportedMediaType(path.clone()))?;
let current_mtime = get_file_mtime(&path);
// Check for incremental scan: skip if file hasn't changed
if options.incremental
&& !options.force
&& let Some(existing) = storage.get_media_by_path(&path).await?
&& let (Some(stored_mtime), Some(curr_mtime)) = (existing.file_mtime, current_mtime)
&& stored_mtime == curr_mtime
{
return Ok(ImportResult {
media_id: existing.id,
was_duplicate: false,
was_skipped: true,
path: path.clone(),
});
}
let content_hash = compute_file_hash(&path).await?;
if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
// Update the mtime even for duplicates so incremental scan works
if current_mtime.is_some() && existing.file_mtime != current_mtime {
let mut updated = existing.clone();
updated.file_mtime = current_mtime;
let _ = storage.update_media(&updated).await;
}
return Ok(ImportResult {
media_id: existing.id,
was_duplicate: true,
was_skipped: false,
path: path.clone(),
});
}
let file_meta = std::fs::metadata(&path)?;
let file_size = file_meta.len();
let extracted = {
let path_clone = path.clone();
let media_type_clone = media_type.clone();
tokio::task::spawn_blocking(move || {
metadata::extract_metadata(&path_clone, media_type_clone)
})
.await
.map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
};
let file_name = path
.file_name()
.unwrap_or_default()
.to_string_lossy()
.to_string();
let now = chrono::Utc::now();
let media_id = MediaId::new();
// Generate thumbnail for image types
let thumb_path = {
let source = path.clone();
let thumb_dir = thumbnail::default_thumbnail_dir();
let media_type_clone = media_type.clone();
tokio::task::spawn_blocking(move || {
thumbnail::generate_thumbnail(media_id, &source, media_type_clone, &thumb_dir)
})
.await
.map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
};
// Generate perceptual hash for image files (if enabled in config)
let perceptual_hash = if options.photo_config.generate_perceptual_hash
&& media_type.category() == crate::media_type::MediaCategory::Image
{
crate::metadata::image::generate_perceptual_hash(&path)
} else {
None
};
// Check if this is a markdown file for link extraction
let is_markdown = media_type == MediaType::Builtin(BuiltinMediaType::Markdown);
let item = MediaItem {
id: media_id,
path: path.clone(),
file_name,
media_type,
content_hash,
file_size,
title: extracted.title,
artist: extracted.artist,
album: extracted.album,
genre: extracted.genre,
year: extracted.year,
duration_secs: extracted.duration_secs,
description: extracted.description,
thumbnail_path: thumb_path,
custom_fields: std::collections::HashMap::new(),
file_mtime: current_mtime,
// Photo-specific metadata from extraction
date_taken: extracted.date_taken,
latitude: extracted.latitude,
longitude: extracted.longitude,
camera_make: extracted.camera_make,
camera_model: extracted.camera_model,
rating: extracted.rating,
perceptual_hash,
// Managed storage fields - external files use defaults
storage_mode: StorageMode::External,
original_filename: None,
uploaded_at: None,
storage_key: None,
created_at: now,
updated_at: now,
// New items are not deleted
deleted_at: None,
// Links will be extracted separately
links_extracted_at: None,
};
storage.insert_media(&item).await?;
// Extract and store markdown links for markdown files
if is_markdown {
if let Err(e) = extract_and_store_links(storage, media_id, &path).await {
tracing::warn!(
media_id = %media_id,
path = %path.display(),
error = %e,
"failed to extract markdown links"
);
}
}
// Store extracted extra metadata as custom fields
for (key, value) in &extracted.extra {
let field = CustomField {
field_type: CustomFieldType::Text,
value: value.clone(),
};
if let Err(e) = storage.set_custom_field(media_id, key, &field).await {
tracing::warn!(
media_id = %media_id,
field = %key,
error = %e,
"failed to store extracted metadata as custom field"
);
}
}
audit::record_action(
storage,
Some(media_id),
AuditAction::Imported,
Some(format!("path={}", path.display())),
)
.await?;
info!(media_id = %media_id, path = %path.display(), "imported media file");
Ok(ImportResult {
media_id,
was_duplicate: false,
was_skipped: false,
path: path.clone(),
let extracted = {
let path_clone = path.clone();
let media_type_clone = media_type.clone();
tokio::task::spawn_blocking(move || {
metadata::extract_metadata(&path_clone, media_type_clone)
})
.await
.map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
};
let file_name = path
.file_name()
.unwrap_or_default()
.to_string_lossy()
.to_string();
let now = chrono::Utc::now();
let media_id = MediaId::new();
// Generate thumbnail for image types
let thumb_path = {
let source = path.clone();
let thumb_dir = thumbnail::default_thumbnail_dir();
let media_type_clone = media_type.clone();
tokio::task::spawn_blocking(move || {
thumbnail::generate_thumbnail(
media_id,
&source,
media_type_clone,
&thumb_dir,
)
})
.await
.map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
};
// Generate perceptual hash for image files (if enabled in config)
let perceptual_hash = if options.photo_config.generate_perceptual_hash
&& media_type.category() == crate::media_type::MediaCategory::Image
{
crate::metadata::image::generate_perceptual_hash(&path)
} else {
None
};
// Check if this is a markdown file for link extraction
let is_markdown =
media_type == MediaType::Builtin(BuiltinMediaType::Markdown);
let item = MediaItem {
id: media_id,
path: path.clone(),
file_name,
media_type,
content_hash,
file_size,
title: extracted.title,
artist: extracted.artist,
album: extracted.album,
genre: extracted.genre,
year: extracted.year,
duration_secs: extracted.duration_secs,
description: extracted.description,
thumbnail_path: thumb_path,
custom_fields: std::collections::HashMap::new(),
file_mtime: current_mtime,
// Photo-specific metadata from extraction
date_taken: extracted.date_taken,
latitude: extracted.latitude,
longitude: extracted.longitude,
camera_make: extracted.camera_make,
camera_model: extracted.camera_model,
rating: extracted.rating,
perceptual_hash,
// Managed storage fields - external files use defaults
storage_mode: StorageMode::External,
original_filename: None,
uploaded_at: None,
storage_key: None,
created_at: now,
updated_at: now,
// New items are not deleted
deleted_at: None,
// Links will be extracted separately
links_extracted_at: None,
};
storage.insert_media(&item).await?;
// Extract and store markdown links for markdown files
if is_markdown {
if let Err(e) = extract_and_store_links(storage, media_id, &path).await {
tracing::warn!(
media_id = %media_id,
path = %path.display(),
error = %e,
"failed to extract markdown links"
);
}
}
// Store extracted extra metadata as custom fields
for (key, value) in &extracted.extra {
let field = CustomField {
field_type: CustomFieldType::Text,
value: value.clone(),
};
if let Err(e) = storage.set_custom_field(media_id, key, &field).await {
tracing::warn!(
media_id = %media_id,
field = %key,
error = %e,
"failed to store extracted metadata as custom field"
);
}
}
audit::record_action(
storage,
Some(media_id),
AuditAction::Imported,
Some(format!("path={}", path.display())),
)
.await?;
info!(media_id = %media_id, path = %path.display(), "imported media file");
Ok(ImportResult {
media_id,
was_duplicate: false,
was_skipped: false,
path: path.clone(),
})
}
/// Returns true when any path component matches one of the ignore patterns.
///
/// Matching rules, checked per component:
/// - a dot-pattern (e.g. ".git") matches a dot-component of the same name;
/// - the special pattern ".*" matches any dotfile/dot-directory;
/// - otherwise the pattern must equal the component exactly.
pub(crate) fn should_ignore(path: &std::path::Path, patterns: &[String]) -> bool {
    for component in path.components() {
        if let std::path::Component::Normal(name) = component {
            let name_str = name.to_string_lossy();
            for pattern in patterns {
                if pattern.starts_with('.')
                    && name_str.starts_with('.')
                    && pattern == name_str.as_ref()
                {
                    return true;
                }
                // Simple glob: ".*" matches any dotfile
                if pattern == ".*" && name_str.starts_with('.') {
                    return true;
                }
                if name_str == pattern.as_str() {
                    return true;
                }
            }
        }
    }
    false
}
/// Default number of concurrent import tasks.
const DEFAULT_IMPORT_CONCURRENCY: usize = 8;

/// Recursively import a directory with default concurrency and options.
pub async fn import_directory(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
    import_directory_with_options(
        storage,
        dir,
        ignore_patterns,
        DEFAULT_IMPORT_CONCURRENCY,
        &ImportOptions::default(),
    )
    .await
}
/// Recursively import a directory with a caller-chosen concurrency limit
/// and default import options.
pub async fn import_directory_with_concurrency(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
    concurrency: usize,
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
    import_directory_with_options(
        storage,
        dir,
        ignore_patterns,
        concurrency,
        &ImportOptions::default(),
    )
    .await
}
/// Import a directory with full options including incremental scanning support
pub async fn import_directory_with_options(
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
concurrency: usize,
options: &ImportOptions,
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
concurrency: usize,
options: &ImportOptions,
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
let concurrency = concurrency.clamp(1, 256);
let dir = dir.to_path_buf();
let patterns = ignore_patterns.to_vec();
let options = options.clone();
let concurrency = concurrency.clamp(1, 256);
let dir = dir.to_path_buf();
let patterns = ignore_patterns.to_vec();
let options = options.clone();
let entries: Vec<PathBuf> = {
let dir = dir.clone();
tokio::task::spawn_blocking(move || {
walkdir::WalkDir::new(&dir)
.follow_links(true)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(|e| MediaType::from_path(e.path()).is_some())
.filter(|e| !should_ignore(e.path(), &patterns))
.map(|e| e.path().to_path_buf())
.collect()
})
.await
.map_err(|e| PinakesError::Io(std::io::Error::other(e)))?
};
let entries: Vec<PathBuf> = {
let dir = dir.clone();
tokio::task::spawn_blocking(move || {
walkdir::WalkDir::new(&dir)
.follow_links(true)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(|e| MediaType::from_path(e.path()).is_some())
.filter(|e| !should_ignore(e.path(), &patterns))
.map(|e| e.path().to_path_buf())
.collect()
})
.await
.map_err(|e| PinakesError::Io(std::io::Error::other(e)))?
};
let mut results = Vec::with_capacity(entries.len());
let mut join_set = tokio::task::JoinSet::new();
let mut results = Vec::with_capacity(entries.len());
let mut join_set = tokio::task::JoinSet::new();
for entry_path in entries {
let storage = storage.clone();
let path = entry_path.clone();
let opts = options.clone();
for entry_path in entries {
let storage = storage.clone();
let path = entry_path.clone();
let opts = options.clone();
join_set.spawn(async move {
let result = import_file_with_options(&storage, &path, &opts).await;
(path, result)
});
join_set.spawn(async move {
let result = import_file_with_options(&storage, &path, &opts).await;
(path, result)
});
// Limit concurrency by draining when we hit the cap
if join_set.len() >= concurrency
&& let Some(Ok((path, result))) = join_set.join_next().await
{
match result {
Ok(r) => results.push(Ok(r)),
Err(e) => {
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
results.push(Err(e));
}
}
}
// Limit concurrency by draining when we hit the cap
if join_set.len() >= concurrency
&& let Some(Ok((path, result))) = join_set.join_next().await
{
match result {
Ok(r) => results.push(Ok(r)),
Err(e) => {
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
results.push(Err(e));
},
}
}
}
// Drain remaining tasks
while let Some(Ok((path, result))) = join_set.join_next().await {
match result {
Ok(r) => results.push(Ok(r)),
Err(e) => {
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
results.push(Err(e));
}
}
// Drain remaining tasks
while let Some(Ok((path, result))) = join_set.join_next().await {
match result {
Ok(r) => results.push(Ok(r)),
Err(e) => {
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
results.push(Err(e));
},
}
}
Ok(results)
Ok(results)
}
/// Extract markdown links from a file and store them in the database.
async fn extract_and_store_links(
storage: &DynStorageBackend,
media_id: MediaId,
path: &Path,
storage: &DynStorageBackend,
media_id: MediaId,
path: &Path,
) -> Result<()> {
// Read file content
let content = tokio::fs::read_to_string(path).await.map_err(|e| {
PinakesError::Io(std::io::Error::new(
std::io::ErrorKind::Other,
format!("failed to read markdown file for link extraction: {e}"),
))
})?;
// Read file content
let content = tokio::fs::read_to_string(path).await.map_err(|e| {
PinakesError::Io(std::io::Error::new(
std::io::ErrorKind::Other,
format!("failed to read markdown file for link extraction: {e}"),
))
})?;
// Extract links
let extracted_links = links::extract_links(media_id, &content);
// Extract links
let extracted_links = links::extract_links(media_id, &content);
if extracted_links.is_empty() {
// No links found, just mark as extracted
storage.mark_links_extracted(media_id).await?;
return Ok(());
}
// Clear any existing links for this media (in case of re-import)
storage.clear_links_for_media(media_id).await?;
// Save extracted links
storage
.save_markdown_links(media_id, &extracted_links)
.await?;
// Mark links as extracted
if extracted_links.is_empty() {
// No links found, just mark as extracted
storage.mark_links_extracted(media_id).await?;
return Ok(());
}
tracing::debug!(
media_id = %media_id,
link_count = extracted_links.len(),
"extracted markdown links"
);
// Clear any existing links for this media (in case of re-import)
storage.clear_links_for_media(media_id).await?;
Ok(())
// Save extracted links
storage
.save_markdown_links(media_id, &extracted_links)
.await?;
// Mark links as extracted
storage.mark_links_extracted(media_id).await?;
tracing::debug!(
media_id = %media_id,
link_count = extracted_links.len(),
"extracted markdown links"
);
Ok(())
}

View file

@ -1,373 +1,384 @@
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::{
collections::{HashMap, HashSet},
path::{Path, PathBuf},
};
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::error::Result;
use crate::hash::compute_file_hash;
use crate::media_type::MediaType;
use crate::model::{ContentHash, MediaId};
use crate::storage::DynStorageBackend;
use crate::{
error::Result,
hash::compute_file_hash,
media_type::MediaType,
model::{ContentHash, MediaId},
storage::DynStorageBackend,
};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrphanReport {
/// Media items whose files no longer exist on disk.
pub orphaned_ids: Vec<MediaId>,
/// Files on disk that are not tracked in the database.
pub untracked_paths: Vec<PathBuf>,
/// Files that appear to have moved (same hash, different path).
pub moved_files: Vec<(MediaId, PathBuf, PathBuf)>,
/// Media items whose files no longer exist on disk.
pub orphaned_ids: Vec<MediaId>,
/// Files on disk that are not tracked in the database.
pub untracked_paths: Vec<PathBuf>,
/// Files that appear to have moved (same hash, different path).
pub moved_files: Vec<(MediaId, PathBuf, PathBuf)>,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OrphanAction {
Delete,
Ignore,
Delete,
Ignore,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerificationReport {
pub verified: usize,
pub mismatched: Vec<(MediaId, String, String)>,
pub missing: Vec<MediaId>,
pub errors: Vec<(MediaId, String)>,
pub verified: usize,
pub mismatched: Vec<(MediaId, String, String)>,
pub missing: Vec<MediaId>,
pub errors: Vec<(MediaId, String)>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IntegrityStatus {
Unverified,
Verified,
Mismatch,
Missing,
Unverified,
Verified,
Mismatch,
Missing,
}
impl std::fmt::Display for IntegrityStatus {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Unverified => write!(f, "unverified"),
Self::Verified => write!(f, "verified"),
Self::Mismatch => write!(f, "mismatch"),
Self::Missing => write!(f, "missing"),
}
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Unverified => write!(f, "unverified"),
Self::Verified => write!(f, "verified"),
Self::Mismatch => write!(f, "mismatch"),
Self::Missing => write!(f, "missing"),
}
}
}
impl std::str::FromStr for IntegrityStatus {
type Err = String;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"unverified" => Ok(Self::Unverified),
"verified" => Ok(Self::Verified),
"mismatch" => Ok(Self::Mismatch),
"missing" => Ok(Self::Missing),
_ => Err(format!("unknown integrity status: {s}")),
}
type Err = String;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"unverified" => Ok(Self::Unverified),
"verified" => Ok(Self::Verified),
"mismatch" => Ok(Self::Mismatch),
"missing" => Ok(Self::Missing),
_ => Err(format!("unknown integrity status: {s}")),
}
}
}
/// Detect orphaned media items (files that no longer exist on disk),
/// untracked files (files on disk not in database), and moved files (same hash, different path).
pub async fn detect_orphans(storage: &DynStorageBackend) -> Result<OrphanReport> {
let media_paths = storage.list_media_paths().await?;
let mut orphaned_ids = Vec::new();
/// untracked files (files on disk not in database), and moved files (same hash,
/// different path).
pub async fn detect_orphans(
storage: &DynStorageBackend,
) -> Result<OrphanReport> {
let media_paths = storage.list_media_paths().await?;
let mut orphaned_ids = Vec::new();
// Build hash index: ContentHash -> Vec<(MediaId, PathBuf)>
let mut hash_index: HashMap<ContentHash, Vec<(MediaId, PathBuf)>> = HashMap::new();
for (id, path, hash) in &media_paths {
hash_index
.entry(hash.clone())
.or_default()
.push((*id, path.clone()));
// Build hash index: ContentHash -> Vec<(MediaId, PathBuf)>
let mut hash_index: HashMap<ContentHash, Vec<(MediaId, PathBuf)>> =
HashMap::new();
for (id, path, hash) in &media_paths {
hash_index
.entry(hash.clone())
.or_default()
.push((*id, path.clone()));
}
// Detect orphaned files (in DB but not on disk)
for (id, path, _hash) in &media_paths {
if !path.exists() {
orphaned_ids.push(*id);
}
}
// Detect orphaned files (in DB but not on disk)
for (id, path, _hash) in &media_paths {
if !path.exists() {
orphaned_ids.push(*id);
}
}
// Detect moved files (orphaned items with same hash existing elsewhere)
let moved_files =
detect_moved_files(&orphaned_ids, &media_paths, &hash_index);
// Detect moved files (orphaned items with same hash existing elsewhere)
let moved_files = detect_moved_files(&orphaned_ids, &media_paths, &hash_index);
// Detect untracked files (on disk but not in DB)
let untracked_paths = detect_untracked_files(storage, &media_paths).await?;
// Detect untracked files (on disk but not in DB)
let untracked_paths = detect_untracked_files(storage, &media_paths).await?;
info!(
orphaned = orphaned_ids.len(),
untracked = untracked_paths.len(),
moved = moved_files.len(),
total = media_paths.len(),
"orphan detection complete"
);
info!(
orphaned = orphaned_ids.len(),
untracked = untracked_paths.len(),
moved = moved_files.len(),
total = media_paths.len(),
"orphan detection complete"
);
Ok(OrphanReport {
orphaned_ids,
untracked_paths,
moved_files,
})
Ok(OrphanReport {
orphaned_ids,
untracked_paths,
moved_files,
})
}
/// Detect files that appear to have moved (same content hash, different path).
fn detect_moved_files(
orphaned_ids: &[MediaId],
media_paths: &[(MediaId, PathBuf, ContentHash)],
hash_index: &HashMap<ContentHash, Vec<(MediaId, PathBuf)>>,
orphaned_ids: &[MediaId],
media_paths: &[(MediaId, PathBuf, ContentHash)],
hash_index: &HashMap<ContentHash, Vec<(MediaId, PathBuf)>>,
) -> Vec<(MediaId, PathBuf, PathBuf)> {
let mut moved = Vec::new();
let mut moved = Vec::new();
// Build lookup map for orphaned items: MediaId -> (PathBuf, ContentHash)
let orphaned_map: HashMap<MediaId, (PathBuf, ContentHash)> = media_paths
.iter()
.filter(|(id, _, _)| orphaned_ids.contains(id))
.map(|(id, path, hash)| (*id, (path.clone(), hash.clone())))
.collect();
// Build lookup map for orphaned items: MediaId -> (PathBuf, ContentHash)
let orphaned_map: HashMap<MediaId, (PathBuf, ContentHash)> = media_paths
.iter()
.filter(|(id, ..)| orphaned_ids.contains(id))
.map(|(id, path, hash)| (*id, (path.clone(), hash.clone())))
.collect();
// For each orphaned item, check if there's another file with the same hash
for (orphaned_id, (old_path, hash)) in &orphaned_map {
if let Some(items_with_hash) = hash_index.get(hash) {
// Find other items with same hash that exist on disk
for (other_id, new_path) in items_with_hash {
// Skip if it's the same item
if other_id == orphaned_id {
continue;
}
// Check if the new path exists
if new_path.exists() {
moved.push((*orphaned_id, old_path.clone(), new_path.clone()));
// Only report first match (most likely candidate)
break;
}
}
// For each orphaned item, check if there's another file with the same hash
for (orphaned_id, (old_path, hash)) in &orphaned_map {
if let Some(items_with_hash) = hash_index.get(hash) {
// Find other items with same hash that exist on disk
for (other_id, new_path) in items_with_hash {
// Skip if it's the same item
if other_id == orphaned_id {
continue;
}
}
moved
// Check if the new path exists
if new_path.exists() {
moved.push((*orphaned_id, old_path.clone(), new_path.clone()));
// Only report first match (most likely candidate)
break;
}
}
}
}
moved
}
/// Detect files on disk that are not tracked in the database.
async fn detect_untracked_files(
storage: &DynStorageBackend,
media_paths: &[(MediaId, PathBuf, ContentHash)],
storage: &DynStorageBackend,
media_paths: &[(MediaId, PathBuf, ContentHash)],
) -> Result<Vec<PathBuf>> {
// Get root directories
let roots = storage.list_root_dirs().await?;
if roots.is_empty() {
return Ok(Vec::new());
}
// Get root directories
let roots = storage.list_root_dirs().await?;
if roots.is_empty() {
return Ok(Vec::new());
}
// Build set of tracked paths for fast lookup
let tracked_paths: HashSet<PathBuf> = media_paths
.iter()
.map(|(_, path, _)| path.clone())
.collect();
// Build set of tracked paths for fast lookup
let tracked_paths: HashSet<PathBuf> = media_paths
.iter()
.map(|(_, path, _)| path.clone())
.collect();
// Get ignore patterns (we'll need to load config somehow, for now use empty)
let ignore_patterns: Vec<String> = vec![
".*".to_string(),
"node_modules".to_string(),
"__pycache__".to_string(),
"target".to_string(),
];
// Get ignore patterns (we'll need to load config somehow, for now use empty)
let ignore_patterns: Vec<String> = vec![
".*".to_string(),
"node_modules".to_string(),
"__pycache__".to_string(),
"target".to_string(),
];
// Walk filesystem for each root in parallel (limit concurrency to 4)
let mut filesystem_paths = HashSet::new();
let mut tasks = tokio::task::JoinSet::new();
// Walk filesystem for each root in parallel (limit concurrency to 4)
let mut filesystem_paths = HashSet::new();
let mut tasks = tokio::task::JoinSet::new();
for root in roots {
let ignore_patterns = ignore_patterns.clone();
tasks.spawn_blocking(move || -> Result<Vec<PathBuf>> {
let mut paths = Vec::new();
for root in roots {
let ignore_patterns = ignore_patterns.clone();
tasks.spawn_blocking(move || -> Result<Vec<PathBuf>> {
let mut paths = Vec::new();
let walker = walkdir::WalkDir::new(&root)
.follow_links(false)
.into_iter()
.filter_entry(|e| {
// Skip directories that match ignore patterns
if e.file_type().is_dir() {
let name = e.file_name().to_string_lossy();
for pattern in &ignore_patterns {
if pattern.starts_with("*.")
&& let Some(ext) = pattern.strip_prefix("*.")
&& name.ends_with(ext)
{
// Extension pattern
return false;
} else if pattern.contains('*') {
// Glob pattern - simplified matching
let pattern_without_stars = pattern.replace('*', "");
if name.contains(&pattern_without_stars) {
return false;
}
} else if name.as_ref() == pattern
|| name.starts_with(&format!("{pattern}."))
{
// Exact match or starts with pattern
return false;
}
}
}
true
});
for entry in walker {
match entry {
Ok(entry) => {
let path = entry.path();
// Only process files
if !path.is_file() {
continue;
}
// Check if it's a supported media type
if MediaType::from_path(path).is_some() {
paths.push(path.to_path_buf());
}
}
Err(e) => {
warn!(error = %e, "failed to read directory entry");
}
let walker = walkdir::WalkDir::new(&root)
.follow_links(false)
.into_iter()
.filter_entry(|e| {
// Skip directories that match ignore patterns
if e.file_type().is_dir() {
let name = e.file_name().to_string_lossy();
for pattern in &ignore_patterns {
if pattern.starts_with("*.")
&& let Some(ext) = pattern.strip_prefix("*.")
&& name.ends_with(ext)
{
// Extension pattern
return false;
} else if pattern.contains('*') {
// Glob pattern - simplified matching
let pattern_without_stars = pattern.replace('*', "");
if name.contains(&pattern_without_stars) {
return false;
}
} else if name.as_ref() == pattern
|| name.starts_with(&format!("{pattern}."))
{
// Exact match or starts with pattern
return false;
}
}
Ok(paths)
}
true
});
}
// Collect results from all tasks
while let Some(result) = tasks.join_next().await {
match result {
Ok(Ok(paths)) => {
filesystem_paths.extend(paths);
for entry in walker {
match entry {
Ok(entry) => {
let path = entry.path();
// Only process files
if !path.is_file() {
continue;
}
Ok(Err(e)) => {
warn!(error = %e, "failed to walk directory");
}
Err(e) => {
warn!(error = %e, "task join error");
// Check if it's a supported media type
if MediaType::from_path(path).is_some() {
paths.push(path.to_path_buf());
}
},
Err(e) => {
warn!(error = %e, "failed to read directory entry");
},
}
}
Ok(paths)
});
}
// Collect results from all tasks
while let Some(result) = tasks.join_next().await {
match result {
Ok(Ok(paths)) => {
filesystem_paths.extend(paths);
},
Ok(Err(e)) => {
warn!(error = %e, "failed to walk directory");
},
Err(e) => {
warn!(error = %e, "task join error");
},
}
}
// Compute set difference: filesystem - tracked
let untracked: Vec<PathBuf> = filesystem_paths
.difference(&tracked_paths)
.cloned()
.collect();
// Compute set difference: filesystem - tracked
let untracked: Vec<PathBuf> = filesystem_paths
.difference(&tracked_paths)
.cloned()
.collect();
Ok(untracked)
Ok(untracked)
}
/// Resolve orphaned media items by deleting them from the database.
pub async fn resolve_orphans(
storage: &DynStorageBackend,
action: OrphanAction,
ids: &[MediaId],
storage: &DynStorageBackend,
action: OrphanAction,
ids: &[MediaId],
) -> Result<u64> {
match action {
OrphanAction::Delete => {
let count = storage.batch_delete_media(ids).await?;
info!(count, "resolved orphans by deletion");
Ok(count)
}
OrphanAction::Ignore => {
info!(count = ids.len(), "orphans ignored");
Ok(0)
}
}
match action {
OrphanAction::Delete => {
let count = storage.batch_delete_media(ids).await?;
info!(count, "resolved orphans by deletion");
Ok(count)
},
OrphanAction::Ignore => {
info!(count = ids.len(), "orphans ignored");
Ok(0)
},
}
}
/// Verify integrity of media files by recomputing hashes and comparing.
pub async fn verify_integrity(
storage: &DynStorageBackend,
media_ids: Option<&[MediaId]>,
storage: &DynStorageBackend,
media_ids: Option<&[MediaId]>,
) -> Result<VerificationReport> {
let all_paths = storage.list_media_paths().await?;
let all_paths = storage.list_media_paths().await?;
let paths_to_check: Vec<(MediaId, PathBuf, ContentHash)> = if let Some(ids) = media_ids {
let id_set: std::collections::HashSet<MediaId> = ids.iter().copied().collect();
all_paths
.into_iter()
.filter(|(id, _, _)| id_set.contains(id))
.collect()
let paths_to_check: Vec<(MediaId, PathBuf, ContentHash)> =
if let Some(ids) = media_ids {
let id_set: std::collections::HashSet<MediaId> =
ids.iter().copied().collect();
all_paths
.into_iter()
.filter(|(id, ..)| id_set.contains(id))
.collect()
} else {
all_paths
all_paths
};
let mut report = VerificationReport {
verified: 0,
mismatched: Vec::new(),
missing: Vec::new(),
errors: Vec::new(),
};
let mut report = VerificationReport {
verified: 0,
mismatched: Vec::new(),
missing: Vec::new(),
errors: Vec::new(),
};
for (id, path, expected_hash) in paths_to_check {
if !path.exists() {
report.missing.push(id);
continue;
}
match compute_file_hash(&path).await {
Ok(actual_hash) => {
if actual_hash.0 == expected_hash.0 {
report.verified += 1;
} else {
report
.mismatched
.push((id, expected_hash.0.clone(), actual_hash.0));
}
}
Err(e) => {
report.errors.push((id, e.to_string()));
}
}
for (id, path, expected_hash) in paths_to_check {
if !path.exists() {
report.missing.push(id);
continue;
}
info!(
verified = report.verified,
mismatched = report.mismatched.len(),
missing = report.missing.len(),
errors = report.errors.len(),
"integrity verification complete"
);
match compute_file_hash(&path).await {
Ok(actual_hash) => {
if actual_hash.0 == expected_hash.0 {
report.verified += 1;
} else {
report
.mismatched
.push((id, expected_hash.0.clone(), actual_hash.0));
}
},
Err(e) => {
report.errors.push((id, e.to_string()));
},
}
}
Ok(report)
info!(
verified = report.verified,
mismatched = report.mismatched.len(),
missing = report.missing.len(),
errors = report.errors.len(),
"integrity verification complete"
);
Ok(report)
}
/// Clean up orphaned thumbnail files that don't correspond to any media item.
pub async fn cleanup_orphaned_thumbnails(
storage: &DynStorageBackend,
thumbnail_dir: &Path,
storage: &DynStorageBackend,
thumbnail_dir: &Path,
) -> Result<usize> {
let media_paths = storage.list_media_paths().await?;
let known_ids: std::collections::HashSet<String> = media_paths
.iter()
.map(|(id, _, _)| id.0.to_string())
.collect();
let media_paths = storage.list_media_paths().await?;
let known_ids: std::collections::HashSet<String> = media_paths
.iter()
.map(|(id, ..)| id.0.to_string())
.collect();
let mut removed = 0;
let mut removed = 0;
if thumbnail_dir.exists() {
let entries = std::fs::read_dir(thumbnail_dir)?;
for entry in entries.flatten() {
let path = entry.path();
if let Some(stem) = path.file_stem().and_then(|s| s.to_str())
&& !known_ids.contains(stem)
{
if let Err(e) = std::fs::remove_file(&path) {
warn!(path = %path.display(), error = %e, "failed to remove orphaned thumbnail");
} else {
removed += 1;
}
}
if thumbnail_dir.exists() {
let entries = std::fs::read_dir(thumbnail_dir)?;
for entry in entries.flatten() {
let path = entry.path();
if let Some(stem) = path.file_stem().and_then(|s| s.to_str())
&& !known_ids.contains(stem)
{
if let Err(e) = std::fs::remove_file(&path) {
warn!(path = %path.display(), error = %e, "failed to remove orphaned thumbnail");
} else {
removed += 1;
}
}
}
}
info!(removed, "orphaned thumbnail cleanup complete");
Ok(removed)
info!(removed, "orphaned thumbnail cleanup complete");
Ok(removed)
}

View file

@ -1,6 +1,4 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::{collections::HashMap, path::PathBuf, sync::Arc};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
@ -14,258 +12,268 @@ use crate::model::MediaId;
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum JobKind {
Scan {
path: Option<PathBuf>,
},
GenerateThumbnails {
media_ids: Vec<MediaId>,
},
VerifyIntegrity {
media_ids: Vec<MediaId>,
},
OrphanDetection,
CleanupThumbnails,
Export {
format: ExportFormat,
destination: PathBuf,
},
Transcode {
media_id: MediaId,
profile: String,
},
Enrich {
media_ids: Vec<MediaId>,
},
CleanupAnalytics,
Scan {
path: Option<PathBuf>,
},
GenerateThumbnails {
media_ids: Vec<MediaId>,
},
VerifyIntegrity {
media_ids: Vec<MediaId>,
},
OrphanDetection,
CleanupThumbnails,
Export {
format: ExportFormat,
destination: PathBuf,
},
Transcode {
media_id: MediaId,
profile: String,
},
Enrich {
media_ids: Vec<MediaId>,
},
CleanupAnalytics,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ExportFormat {
Json,
Csv,
Json,
Csv,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "state")]
pub enum JobStatus {
Pending,
Running { progress: f32, message: String },
Completed { result: Value },
Failed { error: String },
Cancelled,
Pending,
Running { progress: f32, message: String },
Completed { result: Value },
Failed { error: String },
Cancelled,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Job {
pub id: Uuid,
pub kind: JobKind,
pub status: JobStatus,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub id: Uuid,
pub kind: JobKind,
pub status: JobStatus,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
struct WorkerItem {
job_id: Uuid,
kind: JobKind,
cancel: CancellationToken,
job_id: Uuid,
kind: JobKind,
cancel: CancellationToken,
}
pub struct JobQueue {
jobs: Arc<RwLock<HashMap<Uuid, Job>>>,
cancellations: Arc<RwLock<HashMap<Uuid, CancellationToken>>>,
tx: mpsc::Sender<WorkerItem>,
jobs: Arc<RwLock<HashMap<Uuid, Job>>>,
cancellations: Arc<RwLock<HashMap<Uuid, CancellationToken>>>,
tx: mpsc::Sender<WorkerItem>,
}
impl JobQueue {
/// Create a new job queue and spawn `worker_count` background workers.
///
/// The `executor` callback is invoked for each job; it receives the job kind,
/// a progress-reporting callback, and a cancellation token.
pub fn new<F>(worker_count: usize, executor: F) -> Arc<Self>
where
F: Fn(
Uuid,
JobKind,
CancellationToken,
Arc<RwLock<HashMap<Uuid, Job>>>,
) -> tokio::task::JoinHandle<()>
+ Send
+ Sync
+ 'static,
{
let (tx, rx) = mpsc::channel::<WorkerItem>(256);
let rx = Arc::new(tokio::sync::Mutex::new(rx));
let jobs: Arc<RwLock<HashMap<Uuid, Job>>> = Arc::new(RwLock::new(HashMap::new()));
let cancellations: Arc<RwLock<HashMap<Uuid, CancellationToken>>> =
Arc::new(RwLock::new(HashMap::new()));
/// Create a new job queue and spawn `worker_count` background workers.
///
/// The `executor` callback is invoked for each job; it receives the job kind,
/// a progress-reporting callback, and a cancellation token.
pub fn new<F>(worker_count: usize, executor: F) -> Arc<Self>
where
F: Fn(
Uuid,
JobKind,
CancellationToken,
Arc<RwLock<HashMap<Uuid, Job>>>,
) -> tokio::task::JoinHandle<()>
+ Send
+ Sync
+ 'static,
{
let (tx, rx) = mpsc::channel::<WorkerItem>(256);
let rx = Arc::new(tokio::sync::Mutex::new(rx));
let jobs: Arc<RwLock<HashMap<Uuid, Job>>> =
Arc::new(RwLock::new(HashMap::new()));
let cancellations: Arc<RwLock<HashMap<Uuid, CancellationToken>>> =
Arc::new(RwLock::new(HashMap::new()));
let executor = Arc::new(executor);
let executor = Arc::new(executor);
for _ in 0..worker_count {
let rx = rx.clone();
let jobs = jobs.clone();
let cancellations = cancellations.clone();
let executor = executor.clone();
for _ in 0..worker_count {
let rx = rx.clone();
let jobs = jobs.clone();
let cancellations = cancellations.clone();
let executor = executor.clone();
tokio::spawn(async move {
loop {
let item = {
let mut guard = rx.lock().await;
guard.recv().await
};
let Some(item) = item else { break };
tokio::spawn(async move {
loop {
let item = {
let mut guard = rx.lock().await;
guard.recv().await
};
let Some(item) = item else { break };
// Mark as running
{
let mut map = jobs.write().await;
if let Some(job) = map.get_mut(&item.job_id) {
job.status = JobStatus::Running {
progress: 0.0,
message: "starting".to_string(),
};
job.updated_at = Utc::now();
}
}
let handle = executor(item.job_id, item.kind, item.cancel, jobs.clone());
let _ = handle.await;
// Clean up cancellation token
cancellations.write().await.remove(&item.job_id);
}
});
}
Arc::new(Self {
jobs,
cancellations,
tx,
})
}
/// Submit a new job, returning its ID.
pub async fn submit(&self, kind: JobKind) -> Uuid {
let id = Uuid::now_v7();
let now = Utc::now();
let cancel = CancellationToken::new();
let job = Job {
id,
kind: kind.clone(),
status: JobStatus::Pending,
created_at: now,
updated_at: now,
};
self.jobs.write().await.insert(id, job);
self.cancellations.write().await.insert(id, cancel.clone());
let item = WorkerItem {
job_id: id,
kind,
cancel,
};
// If the channel is full we still record the job — it'll stay Pending
let _ = self.tx.send(item).await;
id
}
/// Get the status of a job.
pub async fn status(&self, id: Uuid) -> Option<Job> {
self.jobs.read().await.get(&id).cloned()
}
/// List all jobs, most recent first.
pub async fn list(&self) -> Vec<Job> {
let map = self.jobs.read().await;
let mut jobs: Vec<Job> = map.values().cloned().collect();
jobs.sort_by_key(|job| std::cmp::Reverse(job.created_at));
jobs
}
/// Cancel a running or pending job.
pub async fn cancel(&self, id: Uuid) -> bool {
if let Some(token) = self.cancellations.read().await.get(&id) {
token.cancel();
let mut map = self.jobs.write().await;
if let Some(job) = map.get_mut(&id) {
job.status = JobStatus::Cancelled;
job.updated_at = Utc::now();
// Mark as running
{
let mut map = jobs.write().await;
if let Some(job) = map.get_mut(&item.job_id) {
job.status = JobStatus::Running {
progress: 0.0,
message: "starting".to_string(),
};
job.updated_at = Utc::now();
}
true
} else {
false
}
let handle =
executor(item.job_id, item.kind, item.cancel, jobs.clone());
let _ = handle.await;
// Clean up cancellation token
cancellations.write().await.remove(&item.job_id);
}
});
}
/// Update a job's progress. Called by executors.
pub async fn update_progress(
jobs: &Arc<RwLock<HashMap<Uuid, Job>>>,
id: Uuid,
progress: f32,
message: String,
) {
let mut map = jobs.write().await;
if let Some(job) = map.get_mut(&id) {
job.status = JobStatus::Running { progress, message };
job.updated_at = Utc::now();
}
Arc::new(Self {
jobs,
cancellations,
tx,
})
}
/// Submit a new job, returning its ID.
pub async fn submit(&self, kind: JobKind) -> Uuid {
let id = Uuid::now_v7();
let now = Utc::now();
let cancel = CancellationToken::new();
let job = Job {
id,
kind: kind.clone(),
status: JobStatus::Pending,
created_at: now,
updated_at: now,
};
self.jobs.write().await.insert(id, job);
self.cancellations.write().await.insert(id, cancel.clone());
let item = WorkerItem {
job_id: id,
kind,
cancel,
};
// If the channel is full we still record the job — it'll stay Pending
let _ = self.tx.send(item).await;
id
}
/// Get the status of a job.
pub async fn status(&self, id: Uuid) -> Option<Job> {
self.jobs.read().await.get(&id).cloned()
}
/// List all jobs, most recent first.
pub async fn list(&self) -> Vec<Job> {
let map = self.jobs.read().await;
let mut jobs: Vec<Job> = map.values().cloned().collect();
jobs.sort_by_key(|job| std::cmp::Reverse(job.created_at));
jobs
}
/// Cancel a running or pending job.
pub async fn cancel(&self, id: Uuid) -> bool {
if let Some(token) = self.cancellations.read().await.get(&id) {
token.cancel();
let mut map = self.jobs.write().await;
if let Some(job) = map.get_mut(&id) {
job.status = JobStatus::Cancelled;
job.updated_at = Utc::now();
}
true
} else {
false
}
}
/// Update a job's progress. Called by executors.
pub async fn update_progress(
jobs: &Arc<RwLock<HashMap<Uuid, Job>>>,
id: Uuid,
progress: f32,
message: String,
) {
let mut map = jobs.write().await;
if let Some(job) = map.get_mut(&id) {
job.status = JobStatus::Running { progress, message };
job.updated_at = Utc::now();
}
}
/// Mark a job as completed.
pub async fn complete(
jobs: &Arc<RwLock<HashMap<Uuid, Job>>>,
id: Uuid,
result: Value,
) {
let mut map = jobs.write().await;
if let Some(job) = map.get_mut(&id) {
job.status = JobStatus::Completed { result };
job.updated_at = Utc::now();
}
}
/// Mark a job as failed.
pub async fn fail(
jobs: &Arc<RwLock<HashMap<Uuid, Job>>>,
id: Uuid,
error: String,
) {
let mut map = jobs.write().await;
if let Some(job) = map.get_mut(&id) {
job.status = JobStatus::Failed { error };
job.updated_at = Utc::now();
}
}
/// Get job queue statistics
pub async fn stats(&self) -> JobQueueStats {
let jobs = self.jobs.read().await;
let mut pending = 0;
let mut running = 0;
let mut completed = 0;
let mut failed = 0;
for job in jobs.values() {
match job.status {
JobStatus::Pending => pending += 1,
JobStatus::Running { .. } => running += 1,
JobStatus::Completed { .. } => completed += 1,
JobStatus::Failed { .. } => failed += 1,
JobStatus::Cancelled => {}, // Don't count cancelled jobs
}
}
/// Mark a job as completed.
pub async fn complete(jobs: &Arc<RwLock<HashMap<Uuid, Job>>>, id: Uuid, result: Value) {
let mut map = jobs.write().await;
if let Some(job) = map.get_mut(&id) {
job.status = JobStatus::Completed { result };
job.updated_at = Utc::now();
}
}
/// Mark a job as failed.
pub async fn fail(jobs: &Arc<RwLock<HashMap<Uuid, Job>>>, id: Uuid, error: String) {
let mut map = jobs.write().await;
if let Some(job) = map.get_mut(&id) {
job.status = JobStatus::Failed { error };
job.updated_at = Utc::now();
}
}
/// Get job queue statistics
pub async fn stats(&self) -> JobQueueStats {
let jobs = self.jobs.read().await;
let mut pending = 0;
let mut running = 0;
let mut completed = 0;
let mut failed = 0;
for job in jobs.values() {
match job.status {
JobStatus::Pending => pending += 1,
JobStatus::Running { .. } => running += 1,
JobStatus::Completed { .. } => completed += 1,
JobStatus::Failed { .. } => failed += 1,
JobStatus::Cancelled => {} // Don't count cancelled jobs
}
}
JobQueueStats {
pending,
running,
completed,
failed,
total: jobs.len(),
}
JobQueueStats {
pending,
running,
completed,
failed,
total: jobs.len(),
}
}
}
/// Statistics about the job queue
#[derive(Debug, Clone, Default)]
pub struct JobQueueStats {
pub pending: usize,
pub running: usize,
pub completed: usize,
pub failed: usize,
pub total: usize,
pub pending: usize,
pub running: usize,
pub completed: usize,
pub failed: usize,
pub total: usize,
}

View file

@ -1,4 +1,5 @@
//! Markdown link extraction and management for Obsidian-style bidirectional links.
//! Markdown link extraction and management for Obsidian-style bidirectional
//! links.
//!
//! This module provides:
//! - Wikilink extraction (`[[target]]` and `[[target|display]]`)
@ -24,254 +25,289 @@ const CONTEXT_CHARS_AFTER: usize = 50;
/// - Wikilinks: `[[target]]` and `[[target|display text]]`
/// - Embeds: `![[target]]`
/// - Markdown links: `[text](path)` (internal paths only, no http/https)
pub fn extract_links(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
let mut links = Vec::new();
pub fn extract_links(
source_media_id: MediaId,
content: &str,
) -> Vec<MarkdownLink> {
let mut links = Vec::new();
// Extract wikilinks: [[target]] or [[target|display]]
links.extend(extract_wikilinks(source_media_id, content));
// Extract wikilinks: [[target]] or [[target|display]]
links.extend(extract_wikilinks(source_media_id, content));
// Extract embeds: ![[target]]
links.extend(extract_embeds(source_media_id, content));
// Extract embeds: ![[target]]
links.extend(extract_embeds(source_media_id, content));
// Extract markdown links: [text](path)
links.extend(extract_markdown_links(source_media_id, content));
// Extract markdown links: [text](path)
links.extend(extract_markdown_links(source_media_id, content));
links
links
}
/// Extract wikilinks from content.
/// Matches: `[[target]]` or `[[target|display text]]` but NOT `![[...]]` (embeds)
fn extract_wikilinks(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
// Match [[...]] - we'll manually filter out embeds that are preceded by !
let re = Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap();
let mut links = Vec::new();
/// Matches: `[[target]]` or `[[target|display text]]` but NOT `![[...]]`
/// (embeds)
fn extract_wikilinks(
source_media_id: MediaId,
content: &str,
) -> Vec<MarkdownLink> {
// Match [[...]] - we'll manually filter out embeds that are preceded by !
let re = Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap();
let mut links = Vec::new();
for (line_num, line) in content.lines().enumerate() {
for cap in re.captures_iter(line) {
let full_match = cap.get(0).unwrap();
let match_start = full_match.start();
for (line_num, line) in content.lines().enumerate() {
for cap in re.captures_iter(line) {
let full_match = cap.get(0).unwrap();
let match_start = full_match.start();
// Check if preceded by ! (which would make it an embed, not a wikilink)
if match_start > 0 {
let bytes = line.as_bytes();
if bytes.get(match_start - 1) == Some(&b'!') {
continue; // Skip embeds
}
}
let target = cap.get(1).unwrap().as_str().trim();
let display_text = cap.get(2).map(|m| m.as_str().trim().to_string());
let context = extract_context(content, line_num, full_match.start(), full_match.end());
links.push(MarkdownLink {
id: Uuid::now_v7(),
source_media_id,
target_path: target.to_string(),
target_media_id: None, // Will be resolved later
link_type: LinkType::Wikilink,
link_text: display_text.or_else(|| Some(target.to_string())),
line_number: Some(line_num as i32 + 1), // 1-indexed
context: Some(context),
created_at: chrono::Utc::now(),
});
// Check if preceded by ! (which would make it an embed, not a wikilink)
if match_start > 0 {
let bytes = line.as_bytes();
if bytes.get(match_start - 1) == Some(&b'!') {
continue; // Skip embeds
}
}
}
links
let target = cap.get(1).unwrap().as_str().trim();
let display_text = cap.get(2).map(|m| m.as_str().trim().to_string());
let context = extract_context(
content,
line_num,
full_match.start(),
full_match.end(),
);
links.push(MarkdownLink {
id: Uuid::now_v7(),
source_media_id,
target_path: target.to_string(),
target_media_id: None, // Will be resolved later
link_type: LinkType::Wikilink,
link_text: display_text.or_else(|| Some(target.to_string())),
line_number: Some(line_num as i32 + 1), // 1-indexed
context: Some(context),
created_at: chrono::Utc::now(),
});
}
}
links
}
/// Extract embeds from content.
/// Matches: `![[target]]`
fn extract_embeds(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
let re = Regex::new(r"!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap();
let mut links = Vec::new();
fn extract_embeds(
source_media_id: MediaId,
content: &str,
) -> Vec<MarkdownLink> {
let re = Regex::new(r"!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").unwrap();
let mut links = Vec::new();
for (line_num, line) in content.lines().enumerate() {
for cap in re.captures_iter(line) {
let full_match = cap.get(0).unwrap();
let target = cap.get(1).unwrap().as_str().trim();
let display_text = cap.get(2).map(|m| m.as_str().trim().to_string());
for (line_num, line) in content.lines().enumerate() {
for cap in re.captures_iter(line) {
let full_match = cap.get(0).unwrap();
let target = cap.get(1).unwrap().as_str().trim();
let display_text = cap.get(2).map(|m| m.as_str().trim().to_string());
let context = extract_context(content, line_num, full_match.start(), full_match.end());
let context = extract_context(
content,
line_num,
full_match.start(),
full_match.end(),
);
links.push(MarkdownLink {
id: Uuid::now_v7(),
source_media_id,
target_path: target.to_string(),
target_media_id: None,
link_type: LinkType::Embed,
link_text: display_text.or_else(|| Some(target.to_string())),
line_number: Some(line_num as i32 + 1),
context: Some(context),
created_at: chrono::Utc::now(),
});
}
links.push(MarkdownLink {
id: Uuid::now_v7(),
source_media_id,
target_path: target.to_string(),
target_media_id: None,
link_type: LinkType::Embed,
link_text: display_text.or_else(|| Some(target.to_string())),
line_number: Some(line_num as i32 + 1),
context: Some(context),
created_at: chrono::Utc::now(),
});
}
}
links
links
}
/// Extract markdown links from content.
/// Matches: `[text](path)` but only for internal paths (no http/https)
fn extract_markdown_links(source_media_id: MediaId, content: &str) -> Vec<MarkdownLink> {
// Match [text](path) where path doesn't start with http:// or https://
let re = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
let mut links = Vec::new();
fn extract_markdown_links(
source_media_id: MediaId,
content: &str,
) -> Vec<MarkdownLink> {
// Match [text](path) where path doesn't start with http:// or https://
let re = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
let mut links = Vec::new();
for (line_num, line) in content.lines().enumerate() {
for cap in re.captures_iter(line) {
let full_match = cap.get(0).unwrap();
let match_start = full_match.start();
for (line_num, line) in content.lines().enumerate() {
for cap in re.captures_iter(line) {
let full_match = cap.get(0).unwrap();
let match_start = full_match.start();
// Skip markdown images: ![alt](image.png)
// Check if the character immediately before '[' is '!'
if match_start > 0 && line.as_bytes().get(match_start - 1) == Some(&b'!') {
continue;
}
// Skip markdown images: ![alt](image.png)
// Check if the character immediately before '[' is '!'
if match_start > 0 && line.as_bytes().get(match_start - 1) == Some(&b'!')
{
continue;
}
let text = cap.get(1).unwrap().as_str().trim();
let path = cap.get(2).unwrap().as_str().trim();
let text = cap.get(1).unwrap().as_str().trim();
let path = cap.get(2).unwrap().as_str().trim();
// Skip external links
if path.starts_with("http://")
|| path.starts_with("https://")
|| path.starts_with("mailto:")
|| path.starts_with("ftp://")
{
continue;
}
// Skip external links
if path.starts_with("http://")
|| path.starts_with("https://")
|| path.starts_with("mailto:")
|| path.starts_with("ftp://")
{
continue;
}
// Skip anchor-only links
if path.starts_with('#') {
continue;
}
// Skip anchor-only links
if path.starts_with('#') {
continue;
}
// Remove any anchor from the path for resolution
let target_path = path.split('#').next().unwrap_or(path);
// Remove any anchor from the path for resolution
let target_path = path.split('#').next().unwrap_or(path);
let context = extract_context(content, line_num, full_match.start(), full_match.end());
let context = extract_context(
content,
line_num,
full_match.start(),
full_match.end(),
);
links.push(MarkdownLink {
id: Uuid::now_v7(),
source_media_id,
target_path: target_path.to_string(),
target_media_id: None,
link_type: LinkType::MarkdownLink,
link_text: Some(text.to_string()),
line_number: Some(line_num as i32 + 1),
context: Some(context),
created_at: chrono::Utc::now(),
});
}
links.push(MarkdownLink {
id: Uuid::now_v7(),
source_media_id,
target_path: target_path.to_string(),
target_media_id: None,
link_type: LinkType::MarkdownLink,
link_text: Some(text.to_string()),
line_number: Some(line_num as i32 + 1),
context: Some(context),
created_at: chrono::Utc::now(),
});
}
}
links
links
}
/// Extract surrounding context for a link.
fn extract_context(content: &str, line_num: usize, _start: usize, _end: usize) -> String {
let lines: Vec<&str> = content.lines().collect();
if line_num >= lines.len() {
return String::new();
}
fn extract_context(
content: &str,
line_num: usize,
_start: usize,
_end: usize,
) -> String {
let lines: Vec<&str> = content.lines().collect();
if line_num >= lines.len() {
return String::new();
}
let line = lines[line_num];
let line_len = line.len();
let line = lines[line_num];
let line_len = line.len();
// Get surrounding lines for context if the current line is short
if line_len < 30 && line_num > 0 {
// Include previous line
let prev = lines.get(line_num.saturating_sub(1)).unwrap_or(&"");
let next = lines.get(line_num + 1).unwrap_or(&"");
return format!("{} {} {}", prev.trim(), line.trim(), next.trim())
.chars()
.take(CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER + 20)
.collect();
}
// Get surrounding lines for context if the current line is short
if line_len < 30 && line_num > 0 {
// Include previous line
let prev = lines.get(line_num.saturating_sub(1)).unwrap_or(&"");
let next = lines.get(line_num + 1).unwrap_or(&"");
return format!("{} {} {}", prev.trim(), line.trim(), next.trim())
.chars()
.take(CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER + 20)
.collect();
}
// Truncate long lines
if line_len > CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER {
line.chars()
.take(CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER)
.collect()
} else {
line.to_string()
}
// Truncate long lines
if line_len > CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER {
line
.chars()
.take(CONTEXT_CHARS_BEFORE + CONTEXT_CHARS_AFTER)
.collect()
} else {
line.to_string()
}
}
/// Link resolution strategies for finding target media items.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolutionStrategy {
    /// Direct path match
    DirectPath,
    /// Relative to source directory
    RelativeToSource,
    /// Filename with .md extension added
    FilenameWithMd,
    /// Filename-only search (Obsidian-style)
    FilenameOnly,
}
/// Resolve a link target to possible file paths.
///
/// Returns a list of candidate paths to check, in order of preference:
/// direct/rooted path, relative to the source file's directory (with and
/// without an added `.md` extension), then filename-with-`.md` under each
/// root directory. Duplicates are removed while preserving order.
pub fn resolve_link_candidates(
    target: &str,
    source_path: &Path,
    root_dirs: &[std::path::PathBuf],
) -> Vec<std::path::PathBuf> {
    let mut candidates = Vec::new();

    // Clean up the target path
    let target = target.trim();

    // 1. Direct path - if it looks like a path
    if target.contains('/') || target.contains('\\') {
        let direct = std::path::PathBuf::from(target);
        if direct.is_absolute() {
            candidates.push(direct);
        } else {
            // Relative to each root dir
            for root in root_dirs {
                candidates.push(root.join(&direct));
            }
        }
    }

    // 2. Relative to source file's directory
    if let Some(source_dir) = source_path.parent() {
        let relative = source_dir.join(target);
        candidates.push(relative.clone());

        // Also try with .md extension
        if !target.ends_with(".md") {
            // `with_extension` replaces any existing "extension" (text after a
            // dot), so additionally append ".md" to the raw file name.
            candidates.push(relative.with_extension("md"));
            let mut with_md = relative.clone();
            with_md.set_file_name(format!(
                "{}.md",
                relative.file_name().unwrap_or_default().to_string_lossy()
            ));
            candidates.push(with_md);
        }
    }

    // 3. Filename with .md extension in root dirs
    let target_with_md = if target.ends_with(".md") {
        target.to_string()
    } else {
        format!("{}.md", target)
    };
    for root in root_dirs {
        candidates.push(root.join(&target_with_md));
    }

    // 4. Remove duplicates while preserving order
    let mut seen = std::collections::HashSet::new();
    candidates.retain(|p| seen.insert(p.clone()));

    candidates
}
/// Extract frontmatter aliases from markdown content.
@ -279,102 +315,107 @@ pub fn resolve_link_candidates(
/// Obsidian uses the `aliases` field in frontmatter to define alternative names
/// for a note that can be used in wikilinks.
pub fn extract_aliases(content: &str) -> Vec<String> {
let Ok(parsed) = gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(content) else {
return Vec::new();
};
let Ok(parsed) =
gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(content)
else {
return Vec::new();
};
let Some(data) = parsed.data else {
return Vec::new();
};
let Some(data) = parsed.data else {
return Vec::new();
};
let gray_matter::Pod::Hash(map) = data else {
return Vec::new();
};
let gray_matter::Pod::Hash(map) = data else {
return Vec::new();
};
let Some(aliases) = map.get("aliases") else {
return Vec::new();
};
let Some(aliases) = map.get("aliases") else {
return Vec::new();
};
match aliases {
gray_matter::Pod::Array(arr) => arr
.iter()
.filter_map(|a| {
if let gray_matter::Pod::String(s) = a {
Some(s.clone())
} else {
None
}
})
.collect(),
gray_matter::Pod::String(s) => {
// Single alias as string
vec![s.clone()]
}
_ => Vec::new(),
}
match aliases {
gray_matter::Pod::Array(arr) => {
arr
.iter()
.filter_map(|a| {
if let gray_matter::Pod::String(s) = a {
Some(s.clone())
} else {
None
}
})
.collect()
},
gray_matter::Pod::String(s) => {
// Single alias as string
vec![s.clone()]
},
_ => Vec::new(),
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // Nil UUID is fine here: tests only care about link extraction, not identity.
    fn test_media_id() -> MediaId {
        MediaId(Uuid::nil())
    }

    #[test]
    fn test_extract_simple_wikilink() {
        let content = "This is a [[simple link]] in text.";
        let links = extract_links(test_media_id(), content);

        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target_path, "simple link");
        assert_eq!(links[0].link_type, LinkType::Wikilink);
        assert_eq!(links[0].link_text, Some("simple link".to_string()));
    }

    #[test]
    fn test_extract_wikilink_with_display() {
        let content = "Check out [[target note|this article]] for more.";
        let links = extract_links(test_media_id(), content);

        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target_path, "target note");
        assert_eq!(links[0].link_text, Some("this article".to_string()));
    }

    #[test]
    fn test_extract_embed() {
        let content = "Here is an image: ![[image.png]]";
        let links = extract_links(test_media_id(), content);

        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target_path, "image.png");
        assert_eq!(links[0].link_type, LinkType::Embed);
    }

    #[test]
    fn test_extract_markdown_link() {
        let content = "Read [the documentation](docs/README.md) for details.";
        let links = extract_links(test_media_id(), content);

        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target_path, "docs/README.md");
        assert_eq!(links[0].link_type, LinkType::MarkdownLink);
        assert_eq!(links[0].link_text, Some("the documentation".to_string()));
    }

    #[test]
    fn test_skip_external_links() {
        let content = "Visit [our site](https://example.com) or [email us](mailto:test@test.com).";
        let links = extract_links(test_media_id(), content);

        assert!(links.is_empty());
    }

    #[test]
    fn test_multiple_links() {
        // NOTE(review): fixture reconstructed to match the assertions below —
        // two wikilinks, one internal markdown link, one embed (4 total).
        let content = r#"
# My Note

This links to [[Note A]] and also [[Note B|Note B Title]].

We also have a markdown link to [config](./config.md).

And an embedded image: ![[diagram.png]]
"#;
        let links = extract_links(test_media_id(), content);

        assert_eq!(links.len(), 4);

        let types: Vec<_> = links.iter().map(|l| l.link_type).collect();
        assert!(types.contains(&LinkType::Wikilink));
        assert!(types.contains(&LinkType::Embed));
        assert!(types.contains(&LinkType::MarkdownLink));
    }

    #[test]
    fn test_line_numbers() {
        let content = "Line 1\n[[link on line 2]]\nLine 3";
        let links = extract_links(test_media_id(), content);

        assert_eq!(links.len(), 1);
        assert_eq!(links[0].line_number, Some(2));
    }

    #[test]
    fn test_resolve_candidates() {
        let source_path = std::path::Path::new("/notes/projects/readme.md");
        let root_dirs = vec![std::path::PathBuf::from("/notes")];

        let candidates = resolve_link_candidates("My Note", source_path, &root_dirs);

        // Should include relative path and .md variations
        assert!(!candidates.is_empty());
        assert!(
            candidates
                .iter()
                .any(|p| p.to_string_lossy().contains("My Note.md"))
        );
    }

    #[test]
    fn test_extract_aliases() {
        // NOTE(review): frontmatter reconstructed to match the expected aliases.
        let content = r#"---
title: My Note
aliases:
  - Alternative Name
  - Another Alias
---

# Content here
"#;
        let aliases = extract_aliases(content);
        assert_eq!(aliases, vec!["Alternative Name", "Another Alias"]);
    }

    #[test]
    fn test_extract_single_alias() {
        let content = r#"---
title: My Note
aliases: Single Alias
---

# Content
"#;
        let aliases = extract_aliases(content);
        assert_eq!(aliases, vec!["Single Alias"]);
    }

    #[test]
    fn test_wikilink_not_matching_embed() {
        let content = "A wikilink [[note]] and an embed ![[image.png]]";
        let links = extract_links(test_media_id(), content);

        assert_eq!(links.len(), 2);
        let wikilinks: Vec<_> = links
            .iter()
            .filter(|l| l.link_type == LinkType::Wikilink)
            .collect();
        let embeds: Vec<_> = links
            .iter()
            .filter(|l| l.link_type == LinkType::Embed)
            .collect();

        assert_eq!(wikilinks.len(), 1);
        assert_eq!(embeds.len(), 1);
        assert_eq!(wikilinks[0].target_path, "note");
        assert_eq!(embeds[0].target_path, "image.png");
    }

    #[test]
    fn test_exclude_markdown_images() {
        // Test that markdown images ![alt](image.png) are NOT extracted as links.
        // NOTE(review): fixture reconstructed to match the assertions below
        // (4 links, 4 images).
        let content = r#"
# My Note

Here's a regular link: [documentation](docs/guide.md)

Here's an image: ![screenshot](images/screenshot.png)

Another link: [config](config.toml)

Multiple images:
![logo](logo.png)
![banner](banner.jpg)

Mixed: [link](file.md) then ![image](pic.png) then [another](other.md)
"#;
        let links = extract_links(test_media_id(), content);

        // Should only extract the 4 markdown links, not the 4 images
        assert_eq!(
            links.len(),
            4,
            "Should extract 4 links, not images. Got: {:#?}",
            links
        );

        // Verify all extracted items are MarkdownLink type (not images)
        for link in &links {
            assert_eq!(
                link.link_type,
                LinkType::MarkdownLink,
                "Link '{}' should be MarkdownLink type",
                link.target_path
            );
        }

        // Verify correct targets were extracted (links, not images)
        let targets: Vec<&str> = links.iter().map(|l| l.target_path.as_str()).collect();
        assert!(
            targets.contains(&"docs/guide.md"),
            "Should contain docs/guide.md"
        );
        assert!(
            targets.contains(&"config.toml"),
            "Should contain config.toml"
        );
        assert!(targets.contains(&"file.md"), "Should contain file.md");
        assert!(targets.contains(&"other.md"), "Should contain other.md");

        // Verify images were NOT extracted
        assert!(
            !targets.contains(&"images/screenshot.png"),
            "Should NOT contain screenshot.png (it's an image)"
        );
        assert!(
            !targets.contains(&"logo.png"),
            "Should NOT contain logo.png (it's an image)"
        );
        assert!(
            !targets.contains(&"banner.jpg"),
            "Should NOT contain banner.jpg (it's an image)"
        );
        assert!(
            !targets.contains(&"pic.png"),
            "Should NOT contain pic.png (it's an image)"
        );
    }

    #[test]
    fn test_edge_case_image_at_line_start() {
        // Test edge case: image at the very start of a line
        let content = "![Image at start](start.png)\n[Link](file.md)";
        let links = extract_links(test_media_id(), content);

        assert_eq!(
            links.len(),
            1,
            "Should only extract the link, not the image"
        );
        assert_eq!(links[0].target_path, "file.md");
        assert_eq!(links[0].link_type, LinkType::MarkdownLink);
    }
}

View file

@ -7,390 +7,407 @@
use std::path::{Path, PathBuf};
use tokio::fs;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt, BufReader};
use tokio::{
fs,
io::{AsyncRead, AsyncReadExt, AsyncWriteExt, BufReader},
};
use tracing::{debug, info, warn};
use crate::error::{PinakesError, Result};
use crate::model::ContentHash;
use crate::{
error::{PinakesError, Result},
model::ContentHash,
};
/// Content-addressable storage service for managed files.
#[derive(Debug, Clone)]
pub struct ManagedStorageService {
    // Root directory of the blob store.
    root_dir: PathBuf,
    // Maximum accepted upload size in bytes; larger streams are rejected.
    max_upload_size: u64,
    // When true, blob hashes are re-verified on every read.
    verify_on_read: bool,
}
impl ManagedStorageService {
    /// Create a new managed storage service.
    pub fn new(root_dir: PathBuf, max_upload_size: u64, verify_on_read: bool) -> Self {
        Self {
            root_dir,
            max_upload_size,
            verify_on_read,
        }
    }

    /// Initialize the storage directory structure.
    pub async fn init(&self) -> Result<()> {
        fs::create_dir_all(&self.root_dir).await?;
        info!(path = %self.root_dir.display(), "initialized managed storage");
        Ok(())
    }

    /// Get the storage path for a content hash.
    ///
    /// Layout: `<root>/<hash[0:2]>/<hash[2:4]>/<full_hash>`
    pub fn path(&self, hash: &ContentHash) -> PathBuf {
        let h = &hash.0;
        if h.len() >= 4 {
            self.root_dir.join(&h[0..2]).join(&h[2..4]).join(h)
        } else {
            // Fallback for short hashes (shouldn't happen with BLAKE3)
            self.root_dir.join(h)
        }
    }

    /// Check if a blob exists in storage.
    pub async fn exists(&self, hash: &ContentHash) -> bool {
        self.path(hash).exists()
    }
/// Store a file from an async reader, computing the hash as we go.
///
/// Returns the content hash and file size.
/// If the file already exists with the same hash, returns early
/// (deduplication).
pub async fn store_stream<R: AsyncRead + Unpin>(
&self,
mut reader: R,
) -> Result<(ContentHash, u64)> {
// First, stream to a temp file while computing the hash
let temp_dir = self.root_dir.join("temp");
fs::create_dir_all(&temp_dir).await?;
/// Store a file from an async reader, computing the hash as we go.
///
/// Returns the content hash and file size.
/// If the file already exists with the same hash, returns early (deduplication).
pub async fn store_stream<R: AsyncRead + Unpin>(
&self,
mut reader: R,
) -> Result<(ContentHash, u64)> {
// First, stream to a temp file while computing the hash
let temp_dir = self.root_dir.join("temp");
fs::create_dir_all(&temp_dir).await?;
let temp_id = uuid::Uuid::now_v7();
let temp_path = temp_dir.join(temp_id.to_string());
let temp_id = uuid::Uuid::now_v7();
let temp_path = temp_dir.join(temp_id.to_string());
let mut hasher = blake3::Hasher::new();
let mut temp_file = fs::File::create(&temp_path).await?;
let mut total_size = 0u64;
let mut hasher = blake3::Hasher::new();
let mut temp_file = fs::File::create(&temp_path).await?;
let mut total_size = 0u64;
let mut buf = vec![0u8; 64 * 1024]; // 64KB buffer
loop {
let n = reader.read(&mut buf).await?;
if n == 0 {
break;
}
let mut buf = vec![0u8; 64 * 1024]; // 64KB buffer
loop {
let n = reader.read(&mut buf).await?;
if n == 0 {
break;
}
total_size += n as u64;
if total_size > self.max_upload_size {
// Clean up temp file
drop(temp_file);
let _ = fs::remove_file(&temp_path).await;
return Err(PinakesError::UploadTooLarge(total_size));
}
hasher.update(&buf[..n]);
temp_file.write_all(&buf[..n]).await?;
}
temp_file.flush().await?;
temp_file.sync_all().await?;
total_size += n as u64;
if total_size > self.max_upload_size {
// Clean up temp file
drop(temp_file);
let _ = fs::remove_file(&temp_path).await;
return Err(PinakesError::UploadTooLarge(total_size));
}
let hash = ContentHash::new(hasher.finalize().to_hex().to_string());
let final_path = self.path(&hash);
// Check if file already exists (deduplication)
if final_path.exists() {
// Verify size matches
let existing_meta = fs::metadata(&final_path).await?;
if existing_meta.len() == total_size {
debug!(hash = %hash, "blob already exists, deduplicating");
let _ = fs::remove_file(&temp_path).await;
return Ok((hash, total_size));
} else {
warn!(
hash = %hash,
expected = total_size,
actual = existing_meta.len(),
"size mismatch for existing blob, replacing"
);
}
}
// Move temp file to final location
if let Some(parent) = final_path.parent() {
fs::create_dir_all(parent).await?;
}
fs::rename(&temp_path, &final_path).await?;
info!(hash = %hash, size = total_size, "stored new blob");
Ok((hash, total_size))
hasher.update(&buf[..n]);
temp_file.write_all(&buf[..n]).await?;
}
/// Store a file from a path.
pub async fn store_file(&self, path: &Path) -> Result<(ContentHash, u64)> {
let file = fs::File::open(path).await?;
let reader = BufReader::new(file);
self.store_stream(reader).await
temp_file.flush().await?;
temp_file.sync_all().await?;
drop(temp_file);
let hash = ContentHash::new(hasher.finalize().to_hex().to_string());
let final_path = self.path(&hash);
// Check if file already exists (deduplication)
if final_path.exists() {
// Verify size matches
let existing_meta = fs::metadata(&final_path).await?;
if existing_meta.len() == total_size {
debug!(hash = %hash, "blob already exists, deduplicating");
let _ = fs::remove_file(&temp_path).await;
return Ok((hash, total_size));
} else {
warn!(
hash = %hash,
expected = total_size,
actual = existing_meta.len(),
"size mismatch for existing blob, replacing"
);
}
}
/// Store bytes directly.
pub async fn store_bytes(&self, data: &[u8]) -> Result<(ContentHash, u64)> {
use std::io::Cursor;
let cursor = Cursor::new(data);
self.store_stream(cursor).await
// Move temp file to final location
if let Some(parent) = final_path.parent() {
fs::create_dir_all(parent).await?;
}
fs::rename(&temp_path, &final_path).await?;
info!(hash = %hash, size = total_size, "stored new blob");
Ok((hash, total_size))
}
/// Store a file from a path.
pub async fn store_file(&self, path: &Path) -> Result<(ContentHash, u64)> {
let file = fs::File::open(path).await?;
let reader = BufReader::new(file);
self.store_stream(reader).await
}
/// Store bytes directly.
pub async fn store_bytes(&self, data: &[u8]) -> Result<(ContentHash, u64)> {
use std::io::Cursor;
let cursor = Cursor::new(data);
self.store_stream(cursor).await
}
/// Open a blob for reading.
pub async fn open(&self, hash: &ContentHash) -> Result<fs::File> {
let path = self.path(hash);
if !path.exists() {
return Err(PinakesError::BlobNotFound(hash.0.clone()));
}
/// Open a blob for reading.
pub async fn open(&self, hash: &ContentHash) -> Result<fs::File> {
let path = self.path(hash);
if !path.exists() {
return Err(PinakesError::BlobNotFound(hash.0.clone()));
}
if self.verify_on_read {
self.verify(hash).await?;
}
fs::File::open(&path).await.map_err(|e| PinakesError::Io(e))
if self.verify_on_read {
self.verify(hash).await?;
}
/// Read a blob entirely into memory.
pub async fn read(&self, hash: &ContentHash) -> Result<Vec<u8>> {
let path = self.path(hash);
if !path.exists() {
return Err(PinakesError::BlobNotFound(hash.0.clone()));
}
fs::File::open(&path).await.map_err(|e| PinakesError::Io(e))
}
let data = fs::read(&path).await?;
if self.verify_on_read {
let computed = blake3::hash(&data);
if computed.to_hex().to_string() != hash.0 {
return Err(PinakesError::StorageIntegrity(format!(
"hash mismatch for blob {}",
hash
)));
}
}
Ok(data)
/// Read a blob entirely into memory.
pub async fn read(&self, hash: &ContentHash) -> Result<Vec<u8>> {
let path = self.path(hash);
if !path.exists() {
return Err(PinakesError::BlobNotFound(hash.0.clone()));
}
/// Verify the integrity of a stored blob.
pub async fn verify(&self, hash: &ContentHash) -> Result<bool> {
let path = self.path(hash);
if !path.exists() {
return Ok(false);
}
let data = fs::read(&path).await?;
let file = fs::File::open(&path).await?;
let mut reader = BufReader::new(file);
let mut hasher = blake3::Hasher::new();
let mut buf = vec![0u8; 64 * 1024];
loop {
let n = reader.read(&mut buf).await?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
let computed = hasher.finalize().to_hex().to_string();
if computed != hash.0 {
warn!(
expected = %hash,
computed = %computed,
"blob integrity check failed"
);
return Err(PinakesError::StorageIntegrity(format!(
"hash mismatch: expected {}, computed {}",
hash, computed
)));
}
debug!(hash = %hash, "blob integrity verified");
Ok(true)
if self.verify_on_read {
let computed = blake3::hash(&data);
if computed.to_hex().to_string() != hash.0 {
return Err(PinakesError::StorageIntegrity(format!(
"hash mismatch for blob {}",
hash
)));
}
}
/// Delete a blob from storage.
pub async fn delete(&self, hash: &ContentHash) -> Result<()> {
let path = self.path(hash);
if path.exists() {
fs::remove_file(&path).await?;
info!(hash = %hash, "deleted blob");
Ok(data)
}
// Try to remove empty parent directories
if let Some(parent) = path.parent() {
let _ = fs::remove_dir(parent).await;
if let Some(grandparent) = parent.parent() {
let _ = fs::remove_dir(grandparent).await;
/// Verify the integrity of a stored blob.
///
/// Streams the file through a BLAKE3 hasher in 64 KiB chunks and compares
/// the hex digest against the expected content hash. Returns `Ok(false)`
/// when the blob is absent and an integrity error on a digest mismatch.
pub async fn verify(&self, hash: &ContentHash) -> Result<bool> {
    let blob_path = self.path(hash);
    if !blob_path.exists() {
        return Ok(false);
    }

    // Hash incrementally so large blobs are never fully resident in memory.
    let mut reader = BufReader::new(fs::File::open(&blob_path).await?);
    let mut hasher = blake3::Hasher::new();
    let mut chunk = vec![0u8; 64 * 1024];
    loop {
        let read = reader.read(&mut chunk).await?;
        if read == 0 {
            break;
        }
        hasher.update(&chunk[..read]);
    }

    let computed = hasher.finalize().to_hex().to_string();
    if computed != hash.0 {
        warn!(
            expected = %hash,
            computed = %computed,
            "blob integrity check failed"
        );
        return Err(PinakesError::StorageIntegrity(format!(
            "hash mismatch: expected {}, computed {}",
            hash, computed
        )));
    }
    debug!(hash = %hash, "blob integrity verified");
    Ok(true)
}
/// Delete a blob from storage.
///
/// Missing blobs are treated as already deleted. After removing the file,
/// the two fan-out parent directories are removed best-effort; failures
/// (e.g. a non-empty directory) are deliberately ignored.
pub async fn delete(&self, hash: &ContentHash) -> Result<()> {
    let blob_path = self.path(hash);
    if !blob_path.exists() {
        return Ok(());
    }

    fs::remove_file(&blob_path).await?;
    info!(hash = %hash, "deleted blob");

    // Try to remove empty parent directories; errors are intentionally dropped.
    if let Some(parent) = blob_path.parent() {
        let _ = fs::remove_dir(parent).await;
        if let Some(grandparent) = parent.parent() {
            let _ = fs::remove_dir(grandparent).await;
        }
    }
    Ok(())
}
/// Get the size of a stored blob in bytes.
///
/// Returns `BlobNotFound` when no file exists for the given hash.
pub async fn size(&self, hash: &ContentHash) -> Result<u64> {
    let blob_path = self.path(hash);
    if blob_path.exists() {
        Ok(fs::metadata(&blob_path).await?.len())
    } else {
        Err(PinakesError::BlobNotFound(hash.0.clone()))
    }
}
/// List all blob hashes in storage.
pub async fn list_all(&self) -> Result<Vec<ContentHash>> {
let mut hashes = Vec::new();
let mut entries = fs::read_dir(&self.root_dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.is_dir() && path.file_name().map(|n| n.len()) == Some(2) {
let mut sub_entries = fs::read_dir(&path).await?;
while let Some(sub_entry) = sub_entries.next_entry().await? {
let sub_path = sub_entry.path();
if sub_path.is_dir()
&& sub_path.file_name().map(|n| n.len()) == Some(2)
{
let mut file_entries = fs::read_dir(&sub_path).await?;
while let Some(file_entry) = file_entries.next_entry().await? {
let file_path = file_entry.path();
if file_path.is_file() {
if let Some(name) = file_path.file_name() {
hashes
.push(ContentHash::new(name.to_string_lossy().to_string()));
}
}
}
}
}
Ok(())
}
}
/// Get the size of a stored blob.
pub async fn size(&self, hash: &ContentHash) -> Result<u64> {
let path = self.path(hash);
if !path.exists() {
return Err(PinakesError::BlobNotFound(hash.0.clone()));
}
let meta = fs::metadata(&path).await?;
Ok(meta.len())
Ok(hashes)
}
/// Calculate total storage used by all blobs.
///
/// Sums the size of every blob returned by `list_all`; blobs whose
/// metadata cannot be read are skipped rather than aborting the scan.
pub async fn total_size(&self) -> Result<u64> {
    let mut bytes = 0u64;
    for hash in self.list_all().await? {
        // Best-effort: ignore blobs that cannot be stat'ed.
        if let Ok(size) = self.size(&hash).await {
            bytes += size;
        }
    }
    Ok(bytes)
}
/// Clean up any orphaned temp files.
pub async fn cleanup_temp(&self) -> Result<u64> {
let temp_dir = self.root_dir.join("temp");
if !temp_dir.exists() {
return Ok(0);
}
/// List all blob hashes in storage.
pub async fn list_all(&self) -> Result<Vec<ContentHash>> {
let mut hashes = Vec::new();
let mut entries = fs::read_dir(&self.root_dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.is_dir() && path.file_name().map(|n| n.len()) == Some(2) {
let mut sub_entries = fs::read_dir(&path).await?;
while let Some(sub_entry) = sub_entries.next_entry().await? {
let sub_path = sub_entry.path();
if sub_path.is_dir() && sub_path.file_name().map(|n| n.len()) == Some(2) {
let mut file_entries = fs::read_dir(&sub_path).await?;
while let Some(file_entry) = file_entries.next_entry().await? {
let file_path = file_entry.path();
if file_path.is_file() {
if let Some(name) = file_path.file_name() {
hashes
.push(ContentHash::new(name.to_string_lossy().to_string()));
}
}
}
}
}
let mut count = 0u64;
let mut entries = fs::read_dir(&temp_dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.is_file() {
// Check if temp file is old (> 1 hour)
if let Ok(meta) = fs::metadata(&path).await {
if let Ok(modified) = meta.modified() {
let age = std::time::SystemTime::now()
.duration_since(modified)
.unwrap_or_default();
if age.as_secs() > 3600 {
let _ = fs::remove_file(&path).await;
count += 1;
}
}
}
Ok(hashes)
}
}
/// Calculate total storage used by all blobs.
///
/// Sums the size of every blob returned by `list_all`; blobs whose
/// metadata cannot be read are skipped rather than aborting the scan.
pub async fn total_size(&self) -> Result<u64> {
    let hashes = self.list_all().await?;
    let mut total = 0u64;
    for hash in hashes {
        // Best-effort: ignore blobs that cannot be stat'ed.
        if let Ok(size) = self.size(&hash).await {
            total += size;
        }
    }
    Ok(total)
}
/// Clean up any orphaned temp files.
pub async fn cleanup_temp(&self) -> Result<u64> {
let temp_dir = self.root_dir.join("temp");
if !temp_dir.exists() {
return Ok(0);
}
let mut count = 0u64;
let mut entries = fs::read_dir(&temp_dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.is_file() {
// Check if temp file is old (> 1 hour)
if let Ok(meta) = fs::metadata(&path).await {
if let Ok(modified) = meta.modified() {
let age = std::time::SystemTime::now()
.duration_since(modified)
.unwrap_or_default();
if age.as_secs() > 3600 {
let _ = fs::remove_file(&path).await;
count += 1;
}
}
}
}
}
if count > 0 {
info!(count, "cleaned up orphaned temp files");
}
Ok(count)
if count > 0 {
info!(count, "cleaned up orphaned temp files");
}
Ok(count)
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
use tempfile::tempdir;
#[tokio::test]
async fn test_store_and_retrieve() {
let dir = tempdir().unwrap();
let service = ManagedStorageService::new(dir.path().to_path_buf(), 1024 * 1024, false);
service.init().await.unwrap();
use super::*;
let data = b"hello, world!";
let (hash, size) = service.store_bytes(data).await.unwrap();
#[tokio::test]
async fn test_store_and_retrieve() {
let dir = tempdir().unwrap();
let service =
ManagedStorageService::new(dir.path().to_path_buf(), 1024 * 1024, false);
service.init().await.unwrap();
assert_eq!(size, data.len() as u64);
assert!(service.exists(&hash).await);
let data = b"hello, world!";
let (hash, size) = service.store_bytes(data).await.unwrap();
let retrieved = service.read(&hash).await.unwrap();
assert_eq!(retrieved, data);
}
assert_eq!(size, data.len() as u64);
assert!(service.exists(&hash).await);
#[tokio::test]
async fn test_deduplication() {
let dir = tempdir().unwrap();
let service = ManagedStorageService::new(dir.path().to_path_buf(), 1024 * 1024, false);
service.init().await.unwrap();
let retrieved = service.read(&hash).await.unwrap();
assert_eq!(retrieved, data);
}
let data = b"duplicate content";
let (hash1, _) = service.store_bytes(data).await.unwrap();
let (hash2, _) = service.store_bytes(data).await.unwrap();
#[tokio::test]
async fn test_deduplication() {
let dir = tempdir().unwrap();
let service =
ManagedStorageService::new(dir.path().to_path_buf(), 1024 * 1024, false);
service.init().await.unwrap();
assert_eq!(hash1.0, hash2.0);
assert_eq!(service.list_all().await.unwrap().len(), 1);
}
let data = b"duplicate content";
let (hash1, _) = service.store_bytes(data).await.unwrap();
let (hash2, _) = service.store_bytes(data).await.unwrap();
#[tokio::test]
async fn test_verify_integrity() {
let dir = tempdir().unwrap();
let service = ManagedStorageService::new(dir.path().to_path_buf(), 1024 * 1024, true);
service.init().await.unwrap();
assert_eq!(hash1.0, hash2.0);
assert_eq!(service.list_all().await.unwrap().len(), 1);
}
let data = b"verify me";
let (hash, _) = service.store_bytes(data).await.unwrap();
#[tokio::test]
async fn test_verify_integrity() {
let dir = tempdir().unwrap();
let service =
ManagedStorageService::new(dir.path().to_path_buf(), 1024 * 1024, true);
service.init().await.unwrap();
assert!(service.verify(&hash).await.unwrap());
}
let data = b"verify me";
let (hash, _) = service.store_bytes(data).await.unwrap();
#[tokio::test]
async fn test_upload_too_large() {
let dir = tempdir().unwrap();
let service = ManagedStorageService::new(dir.path().to_path_buf(), 100, false);
service.init().await.unwrap();
assert!(service.verify(&hash).await.unwrap());
}
let data = vec![0u8; 200];
let result = service.store_bytes(&data).await;
#[tokio::test]
async fn test_upload_too_large() {
let dir = tempdir().unwrap();
let service =
ManagedStorageService::new(dir.path().to_path_buf(), 100, false);
service.init().await.unwrap();
assert!(matches!(result, Err(PinakesError::UploadTooLarge(_))));
}
let data = vec![0u8; 200];
let result = service.store_bytes(&data).await;
#[tokio::test]
async fn test_delete() {
let dir = tempdir().unwrap();
let service = ManagedStorageService::new(dir.path().to_path_buf(), 1024 * 1024, false);
service.init().await.unwrap();
assert!(matches!(result, Err(PinakesError::UploadTooLarge(_))));
}
let data = b"delete me";
let (hash, _) = service.store_bytes(data).await.unwrap();
assert!(service.exists(&hash).await);
#[tokio::test]
async fn test_delete() {
let dir = tempdir().unwrap();
let service =
ManagedStorageService::new(dir.path().to_path_buf(), 1024 * 1024, false);
service.init().await.unwrap();
service.delete(&hash).await.unwrap();
assert!(!service.exists(&hash).await);
}
let data = b"delete me";
let (hash, _) = service.store_bytes(data).await.unwrap();
assert!(service.exists(&hash).await);
service.delete(&hash).await.unwrap();
assert!(!service.exists(&hash).await);
}
}

View file

@ -5,246 +5,250 @@ use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum BuiltinMediaType {
// Audio
Mp3,
Flac,
Ogg,
Wav,
Aac,
Opus,
// Audio
Mp3,
Flac,
Ogg,
Wav,
Aac,
Opus,
// Video
Mp4,
Mkv,
Avi,
Webm,
// Video
Mp4,
Mkv,
Avi,
Webm,
// Documents
Pdf,
Epub,
Djvu,
// Documents
Pdf,
Epub,
Djvu,
// Text
Markdown,
PlainText,
// Text
Markdown,
PlainText,
// Images
Jpeg,
Png,
Gif,
Webp,
Svg,
Avif,
Tiff,
Bmp,
// Images
Jpeg,
Png,
Gif,
Webp,
Svg,
Avif,
Tiff,
Bmp,
// RAW Images
Cr2,
Nef,
Arw,
Dng,
Orf,
Rw2,
// RAW Images
Cr2,
Nef,
Arw,
Dng,
Orf,
Rw2,
// HEIC/HEIF
Heic,
// HEIC/HEIF
Heic,
}
/// Broad category a media type belongs to.
///
/// Fix: every variant was declared twice (duplicate definitions are a
/// compile error, E0428); the list below is deduplicated.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MediaCategory {
    Audio,
    Video,
    Document,
    Text,
    Image,
}
impl BuiltinMediaType {
/// Get the unique ID for this media type
pub fn id(&self) -> String {
format!("{:?}", self).to_lowercase()
}
/// Get the unique ID for this media type
pub fn id(&self) -> String {
format!("{:?}", self).to_lowercase()
}
/// Get the display name for this media type
pub fn name(&self) -> String {
match self {
Self::Mp3 => "MP3 Audio".to_string(),
Self::Flac => "FLAC Audio".to_string(),
Self::Ogg => "OGG Audio".to_string(),
Self::Wav => "WAV Audio".to_string(),
Self::Aac => "AAC Audio".to_string(),
Self::Opus => "Opus Audio".to_string(),
Self::Mp4 => "MP4 Video".to_string(),
Self::Mkv => "MKV Video".to_string(),
Self::Avi => "AVI Video".to_string(),
Self::Webm => "WebM Video".to_string(),
Self::Pdf => "PDF Document".to_string(),
Self::Epub => "EPUB eBook".to_string(),
Self::Djvu => "DjVu Document".to_string(),
Self::Markdown => "Markdown".to_string(),
Self::PlainText => "Plain Text".to_string(),
Self::Jpeg => "JPEG Image".to_string(),
Self::Png => "PNG Image".to_string(),
Self::Gif => "GIF Image".to_string(),
Self::Webp => "WebP Image".to_string(),
Self::Svg => "SVG Image".to_string(),
Self::Avif => "AVIF Image".to_string(),
Self::Tiff => "TIFF Image".to_string(),
Self::Bmp => "BMP Image".to_string(),
Self::Cr2 => "Canon RAW (CR2)".to_string(),
Self::Nef => "Nikon RAW (NEF)".to_string(),
Self::Arw => "Sony RAW (ARW)".to_string(),
Self::Dng => "Adobe DNG RAW".to_string(),
Self::Orf => "Olympus RAW (ORF)".to_string(),
Self::Rw2 => "Panasonic RAW (RW2)".to_string(),
Self::Heic => "HEIC Image".to_string(),
}
/// Get the display name for this media type.
///
/// The match yields a `&'static str` label and allocates a single `String`
/// at the end, instead of calling `.to_string()` in all thirty arms.
pub fn name(&self) -> String {
    let label = match self {
        Self::Mp3 => "MP3 Audio",
        Self::Flac => "FLAC Audio",
        Self::Ogg => "OGG Audio",
        Self::Wav => "WAV Audio",
        Self::Aac => "AAC Audio",
        Self::Opus => "Opus Audio",
        Self::Mp4 => "MP4 Video",
        Self::Mkv => "MKV Video",
        Self::Avi => "AVI Video",
        Self::Webm => "WebM Video",
        Self::Pdf => "PDF Document",
        Self::Epub => "EPUB eBook",
        Self::Djvu => "DjVu Document",
        Self::Markdown => "Markdown",
        Self::PlainText => "Plain Text",
        Self::Jpeg => "JPEG Image",
        Self::Png => "PNG Image",
        Self::Gif => "GIF Image",
        Self::Webp => "WebP Image",
        Self::Svg => "SVG Image",
        Self::Avif => "AVIF Image",
        Self::Tiff => "TIFF Image",
        Self::Bmp => "BMP Image",
        Self::Cr2 => "Canon RAW (CR2)",
        Self::Nef => "Nikon RAW (NEF)",
        Self::Arw => "Sony RAW (ARW)",
        Self::Dng => "Adobe DNG RAW",
        Self::Orf => "Olympus RAW (ORF)",
        Self::Rw2 => "Panasonic RAW (RW2)",
        Self::Heic => "HEIC Image",
    };
    label.to_string()
}
pub fn from_extension(ext: &str) -> Option<Self> {
match ext.to_ascii_lowercase().as_str() {
"mp3" => Some(Self::Mp3),
"flac" => Some(Self::Flac),
"ogg" | "oga" => Some(Self::Ogg),
"wav" => Some(Self::Wav),
"aac" | "m4a" => Some(Self::Aac),
"opus" => Some(Self::Opus),
"mp4" | "m4v" => Some(Self::Mp4),
"mkv" => Some(Self::Mkv),
"avi" => Some(Self::Avi),
"webm" => Some(Self::Webm),
"pdf" => Some(Self::Pdf),
"epub" => Some(Self::Epub),
"djvu" => Some(Self::Djvu),
"md" | "markdown" => Some(Self::Markdown),
"txt" | "text" => Some(Self::PlainText),
"jpg" | "jpeg" => Some(Self::Jpeg),
"png" => Some(Self::Png),
"gif" => Some(Self::Gif),
"webp" => Some(Self::Webp),
"svg" => Some(Self::Svg),
"avif" => Some(Self::Avif),
"tiff" | "tif" => Some(Self::Tiff),
"bmp" => Some(Self::Bmp),
"cr2" => Some(Self::Cr2),
"nef" => Some(Self::Nef),
"arw" => Some(Self::Arw),
"dng" => Some(Self::Dng),
"orf" => Some(Self::Orf),
"rw2" => Some(Self::Rw2),
"heic" | "heif" => Some(Self::Heic),
_ => None,
}
/// Resolve a built-in media type from a file extension.
///
/// Matching is case-insensitive; alias extensions (e.g. "jpg"/"jpeg",
/// "tif"/"tiff", "heic"/"heif") map to the same variant. Returns `None`
/// for unrecognized extensions.
pub fn from_extension(ext: &str) -> Option<Self> {
    match ext.to_ascii_lowercase().as_str() {
        // Audio
        "mp3" => Some(Self::Mp3),
        "flac" => Some(Self::Flac),
        "ogg" | "oga" => Some(Self::Ogg),
        "wav" => Some(Self::Wav),
        "aac" | "m4a" => Some(Self::Aac),
        "opus" => Some(Self::Opus),
        // Video
        "mp4" | "m4v" => Some(Self::Mp4),
        "mkv" => Some(Self::Mkv),
        "avi" => Some(Self::Avi),
        "webm" => Some(Self::Webm),
        // Documents
        "pdf" => Some(Self::Pdf),
        "epub" => Some(Self::Epub),
        "djvu" => Some(Self::Djvu),
        // Text
        "md" | "markdown" => Some(Self::Markdown),
        "txt" | "text" => Some(Self::PlainText),
        // Images
        "jpg" | "jpeg" => Some(Self::Jpeg),
        "png" => Some(Self::Png),
        "gif" => Some(Self::Gif),
        "webp" => Some(Self::Webp),
        "svg" => Some(Self::Svg),
        "avif" => Some(Self::Avif),
        "tiff" | "tif" => Some(Self::Tiff),
        "bmp" => Some(Self::Bmp),
        // RAW images
        "cr2" => Some(Self::Cr2),
        "nef" => Some(Self::Nef),
        "arw" => Some(Self::Arw),
        "dng" => Some(Self::Dng),
        "orf" => Some(Self::Orf),
        "rw2" => Some(Self::Rw2),
        "heic" | "heif" => Some(Self::Heic),
        _ => None,
    }
}
pub fn from_path(path: &Path) -> Option<Self> {
path.extension()
.and_then(|e| e.to_str())
.and_then(Self::from_extension)
}
/// Resolve a built-in media type from a file path's extension.
/// Returns `None` when the path has no extension, the extension is not
/// valid UTF-8, or it does not match any built-in type.
pub fn from_path(path: &Path) -> Option<Self> {
    let ext = path.extension()?.to_str()?;
    Self::from_extension(ext)
}
pub fn mime_type(&self) -> &'static str {
match self {
Self::Mp3 => "audio/mpeg",
Self::Flac => "audio/flac",
Self::Ogg => "audio/ogg",
Self::Wav => "audio/wav",
Self::Aac => "audio/aac",
Self::Opus => "audio/opus",
Self::Mp4 => "video/mp4",
Self::Mkv => "video/x-matroska",
Self::Avi => "video/x-msvideo",
Self::Webm => "video/webm",
Self::Pdf => "application/pdf",
Self::Epub => "application/epub+zip",
Self::Djvu => "image/vnd.djvu",
Self::Markdown => "text/markdown",
Self::PlainText => "text/plain",
Self::Jpeg => "image/jpeg",
Self::Png => "image/png",
Self::Gif => "image/gif",
Self::Webp => "image/webp",
Self::Svg => "image/svg+xml",
Self::Avif => "image/avif",
Self::Tiff => "image/tiff",
Self::Bmp => "image/bmp",
Self::Cr2 => "image/x-canon-cr2",
Self::Nef => "image/x-nikon-nef",
Self::Arw => "image/x-sony-arw",
Self::Dng => "image/x-adobe-dng",
Self::Orf => "image/x-olympus-orf",
Self::Rw2 => "image/x-panasonic-rw2",
Self::Heic => "image/heic",
}
/// Return the MIME type string for this media type.
pub fn mime_type(&self) -> &'static str {
    match self {
        // Audio
        Self::Mp3 => "audio/mpeg",
        Self::Flac => "audio/flac",
        Self::Ogg => "audio/ogg",
        Self::Wav => "audio/wav",
        Self::Aac => "audio/aac",
        Self::Opus => "audio/opus",
        // Video
        Self::Mp4 => "video/mp4",
        Self::Mkv => "video/x-matroska",
        Self::Avi => "video/x-msvideo",
        Self::Webm => "video/webm",
        // Documents
        Self::Pdf => "application/pdf",
        Self::Epub => "application/epub+zip",
        Self::Djvu => "image/vnd.djvu",
        // Text
        Self::Markdown => "text/markdown",
        Self::PlainText => "text/plain",
        // Images
        Self::Jpeg => "image/jpeg",
        Self::Png => "image/png",
        Self::Gif => "image/gif",
        Self::Webp => "image/webp",
        Self::Svg => "image/svg+xml",
        Self::Avif => "image/avif",
        Self::Tiff => "image/tiff",
        Self::Bmp => "image/bmp",
        // RAW images (vendor-specific subtypes)
        Self::Cr2 => "image/x-canon-cr2",
        Self::Nef => "image/x-nikon-nef",
        Self::Arw => "image/x-sony-arw",
        Self::Dng => "image/x-adobe-dng",
        Self::Orf => "image/x-olympus-orf",
        Self::Rw2 => "image/x-panasonic-rw2",
        Self::Heic => "image/heic",
    }
}
pub fn category(&self) -> MediaCategory {
match self {
Self::Mp3 | Self::Flac | Self::Ogg | Self::Wav | Self::Aac | Self::Opus => {
MediaCategory::Audio
}
Self::Mp4 | Self::Mkv | Self::Avi | Self::Webm => MediaCategory::Video,
Self::Pdf | Self::Epub | Self::Djvu => MediaCategory::Document,
Self::Markdown | Self::PlainText => MediaCategory::Text,
Self::Jpeg
| Self::Png
| Self::Gif
| Self::Webp
| Self::Svg
| Self::Avif
| Self::Tiff
| Self::Bmp
| Self::Cr2
| Self::Nef
| Self::Arw
| Self::Dng
| Self::Orf
| Self::Rw2
| Self::Heic => MediaCategory::Image,
}
/// Map this media type to its broad `MediaCategory`.
pub fn category(&self) -> MediaCategory {
    match self {
        Self::Mp3
        | Self::Flac
        | Self::Ogg
        | Self::Wav
        | Self::Aac
        | Self::Opus => MediaCategory::Audio,
        Self::Mp4 | Self::Mkv | Self::Avi | Self::Webm => MediaCategory::Video,
        Self::Pdf | Self::Epub | Self::Djvu => MediaCategory::Document,
        Self::Markdown | Self::PlainText => MediaCategory::Text,
        // All raster, vector, and RAW image formats fall under Image.
        Self::Jpeg
        | Self::Png
        | Self::Gif
        | Self::Webp
        | Self::Svg
        | Self::Avif
        | Self::Tiff
        | Self::Bmp
        | Self::Cr2
        | Self::Nef
        | Self::Arw
        | Self::Dng
        | Self::Orf
        | Self::Rw2
        | Self::Heic => MediaCategory::Image,
    }
}
pub fn extensions(&self) -> &'static [&'static str] {
match self {
Self::Mp3 => &["mp3"],
Self::Flac => &["flac"],
Self::Ogg => &["ogg", "oga"],
Self::Wav => &["wav"],
Self::Aac => &["aac", "m4a"],
Self::Opus => &["opus"],
Self::Mp4 => &["mp4", "m4v"],
Self::Mkv => &["mkv"],
Self::Avi => &["avi"],
Self::Webm => &["webm"],
Self::Pdf => &["pdf"],
Self::Epub => &["epub"],
Self::Djvu => &["djvu"],
Self::Markdown => &["md", "markdown"],
Self::PlainText => &["txt", "text"],
Self::Jpeg => &["jpg", "jpeg"],
Self::Png => &["png"],
Self::Gif => &["gif"],
Self::Webp => &["webp"],
Self::Svg => &["svg"],
Self::Avif => &["avif"],
Self::Tiff => &["tiff", "tif"],
Self::Bmp => &["bmp"],
Self::Cr2 => &["cr2"],
Self::Nef => &["nef"],
Self::Arw => &["arw"],
Self::Dng => &["dng"],
Self::Orf => &["orf"],
Self::Rw2 => &["rw2"],
Self::Heic => &["heic", "heif"],
}
/// Return the file extensions (lowercase, without the leading dot)
/// associated with this media type.
pub fn extensions(&self) -> &'static [&'static str] {
    match self {
        // Audio
        Self::Mp3 => &["mp3"],
        Self::Flac => &["flac"],
        Self::Ogg => &["ogg", "oga"],
        Self::Wav => &["wav"],
        Self::Aac => &["aac", "m4a"],
        Self::Opus => &["opus"],
        // Video
        Self::Mp4 => &["mp4", "m4v"],
        Self::Mkv => &["mkv"],
        Self::Avi => &["avi"],
        Self::Webm => &["webm"],
        // Documents
        Self::Pdf => &["pdf"],
        Self::Epub => &["epub"],
        Self::Djvu => &["djvu"],
        // Text
        Self::Markdown => &["md", "markdown"],
        Self::PlainText => &["txt", "text"],
        // Images
        Self::Jpeg => &["jpg", "jpeg"],
        Self::Png => &["png"],
        Self::Gif => &["gif"],
        Self::Webp => &["webp"],
        Self::Svg => &["svg"],
        Self::Avif => &["avif"],
        Self::Tiff => &["tiff", "tif"],
        Self::Bmp => &["bmp"],
        // RAW images
        Self::Cr2 => &["cr2"],
        Self::Nef => &["nef"],
        Self::Arw => &["arw"],
        Self::Dng => &["dng"],
        Self::Orf => &["orf"],
        Self::Rw2 => &["rw2"],
        Self::Heic => &["heic", "heif"],
    }
}
/// Returns true if this is a RAW image format.
pub fn is_raw(&self) -> bool {
matches!(
self,
Self::Cr2 | Self::Nef | Self::Arw | Self::Dng | Self::Orf | Self::Rw2
)
}
/// Returns true if this is a RAW image format.
pub fn is_raw(&self) -> bool {
    match self {
        Self::Cr2 | Self::Nef | Self::Arw | Self::Dng | Self::Orf | Self::Rw2 => true,
        _ => false,
    }
}
}

View file

@ -3,9 +3,10 @@
//! This module provides an extensible media type system that supports both
//! built-in media types and plugin-registered custom types.
use serde::{Deserialize, Serialize};
use std::path::Path;
use serde::{Deserialize, Serialize};
pub mod builtin;
pub mod registry;
@ -16,217 +17,248 @@ pub use registry::{MediaTypeDescriptor, MediaTypeRegistry};
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(untagged)]
pub enum MediaType {
/// Built-in media type (backward compatible)
Builtin(BuiltinMediaType),
/// Built-in media type (backward compatible)
Builtin(BuiltinMediaType),
/// Custom media type from a plugin
Custom(String),
/// Custom media type from a plugin
Custom(String),
}
impl MediaType {
/// Create a new custom media type
pub fn custom(id: impl Into<String>) -> Self {
Self::Custom(id.into())
/// Create a new custom media type
pub fn custom(id: impl Into<String>) -> Self {
Self::Custom(id.into())
}
/// Get the type ID as a string
pub fn id(&self) -> String {
match self {
Self::Builtin(b) => b.id(),
Self::Custom(id) => id.clone(),
}
}
/// Get the type ID as a string
pub fn id(&self) -> String {
match self {
Self::Builtin(b) => b.id(),
Self::Custom(id) => id.clone(),
}
/// Get the display name for this media type
/// For custom types without a registry, returns the ID as the name
pub fn name(&self) -> String {
match self {
Self::Builtin(b) => b.name(),
Self::Custom(id) => id.clone(),
}
}
/// Get the display name for this media type
/// For custom types without a registry, returns the ID as the name
pub fn name(&self) -> String {
match self {
Self::Builtin(b) => b.name(),
Self::Custom(id) => id.clone(),
}
}
/// Get the display name for this media type with registry support
pub fn name_with_registry(&self, registry: &MediaTypeRegistry) -> String {
match self {
Self::Builtin(b) => b.name(),
Self::Custom(id) => registry
.get(id)
.map(|d| d.name.clone())
.unwrap_or_else(|| id.clone()),
}
}
/// Get the category for this media type
/// For custom types without a registry, returns MediaCategory::Document as default
pub fn category(&self) -> MediaCategory {
match self {
Self::Builtin(b) => b.category(),
Self::Custom(_) => MediaCategory::Document,
}
}
/// Get the category for this media type with registry support
pub fn category_with_registry(&self, registry: &MediaTypeRegistry) -> MediaCategory {
match self {
Self::Builtin(b) => b.category(),
Self::Custom(id) => registry
.get(id)
.and_then(|d| d.category)
.unwrap_or(MediaCategory::Document),
}
}
/// Get the MIME type.
/// For custom types without a registry, returns "application/octet-stream".
pub fn mime_type(&self) -> String {
    if let Self::Builtin(b) = self {
        b.mime_type().to_string()
    } else {
        "application/octet-stream".to_string()
    }
}
/// Get the MIME type with registry support
pub fn mime_type_with_registry(&self, registry: &MediaTypeRegistry) -> String {
match self {
Self::Builtin(b) => b.mime_type().to_string(),
Self::Custom(id) => registry
.get(id)
.and_then(|d| d.mime_types.first().cloned())
.unwrap_or_else(|| "application/octet-stream".to_string()),
}
}
/// Get file extensions
/// For custom types without a registry, returns an empty vec
pub fn extensions(&self) -> Vec<String> {
match self {
Self::Builtin(b) => b.extensions().iter().map(|s| s.to_string()).collect(),
Self::Custom(_) => vec![],
}
}
/// Get file extensions with registry support
pub fn extensions_with_registry(&self, registry: &MediaTypeRegistry) -> Vec<String> {
match self {
Self::Builtin(b) => b.extensions().iter().map(|s| s.to_string()).collect(),
Self::Custom(id) => registry
.get(id)
.map(|d| d.extensions.clone())
.unwrap_or_default(),
}
}
/// Check if this is a RAW image format
pub fn is_raw(&self) -> bool {
match self {
Self::Builtin(b) => b.is_raw(),
Self::Custom(_) => false,
}
}
/// Resolve a media type from file extension (built-in types only)
/// Use from_extension_with_registry for custom types
pub fn from_extension(ext: &str) -> Option<Self> {
BuiltinMediaType::from_extension(ext).map(Self::Builtin)
}
/// Resolve a media type from file extension with registry (includes custom types)
pub fn from_extension_with_registry(ext: &str, registry: &MediaTypeRegistry) -> Option<Self> {
// Try built-in types first
if let Some(builtin) = BuiltinMediaType::from_extension(ext) {
return Some(Self::Builtin(builtin));
}
// Try registered custom types
/// Get the display name for this media type with registry support
pub fn name_with_registry(&self, registry: &MediaTypeRegistry) -> String {
match self {
Self::Builtin(b) => b.name(),
Self::Custom(id) => {
registry
.get_by_extension(ext)
.map(|desc| Self::Custom(desc.id.clone()))
.get(id)
.map(|d| d.name.clone())
.unwrap_or_else(|| id.clone())
},
}
}
/// Get the category for this media type
/// For custom types without a registry, returns MediaCategory::Document as
/// default
pub fn category(&self) -> MediaCategory {
match self {
Self::Builtin(b) => b.category(),
Self::Custom(_) => MediaCategory::Document,
}
}
/// Get the category for this media type with registry support
pub fn category_with_registry(
&self,
registry: &MediaTypeRegistry,
) -> MediaCategory {
match self {
Self::Builtin(b) => b.category(),
Self::Custom(id) => {
registry
.get(id)
.and_then(|d| d.category)
.unwrap_or(MediaCategory::Document)
},
}
}
/// Get the MIME type
/// For custom types without a registry, returns "application/octet-stream"
pub fn mime_type(&self) -> String {
match self {
Self::Builtin(b) => b.mime_type().to_string(),
Self::Custom(_) => "application/octet-stream".to_string(),
}
}
/// Get the MIME type with registry support
pub fn mime_type_with_registry(
&self,
registry: &MediaTypeRegistry,
) -> String {
match self {
Self::Builtin(b) => b.mime_type().to_string(),
Self::Custom(id) => {
registry
.get(id)
.and_then(|d| d.mime_types.first().cloned())
.unwrap_or_else(|| "application/octet-stream".to_string())
},
}
}
/// Get file extensions
/// For custom types without a registry, returns an empty vec
pub fn extensions(&self) -> Vec<String> {
match self {
Self::Builtin(b) => {
b.extensions().iter().map(|s| s.to_string()).collect()
},
Self::Custom(_) => vec![],
}
}
/// Get file extensions with registry support
pub fn extensions_with_registry(
&self,
registry: &MediaTypeRegistry,
) -> Vec<String> {
match self {
Self::Builtin(b) => {
b.extensions().iter().map(|s| s.to_string()).collect()
},
Self::Custom(id) => {
registry
.get(id)
.map(|d| d.extensions.clone())
.unwrap_or_default()
},
}
}
/// Check if this is a RAW image format
pub fn is_raw(&self) -> bool {
match self {
Self::Builtin(b) => b.is_raw(),
Self::Custom(_) => false,
}
}
/// Resolve a media type from file extension (built-in types only)
/// Use from_extension_with_registry for custom types
pub fn from_extension(ext: &str) -> Option<Self> {
BuiltinMediaType::from_extension(ext).map(Self::Builtin)
}
/// Resolve a media type from file extension with registry (includes custom
/// types)
pub fn from_extension_with_registry(
ext: &str,
registry: &MediaTypeRegistry,
) -> Option<Self> {
// Try built-in types first
if let Some(builtin) = BuiltinMediaType::from_extension(ext) {
return Some(Self::Builtin(builtin));
}
/// Resolve a media type from file path (built-in types only)
/// Use from_path_with_registry for custom types
pub fn from_path(path: &Path) -> Option<Self> {
path.extension()
.and_then(|e| e.to_str())
.and_then(Self::from_extension)
}
// Try registered custom types
registry
.get_by_extension(ext)
.map(|desc| Self::Custom(desc.id.clone()))
}
/// Resolve a media type from file path with registry (includes custom types)
pub fn from_path_with_registry(path: &Path, registry: &MediaTypeRegistry) -> Option<Self> {
path.extension()
.and_then(|e| e.to_str())
.and_then(|ext| Self::from_extension_with_registry(ext, registry))
}
/// Resolve a media type from file path (built-in types only)
/// Use from_path_with_registry for custom types
pub fn from_path(path: &Path) -> Option<Self> {
path
.extension()
.and_then(|e| e.to_str())
.and_then(Self::from_extension)
}
/// Resolve a media type from file path with registry (includes custom types)
pub fn from_path_with_registry(
path: &Path,
registry: &MediaTypeRegistry,
) -> Option<Self> {
path
.extension()
.and_then(|e| e.to_str())
.and_then(|ext| Self::from_extension_with_registry(ext, registry))
}
}
// Implement From<BuiltinMediaType> for easier conversion
impl From<BuiltinMediaType> for MediaType {
fn from(builtin: BuiltinMediaType) -> Self {
Self::Builtin(builtin)
}
fn from(builtin: BuiltinMediaType) -> Self {
Self::Builtin(builtin)
}
}
#[cfg(test)]
mod tests {
use super::*;
use super::*;
#[test]
fn test_builtin_media_type() {
let mt = MediaType::Builtin(BuiltinMediaType::Mp3);
#[test]
fn test_builtin_media_type() {
let mt = MediaType::Builtin(BuiltinMediaType::Mp3);
assert_eq!(mt.id(), "mp3");
assert_eq!(mt.mime_type(), "audio/mpeg");
assert_eq!(mt.category(), MediaCategory::Audio);
}
assert_eq!(mt.id(), "mp3");
assert_eq!(mt.mime_type(), "audio/mpeg");
assert_eq!(mt.category(), MediaCategory::Audio);
}
#[test]
fn test_custom_media_type() {
let mut registry = MediaTypeRegistry::new();
#[test]
fn test_custom_media_type() {
let mut registry = MediaTypeRegistry::new();
let descriptor = MediaTypeDescriptor {
id: "heif".to_string(),
name: "HEIF Image".to_string(),
category: Some(MediaCategory::Image),
extensions: vec!["heif".to_string()],
mime_types: vec!["image/heif".to_string()],
plugin_id: Some("heif-plugin".to_string()),
};
let descriptor = MediaTypeDescriptor {
id: "heif".to_string(),
name: "HEIF Image".to_string(),
category: Some(MediaCategory::Image),
extensions: vec!["heif".to_string()],
mime_types: vec!["image/heif".to_string()],
plugin_id: Some("heif-plugin".to_string()),
};
registry.register(descriptor).unwrap();
registry.register(descriptor).unwrap();
let mt = MediaType::custom("heif");
assert_eq!(mt.id(), "heif");
assert_eq!(mt.mime_type_with_registry(&registry), "image/heif");
assert_eq!(mt.category_with_registry(&registry), MediaCategory::Image);
}
let mt = MediaType::custom("heif");
assert_eq!(mt.id(), "heif");
assert_eq!(mt.mime_type_with_registry(&registry), "image/heif");
assert_eq!(mt.category_with_registry(&registry), MediaCategory::Image);
}
#[test]
fn test_from_extension_builtin() {
let registry = MediaTypeRegistry::new();
let mt = MediaType::from_extension_with_registry("mp3", &registry);
#[test]
fn test_from_extension_builtin() {
let registry = MediaTypeRegistry::new();
let mt = MediaType::from_extension_with_registry("mp3", &registry);
assert!(mt.is_some());
assert_eq!(mt.unwrap(), MediaType::Builtin(BuiltinMediaType::Mp3));
}
assert!(mt.is_some());
assert_eq!(mt.unwrap(), MediaType::Builtin(BuiltinMediaType::Mp3));
}
#[test]
fn test_from_extension_custom() {
let mut registry = MediaTypeRegistry::new();
#[test]
fn test_from_extension_custom() {
let mut registry = MediaTypeRegistry::new();
let descriptor = MediaTypeDescriptor {
id: "customformat".to_string(),
name: "Custom Format".to_string(),
category: Some(MediaCategory::Image),
extensions: vec!["xyz".to_string()],
mime_types: vec!["application/x-custom".to_string()],
plugin_id: Some("custom-plugin".to_string()),
};
let descriptor = MediaTypeDescriptor {
id: "customformat".to_string(),
name: "Custom Format".to_string(),
category: Some(MediaCategory::Image),
extensions: vec!["xyz".to_string()],
mime_types: vec!["application/x-custom".to_string()],
plugin_id: Some("custom-plugin".to_string()),
};
registry.register(descriptor).unwrap();
registry.register(descriptor).unwrap();
let mt = MediaType::from_extension_with_registry("xyz", &registry);
assert!(mt.is_some());
assert_eq!(mt.unwrap(), MediaType::custom("customformat"));
}
let mt = MediaType::from_extension_with_registry("xyz", &registry);
assert!(mt.is_some());
assert_eq!(mt.unwrap(), MediaType::custom("customformat"));
}
}

View file

@ -1,285 +1,290 @@
//! Media type registry for managing both built-in and custom media types
use std::collections::HashMap;
use anyhow::{Result, anyhow};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use super::MediaCategory;
/// Descriptor for a media type (built-in or custom)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MediaTypeDescriptor {
/// Unique identifier
pub id: String,
/// Unique identifier
pub id: String,
/// Display name
pub name: String,
/// Display name
pub name: String,
/// Category
pub category: Option<MediaCategory>,
/// Category
pub category: Option<MediaCategory>,
/// File extensions
pub extensions: Vec<String>,
/// File extensions
pub extensions: Vec<String>,
/// MIME types
pub mime_types: Vec<String>,
/// MIME types
pub mime_types: Vec<String>,
/// Plugin that registered this type (None for built-in types)
pub plugin_id: Option<String>,
/// Plugin that registered this type (None for built-in types)
pub plugin_id: Option<String>,
}
/// Registry for media types
#[derive(Debug, Clone)]
pub struct MediaTypeRegistry {
/// Map of media type ID to descriptor
types: HashMap<String, MediaTypeDescriptor>,
/// Map of media type ID to descriptor
types: HashMap<String, MediaTypeDescriptor>,
/// Map of extension to media type ID
extension_map: HashMap<String, String>,
/// Map of extension to media type ID
extension_map: HashMap<String, String>,
}
impl MediaTypeRegistry {
/// Create a new empty registry
pub fn new() -> Self {
Self {
types: HashMap::new(),
extension_map: HashMap::new(),
}
/// Create a new empty registry
pub fn new() -> Self {
Self {
types: HashMap::new(),
extension_map: HashMap::new(),
}
}
/// Register a new media type
pub fn register(&mut self, descriptor: MediaTypeDescriptor) -> Result<()> {
// Check if ID is already registered
if self.types.contains_key(&descriptor.id) {
return Err(anyhow!("Media type already registered: {}", descriptor.id));
}
/// Register a new media type
pub fn register(&mut self, descriptor: MediaTypeDescriptor) -> Result<()> {
// Check if ID is already registered
if self.types.contains_key(&descriptor.id) {
return Err(anyhow!("Media type already registered: {}", descriptor.id));
}
// Register extensions
for ext in &descriptor.extensions {
let ext_lower = ext.to_lowercase();
if self.extension_map.contains_key(&ext_lower) {
// Extension already registered - this is OK, we'll use the first one
// In a more sophisticated system, we might track multiple types per extension
continue;
}
self.extension_map.insert(ext_lower, descriptor.id.clone());
}
// Register the type
self.types.insert(descriptor.id.clone(), descriptor);
Ok(())
// Register extensions
for ext in &descriptor.extensions {
let ext_lower = ext.to_lowercase();
if self.extension_map.contains_key(&ext_lower) {
// Extension already registered - this is OK, we'll use the first one
// In a more sophisticated system, we might track multiple types per
// extension
continue;
}
self.extension_map.insert(ext_lower, descriptor.id.clone());
}
/// Unregister a media type
pub fn unregister(&mut self, id: &str) -> Result<()> {
let descriptor = self
.types
.remove(id)
.ok_or_else(|| anyhow!("Media type not found: {}", id))?;
// Register the type
self.types.insert(descriptor.id.clone(), descriptor);
// Remove extensions
for ext in &descriptor.extensions {
let ext_lower = ext.to_lowercase();
if self.extension_map.get(&ext_lower) == Some(&descriptor.id) {
self.extension_map.remove(&ext_lower);
}
}
Ok(())
}
Ok(())
/// Unregister a media type
pub fn unregister(&mut self, id: &str) -> Result<()> {
let descriptor = self
.types
.remove(id)
.ok_or_else(|| anyhow!("Media type not found: {}", id))?;
// Remove extensions
for ext in &descriptor.extensions {
let ext_lower = ext.to_lowercase();
if self.extension_map.get(&ext_lower) == Some(&descriptor.id) {
self.extension_map.remove(&ext_lower);
}
}
/// Get a media type descriptor by ID
pub fn get(&self, id: &str) -> Option<&MediaTypeDescriptor> {
self.types.get(id)
Ok(())
}
/// Get a media type descriptor by ID
pub fn get(&self, id: &str) -> Option<&MediaTypeDescriptor> {
self.types.get(id)
}
/// Get a media type by file extension
pub fn get_by_extension(&self, ext: &str) -> Option<&MediaTypeDescriptor> {
let ext_lower = ext.to_lowercase();
self
.extension_map
.get(&ext_lower)
.and_then(|id| self.types.get(id))
}
/// List all registered media types
pub fn list_all(&self) -> Vec<&MediaTypeDescriptor> {
self.types.values().collect()
}
/// List media types from a specific plugin
pub fn list_by_plugin(&self, plugin_id: &str) -> Vec<&MediaTypeDescriptor> {
self
.types
.values()
.filter(|d| d.plugin_id.as_deref() == Some(plugin_id))
.collect()
}
/// List built-in media types (plugin_id is None)
pub fn list_builtin(&self) -> Vec<&MediaTypeDescriptor> {
self
.types
.values()
.filter(|d| d.plugin_id.is_none())
.collect()
}
/// Get count of registered types
pub fn count(&self) -> usize {
self.types.len()
}
/// Check if a media type is registered
pub fn contains(&self, id: &str) -> bool {
self.types.contains_key(id)
}
/// Unregister all types from a specific plugin
pub fn unregister_plugin(&mut self, plugin_id: &str) -> Result<usize> {
let type_ids: Vec<String> = self
.types
.values()
.filter(|d| d.plugin_id.as_deref() == Some(plugin_id))
.map(|d| d.id.clone())
.collect();
let count = type_ids.len();
for id in type_ids {
self.unregister(&id)?;
}
/// Get a media type by file extension
pub fn get_by_extension(&self, ext: &str) -> Option<&MediaTypeDescriptor> {
let ext_lower = ext.to_lowercase();
self.extension_map
.get(&ext_lower)
.and_then(|id| self.types.get(id))
}
/// List all registered media types
pub fn list_all(&self) -> Vec<&MediaTypeDescriptor> {
self.types.values().collect()
}
/// List media types from a specific plugin
pub fn list_by_plugin(&self, plugin_id: &str) -> Vec<&MediaTypeDescriptor> {
self.types
.values()
.filter(|d| d.plugin_id.as_deref() == Some(plugin_id))
.collect()
}
/// List built-in media types (plugin_id is None)
pub fn list_builtin(&self) -> Vec<&MediaTypeDescriptor> {
self.types
.values()
.filter(|d| d.plugin_id.is_none())
.collect()
}
/// Get count of registered types
pub fn count(&self) -> usize {
self.types.len()
}
/// Check if a media type is registered
pub fn contains(&self, id: &str) -> bool {
self.types.contains_key(id)
}
/// Unregister all types from a specific plugin
pub fn unregister_plugin(&mut self, plugin_id: &str) -> Result<usize> {
let type_ids: Vec<String> = self
.types
.values()
.filter(|d| d.plugin_id.as_deref() == Some(plugin_id))
.map(|d| d.id.clone())
.collect();
let count = type_ids.len();
for id in type_ids {
self.unregister(&id)?;
}
Ok(count)
}
Ok(count)
}
}
impl Default for MediaTypeRegistry {
fn default() -> Self {
Self::new()
}
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use super::*;
fn create_test_descriptor(id: &str, ext: &str) -> MediaTypeDescriptor {
MediaTypeDescriptor {
id: id.to_string(),
name: format!("{} Type", id),
category: Some(MediaCategory::Document),
extensions: vec![ext.to_string()],
mime_types: vec![format!("application/{}", id)],
plugin_id: Some("test-plugin".to_string()),
}
fn create_test_descriptor(id: &str, ext: &str) -> MediaTypeDescriptor {
MediaTypeDescriptor {
id: id.to_string(),
name: format!("{} Type", id),
category: Some(MediaCategory::Document),
extensions: vec![ext.to_string()],
mime_types: vec![format!("application/{}", id)],
plugin_id: Some("test-plugin".to_string()),
}
}
#[test]
fn test_register_and_get() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
registry.register(descriptor.clone()).unwrap();
let retrieved = registry.get("test").unwrap();
assert_eq!(retrieved.id, "test");
assert_eq!(retrieved.name, "test Type");
}
#[test]
fn test_register_duplicate() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
registry.register(descriptor.clone()).unwrap();
let result = registry.register(descriptor);
assert!(result.is_err());
}
#[test]
fn test_get_by_extension() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
registry.register(descriptor).unwrap();
let retrieved = registry.get_by_extension("tst").unwrap();
assert_eq!(retrieved.id, "test");
// Test case insensitivity
let retrieved = registry.get_by_extension("TST").unwrap();
assert_eq!(retrieved.id, "test");
}
#[test]
fn test_unregister() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
registry.register(descriptor).unwrap();
assert!(registry.contains("test"));
registry.unregister("test").unwrap();
assert!(!registry.contains("test"));
// Extension should also be removed
assert!(registry.get_by_extension("tst").is_none());
}
#[test]
fn test_list_by_plugin() {
let mut registry = MediaTypeRegistry::new();
let desc1 = MediaTypeDescriptor {
id: "type1".to_string(),
name: "Type 1".to_string(),
category: Some(MediaCategory::Document),
extensions: vec!["t1".to_string()],
mime_types: vec!["application/type1".to_string()],
plugin_id: Some("plugin1".to_string()),
};
let desc2 = MediaTypeDescriptor {
id: "type2".to_string(),
name: "Type 2".to_string(),
category: Some(MediaCategory::Document),
extensions: vec!["t2".to_string()],
mime_types: vec!["application/type2".to_string()],
plugin_id: Some("plugin2".to_string()),
};
registry.register(desc1).unwrap();
registry.register(desc2).unwrap();
let plugin1_types = registry.list_by_plugin("plugin1");
assert_eq!(plugin1_types.len(), 1);
assert_eq!(plugin1_types[0].id, "type1");
let plugin2_types = registry.list_by_plugin("plugin2");
assert_eq!(plugin2_types.len(), 1);
assert_eq!(plugin2_types[0].id, "type2");
}
#[test]
fn test_unregister_plugin() {
let mut registry = MediaTypeRegistry::new();
for i in 1..=3 {
let desc = MediaTypeDescriptor {
id: format!("type{}", i),
name: format!("Type {}", i),
category: Some(MediaCategory::Document),
extensions: vec![format!("t{}", i)],
mime_types: vec![format!("application/type{}", i)],
plugin_id: Some("test-plugin".to_string()),
};
registry.register(desc).unwrap();
}
#[test]
fn test_register_and_get() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
assert_eq!(registry.count(), 3);
registry.register(descriptor.clone()).unwrap();
let retrieved = registry.get("test").unwrap();
assert_eq!(retrieved.id, "test");
assert_eq!(retrieved.name, "test Type");
}
#[test]
fn test_register_duplicate() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
registry.register(descriptor.clone()).unwrap();
let result = registry.register(descriptor);
assert!(result.is_err());
}
#[test]
fn test_get_by_extension() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
registry.register(descriptor).unwrap();
let retrieved = registry.get_by_extension("tst").unwrap();
assert_eq!(retrieved.id, "test");
// Test case insensitivity
let retrieved = registry.get_by_extension("TST").unwrap();
assert_eq!(retrieved.id, "test");
}
#[test]
fn test_unregister() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
registry.register(descriptor).unwrap();
assert!(registry.contains("test"));
registry.unregister("test").unwrap();
assert!(!registry.contains("test"));
// Extension should also be removed
assert!(registry.get_by_extension("tst").is_none());
}
#[test]
fn test_list_by_plugin() {
let mut registry = MediaTypeRegistry::new();
let desc1 = MediaTypeDescriptor {
id: "type1".to_string(),
name: "Type 1".to_string(),
category: Some(MediaCategory::Document),
extensions: vec!["t1".to_string()],
mime_types: vec!["application/type1".to_string()],
plugin_id: Some("plugin1".to_string()),
};
let desc2 = MediaTypeDescriptor {
id: "type2".to_string(),
name: "Type 2".to_string(),
category: Some(MediaCategory::Document),
extensions: vec!["t2".to_string()],
mime_types: vec!["application/type2".to_string()],
plugin_id: Some("plugin2".to_string()),
};
registry.register(desc1).unwrap();
registry.register(desc2).unwrap();
let plugin1_types = registry.list_by_plugin("plugin1");
assert_eq!(plugin1_types.len(), 1);
assert_eq!(plugin1_types[0].id, "type1");
let plugin2_types = registry.list_by_plugin("plugin2");
assert_eq!(plugin2_types.len(), 1);
assert_eq!(plugin2_types[0].id, "type2");
}
#[test]
fn test_unregister_plugin() {
let mut registry = MediaTypeRegistry::new();
for i in 1..=3 {
let desc = MediaTypeDescriptor {
id: format!("type{}", i),
name: format!("Type {}", i),
category: Some(MediaCategory::Document),
extensions: vec![format!("t{}", i)],
mime_types: vec![format!("application/type{}", i)],
plugin_id: Some("test-plugin".to_string()),
};
registry.register(desc).unwrap();
}
assert_eq!(registry.count(), 3);
let removed = registry.unregister_plugin("test-plugin").unwrap();
assert_eq!(removed, 3);
assert_eq!(registry.count(), 0);
}
let removed = registry.unregister_plugin("test-plugin").unwrap();
assert_eq!(removed, 3);
assert_eq!(registry.count(), 0);
}
}

View file

@ -1,81 +1,91 @@
use std::path::Path;
use lofty::file::{AudioFile, TaggedFileExt};
use lofty::tag::Accessor;
use crate::error::{PinakesError, Result};
use crate::media_type::{BuiltinMediaType, MediaType};
use lofty::{
file::{AudioFile, TaggedFileExt},
tag::Accessor,
};
use super::{ExtractedMetadata, MetadataExtractor};
use crate::{
error::{PinakesError, Result},
media_type::{BuiltinMediaType, MediaType},
};
pub struct AudioExtractor;
impl MetadataExtractor for AudioExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
let tagged_file = lofty::read_from_path(path)
.map_err(|e| PinakesError::MetadataExtraction(format!("audio metadata: {e}")))?;
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
let tagged_file = lofty::read_from_path(path).map_err(|e| {
PinakesError::MetadataExtraction(format!("audio metadata: {e}"))
})?;
let mut meta = ExtractedMetadata::default();
let mut meta = ExtractedMetadata::default();
if let Some(tag) = tagged_file
.primary_tag()
.or_else(|| tagged_file.first_tag())
{
meta.title = tag.title().map(|s| s.to_string());
meta.artist = tag.artist().map(|s| s.to_string());
meta.album = tag.album().map(|s| s.to_string());
meta.genre = tag.genre().map(|s| s.to_string());
meta.year = tag.date().map(|ts| ts.year as i32);
}
if let Some(tag) = tagged_file
.primary_tag()
.or_else(|| tagged_file.first_tag())
{
if let Some(track) = tag.track() {
meta.extra
.insert("track_number".to_string(), track.to_string());
}
if let Some(disc) = tag.disk() {
meta.extra
.insert("disc_number".to_string(), disc.to_string());
}
if let Some(comment) = tag.comment() {
meta.extra
.insert("comment".to_string(), comment.to_string());
}
}
let properties = tagged_file.properties();
let duration = properties.duration();
if !duration.is_zero() {
meta.duration_secs = Some(duration.as_secs_f64());
}
if let Some(bitrate) = properties.audio_bitrate() {
meta.extra
.insert("bitrate".to_string(), format!("{bitrate} kbps"));
}
if let Some(sample_rate) = properties.sample_rate() {
meta.extra
.insert("sample_rate".to_string(), format!("{sample_rate} Hz"));
}
if let Some(channels) = properties.channels() {
meta.extra
.insert("channels".to_string(), channels.to_string());
}
Ok(meta)
if let Some(tag) = tagged_file
.primary_tag()
.or_else(|| tagged_file.first_tag())
{
meta.title = tag.title().map(|s| s.to_string());
meta.artist = tag.artist().map(|s| s.to_string());
meta.album = tag.album().map(|s| s.to_string());
meta.genre = tag.genre().map(|s| s.to_string());
meta.year = tag.date().map(|ts| ts.year as i32);
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Mp3),
MediaType::Builtin(BuiltinMediaType::Flac),
MediaType::Builtin(BuiltinMediaType::Ogg),
MediaType::Builtin(BuiltinMediaType::Wav),
MediaType::Builtin(BuiltinMediaType::Aac),
MediaType::Builtin(BuiltinMediaType::Opus),
]
if let Some(tag) = tagged_file
.primary_tag()
.or_else(|| tagged_file.first_tag())
{
if let Some(track) = tag.track() {
meta
.extra
.insert("track_number".to_string(), track.to_string());
}
if let Some(disc) = tag.disk() {
meta
.extra
.insert("disc_number".to_string(), disc.to_string());
}
if let Some(comment) = tag.comment() {
meta
.extra
.insert("comment".to_string(), comment.to_string());
}
}
let properties = tagged_file.properties();
let duration = properties.duration();
if !duration.is_zero() {
meta.duration_secs = Some(duration.as_secs_f64());
}
if let Some(bitrate) = properties.audio_bitrate() {
meta
.extra
.insert("bitrate".to_string(), format!("{bitrate} kbps"));
}
if let Some(sample_rate) = properties.sample_rate() {
meta
.extra
.insert("sample_rate".to_string(), format!("{sample_rate} Hz"));
}
if let Some(channels) = properties.channels() {
meta
.extra
.insert("channels".to_string(), channels.to_string());
}
Ok(meta)
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Mp3),
MediaType::Builtin(BuiltinMediaType::Flac),
MediaType::Builtin(BuiltinMediaType::Ogg),
MediaType::Builtin(BuiltinMediaType::Wav),
MediaType::Builtin(BuiltinMediaType::Aac),
MediaType::Builtin(BuiltinMediaType::Opus),
]
}
}

View file

@ -1,358 +1,367 @@
use std::path::Path;
use crate::error::{PinakesError, Result};
use crate::media_type::{BuiltinMediaType, MediaType};
use super::{ExtractedMetadata, MetadataExtractor};
use crate::{
error::{PinakesError, Result},
media_type::{BuiltinMediaType, MediaType},
};
pub struct DocumentExtractor;
impl MetadataExtractor for DocumentExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
match MediaType::from_path(path) {
Some(MediaType::Builtin(BuiltinMediaType::Pdf)) => extract_pdf(path),
Some(MediaType::Builtin(BuiltinMediaType::Epub)) => extract_epub(path),
Some(MediaType::Builtin(BuiltinMediaType::Djvu)) => extract_djvu(path),
_ => Ok(ExtractedMetadata::default()),
}
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
match MediaType::from_path(path) {
Some(MediaType::Builtin(BuiltinMediaType::Pdf)) => extract_pdf(path),
Some(MediaType::Builtin(BuiltinMediaType::Epub)) => extract_epub(path),
Some(MediaType::Builtin(BuiltinMediaType::Djvu)) => extract_djvu(path),
_ => Ok(ExtractedMetadata::default()),
}
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Pdf),
MediaType::Builtin(BuiltinMediaType::Epub),
MediaType::Builtin(BuiltinMediaType::Djvu),
]
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Pdf),
MediaType::Builtin(BuiltinMediaType::Epub),
MediaType::Builtin(BuiltinMediaType::Djvu),
]
}
}
fn extract_pdf(path: &Path) -> Result<ExtractedMetadata> {
let doc = lopdf::Document::load(path)
.map_err(|e| PinakesError::MetadataExtraction(format!("PDF load: {e}")))?;
let doc = lopdf::Document::load(path)
.map_err(|e| PinakesError::MetadataExtraction(format!("PDF load: {e}")))?;
let mut meta = ExtractedMetadata::default();
let mut book_meta = crate::model::ExtractedBookMetadata::default();
let mut meta = ExtractedMetadata::default();
let mut book_meta = crate::model::ExtractedBookMetadata::default();
// Find the Info dictionary via the trailer
if let Ok(info_ref) = doc.trailer.get(b"Info") {
let info_obj = if let Ok(reference) = info_ref.as_reference() {
doc.get_object(reference).ok()
} else {
Some(info_ref)
};
// Find the Info dictionary via the trailer
if let Ok(info_ref) = doc.trailer.get(b"Info") {
let info_obj = if let Ok(reference) = info_ref.as_reference() {
doc.get_object(reference).ok()
} else {
Some(info_ref)
};
if let Some(obj) = info_obj
&& let Ok(dict) = obj.as_dict()
{
if let Ok(title) = dict.get(b"Title") {
meta.title = pdf_object_to_string(title);
}
if let Ok(author) = dict.get(b"Author") {
let author_str = pdf_object_to_string(author);
meta.artist = author_str.clone();
// Parse multiple authors if separated by semicolon, comma, or "and"
if let Some(authors_str) = author_str {
let author_names: Vec<String> = authors_str
.split(&[';', ','][..])
.flat_map(|part| part.split(" and "))
.map(|name| name.trim().to_string())
.filter(|name| !name.is_empty())
.collect();
book_meta.authors = author_names
.into_iter()
.enumerate()
.map(|(pos, name)| {
let mut author = crate::model::AuthorInfo::new(name);
author.position = pos as i32;
author
})
.collect();
}
}
if let Ok(subject) = dict.get(b"Subject") {
meta.description = pdf_object_to_string(subject);
}
if let Ok(creator) = dict.get(b"Creator") {
meta.extra.insert(
"creator".to_string(),
pdf_object_to_string(creator).unwrap_or_default(),
);
}
if let Ok(producer) = dict.get(b"Producer") {
meta.extra.insert(
"producer".to_string(),
pdf_object_to_string(producer).unwrap_or_default(),
);
}
}
}
// Page count
let pages = doc.get_pages();
let page_count = pages.len();
if page_count > 0 {
book_meta.page_count = Some(page_count as i32);
}
// Try to extract ISBN from first few pages
// Extract text from up to the first 5 pages and search for ISBN patterns
let mut extracted_text = String::new();
let max_pages = page_count.min(5);
for (_page_num, page_id) in pages.iter().take(max_pages) {
if let Ok(content) = doc.get_page_content(*page_id) {
// PDF content streams contain raw operators, but may have text strings
if let Ok(text) = std::str::from_utf8(&content) {
extracted_text.push_str(text);
extracted_text.push(' ');
}
}
}
// Extract ISBN from the text
if let Some(isbn) = crate::books::extract_isbn_from_text(&extracted_text)
&& let Ok(normalized) = crate::books::normalize_isbn(&isbn)
if let Some(obj) = info_obj
&& let Ok(dict) = obj.as_dict()
{
book_meta.isbn13 = Some(normalized);
book_meta.isbn = Some(isbn);
if let Ok(title) = dict.get(b"Title") {
meta.title = pdf_object_to_string(title);
}
if let Ok(author) = dict.get(b"Author") {
let author_str = pdf_object_to_string(author);
meta.artist = author_str.clone();
// Parse multiple authors if separated by semicolon, comma, or "and"
if let Some(authors_str) = author_str {
let author_names: Vec<String> = authors_str
.split(&[';', ','][..])
.flat_map(|part| part.split(" and "))
.map(|name| name.trim().to_string())
.filter(|name| !name.is_empty())
.collect();
book_meta.authors = author_names
.into_iter()
.enumerate()
.map(|(pos, name)| {
let mut author = crate::model::AuthorInfo::new(name);
author.position = pos as i32;
author
})
.collect();
}
}
if let Ok(subject) = dict.get(b"Subject") {
meta.description = pdf_object_to_string(subject);
}
if let Ok(creator) = dict.get(b"Creator") {
meta.extra.insert(
"creator".to_string(),
pdf_object_to_string(creator).unwrap_or_default(),
);
}
if let Ok(producer) = dict.get(b"Producer") {
meta.extra.insert(
"producer".to_string(),
pdf_object_to_string(producer).unwrap_or_default(),
);
}
}
}
// Set format
book_meta.format = Some("pdf".to_string());
// Page count
let pages = doc.get_pages();
let page_count = pages.len();
if page_count > 0 {
book_meta.page_count = Some(page_count as i32);
}
meta.book_metadata = Some(book_meta);
Ok(meta)
// Try to extract ISBN from first few pages
// Extract text from up to the first 5 pages and search for ISBN patterns
let mut extracted_text = String::new();
let max_pages = page_count.min(5);
for (_page_num, page_id) in pages.iter().take(max_pages) {
if let Ok(content) = doc.get_page_content(*page_id) {
// PDF content streams contain raw operators, but may have text strings
if let Ok(text) = std::str::from_utf8(&content) {
extracted_text.push_str(text);
extracted_text.push(' ');
}
}
}
// Extract ISBN from the text
if let Some(isbn) = crate::books::extract_isbn_from_text(&extracted_text)
&& let Ok(normalized) = crate::books::normalize_isbn(&isbn)
{
book_meta.isbn13 = Some(normalized);
book_meta.isbn = Some(isbn);
}
// Set format
book_meta.format = Some("pdf".to_string());
meta.book_metadata = Some(book_meta);
Ok(meta)
}
fn pdf_object_to_string(obj: &lopdf::Object) -> Option<String> {
match obj {
lopdf::Object::String(bytes, _) => Some(String::from_utf8_lossy(bytes).into_owned()),
lopdf::Object::Name(name) => Some(String::from_utf8_lossy(name).into_owned()),
_ => None,
}
match obj {
lopdf::Object::String(bytes, _) => {
Some(String::from_utf8_lossy(bytes).into_owned())
},
lopdf::Object::Name(name) => {
Some(String::from_utf8_lossy(name).into_owned())
},
_ => None,
}
}
fn extract_epub(path: &Path) -> Result<ExtractedMetadata> {
let mut doc = epub::doc::EpubDoc::new(path)
.map_err(|e| PinakesError::MetadataExtraction(format!("EPUB parse: {e}")))?;
let mut doc = epub::doc::EpubDoc::new(path).map_err(|e| {
PinakesError::MetadataExtraction(format!("EPUB parse: {e}"))
})?;
let mut meta = ExtractedMetadata {
title: doc.mdata("title").map(|item| item.value.clone()),
artist: doc.mdata("creator").map(|item| item.value.clone()),
description: doc.mdata("description").map(|item| item.value.clone()),
..Default::default()
};
let mut meta = ExtractedMetadata {
title: doc.mdata("title").map(|item| item.value.clone()),
artist: doc.mdata("creator").map(|item| item.value.clone()),
description: doc.mdata("description").map(|item| item.value.clone()),
..Default::default()
};
let mut book_meta = crate::model::ExtractedBookMetadata::default();
let mut book_meta = crate::model::ExtractedBookMetadata::default();
// Extract basic metadata
if let Some(lang) = doc.mdata("language") {
book_meta.language = Some(lang.value.clone());
// Extract basic metadata
if let Some(lang) = doc.mdata("language") {
book_meta.language = Some(lang.value.clone());
}
if let Some(publisher) = doc.mdata("publisher") {
book_meta.publisher = Some(publisher.value.clone());
}
if let Some(date) = doc.mdata("date") {
// Try to parse as YYYY-MM-DD or just YYYY
if let Ok(parsed_date) =
chrono::NaiveDate::parse_from_str(&date.value, "%Y-%m-%d")
{
book_meta.publication_date = Some(parsed_date);
} else if let Ok(year) = date.value.parse::<i32>() {
book_meta.publication_date = chrono::NaiveDate::from_ymd_opt(year, 1, 1);
}
if let Some(publisher) = doc.mdata("publisher") {
book_meta.publisher = Some(publisher.value.clone());
}
// Extract authors - iterate through all metadata items
let mut authors = Vec::new();
let mut position = 0;
for item in &doc.metadata {
if item.property == "creator" || item.property == "dc:creator" {
let mut author = crate::model::AuthorInfo::new(item.value.clone());
author.position = position;
position += 1;
// Check for file-as in refinements
if let Some(file_as_ref) = item.refinement("file-as") {
author.file_as = Some(file_as_ref.value.clone());
}
// Check for role in refinements
if let Some(role_ref) = item.refinement("role") {
author.role = role_ref.value.clone();
}
authors.push(author);
}
if let Some(date) = doc.mdata("date") {
// Try to parse as YYYY-MM-DD or just YYYY
if let Ok(parsed_date) = chrono::NaiveDate::parse_from_str(&date.value, "%Y-%m-%d") {
book_meta.publication_date = Some(parsed_date);
} else if let Ok(year) = date.value.parse::<i32>() {
book_meta.publication_date = chrono::NaiveDate::from_ymd_opt(year, 1, 1);
}
}
book_meta.authors = authors;
// Extract ISBNs from identifiers
let mut identifiers = std::collections::HashMap::new();
for item in &doc.metadata {
if item.property == "identifier" || item.property == "dc:identifier" {
// Try to get scheme from refinements
let scheme = item
.refinement("identifier-type")
.map(|r| r.value.to_lowercase());
let id_type = match scheme.as_deref() {
Some("isbn") => "isbn",
Some("isbn-10") | Some("isbn10") => "isbn",
Some("isbn-13") | Some("isbn13") => "isbn13",
Some("asin") => "asin",
Some("doi") => "doi",
_ => {
// Fallback: detect from value pattern
if item.value.len() == 10
|| item.value.len() == 13
|| item.value.contains('-') && item.value.len() < 20
{
"isbn"
} else {
"other"
}
},
};
// Try to normalize ISBN
if (id_type == "isbn" || id_type == "isbn13")
&& let Ok(normalized) = crate::books::normalize_isbn(&item.value)
{
book_meta.isbn13 = Some(normalized.clone());
book_meta.isbn = Some(item.value.clone());
}
identifiers
.entry(id_type.to_string())
.or_insert_with(Vec::new)
.push(item.value.clone());
}
}
book_meta.identifiers = identifiers;
// Extract Calibre series metadata by parsing the content.opf file
// Try common OPF locations
let opf_paths = vec!["OEBPS/content.opf", "content.opf", "OPS/content.opf"];
let mut opf_data = None;
for path in opf_paths {
if let Some(data) = doc.get_resource_str_by_path(path) {
opf_data = Some(data);
break;
}
}
if let Some(opf_content) = opf_data {
// Look for <meta name="calibre:series" content="Series Name"/>
if let Some(series_start) = opf_content.find("name=\"calibre:series\"")
&& let Some(content_start) =
opf_content[series_start..].find("content=\"")
{
let after_content = &opf_content[series_start + content_start + 9..];
if let Some(quote_end) = after_content.find('"') {
book_meta.series_name = Some(after_content[..quote_end].to_string());
}
}
// Extract authors - iterate through all metadata items
let mut authors = Vec::new();
let mut position = 0;
for item in &doc.metadata {
if item.property == "creator" || item.property == "dc:creator" {
let mut author = crate::model::AuthorInfo::new(item.value.clone());
author.position = position;
position += 1;
// Check for file-as in refinements
if let Some(file_as_ref) = item.refinement("file-as") {
author.file_as = Some(file_as_ref.value.clone());
}
// Check for role in refinements
if let Some(role_ref) = item.refinement("role") {
author.role = role_ref.value.clone();
}
authors.push(author);
}
// Look for <meta name="calibre:series_index" content="1.0"/>
if let Some(index_start) = opf_content.find("name=\"calibre:series_index\"")
&& let Some(content_start) = opf_content[index_start..].find("content=\"")
{
let after_content = &opf_content[index_start + content_start + 9..];
if let Some(quote_end) = after_content.find('"')
&& let Ok(index) = after_content[..quote_end].parse::<f64>()
{
book_meta.series_index = Some(index);
}
}
book_meta.authors = authors;
}
// Extract ISBNs from identifiers
let mut identifiers = std::collections::HashMap::new();
for item in &doc.metadata {
if item.property == "identifier" || item.property == "dc:identifier" {
// Try to get scheme from refinements
let scheme = item
.refinement("identifier-type")
.map(|r| r.value.to_lowercase());
// Set format
book_meta.format = Some("epub".to_string());
let id_type = match scheme.as_deref() {
Some("isbn") => "isbn",
Some("isbn-10") | Some("isbn10") => "isbn",
Some("isbn-13") | Some("isbn13") => "isbn13",
Some("asin") => "asin",
Some("doi") => "doi",
_ => {
// Fallback: detect from value pattern
if item.value.len() == 10
|| item.value.len() == 13
|| item.value.contains('-') && item.value.len() < 20
{
"isbn"
} else {
"other"
}
}
};
// Try to normalize ISBN
if (id_type == "isbn" || id_type == "isbn13")
&& let Ok(normalized) = crate::books::normalize_isbn(&item.value)
{
book_meta.isbn13 = Some(normalized.clone());
book_meta.isbn = Some(item.value.clone());
}
identifiers
.entry(id_type.to_string())
.or_insert_with(Vec::new)
.push(item.value.clone());
}
}
book_meta.identifiers = identifiers;
// Extract Calibre series metadata by parsing the content.opf file
// Try common OPF locations
let opf_paths = vec!["OEBPS/content.opf", "content.opf", "OPS/content.opf"];
let mut opf_data = None;
for path in opf_paths {
if let Some(data) = doc.get_resource_str_by_path(path) {
opf_data = Some(data);
break;
}
}
if let Some(opf_content) = opf_data {
// Look for <meta name="calibre:series" content="Series Name"/>
if let Some(series_start) = opf_content.find("name=\"calibre:series\"")
&& let Some(content_start) = opf_content[series_start..].find("content=\"")
{
let after_content = &opf_content[series_start + content_start + 9..];
if let Some(quote_end) = after_content.find('"') {
book_meta.series_name = Some(after_content[..quote_end].to_string());
}
}
// Look for <meta name="calibre:series_index" content="1.0"/>
if let Some(index_start) = opf_content.find("name=\"calibre:series_index\"")
&& let Some(content_start) = opf_content[index_start..].find("content=\"")
{
let after_content = &opf_content[index_start + content_start + 9..];
if let Some(quote_end) = after_content.find('"')
&& let Ok(index) = after_content[..quote_end].parse::<f64>()
{
book_meta.series_index = Some(index);
}
}
}
// Set format
book_meta.format = Some("epub".to_string());
meta.book_metadata = Some(book_meta);
Ok(meta)
meta.book_metadata = Some(book_meta);
Ok(meta)
}
fn extract_djvu(path: &Path) -> Result<ExtractedMetadata> {
// DjVu files contain metadata in SEXPR (S-expression) format within
// ANTa/ANTz chunks, or in the DIRM chunk. We parse the raw bytes to
// extract any metadata fields we can find.
let data = std::fs::read(path)
.map_err(|e| PinakesError::MetadataExtraction(format!("DjVu read: {e}")))?;
// DjVu files contain metadata in SEXPR (S-expression) format within
// ANTa/ANTz chunks, or in the DIRM chunk. We parse the raw bytes to
// extract any metadata fields we can find.
let data = std::fs::read(path)
.map_err(|e| PinakesError::MetadataExtraction(format!("DjVu read: {e}")))?;
let mut meta = ExtractedMetadata::default();
let mut meta = ExtractedMetadata::default();
// DjVu files start with "AT&T" magic followed by FORM:DJVU or FORM:DJVM
if data.len() < 16 {
return Ok(meta);
// DjVu files start with "AT&T" magic followed by FORM:DJVU or FORM:DJVM
if data.len() < 16 {
return Ok(meta);
}
// Search for metadata annotations in the file. DjVu metadata is stored
// as S-expressions like (metadata (key "value") ...) within ANTa chunks.
let content = String::from_utf8_lossy(&data);
// Look for (metadata ...) blocks
if let Some(meta_start) = content.find("(metadata") {
let remainder = &content[meta_start..];
// Extract key-value pairs like (title "Some Title")
extract_djvu_field(remainder, "title", &mut meta.title);
extract_djvu_field(remainder, "author", &mut meta.artist);
let mut desc = None;
extract_djvu_field(remainder, "subject", &mut desc);
if desc.is_none() {
extract_djvu_field(remainder, "description", &mut desc);
}
meta.description = desc;
let mut year_str = None;
extract_djvu_field(remainder, "year", &mut year_str);
if let Some(ref y) = year_str {
meta.year = y.parse().ok();
}
// Search for metadata annotations in the file. DjVu metadata is stored
// as S-expressions like (metadata (key "value") ...) within ANTa chunks.
let content = String::from_utf8_lossy(&data);
// Look for (metadata ...) blocks
if let Some(meta_start) = content.find("(metadata") {
let remainder = &content[meta_start..];
// Extract key-value pairs like (title "Some Title")
extract_djvu_field(remainder, "title", &mut meta.title);
extract_djvu_field(remainder, "author", &mut meta.artist);
let mut desc = None;
extract_djvu_field(remainder, "subject", &mut desc);
if desc.is_none() {
extract_djvu_field(remainder, "description", &mut desc);
}
meta.description = desc;
let mut year_str = None;
extract_djvu_field(remainder, "year", &mut year_str);
if let Some(ref y) = year_str {
meta.year = y.parse().ok();
}
let mut creator = None;
extract_djvu_field(remainder, "creator", &mut creator);
if let Some(c) = creator {
meta.extra.insert("creator".to_string(), c);
}
let mut creator = None;
extract_djvu_field(remainder, "creator", &mut creator);
if let Some(c) = creator {
meta.extra.insert("creator".to_string(), c);
}
}
// Also check for booklet-style metadata that some DjVu encoders write
// outside the metadata SEXPR
if meta.title.is_none()
&& let Some(title_start) = content.find("(bookmarks")
{
let remainder = &content[title_start..];
// First bookmark title is often the document title
if let Some(q1) = remainder.find('"') {
let after_q1 = &remainder[q1 + 1..];
if let Some(q2) = after_q1.find('"') {
let val = &after_q1[..q2];
if !val.is_empty() {
meta.title = Some(val.to_string());
}
}
// Also check for booklet-style metadata that some DjVu encoders write
// outside the metadata SEXPR
if meta.title.is_none()
&& let Some(title_start) = content.find("(bookmarks")
{
let remainder = &content[title_start..];
// First bookmark title is often the document title
if let Some(q1) = remainder.find('"') {
let after_q1 = &remainder[q1 + 1..];
if let Some(q2) = after_q1.find('"') {
let val = &after_q1[..q2];
if !val.is_empty() {
meta.title = Some(val.to_string());
}
}
}
}
Ok(meta)
Ok(meta)
}
/// Search `sexpr` for a `(key "value")` pair and, if found with a non-empty
/// quoted value, store the value in `out`.
///
/// `out` is left untouched when the key is absent or its value is empty,
/// which lets callers layer fallbacks (e.g. "subject" then "description").
fn extract_djvu_field(sexpr: &str, key: &str, out: &mut Option<String>) {
    // Match the opening of the pair, e.g. "(title"; the quoted value follows.
    let pattern = format!("({key}");
    if let Some(start) = sexpr.find(&pattern) {
        let remainder = &sexpr[start + pattern.len()..];
        // Find the quoted value between the next pair of double quotes.
        if let Some(q1) = remainder.find('"') {
            let after_q1 = &remainder[q1 + 1..];
            if let Some(q2) = after_q1.find('"') {
                let val = &after_q1[..q2];
                if !val.is_empty() {
                    *out = Some(val.to_string());
                }
            }
        }
    }
}

View file

@ -1,263 +1,297 @@
use std::path::Path;
use crate::error::Result;
use crate::media_type::{BuiltinMediaType, MediaType};
use super::{ExtractedMetadata, MetadataExtractor};
use crate::{
error::Result,
media_type::{BuiltinMediaType, MediaType},
};
pub struct ImageExtractor;
impl MetadataExtractor for ImageExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
let mut meta = ExtractedMetadata::default();
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
let mut meta = ExtractedMetadata::default();
let file = std::fs::File::open(path)?;
let mut buf_reader = std::io::BufReader::new(&file);
let file = std::fs::File::open(path)?;
let mut buf_reader = std::io::BufReader::new(&file);
let exif_data = match exif::Reader::new().read_from_container(&mut buf_reader) {
Ok(exif) => exif,
Err(_) => return Ok(meta),
};
let exif_data =
match exif::Reader::new().read_from_container(&mut buf_reader) {
Ok(exif) => exif,
Err(_) => return Ok(meta),
};
// Image dimensions
if let Some(width) = exif_data
.get_field(exif::Tag::PixelXDimension, exif::In::PRIMARY)
.or_else(|| exif_data.get_field(exif::Tag::ImageWidth, exif::In::PRIMARY))
&& let Some(w) = field_to_u32(width)
{
meta.extra.insert("width".to_string(), w.to_string());
}
if let Some(height) = exif_data
.get_field(exif::Tag::PixelYDimension, exif::In::PRIMARY)
.or_else(|| exif_data.get_field(exif::Tag::ImageLength, exif::In::PRIMARY))
&& let Some(h) = field_to_u32(height)
{
meta.extra.insert("height".to_string(), h.to_string());
}
// Camera make and model - set both in top-level fields and extra
if let Some(make) = exif_data.get_field(exif::Tag::Make, exif::In::PRIMARY) {
let val = make.display_value().to_string().trim().to_string();
if !val.is_empty() {
meta.camera_make = Some(val.clone());
meta.extra.insert("camera_make".to_string(), val);
}
}
if let Some(model) = exif_data.get_field(exif::Tag::Model, exif::In::PRIMARY) {
let val = model.display_value().to_string().trim().to_string();
if !val.is_empty() {
meta.camera_model = Some(val.clone());
meta.extra.insert("camera_model".to_string(), val);
}
}
// Date taken - parse EXIF date format (YYYY:MM:DD HH:MM:SS)
if let Some(date) = exif_data
.get_field(exif::Tag::DateTimeOriginal, exif::In::PRIMARY)
.or_else(|| exif_data.get_field(exif::Tag::DateTime, exif::In::PRIMARY))
{
let val = date.display_value().to_string();
if !val.is_empty() {
// Try parsing EXIF format: "YYYY:MM:DD HH:MM:SS"
if let Some(dt) = parse_exif_datetime(&val) {
meta.date_taken = Some(dt);
}
meta.extra.insert("date_taken".to_string(), val);
}
}
// GPS coordinates - set both in top-level fields and extra
if let (Some(lat), Some(lat_ref), Some(lon), Some(lon_ref)) = (
exif_data.get_field(exif::Tag::GPSLatitude, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLatitudeRef, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLongitude, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLongitudeRef, exif::In::PRIMARY),
) && let (Some(lat_val), Some(lon_val)) =
(dms_to_decimal(lat, lat_ref), dms_to_decimal(lon, lon_ref))
{
meta.latitude = Some(lat_val);
meta.longitude = Some(lon_val);
meta.extra
.insert("gps_latitude".to_string(), format!("{lat_val:.6}"));
meta.extra
.insert("gps_longitude".to_string(), format!("{lon_val:.6}"));
}
// Exposure info
if let Some(iso) =
exif_data.get_field(exif::Tag::PhotographicSensitivity, exif::In::PRIMARY)
{
let val = iso.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("iso".to_string(), val);
}
}
if let Some(exposure) = exif_data.get_field(exif::Tag::ExposureTime, exif::In::PRIMARY) {
let val = exposure.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("exposure_time".to_string(), val);
}
}
if let Some(aperture) = exif_data.get_field(exif::Tag::FNumber, exif::In::PRIMARY) {
let val = aperture.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("f_number".to_string(), val);
}
}
if let Some(focal) = exif_data.get_field(exif::Tag::FocalLength, exif::In::PRIMARY) {
let val = focal.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("focal_length".to_string(), val);
}
}
// Lens model
if let Some(lens) = exif_data.get_field(exif::Tag::LensModel, exif::In::PRIMARY) {
let val = lens.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.extra
.insert("lens_model".to_string(), val.trim_matches('"').to_string());
}
}
// Flash
if let Some(flash) = exif_data.get_field(exif::Tag::Flash, exif::In::PRIMARY) {
let val = flash.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("flash".to_string(), val);
}
}
// Orientation
if let Some(orientation) = exif_data.get_field(exif::Tag::Orientation, exif::In::PRIMARY) {
let val = orientation.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("orientation".to_string(), val);
}
}
// Software
if let Some(software) = exif_data.get_field(exif::Tag::Software, exif::In::PRIMARY) {
let val = software.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("software".to_string(), val);
}
}
// Image description as title
if let Some(desc) = exif_data.get_field(exif::Tag::ImageDescription, exif::In::PRIMARY) {
let val = desc.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.title = Some(val.trim_matches('"').to_string());
}
}
// Artist
if let Some(artist) = exif_data.get_field(exif::Tag::Artist, exif::In::PRIMARY) {
let val = artist.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.artist = Some(val.trim_matches('"').to_string());
}
}
// Copyright as description
if let Some(copyright) = exif_data.get_field(exif::Tag::Copyright, exif::In::PRIMARY) {
let val = copyright.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.description = Some(val.trim_matches('"').to_string());
}
}
Ok(meta)
// Image dimensions
if let Some(width) = exif_data
.get_field(exif::Tag::PixelXDimension, exif::In::PRIMARY)
.or_else(|| exif_data.get_field(exif::Tag::ImageWidth, exif::In::PRIMARY))
&& let Some(w) = field_to_u32(width)
{
meta.extra.insert("width".to_string(), w.to_string());
}
if let Some(height) = exif_data
.get_field(exif::Tag::PixelYDimension, exif::In::PRIMARY)
.or_else(|| {
exif_data.get_field(exif::Tag::ImageLength, exif::In::PRIMARY)
})
&& let Some(h) = field_to_u32(height)
{
meta.extra.insert("height".to_string(), h.to_string());
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Jpeg),
MediaType::Builtin(BuiltinMediaType::Png),
MediaType::Builtin(BuiltinMediaType::Gif),
MediaType::Builtin(BuiltinMediaType::Webp),
MediaType::Builtin(BuiltinMediaType::Avif),
MediaType::Builtin(BuiltinMediaType::Tiff),
MediaType::Builtin(BuiltinMediaType::Bmp),
// RAW formats (TIFF-based, kamadak-exif handles these)
MediaType::Builtin(BuiltinMediaType::Cr2),
MediaType::Builtin(BuiltinMediaType::Nef),
MediaType::Builtin(BuiltinMediaType::Arw),
MediaType::Builtin(BuiltinMediaType::Dng),
MediaType::Builtin(BuiltinMediaType::Orf),
MediaType::Builtin(BuiltinMediaType::Rw2),
// HEIC
MediaType::Builtin(BuiltinMediaType::Heic),
]
// Camera make and model - set both in top-level fields and extra
if let Some(make) = exif_data.get_field(exif::Tag::Make, exif::In::PRIMARY)
{
let val = make.display_value().to_string().trim().to_string();
if !val.is_empty() {
meta.camera_make = Some(val.clone());
meta.extra.insert("camera_make".to_string(), val);
}
}
if let Some(model) =
exif_data.get_field(exif::Tag::Model, exif::In::PRIMARY)
{
let val = model.display_value().to_string().trim().to_string();
if !val.is_empty() {
meta.camera_model = Some(val.clone());
meta.extra.insert("camera_model".to_string(), val);
}
}
// Date taken - parse EXIF date format (YYYY:MM:DD HH:MM:SS)
if let Some(date) = exif_data
.get_field(exif::Tag::DateTimeOriginal, exif::In::PRIMARY)
.or_else(|| exif_data.get_field(exif::Tag::DateTime, exif::In::PRIMARY))
{
let val = date.display_value().to_string();
if !val.is_empty() {
// Try parsing EXIF format: "YYYY:MM:DD HH:MM:SS"
if let Some(dt) = parse_exif_datetime(&val) {
meta.date_taken = Some(dt);
}
meta.extra.insert("date_taken".to_string(), val);
}
}
// GPS coordinates - set both in top-level fields and extra
if let (Some(lat), Some(lat_ref), Some(lon), Some(lon_ref)) = (
exif_data.get_field(exif::Tag::GPSLatitude, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLatitudeRef, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLongitude, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLongitudeRef, exif::In::PRIMARY),
) && let (Some(lat_val), Some(lon_val)) =
(dms_to_decimal(lat, lat_ref), dms_to_decimal(lon, lon_ref))
{
meta.latitude = Some(lat_val);
meta.longitude = Some(lon_val);
meta
.extra
.insert("gps_latitude".to_string(), format!("{lat_val:.6}"));
meta
.extra
.insert("gps_longitude".to_string(), format!("{lon_val:.6}"));
}
// Exposure info
if let Some(iso) =
exif_data.get_field(exif::Tag::PhotographicSensitivity, exif::In::PRIMARY)
{
let val = iso.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("iso".to_string(), val);
}
}
if let Some(exposure) =
exif_data.get_field(exif::Tag::ExposureTime, exif::In::PRIMARY)
{
let val = exposure.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("exposure_time".to_string(), val);
}
}
if let Some(aperture) =
exif_data.get_field(exif::Tag::FNumber, exif::In::PRIMARY)
{
let val = aperture.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("f_number".to_string(), val);
}
}
if let Some(focal) =
exif_data.get_field(exif::Tag::FocalLength, exif::In::PRIMARY)
{
let val = focal.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("focal_length".to_string(), val);
}
}
// Lens model
if let Some(lens) =
exif_data.get_field(exif::Tag::LensModel, exif::In::PRIMARY)
{
let val = lens.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta
.extra
.insert("lens_model".to_string(), val.trim_matches('"').to_string());
}
}
// Flash
if let Some(flash) =
exif_data.get_field(exif::Tag::Flash, exif::In::PRIMARY)
{
let val = flash.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("flash".to_string(), val);
}
}
// Orientation
if let Some(orientation) =
exif_data.get_field(exif::Tag::Orientation, exif::In::PRIMARY)
{
let val = orientation.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("orientation".to_string(), val);
}
}
// Software
if let Some(software) =
exif_data.get_field(exif::Tag::Software, exif::In::PRIMARY)
{
let val = software.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("software".to_string(), val);
}
}
// Image description as title
if let Some(desc) =
exif_data.get_field(exif::Tag::ImageDescription, exif::In::PRIMARY)
{
let val = desc.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.title = Some(val.trim_matches('"').to_string());
}
}
// Artist
if let Some(artist) =
exif_data.get_field(exif::Tag::Artist, exif::In::PRIMARY)
{
let val = artist.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.artist = Some(val.trim_matches('"').to_string());
}
}
// Copyright as description
if let Some(copyright) =
exif_data.get_field(exif::Tag::Copyright, exif::In::PRIMARY)
{
let val = copyright.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.description = Some(val.trim_matches('"').to_string());
}
}
Ok(meta)
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Jpeg),
MediaType::Builtin(BuiltinMediaType::Png),
MediaType::Builtin(BuiltinMediaType::Gif),
MediaType::Builtin(BuiltinMediaType::Webp),
MediaType::Builtin(BuiltinMediaType::Avif),
MediaType::Builtin(BuiltinMediaType::Tiff),
MediaType::Builtin(BuiltinMediaType::Bmp),
// RAW formats (TIFF-based, kamadak-exif handles these)
MediaType::Builtin(BuiltinMediaType::Cr2),
MediaType::Builtin(BuiltinMediaType::Nef),
MediaType::Builtin(BuiltinMediaType::Arw),
MediaType::Builtin(BuiltinMediaType::Dng),
MediaType::Builtin(BuiltinMediaType::Orf),
MediaType::Builtin(BuiltinMediaType::Rw2),
// HEIC
MediaType::Builtin(BuiltinMediaType::Heic),
]
}
}
/// Convert an EXIF field holding a Long or Short value to `u32`.
///
/// Returns the first element of the value vector; other EXIF value kinds
/// (rationals, ASCII, etc.) yield `None`.
fn field_to_u32(field: &exif::Field) -> Option<u32> {
    match &field.value {
        exif::Value::Long(v) => v.first().copied(),
        // Shorts (u16) widen losslessly to u32.
        exif::Value::Short(v) => v.first().map(|&x| x as u32),
        _ => None,
    }
}
fn dms_to_decimal(dms_field: &exif::Field, ref_field: &exif::Field) -> Option<f64> {
if let exif::Value::Rational(ref rationals) = dms_field.value
&& rationals.len() >= 3
{
let degrees = rationals[0].to_f64();
let minutes = rationals[1].to_f64();
let seconds = rationals[2].to_f64();
let mut decimal = degrees + minutes / 60.0 + seconds / 3600.0;
fn dms_to_decimal(
dms_field: &exif::Field,
ref_field: &exif::Field,
) -> Option<f64> {
if let exif::Value::Rational(ref rationals) = dms_field.value
&& rationals.len() >= 3
{
let degrees = rationals[0].to_f64();
let minutes = rationals[1].to_f64();
let seconds = rationals[2].to_f64();
let mut decimal = degrees + minutes / 60.0 + seconds / 3600.0;
let ref_str = ref_field.display_value().to_string();
if ref_str.contains('S') || ref_str.contains('W') {
decimal = -decimal;
}
return Some(decimal);
let ref_str = ref_field.display_value().to_string();
if ref_str.contains('S') || ref_str.contains('W') {
decimal = -decimal;
}
None
return Some(decimal);
}
None
}
/// Parse an EXIF datetime string ("YYYY:MM:DD HH:MM:SS", with an ISO-style
/// "YYYY-MM-DD HH:MM:SS" fallback) into a UTC timestamp.
///
/// EXIF stores local time with no zone; we interpret it as UTC.
/// Returns `None` when neither format matches.
fn parse_exif_datetime(s: &str) -> Option<chrono::DateTime<chrono::Utc>> {
    use chrono::NaiveDateTime;

    // Some writers wrap the value in literal quotes; strip them first.
    let s = s.trim().trim_matches('"');

    // Try standard EXIF format
    if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S") {
        return Some(dt.and_utc());
    }

    // Try ISO format as fallback
    if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
        return Some(dt.and_utc());
    }

    None
}
/// Generate a perceptual hash for an image file.
/// Uses the double-gradient hash algorithm for robust similarity
/// detection. Returns a base64-encoded 64-bit hash string, or `None` if the
/// image cannot be opened or decoded.
pub fn generate_perceptual_hash(path: &Path) -> Option<String> {
    use image_hasher::{HashAlg, HasherConfig};

    // Open and decode the image; any decode failure yields None.
    let img = image::open(path).ok()?;

    // Create hasher with the DoubleGradient algorithm (good for finding
    // similar images).
    let hasher = HasherConfig::new()
        .hash_alg(HashAlg::DoubleGradient)
        .hash_size(8, 8) // 64-bit hash
        .to_hasher();

    let hash = hasher.hash_image(&img);

    // Base64-encode for compact storage/comparison.
    Some(hash.to_base64())
}

View file

@ -1,43 +1,45 @@
use std::path::Path;
use crate::error::Result;
use crate::media_type::{BuiltinMediaType, MediaType};
use super::{ExtractedMetadata, MetadataExtractor};
use crate::{
error::Result,
media_type::{BuiltinMediaType, MediaType},
};
pub struct MarkdownExtractor;
impl MetadataExtractor for MarkdownExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
let content = std::fs::read_to_string(path)?;
let parsed = gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(&content);
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
let content = std::fs::read_to_string(path)?;
let parsed =
gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(&content);
let mut meta = ExtractedMetadata::default();
let mut meta = ExtractedMetadata::default();
if let Some(data) = parsed.ok().and_then(|p| p.data)
&& let gray_matter::Pod::Hash(map) = data
{
if let Some(gray_matter::Pod::String(title)) = map.get("title") {
meta.title = Some(title.clone());
}
if let Some(gray_matter::Pod::String(author)) = map.get("author") {
meta.artist = Some(author.clone());
}
if let Some(gray_matter::Pod::String(desc)) = map.get("description") {
meta.description = Some(desc.clone());
}
if let Some(gray_matter::Pod::String(date)) = map.get("date") {
meta.extra.insert("date".to_string(), date.clone());
}
}
Ok(meta)
if let Some(data) = parsed.ok().and_then(|p| p.data)
&& let gray_matter::Pod::Hash(map) = data
{
if let Some(gray_matter::Pod::String(title)) = map.get("title") {
meta.title = Some(title.clone());
}
if let Some(gray_matter::Pod::String(author)) = map.get("author") {
meta.artist = Some(author.clone());
}
if let Some(gray_matter::Pod::String(desc)) = map.get("description") {
meta.description = Some(desc.clone());
}
if let Some(gray_matter::Pod::String(date)) = map.get("date") {
meta.extra.insert("date".to_string(), date.clone());
}
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Markdown),
MediaType::Builtin(BuiltinMediaType::PlainText),
]
}
Ok(meta)
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Markdown),
MediaType::Builtin(BuiltinMediaType::PlainText),
]
}
}

View file

@ -4,53 +4,57 @@ pub mod image;
pub mod markdown;
pub mod video;
use std::collections::HashMap;
use std::path::Path;
use std::{collections::HashMap, path::Path};
use crate::error::Result;
use crate::media_type::MediaType;
use crate::model::ExtractedBookMetadata;
use crate::{
error::Result,
media_type::MediaType,
model::ExtractedBookMetadata,
};
#[derive(Debug, Clone, Default)]
pub struct ExtractedMetadata {
pub title: Option<String>,
pub artist: Option<String>,
pub album: Option<String>,
pub genre: Option<String>,
pub year: Option<i32>,
pub duration_secs: Option<f64>,
pub description: Option<String>,
pub extra: HashMap<String, String>,
pub book_metadata: Option<ExtractedBookMetadata>,
pub title: Option<String>,
pub artist: Option<String>,
pub album: Option<String>,
pub genre: Option<String>,
pub year: Option<i32>,
pub duration_secs: Option<f64>,
pub description: Option<String>,
pub extra: HashMap<String, String>,
pub book_metadata: Option<ExtractedBookMetadata>,
// Photo-specific metadata
pub date_taken: Option<chrono::DateTime<chrono::Utc>>,
pub latitude: Option<f64>,
pub longitude: Option<f64>,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub rating: Option<i32>,
// Photo-specific metadata
pub date_taken: Option<chrono::DateTime<chrono::Utc>>,
pub latitude: Option<f64>,
pub longitude: Option<f64>,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub rating: Option<i32>,
}
/// A format-specific metadata extractor.
///
/// Implementors advertise the media types they handle via
/// `supported_types` and read metadata from a file via `extract`.
pub trait MetadataExtractor: Send + Sync {
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata>;
    fn supported_types(&self) -> Vec<MediaType>;
}
pub fn extract_metadata(path: &Path, media_type: MediaType) -> Result<ExtractedMetadata> {
let extractors: Vec<Box<dyn MetadataExtractor>> = vec![
Box::new(audio::AudioExtractor),
Box::new(document::DocumentExtractor),
Box::new(video::VideoExtractor),
Box::new(markdown::MarkdownExtractor),
Box::new(image::ImageExtractor),
];
pub fn extract_metadata(
path: &Path,
media_type: MediaType,
) -> Result<ExtractedMetadata> {
let extractors: Vec<Box<dyn MetadataExtractor>> = vec![
Box::new(audio::AudioExtractor),
Box::new(document::DocumentExtractor),
Box::new(video::VideoExtractor),
Box::new(markdown::MarkdownExtractor),
Box::new(image::ImageExtractor),
];
for extractor in &extractors {
if extractor.supported_types().contains(&media_type) {
return extractor.extract(path);
}
for extractor in &extractors {
if extractor.supported_types().contains(&media_type) {
return extractor.extract(path);
}
}
Ok(ExtractedMetadata::default())
Ok(ExtractedMetadata::default())
}

View file

@ -1,118 +1,128 @@
use std::path::Path;
use crate::error::{PinakesError, Result};
use crate::media_type::{BuiltinMediaType, MediaType};
use super::{ExtractedMetadata, MetadataExtractor};
use crate::{
error::{PinakesError, Result},
media_type::{BuiltinMediaType, MediaType},
};
pub struct VideoExtractor;
impl MetadataExtractor for VideoExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
match MediaType::from_path(path) {
Some(MediaType::Builtin(BuiltinMediaType::Mkv)) => extract_mkv(path),
Some(MediaType::Builtin(BuiltinMediaType::Mp4)) => extract_mp4(path),
_ => Ok(ExtractedMetadata::default()),
}
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
match MediaType::from_path(path) {
Some(MediaType::Builtin(BuiltinMediaType::Mkv)) => extract_mkv(path),
Some(MediaType::Builtin(BuiltinMediaType::Mp4)) => extract_mp4(path),
_ => Ok(ExtractedMetadata::default()),
}
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Mp4),
MediaType::Builtin(BuiltinMediaType::Mkv),
]
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Mp4),
MediaType::Builtin(BuiltinMediaType::Mkv),
]
}
}
fn extract_mkv(path: &Path) -> Result<ExtractedMetadata> {
let file = std::fs::File::open(path)?;
let mkv = matroska::Matroska::open(file)
.map_err(|e| PinakesError::MetadataExtraction(format!("MKV parse: {e}")))?;
let file = std::fs::File::open(path)?;
let mkv = matroska::Matroska::open(file)
.map_err(|e| PinakesError::MetadataExtraction(format!("MKV parse: {e}")))?;
let mut meta = ExtractedMetadata {
title: mkv.info.title.clone(),
duration_secs: mkv.info.duration.map(|dur| dur.as_secs_f64()),
..Default::default()
};
let mut meta = ExtractedMetadata {
title: mkv.info.title.clone(),
duration_secs: mkv.info.duration.map(|dur| dur.as_secs_f64()),
..Default::default()
};
// Extract resolution and codec info from tracks
for track in &mkv.tracks {
match &track.settings {
matroska::Settings::Video(v) => {
meta.extra.insert(
"resolution".to_string(),
format!("{}x{}", v.pixel_width, v.pixel_height),
);
if !track.codec_id.is_empty() {
meta.extra
.insert("video_codec".to_string(), track.codec_id.clone());
}
}
matroska::Settings::Audio(a) => {
meta.extra.insert(
"sample_rate".to_string(),
format!("{} Hz", a.sample_rate as u32),
);
meta.extra
.insert("channels".to_string(), a.channels.to_string());
if !track.codec_id.is_empty() {
meta.extra
.insert("audio_codec".to_string(), track.codec_id.clone());
}
}
_ => {}
// Extract resolution and codec info from tracks
for track in &mkv.tracks {
match &track.settings {
matroska::Settings::Video(v) => {
meta.extra.insert(
"resolution".to_string(),
format!("{}x{}", v.pixel_width, v.pixel_height),
);
if !track.codec_id.is_empty() {
meta
.extra
.insert("video_codec".to_string(), track.codec_id.clone());
}
},
matroska::Settings::Audio(a) => {
meta.extra.insert(
"sample_rate".to_string(),
format!("{} Hz", a.sample_rate as u32),
);
meta
.extra
.insert("channels".to_string(), a.channels.to_string());
if !track.codec_id.is_empty() {
meta
.extra
.insert("audio_codec".to_string(), track.codec_id.clone());
}
},
_ => {},
}
}
Ok(meta)
Ok(meta)
}
/// Extract tag metadata (title/artist/album/genre/year) and audio properties
/// (duration, bitrate, sample rate, channels) from an MP4 file via lofty.
///
/// # Errors
/// Returns `PinakesError::MetadataExtraction` when lofty cannot read the
/// file.
fn extract_mp4(path: &Path) -> Result<ExtractedMetadata> {
    use lofty::file::{AudioFile, TaggedFileExt};
    use lofty::tag::Accessor;

    let tagged_file = lofty::read_from_path(path)
        .map_err(|e| PinakesError::MetadataExtraction(format!("MP4 metadata: {e}")))?;

    let mut meta = ExtractedMetadata::default();

    // Prefer the primary tag; fall back to the first tag present.
    if let Some(tag) = tagged_file
        .primary_tag()
        .or_else(|| tagged_file.first_tag())
    {
        meta.title = tag
            .title()
            .map(|s: std::borrow::Cow<'_, str>| s.to_string());
        meta.artist = tag
            .artist()
            .map(|s: std::borrow::Cow<'_, str>| s.to_string());
        meta.album = tag
            .album()
            .map(|s: std::borrow::Cow<'_, str>| s.to_string());
        meta.genre = tag
            .genre()
            .map(|s: std::borrow::Cow<'_, str>| s.to_string());
        meta.year = tag.date().map(|ts| ts.year as i32);
    }

    let properties = tagged_file.properties();
    let duration = properties.duration();
    // A zero duration means lofty couldn't determine it; leave as None.
    if !duration.is_zero() {
        meta.duration_secs = Some(duration.as_secs_f64());
    }

    if let Some(bitrate) = properties.audio_bitrate() {
        meta.extra
            .insert("audio_bitrate".to_string(), format!("{bitrate} kbps"));
    }
    if let Some(sample_rate) = properties.sample_rate() {
        meta.extra
            .insert("sample_rate".to_string(), format!("{sample_rate} Hz"));
    }
    if let Some(channels) = properties.channels() {
        meta.extra
            .insert("channels".to_string(), channels.to_string());
    }

    Ok(meta)
}

View file

@ -1,6 +1,4 @@
use std::collections::HashMap;
use std::fmt;
use std::path::PathBuf;
use std::{collections::HashMap, fmt, path::PathBuf};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
@ -12,482 +10,487 @@ use crate::media_type::MediaType;
pub struct MediaId(pub Uuid);
impl MediaId {
pub fn new() -> Self {
Self(Uuid::now_v7())
}
pub fn new() -> Self {
Self(Uuid::now_v7())
}
}
impl fmt::Display for MediaId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl Default for MediaId {
fn default() -> Self {
Self::new()
}
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ContentHash(pub String);
impl ContentHash {
pub fn new(hex: String) -> Self {
Self(hex)
}
pub fn new(hex: String) -> Self {
Self(hex)
}
}
impl fmt::Display for ContentHash {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
// ===== Managed Storage Types =====
/// Storage mode for media items
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[derive(
Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize,
)]
#[serde(rename_all = "lowercase")]
pub enum StorageMode {
/// File exists on disk, referenced by path
#[default]
External,
/// File is stored in managed content-addressable storage
Managed,
/// File exists on disk, referenced by path
#[default]
External,
/// File is stored in managed content-addressable storage
Managed,
}
impl fmt::Display for StorageMode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::External => write!(f, "external"),
Self::Managed => write!(f, "managed"),
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::External => write!(f, "external"),
Self::Managed => write!(f, "managed"),
}
}
}
impl std::str::FromStr for StorageMode {
type Err = String;
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"external" => Ok(Self::External),
"managed" => Ok(Self::Managed),
_ => Err(format!("unknown storage mode: {}", s)),
}
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"external" => Ok(Self::External),
"managed" => Ok(Self::Managed),
_ => Err(format!("unknown storage mode: {}", s)),
}
}
}
/// A blob stored in managed storage (content-addressable)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManagedBlob {
pub content_hash: ContentHash,
pub file_size: u64,
pub mime_type: String,
pub reference_count: u32,
pub stored_at: DateTime<Utc>,
pub last_verified: Option<DateTime<Utc>>,
pub content_hash: ContentHash,
pub file_size: u64,
pub mime_type: String,
pub reference_count: u32,
pub stored_at: DateTime<Utc>,
pub last_verified: Option<DateTime<Utc>>,
}
/// Result of uploading a file to managed storage
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UploadResult {
pub media_id: MediaId,
pub content_hash: ContentHash,
pub was_duplicate: bool,
pub file_size: u64,
pub media_id: MediaId,
pub content_hash: ContentHash,
pub was_duplicate: bool,
pub file_size: u64,
}
/// Statistics about managed storage
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ManagedStorageStats {
pub total_blobs: u64,
pub total_size_bytes: u64,
pub unique_size_bytes: u64,
pub deduplication_ratio: f64,
pub managed_media_count: u64,
pub orphaned_blobs: u64,
pub total_blobs: u64,
pub total_size_bytes: u64,
pub unique_size_bytes: u64,
pub deduplication_ratio: f64,
pub managed_media_count: u64,
pub orphaned_blobs: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MediaItem {
pub id: MediaId,
pub path: PathBuf,
pub file_name: String,
pub media_type: MediaType,
pub content_hash: ContentHash,
pub file_size: u64,
pub title: Option<String>,
pub artist: Option<String>,
pub album: Option<String>,
pub genre: Option<String>,
pub year: Option<i32>,
pub duration_secs: Option<f64>,
pub description: Option<String>,
pub thumbnail_path: Option<PathBuf>,
pub custom_fields: HashMap<String, CustomField>,
/// File modification time (Unix timestamp in seconds), used for incremental scanning
pub file_mtime: Option<i64>,
pub id: MediaId,
pub path: PathBuf,
pub file_name: String,
pub media_type: MediaType,
pub content_hash: ContentHash,
pub file_size: u64,
pub title: Option<String>,
pub artist: Option<String>,
pub album: Option<String>,
pub genre: Option<String>,
pub year: Option<i32>,
pub duration_secs: Option<f64>,
pub description: Option<String>,
pub thumbnail_path: Option<PathBuf>,
pub custom_fields: HashMap<String, CustomField>,
/// File modification time (Unix timestamp in seconds), used for incremental
/// scanning
pub file_mtime: Option<i64>,
// Photo-specific metadata
pub date_taken: Option<DateTime<Utc>>,
pub latitude: Option<f64>,
pub longitude: Option<f64>,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub rating: Option<i32>,
pub perceptual_hash: Option<String>,
// Photo-specific metadata
pub date_taken: Option<DateTime<Utc>>,
pub latitude: Option<f64>,
pub longitude: Option<f64>,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub rating: Option<i32>,
pub perceptual_hash: Option<String>,
// Managed storage fields
/// How the file is stored (external on disk or managed in content-addressable storage)
#[serde(default)]
pub storage_mode: StorageMode,
/// Original filename for uploaded files (preserved separately from file_name)
pub original_filename: Option<String>,
/// When the file was uploaded to managed storage
pub uploaded_at: Option<DateTime<Utc>>,
/// Storage key for looking up the blob (usually same as content_hash)
pub storage_key: Option<String>,
// Managed storage fields
/// How the file is stored (external on disk or managed in
/// content-addressable storage)
#[serde(default)]
pub storage_mode: StorageMode,
/// Original filename for uploaded files (preserved separately from
/// file_name)
pub original_filename: Option<String>,
/// When the file was uploaded to managed storage
pub uploaded_at: Option<DateTime<Utc>>,
/// Storage key for looking up the blob (usually same as content_hash)
pub storage_key: Option<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
/// Soft delete timestamp. If set, the item is in the trash.
pub deleted_at: Option<DateTime<Utc>>,
/// Soft delete timestamp. If set, the item is in the trash.
pub deleted_at: Option<DateTime<Utc>>,
/// When markdown links were last extracted from this file.
pub links_extracted_at: Option<DateTime<Utc>>,
/// When markdown links were last extracted from this file.
pub links_extracted_at: Option<DateTime<Utc>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomField {
pub field_type: CustomFieldType,
pub value: String,
pub field_type: CustomFieldType,
pub value: String,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CustomFieldType {
Text,
Number,
Date,
Boolean,
Text,
Number,
Date,
Boolean,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tag {
pub id: Uuid,
pub name: String,
pub parent_id: Option<Uuid>,
pub created_at: DateTime<Utc>,
pub id: Uuid,
pub name: String,
pub parent_id: Option<Uuid>,
pub created_at: DateTime<Utc>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Collection {
pub id: Uuid,
pub name: String,
pub description: Option<String>,
pub kind: CollectionKind,
pub filter_query: Option<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub id: Uuid,
pub name: String,
pub description: Option<String>,
pub kind: CollectionKind,
pub filter_query: Option<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CollectionKind {
Manual,
Virtual,
Manual,
Virtual,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionMember {
pub collection_id: Uuid,
pub media_id: MediaId,
pub position: i32,
pub added_at: DateTime<Utc>,
pub collection_id: Uuid,
pub media_id: MediaId,
pub position: i32,
pub added_at: DateTime<Utc>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditEntry {
pub id: Uuid,
pub media_id: Option<MediaId>,
pub action: AuditAction,
pub details: Option<String>,
pub timestamp: DateTime<Utc>,
pub id: Uuid,
pub media_id: Option<MediaId>,
pub action: AuditAction,
pub details: Option<String>,
pub timestamp: DateTime<Utc>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuditAction {
// Media actions
Imported,
Updated,
Deleted,
Tagged,
Untagged,
AddedToCollection,
RemovedFromCollection,
Opened,
Scanned,
// Media actions
Imported,
Updated,
Deleted,
Tagged,
Untagged,
AddedToCollection,
RemovedFromCollection,
Opened,
Scanned,
// Authentication actions
LoginSuccess,
LoginFailed,
Logout,
SessionExpired,
// Authentication actions
LoginSuccess,
LoginFailed,
Logout,
SessionExpired,
// Authorization actions
PermissionDenied,
RoleChanged,
LibraryAccessGranted,
LibraryAccessRevoked,
// Authorization actions
PermissionDenied,
RoleChanged,
LibraryAccessGranted,
LibraryAccessRevoked,
// User management
UserCreated,
UserUpdated,
UserDeleted,
// User management
UserCreated,
UserUpdated,
UserDeleted,
// Plugin actions
PluginInstalled,
PluginUninstalled,
PluginEnabled,
PluginDisabled,
// Plugin actions
PluginInstalled,
PluginUninstalled,
PluginEnabled,
PluginDisabled,
// Configuration actions
ConfigChanged,
RootDirectoryAdded,
RootDirectoryRemoved,
// Configuration actions
ConfigChanged,
RootDirectoryAdded,
RootDirectoryRemoved,
// Social/Sharing actions
ShareLinkCreated,
ShareLinkAccessed,
// Social/Sharing actions
ShareLinkCreated,
ShareLinkAccessed,
// System actions
DatabaseVacuumed,
DatabaseCleared,
ExportCompleted,
IntegrityCheckCompleted,
// System actions
DatabaseVacuumed,
DatabaseCleared,
ExportCompleted,
IntegrityCheckCompleted,
}
impl fmt::Display for AuditAction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
// Media actions
Self::Imported => "imported",
Self::Updated => "updated",
Self::Deleted => "deleted",
Self::Tagged => "tagged",
Self::Untagged => "untagged",
Self::AddedToCollection => "added_to_collection",
Self::RemovedFromCollection => "removed_from_collection",
Self::Opened => "opened",
Self::Scanned => "scanned",
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
// Media actions
Self::Imported => "imported",
Self::Updated => "updated",
Self::Deleted => "deleted",
Self::Tagged => "tagged",
Self::Untagged => "untagged",
Self::AddedToCollection => "added_to_collection",
Self::RemovedFromCollection => "removed_from_collection",
Self::Opened => "opened",
Self::Scanned => "scanned",
// Authentication actions
Self::LoginSuccess => "login_success",
Self::LoginFailed => "login_failed",
Self::Logout => "logout",
Self::SessionExpired => "session_expired",
// Authentication actions
Self::LoginSuccess => "login_success",
Self::LoginFailed => "login_failed",
Self::Logout => "logout",
Self::SessionExpired => "session_expired",
// Authorization actions
Self::PermissionDenied => "permission_denied",
Self::RoleChanged => "role_changed",
Self::LibraryAccessGranted => "library_access_granted",
Self::LibraryAccessRevoked => "library_access_revoked",
// Authorization actions
Self::PermissionDenied => "permission_denied",
Self::RoleChanged => "role_changed",
Self::LibraryAccessGranted => "library_access_granted",
Self::LibraryAccessRevoked => "library_access_revoked",
// User management
Self::UserCreated => "user_created",
Self::UserUpdated => "user_updated",
Self::UserDeleted => "user_deleted",
// User management
Self::UserCreated => "user_created",
Self::UserUpdated => "user_updated",
Self::UserDeleted => "user_deleted",
// Plugin actions
Self::PluginInstalled => "plugin_installed",
Self::PluginUninstalled => "plugin_uninstalled",
Self::PluginEnabled => "plugin_enabled",
Self::PluginDisabled => "plugin_disabled",
// Plugin actions
Self::PluginInstalled => "plugin_installed",
Self::PluginUninstalled => "plugin_uninstalled",
Self::PluginEnabled => "plugin_enabled",
Self::PluginDisabled => "plugin_disabled",
// Configuration actions
Self::ConfigChanged => "config_changed",
Self::RootDirectoryAdded => "root_directory_added",
Self::RootDirectoryRemoved => "root_directory_removed",
// Configuration actions
Self::ConfigChanged => "config_changed",
Self::RootDirectoryAdded => "root_directory_added",
Self::RootDirectoryRemoved => "root_directory_removed",
// Social/Sharing actions
Self::ShareLinkCreated => "share_link_created",
Self::ShareLinkAccessed => "share_link_accessed",
// Social/Sharing actions
Self::ShareLinkCreated => "share_link_created",
Self::ShareLinkAccessed => "share_link_accessed",
// System actions
Self::DatabaseVacuumed => "database_vacuumed",
Self::DatabaseCleared => "database_cleared",
Self::ExportCompleted => "export_completed",
Self::IntegrityCheckCompleted => "integrity_check_completed",
};
write!(f, "{s}")
}
// System actions
Self::DatabaseVacuumed => "database_vacuumed",
Self::DatabaseCleared => "database_cleared",
Self::ExportCompleted => "export_completed",
Self::IntegrityCheckCompleted => "integrity_check_completed",
};
write!(f, "{s}")
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Pagination {
pub offset: u64,
pub limit: u64,
pub sort: Option<String>,
pub offset: u64,
pub limit: u64,
pub sort: Option<String>,
}
impl Pagination {
pub fn new(offset: u64, limit: u64, sort: Option<String>) -> Self {
Self {
offset,
limit,
sort,
}
pub fn new(offset: u64, limit: u64, sort: Option<String>) -> Self {
Self {
offset,
limit,
sort,
}
}
}
impl Default for Pagination {
fn default() -> Self {
Self {
offset: 0,
limit: 50,
sort: None,
}
fn default() -> Self {
Self {
offset: 0,
limit: 50,
sort: None,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SavedSearch {
pub id: Uuid,
pub name: String,
pub query: String,
pub sort_order: Option<String>,
pub created_at: DateTime<Utc>,
pub id: Uuid,
pub name: String,
pub query: String,
pub sort_order: Option<String>,
pub created_at: DateTime<Utc>,
}
// Book Management Types
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BookMetadata {
pub media_id: MediaId,
pub isbn: Option<String>,
pub isbn13: Option<String>,
pub publisher: Option<String>,
pub language: Option<String>,
pub page_count: Option<i32>,
pub publication_date: Option<chrono::NaiveDate>,
pub series_name: Option<String>,
pub series_index: Option<f64>,
pub format: Option<String>,
pub authors: Vec<AuthorInfo>,
pub identifiers: HashMap<String, Vec<String>>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub media_id: MediaId,
pub isbn: Option<String>,
pub isbn13: Option<String>,
pub publisher: Option<String>,
pub language: Option<String>,
pub page_count: Option<i32>,
pub publication_date: Option<chrono::NaiveDate>,
pub series_name: Option<String>,
pub series_index: Option<f64>,
pub format: Option<String>,
pub authors: Vec<AuthorInfo>,
pub identifiers: HashMap<String, Vec<String>>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct AuthorInfo {
pub name: String,
pub role: String,
pub file_as: Option<String>,
pub position: i32,
pub name: String,
pub role: String,
pub file_as: Option<String>,
pub position: i32,
}
impl AuthorInfo {
pub fn new(name: String) -> Self {
Self {
name,
role: "author".to_string(),
file_as: None,
position: 0,
}
pub fn new(name: String) -> Self {
Self {
name,
role: "author".to_string(),
file_as: None,
position: 0,
}
}
pub fn with_role(mut self, role: String) -> Self {
self.role = role;
self
}
pub fn with_role(mut self, role: String) -> Self {
self.role = role;
self
}
pub fn with_file_as(mut self, file_as: String) -> Self {
self.file_as = Some(file_as);
self
}
pub fn with_file_as(mut self, file_as: String) -> Self {
self.file_as = Some(file_as);
self
}
pub fn with_position(mut self, position: i32) -> Self {
self.position = position;
self
}
pub fn with_position(mut self, position: i32) -> Self {
self.position = position;
self
}
}
/// Book metadata extracted from files (without database-specific fields)
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ExtractedBookMetadata {
pub isbn: Option<String>,
pub isbn13: Option<String>,
pub publisher: Option<String>,
pub language: Option<String>,
pub page_count: Option<i32>,
pub publication_date: Option<chrono::NaiveDate>,
pub series_name: Option<String>,
pub series_index: Option<f64>,
pub format: Option<String>,
pub authors: Vec<AuthorInfo>,
pub identifiers: HashMap<String, Vec<String>>,
pub isbn: Option<String>,
pub isbn13: Option<String>,
pub publisher: Option<String>,
pub language: Option<String>,
pub page_count: Option<i32>,
pub publication_date: Option<chrono::NaiveDate>,
pub series_name: Option<String>,
pub series_index: Option<f64>,
pub format: Option<String>,
pub authors: Vec<AuthorInfo>,
pub identifiers: HashMap<String, Vec<String>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReadingProgress {
pub media_id: MediaId,
pub user_id: Uuid,
pub current_page: i32,
pub total_pages: Option<i32>,
pub progress_percent: f64,
pub last_read_at: DateTime<Utc>,
pub media_id: MediaId,
pub user_id: Uuid,
pub current_page: i32,
pub total_pages: Option<i32>,
pub progress_percent: f64,
pub last_read_at: DateTime<Utc>,
}
impl ReadingProgress {
pub fn new(
media_id: MediaId,
user_id: Uuid,
current_page: i32,
total_pages: Option<i32>,
) -> Self {
let progress_percent = if let Some(total) = total_pages {
if total > 0 {
(current_page as f64 / total as f64 * 100.0).min(100.0)
} else {
0.0
}
} else {
0.0
};
pub fn new(
media_id: MediaId,
user_id: Uuid,
current_page: i32,
total_pages: Option<i32>,
) -> Self {
let progress_percent = if let Some(total) = total_pages {
if total > 0 {
(current_page as f64 / total as f64 * 100.0).min(100.0)
} else {
0.0
}
} else {
0.0
};
Self {
media_id,
user_id,
current_page,
total_pages,
progress_percent,
last_read_at: Utc::now(),
}
Self {
media_id,
user_id,
current_page,
total_pages,
progress_percent,
last_read_at: Utc::now(),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ReadingStatus {
ToRead,
Reading,
Completed,
Abandoned,
ToRead,
Reading,
Completed,
Abandoned,
}
impl fmt::Display for ReadingStatus {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::ToRead => write!(f, "to_read"),
Self::Reading => write!(f, "reading"),
Self::Completed => write!(f, "completed"),
Self::Abandoned => write!(f, "abandoned"),
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::ToRead => write!(f, "to_read"),
Self::Reading => write!(f, "reading"),
Self::Completed => write!(f, "completed"),
Self::Abandoned => write!(f, "abandoned"),
}
}
}
// ===== Markdown Links (Obsidian-style) =====
@ -496,93 +499,93 @@ impl fmt::Display for ReadingStatus {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LinkType {
/// Wikilink: [[target]] or [[target|display]]
Wikilink,
/// Markdown link: [text](path)
MarkdownLink,
/// Embed: ![[target]]
Embed,
/// Wikilink: [[target]] or [[target|display]]
Wikilink,
/// Markdown link: [text](path)
MarkdownLink,
/// Embed: ![[target]]
Embed,
}
impl fmt::Display for LinkType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Wikilink => write!(f, "wikilink"),
Self::MarkdownLink => write!(f, "markdown_link"),
Self::Embed => write!(f, "embed"),
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Wikilink => write!(f, "wikilink"),
Self::MarkdownLink => write!(f, "markdown_link"),
Self::Embed => write!(f, "embed"),
}
}
}
impl std::str::FromStr for LinkType {
type Err = String;
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"wikilink" => Ok(Self::Wikilink),
"markdown_link" => Ok(Self::MarkdownLink),
"embed" => Ok(Self::Embed),
_ => Err(format!("unknown link type: {}", s)),
}
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"wikilink" => Ok(Self::Wikilink),
"markdown_link" => Ok(Self::MarkdownLink),
"embed" => Ok(Self::Embed),
_ => Err(format!("unknown link type: {}", s)),
}
}
}
/// A markdown link extracted from a file
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MarkdownLink {
pub id: Uuid,
pub source_media_id: MediaId,
/// Raw link target as written in the source (wikilink name or path)
pub target_path: String,
/// Resolved target media_id (None if unresolved)
pub target_media_id: Option<MediaId>,
pub link_type: LinkType,
/// Display text for the link
pub link_text: Option<String>,
/// Line number in source file (1-indexed)
pub line_number: Option<i32>,
/// Surrounding text for backlink preview
pub context: Option<String>,
pub created_at: DateTime<Utc>,
pub id: Uuid,
pub source_media_id: MediaId,
/// Raw link target as written in the source (wikilink name or path)
pub target_path: String,
/// Resolved target media_id (None if unresolved)
pub target_media_id: Option<MediaId>,
pub link_type: LinkType,
/// Display text for the link
pub link_text: Option<String>,
/// Line number in source file (1-indexed)
pub line_number: Option<i32>,
/// Surrounding text for backlink preview
pub context: Option<String>,
pub created_at: DateTime<Utc>,
}
/// Information about a backlink (incoming link)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BacklinkInfo {
pub link_id: Uuid,
pub source_id: MediaId,
pub source_title: Option<String>,
pub source_path: String,
pub link_text: Option<String>,
pub line_number: Option<i32>,
pub context: Option<String>,
pub link_type: LinkType,
pub link_id: Uuid,
pub source_id: MediaId,
pub source_title: Option<String>,
pub source_path: String,
pub link_text: Option<String>,
pub line_number: Option<i32>,
pub context: Option<String>,
pub link_type: LinkType,
}
/// Graph data for visualization
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraphData {
pub nodes: Vec<GraphNode>,
pub edges: Vec<GraphEdge>,
pub nodes: Vec<GraphNode>,
pub edges: Vec<GraphEdge>,
}
/// A node in the graph visualization
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphNode {
pub id: String,
pub label: String,
pub title: Option<String>,
pub media_type: String,
/// Number of outgoing links from this node
pub link_count: u32,
/// Number of incoming links to this node
pub backlink_count: u32,
pub id: String,
pub label: String,
pub title: Option<String>,
pub media_type: String,
/// Number of outgoing links from this node
pub link_count: u32,
/// Number of incoming links to this node
pub backlink_count: u32,
}
/// An edge (link) in the graph visualization
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphEdge {
pub source: String,
pub target: String,
pub link_type: LinkType,
pub source: String,
pub target: String,
pub link_type: LinkType,
}

View file

@ -1,79 +1,78 @@
use std::path::Path;
use std::process::Command;
use std::{path::Path, process::Command};
use crate::error::{PinakesError, Result};
pub trait Opener: Send + Sync {
fn open(&self, path: &Path) -> Result<()>;
fn open(&self, path: &Path) -> Result<()>;
}
/// Linux opener using xdg-open
pub struct XdgOpener;
impl Opener for XdgOpener {
fn open(&self, path: &Path) -> Result<()> {
let status = Command::new("xdg-open")
.arg(path)
.status()
.map_err(|e| PinakesError::InvalidOperation(format!("failed to run xdg-open: {e}")))?;
if status.success() {
Ok(())
} else {
Err(PinakesError::InvalidOperation(format!(
"xdg-open exited with status {status}"
)))
}
fn open(&self, path: &Path) -> Result<()> {
let status = Command::new("xdg-open").arg(path).status().map_err(|e| {
PinakesError::InvalidOperation(format!("failed to run xdg-open: {e}"))
})?;
if status.success() {
Ok(())
} else {
Err(PinakesError::InvalidOperation(format!(
"xdg-open exited with status {status}"
)))
}
}
}
/// macOS opener using the `open` command
pub struct MacOpener;
impl Opener for MacOpener {
fn open(&self, path: &Path) -> Result<()> {
let status = Command::new("open")
.arg(path)
.status()
.map_err(|e| PinakesError::InvalidOperation(format!("failed to run open: {e}")))?;
if status.success() {
Ok(())
} else {
Err(PinakesError::InvalidOperation(format!(
"open exited with status {status}"
)))
}
fn open(&self, path: &Path) -> Result<()> {
let status = Command::new("open").arg(path).status().map_err(|e| {
PinakesError::InvalidOperation(format!("failed to run open: {e}"))
})?;
if status.success() {
Ok(())
} else {
Err(PinakesError::InvalidOperation(format!(
"open exited with status {status}"
)))
}
}
}
/// Windows opener using `cmd /c start`
pub struct WindowsOpener;
impl Opener for WindowsOpener {
fn open(&self, path: &Path) -> Result<()> {
let status = Command::new("cmd")
.args(["/C", "start", ""])
.arg(path)
.status()
.map_err(|e| {
PinakesError::InvalidOperation(format!("failed to run cmd /c start: {e}"))
})?;
if status.success() {
Ok(())
} else {
Err(PinakesError::InvalidOperation(format!(
"cmd /c start exited with status {status}"
)))
}
fn open(&self, path: &Path) -> Result<()> {
let status = Command::new("cmd")
.args(["/C", "start", ""])
.arg(path)
.status()
.map_err(|e| {
PinakesError::InvalidOperation(format!(
"failed to run cmd /c start: {e}"
))
})?;
if status.success() {
Ok(())
} else {
Err(PinakesError::InvalidOperation(format!(
"cmd /c start exited with status {status}"
)))
}
}
}
/// Returns the platform-appropriate opener.
pub fn default_opener() -> Box<dyn Opener> {
if cfg!(target_os = "macos") {
Box::new(MacOpener)
} else if cfg!(target_os = "windows") {
Box::new(WindowsOpener)
} else {
Box::new(XdgOpener)
}
if cfg!(target_os = "macos") {
Box::new(MacOpener)
} else if cfg!(target_os = "windows") {
Box::new(WindowsOpener)
} else {
Box::new(XdgOpener)
}
}

View file

@ -37,72 +37,81 @@ use crate::error::{PinakesError, Result};
///
/// ```no_run
/// use std::path::PathBuf;
///
/// use pinakes_core::path_validation::validate_path;
///
/// let allowed_roots = vec![PathBuf::from("/media"), PathBuf::from("/home/user/documents")];
/// let allowed_roots = vec![
/// PathBuf::from("/media"),
/// PathBuf::from("/home/user/documents"),
/// ];
/// let path = PathBuf::from("/media/music/song.mp3");
///
/// let validated = validate_path(&path, &allowed_roots).unwrap();
/// ```
pub fn validate_path(path: &Path, allowed_roots: &[PathBuf]) -> Result<PathBuf> {
// Handle the case where no roots are configured
if allowed_roots.is_empty() {
return Err(PinakesError::PathNotAllowed(
"no allowed roots configured".to_string(),
));
}
pub fn validate_path(
path: &Path,
allowed_roots: &[PathBuf],
) -> Result<PathBuf> {
// Handle the case where no roots are configured
if allowed_roots.is_empty() {
return Err(PinakesError::PathNotAllowed(
"no allowed roots configured".to_string(),
));
}
// First check if the path exists
if !path.exists() {
return Err(PinakesError::PathNotAllowed(format!(
"path does not exist: {}",
path.display()
)));
}
// First check if the path exists
if !path.exists() {
return Err(PinakesError::PathNotAllowed(format!(
"path does not exist: {}",
path.display()
)));
}
// Canonicalize to resolve symlinks and relative components
let canonical = path.canonicalize().map_err(|e| {
PinakesError::PathNotAllowed(format!(
"failed to canonicalize path {}: {}",
path.display(),
e
))
})?;
// Canonicalize to resolve symlinks and relative components
let canonical = path.canonicalize().map_err(|e| {
PinakesError::PathNotAllowed(format!(
"failed to canonicalize path {}: {}",
path.display(),
e
))
})?;
// Check if the canonical path is within any allowed root
let canonical_roots: Vec<PathBuf> = allowed_roots
.iter()
.filter_map(|root| root.canonicalize().ok())
.collect();
// Check if the canonical path is within any allowed root
let canonical_roots: Vec<PathBuf> = allowed_roots
.iter()
.filter_map(|root| root.canonicalize().ok())
.collect();
if canonical_roots.is_empty() {
return Err(PinakesError::PathNotAllowed(
"no accessible allowed roots".to_string(),
));
}
if canonical_roots.is_empty() {
return Err(PinakesError::PathNotAllowed(
"no accessible allowed roots".to_string(),
));
}
let is_allowed = canonical_roots
.iter()
.any(|root| canonical.starts_with(root));
let is_allowed = canonical_roots
.iter()
.any(|root| canonical.starts_with(root));
if is_allowed {
Ok(canonical)
} else {
Err(PinakesError::PathNotAllowed(format!(
"path {} is outside allowed roots",
path.display()
)))
}
if is_allowed {
Ok(canonical)
} else {
Err(PinakesError::PathNotAllowed(format!(
"path {} is outside allowed roots",
path.display()
)))
}
}
/// Validates a path relative to a single root directory.
///
/// This is a convenience wrapper for `validate_path` when you only have one root.
/// This is a convenience wrapper for `validate_path` when you only have one
/// root.
pub fn validate_path_single_root(path: &Path, root: &Path) -> Result<PathBuf> {
validate_path(path, &[root.to_path_buf()])
validate_path(path, &[root.to_path_buf()])
}
/// Checks if a path appears to contain traversal sequences without canonicalizing.
/// Checks if a path appears to contain traversal sequences without
/// canonicalizing.
///
/// This is a quick pre-check that can reject obviously malicious paths without
/// hitting the filesystem. It should be used in addition to `validate_path`,
@ -117,11 +126,11 @@ pub fn validate_path_single_root(path: &Path, root: &Path) -> Result<PathBuf> {
/// `true` if the path appears safe (no obvious traversal sequences),
/// `false` if it contains suspicious patterns.
pub fn path_looks_safe(path: &str) -> bool {
// Reject paths with obvious traversal patterns
!path.contains("..")
&& !path.contains("//")
&& !path.starts_with('/')
&& path.chars().filter(|c| *c == '/').count() < 50 // Reasonable depth limit
// Reject paths with obvious traversal patterns
!path.contains("..")
&& !path.contains("//")
&& !path.starts_with('/')
&& path.chars().filter(|c| *c == '/').count() < 50 // Reasonable depth limit
}
/// Sanitizes a filename by removing or replacing dangerous characters.
@ -140,26 +149,27 @@ pub fn path_looks_safe(path: &str) -> bool {
///
/// A sanitized filename safe for use on most filesystems.
pub fn sanitize_filename(filename: &str) -> String {
let sanitized: String = filename
.chars()
.filter(|c| {
// Allow alphanumeric, common punctuation, and unicode letters
c.is_alphanumeric() || matches!(*c, '-' | '_' | '.' | ' ' | '(' | ')' | '[' | ']')
})
.collect();
let sanitized: String = filename
.chars()
.filter(|c| {
// Allow alphanumeric, common punctuation, and unicode letters
c.is_alphanumeric()
|| matches!(*c, '-' | '_' | '.' | ' ' | '(' | ')' | '[' | ']')
})
.collect();
// Remove leading dots to prevent hidden files
let sanitized = sanitized.trim_start_matches('.');
// Remove leading dots to prevent hidden files
let sanitized = sanitized.trim_start_matches('.');
// Remove leading/trailing whitespace
let sanitized = sanitized.trim();
// Remove leading/trailing whitespace
let sanitized = sanitized.trim();
// Ensure the filename isn't empty after sanitization
if sanitized.is_empty() {
"unnamed".to_string()
} else {
sanitized.to_string()
}
// Ensure the filename isn't empty after sanitization
if sanitized.is_empty() {
"unnamed".to_string()
} else {
sanitized.to_string()
}
}
/// Joins a base path with a relative path safely.
@ -174,137 +184,140 @@ pub fn sanitize_filename(filename: &str) -> String {
///
/// # Returns
///
/// The joined path if safe, or an error if the relative path would escape the base.
/// The joined path if safe, or an error if the relative path would escape the
/// base.
pub fn safe_join(base: &Path, relative: &str) -> Result<PathBuf> {
// Reject absolute paths in the relative component
if relative.starts_with('/') || relative.starts_with('\\') {
// Reject absolute paths in the relative component
if relative.starts_with('/') || relative.starts_with('\\') {
return Err(PinakesError::PathNotAllowed(
"relative path cannot be absolute".to_string(),
));
}
// Reject paths with .. traversal
if relative.contains("..") {
return Err(PinakesError::PathNotAllowed(
"relative path cannot contain '..'".to_string(),
));
}
// Build the path and validate it stays within base
let joined = base.join(relative);
// Canonicalize base for comparison
let canonical_base = base.canonicalize().map_err(|e| {
PinakesError::PathNotAllowed(format!(
"failed to canonicalize base {}: {}",
base.display(),
e
))
})?;
// The joined path might not exist yet, so we can't canonicalize it directly.
// Instead, we check each component
let mut current = canonical_base.clone();
for component in Path::new(relative).components() {
use std::path::Component;
match component {
Component::Normal(name) => {
current = current.join(name);
},
Component::ParentDir => {
return Err(PinakesError::PathNotAllowed(
"relative path cannot be absolute".to_string(),
"path traversal detected".to_string(),
));
}
// Reject paths with .. traversal
if relative.contains("..") {
},
Component::CurDir => continue,
_ => {
return Err(PinakesError::PathNotAllowed(
"relative path cannot contain '..'".to_string(),
"invalid path component".to_string(),
));
},
}
}
// Build the path and validate it stays within base
let joined = base.join(relative);
// Canonicalize base for comparison
let canonical_base = base.canonicalize().map_err(|e| {
PinakesError::PathNotAllowed(format!(
"failed to canonicalize base {}: {}",
base.display(),
e
))
})?;
// The joined path might not exist yet, so we can't canonicalize it directly.
// Instead, we check each component
let mut current = canonical_base.clone();
for component in Path::new(relative).components() {
use std::path::Component;
match component {
Component::Normal(name) => {
current = current.join(name);
}
Component::ParentDir => {
return Err(PinakesError::PathNotAllowed(
"path traversal detected".to_string(),
));
}
Component::CurDir => continue,
_ => {
return Err(PinakesError::PathNotAllowed(
"invalid path component".to_string(),
));
}
}
}
Ok(joined)
Ok(joined)
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
use std::fs;
fn setup_test_dirs() -> TempDir {
let temp = TempDir::new().unwrap();
fs::create_dir_all(temp.path().join("allowed")).unwrap();
fs::create_dir_all(temp.path().join("forbidden")).unwrap();
fs::write(temp.path().join("allowed/file.txt"), "test").unwrap();
fs::write(temp.path().join("forbidden/secret.txt"), "secret").unwrap();
temp
}
use tempfile::TempDir;
#[test]
fn test_validate_path_allowed() {
let temp = setup_test_dirs();
let allowed_roots = vec![temp.path().join("allowed")];
let path = temp.path().join("allowed/file.txt");
use super::*;
let result = validate_path(&path, &allowed_roots);
assert!(result.is_ok());
}
fn setup_test_dirs() -> TempDir {
let temp = TempDir::new().unwrap();
fs::create_dir_all(temp.path().join("allowed")).unwrap();
fs::create_dir_all(temp.path().join("forbidden")).unwrap();
fs::write(temp.path().join("allowed/file.txt"), "test").unwrap();
fs::write(temp.path().join("forbidden/secret.txt"), "secret").unwrap();
temp
}
#[test]
fn test_validate_path_forbidden() {
let temp = setup_test_dirs();
let allowed_roots = vec![temp.path().join("allowed")];
let path = temp.path().join("forbidden/secret.txt");
#[test]
fn test_validate_path_allowed() {
let temp = setup_test_dirs();
let allowed_roots = vec![temp.path().join("allowed")];
let path = temp.path().join("allowed/file.txt");
let result = validate_path(&path, &allowed_roots);
assert!(result.is_err());
}
let result = validate_path(&path, &allowed_roots);
assert!(result.is_ok());
}
#[test]
fn test_validate_path_traversal() {
let temp = setup_test_dirs();
let allowed_roots = vec![temp.path().join("allowed")];
let path = temp.path().join("allowed/../forbidden/secret.txt");
#[test]
fn test_validate_path_forbidden() {
let temp = setup_test_dirs();
let allowed_roots = vec![temp.path().join("allowed")];
let path = temp.path().join("forbidden/secret.txt");
let result = validate_path(&path, &allowed_roots);
assert!(result.is_err());
}
let result = validate_path(&path, &allowed_roots);
assert!(result.is_err());
}
#[test]
fn test_sanitize_filename() {
assert_eq!(sanitize_filename("normal.txt"), "normal.txt");
assert_eq!(sanitize_filename("../../../etc/passwd"), "etcpasswd");
assert_eq!(sanitize_filename(".hidden"), "hidden");
assert_eq!(sanitize_filename("file<with>bad:chars"), "filewithbadchars");
assert_eq!(sanitize_filename(""), "unnamed");
assert_eq!(sanitize_filename("..."), "unnamed");
}
#[test]
fn test_validate_path_traversal() {
let temp = setup_test_dirs();
let allowed_roots = vec![temp.path().join("allowed")];
let path = temp.path().join("allowed/../forbidden/secret.txt");
#[test]
fn test_path_looks_safe() {
assert!(path_looks_safe("normal/path/file.txt"));
assert!(!path_looks_safe("../../../etc/passwd"));
assert!(!path_looks_safe("path//double/slash"));
}
let result = validate_path(&path, &allowed_roots);
assert!(result.is_err());
}
#[test]
fn test_safe_join() {
let temp = TempDir::new().unwrap();
let base = temp.path();
#[test]
fn test_sanitize_filename() {
assert_eq!(sanitize_filename("normal.txt"), "normal.txt");
assert_eq!(sanitize_filename("../../../etc/passwd"), "etcpasswd");
assert_eq!(sanitize_filename(".hidden"), "hidden");
assert_eq!(sanitize_filename("file<with>bad:chars"), "filewithbadchars");
assert_eq!(sanitize_filename(""), "unnamed");
assert_eq!(sanitize_filename("..."), "unnamed");
}
// Valid join
let result = safe_join(base, "subdir/file.txt");
assert!(result.is_ok());
#[test]
fn test_path_looks_safe() {
assert!(path_looks_safe("normal/path/file.txt"));
assert!(!path_looks_safe("../../../etc/passwd"));
assert!(!path_looks_safe("path//double/slash"));
}
// Traversal attempt
let result = safe_join(base, "../etc/passwd");
assert!(result.is_err());
#[test]
fn test_safe_join() {
let temp = TempDir::new().unwrap();
let base = temp.path();
// Absolute path attempt
let result = safe_join(base, "/etc/passwd");
assert!(result.is_err());
}
// Valid join
let result = safe_join(base, "subdir/file.txt");
assert!(result.is_ok());
// Traversal attempt
let result = safe_join(base, "../etc/passwd");
assert!(result.is_err());
// Absolute path attempt
let result = safe_join(base, "/etc/passwd");
assert!(result.is_err());
}
}

View file

@ -4,28 +4,27 @@ use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::model::MediaId;
use crate::users::UserId;
use crate::{model::MediaId, users::UserId};
/// A user-owned playlist of media items.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Playlist {
pub id: Uuid,
pub owner_id: UserId,
pub name: String,
pub description: Option<String>,
pub is_public: bool,
pub is_smart: bool,
pub filter_query: Option<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub id: Uuid,
pub owner_id: UserId,
pub name: String,
pub description: Option<String>,
pub is_public: bool,
pub is_smart: bool,
pub filter_query: Option<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
/// An item within a playlist at a specific position.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PlaylistItem {
pub playlist_id: Uuid,
pub media_id: MediaId,
pub position: i32,
pub added_at: DateTime<Utc>,
pub playlist_id: Uuid,
pub media_id: MediaId,
pub position: i32,
pub added_at: DateTime<Utc>,
}

View file

@ -1,334 +1,345 @@
//! Plugin loader for discovering and loading plugins from the filesystem
use std::path::{Path, PathBuf};
use anyhow::{Result, anyhow};
use pinakes_plugin_api::PluginManifest;
use std::path::{Path, PathBuf};
use tracing::{debug, info, warn};
use walkdir::WalkDir;
/// Plugin loader handles discovery and loading of plugins from directories
pub struct PluginLoader {
/// Directories to search for plugins
plugin_dirs: Vec<PathBuf>,
/// Directories to search for plugins
plugin_dirs: Vec<PathBuf>,
}
impl PluginLoader {
/// Create a new plugin loader
pub fn new(plugin_dirs: Vec<PathBuf>) -> Self {
Self { plugin_dirs }
/// Create a new plugin loader
pub fn new(plugin_dirs: Vec<PathBuf>) -> Self {
Self { plugin_dirs }
}
/// Discover all plugins in configured directories
pub async fn discover_plugins(&self) -> Result<Vec<PluginManifest>> {
let mut manifests = Vec::new();
for dir in &self.plugin_dirs {
if !dir.exists() {
warn!("Plugin directory does not exist: {:?}", dir);
continue;
}
info!("Discovering plugins in: {:?}", dir);
match self.discover_in_directory(dir).await {
Ok(found) => {
info!("Found {} plugins in {:?}", found.len(), dir);
manifests.extend(found);
},
Err(e) => {
warn!("Error discovering plugins in {:?}: {}", dir, e);
},
}
}
/// Discover all plugins in configured directories
pub async fn discover_plugins(&self) -> Result<Vec<PluginManifest>> {
let mut manifests = Vec::new();
Ok(manifests)
}
for dir in &self.plugin_dirs {
if !dir.exists() {
warn!("Plugin directory does not exist: {:?}", dir);
continue;
}
/// Discover plugins in a specific directory
async fn discover_in_directory(
&self,
dir: &Path,
) -> Result<Vec<PluginManifest>> {
let mut manifests = Vec::new();
info!("Discovering plugins in: {:?}", dir);
match self.discover_in_directory(dir).await {
Ok(found) => {
info!("Found {} plugins in {:?}", found.len(), dir);
manifests.extend(found);
}
Err(e) => {
warn!("Error discovering plugins in {:?}: {}", dir, e);
}
}
}
Ok(manifests)
}
/// Discover plugins in a specific directory
async fn discover_in_directory(&self, dir: &Path) -> Result<Vec<PluginManifest>> {
let mut manifests = Vec::new();
// Walk the directory looking for plugin.toml files
for entry in WalkDir::new(dir)
// Walk the directory looking for plugin.toml files
for entry in WalkDir::new(dir)
.max_depth(3) // Don't go too deep
.follow_links(false)
{
let entry = match entry {
Ok(e) => e,
Err(e) => {
warn!("Error reading directory entry: {}", e);
continue;
}
};
{
let entry = match entry {
Ok(e) => e,
Err(e) => {
warn!("Error reading directory entry: {}", e);
continue;
},
};
let path = entry.path();
let path = entry.path();
// Look for plugin.toml files
if path.file_name() == Some(std::ffi::OsStr::new("plugin.toml")) {
debug!("Found plugin manifest: {:?}", path);
// Look for plugin.toml files
if path.file_name() == Some(std::ffi::OsStr::new("plugin.toml")) {
debug!("Found plugin manifest: {:?}", path);
match PluginManifest::from_file(path) {
Ok(manifest) => {
info!("Loaded manifest for plugin: {}", manifest.plugin.name);
manifests.push(manifest);
}
Err(e) => {
warn!("Failed to load manifest from {:?}: {}", path, e);
}
}
}
match PluginManifest::from_file(path) {
Ok(manifest) => {
info!("Loaded manifest for plugin: {}", manifest.plugin.name);
manifests.push(manifest);
},
Err(e) => {
warn!("Failed to load manifest from {:?}: {}", path, e);
},
}
Ok(manifests)
}
}
/// Resolve the WASM binary path from a manifest
pub fn resolve_wasm_path(&self, manifest: &PluginManifest) -> Result<PathBuf> {
// The WASM path in the manifest is relative to the manifest file
// We need to search for it in the plugin directories
Ok(manifests)
}
for dir in &self.plugin_dirs {
// Look for a directory matching the plugin name
let plugin_dir = dir.join(&manifest.plugin.name);
if !plugin_dir.exists() {
continue;
}
/// Resolve the WASM binary path from a manifest
pub fn resolve_wasm_path(
&self,
manifest: &PluginManifest,
) -> Result<PathBuf> {
// The WASM path in the manifest is relative to the manifest file
// We need to search for it in the plugin directories
// Check for plugin.toml in this directory
let manifest_path = plugin_dir.join("plugin.toml");
if !manifest_path.exists() {
continue;
}
for dir in &self.plugin_dirs {
// Look for a directory matching the plugin name
let plugin_dir = dir.join(&manifest.plugin.name);
if !plugin_dir.exists() {
continue;
}
// Resolve WASM path relative to this directory
let wasm_path = plugin_dir.join(&manifest.plugin.binary.wasm);
if wasm_path.exists() {
// Verify the resolved path is within the plugin directory (prevent path traversal)
let canonical_wasm = wasm_path
.canonicalize()
.map_err(|e| anyhow!("Failed to canonicalize WASM path: {}", e))?;
let canonical_plugin_dir = plugin_dir
.canonicalize()
.map_err(|e| anyhow!("Failed to canonicalize plugin dir: {}", e))?;
if !canonical_wasm.starts_with(&canonical_plugin_dir) {
return Err(anyhow!(
"WASM binary path escapes plugin directory: {:?}",
wasm_path
));
}
return Ok(canonical_wasm);
}
// Check for plugin.toml in this directory
let manifest_path = plugin_dir.join("plugin.toml");
if !manifest_path.exists() {
continue;
}
// Resolve WASM path relative to this directory
let wasm_path = plugin_dir.join(&manifest.plugin.binary.wasm);
if wasm_path.exists() {
// Verify the resolved path is within the plugin directory (prevent path
// traversal)
let canonical_wasm = wasm_path
.canonicalize()
.map_err(|e| anyhow!("Failed to canonicalize WASM path: {}", e))?;
let canonical_plugin_dir = plugin_dir
.canonicalize()
.map_err(|e| anyhow!("Failed to canonicalize plugin dir: {}", e))?;
if !canonical_wasm.starts_with(&canonical_plugin_dir) {
return Err(anyhow!(
"WASM binary path escapes plugin directory: {:?}",
wasm_path
));
}
Err(anyhow!(
"WASM binary not found for plugin: {}",
manifest.plugin.name
))
return Ok(canonical_wasm);
}
}
/// Download a plugin from a URL
pub async fn download_plugin(&self, url: &str) -> Result<PathBuf> {
// Only allow HTTPS downloads
if !url.starts_with("https://") {
return Err(anyhow!(
"Only HTTPS URLs are allowed for plugin downloads: {}",
url
));
}
Err(anyhow!(
"WASM binary not found for plugin: {}",
manifest.plugin.name
))
}
let dest_dir = self
.plugin_dirs
.first()
.ok_or_else(|| anyhow!("No plugin directories configured"))?;
std::fs::create_dir_all(dest_dir)?;
// Download the archive with timeout and size limits
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(300))
.build()
.map_err(|e| anyhow!("Failed to build HTTP client: {}", e))?;
let response = client
.get(url)
.send()
.await
.map_err(|e| anyhow!("Failed to download plugin: {}", e))?;
if !response.status().is_success() {
return Err(anyhow!(
"Plugin download failed with status: {}",
response.status()
));
}
// Check content-length header before downloading
const MAX_PLUGIN_SIZE: u64 = 100 * 1024 * 1024; // 100 MB
if let Some(content_length) = response.content_length()
&& content_length > MAX_PLUGIN_SIZE
{
return Err(anyhow!(
"Plugin archive too large: {} bytes (max {} bytes)",
content_length,
MAX_PLUGIN_SIZE
));
}
let bytes = response
.bytes()
.await
.map_err(|e| anyhow!("Failed to read plugin response: {}", e))?;
// Check actual size after download
if bytes.len() as u64 > MAX_PLUGIN_SIZE {
return Err(anyhow!(
"Plugin archive too large: {} bytes (max {} bytes)",
bytes.len(),
MAX_PLUGIN_SIZE
));
}
// Write archive to a unique temp file
let temp_archive = dest_dir.join(format!(".download-{}.tar.gz", uuid::Uuid::now_v7()));
std::fs::write(&temp_archive, &bytes)?;
// Extract using tar with -C to target directory
let canonical_dest = dest_dir
.canonicalize()
.map_err(|e| anyhow!("Failed to canonicalize dest dir: {}", e))?;
let output = std::process::Command::new("tar")
.args([
"xzf",
&temp_archive.to_string_lossy(),
"-C",
&canonical_dest.to_string_lossy(),
])
.output()
.map_err(|e| anyhow!("Failed to extract plugin archive: {}", e))?;
// Clean up the archive
let _ = std::fs::remove_file(&temp_archive);
if !output.status.success() {
return Err(anyhow!(
"Failed to extract plugin archive: {}",
String::from_utf8_lossy(&output.stderr)
));
}
// Validate that all extracted files are within dest_dir
for entry in WalkDir::new(&canonical_dest).follow_links(false) {
let entry = entry?;
let entry_canonical = entry.path().canonicalize()?;
if !entry_canonical.starts_with(&canonical_dest) {
return Err(anyhow!(
"Extracted file escapes destination directory: {:?}",
entry.path()
));
}
}
// Find the extracted plugin directory by looking for plugin.toml
for entry in WalkDir::new(dest_dir).max_depth(2).follow_links(false) {
let entry = entry?;
if entry.file_name() == "plugin.toml" {
let plugin_dir = entry
.path()
.parent()
.ok_or_else(|| anyhow!("Invalid plugin.toml location"))?;
// Validate the manifest
let manifest = PluginManifest::from_file(entry.path())?;
info!("Downloaded and extracted plugin: {}", manifest.plugin.name);
return Ok(plugin_dir.to_path_buf());
}
}
Err(anyhow!(
"No plugin.toml found after extracting archive from: {}",
url
))
/// Download a plugin from a URL
pub async fn download_plugin(&self, url: &str) -> Result<PathBuf> {
// Only allow HTTPS downloads
if !url.starts_with("https://") {
return Err(anyhow!(
"Only HTTPS URLs are allowed for plugin downloads: {}",
url
));
}
/// Validate a plugin package
pub fn validate_plugin_package(&self, path: &Path) -> Result<()> {
// Check that the path exists
if !path.exists() {
return Err(anyhow!("Plugin path does not exist: {:?}", path));
}
let dest_dir = self
.plugin_dirs
.first()
.ok_or_else(|| anyhow!("No plugin directories configured"))?;
// Check for plugin.toml
let manifest_path = path.join("plugin.toml");
if !manifest_path.exists() {
return Err(anyhow!("Missing plugin.toml in {:?}", path));
}
std::fs::create_dir_all(dest_dir)?;
// Parse and validate manifest
let manifest = PluginManifest::from_file(&manifest_path)?;
// Download the archive with timeout and size limits
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(300))
.build()
.map_err(|e| anyhow!("Failed to build HTTP client: {}", e))?;
// Check that WASM binary exists
let wasm_path = path.join(&manifest.plugin.binary.wasm);
if !wasm_path.exists() {
return Err(anyhow!(
"WASM binary not found: {}",
manifest.plugin.binary.wasm
));
}
let response = client
.get(url)
.send()
.await
.map_err(|e| anyhow!("Failed to download plugin: {}", e))?;
// Verify the WASM path is within the plugin directory (prevent path traversal)
let canonical_wasm = wasm_path.canonicalize()?;
let canonical_path = path.canonicalize()?;
if !canonical_wasm.starts_with(&canonical_path) {
return Err(anyhow!(
"WASM binary path escapes plugin directory: {:?}",
wasm_path
));
}
// Validate WASM file
let wasm_bytes = std::fs::read(&wasm_path)?;
if wasm_bytes.len() < 4 || &wasm_bytes[0..4] != b"\0asm" {
return Err(anyhow!("Invalid WASM file: {:?}", wasm_path));
}
Ok(())
if !response.status().is_success() {
return Err(anyhow!(
"Plugin download failed with status: {}",
response.status()
));
}
/// Get plugin directory path for a given plugin name
pub fn get_plugin_dir(&self, plugin_name: &str) -> Option<PathBuf> {
for dir in &self.plugin_dirs {
let plugin_dir = dir.join(plugin_name);
if plugin_dir.exists() {
return Some(plugin_dir);
}
}
None
// Check content-length header before downloading
const MAX_PLUGIN_SIZE: u64 = 100 * 1024 * 1024; // 100 MB
if let Some(content_length) = response.content_length()
&& content_length > MAX_PLUGIN_SIZE
{
return Err(anyhow!(
"Plugin archive too large: {} bytes (max {} bytes)",
content_length,
MAX_PLUGIN_SIZE
));
}
let bytes = response
.bytes()
.await
.map_err(|e| anyhow!("Failed to read plugin response: {}", e))?;
// Check actual size after download
if bytes.len() as u64 > MAX_PLUGIN_SIZE {
return Err(anyhow!(
"Plugin archive too large: {} bytes (max {} bytes)",
bytes.len(),
MAX_PLUGIN_SIZE
));
}
// Write archive to a unique temp file
let temp_archive =
dest_dir.join(format!(".download-{}.tar.gz", uuid::Uuid::now_v7()));
std::fs::write(&temp_archive, &bytes)?;
// Extract using tar with -C to target directory
let canonical_dest = dest_dir
.canonicalize()
.map_err(|e| anyhow!("Failed to canonicalize dest dir: {}", e))?;
let output = std::process::Command::new("tar")
.args([
"xzf",
&temp_archive.to_string_lossy(),
"-C",
&canonical_dest.to_string_lossy(),
])
.output()
.map_err(|e| anyhow!("Failed to extract plugin archive: {}", e))?;
// Clean up the archive
let _ = std::fs::remove_file(&temp_archive);
if !output.status.success() {
return Err(anyhow!(
"Failed to extract plugin archive: {}",
String::from_utf8_lossy(&output.stderr)
));
}
// Validate that all extracted files are within dest_dir
for entry in WalkDir::new(&canonical_dest).follow_links(false) {
let entry = entry?;
let entry_canonical = entry.path().canonicalize()?;
if !entry_canonical.starts_with(&canonical_dest) {
return Err(anyhow!(
"Extracted file escapes destination directory: {:?}",
entry.path()
));
}
}
// Find the extracted plugin directory by looking for plugin.toml
for entry in WalkDir::new(dest_dir).max_depth(2).follow_links(false) {
let entry = entry?;
if entry.file_name() == "plugin.toml" {
let plugin_dir = entry
.path()
.parent()
.ok_or_else(|| anyhow!("Invalid plugin.toml location"))?;
// Validate the manifest
let manifest = PluginManifest::from_file(entry.path())?;
info!("Downloaded and extracted plugin: {}", manifest.plugin.name);
return Ok(plugin_dir.to_path_buf());
}
}
Err(anyhow!(
"No plugin.toml found after extracting archive from: {}",
url
))
}
/// Validate a plugin package
pub fn validate_plugin_package(&self, path: &Path) -> Result<()> {
// Check that the path exists
if !path.exists() {
return Err(anyhow!("Plugin path does not exist: {:?}", path));
}
// Check for plugin.toml
let manifest_path = path.join("plugin.toml");
if !manifest_path.exists() {
return Err(anyhow!("Missing plugin.toml in {:?}", path));
}
// Parse and validate manifest
let manifest = PluginManifest::from_file(&manifest_path)?;
// Check that WASM binary exists
let wasm_path = path.join(&manifest.plugin.binary.wasm);
if !wasm_path.exists() {
return Err(anyhow!(
"WASM binary not found: {}",
manifest.plugin.binary.wasm
));
}
// Verify the WASM path is within the plugin directory (prevent path
// traversal)
let canonical_wasm = wasm_path.canonicalize()?;
let canonical_path = path.canonicalize()?;
if !canonical_wasm.starts_with(&canonical_path) {
return Err(anyhow!(
"WASM binary path escapes plugin directory: {:?}",
wasm_path
));
}
// Validate WASM file
let wasm_bytes = std::fs::read(&wasm_path)?;
if wasm_bytes.len() < 4 || &wasm_bytes[0..4] != b"\0asm" {
return Err(anyhow!("Invalid WASM file: {:?}", wasm_path));
}
Ok(())
}
/// Get plugin directory path for a given plugin name
pub fn get_plugin_dir(&self, plugin_name: &str) -> Option<PathBuf> {
for dir in &self.plugin_dirs {
let plugin_dir = dir.join(plugin_name);
if plugin_dir.exists() {
return Some(plugin_dir);
}
}
None
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
use tempfile::TempDir;
#[tokio::test]
async fn test_discover_plugins_empty() {
let temp_dir = TempDir::new().unwrap();
let loader = PluginLoader::new(vec![temp_dir.path().to_path_buf()]);
use super::*;
let manifests = loader.discover_plugins().await.unwrap();
assert_eq!(manifests.len(), 0);
}
#[tokio::test]
async fn test_discover_plugins_empty() {
let temp_dir = TempDir::new().unwrap();
let loader = PluginLoader::new(vec![temp_dir.path().to_path_buf()]);
#[tokio::test]
async fn test_discover_plugins_with_manifest() {
let temp_dir = TempDir::new().unwrap();
let plugin_dir = temp_dir.path().join("test-plugin");
std::fs::create_dir(&plugin_dir).unwrap();
let manifests = loader.discover_plugins().await.unwrap();
assert_eq!(manifests.len(), 0);
}
// Create a valid manifest
let manifest_content = r#"
#[tokio::test]
async fn test_discover_plugins_with_manifest() {
let temp_dir = TempDir::new().unwrap();
let plugin_dir = temp_dir.path().join("test-plugin");
std::fs::create_dir(&plugin_dir).unwrap();
// Create a valid manifest
let manifest_content = r#"
[plugin]
name = "test-plugin"
version = "1.0.0"
@ -338,26 +349,27 @@ kind = ["media_type"]
[plugin.binary]
wasm = "plugin.wasm"
"#;
std::fs::write(plugin_dir.join("plugin.toml"), manifest_content).unwrap();
std::fs::write(plugin_dir.join("plugin.toml"), manifest_content).unwrap();
// Create dummy WASM file
std::fs::write(plugin_dir.join("plugin.wasm"), b"\0asm\x01\x00\x00\x00").unwrap();
// Create dummy WASM file
std::fs::write(plugin_dir.join("plugin.wasm"), b"\0asm\x01\x00\x00\x00")
.unwrap();
let loader = PluginLoader::new(vec![temp_dir.path().to_path_buf()]);
let manifests = loader.discover_plugins().await.unwrap();
let loader = PluginLoader::new(vec![temp_dir.path().to_path_buf()]);
let manifests = loader.discover_plugins().await.unwrap();
assert_eq!(manifests.len(), 1);
assert_eq!(manifests[0].plugin.name, "test-plugin");
}
assert_eq!(manifests.len(), 1);
assert_eq!(manifests[0].plugin.name, "test-plugin");
}
#[test]
fn test_validate_plugin_package() {
let temp_dir = TempDir::new().unwrap();
let plugin_dir = temp_dir.path().join("test-plugin");
std::fs::create_dir(&plugin_dir).unwrap();
#[test]
fn test_validate_plugin_package() {
let temp_dir = TempDir::new().unwrap();
let plugin_dir = temp_dir.path().join("test-plugin");
std::fs::create_dir(&plugin_dir).unwrap();
// Create a valid manifest
let manifest_content = r#"
// Create a valid manifest
let manifest_content = r#"
[plugin]
name = "test-plugin"
version = "1.0.0"
@ -367,27 +379,28 @@ kind = ["media_type"]
[plugin.binary]
wasm = "plugin.wasm"
"#;
std::fs::write(plugin_dir.join("plugin.toml"), manifest_content).unwrap();
std::fs::write(plugin_dir.join("plugin.toml"), manifest_content).unwrap();
let loader = PluginLoader::new(vec![]);
let loader = PluginLoader::new(vec![]);
// Should fail without WASM file
assert!(loader.validate_plugin_package(&plugin_dir).is_err());
// Should fail without WASM file
assert!(loader.validate_plugin_package(&plugin_dir).is_err());
// Create valid WASM file (magic number only)
std::fs::write(plugin_dir.join("plugin.wasm"), b"\0asm\x01\x00\x00\x00").unwrap();
// Create valid WASM file (magic number only)
std::fs::write(plugin_dir.join("plugin.wasm"), b"\0asm\x01\x00\x00\x00")
.unwrap();
// Should succeed now
assert!(loader.validate_plugin_package(&plugin_dir).is_ok());
}
// Should succeed now
assert!(loader.validate_plugin_package(&plugin_dir).is_ok());
}
#[test]
fn test_validate_invalid_wasm() {
let temp_dir = TempDir::new().unwrap();
let plugin_dir = temp_dir.path().join("test-plugin");
std::fs::create_dir(&plugin_dir).unwrap();
#[test]
fn test_validate_invalid_wasm() {
let temp_dir = TempDir::new().unwrap();
let plugin_dir = temp_dir.path().join("test-plugin");
std::fs::create_dir(&plugin_dir).unwrap();
let manifest_content = r#"
let manifest_content = r#"
[plugin]
name = "test-plugin"
version = "1.0.0"
@ -397,12 +410,12 @@ kind = ["media_type"]
[plugin.binary]
wasm = "plugin.wasm"
"#;
std::fs::write(plugin_dir.join("plugin.toml"), manifest_content).unwrap();
std::fs::write(plugin_dir.join("plugin.toml"), manifest_content).unwrap();
// Create invalid WASM file
std::fs::write(plugin_dir.join("plugin.wasm"), b"not wasm").unwrap();
// Create invalid WASM file
std::fs::write(plugin_dir.join("plugin.wasm"), b"not wasm").unwrap();
let loader = PluginLoader::new(vec![]);
assert!(loader.validate_plugin_package(&plugin_dir).is_err());
}
let loader = PluginLoader::new(vec![]);
assert!(loader.validate_plugin_package(&plugin_dir).is_err());
}
}

View file

@ -1,7 +1,8 @@
//! Plugin system for Pinakes
//!
//! This module provides a comprehensive plugin architecture that allows extending
//! Pinakes with custom media types, metadata extractors, search backends, and more.
//! This module provides a comprehensive plugin architecture that allows
//! extending Pinakes with custom media types, metadata extractors, search
//! backends, and more.
//!
//! # Architecture
//!
@ -10,10 +11,10 @@
//! - Hot-reload support for development
//! - Automatic plugin discovery from configured directories
use std::{path::PathBuf, sync::Arc};
use anyhow::Result;
use pinakes_plugin_api::{PluginContext, PluginMetadata};
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{debug, error, info, warn};
@ -29,391 +30,419 @@ pub use security::CapabilityEnforcer;
/// Plugin manager coordinates plugin lifecycle and operations
pub struct PluginManager {
/// Plugin registry
registry: Arc<RwLock<PluginRegistry>>,
/// Plugin registry
registry: Arc<RwLock<PluginRegistry>>,
/// WASM runtime for executing plugins
runtime: Arc<WasmRuntime>,
/// WASM runtime for executing plugins
runtime: Arc<WasmRuntime>,
/// Plugin loader for discovery and loading
loader: PluginLoader,
/// Plugin loader for discovery and loading
loader: PluginLoader,
/// Capability enforcer for security
enforcer: CapabilityEnforcer,
/// Capability enforcer for security
enforcer: CapabilityEnforcer,
/// Plugin data directory
data_dir: PathBuf,
/// Plugin data directory
data_dir: PathBuf,
/// Plugin cache directory
cache_dir: PathBuf,
/// Plugin cache directory
cache_dir: PathBuf,
/// Configuration
config: PluginManagerConfig,
/// Configuration
config: PluginManagerConfig,
}
/// Configuration for the plugin manager
#[derive(Debug, Clone)]
pub struct PluginManagerConfig {
/// Directories to search for plugins
pub plugin_dirs: Vec<PathBuf>,
/// Directories to search for plugins
pub plugin_dirs: Vec<PathBuf>,
/// Whether to enable hot-reload (for development)
pub enable_hot_reload: bool,
/// Whether to enable hot-reload (for development)
pub enable_hot_reload: bool,
/// Whether to allow unsigned plugins
pub allow_unsigned: bool,
/// Whether to allow unsigned plugins
pub allow_unsigned: bool,
/// Maximum number of concurrent plugin operations
pub max_concurrent_ops: usize,
/// Maximum number of concurrent plugin operations
pub max_concurrent_ops: usize,
/// Plugin timeout in seconds
pub plugin_timeout_secs: u64,
/// Plugin timeout in seconds
pub plugin_timeout_secs: u64,
}
impl Default for PluginManagerConfig {
fn default() -> Self {
Self {
plugin_dirs: vec![],
enable_hot_reload: false,
allow_unsigned: false,
max_concurrent_ops: 4,
plugin_timeout_secs: 30,
}
fn default() -> Self {
Self {
plugin_dirs: vec![],
enable_hot_reload: false,
allow_unsigned: false,
max_concurrent_ops: 4,
plugin_timeout_secs: 30,
}
}
}
impl From<crate::config::PluginsConfig> for PluginManagerConfig {
fn from(cfg: crate::config::PluginsConfig) -> Self {
Self {
plugin_dirs: cfg.plugin_dirs,
enable_hot_reload: cfg.enable_hot_reload,
allow_unsigned: cfg.allow_unsigned,
max_concurrent_ops: cfg.max_concurrent_ops,
plugin_timeout_secs: cfg.plugin_timeout_secs,
}
fn from(cfg: crate::config::PluginsConfig) -> Self {
Self {
plugin_dirs: cfg.plugin_dirs,
enable_hot_reload: cfg.enable_hot_reload,
allow_unsigned: cfg.allow_unsigned,
max_concurrent_ops: cfg.max_concurrent_ops,
plugin_timeout_secs: cfg.plugin_timeout_secs,
}
}
}
impl PluginManager {
/// Create a new plugin manager
pub fn new(data_dir: PathBuf, cache_dir: PathBuf, config: PluginManagerConfig) -> Result<Self> {
// Ensure directories exist
std::fs::create_dir_all(&data_dir)?;
std::fs::create_dir_all(&cache_dir)?;
/// Create a new plugin manager
pub fn new(
data_dir: PathBuf,
cache_dir: PathBuf,
config: PluginManagerConfig,
) -> Result<Self> {
// Ensure directories exist
std::fs::create_dir_all(&data_dir)?;
std::fs::create_dir_all(&cache_dir)?;
let runtime = Arc::new(WasmRuntime::new()?);
let registry = Arc::new(RwLock::new(PluginRegistry::new()));
let loader = PluginLoader::new(config.plugin_dirs.clone());
let enforcer = CapabilityEnforcer::new();
let runtime = Arc::new(WasmRuntime::new()?);
let registry = Arc::new(RwLock::new(PluginRegistry::new()));
let loader = PluginLoader::new(config.plugin_dirs.clone());
let enforcer = CapabilityEnforcer::new();
Ok(Self {
registry,
runtime,
loader,
enforcer,
data_dir,
cache_dir,
config,
Ok(Self {
registry,
runtime,
loader,
enforcer,
data_dir,
cache_dir,
config,
})
}
/// Discover and load all plugins from configured directories
pub async fn discover_and_load_all(&self) -> Result<Vec<String>> {
info!("Discovering plugins from {:?}", self.config.plugin_dirs);
let manifests = self.loader.discover_plugins().await?;
let mut loaded_plugins = Vec::new();
for manifest in manifests {
match self.load_plugin_from_manifest(&manifest).await {
Ok(plugin_id) => {
info!("Loaded plugin: {}", plugin_id);
loaded_plugins.push(plugin_id);
},
Err(e) => {
warn!("Failed to load plugin {}: {}", manifest.plugin.name, e);
},
}
}
Ok(loaded_plugins)
}
/// Load a plugin from a manifest file
async fn load_plugin_from_manifest(
&self,
manifest: &pinakes_plugin_api::PluginManifest,
) -> Result<String> {
let plugin_id = manifest.plugin_id();
// Validate plugin_id to prevent path traversal
if plugin_id.contains('/')
|| plugin_id.contains('\\')
|| plugin_id.contains("..")
{
return Err(anyhow::anyhow!("Invalid plugin ID: {}", plugin_id));
}
// Check if already loaded
{
let registry = self.registry.read().await;
if registry.is_loaded(&plugin_id) {
return Ok(plugin_id);
}
}
// Validate capabilities
let capabilities = manifest.to_capabilities();
self.enforcer.validate_capabilities(&capabilities)?;
// Create plugin context
let plugin_data_dir = self.data_dir.join(&plugin_id);
let plugin_cache_dir = self.cache_dir.join(&plugin_id);
tokio::fs::create_dir_all(&plugin_data_dir).await?;
tokio::fs::create_dir_all(&plugin_cache_dir).await?;
let context = PluginContext {
data_dir: plugin_data_dir,
cache_dir: plugin_cache_dir,
config: manifest
.config
.iter()
.map(|(k, v)| {
(
k.clone(),
serde_json::to_value(v).unwrap_or_else(|e| {
tracing::warn!(
"failed to serialize config value for key {}: {}",
k,
e
);
serde_json::Value::Null
}),
)
})
.collect(),
capabilities: capabilities.clone(),
};
// Load WASM binary
let wasm_path = self.loader.resolve_wasm_path(manifest)?;
let wasm_plugin = self.runtime.load_plugin(&wasm_path, context).await?;
// Initialize plugin
let init_succeeded = match wasm_plugin
.call_function("initialize", &[])
.await
{
Ok(_) => true,
Err(e) => {
tracing::warn!(plugin_id = %plugin_id, "plugin initialization failed: {}", e);
false
},
};
// Register plugin
let metadata = PluginMetadata {
id: plugin_id.clone(),
name: manifest.plugin.name.clone(),
version: manifest.plugin.version.clone(),
author: manifest.plugin.author.clone().unwrap_or_default(),
description: manifest
.plugin
.description
.clone()
.unwrap_or_default(),
api_version: manifest.plugin.api_version.clone(),
capabilities_required: capabilities,
};
// Derive manifest_path from the loader's plugin directories
let manifest_path = self
.loader
.get_plugin_dir(&manifest.plugin.name)
.map(|dir| dir.join("plugin.toml"));
let registered = RegisteredPlugin {
id: plugin_id.clone(),
metadata,
wasm_plugin,
manifest: manifest.clone(),
manifest_path,
enabled: init_succeeded,
};
let mut registry = self.registry.write().await;
registry.register(registered)?;
Ok(plugin_id)
}
/// Install a plugin from a file or URL
pub async fn install_plugin(&self, source: &str) -> Result<String> {
info!("Installing plugin from: {}", source);
// Download/copy plugin to plugins directory
let plugin_path =
if source.starts_with("http://") || source.starts_with("https://") {
// Download from URL
self.loader.download_plugin(source).await?
} else {
// Copy from local file
PathBuf::from(source)
};
// Load the manifest
let manifest_path = plugin_path.join("plugin.toml");
let manifest =
pinakes_plugin_api::PluginManifest::from_file(&manifest_path)?;
// Load the plugin
self.load_plugin_from_manifest(&manifest).await
}
/// Uninstall a plugin
pub async fn uninstall_plugin(&self, plugin_id: &str) -> Result<()> {
// Validate plugin_id to prevent path traversal
if plugin_id.contains('/')
|| plugin_id.contains('\\')
|| plugin_id.contains("..")
{
return Err(anyhow::anyhow!("Invalid plugin ID: {}", plugin_id));
}
/// Discover and load all plugins from configured directories
pub async fn discover_and_load_all(&self) -> Result<Vec<String>> {
info!("Discovering plugins from {:?}", self.config.plugin_dirs);
info!("Uninstalling plugin: {}", plugin_id);
let manifests = self.loader.discover_plugins().await?;
let mut loaded_plugins = Vec::new();
// Shutdown plugin first
self.shutdown_plugin(plugin_id).await?;
for manifest in manifests {
match self.load_plugin_from_manifest(&manifest).await {
Ok(plugin_id) => {
info!("Loaded plugin: {}", plugin_id);
loaded_plugins.push(plugin_id);
}
Err(e) => {
warn!("Failed to load plugin {}: {}", manifest.plugin.name, e);
}
}
}
// Remove from registry
let mut registry = self.registry.write().await;
registry.unregister(plugin_id)?;
Ok(loaded_plugins)
// Remove plugin data and cache
let plugin_data_dir = self.data_dir.join(plugin_id);
let plugin_cache_dir = self.cache_dir.join(plugin_id);
if plugin_data_dir.exists() {
std::fs::remove_dir_all(&plugin_data_dir)?;
}
if plugin_cache_dir.exists() {
std::fs::remove_dir_all(&plugin_cache_dir)?;
}
/// Load a plugin from a manifest file
async fn load_plugin_from_manifest(
&self,
manifest: &pinakes_plugin_api::PluginManifest,
) -> Result<String> {
let plugin_id = manifest.plugin_id();
Ok(())
}
// Validate plugin_id to prevent path traversal
if plugin_id.contains('/') || plugin_id.contains('\\') || plugin_id.contains("..") {
return Err(anyhow::anyhow!("Invalid plugin ID: {}", plugin_id));
}
/// Enable a plugin
pub async fn enable_plugin(&self, plugin_id: &str) -> Result<()> {
let mut registry = self.registry.write().await;
registry.enable(plugin_id)
}
// Check if already loaded
{
let registry = self.registry.read().await;
if registry.is_loaded(&plugin_id) {
return Ok(plugin_id);
}
}
/// Disable a plugin
pub async fn disable_plugin(&self, plugin_id: &str) -> Result<()> {
let mut registry = self.registry.write().await;
registry.disable(plugin_id)
}
// Validate capabilities
let capabilities = manifest.to_capabilities();
self.enforcer.validate_capabilities(&capabilities)?;
/// Shutdown a specific plugin
pub async fn shutdown_plugin(&self, plugin_id: &str) -> Result<()> {
debug!("Shutting down plugin: {}", plugin_id);
// Create plugin context
let plugin_data_dir = self.data_dir.join(&plugin_id);
let plugin_cache_dir = self.cache_dir.join(&plugin_id);
tokio::fs::create_dir_all(&plugin_data_dir).await?;
tokio::fs::create_dir_all(&plugin_cache_dir).await?;
let registry = self.registry.read().await;
if let Some(plugin) = registry.get(plugin_id) {
plugin.wasm_plugin.call_function("shutdown", &[]).await.ok();
Ok(())
} else {
Err(anyhow::anyhow!("Plugin not found: {}", plugin_id))
}
}
let context = PluginContext {
data_dir: plugin_data_dir,
cache_dir: plugin_cache_dir,
config: manifest
.config
.iter()
.map(|(k, v)| {
(
k.clone(),
serde_json::to_value(v).unwrap_or_else(|e| {
tracing::warn!("failed to serialize config value for key {}: {}", k, e);
serde_json::Value::Null
}),
)
})
.collect(),
capabilities: capabilities.clone(),
};
/// Shutdown all plugins
pub async fn shutdown_all(&self) -> Result<()> {
info!("Shutting down all plugins");
// Load WASM binary
let wasm_path = self.loader.resolve_wasm_path(manifest)?;
let wasm_plugin = self.runtime.load_plugin(&wasm_path, context).await?;
let registry = self.registry.read().await;
let plugin_ids: Vec<String> =
registry.list_all().iter().map(|p| p.id.clone()).collect();
// Initialize plugin
let init_succeeded = match wasm_plugin.call_function("initialize", &[]).await {
Ok(_) => true,
Err(e) => {
tracing::warn!(plugin_id = %plugin_id, "plugin initialization failed: {}", e);
false
}
};
// Register plugin
let metadata = PluginMetadata {
id: plugin_id.clone(),
name: manifest.plugin.name.clone(),
version: manifest.plugin.version.clone(),
author: manifest.plugin.author.clone().unwrap_or_default(),
description: manifest.plugin.description.clone().unwrap_or_default(),
api_version: manifest.plugin.api_version.clone(),
capabilities_required: capabilities,
};
// Derive manifest_path from the loader's plugin directories
let manifest_path = self
.loader
.get_plugin_dir(&manifest.plugin.name)
.map(|dir| dir.join("plugin.toml"));
let registered = RegisteredPlugin {
id: plugin_id.clone(),
metadata,
wasm_plugin,
manifest: manifest.clone(),
manifest_path,
enabled: init_succeeded,
};
let mut registry = self.registry.write().await;
registry.register(registered)?;
Ok(plugin_id)
for plugin_id in plugin_ids {
if let Err(e) = self.shutdown_plugin(&plugin_id).await {
error!("Failed to shutdown plugin {}: {}", plugin_id, e);
}
}
/// Install a plugin from a file or URL
pub async fn install_plugin(&self, source: &str) -> Result<String> {
info!("Installing plugin from: {}", source);
Ok(())
}
// Download/copy plugin to plugins directory
let plugin_path = if source.starts_with("http://") || source.starts_with("https://") {
// Download from URL
self.loader.download_plugin(source).await?
} else {
// Copy from local file
PathBuf::from(source)
};
/// Get list of all registered plugins
pub async fn list_plugins(&self) -> Vec<PluginMetadata> {
let registry = self.registry.read().await;
registry
.list_all()
.iter()
.map(|p| p.metadata.clone())
.collect()
}
// Load the manifest
let manifest_path = plugin_path.join("plugin.toml");
let manifest = pinakes_plugin_api::PluginManifest::from_file(&manifest_path)?;
/// Get plugin metadata by ID
pub async fn get_plugin(&self, plugin_id: &str) -> Option<PluginMetadata> {
let registry = self.registry.read().await;
registry.get(plugin_id).map(|p| p.metadata.clone())
}
// Load the plugin
self.load_plugin_from_manifest(&manifest).await
/// Check if a plugin is loaded and enabled
pub async fn is_plugin_enabled(&self, plugin_id: &str) -> bool {
let registry = self.registry.read().await;
registry.is_enabled(plugin_id).unwrap_or(false)
}
/// Reload a plugin (for hot-reload during development)
pub async fn reload_plugin(&self, plugin_id: &str) -> Result<()> {
if !self.config.enable_hot_reload {
return Err(anyhow::anyhow!("Hot-reload is disabled"));
}
/// Uninstall a plugin
pub async fn uninstall_plugin(&self, plugin_id: &str) -> Result<()> {
// Validate plugin_id to prevent path traversal
if plugin_id.contains('/') || plugin_id.contains('\\') || plugin_id.contains("..") {
return Err(anyhow::anyhow!("Invalid plugin ID: {}", plugin_id));
}
info!("Reloading plugin: {}", plugin_id);
info!("Uninstalling plugin: {}", plugin_id);
// Re-read the manifest from disk if possible, falling back to cached
// version
let manifest = {
let registry = self.registry.read().await;
let plugin = registry
.get(plugin_id)
.ok_or_else(|| anyhow::anyhow!("Plugin not found"))?;
if let Some(ref manifest_path) = plugin.manifest_path {
pinakes_plugin_api::PluginManifest::from_file(manifest_path)
.unwrap_or_else(|e| {
warn!("Failed to re-read manifest from disk, using cached: {}", e);
plugin.manifest.clone()
})
} else {
plugin.manifest.clone()
}
};
// Shutdown plugin first
self.shutdown_plugin(plugin_id).await?;
// Remove from registry
let mut registry = self.registry.write().await;
registry.unregister(plugin_id)?;
// Remove plugin data and cache
let plugin_data_dir = self.data_dir.join(plugin_id);
let plugin_cache_dir = self.cache_dir.join(plugin_id);
if plugin_data_dir.exists() {
std::fs::remove_dir_all(&plugin_data_dir)?;
}
if plugin_cache_dir.exists() {
std::fs::remove_dir_all(&plugin_cache_dir)?;
}
Ok(())
// Shutdown and unload current version
self.shutdown_plugin(plugin_id).await?;
{
let mut registry = self.registry.write().await;
registry.unregister(plugin_id)?;
}
/// Enable a plugin
pub async fn enable_plugin(&self, plugin_id: &str) -> Result<()> {
let mut registry = self.registry.write().await;
registry.enable(plugin_id)
}
// Reload from manifest
self.load_plugin_from_manifest(&manifest).await?;
/// Disable a plugin
pub async fn disable_plugin(&self, plugin_id: &str) -> Result<()> {
let mut registry = self.registry.write().await;
registry.disable(plugin_id)
}
/// Shutdown a specific plugin
pub async fn shutdown_plugin(&self, plugin_id: &str) -> Result<()> {
debug!("Shutting down plugin: {}", plugin_id);
let registry = self.registry.read().await;
if let Some(plugin) = registry.get(plugin_id) {
plugin.wasm_plugin.call_function("shutdown", &[]).await.ok();
Ok(())
} else {
Err(anyhow::anyhow!("Plugin not found: {}", plugin_id))
}
}
/// Shutdown all plugins
pub async fn shutdown_all(&self) -> Result<()> {
info!("Shutting down all plugins");
let registry = self.registry.read().await;
let plugin_ids: Vec<String> = registry.list_all().iter().map(|p| p.id.clone()).collect();
for plugin_id in plugin_ids {
if let Err(e) = self.shutdown_plugin(&plugin_id).await {
error!("Failed to shutdown plugin {}: {}", plugin_id, e);
}
}
Ok(())
}
/// Get list of all registered plugins
pub async fn list_plugins(&self) -> Vec<PluginMetadata> {
let registry = self.registry.read().await;
registry
.list_all()
.iter()
.map(|p| p.metadata.clone())
.collect()
}
/// Get plugin metadata by ID
pub async fn get_plugin(&self, plugin_id: &str) -> Option<PluginMetadata> {
let registry = self.registry.read().await;
registry.get(plugin_id).map(|p| p.metadata.clone())
}
/// Check if a plugin is loaded and enabled
pub async fn is_plugin_enabled(&self, plugin_id: &str) -> bool {
let registry = self.registry.read().await;
registry.is_enabled(plugin_id).unwrap_or(false)
}
/// Reload a plugin (for hot-reload during development)
pub async fn reload_plugin(&self, plugin_id: &str) -> Result<()> {
if !self.config.enable_hot_reload {
return Err(anyhow::anyhow!("Hot-reload is disabled"));
}
info!("Reloading plugin: {}", plugin_id);
// Re-read the manifest from disk if possible, falling back to cached version
let manifest = {
let registry = self.registry.read().await;
let plugin = registry
.get(plugin_id)
.ok_or_else(|| anyhow::anyhow!("Plugin not found"))?;
if let Some(ref manifest_path) = plugin.manifest_path {
pinakes_plugin_api::PluginManifest::from_file(manifest_path).unwrap_or_else(|e| {
warn!("Failed to re-read manifest from disk, using cached: {}", e);
plugin.manifest.clone()
})
} else {
plugin.manifest.clone()
}
};
// Shutdown and unload current version
self.shutdown_plugin(plugin_id).await?;
{
let mut registry = self.registry.write().await;
registry.unregister(plugin_id)?;
}
// Reload from manifest
self.load_plugin_from_manifest(&manifest).await?;
Ok(())
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
use tempfile::TempDir;
#[tokio::test]
async fn test_plugin_manager_creation() {
let temp_dir = TempDir::new().unwrap();
let data_dir = temp_dir.path().join("data");
let cache_dir = temp_dir.path().join("cache");
use super::*;
let config = PluginManagerConfig::default();
let manager = PluginManager::new(data_dir.clone(), cache_dir.clone(), config);
#[tokio::test]
async fn test_plugin_manager_creation() {
let temp_dir = TempDir::new().unwrap();
let data_dir = temp_dir.path().join("data");
let cache_dir = temp_dir.path().join("cache");
assert!(manager.is_ok());
assert!(data_dir.exists());
assert!(cache_dir.exists());
}
let config = PluginManagerConfig::default();
let manager =
PluginManager::new(data_dir.clone(), cache_dir.clone(), config);
#[tokio::test]
async fn test_list_plugins_empty() {
let temp_dir = TempDir::new().unwrap();
let data_dir = temp_dir.path().join("data");
let cache_dir = temp_dir.path().join("cache");
assert!(manager.is_ok());
assert!(data_dir.exists());
assert!(cache_dir.exists());
}
let config = PluginManagerConfig::default();
let manager = PluginManager::new(data_dir, cache_dir, config).unwrap();
#[tokio::test]
async fn test_list_plugins_empty() {
let temp_dir = TempDir::new().unwrap();
let data_dir = temp_dir.path().join("data");
let cache_dir = temp_dir.path().join("cache");
let plugins = manager.list_plugins().await;
assert_eq!(plugins.len(), 0);
}
let config = PluginManagerConfig::default();
let manager = PluginManager::new(data_dir, cache_dir, config).unwrap();
let plugins = manager.list_plugins().await;
assert_eq!(plugins.len(), 0);
}
}

View file

@ -1,280 +1,282 @@
//! Plugin registry for managing loaded plugins
use std::path::PathBuf;
use std::{collections::HashMap, path::PathBuf};
use anyhow::{Result, anyhow};
use pinakes_plugin_api::{PluginManifest, PluginMetadata};
use std::collections::HashMap;
use super::runtime::WasmPlugin;
/// A registered plugin with its metadata and runtime state
#[derive(Clone)]
pub struct RegisteredPlugin {
pub id: String,
pub metadata: PluginMetadata,
pub wasm_plugin: WasmPlugin,
pub manifest: PluginManifest,
pub manifest_path: Option<PathBuf>,
pub enabled: bool,
pub id: String,
pub metadata: PluginMetadata,
pub wasm_plugin: WasmPlugin,
pub manifest: PluginManifest,
pub manifest_path: Option<PathBuf>,
pub enabled: bool,
}
/// Plugin registry maintains the state of all loaded plugins
pub struct PluginRegistry {
/// Map of plugin ID to registered plugin
plugins: HashMap<String, RegisteredPlugin>,
/// Map of plugin ID to registered plugin
plugins: HashMap<String, RegisteredPlugin>,
}
impl PluginRegistry {
/// Create a new empty registry
pub fn new() -> Self {
Self {
plugins: HashMap::new(),
}
/// Create a new empty registry
pub fn new() -> Self {
Self {
plugins: HashMap::new(),
}
}
/// Register a new plugin
pub fn register(&mut self, plugin: RegisteredPlugin) -> Result<()> {
if self.plugins.contains_key(&plugin.id) {
return Err(anyhow!("Plugin already registered: {}", plugin.id));
}
/// Register a new plugin
pub fn register(&mut self, plugin: RegisteredPlugin) -> Result<()> {
if self.plugins.contains_key(&plugin.id) {
return Err(anyhow!("Plugin already registered: {}", plugin.id));
}
self.plugins.insert(plugin.id.clone(), plugin);
Ok(())
}
self.plugins.insert(plugin.id.clone(), plugin);
Ok(())
}
/// Unregister a plugin by ID
pub fn unregister(&mut self, plugin_id: &str) -> Result<()> {
self
.plugins
.remove(plugin_id)
.ok_or_else(|| anyhow!("Plugin not found: {}", plugin_id))?;
Ok(())
}
/// Unregister a plugin by ID
pub fn unregister(&mut self, plugin_id: &str) -> Result<()> {
self.plugins
.remove(plugin_id)
.ok_or_else(|| anyhow!("Plugin not found: {}", plugin_id))?;
Ok(())
}
/// Get a plugin by ID
pub fn get(&self, plugin_id: &str) -> Option<&RegisteredPlugin> {
self.plugins.get(plugin_id)
}
/// Get a plugin by ID
pub fn get(&self, plugin_id: &str) -> Option<&RegisteredPlugin> {
self.plugins.get(plugin_id)
}
/// Get a mutable reference to a plugin by ID
pub fn get_mut(&mut self, plugin_id: &str) -> Option<&mut RegisteredPlugin> {
self.plugins.get_mut(plugin_id)
}
/// Get a mutable reference to a plugin by ID
pub fn get_mut(&mut self, plugin_id: &str) -> Option<&mut RegisteredPlugin> {
self.plugins.get_mut(plugin_id)
}
/// Check if a plugin is loaded
pub fn is_loaded(&self, plugin_id: &str) -> bool {
self.plugins.contains_key(plugin_id)
}
/// Check if a plugin is loaded
pub fn is_loaded(&self, plugin_id: &str) -> bool {
self.plugins.contains_key(plugin_id)
}
/// Check if a plugin is enabled. Returns `None` if the plugin is not found.
pub fn is_enabled(&self, plugin_id: &str) -> Option<bool> {
self.plugins.get(plugin_id).map(|p| p.enabled)
}
/// Check if a plugin is enabled. Returns `None` if the plugin is not found.
pub fn is_enabled(&self, plugin_id: &str) -> Option<bool> {
self.plugins.get(plugin_id).map(|p| p.enabled)
}
/// Enable a plugin
pub fn enable(&mut self, plugin_id: &str) -> Result<()> {
let plugin = self
.plugins
.get_mut(plugin_id)
.ok_or_else(|| anyhow!("Plugin not found: {}", plugin_id))?;
/// Enable a plugin
pub fn enable(&mut self, plugin_id: &str) -> Result<()> {
let plugin = self
.plugins
.get_mut(plugin_id)
.ok_or_else(|| anyhow!("Plugin not found: {}", plugin_id))?;
plugin.enabled = true;
Ok(())
}
plugin.enabled = true;
Ok(())
}
/// Disable a plugin
pub fn disable(&mut self, plugin_id: &str) -> Result<()> {
let plugin = self
.plugins
.get_mut(plugin_id)
.ok_or_else(|| anyhow!("Plugin not found: {}", plugin_id))?;
/// Disable a plugin
pub fn disable(&mut self, plugin_id: &str) -> Result<()> {
let plugin = self
.plugins
.get_mut(plugin_id)
.ok_or_else(|| anyhow!("Plugin not found: {}", plugin_id))?;
plugin.enabled = false;
Ok(())
}
plugin.enabled = false;
Ok(())
}
/// List all registered plugins
pub fn list_all(&self) -> Vec<&RegisteredPlugin> {
self.plugins.values().collect()
}
/// List all registered plugins
pub fn list_all(&self) -> Vec<&RegisteredPlugin> {
self.plugins.values().collect()
}
/// List all enabled plugins
pub fn list_enabled(&self) -> Vec<&RegisteredPlugin> {
self.plugins.values().filter(|p| p.enabled).collect()
}
/// List all enabled plugins
pub fn list_enabled(&self) -> Vec<&RegisteredPlugin> {
self.plugins.values().filter(|p| p.enabled).collect()
}
/// Get plugins by kind (e.g., "media_type", "metadata_extractor")
pub fn get_by_kind(&self, kind: &str) -> Vec<&RegisteredPlugin> {
self
.plugins
.values()
.filter(|p| p.manifest.plugin.kind.contains(&kind.to_string()))
.collect()
}
/// Get plugins by kind (e.g., "media_type", "metadata_extractor")
pub fn get_by_kind(&self, kind: &str) -> Vec<&RegisteredPlugin> {
self.plugins
.values()
.filter(|p| p.manifest.plugin.kind.contains(&kind.to_string()))
.collect()
}
/// Get count of registered plugins
pub fn count(&self) -> usize {
self.plugins.len()
}
/// Get count of registered plugins
pub fn count(&self) -> usize {
self.plugins.len()
}
/// Get count of enabled plugins
pub fn count_enabled(&self) -> usize {
self.plugins.values().filter(|p| p.enabled).count()
}
/// Get count of enabled plugins
pub fn count_enabled(&self) -> usize {
self.plugins.values().filter(|p| p.enabled).count()
}
}
impl Default for PluginRegistry {
fn default() -> Self {
Self::new()
}
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use pinakes_plugin_api::Capabilities;
use std::collections::HashMap;
use std::collections::HashMap;
fn create_test_plugin(id: &str, kind: Vec<String>) -> RegisteredPlugin {
let manifest = PluginManifest {
plugin: pinakes_plugin_api::manifest::PluginInfo {
name: id.to_string(),
version: "1.0.0".to_string(),
api_version: "1.0".to_string(),
author: Some("Test".to_string()),
description: Some("Test plugin".to_string()),
homepage: None,
license: None,
kind,
binary: pinakes_plugin_api::manifest::PluginBinary {
wasm: "test.wasm".to_string(),
entrypoint: None,
},
dependencies: vec![],
},
capabilities: Default::default(),
config: HashMap::new(),
};
use pinakes_plugin_api::Capabilities;
RegisteredPlugin {
id: id.to_string(),
metadata: PluginMetadata {
id: id.to_string(),
name: id.to_string(),
version: "1.0.0".to_string(),
author: "Test".to_string(),
description: "Test plugin".to_string(),
api_version: "1.0".to_string(),
capabilities_required: Capabilities::default(),
},
wasm_plugin: WasmPlugin::default(),
manifest,
manifest_path: None,
enabled: true,
}
use super::*;
fn create_test_plugin(id: &str, kind: Vec<String>) -> RegisteredPlugin {
let manifest = PluginManifest {
plugin: pinakes_plugin_api::manifest::PluginInfo {
name: id.to_string(),
version: "1.0.0".to_string(),
api_version: "1.0".to_string(),
author: Some("Test".to_string()),
description: Some("Test plugin".to_string()),
homepage: None,
license: None,
kind,
binary: pinakes_plugin_api::manifest::PluginBinary {
wasm: "test.wasm".to_string(),
entrypoint: None,
},
dependencies: vec![],
},
capabilities: Default::default(),
config: HashMap::new(),
};
RegisteredPlugin {
id: id.to_string(),
metadata: PluginMetadata {
id: id.to_string(),
name: id.to_string(),
version: "1.0.0".to_string(),
author: "Test".to_string(),
description: "Test plugin".to_string(),
api_version: "1.0".to_string(),
capabilities_required: Capabilities::default(),
},
wasm_plugin: WasmPlugin::default(),
manifest,
manifest_path: None,
enabled: true,
}
}
#[test]
fn test_registry_register_and_get() {
let mut registry = PluginRegistry::new();
let plugin = create_test_plugin("test-plugin", vec!["media_type".to_string()]);
#[test]
fn test_registry_register_and_get() {
let mut registry = PluginRegistry::new();
let plugin =
create_test_plugin("test-plugin", vec!["media_type".to_string()]);
registry.register(plugin.clone()).unwrap();
registry.register(plugin.clone()).unwrap();
assert!(registry.is_loaded("test-plugin"));
assert!(registry.get("test-plugin").is_some());
}
assert!(registry.is_loaded("test-plugin"));
assert!(registry.get("test-plugin").is_some());
}
#[test]
fn test_registry_duplicate_register() {
let mut registry = PluginRegistry::new();
let plugin = create_test_plugin("test-plugin", vec!["media_type".to_string()]);
#[test]
fn test_registry_duplicate_register() {
let mut registry = PluginRegistry::new();
let plugin =
create_test_plugin("test-plugin", vec!["media_type".to_string()]);
registry.register(plugin.clone()).unwrap();
let result = registry.register(plugin);
registry.register(plugin.clone()).unwrap();
let result = registry.register(plugin);
assert!(result.is_err());
}
assert!(result.is_err());
}
#[test]
fn test_registry_unregister() {
let mut registry = PluginRegistry::new();
let plugin = create_test_plugin("test-plugin", vec!["media_type".to_string()]);
#[test]
fn test_registry_unregister() {
let mut registry = PluginRegistry::new();
let plugin =
create_test_plugin("test-plugin", vec!["media_type".to_string()]);
registry.register(plugin).unwrap();
registry.unregister("test-plugin").unwrap();
registry.register(plugin).unwrap();
registry.unregister("test-plugin").unwrap();
assert!(!registry.is_loaded("test-plugin"));
}
assert!(!registry.is_loaded("test-plugin"));
}
#[test]
fn test_registry_enable_disable() {
let mut registry = PluginRegistry::new();
let plugin = create_test_plugin("test-plugin", vec!["media_type".to_string()]);
#[test]
fn test_registry_enable_disable() {
let mut registry = PluginRegistry::new();
let plugin =
create_test_plugin("test-plugin", vec!["media_type".to_string()]);
registry.register(plugin).unwrap();
assert_eq!(registry.is_enabled("test-plugin"), Some(true));
registry.register(plugin).unwrap();
assert_eq!(registry.is_enabled("test-plugin"), Some(true));
registry.disable("test-plugin").unwrap();
assert_eq!(registry.is_enabled("test-plugin"), Some(false));
registry.disable("test-plugin").unwrap();
assert_eq!(registry.is_enabled("test-plugin"), Some(false));
registry.enable("test-plugin").unwrap();
assert_eq!(registry.is_enabled("test-plugin"), Some(true));
registry.enable("test-plugin").unwrap();
assert_eq!(registry.is_enabled("test-plugin"), Some(true));
assert_eq!(registry.is_enabled("nonexistent"), None);
}
assert_eq!(registry.is_enabled("nonexistent"), None);
}
#[test]
fn test_registry_get_by_kind() {
let mut registry = PluginRegistry::new();
#[test]
fn test_registry_get_by_kind() {
let mut registry = PluginRegistry::new();
registry
.register(create_test_plugin(
"plugin1",
vec!["media_type".to_string()],
))
.unwrap();
registry
.register(create_test_plugin(
"plugin2",
vec!["metadata_extractor".to_string()],
))
.unwrap();
registry
.register(create_test_plugin(
"plugin3",
vec!["media_type".to_string()],
))
.unwrap();
registry
.register(create_test_plugin("plugin1", vec![
"media_type".to_string(),
]))
.unwrap();
registry
.register(create_test_plugin("plugin2", vec![
"metadata_extractor".to_string(),
]))
.unwrap();
registry
.register(create_test_plugin("plugin3", vec![
"media_type".to_string(),
]))
.unwrap();
let media_type_plugins = registry.get_by_kind("media_type");
assert_eq!(media_type_plugins.len(), 2);
let media_type_plugins = registry.get_by_kind("media_type");
assert_eq!(media_type_plugins.len(), 2);
let extractor_plugins = registry.get_by_kind("metadata_extractor");
assert_eq!(extractor_plugins.len(), 1);
}
let extractor_plugins = registry.get_by_kind("metadata_extractor");
assert_eq!(extractor_plugins.len(), 1);
}
#[test]
fn test_registry_counts() {
let mut registry = PluginRegistry::new();
#[test]
fn test_registry_counts() {
let mut registry = PluginRegistry::new();
registry
.register(create_test_plugin(
"plugin1",
vec!["media_type".to_string()],
))
.unwrap();
registry
.register(create_test_plugin(
"plugin2",
vec!["media_type".to_string()],
))
.unwrap();
registry
.register(create_test_plugin("plugin1", vec![
"media_type".to_string(),
]))
.unwrap();
registry
.register(create_test_plugin("plugin2", vec![
"media_type".to_string(),
]))
.unwrap();
assert_eq!(registry.count(), 2);
assert_eq!(registry.count_enabled(), 2);
assert_eq!(registry.count(), 2);
assert_eq!(registry.count_enabled(), 2);
registry.disable("plugin1").unwrap();
assert_eq!(registry.count(), 2);
assert_eq!(registry.count_enabled(), 1);
}
registry.disable("plugin1").unwrap();
assert_eq!(registry.count(), 2);
assert_eq!(registry.count_enabled(), 1);
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,341 +1,357 @@
//! Capability-based security for plugins
use std::path::{Path, PathBuf};
use anyhow::{Result, anyhow};
use pinakes_plugin_api::Capabilities;
use std::path::{Path, PathBuf};
/// Capability enforcer validates and enforces plugin capabilities
pub struct CapabilityEnforcer {
/// Maximum allowed memory per plugin (bytes)
max_memory_limit: usize,
/// Maximum allowed memory per plugin (bytes)
max_memory_limit: usize,
/// Maximum allowed CPU time per plugin (milliseconds)
max_cpu_time_limit: u64,
/// Maximum allowed CPU time per plugin (milliseconds)
max_cpu_time_limit: u64,
/// Allowed filesystem read paths (system-wide)
allowed_read_paths: Vec<PathBuf>,
/// Allowed filesystem read paths (system-wide)
allowed_read_paths: Vec<PathBuf>,
/// Allowed filesystem write paths (system-wide)
allowed_write_paths: Vec<PathBuf>,
/// Allowed filesystem write paths (system-wide)
allowed_write_paths: Vec<PathBuf>,
/// Whether to allow network access by default
allow_network_default: bool,
/// Whether to allow network access by default
allow_network_default: bool,
}
impl CapabilityEnforcer {
/// Create a new capability enforcer with default limits
pub fn new() -> Self {
Self {
max_memory_limit: 512 * 1024 * 1024, // 512 MB
max_cpu_time_limit: 60 * 1000, // 60 seconds
allowed_read_paths: vec![],
allowed_write_paths: vec![],
allow_network_default: false,
}
/// Create a new capability enforcer with default limits
pub fn new() -> Self {
Self {
max_memory_limit: 512 * 1024 * 1024, // 512 MB
max_cpu_time_limit: 60 * 1000, // 60 seconds
allowed_read_paths: vec![],
allowed_write_paths: vec![],
allow_network_default: false,
}
}
/// Set maximum memory limit
pub fn with_max_memory(mut self, bytes: usize) -> Self {
self.max_memory_limit = bytes;
self
}
/// Set maximum CPU time limit
pub fn with_max_cpu_time(mut self, milliseconds: u64) -> Self {
self.max_cpu_time_limit = milliseconds;
self
}
/// Add allowed read path
pub fn allow_read_path(mut self, path: PathBuf) -> Self {
self.allowed_read_paths.push(path);
self
}
/// Add allowed write path
pub fn allow_write_path(mut self, path: PathBuf) -> Self {
self.allowed_write_paths.push(path);
self
}
/// Set default network access policy
pub fn with_network_default(mut self, allow: bool) -> Self {
self.allow_network_default = allow;
self
}
/// Validate capabilities requested by a plugin
pub fn validate_capabilities(
&self,
capabilities: &Capabilities,
) -> Result<()> {
// Validate memory limit
if let Some(memory) = capabilities.max_memory_bytes
&& memory > self.max_memory_limit
{
return Err(anyhow!(
"Requested memory ({} bytes) exceeds limit ({} bytes)",
memory,
self.max_memory_limit
));
}
/// Set maximum memory limit
pub fn with_max_memory(mut self, bytes: usize) -> Self {
self.max_memory_limit = bytes;
self
// Validate CPU time limit
if let Some(cpu_time) = capabilities.max_cpu_time_ms
&& cpu_time > self.max_cpu_time_limit
{
return Err(anyhow!(
"Requested CPU time ({} ms) exceeds limit ({} ms)",
cpu_time,
self.max_cpu_time_limit
));
}
/// Set maximum CPU time limit
pub fn with_max_cpu_time(mut self, milliseconds: u64) -> Self {
self.max_cpu_time_limit = milliseconds;
self
// Validate filesystem access
self.validate_filesystem_access(capabilities)?;
// Validate network access
if capabilities.network.enabled && !self.allow_network_default {
return Err(anyhow!(
"Plugin requests network access, but network access is disabled by \
policy"
));
}
/// Add allowed read path
pub fn allow_read_path(mut self, path: PathBuf) -> Self {
self.allowed_read_paths.push(path);
self
Ok(())
}
/// Validate filesystem access capabilities
fn validate_filesystem_access(
&self,
capabilities: &Capabilities,
) -> Result<()> {
// Check read paths
for path in &capabilities.filesystem.read {
if !self.is_read_allowed(path) {
return Err(anyhow!(
"Plugin requests read access to {:?} which is not in allowed paths",
path
));
}
}
/// Add allowed write path
pub fn allow_write_path(mut self, path: PathBuf) -> Self {
self.allowed_write_paths.push(path);
self
// Check write paths
for path in &capabilities.filesystem.write {
if !self.is_write_allowed(path) {
return Err(anyhow!(
"Plugin requests write access to {:?} which is not in allowed paths",
path
));
}
}
/// Set default network access policy
pub fn with_network_default(mut self, allow: bool) -> Self {
self.allow_network_default = allow;
self
Ok(())
}
/// Returns `true` if `path` resolves to a location under one of the
/// configured read roots.
///
/// With no configured roots this is deny-all. Both the candidate and each
/// allowed root are canonicalized before comparison; a candidate that cannot
/// be canonicalized (e.g. it does not exist) is rejected.
pub fn is_read_allowed(&self, path: &Path) -> bool {
    // An empty allow-list means nothing is readable.
    if self.allowed_read_paths.is_empty() {
        return false;
    }
    match path.canonicalize() {
        Ok(resolved) => self
            .allowed_read_paths
            .iter()
            .filter_map(|root| root.canonicalize().ok())
            .any(|root| resolved.starts_with(root)),
        Err(_) => false,
    }
}
/// Returns `true` if `path` resolves to a location under one of the
/// configured write roots.
///
/// With no configured roots this is deny-all. A file that does not exist yet
/// cannot itself be canonicalized, so its parent directory is canonicalized
/// and the file name re-attached — this lets new files be policy-checked.
pub fn is_write_allowed(&self, path: &Path) -> bool {
    // An empty allow-list means nothing is writable.
    if self.allowed_write_paths.is_empty() {
        return false;
    }
    let resolved = if path.exists() {
        path.canonicalize().ok()
    } else {
        // Resolve via the parent so not-yet-created files are still checked.
        path.parent()
            .and_then(|parent| parent.canonicalize().ok())
            .map(|parent| parent.join(path.file_name().unwrap_or_default()))
    };
    match resolved {
        Some(candidate) => self
            .allowed_write_paths
            .iter()
            .filter_map(|root| root.canonicalize().ok())
            .any(|root| candidate.starts_with(root)),
        None => false,
    }
}
/// Returns `true` only when the global policy permits network access AND the
/// plugin's capabilities actually request it.
pub fn is_network_allowed(&self, capabilities: &Capabilities) -> bool {
    self.allow_network_default && capabilities.network.enabled
}
/// Check if a specific domain is allowed
pub fn is_domain_allowed(
&self,
capabilities: &Capabilities,
domain: &str,
) -> bool {
if !capabilities.network.enabled {
return false;
}
/// Validate capabilities requested by a plugin
pub fn validate_capabilities(&self, capabilities: &Capabilities) -> Result<()> {
// Validate memory limit
if let Some(memory) = capabilities.max_memory_bytes
&& memory > self.max_memory_limit
{
return Err(anyhow!(
"Requested memory ({} bytes) exceeds limit ({} bytes)",
memory,
self.max_memory_limit
));
}
// Validate CPU time limit
if let Some(cpu_time) = capabilities.max_cpu_time_ms
&& cpu_time > self.max_cpu_time_limit
{
return Err(anyhow!(
"Requested CPU time ({} ms) exceeds limit ({} ms)",
cpu_time,
self.max_cpu_time_limit
));
}
// Validate filesystem access
self.validate_filesystem_access(capabilities)?;
// Validate network access
if capabilities.network.enabled && !self.allow_network_default {
return Err(anyhow!(
"Plugin requests network access, but network access is disabled by policy"
));
}
Ok(())
// If no domain restrictions, allow all domains
if capabilities.network.allowed_domains.is_none() {
return self.allow_network_default;
}
/// Validate filesystem access capabilities
fn validate_filesystem_access(&self, capabilities: &Capabilities) -> Result<()> {
// Check read paths
for path in &capabilities.filesystem.read {
if !self.is_read_allowed(path) {
return Err(anyhow!(
"Plugin requests read access to {:?} which is not in allowed paths",
path
));
}
}
// Check against allowed domains list
capabilities
.network
.allowed_domains
.as_ref()
.map(|domains| domains.iter().any(|d| d.eq_ignore_ascii_case(domain)))
.unwrap_or(false)
}
// Check write paths
for path in &capabilities.filesystem.write {
if !self.is_write_allowed(path) {
return Err(anyhow!(
"Plugin requests write access to {:?} which is not in allowed paths",
path
));
}
}
/// Get effective memory limit for a plugin
pub fn get_memory_limit(&self, capabilities: &Capabilities) -> usize {
capabilities
.max_memory_bytes
.unwrap_or(self.max_memory_limit)
.min(self.max_memory_limit)
}
Ok(())
}
/// Check if a path is allowed for reading
pub fn is_read_allowed(&self, path: &Path) -> bool {
if self.allowed_read_paths.is_empty() {
return false; // deny-all when unconfigured
}
let Ok(canonical) = path.canonicalize() else {
return false;
};
self.allowed_read_paths.iter().any(|allowed| {
allowed
.canonicalize()
.is_ok_and(|a| canonical.starts_with(a))
})
}
/// Check if a path is allowed for writing
pub fn is_write_allowed(&self, path: &Path) -> bool {
if self.allowed_write_paths.is_empty() {
return false; // deny-all when unconfigured
}
let canonical = if path.exists() {
path.canonicalize().ok()
} else {
path.parent()
.and_then(|p| p.canonicalize().ok())
.map(|p| p.join(path.file_name().unwrap_or_default()))
};
let Some(canonical) = canonical else {
return false;
};
self.allowed_write_paths.iter().any(|allowed| {
allowed
.canonicalize()
.is_ok_and(|a| canonical.starts_with(a))
})
}
/// Check if network access is allowed for a plugin
pub fn is_network_allowed(&self, capabilities: &Capabilities) -> bool {
capabilities.network.enabled && self.allow_network_default
}
/// Decides whether a plugin may contact `domain`.
///
/// Network access must be enabled in the plugin's capabilities. When no
/// explicit domain allow-list is declared, the global network policy
/// decides; otherwise the domain must appear in the allow-list
/// (ASCII case-insensitive comparison).
pub fn is_domain_allowed(&self, capabilities: &Capabilities, domain: &str) -> bool {
    if !capabilities.network.enabled {
        return false;
    }
    match capabilities.network.allowed_domains.as_deref() {
        // No restriction list: fall back to the global default policy.
        None => self.allow_network_default,
        // Restricted: the domain must match an allow-listed entry.
        Some(domains) => domains.iter().any(|d| d.eq_ignore_ascii_case(domain)),
    }
}
/// Effective memory limit for a plugin: the plugin's requested limit when
/// present, clamped so it never exceeds the system-wide maximum.
pub fn get_memory_limit(&self, capabilities: &Capabilities) -> usize {
    match capabilities.max_memory_bytes {
        Some(requested) => requested.min(self.max_memory_limit),
        None => self.max_memory_limit,
    }
}
/// Effective CPU-time limit (ms) for a plugin: the plugin's requested limit
/// when present, clamped so it never exceeds the system-wide maximum.
pub fn get_cpu_time_limit(&self, capabilities: &Capabilities) -> u64 {
    capabilities
        .max_cpu_time_ms
        .map_or(self.max_cpu_time_limit, |requested| {
            requested.min(self.max_cpu_time_limit)
        })
}
/// Get effective CPU time limit for a plugin
pub fn get_cpu_time_limit(&self, capabilities: &Capabilities) -> u64 {
capabilities
.max_cpu_time_ms
.unwrap_or(self.max_cpu_time_limit)
.min(self.max_cpu_time_limit)
}
}
impl Default for CapabilityEnforcer {
fn default() -> Self {
Self::new()
}
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[allow(unused_imports)]
use pinakes_plugin_api::{FilesystemCapability, NetworkCapability};
#[allow(unused_imports)]
use pinakes_plugin_api::{FilesystemCapability, NetworkCapability};
#[test]
fn test_validate_memory_limit() {
let enforcer = CapabilityEnforcer::new().with_max_memory(100 * 1024 * 1024); // 100 MB
use super::*;
let mut caps = Capabilities::default();
caps.max_memory_bytes = Some(50 * 1024 * 1024); // 50 MB - OK
assert!(enforcer.validate_capabilities(&caps).is_ok());
#[test]
fn test_validate_memory_limit() {
let enforcer = CapabilityEnforcer::new().with_max_memory(100 * 1024 * 1024); // 100 MB
caps.max_memory_bytes = Some(200 * 1024 * 1024); // 200 MB - exceeds limit
assert!(enforcer.validate_capabilities(&caps).is_err());
}
let mut caps = Capabilities::default();
caps.max_memory_bytes = Some(50 * 1024 * 1024); // 50 MB - OK
assert!(enforcer.validate_capabilities(&caps).is_ok());
#[test]
fn test_validate_cpu_time_limit() {
let enforcer = CapabilityEnforcer::new().with_max_cpu_time(30_000); // 30 seconds
caps.max_memory_bytes = Some(200 * 1024 * 1024); // 200 MB - exceeds limit
assert!(enforcer.validate_capabilities(&caps).is_err());
}
let mut caps = Capabilities::default();
caps.max_cpu_time_ms = Some(10_000); // 10 seconds - OK
assert!(enforcer.validate_capabilities(&caps).is_ok());
#[test]
fn test_validate_cpu_time_limit() {
let enforcer = CapabilityEnforcer::new().with_max_cpu_time(30_000); // 30 seconds
caps.max_cpu_time_ms = Some(60_000); // 60 seconds - exceeds limit
assert!(enforcer.validate_capabilities(&caps).is_err());
}
let mut caps = Capabilities::default();
caps.max_cpu_time_ms = Some(10_000); // 10 seconds - OK
assert!(enforcer.validate_capabilities(&caps).is_ok());
#[test]
fn test_filesystem_read_allowed() {
// Use real temp directories so canonicalize works
let tmp = tempfile::tempdir().unwrap();
let allowed_dir = tmp.path().join("allowed");
std::fs::create_dir_all(&allowed_dir).unwrap();
let test_file = allowed_dir.join("test.txt");
std::fs::write(&test_file, "test").unwrap();
caps.max_cpu_time_ms = Some(60_000); // 60 seconds - exceeds limit
assert!(enforcer.validate_capabilities(&caps).is_err());
}
let enforcer = CapabilityEnforcer::new().allow_read_path(allowed_dir.clone());
#[test]
fn test_filesystem_read_allowed() {
// Use real temp directories so canonicalize works
let tmp = tempfile::tempdir().unwrap();
let allowed_dir = tmp.path().join("allowed");
std::fs::create_dir_all(&allowed_dir).unwrap();
let test_file = allowed_dir.join("test.txt");
std::fs::write(&test_file, "test").unwrap();
assert!(enforcer.is_read_allowed(&test_file));
assert!(!enforcer.is_read_allowed(Path::new("/etc/passwd")));
}
let enforcer =
CapabilityEnforcer::new().allow_read_path(allowed_dir.clone());
#[test]
fn test_filesystem_read_denied_when_empty() {
let enforcer = CapabilityEnforcer::new();
assert!(!enforcer.is_read_allowed(Path::new("/tmp/test.txt")));
}
assert!(enforcer.is_read_allowed(&test_file));
assert!(!enforcer.is_read_allowed(Path::new("/etc/passwd")));
}
#[test]
fn test_filesystem_write_allowed() {
let tmp = tempfile::tempdir().unwrap();
let output_dir = tmp.path().join("output");
std::fs::create_dir_all(&output_dir).unwrap();
// Existing file in allowed dir
let existing = output_dir.join("file.txt");
std::fs::write(&existing, "test").unwrap();
#[test]
fn test_filesystem_read_denied_when_empty() {
let enforcer = CapabilityEnforcer::new();
assert!(!enforcer.is_read_allowed(Path::new("/tmp/test.txt")));
}
let enforcer = CapabilityEnforcer::new().allow_write_path(output_dir.clone());
#[test]
fn test_filesystem_write_allowed() {
let tmp = tempfile::tempdir().unwrap();
let output_dir = tmp.path().join("output");
std::fs::create_dir_all(&output_dir).unwrap();
// Existing file in allowed dir
let existing = output_dir.join("file.txt");
std::fs::write(&existing, "test").unwrap();
assert!(enforcer.is_write_allowed(&existing));
// New file in allowed dir (parent exists)
assert!(enforcer.is_write_allowed(&output_dir.join("new_file.txt")));
assert!(!enforcer.is_write_allowed(Path::new("/etc/config")));
}
let enforcer =
CapabilityEnforcer::new().allow_write_path(output_dir.clone());
#[test]
fn test_filesystem_write_denied_when_empty() {
let enforcer = CapabilityEnforcer::new();
assert!(!enforcer.is_write_allowed(Path::new("/tmp/file.txt")));
}
assert!(enforcer.is_write_allowed(&existing));
// New file in allowed dir (parent exists)
assert!(enforcer.is_write_allowed(&output_dir.join("new_file.txt")));
assert!(!enforcer.is_write_allowed(Path::new("/etc/config")));
}
#[test]
fn test_network_allowed() {
let enforcer = CapabilityEnforcer::new().with_network_default(true);
#[test]
fn test_filesystem_write_denied_when_empty() {
let enforcer = CapabilityEnforcer::new();
assert!(!enforcer.is_write_allowed(Path::new("/tmp/file.txt")));
}
let mut caps = Capabilities::default();
caps.network.enabled = true;
#[test]
fn test_network_allowed() {
let enforcer = CapabilityEnforcer::new().with_network_default(true);
assert!(enforcer.is_network_allowed(&caps));
let mut caps = Capabilities::default();
caps.network.enabled = true;
caps.network.enabled = false;
assert!(!enforcer.is_network_allowed(&caps));
}
assert!(enforcer.is_network_allowed(&caps));
#[test]
fn test_domain_restrictions() {
let enforcer = CapabilityEnforcer::new().with_network_default(true);
caps.network.enabled = false;
assert!(!enforcer.is_network_allowed(&caps));
}
let mut caps = Capabilities::default();
caps.network.enabled = true;
caps.network.allowed_domains = Some(vec![
"api.example.com".to_string(),
"cdn.example.com".to_string(),
]);
#[test]
fn test_domain_restrictions() {
let enforcer = CapabilityEnforcer::new().with_network_default(true);
assert!(enforcer.is_domain_allowed(&caps, "api.example.com"));
assert!(enforcer.is_domain_allowed(&caps, "cdn.example.com"));
assert!(!enforcer.is_domain_allowed(&caps, "evil.com"));
}
let mut caps = Capabilities::default();
caps.network.enabled = true;
caps.network.allowed_domains = Some(vec![
"api.example.com".to_string(),
"cdn.example.com".to_string(),
]);
#[test]
fn test_get_effective_limits() {
let enforcer = CapabilityEnforcer::new()
.with_max_memory(100 * 1024 * 1024)
.with_max_cpu_time(30_000);
assert!(enforcer.is_domain_allowed(&caps, "api.example.com"));
assert!(enforcer.is_domain_allowed(&caps, "cdn.example.com"));
assert!(!enforcer.is_domain_allowed(&caps, "evil.com"));
}
let mut caps = Capabilities::default();
#[test]
fn test_get_effective_limits() {
let enforcer = CapabilityEnforcer::new()
.with_max_memory(100 * 1024 * 1024)
.with_max_cpu_time(30_000);
// No limits specified - use defaults
assert_eq!(enforcer.get_memory_limit(&caps), 100 * 1024 * 1024);
assert_eq!(enforcer.get_cpu_time_limit(&caps), 30_000);
let mut caps = Capabilities::default();
// Plugin requests lower limits - use plugin's
caps.max_memory_bytes = Some(50 * 1024 * 1024);
caps.max_cpu_time_ms = Some(10_000);
assert_eq!(enforcer.get_memory_limit(&caps), 50 * 1024 * 1024);
assert_eq!(enforcer.get_cpu_time_limit(&caps), 10_000);
// No limits specified - use defaults
assert_eq!(enforcer.get_memory_limit(&caps), 100 * 1024 * 1024);
assert_eq!(enforcer.get_cpu_time_limit(&caps), 30_000);
// Plugin requests higher limits - cap at system max
caps.max_memory_bytes = Some(200 * 1024 * 1024);
caps.max_cpu_time_ms = Some(60_000);
assert_eq!(enforcer.get_memory_limit(&caps), 100 * 1024 * 1024);
assert_eq!(enforcer.get_cpu_time_limit(&caps), 30_000);
}
// Plugin requests lower limits - use plugin's
caps.max_memory_bytes = Some(50 * 1024 * 1024);
caps.max_cpu_time_ms = Some(10_000);
assert_eq!(enforcer.get_memory_limit(&caps), 50 * 1024 * 1024);
assert_eq!(enforcer.get_cpu_time_limit(&caps), 10_000);
// Plugin requests higher limits - cap at system max
caps.max_memory_bytes = Some(200 * 1024 * 1024);
caps.max_cpu_time_ms = Some(60_000);
assert_eq!(enforcer.get_memory_limit(&caps), 100 * 1024 * 1024);
assert_eq!(enforcer.get_cpu_time_limit(&caps), 30_000);
}
}

View file

@ -1,387 +1,422 @@
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::{
path::{Path, PathBuf},
sync::{
Arc,
Mutex,
atomic::{AtomicBool, AtomicUsize, Ordering},
},
};
use notify::{PollWatcher, RecursiveMode, Watcher};
use tokio::sync::mpsc;
use tracing::{info, warn};
use crate::error::Result;
use crate::import;
use crate::storage::DynStorageBackend;
use crate::{error::Result, import, storage::DynStorageBackend};
pub struct ScanStatus {
pub scanning: bool,
pub files_found: usize,
pub files_processed: usize,
/// Number of files skipped because they haven't changed (incremental scan)
pub files_skipped: usize,
pub errors: Vec<String>,
pub scanning: bool,
pub files_found: usize,
pub files_processed: usize,
/// Number of files skipped because they haven't changed (incremental scan)
pub files_skipped: usize,
pub errors: Vec<String>,
}
/// Options for scanning operations
#[derive(Debug, Clone, Default)]
pub struct ScanOptions {
/// Use incremental scanning (skip unchanged files based on mtime)
pub incremental: bool,
/// Force full rescan even for incremental mode
pub force_full: bool,
/// Use incremental scanning (skip unchanged files based on mtime)
pub incremental: bool,
/// Force full rescan even for incremental mode
pub force_full: bool,
}
/// Shared scan progress that can be read by the status endpoint while a scan runs.
/// Shared scan progress that can be read by the status endpoint while a scan
/// runs.
#[derive(Clone)]
pub struct ScanProgress {
pub is_scanning: Arc<AtomicBool>,
pub files_found: Arc<AtomicUsize>,
pub files_processed: Arc<AtomicUsize>,
pub error_count: Arc<AtomicUsize>,
pub error_messages: Arc<Mutex<Vec<String>>>,
pub is_scanning: Arc<AtomicBool>,
pub files_found: Arc<AtomicUsize>,
pub files_processed: Arc<AtomicUsize>,
pub error_count: Arc<AtomicUsize>,
pub error_messages: Arc<Mutex<Vec<String>>>,
}
const MAX_STORED_ERRORS: usize = 100;
impl ScanProgress {
pub fn new() -> Self {
Self {
is_scanning: Arc::new(AtomicBool::new(false)),
files_found: Arc::new(AtomicUsize::new(0)),
files_processed: Arc::new(AtomicUsize::new(0)),
error_count: Arc::new(AtomicUsize::new(0)),
error_messages: Arc::new(Mutex::new(Vec::new())),
}
pub fn new() -> Self {
Self {
is_scanning: Arc::new(AtomicBool::new(false)),
files_found: Arc::new(AtomicUsize::new(0)),
files_processed: Arc::new(AtomicUsize::new(0)),
error_count: Arc::new(AtomicUsize::new(0)),
error_messages: Arc::new(Mutex::new(Vec::new())),
}
}
pub fn snapshot(&self) -> ScanStatus {
let errors = self
.error_messages
.lock()
.map(|v| v.clone())
.unwrap_or_default();
ScanStatus {
scanning: self.is_scanning.load(Ordering::Acquire),
files_found: self.files_found.load(Ordering::Acquire),
files_processed: self.files_processed.load(Ordering::Acquire),
files_skipped: 0, // Not tracked in real-time progress
errors,
}
/// Produces a point-in-time `ScanStatus` from the live counters.
///
/// A poisoned error-message lock yields an empty error list rather than
/// propagating the panic to status readers.
pub fn snapshot(&self) -> ScanStatus {
    ScanStatus {
        scanning: self.is_scanning.load(Ordering::Acquire),
        files_found: self.files_found.load(Ordering::Acquire),
        files_processed: self.files_processed.load(Ordering::Acquire),
        // Skip counts are only produced by batch scans, not live progress.
        files_skipped: 0,
        errors: self
            .error_messages
            .lock()
            .map(|msgs| msgs.clone())
            .unwrap_or_default(),
    }
}
fn begin(&self) {
self.is_scanning.store(true, Ordering::Release);
self.files_found.store(0, Ordering::Release);
self.files_processed.store(0, Ordering::Release);
self.error_count.store(0, Ordering::Release);
if let Ok(mut msgs) = self.error_messages.lock() {
msgs.clear();
}
/// Resets all progress state and flags the scan as running.
fn begin(&self) {
    self.is_scanning.store(true, Ordering::Release);
    // Zero every counter from any previous scan.
    for counter in [&self.files_found, &self.files_processed, &self.error_count] {
        counter.store(0, Ordering::Release);
    }
    // Best effort: a poisoned lock simply leaves stale messages behind.
    if let Ok(mut msgs) = self.error_messages.lock() {
        msgs.clear();
    }
}
fn record_error(&self, message: String) {
self.error_count.fetch_add(1, Ordering::Release);
if let Ok(mut msgs) = self.error_messages.lock()
&& msgs.len() < MAX_STORED_ERRORS
{
msgs.push(message);
}
fn record_error(&self, message: String) {
self.error_count.fetch_add(1, Ordering::Release);
if let Ok(mut msgs) = self.error_messages.lock()
&& msgs.len() < MAX_STORED_ERRORS
{
msgs.push(message);
}
}
/// Marks the scan as complete so `snapshot()` reports `scanning: false`.
fn finish(&self) {
self.is_scanning.store(false, Ordering::Release);
}
fn finish(&self) {
self.is_scanning.store(false, Ordering::Release);
}
}
impl Default for ScanProgress {
// Defers to `ScanProgress::new` for an idle, zeroed progress tracker.
fn default() -> Self {
Self::new()
}
fn default() -> Self {
Self::new()
}
}
pub async fn scan_directory(
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
) -> Result<ScanStatus> {
scan_directory_with_options(storage, dir, ignore_patterns, None, &ScanOptions::default()).await
scan_directory_with_options(
storage,
dir,
ignore_patterns,
None,
&ScanOptions::default(),
)
.await
}
/// Scan a directory with incremental scanning support
pub async fn scan_directory_incremental(
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
) -> Result<ScanStatus> {
let options = ScanOptions {
incremental: true,
force_full: false,
};
scan_directory_with_options(storage, dir, ignore_patterns, None, &options).await
}
pub async fn scan_directory_with_progress(
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
progress: Option<&ScanProgress>,
) -> Result<ScanStatus> {
scan_directory_with_options(
storage,
dir,
ignore_patterns,
progress,
&ScanOptions::default(),
)
let options = ScanOptions {
incremental: true,
force_full: false,
};
scan_directory_with_options(storage, dir, ignore_patterns, None, &options)
.await
}
/// Scan a directory with full options including progress tracking and incremental mode
pub async fn scan_directory_with_options(
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
progress: Option<&ScanProgress>,
scan_options: &ScanOptions,
pub async fn scan_directory_with_progress(
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
progress: Option<&ScanProgress>,
) -> Result<ScanStatus> {
scan_directory_with_options(
storage,
dir,
ignore_patterns,
progress,
&ScanOptions::default(),
)
.await
}
/// Scan a directory with full options including progress tracking and
/// incremental mode
pub async fn scan_directory_with_options(
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
progress: Option<&ScanProgress>,
scan_options: &ScanOptions,
) -> Result<ScanStatus> {
info!(
dir = %dir.display(),
incremental = scan_options.incremental,
force = scan_options.force_full,
"starting directory scan"
);
if let Some(p) = progress {
p.begin();
}
// Convert scan options to import options
let import_options = import::ImportOptions {
incremental: scan_options.incremental && !scan_options.force_full,
force: scan_options.force_full,
photo_config: crate::config::PhotoConfig::default(),
};
let results = import::import_directory_with_options(
storage,
dir,
ignore_patterns,
8, // Default concurrency
&import_options,
)
.await?;
let mut errors = Vec::new();
let mut processed = 0;
let mut skipped = 0;
for result in &results {
match result {
Ok(r) => {
if r.was_skipped {
skipped += 1;
} else {
processed += 1;
}
},
Err(e) => {
let msg = e.to_string();
if let Some(p) = progress {
p.record_error(msg.clone());
}
errors.push(msg);
},
}
}
if let Some(p) = progress {
p.files_found.store(results.len(), Ordering::Release);
p.files_processed.store(processed, Ordering::Release);
p.finish();
}
let status = ScanStatus {
scanning: false,
files_found: results.len(),
files_processed: processed,
files_skipped: skipped,
errors,
};
if scan_options.incremental {
info!(
dir = %dir.display(),
incremental = scan_options.incremental,
force = scan_options.force_full,
"starting directory scan"
found = status.files_found,
processed = status.files_processed,
skipped = status.files_skipped,
"incremental scan complete"
);
}
if let Some(p) = progress {
p.begin();
}
// Convert scan options to import options
let import_options = import::ImportOptions {
incremental: scan_options.incremental && !scan_options.force_full,
force: scan_options.force_full,
photo_config: crate::config::PhotoConfig::default(),
};
let results = import::import_directory_with_options(
storage,
dir,
ignore_patterns,
8, // Default concurrency
&import_options,
)
.await?;
let mut errors = Vec::new();
let mut processed = 0;
let mut skipped = 0;
for result in &results {
match result {
Ok(r) => {
if r.was_skipped {
skipped += 1;
} else {
processed += 1;
}
}
Err(e) => {
let msg = e.to_string();
if let Some(p) = progress {
p.record_error(msg.clone());
}
errors.push(msg);
}
}
}
if let Some(p) = progress {
p.files_found.store(results.len(), Ordering::Release);
p.files_processed.store(processed, Ordering::Release);
p.finish();
}
let status = ScanStatus {
scanning: false,
files_found: results.len(),
files_processed: processed,
files_skipped: skipped,
errors,
};
if scan_options.incremental {
info!(
dir = %dir.display(),
found = status.files_found,
processed = status.files_processed,
skipped = status.files_skipped,
"incremental scan complete"
);
}
Ok(status)
Ok(status)
}
pub async fn scan_all_roots(
storage: &DynStorageBackend,
ignore_patterns: &[String],
storage: &DynStorageBackend,
ignore_patterns: &[String],
) -> Result<Vec<ScanStatus>> {
scan_all_roots_with_options(storage, ignore_patterns, None, &ScanOptions::default()).await
scan_all_roots_with_options(
storage,
ignore_patterns,
None,
&ScanOptions::default(),
)
.await
}
/// Scan all roots incrementally (skip unchanged files)
pub async fn scan_all_roots_incremental(
storage: &DynStorageBackend,
ignore_patterns: &[String],
storage: &DynStorageBackend,
ignore_patterns: &[String],
) -> Result<Vec<ScanStatus>> {
let options = ScanOptions {
incremental: true,
force_full: false,
};
scan_all_roots_with_options(storage, ignore_patterns, None, &options).await
let options = ScanOptions {
incremental: true,
force_full: false,
};
scan_all_roots_with_options(storage, ignore_patterns, None, &options).await
}
pub async fn scan_all_roots_with_progress(
storage: &DynStorageBackend,
ignore_patterns: &[String],
progress: Option<&ScanProgress>,
storage: &DynStorageBackend,
ignore_patterns: &[String],
progress: Option<&ScanProgress>,
) -> Result<Vec<ScanStatus>> {
scan_all_roots_with_options(storage, ignore_patterns, progress, &ScanOptions::default()).await
scan_all_roots_with_options(
storage,
ignore_patterns,
progress,
&ScanOptions::default(),
)
.await
}
/// Scan all roots with full options including progress and incremental mode
pub async fn scan_all_roots_with_options(
storage: &DynStorageBackend,
ignore_patterns: &[String],
progress: Option<&ScanProgress>,
scan_options: &ScanOptions,
storage: &DynStorageBackend,
ignore_patterns: &[String],
progress: Option<&ScanProgress>,
scan_options: &ScanOptions,
) -> Result<Vec<ScanStatus>> {
let roots = storage.list_root_dirs().await?;
let mut statuses = Vec::new();
let roots = storage.list_root_dirs().await?;
let mut statuses = Vec::new();
for root in roots {
match scan_directory_with_options(storage, &root, ignore_patterns, progress, scan_options)
.await
{
Ok(status) => statuses.push(status),
Err(e) => {
warn!(root = %root.display(), error = %e, "failed to scan root directory");
statuses.push(ScanStatus {
scanning: false,
files_found: 0,
files_processed: 0,
files_skipped: 0,
errors: vec![e.to_string()],
});
}
}
for root in roots {
match scan_directory_with_options(
storage,
&root,
ignore_patterns,
progress,
scan_options,
)
.await
{
Ok(status) => statuses.push(status),
Err(e) => {
warn!(root = %root.display(), error = %e, "failed to scan root directory");
statuses.push(ScanStatus {
scanning: false,
files_found: 0,
files_processed: 0,
files_skipped: 0,
errors: vec![e.to_string()],
});
},
}
}
Ok(statuses)
Ok(statuses)
}
pub struct FileWatcher {
_watcher: Box<dyn Watcher + Send>,
rx: mpsc::Receiver<PathBuf>,
_watcher: Box<dyn Watcher + Send>,
rx: mpsc::Receiver<PathBuf>,
}
impl FileWatcher {
pub fn new(dirs: &[PathBuf]) -> Result<Self> {
let (tx, rx) = mpsc::channel(1024);
pub fn new(dirs: &[PathBuf]) -> Result<Self> {
let (tx, rx) = mpsc::channel(1024);
// Try the recommended (native) watcher first, fall back to polling
let watcher: Box<dyn Watcher + Send> = match Self::try_native_watcher(dirs, tx.clone()) {
Ok(w) => {
info!("using native filesystem watcher");
w
// Try the recommended (native) watcher first, fall back to polling
let watcher: Box<dyn Watcher + Send> = match Self::try_native_watcher(
dirs,
tx.clone(),
) {
Ok(w) => {
info!("using native filesystem watcher");
w
},
Err(native_err) => {
warn!(error = %native_err, "native watcher failed, falling back to polling");
Self::polling_watcher(dirs, tx)?
},
};
Ok(Self {
_watcher: watcher,
rx,
})
}
fn try_native_watcher(
dirs: &[PathBuf],
tx: mpsc::Sender<PathBuf>,
) -> std::result::Result<Box<dyn Watcher + Send>, notify::Error> {
let tx_clone = tx.clone();
let mut watcher = notify::recommended_watcher(
move |res: notify::Result<notify::Event>| {
if let Ok(event) = res {
for path in event.paths {
if tx_clone.blocking_send(path).is_err() {
tracing::warn!("filesystem watcher channel closed, stopping");
break;
}
Err(native_err) => {
warn!(error = %native_err, "native watcher failed, falling back to polling");
Self::polling_watcher(dirs, tx)?
}
}
},
)?;
for dir in dirs {
watcher.watch(dir, RecursiveMode::Recursive)?;
}
Ok(Box::new(watcher))
}
fn polling_watcher(
dirs: &[PathBuf],
tx: mpsc::Sender<PathBuf>,
) -> Result<Box<dyn Watcher + Send>> {
let tx_clone = tx.clone();
let poll_interval = std::time::Duration::from_secs(5);
let config = notify::Config::default().with_poll_interval(poll_interval);
let mut watcher = PollWatcher::new(
move |res: notify::Result<notify::Event>| {
if let Ok(event) = res {
for path in event.paths {
if tx_clone.blocking_send(path).is_err() {
tracing::warn!("filesystem watcher channel closed, stopping");
break;
}
};
Ok(Self {
_watcher: watcher,
rx,
})
}
fn try_native_watcher(
dirs: &[PathBuf],
tx: mpsc::Sender<PathBuf>,
) -> std::result::Result<Box<dyn Watcher + Send>, notify::Error> {
let tx_clone = tx.clone();
let mut watcher =
notify::recommended_watcher(move |res: notify::Result<notify::Event>| {
if let Ok(event) = res {
for path in event.paths {
if tx_clone.blocking_send(path).is_err() {
tracing::warn!("filesystem watcher channel closed, stopping");
break;
}
}
}
})?;
for dir in dirs {
watcher.watch(dir, RecursiveMode::Recursive)?;
}
}
},
config,
)
.map_err(|e| crate::error::PinakesError::Io(std::io::Error::other(e)))?;
Ok(Box::new(watcher))
for dir in dirs {
watcher.watch(dir, RecursiveMode::Recursive).map_err(|e| {
crate::error::PinakesError::Io(std::io::Error::other(e))
})?;
}
fn polling_watcher(
dirs: &[PathBuf],
tx: mpsc::Sender<PathBuf>,
) -> Result<Box<dyn Watcher + Send>> {
let tx_clone = tx.clone();
let poll_interval = std::time::Duration::from_secs(5);
let config = notify::Config::default().with_poll_interval(poll_interval);
Ok(Box::new(watcher))
}
let mut watcher = PollWatcher::new(
move |res: notify::Result<notify::Event>| {
if let Ok(event) = res {
for path in event.paths {
if tx_clone.blocking_send(path).is_err() {
tracing::warn!("filesystem watcher channel closed, stopping");
break;
}
}
}
},
config,
)
.map_err(|e| crate::error::PinakesError::Io(std::io::Error::other(e)))?;
for dir in dirs {
watcher
.watch(dir, RecursiveMode::Recursive)
.map_err(|e| crate::error::PinakesError::Io(std::io::Error::other(e)))?;
}
Ok(Box::new(watcher))
}
/// Awaits the next changed path from the watcher; returns `None` once the
/// watcher's channel is closed (i.e. the watcher has shut down).
pub async fn next_change(&mut self) -> Option<PathBuf> {
self.rx.recv().await
}
pub async fn next_change(&mut self) -> Option<PathBuf> {
self.rx.recv().await
}
}
pub async fn watch_and_import(
storage: DynStorageBackend,
dirs: Vec<PathBuf>,
ignore_patterns: Vec<String>,
storage: DynStorageBackend,
dirs: Vec<PathBuf>,
ignore_patterns: Vec<String>,
) -> Result<()> {
let mut watcher = FileWatcher::new(&dirs)?;
info!("filesystem watcher started");
let mut watcher = FileWatcher::new(&dirs)?;
info!("filesystem watcher started");
while let Some(path) = watcher.next_change().await {
if path.is_file()
&& crate::media_type::MediaType::from_path(&path).is_some()
&& !crate::import::should_ignore(&path, &ignore_patterns)
{
info!(path = %path.display(), "detected file change, importing");
if let Err(e) = import::import_file(&storage, &path).await {
warn!(path = %path.display(), error = %e, "failed to import changed file");
}
}
while let Some(path) = watcher.next_change().await {
if path.is_file()
&& crate::media_type::MediaType::from_path(&path).is_some()
&& !crate::import::should_ignore(&path, &ignore_patterns)
{
info!(path = %path.display(), "detected file change, importing");
if let Err(e) = import::import_file(&storage, &path).await {
warn!(path = %path.display(), error = %e, "failed to import changed file");
}
}
}
Ok(())
Ok(())
}

View file

@ -1,5 +1,4 @@
use std::path::PathBuf;
use std::sync::Arc;
use std::{path::PathBuf, sync::Arc};
use chrono::{DateTime, Datelike, Utc};
use serde::{Deserialize, Serialize};
@ -7,511 +6,543 @@ use tokio::sync::RwLock;
use tokio_util::sync::CancellationToken;
use uuid::Uuid;
use crate::config::Config;
use crate::jobs::{JobKind, JobQueue};
use crate::{
config::Config,
jobs::{JobKind, JobQueue},
};
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum Schedule {
Interval { secs: u64 },
Daily { hour: u32, minute: u32 },
Weekly { day: u32, hour: u32, minute: u32 },
Interval {
secs: u64,
},
Daily {
hour: u32,
minute: u32,
},
Weekly {
day: u32,
hour: u32,
minute: u32,
},
}
impl Schedule {
pub fn next_run(&self, from: DateTime<Utc>) -> DateTime<Utc> {
match self {
Schedule::Interval { secs } => from + chrono::Duration::seconds(*secs as i64),
Schedule::Daily { hour, minute } => {
let today = from
.date_naive()
.and_hms_opt(*hour, *minute, 0)
.unwrap_or_default();
let today_utc = today.and_utc();
if today_utc > from {
today_utc
} else {
today_utc + chrono::Duration::days(1)
}
}
Schedule::Weekly { day, hour, minute } => {
let current_day = from.weekday().num_days_from_monday();
let target_day = *day;
let days_ahead = if target_day > current_day {
target_day - current_day
} else if target_day < current_day {
7 - (current_day - target_day)
} else {
let today = from
.date_naive()
.and_hms_opt(*hour, *minute, 0)
.unwrap_or_default()
.and_utc();
if today > from {
return today;
}
7
};
let target_date = from.date_naive() + chrono::Duration::days(days_ahead as i64);
target_date
.and_hms_opt(*hour, *minute, 0)
.unwrap_or_default()
.and_utc()
}
pub fn next_run(&self, from: DateTime<Utc>) -> DateTime<Utc> {
match self {
Schedule::Interval { secs } => {
from + chrono::Duration::seconds(*secs as i64)
},
Schedule::Daily { hour, minute } => {
let today = from
.date_naive()
.and_hms_opt(*hour, *minute, 0)
.unwrap_or_default();
let today_utc = today.and_utc();
if today_utc > from {
today_utc
} else {
today_utc + chrono::Duration::days(1)
}
},
Schedule::Weekly { day, hour, minute } => {
let current_day = from.weekday().num_days_from_monday();
let target_day = *day;
let days_ahead = if target_day > current_day {
target_day - current_day
} else if target_day < current_day {
7 - (current_day - target_day)
} else {
let today = from
.date_naive()
.and_hms_opt(*hour, *minute, 0)
.unwrap_or_default()
.and_utc();
if today > from {
return today;
}
7
};
let target_date =
from.date_naive() + chrono::Duration::days(days_ahead as i64);
target_date
.and_hms_opt(*hour, *minute, 0)
.unwrap_or_default()
.and_utc()
},
}
}
pub fn display_string(&self) -> String {
match self {
Schedule::Interval { secs } => {
if *secs >= 3600 {
format!("Every {}h", secs / 3600)
} else if *secs >= 60 {
format!("Every {}m", secs / 60)
} else {
format!("Every {}s", secs)
}
}
Schedule::Daily { hour, minute } => format!("Daily {hour:02}:{minute:02}"),
Schedule::Weekly { day, hour, minute } => {
let day_name = match day {
0 => "Mon",
1 => "Tue",
2 => "Wed",
3 => "Thu",
4 => "Fri",
5 => "Sat",
_ => "Sun",
};
format!("{day_name} {hour:02}:{minute:02}")
}
pub fn display_string(&self) -> String {
match self {
Schedule::Interval { secs } => {
if *secs >= 3600 {
format!("Every {}h", secs / 3600)
} else if *secs >= 60 {
format!("Every {}m", secs / 60)
} else {
format!("Every {}s", secs)
}
},
Schedule::Daily { hour, minute } => {
format!("Daily {hour:02}:{minute:02}")
},
Schedule::Weekly { day, hour, minute } => {
let day_name = match day {
0 => "Mon",
1 => "Tue",
2 => "Wed",
3 => "Thu",
4 => "Fri",
5 => "Sat",
_ => "Sun",
};
format!("{day_name} {hour:02}:{minute:02}")
},
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScheduledTask {
pub id: String,
pub name: String,
pub kind: JobKind,
pub schedule: Schedule,
pub enabled: bool,
pub last_run: Option<DateTime<Utc>>,
pub next_run: Option<DateTime<Utc>>,
pub last_status: Option<String>,
/// Whether a job for this task is currently running. Skipped during serialization.
#[serde(default, skip_serializing)]
pub running: bool,
/// The job ID of the last submitted job. Skipped during serialization/deserialization.
#[serde(skip)]
pub last_job_id: Option<Uuid>,
pub id: String,
pub name: String,
pub kind: JobKind,
pub schedule: Schedule,
pub enabled: bool,
pub last_run: Option<DateTime<Utc>>,
pub next_run: Option<DateTime<Utc>>,
pub last_status: Option<String>,
/// Whether a job for this task is currently running. Skipped during
/// serialization.
#[serde(default, skip_serializing)]
pub running: bool,
/// The job ID of the last submitted job. Skipped during
/// serialization/deserialization.
#[serde(skip)]
pub last_job_id: Option<Uuid>,
}
pub struct TaskScheduler {
tasks: Arc<RwLock<Vec<ScheduledTask>>>,
tasks: Arc<RwLock<Vec<ScheduledTask>>>,
job_queue: Arc<JobQueue>,
cancel: CancellationToken,
config: Arc<RwLock<Config>>,
config_path: Option<PathBuf>,
}
impl TaskScheduler {
pub fn new(
job_queue: Arc<JobQueue>,
cancel: CancellationToken,
config: Arc<RwLock<Config>>,
config_path: Option<PathBuf>,
}
) -> Self {
let now = Utc::now();
let default_tasks = vec![
ScheduledTask {
id: "periodic_scan".to_string(),
name: "Periodic Scan".to_string(),
kind: JobKind::Scan { path: None },
schedule: Schedule::Interval { secs: 3600 },
enabled: true,
last_run: None,
next_run: Some(now + chrono::Duration::seconds(3600)),
last_status: None,
running: false,
last_job_id: None,
},
ScheduledTask {
id: "integrity_check".to_string(),
name: "Integrity Check".to_string(),
kind: JobKind::VerifyIntegrity { media_ids: vec![] },
schedule: Schedule::Weekly {
day: 0,
hour: 3,
minute: 0,
},
enabled: false,
last_run: None,
next_run: None,
last_status: None,
running: false,
last_job_id: None,
},
ScheduledTask {
id: "orphan_detection".to_string(),
name: "Orphan Detection".to_string(),
kind: JobKind::OrphanDetection,
schedule: Schedule::Daily {
hour: 2,
minute: 0,
},
enabled: false,
last_run: None,
next_run: None,
last_status: None,
running: false,
last_job_id: None,
},
ScheduledTask {
id: "thumbnail_cleanup".to_string(),
name: "Thumbnail Cleanup".to_string(),
kind: JobKind::CleanupThumbnails,
schedule: Schedule::Weekly {
day: 6,
hour: 4,
minute: 0,
},
enabled: false,
last_run: None,
next_run: None,
last_status: None,
running: false,
last_job_id: None,
},
];
impl TaskScheduler {
pub fn new(
job_queue: Arc<JobQueue>,
cancel: CancellationToken,
config: Arc<RwLock<Config>>,
config_path: Option<PathBuf>,
) -> Self {
let now = Utc::now();
let default_tasks = vec![
ScheduledTask {
id: "periodic_scan".to_string(),
name: "Periodic Scan".to_string(),
kind: JobKind::Scan { path: None },
schedule: Schedule::Interval { secs: 3600 },
enabled: true,
last_run: None,
next_run: Some(now + chrono::Duration::seconds(3600)),
last_status: None,
running: false,
last_job_id: None,
},
ScheduledTask {
id: "integrity_check".to_string(),
name: "Integrity Check".to_string(),
kind: JobKind::VerifyIntegrity { media_ids: vec![] },
schedule: Schedule::Weekly {
day: 0,
hour: 3,
minute: 0,
},
enabled: false,
last_run: None,
next_run: None,
last_status: None,
running: false,
last_job_id: None,
},
ScheduledTask {
id: "orphan_detection".to_string(),
name: "Orphan Detection".to_string(),
kind: JobKind::OrphanDetection,
schedule: Schedule::Daily { hour: 2, minute: 0 },
enabled: false,
last_run: None,
next_run: None,
last_status: None,
running: false,
last_job_id: None,
},
ScheduledTask {
id: "thumbnail_cleanup".to_string(),
name: "Thumbnail Cleanup".to_string(),
kind: JobKind::CleanupThumbnails,
schedule: Schedule::Weekly {
day: 6,
hour: 4,
minute: 0,
},
enabled: false,
last_run: None,
next_run: None,
last_status: None,
running: false,
last_job_id: None,
},
];
Self {
tasks: Arc::new(RwLock::new(default_tasks)),
job_queue,
cancel,
config,
config_path,
}
Self {
tasks: Arc::new(RwLock::new(default_tasks)),
job_queue,
cancel,
config,
config_path,
}
}
/// Restore saved task state from config. Should be called once after construction.
pub async fn restore_state(&self) {
let saved = self.config.read().await.scheduled_tasks.clone();
if saved.is_empty() {
return;
}
let mut tasks = self.tasks.write().await;
for saved_task in &saved {
if let Some(task) = tasks.iter_mut().find(|t| t.id == saved_task.id) {
task.enabled = saved_task.enabled;
task.schedule = saved_task.schedule.clone();
if let Some(Ok(dt)) = saved_task
.last_run
.as_ref()
.map(|s| DateTime::parse_from_rfc3339(s))
{
task.last_run = Some(dt.with_timezone(&Utc));
}
if task.enabled {
let from = task.last_run.unwrap_or_else(Utc::now);
task.next_run = Some(task.schedule.next_run(from));
} else {
task.next_run = None;
}
}
}
/// Restore saved task state from config. Should be called once after
/// construction.
pub async fn restore_state(&self) {
let saved = self.config.read().await.scheduled_tasks.clone();
if saved.is_empty() {
return;
}
/// Persist current task state to config file.
async fn persist_task_state(&self) {
let tasks = self.tasks.read().await;
let task_configs: Vec<crate::config::ScheduledTaskConfig> = tasks
.iter()
.map(|t| crate::config::ScheduledTaskConfig {
id: t.id.clone(),
enabled: t.enabled,
schedule: t.schedule.clone(),
last_run: t.last_run.map(|dt| dt.to_rfc3339()),
})
.collect();
drop(tasks);
let mut tasks = self.tasks.write().await;
for saved_task in &saved {
if let Some(task) = tasks.iter_mut().find(|t| t.id == saved_task.id) {
task.enabled = saved_task.enabled;
task.schedule = saved_task.schedule.clone();
if let Some(Ok(dt)) = saved_task
.last_run
.as_ref()
.map(|s| DateTime::parse_from_rfc3339(s))
{
let mut config = self.config.write().await;
config.scheduled_tasks = task_configs;
task.last_run = Some(dt.with_timezone(&Utc));
}
if task.enabled {
let from = task.last_run.unwrap_or_else(Utc::now);
task.next_run = Some(task.schedule.next_run(from));
} else {
task.next_run = None;
}
}
}
}
if let Some(ref path) = self.config_path {
let config = self.config.read().await;
if let Err(e) = config.save_to_file(path) {
tracing::warn!(error = %e, "failed to persist scheduler state to config file");
}
/// Persist current task state to config file.
async fn persist_task_state(&self) {
let tasks = self.tasks.read().await;
let task_configs: Vec<crate::config::ScheduledTaskConfig> = tasks
.iter()
.map(|t| {
crate::config::ScheduledTaskConfig {
id: t.id.clone(),
enabled: t.enabled,
schedule: t.schedule.clone(),
last_run: t.last_run.map(|dt| dt.to_rfc3339()),
}
})
.collect();
drop(tasks);
{
let mut config = self.config.write().await;
config.scheduled_tasks = task_configs;
}
pub async fn list_tasks(&self) -> Vec<ScheduledTask> {
self.tasks.read().await.clone()
if let Some(ref path) = self.config_path {
let config = self.config.read().await;
if let Err(e) = config.save_to_file(path) {
tracing::warn!(error = %e, "failed to persist scheduler state to config file");
}
}
}
pub async fn toggle_task(&self, id: &str) -> Option<bool> {
let result = {
let mut tasks = self.tasks.write().await;
if let Some(task) = tasks.iter_mut().find(|t| t.id == id) {
task.enabled = !task.enabled;
if task.enabled {
task.next_run = Some(task.schedule.next_run(Utc::now()));
} else {
task.next_run = None;
}
Some(task.enabled)
} else {
None
}
};
if result.is_some() {
self.persist_task_state().await;
pub async fn list_tasks(&self) -> Vec<ScheduledTask> {
self.tasks.read().await.clone()
}
pub async fn toggle_task(&self, id: &str) -> Option<bool> {
let result = {
let mut tasks = self.tasks.write().await;
if let Some(task) = tasks.iter_mut().find(|t| t.id == id) {
task.enabled = !task.enabled;
if task.enabled {
task.next_run = Some(task.schedule.next_run(Utc::now()));
} else {
task.next_run = None;
}
result
Some(task.enabled)
} else {
None
}
};
if result.is_some() {
self.persist_task_state().await;
}
result
}
/// Run a task immediately. Uses a single write lock to avoid TOCTOU races.
pub async fn run_now(&self, id: &str) -> Option<String> {
let result = {
let mut tasks = self.tasks.write().await;
let task = tasks.iter_mut().find(|t| t.id == id)?;
/// Run a task immediately. Uses a single write lock to avoid TOCTOU races.
pub async fn run_now(&self, id: &str) -> Option<String> {
let result = {
let mut tasks = self.tasks.write().await;
let task = tasks.iter_mut().find(|t| t.id == id)?;
// Submit the job (cheap: sends to mpsc channel)
let job_id = self.job_queue.submit(task.kind.clone()).await;
// Submit the job (cheap: sends to mpsc channel)
let job_id = self.job_queue.submit(task.kind.clone()).await;
task.last_run = Some(Utc::now());
task.last_run = Some(Utc::now());
task.last_status = Some("running".to_string());
task.running = true;
task.last_job_id = Some(job_id);
if task.enabled {
task.next_run = Some(task.schedule.next_run(Utc::now()));
}
Some(job_id.to_string())
};
if result.is_some() {
self.persist_task_state().await;
}
result
}
/// Main scheduler loop. Uses a two-phase approach per tick to avoid
/// holding the write lock across await points. Returns when the
/// cancellation token is triggered.
pub async fn run(&self) {
let mut interval =
tokio::time::interval(std::time::Duration::from_secs(30));
loop {
tokio::select! {
_ = interval.tick() => {}
_ = self.cancel.cancelled() => {
tracing::info!("scheduler shutting down");
return;
}
}
// Phase 1: Check completed jobs and update running status
{
use crate::jobs::JobStatus;
let mut tasks = self.tasks.write().await;
for task in tasks.iter_mut() {
if !task.running {
continue;
}
let Some(job_id) = task.last_job_id else {
continue;
};
let Some(job) = self.job_queue.status(job_id).await else {
continue;
};
match &job.status {
JobStatus::Completed { .. } => {
task.running = false;
task.last_status = Some("completed".to_string());
},
JobStatus::Failed { error } => {
task.running = false;
task.last_status = Some(format!("failed: {error}"));
},
JobStatus::Cancelled => {
task.running = false;
task.last_status = Some("cancelled".to_string());
},
_ => {}, // still pending or running
}
}
}
// Phase 2: Collect due tasks and submit jobs
let now = Utc::now();
let mut to_submit: Vec<(usize, JobKind)> = Vec::new();
{
let mut tasks = self.tasks.write().await;
for (i, task) in tasks.iter_mut().enumerate() {
if !task.enabled || task.running {
continue;
}
let due = task.next_run.is_some_and(|next| now >= next);
if due {
to_submit.push((i, task.kind.clone()));
task.last_run = Some(now);
task.last_status = Some("running".to_string());
task.running = true;
task.last_job_id = Some(job_id);
if task.enabled {
task.next_run = Some(task.schedule.next_run(Utc::now()));
}
Some(job_id.to_string())
};
if result.is_some() {
self.persist_task_state().await;
task.next_run = Some(task.schedule.next_run(now));
}
}
result
}
}
/// Main scheduler loop. Uses a two-phase approach per tick to avoid
/// holding the write lock across await points. Returns when the
/// cancellation token is triggered.
pub async fn run(&self) {
let mut interval = tokio::time::interval(std::time::Duration::from_secs(30));
loop {
tokio::select! {
_ = interval.tick() => {}
_ = self.cancel.cancelled() => {
tracing::info!("scheduler shutting down");
return;
}
}
// Phase 1: Check completed jobs and update running status
{
use crate::jobs::JobStatus;
let mut tasks = self.tasks.write().await;
for task in tasks.iter_mut() {
if !task.running {
continue;
}
let Some(job_id) = task.last_job_id else {
continue;
};
let Some(job) = self.job_queue.status(job_id).await else {
continue;
};
match &job.status {
JobStatus::Completed { .. } => {
task.running = false;
task.last_status = Some("completed".to_string());
}
JobStatus::Failed { error } => {
task.running = false;
task.last_status = Some(format!("failed: {error}"));
}
JobStatus::Cancelled => {
task.running = false;
task.last_status = Some("cancelled".to_string());
}
_ => {} // still pending or running
}
}
}
// Phase 2: Collect due tasks and submit jobs
let now = Utc::now();
let mut to_submit: Vec<(usize, JobKind)> = Vec::new();
{
let mut tasks = self.tasks.write().await;
for (i, task) in tasks.iter_mut().enumerate() {
if !task.enabled || task.running {
continue;
}
let due = task.next_run.is_some_and(|next| now >= next);
if due {
to_submit.push((i, task.kind.clone()));
task.last_run = Some(now);
task.last_status = Some("running".to_string());
task.running = true;
task.next_run = Some(task.schedule.next_run(now));
}
}
}
// Submit jobs without holding the lock
for (idx, kind) in to_submit {
let job_id = self.job_queue.submit(kind).await;
let mut tasks = self.tasks.write().await;
if let Some(task) = tasks.get_mut(idx) {
task.last_job_id = Some(job_id);
}
}
// Submit jobs without holding the lock
for (idx, kind) in to_submit {
let job_id = self.job_queue.submit(kind).await;
let mut tasks = self.tasks.write().await;
if let Some(task) = tasks.get_mut(idx) {
task.last_job_id = Some(job_id);
}
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::TimeZone;
use chrono::TimeZone;
#[test]
fn test_interval_next_run() {
let from = Utc.with_ymd_and_hms(2025, 6, 15, 12, 0, 0).unwrap();
let schedule = Schedule::Interval { secs: 3600 };
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 13, 0, 0).unwrap());
}
use super::*;
#[test]
fn test_daily_next_run_future_today() {
// 10:00 UTC, schedule is 14:00 => same day
let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
let schedule = Schedule::Daily {
hour: 14,
minute: 0,
};
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 14, 0, 0).unwrap());
}
#[test]
fn test_interval_next_run() {
let from = Utc.with_ymd_and_hms(2025, 6, 15, 12, 0, 0).unwrap();
let schedule = Schedule::Interval { secs: 3600 };
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 13, 0, 0).unwrap());
}
#[test]
fn test_daily_next_run_past_today() {
// 16:00 UTC, schedule is 14:00 => next day
let from = Utc.with_ymd_and_hms(2025, 6, 15, 16, 0, 0).unwrap();
let schedule = Schedule::Daily {
hour: 14,
minute: 0,
};
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 16, 14, 0, 0).unwrap());
}
#[test]
fn test_daily_next_run_future_today() {
// 10:00 UTC, schedule is 14:00 => same day
let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
let schedule = Schedule::Daily {
hour: 14,
minute: 0,
};
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 14, 0, 0).unwrap());
}
#[test]
fn test_weekly_next_run() {
// 2025-06-15 is a Sunday (day 6). Target is Monday (day 0) at 03:00.
let from = Utc.with_ymd_and_hms(2025, 6, 15, 12, 0, 0).unwrap();
let schedule = Schedule::Weekly {
day: 0,
hour: 3,
minute: 0,
};
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 16, 3, 0, 0).unwrap());
}
#[test]
fn test_daily_next_run_past_today() {
// 16:00 UTC, schedule is 14:00 => next day
let from = Utc.with_ymd_and_hms(2025, 6, 15, 16, 0, 0).unwrap();
let schedule = Schedule::Daily {
hour: 14,
minute: 0,
};
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 16, 14, 0, 0).unwrap());
}
#[test]
fn test_weekly_same_day_future() {
// 2025-06-15 is Sunday (day 6). Schedule is Sunday 14:00, current is 10:00 => today.
let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
let schedule = Schedule::Weekly {
day: 6,
hour: 14,
minute: 0,
};
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 14, 0, 0).unwrap());
}
#[test]
fn test_weekly_next_run() {
// 2025-06-15 is a Sunday (day 6). Target is Monday (day 0) at 03:00.
let from = Utc.with_ymd_and_hms(2025, 6, 15, 12, 0, 0).unwrap();
let schedule = Schedule::Weekly {
day: 0,
hour: 3,
minute: 0,
};
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 16, 3, 0, 0).unwrap());
}
#[test]
fn test_weekly_same_day_past() {
// 2025-06-15 is Sunday (day 6). Schedule is Sunday 08:00, current is 10:00 => next week.
let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
let schedule = Schedule::Weekly {
day: 6,
hour: 8,
minute: 0,
};
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 22, 8, 0, 0).unwrap());
}
#[test]
fn test_weekly_same_day_future() {
// 2025-06-15 is Sunday (day 6). Schedule is Sunday 14:00, current is 10:00
// => today.
let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
let schedule = Schedule::Weekly {
day: 6,
hour: 14,
minute: 0,
};
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 14, 0, 0).unwrap());
}
#[test]
fn test_serde_roundtrip() {
let task = ScheduledTask {
id: "test".to_string(),
name: "Test Task".to_string(),
kind: JobKind::Scan { path: None },
schedule: Schedule::Interval { secs: 3600 },
enabled: true,
last_run: Some(Utc::now()),
next_run: Some(Utc::now()),
last_status: Some("completed".to_string()),
running: true,
last_job_id: Some(Uuid::now_v7()),
};
#[test]
fn test_weekly_same_day_past() {
// 2025-06-15 is Sunday (day 6). Schedule is Sunday 08:00, current is 10:00
// => next week.
let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
let schedule = Schedule::Weekly {
day: 6,
hour: 8,
minute: 0,
};
let next = schedule.next_run(from);
assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 22, 8, 0, 0).unwrap());
}
let json = serde_json::to_string(&task).unwrap();
let deserialized: ScheduledTask = serde_json::from_str(&json).unwrap();
#[test]
fn test_serde_roundtrip() {
let task = ScheduledTask {
id: "test".to_string(),
name: "Test Task".to_string(),
kind: JobKind::Scan { path: None },
schedule: Schedule::Interval { secs: 3600 },
enabled: true,
last_run: Some(Utc::now()),
next_run: Some(Utc::now()),
last_status: Some("completed".to_string()),
running: true,
last_job_id: Some(Uuid::now_v7()),
};
assert_eq!(deserialized.id, "test");
assert_eq!(deserialized.enabled, true);
// running defaults to false on deserialization (skip_serializing)
assert!(!deserialized.running);
// last_job_id is skipped entirely
assert!(deserialized.last_job_id.is_none());
}
let json = serde_json::to_string(&task).unwrap();
let deserialized: ScheduledTask = serde_json::from_str(&json).unwrap();
#[test]
fn test_display_string() {
assert_eq!(
Schedule::Interval { secs: 3600 }.display_string(),
"Every 1h"
);
assert_eq!(
Schedule::Interval { secs: 300 }.display_string(),
"Every 5m"
);
assert_eq!(
Schedule::Interval { secs: 30 }.display_string(),
"Every 30s"
);
assert_eq!(
Schedule::Daily { hour: 3, minute: 0 }.display_string(),
"Daily 03:00"
);
assert_eq!(
Schedule::Weekly {
day: 0,
hour: 3,
minute: 0
}
.display_string(),
"Mon 03:00"
);
assert_eq!(
Schedule::Weekly {
day: 6,
hour: 14,
minute: 30
}
.display_string(),
"Sun 14:30"
);
}
assert_eq!(deserialized.id, "test");
assert_eq!(deserialized.enabled, true);
// running defaults to false on deserialization (skip_serializing)
assert!(!deserialized.running);
// last_job_id is skipped entirely
assert!(deserialized.last_job_id.is_none());
}
#[test]
fn test_display_string() {
assert_eq!(
Schedule::Interval { secs: 3600 }.display_string(),
"Every 1h"
);
assert_eq!(
Schedule::Interval { secs: 300 }.display_string(),
"Every 5m"
);
assert_eq!(
Schedule::Interval { secs: 30 }.display_string(),
"Every 30s"
);
assert_eq!(
Schedule::Daily {
hour: 3,
minute: 0,
}
.display_string(),
"Daily 03:00"
);
assert_eq!(
Schedule::Weekly {
day: 0,
hour: 3,
minute: 0,
}
.display_string(),
"Mon 03:00"
);
assert_eq!(
Schedule::Weekly {
day: 6,
hour: 14,
minute: 30,
}
.display_string(),
"Sun 14:30"
);
}
}

View file

@ -1,553 +1,524 @@
use serde::{Deserialize, Serialize};
use winnow::combinator::{alt, delimited, preceded, repeat};
use winnow::token::{take_till, take_while};
use winnow::{ModalResult, Parser};
use winnow::{
ModalResult,
Parser,
combinator::{alt, delimited, preceded, repeat},
token::{take_till, take_while},
};
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SearchQuery {
FullText(String),
FieldMatch {
field: String,
value: String,
},
And(Vec<SearchQuery>),
Or(Vec<SearchQuery>),
Not(Box<SearchQuery>),
Prefix(String),
Fuzzy(String),
TypeFilter(String),
TagFilter(String),
/// Range query: field:start..end (inclusive)
RangeQuery {
field: String,
start: Option<i64>,
end: Option<i64>,
},
/// Comparison query: field:>value, field:<value, field:>=value, field:<=value
CompareQuery {
field: String,
op: CompareOp,
value: i64,
},
/// Date query: created:today, modified:last-week, etc.
DateQuery {
field: String,
value: DateValue,
},
FullText(String),
FieldMatch {
field: String,
value: String,
},
And(Vec<SearchQuery>),
Or(Vec<SearchQuery>),
Not(Box<SearchQuery>),
Prefix(String),
Fuzzy(String),
TypeFilter(String),
TagFilter(String),
/// Range query: field:start..end (inclusive)
RangeQuery {
field: String,
start: Option<i64>,
end: Option<i64>,
},
/// Comparison query: field:>value, field:<value, field:>=value, field:<=value
CompareQuery {
field: String,
op: CompareOp,
value: i64,
},
/// Date query: created:today, modified:last-week, etc.
DateQuery {
field: String,
value: DateValue,
},
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CompareOp {
GreaterThan,
GreaterOrEqual,
LessThan,
LessOrEqual,
GreaterThan,
GreaterOrEqual,
LessThan,
LessOrEqual,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum DateValue {
Today,
Yesterday,
ThisWeek,
LastWeek,
ThisMonth,
LastMonth,
ThisYear,
LastYear,
/// Days ago: last-7d, last-30d
DaysAgo(u32),
Today,
Yesterday,
ThisWeek,
LastWeek,
ThisMonth,
LastMonth,
ThisYear,
LastYear,
/// Days ago: last-7d, last-30d
DaysAgo(u32),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchRequest {
pub query: SearchQuery,
pub sort: SortOrder,
pub pagination: crate::model::Pagination,
pub query: SearchQuery,
pub sort: SortOrder,
pub pagination: crate::model::Pagination,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResults {
pub items: Vec<crate::model::MediaItem>,
pub total_count: u64,
pub items: Vec<crate::model::MediaItem>,
pub total_count: u64,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[derive(Default)]
pub enum SortOrder {
#[default]
Relevance,
DateAsc,
DateDesc,
NameAsc,
NameDesc,
SizeAsc,
SizeDesc,
#[default]
Relevance,
DateAsc,
DateDesc,
NameAsc,
NameDesc,
SizeAsc,
SizeDesc,
}
fn ws<'i>(input: &mut &'i str) -> ModalResult<&'i str> {
take_while(0.., ' ').parse_next(input)
take_while(0.., ' ').parse_next(input)
}
fn quoted_string(input: &mut &str) -> ModalResult<String> {
delimited('"', take_till(0.., '"'), '"')
.map(|s: &str| s.to_string())
.parse_next(input)
delimited('"', take_till(0.., '"'), '"')
.map(|s: &str| s.to_string())
.parse_next(input)
}
fn bare_word(input: &mut &str) -> ModalResult<String> {
take_while(1.., |c: char| !c.is_whitespace() && c != ')' && c != '(')
.map(|s: &str| s.to_string())
.parse_next(input)
take_while(1.., |c: char| !c.is_whitespace() && c != ')' && c != '(')
.map(|s: &str| s.to_string())
.parse_next(input)
}
fn word_or_quoted(input: &mut &str) -> ModalResult<String> {
alt((quoted_string, bare_word)).parse_next(input)
alt((quoted_string, bare_word)).parse_next(input)
}
fn not_expr(input: &mut &str) -> ModalResult<SearchQuery> {
preceded(('-', ws), atom)
.map(|q| SearchQuery::Not(Box::new(q)))
.parse_next(input)
preceded(('-', ws), atom)
.map(|q| SearchQuery::Not(Box::new(q)))
.parse_next(input)
}
/// Parse a date value like "today", "yesterday", "last-week", "last-30d"
fn parse_date_value(s: &str) -> Option<DateValue> {
match s.to_lowercase().as_str() {
"today" => Some(DateValue::Today),
"yesterday" => Some(DateValue::Yesterday),
"this-week" | "thisweek" => Some(DateValue::ThisWeek),
"last-week" | "lastweek" => Some(DateValue::LastWeek),
"this-month" | "thismonth" => Some(DateValue::ThisMonth),
"last-month" | "lastmonth" => Some(DateValue::LastMonth),
"this-year" | "thisyear" => Some(DateValue::ThisYear),
"last-year" | "lastyear" => Some(DateValue::LastYear),
other => {
// Try to parse "last-Nd" format (e.g., "last-7d", "last-30d")
if let Some(rest) = other.strip_prefix("last-")
&& let Some(days_str) = rest.strip_suffix('d')
&& let Ok(days) = days_str.parse::<u32>()
{
return Some(DateValue::DaysAgo(days));
}
None
}
}
match s.to_lowercase().as_str() {
"today" => Some(DateValue::Today),
"yesterday" => Some(DateValue::Yesterday),
"this-week" | "thisweek" => Some(DateValue::ThisWeek),
"last-week" | "lastweek" => Some(DateValue::LastWeek),
"this-month" | "thismonth" => Some(DateValue::ThisMonth),
"last-month" | "lastmonth" => Some(DateValue::LastMonth),
"this-year" | "thisyear" => Some(DateValue::ThisYear),
"last-year" | "lastyear" => Some(DateValue::LastYear),
other => {
// Try to parse "last-Nd" format (e.g., "last-7d", "last-30d")
if let Some(rest) = other.strip_prefix("last-")
&& let Some(days_str) = rest.strip_suffix('d')
&& let Ok(days) = days_str.parse::<u32>()
{
return Some(DateValue::DaysAgo(days));
}
None
},
}
}
/// Parse size strings like "10MB", "1GB", "500KB" to bytes
fn parse_size_value(s: &str) -> Option<i64> {
let s = s.to_uppercase();
if let Some(num) = s.strip_suffix("GB") {
num.parse::<i64>().ok().map(|n| n * 1024 * 1024 * 1024)
} else if let Some(num) = s.strip_suffix("MB") {
num.parse::<i64>().ok().map(|n| n * 1024 * 1024)
} else if let Some(num) = s.strip_suffix("KB") {
num.parse::<i64>().ok().map(|n| n * 1024)
} else if let Some(num) = s.strip_suffix('B') {
num.parse::<i64>().ok()
} else {
s.parse::<i64>().ok()
}
let s = s.to_uppercase();
if let Some(num) = s.strip_suffix("GB") {
num.parse::<i64>().ok().map(|n| n * 1024 * 1024 * 1024)
} else if let Some(num) = s.strip_suffix("MB") {
num.parse::<i64>().ok().map(|n| n * 1024 * 1024)
} else if let Some(num) = s.strip_suffix("KB") {
num.parse::<i64>().ok().map(|n| n * 1024)
} else if let Some(num) = s.strip_suffix('B') {
num.parse::<i64>().ok()
} else {
s.parse::<i64>().ok()
}
}
fn field_match(input: &mut &str) -> ModalResult<SearchQuery> {
let field_name =
take_while(1.., |c: char| c.is_alphanumeric() || c == '_').map(|s: &str| s.to_string());
(field_name, ':', word_or_quoted)
.map(|(field, _, value)| {
// Handle special field types
match field.as_str() {
"type" => return SearchQuery::TypeFilter(value),
"tag" => return SearchQuery::TagFilter(value),
_ => {}
}
// Check for range queries: field:start..end
if value.contains("..") {
let parts: Vec<&str> = value.split("..").collect();
if parts.len() == 2 {
let start = if parts[0].is_empty() {
None
} else if field == "size" {
parse_size_value(parts[0])
} else {
parts[0].parse().ok()
};
let end = if parts[1].is_empty() {
None
} else if field == "size" {
parse_size_value(parts[1])
} else {
parts[1].parse().ok()
};
return SearchQuery::RangeQuery { field, start, end };
}
}
// Check for comparison queries: >=, <=, >, <
if let Some(rest) = value.strip_prefix(">=") {
let val = if field == "size" {
parse_size_value(rest).unwrap_or(0)
} else {
rest.parse().unwrap_or(0)
};
return SearchQuery::CompareQuery {
field,
op: CompareOp::GreaterOrEqual,
value: val,
};
}
if let Some(rest) = value.strip_prefix("<=") {
let val = if field == "size" {
parse_size_value(rest).unwrap_or(0)
} else {
rest.parse().unwrap_or(0)
};
return SearchQuery::CompareQuery {
field,
op: CompareOp::LessOrEqual,
value: val,
};
}
if let Some(rest) = value.strip_prefix('>') {
let val = if field == "size" {
parse_size_value(rest).unwrap_or(0)
} else {
rest.parse().unwrap_or(0)
};
return SearchQuery::CompareQuery {
field,
op: CompareOp::GreaterThan,
value: val,
};
}
if let Some(rest) = value.strip_prefix('<') {
let val = if field == "size" {
parse_size_value(rest).unwrap_or(0)
} else {
rest.parse().unwrap_or(0)
};
return SearchQuery::CompareQuery {
field,
op: CompareOp::LessThan,
value: val,
};
}
// Check for date queries on created/modified fields
if (field == "created" || field == "modified")
&& let Some(date_val) = parse_date_value(&value)
{
return SearchQuery::DateQuery {
field,
value: date_val,
};
}
// Default: simple field match
SearchQuery::FieldMatch { field, value }
})
.parse_next(input)
}
fn prefix_expr(input: &mut &str) -> ModalResult<SearchQuery> {
let word = take_while(1.., |c: char| {
!c.is_whitespace() && c != ')' && c != '(' && c != '*'
})
let field_name = take_while(1.., |c: char| c.is_alphanumeric() || c == '_')
.map(|s: &str| s.to_string());
(word, '*')
.map(|(w, _)| SearchQuery::Prefix(w))
.parse_next(input)
}
(field_name, ':', word_or_quoted)
.map(|(field, _, value)| {
// Handle special field types
match field.as_str() {
"type" => return SearchQuery::TypeFilter(value),
"tag" => return SearchQuery::TagFilter(value),
_ => {},
}
fn fuzzy_expr(input: &mut &str) -> ModalResult<SearchQuery> {
let word = take_while(1.., |c: char| {
!c.is_whitespace() && c != ')' && c != '(' && c != '~'
// Check for range queries: field:start..end
if value.contains("..") {
let parts: Vec<&str> = value.split("..").collect();
if parts.len() == 2 {
let start = if parts[0].is_empty() {
None
} else if field == "size" {
parse_size_value(parts[0])
} else {
parts[0].parse().ok()
};
let end = if parts[1].is_empty() {
None
} else if field == "size" {
parse_size_value(parts[1])
} else {
parts[1].parse().ok()
};
return SearchQuery::RangeQuery { field, start, end };
}
}
// Check for comparison queries: >=, <=, >, <
if let Some(rest) = value.strip_prefix(">=") {
let val = if field == "size" {
parse_size_value(rest).unwrap_or(0)
} else {
rest.parse().unwrap_or(0)
};
return SearchQuery::CompareQuery {
field,
op: CompareOp::GreaterOrEqual,
value: val,
};
}
if let Some(rest) = value.strip_prefix("<=") {
let val = if field == "size" {
parse_size_value(rest).unwrap_or(0)
} else {
rest.parse().unwrap_or(0)
};
return SearchQuery::CompareQuery {
field,
op: CompareOp::LessOrEqual,
value: val,
};
}
if let Some(rest) = value.strip_prefix('>') {
let val = if field == "size" {
parse_size_value(rest).unwrap_or(0)
} else {
rest.parse().unwrap_or(0)
};
return SearchQuery::CompareQuery {
field,
op: CompareOp::GreaterThan,
value: val,
};
}
if let Some(rest) = value.strip_prefix('<') {
let val = if field == "size" {
parse_size_value(rest).unwrap_or(0)
} else {
rest.parse().unwrap_or(0)
};
return SearchQuery::CompareQuery {
field,
op: CompareOp::LessThan,
value: val,
};
}
// Check for date queries on created/modified fields
if (field == "created" || field == "modified")
&& let Some(date_val) = parse_date_value(&value)
{
return SearchQuery::DateQuery {
field,
value: date_val,
};
}
// Default: simple field match
SearchQuery::FieldMatch { field, value }
})
.map(|s: &str| s.to_string());
(word, '~')
.map(|(w, _)| SearchQuery::Fuzzy(w))
.parse_next(input)
}
fn paren_expr(input: &mut &str) -> ModalResult<SearchQuery> {
delimited(('(', ws), or_expr, (ws, ')')).parse_next(input)
}
fn not_or_keyword(input: &mut &str) -> ModalResult<()> {
if let Some(rest) = input.strip_prefix("OR")
&& (rest.is_empty() || rest.starts_with(' ') || rest.starts_with(')'))
{
return Err(winnow::error::ErrMode::Backtrack(
winnow::error::ContextError::new(),
));
}
Ok(())
}
fn full_text(input: &mut &str) -> ModalResult<SearchQuery> {
not_or_keyword.parse_next(input)?;
word_or_quoted.map(SearchQuery::FullText).parse_next(input)
}
fn atom(input: &mut &str) -> ModalResult<SearchQuery> {
alt((
paren_expr,
not_expr,
field_match,
prefix_expr,
fuzzy_expr,
full_text,
))
.parse_next(input)
}
fn prefix_expr(input: &mut &str) -> ModalResult<SearchQuery> {
let word = take_while(1.., |c: char| {
!c.is_whitespace() && c != ')' && c != '(' && c != '*'
})
.map(|s: &str| s.to_string());
(word, '*')
.map(|(w, _)| SearchQuery::Prefix(w))
.parse_next(input)
}
fn fuzzy_expr(input: &mut &str) -> ModalResult<SearchQuery> {
let word = take_while(1.., |c: char| {
!c.is_whitespace() && c != ')' && c != '(' && c != '~'
})
.map(|s: &str| s.to_string());
(word, '~')
.map(|(w, _)| SearchQuery::Fuzzy(w))
.parse_next(input)
}
fn paren_expr(input: &mut &str) -> ModalResult<SearchQuery> {
delimited(('(', ws), or_expr, (ws, ')')).parse_next(input)
}
fn not_or_keyword(input: &mut &str) -> ModalResult<()> {
if let Some(rest) = input.strip_prefix("OR")
&& (rest.is_empty() || rest.starts_with(' ') || rest.starts_with(')'))
{
return Err(winnow::error::ErrMode::Backtrack(
winnow::error::ContextError::new(),
));
}
Ok(())
}
fn full_text(input: &mut &str) -> ModalResult<SearchQuery> {
not_or_keyword.parse_next(input)?;
word_or_quoted.map(SearchQuery::FullText).parse_next(input)
}
fn atom(input: &mut &str) -> ModalResult<SearchQuery> {
alt((
paren_expr,
not_expr,
field_match,
prefix_expr,
fuzzy_expr,
full_text,
))
.parse_next(input)
}
fn and_expr(input: &mut &str) -> ModalResult<SearchQuery> {
let first = atom.parse_next(input)?;
let rest: Vec<SearchQuery> = repeat(0.., preceded(ws, atom)).parse_next(input)?;
if rest.is_empty() {
Ok(first)
} else {
let mut terms = vec![first];
terms.extend(rest);
Ok(SearchQuery::And(terms))
}
let first = atom.parse_next(input)?;
let rest: Vec<SearchQuery> =
repeat(0.., preceded(ws, atom)).parse_next(input)?;
if rest.is_empty() {
Ok(first)
} else {
let mut terms = vec![first];
terms.extend(rest);
Ok(SearchQuery::And(terms))
}
}
fn or_expr(input: &mut &str) -> ModalResult<SearchQuery> {
let first = and_expr.parse_next(input)?;
let rest: Vec<SearchQuery> =
repeat(0.., preceded((ws, "OR", ws), and_expr)).parse_next(input)?;
if rest.is_empty() {
Ok(first)
} else {
let mut terms = vec![first];
terms.extend(rest);
Ok(SearchQuery::Or(terms))
}
let first = and_expr.parse_next(input)?;
let rest: Vec<SearchQuery> =
repeat(0.., preceded((ws, "OR", ws), and_expr)).parse_next(input)?;
if rest.is_empty() {
Ok(first)
} else {
let mut terms = vec![first];
terms.extend(rest);
Ok(SearchQuery::Or(terms))
}
}
pub fn parse_search_query(input: &str) -> crate::error::Result<SearchQuery> {
let trimmed = input.trim();
if trimmed.is_empty() {
return Ok(SearchQuery::FullText(String::new()));
}
let mut input = trimmed;
or_expr
.parse_next(&mut input)
.map_err(|e| crate::error::PinakesError::SearchParse(format!("{e}")))
let trimmed = input.trim();
if trimmed.is_empty() {
return Ok(SearchQuery::FullText(String::new()));
}
let mut input = trimmed;
or_expr
.parse_next(&mut input)
.map_err(|e| crate::error::PinakesError::SearchParse(format!("{e}")))
}
#[cfg(test)]
mod tests {
use super::*;
use super::*;
#[test]
fn test_simple_text() {
let q = parse_search_query("hello").unwrap();
assert_eq!(q, SearchQuery::FullText("hello".into()));
}
#[test]
fn test_simple_text() {
let q = parse_search_query("hello").unwrap();
assert_eq!(q, SearchQuery::FullText("hello".into()));
}
#[test]
fn test_field_match() {
let q = parse_search_query("artist:Beatles").unwrap();
assert_eq!(
q,
SearchQuery::FieldMatch {
field: "artist".into(),
value: "Beatles".into()
}
);
}
#[test]
fn test_field_match() {
let q = parse_search_query("artist:Beatles").unwrap();
assert_eq!(q, SearchQuery::FieldMatch {
field: "artist".into(),
value: "Beatles".into(),
});
}
#[test]
fn test_type_filter() {
let q = parse_search_query("type:pdf").unwrap();
assert_eq!(q, SearchQuery::TypeFilter("pdf".into()));
}
#[test]
fn test_type_filter() {
let q = parse_search_query("type:pdf").unwrap();
assert_eq!(q, SearchQuery::TypeFilter("pdf".into()));
}
#[test]
fn test_tag_filter() {
let q = parse_search_query("tag:music").unwrap();
assert_eq!(q, SearchQuery::TagFilter("music".into()));
}
#[test]
fn test_tag_filter() {
let q = parse_search_query("tag:music").unwrap();
assert_eq!(q, SearchQuery::TagFilter("music".into()));
}
#[test]
fn test_and_implicit() {
let q = parse_search_query("hello world").unwrap();
assert_eq!(
q,
SearchQuery::And(vec![
SearchQuery::FullText("hello".into()),
SearchQuery::FullText("world".into()),
])
);
}
#[test]
fn test_and_implicit() {
let q = parse_search_query("hello world").unwrap();
assert_eq!(
q,
SearchQuery::And(vec![
SearchQuery::FullText("hello".into()),
SearchQuery::FullText("world".into()),
])
);
}
#[test]
fn test_or() {
let q = parse_search_query("hello OR world").unwrap();
assert_eq!(
q,
SearchQuery::Or(vec![
SearchQuery::FullText("hello".into()),
SearchQuery::FullText("world".into()),
])
);
}
#[test]
fn test_or() {
let q = parse_search_query("hello OR world").unwrap();
assert_eq!(
q,
SearchQuery::Or(vec![
SearchQuery::FullText("hello".into()),
SearchQuery::FullText("world".into()),
])
);
}
#[test]
fn test_not() {
let q = parse_search_query("-excluded").unwrap();
assert_eq!(
q,
SearchQuery::Not(Box::new(SearchQuery::FullText("excluded".into())))
);
}
#[test]
fn test_not() {
let q = parse_search_query("-excluded").unwrap();
assert_eq!(
q,
SearchQuery::Not(Box::new(SearchQuery::FullText("excluded".into())))
);
}
#[test]
fn test_prefix() {
let q = parse_search_query("hel*").unwrap();
assert_eq!(q, SearchQuery::Prefix("hel".into()));
}
#[test]
fn test_prefix() {
let q = parse_search_query("hel*").unwrap();
assert_eq!(q, SearchQuery::Prefix("hel".into()));
}
#[test]
fn test_fuzzy() {
let q = parse_search_query("hello~").unwrap();
assert_eq!(q, SearchQuery::Fuzzy("hello".into()));
}
#[test]
fn test_fuzzy() {
let q = parse_search_query("hello~").unwrap();
assert_eq!(q, SearchQuery::Fuzzy("hello".into()));
}
#[test]
fn test_quoted() {
let q = parse_search_query("\"hello world\"").unwrap();
assert_eq!(q, SearchQuery::FullText("hello world".into()));
}
#[test]
fn test_quoted() {
let q = parse_search_query("\"hello world\"").unwrap();
assert_eq!(q, SearchQuery::FullText("hello world".into()));
}
#[test]
fn test_range_query_year() {
let q = parse_search_query("year:2020..2023").unwrap();
assert_eq!(
q,
SearchQuery::RangeQuery {
field: "year".into(),
start: Some(2020),
end: Some(2023)
}
);
}
#[test]
fn test_range_query_year() {
let q = parse_search_query("year:2020..2023").unwrap();
assert_eq!(q, SearchQuery::RangeQuery {
field: "year".into(),
start: Some(2020),
end: Some(2023),
});
}
#[test]
fn test_range_query_open_start() {
let q = parse_search_query("year:..2023").unwrap();
assert_eq!(
q,
SearchQuery::RangeQuery {
field: "year".into(),
start: None,
end: Some(2023)
}
);
}
#[test]
fn test_range_query_open_start() {
let q = parse_search_query("year:..2023").unwrap();
assert_eq!(q, SearchQuery::RangeQuery {
field: "year".into(),
start: None,
end: Some(2023),
});
}
#[test]
fn test_range_query_open_end() {
let q = parse_search_query("year:2020..").unwrap();
assert_eq!(
q,
SearchQuery::RangeQuery {
field: "year".into(),
start: Some(2020),
end: None
}
);
}
#[test]
fn test_range_query_open_end() {
let q = parse_search_query("year:2020..").unwrap();
assert_eq!(q, SearchQuery::RangeQuery {
field: "year".into(),
start: Some(2020),
end: None,
});
}
#[test]
fn test_compare_greater_than() {
let q = parse_search_query("year:>2020").unwrap();
assert_eq!(
q,
SearchQuery::CompareQuery {
field: "year".into(),
op: CompareOp::GreaterThan,
value: 2020
}
);
}
#[test]
fn test_compare_greater_than() {
let q = parse_search_query("year:>2020").unwrap();
assert_eq!(q, SearchQuery::CompareQuery {
field: "year".into(),
op: CompareOp::GreaterThan,
value: 2020,
});
}
#[test]
fn test_compare_less_or_equal() {
let q = parse_search_query("year:<=2023").unwrap();
assert_eq!(
q,
SearchQuery::CompareQuery {
field: "year".into(),
op: CompareOp::LessOrEqual,
value: 2023
}
);
}
#[test]
fn test_compare_less_or_equal() {
let q = parse_search_query("year:<=2023").unwrap();
assert_eq!(q, SearchQuery::CompareQuery {
field: "year".into(),
op: CompareOp::LessOrEqual,
value: 2023,
});
}
#[test]
fn test_size_compare_mb() {
let q = parse_search_query("size:>10MB").unwrap();
assert_eq!(
q,
SearchQuery::CompareQuery {
field: "size".into(),
op: CompareOp::GreaterThan,
value: 10 * 1024 * 1024
}
);
}
#[test]
fn test_size_compare_mb() {
let q = parse_search_query("size:>10MB").unwrap();
assert_eq!(q, SearchQuery::CompareQuery {
field: "size".into(),
op: CompareOp::GreaterThan,
value: 10 * 1024 * 1024,
});
}
#[test]
fn test_size_range_gb() {
let q = parse_search_query("size:1GB..2GB").unwrap();
assert_eq!(
q,
SearchQuery::RangeQuery {
field: "size".into(),
start: Some(1024 * 1024 * 1024),
end: Some(2 * 1024 * 1024 * 1024)
}
);
}
#[test]
fn test_size_range_gb() {
let q = parse_search_query("size:1GB..2GB").unwrap();
assert_eq!(q, SearchQuery::RangeQuery {
field: "size".into(),
start: Some(1024 * 1024 * 1024),
end: Some(2 * 1024 * 1024 * 1024),
});
}
#[test]
fn test_date_query_today() {
let q = parse_search_query("created:today").unwrap();
assert_eq!(
q,
SearchQuery::DateQuery {
field: "created".into(),
value: DateValue::Today
}
);
}
#[test]
fn test_date_query_today() {
let q = parse_search_query("created:today").unwrap();
assert_eq!(q, SearchQuery::DateQuery {
field: "created".into(),
value: DateValue::Today,
});
}
#[test]
fn test_date_query_last_week() {
let q = parse_search_query("modified:last-week").unwrap();
assert_eq!(
q,
SearchQuery::DateQuery {
field: "modified".into(),
value: DateValue::LastWeek
}
);
}
#[test]
fn test_date_query_last_week() {
let q = parse_search_query("modified:last-week").unwrap();
assert_eq!(q, SearchQuery::DateQuery {
field: "modified".into(),
value: DateValue::LastWeek,
});
}
#[test]
fn test_date_query_days_ago() {
let q = parse_search_query("created:last-30d").unwrap();
assert_eq!(
q,
SearchQuery::DateQuery {
field: "created".into(),
value: DateValue::DaysAgo(30)
}
);
}
#[test]
fn test_date_query_days_ago() {
let q = parse_search_query("created:last-30d").unwrap();
assert_eq!(q, SearchQuery::DateQuery {
field: "created".into(),
value: DateValue::DaysAgo(30),
});
}
}

View file

@ -12,423 +12,424 @@ use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::model::MediaId;
use crate::users::UserId;
use crate::{model::MediaId, users::UserId};
/// Unique identifier for a share.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ShareId(pub Uuid);
impl ShareId {
pub fn new() -> Self {
Self(Uuid::now_v7())
}
pub fn new() -> Self {
Self(Uuid::now_v7())
}
}
impl Default for ShareId {
fn default() -> Self {
Self::new()
}
fn default() -> Self {
Self::new()
}
}
impl fmt::Display for ShareId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
/// What is being shared.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ShareTarget {
Media { media_id: MediaId },
Collection { collection_id: Uuid },
Tag { tag_id: Uuid },
SavedSearch { search_id: Uuid },
Media { media_id: MediaId },
Collection { collection_id: Uuid },
Tag { tag_id: Uuid },
SavedSearch { search_id: Uuid },
}
impl ShareTarget {
pub fn target_type(&self) -> &'static str {
match self {
Self::Media { .. } => "media",
Self::Collection { .. } => "collection",
Self::Tag { .. } => "tag",
Self::SavedSearch { .. } => "saved_search",
}
pub fn target_type(&self) -> &'static str {
match self {
Self::Media { .. } => "media",
Self::Collection { .. } => "collection",
Self::Tag { .. } => "tag",
Self::SavedSearch { .. } => "saved_search",
}
}
pub fn target_id(&self) -> Uuid {
match self {
Self::Media { media_id } => media_id.0,
Self::Collection { collection_id } => *collection_id,
Self::Tag { tag_id } => *tag_id,
Self::SavedSearch { search_id } => *search_id,
}
pub fn target_id(&self) -> Uuid {
match self {
Self::Media { media_id } => media_id.0,
Self::Collection { collection_id } => *collection_id,
Self::Tag { tag_id } => *tag_id,
Self::SavedSearch { search_id } => *search_id,
}
}
}
/// Who the share is with.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ShareRecipient {
/// Public link accessible to anyone with the token
PublicLink {
token: String,
password_hash: Option<String>,
},
/// Shared with a specific user
User { user_id: UserId },
/// Shared with a group
Group { group_id: Uuid },
/// Shared with a federated user on another server
Federated {
user_handle: String,
server_url: String,
},
/// Public link accessible to anyone with the token
PublicLink {
token: String,
password_hash: Option<String>,
},
/// Shared with a specific user
User { user_id: UserId },
/// Shared with a group
Group { group_id: Uuid },
/// Shared with a federated user on another server
Federated {
user_handle: String,
server_url: String,
},
}
impl ShareRecipient {
pub fn recipient_type(&self) -> &'static str {
match self {
Self::PublicLink { .. } => "public_link",
Self::User { .. } => "user",
Self::Group { .. } => "group",
Self::Federated { .. } => "federated",
}
pub fn recipient_type(&self) -> &'static str {
match self {
Self::PublicLink { .. } => "public_link",
Self::User { .. } => "user",
Self::Group { .. } => "group",
Self::Federated { .. } => "federated",
}
}
}
/// Permissions granted by a share.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[derive(
Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize,
)]
pub struct SharePermissions {
/// Can view the content
pub can_view: bool,
/// Can download the content
pub can_download: bool,
/// Can edit the content/metadata
pub can_edit: bool,
/// Can delete the content
pub can_delete: bool,
/// Can reshare with others
pub can_reshare: bool,
/// Can add new items (for collections)
pub can_add: bool,
/// Can view the content
pub can_view: bool,
/// Can download the content
pub can_download: bool,
/// Can edit the content/metadata
pub can_edit: bool,
/// Can delete the content
pub can_delete: bool,
/// Can reshare with others
pub can_reshare: bool,
/// Can add new items (for collections)
pub can_add: bool,
}
impl SharePermissions {
/// View-only permissions
pub fn view_only() -> Self {
Self {
can_view: true,
..Default::default()
}
/// View-only permissions
pub fn view_only() -> Self {
Self {
can_view: true,
..Default::default()
}
}
/// Download permissions (includes view)
pub fn download() -> Self {
Self {
can_view: true,
can_download: true,
..Default::default()
}
/// Download permissions (includes view)
pub fn download() -> Self {
Self {
can_view: true,
can_download: true,
..Default::default()
}
}
/// Edit permissions (includes view and download)
pub fn edit() -> Self {
Self {
can_view: true,
can_download: true,
can_edit: true,
can_add: true,
..Default::default()
}
/// Edit permissions (includes view and download)
pub fn edit() -> Self {
Self {
can_view: true,
can_download: true,
can_edit: true,
can_add: true,
..Default::default()
}
}
/// Full permissions
pub fn full() -> Self {
Self {
can_view: true,
can_download: true,
can_edit: true,
can_delete: true,
can_reshare: true,
can_add: true,
}
/// Full permissions
pub fn full() -> Self {
Self {
can_view: true,
can_download: true,
can_edit: true,
can_delete: true,
can_reshare: true,
can_add: true,
}
}
/// Merge permissions (takes the most permissive of each)
pub fn merge(&self, other: &Self) -> Self {
Self {
can_view: self.can_view || other.can_view,
can_download: self.can_download || other.can_download,
can_edit: self.can_edit || other.can_edit,
can_delete: self.can_delete || other.can_delete,
can_reshare: self.can_reshare || other.can_reshare,
can_add: self.can_add || other.can_add,
}
/// Merge permissions (takes the most permissive of each)
pub fn merge(&self, other: &Self) -> Self {
Self {
can_view: self.can_view || other.can_view,
can_download: self.can_download || other.can_download,
can_edit: self.can_edit || other.can_edit,
can_delete: self.can_delete || other.can_delete,
can_reshare: self.can_reshare || other.can_reshare,
can_add: self.can_add || other.can_add,
}
}
}
/// A share record.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Share {
pub id: ShareId,
pub target: ShareTarget,
pub owner_id: UserId,
pub recipient: ShareRecipient,
pub permissions: SharePermissions,
pub note: Option<String>,
pub expires_at: Option<DateTime<Utc>>,
pub access_count: u64,
pub last_accessed: Option<DateTime<Utc>>,
/// Whether children (media in collection, etc.) inherit this share
pub inherit_to_children: bool,
/// Parent share if this was created via reshare
pub parent_share_id: Option<ShareId>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub id: ShareId,
pub target: ShareTarget,
pub owner_id: UserId,
pub recipient: ShareRecipient,
pub permissions: SharePermissions,
pub note: Option<String>,
pub expires_at: Option<DateTime<Utc>>,
pub access_count: u64,
pub last_accessed: Option<DateTime<Utc>>,
/// Whether children (media in collection, etc.) inherit this share
pub inherit_to_children: bool,
/// Parent share if this was created via reshare
pub parent_share_id: Option<ShareId>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
impl Share {
/// Create a new public link share.
pub fn new_public_link(
owner_id: UserId,
target: ShareTarget,
token: String,
permissions: SharePermissions,
) -> Self {
let now = Utc::now();
Self {
id: ShareId::new(),
target,
owner_id,
recipient: ShareRecipient::PublicLink {
token,
password_hash: None,
},
permissions,
note: None,
expires_at: None,
access_count: 0,
last_accessed: None,
inherit_to_children: true,
parent_share_id: None,
created_at: now,
updated_at: now,
}
/// Create a new public link share.
pub fn new_public_link(
owner_id: UserId,
target: ShareTarget,
token: String,
permissions: SharePermissions,
) -> Self {
let now = Utc::now();
Self {
id: ShareId::new(),
target,
owner_id,
recipient: ShareRecipient::PublicLink {
token,
password_hash: None,
},
permissions,
note: None,
expires_at: None,
access_count: 0,
last_accessed: None,
inherit_to_children: true,
parent_share_id: None,
created_at: now,
updated_at: now,
}
}
/// Create a new user share.
pub fn new_user_share(
owner_id: UserId,
target: ShareTarget,
recipient_user_id: UserId,
permissions: SharePermissions,
) -> Self {
let now = Utc::now();
Self {
id: ShareId::new(),
target,
owner_id,
recipient: ShareRecipient::User {
user_id: recipient_user_id,
},
permissions,
note: None,
expires_at: None,
access_count: 0,
last_accessed: None,
inherit_to_children: true,
parent_share_id: None,
created_at: now,
updated_at: now,
}
/// Create a new user share.
pub fn new_user_share(
owner_id: UserId,
target: ShareTarget,
recipient_user_id: UserId,
permissions: SharePermissions,
) -> Self {
let now = Utc::now();
Self {
id: ShareId::new(),
target,
owner_id,
recipient: ShareRecipient::User {
user_id: recipient_user_id,
},
permissions,
note: None,
expires_at: None,
access_count: 0,
last_accessed: None,
inherit_to_children: true,
parent_share_id: None,
created_at: now,
updated_at: now,
}
}
/// Check if the share has expired.
pub fn is_expired(&self) -> bool {
self.expires_at.map(|exp| exp < Utc::now()).unwrap_or(false)
}
/// Check if the share has expired.
pub fn is_expired(&self) -> bool {
self.expires_at.map(|exp| exp < Utc::now()).unwrap_or(false)
}
/// Check if this is a public link share.
pub fn is_public(&self) -> bool {
matches!(self.recipient, ShareRecipient::PublicLink { .. })
}
/// Check if this is a public link share.
pub fn is_public(&self) -> bool {
matches!(self.recipient, ShareRecipient::PublicLink { .. })
}
/// Get the public token if this is a public link share.
pub fn public_token(&self) -> Option<&str> {
match &self.recipient {
ShareRecipient::PublicLink { token, .. } => Some(token),
_ => None,
}
/// Get the public token if this is a public link share.
pub fn public_token(&self) -> Option<&str> {
match &self.recipient {
ShareRecipient::PublicLink { token, .. } => Some(token),
_ => None,
}
}
}
/// Types of share activity actions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ShareActivityAction {
Created,
Updated,
Accessed,
Downloaded,
Revoked,
Expired,
PasswordFailed,
Created,
Updated,
Accessed,
Downloaded,
Revoked,
Expired,
PasswordFailed,
}
impl fmt::Display for ShareActivityAction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Created => write!(f, "created"),
Self::Updated => write!(f, "updated"),
Self::Accessed => write!(f, "accessed"),
Self::Downloaded => write!(f, "downloaded"),
Self::Revoked => write!(f, "revoked"),
Self::Expired => write!(f, "expired"),
Self::PasswordFailed => write!(f, "password_failed"),
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Created => write!(f, "created"),
Self::Updated => write!(f, "updated"),
Self::Accessed => write!(f, "accessed"),
Self::Downloaded => write!(f, "downloaded"),
Self::Revoked => write!(f, "revoked"),
Self::Expired => write!(f, "expired"),
Self::PasswordFailed => write!(f, "password_failed"),
}
}
}
impl std::str::FromStr for ShareActivityAction {
type Err = String;
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"created" => Ok(Self::Created),
"updated" => Ok(Self::Updated),
"accessed" => Ok(Self::Accessed),
"downloaded" => Ok(Self::Downloaded),
"revoked" => Ok(Self::Revoked),
"expired" => Ok(Self::Expired),
"password_failed" => Ok(Self::PasswordFailed),
_ => Err(format!("unknown share activity action: {}", s)),
}
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"created" => Ok(Self::Created),
"updated" => Ok(Self::Updated),
"accessed" => Ok(Self::Accessed),
"downloaded" => Ok(Self::Downloaded),
"revoked" => Ok(Self::Revoked),
"expired" => Ok(Self::Expired),
"password_failed" => Ok(Self::PasswordFailed),
_ => Err(format!("unknown share activity action: {}", s)),
}
}
}
/// Activity log entry for a share.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShareActivity {
pub id: Uuid,
pub share_id: ShareId,
pub actor_id: Option<UserId>,
pub actor_ip: Option<String>,
pub action: ShareActivityAction,
pub details: Option<String>,
pub timestamp: DateTime<Utc>,
pub id: Uuid,
pub share_id: ShareId,
pub actor_id: Option<UserId>,
pub actor_ip: Option<String>,
pub action: ShareActivityAction,
pub details: Option<String>,
pub timestamp: DateTime<Utc>,
}
impl ShareActivity {
pub fn new(share_id: ShareId, action: ShareActivityAction) -> Self {
Self {
id: Uuid::now_v7(),
share_id,
actor_id: None,
actor_ip: None,
action,
details: None,
timestamp: Utc::now(),
}
pub fn new(share_id: ShareId, action: ShareActivityAction) -> Self {
Self {
id: Uuid::now_v7(),
share_id,
actor_id: None,
actor_ip: None,
action,
details: None,
timestamp: Utc::now(),
}
}
pub fn with_actor(mut self, actor_id: UserId) -> Self {
self.actor_id = Some(actor_id);
self
}
pub fn with_actor(mut self, actor_id: UserId) -> Self {
self.actor_id = Some(actor_id);
self
}
pub fn with_ip(mut self, ip: &str) -> Self {
self.actor_ip = Some(ip.to_string());
self
}
pub fn with_ip(mut self, ip: &str) -> Self {
self.actor_ip = Some(ip.to_string());
self
}
pub fn with_details(mut self, details: &str) -> Self {
self.details = Some(details.to_string());
self
}
pub fn with_details(mut self, details: &str) -> Self {
self.details = Some(details.to_string());
self
}
}
/// Types of share notifications.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ShareNotificationType {
NewShare,
ShareUpdated,
ShareRevoked,
ShareExpiring,
ShareAccessed,
NewShare,
ShareUpdated,
ShareRevoked,
ShareExpiring,
ShareAccessed,
}
impl fmt::Display for ShareNotificationType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::NewShare => write!(f, "new_share"),
Self::ShareUpdated => write!(f, "share_updated"),
Self::ShareRevoked => write!(f, "share_revoked"),
Self::ShareExpiring => write!(f, "share_expiring"),
Self::ShareAccessed => write!(f, "share_accessed"),
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::NewShare => write!(f, "new_share"),
Self::ShareUpdated => write!(f, "share_updated"),
Self::ShareRevoked => write!(f, "share_revoked"),
Self::ShareExpiring => write!(f, "share_expiring"),
Self::ShareAccessed => write!(f, "share_accessed"),
}
}
}
impl std::str::FromStr for ShareNotificationType {
type Err = String;
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"new_share" => Ok(Self::NewShare),
"share_updated" => Ok(Self::ShareUpdated),
"share_revoked" => Ok(Self::ShareRevoked),
"share_expiring" => Ok(Self::ShareExpiring),
"share_accessed" => Ok(Self::ShareAccessed),
_ => Err(format!("unknown share notification type: {}", s)),
}
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"new_share" => Ok(Self::NewShare),
"share_updated" => Ok(Self::ShareUpdated),
"share_revoked" => Ok(Self::ShareRevoked),
"share_expiring" => Ok(Self::ShareExpiring),
"share_accessed" => Ok(Self::ShareAccessed),
_ => Err(format!("unknown share notification type: {}", s)),
}
}
}
/// A notification about a share.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShareNotification {
pub id: Uuid,
pub user_id: UserId,
pub share_id: ShareId,
pub notification_type: ShareNotificationType,
pub is_read: bool,
pub created_at: DateTime<Utc>,
pub id: Uuid,
pub user_id: UserId,
pub share_id: ShareId,
pub notification_type: ShareNotificationType,
pub is_read: bool,
pub created_at: DateTime<Utc>,
}
impl ShareNotification {
pub fn new(
user_id: UserId,
share_id: ShareId,
notification_type: ShareNotificationType,
) -> Self {
Self {
id: Uuid::now_v7(),
user_id,
share_id,
notification_type,
is_read: false,
created_at: Utc::now(),
}
pub fn new(
user_id: UserId,
share_id: ShareId,
notification_type: ShareNotificationType,
) -> Self {
Self {
id: Uuid::now_v7(),
user_id,
share_id,
notification_type,
is_read: false,
created_at: Utc::now(),
}
}
}
/// Generate a random share token using UUID.
pub fn generate_share_token() -> String {
// Use UUIDv4 for random tokens - simple string representation
Uuid::new_v4().simple().to_string()
// Use UUIDv4 for random tokens - simple string representation
Uuid::new_v4().simple().to_string()
}
/// Hash a share password.
pub fn hash_share_password(password: &str) -> String {
// Use BLAKE3 for password hashing (in production, use Argon2)
blake3::hash(password.as_bytes()).to_hex().to_string()
// Use BLAKE3 for password hashing (in production, use Argon2)
blake3::hash(password.as_bytes()).to_hex().to_string()
}
/// Verify a share password.
pub fn verify_share_password(password: &str, hash: &str) -> bool {
let computed = hash_share_password(password);
computed == hash
let computed = hash_share_password(password);
computed == hash
}

View file

@ -4,49 +4,48 @@ use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::model::MediaId;
use crate::users::UserId;
use crate::{model::MediaId, users::UserId};
/// A user's rating for a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Rating {
pub id: Uuid,
pub user_id: UserId,
pub media_id: MediaId,
pub stars: u8,
pub review_text: Option<String>,
pub created_at: DateTime<Utc>,
pub id: Uuid,
pub user_id: UserId,
pub media_id: MediaId,
pub stars: u8,
pub review_text: Option<String>,
pub created_at: DateTime<Utc>,
}
/// A comment on a media item, supporting threaded replies.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Comment {
pub id: Uuid,
pub user_id: UserId,
pub media_id: MediaId,
pub parent_comment_id: Option<Uuid>,
pub text: String,
pub created_at: DateTime<Utc>,
pub id: Uuid,
pub user_id: UserId,
pub media_id: MediaId,
pub parent_comment_id: Option<Uuid>,
pub text: String,
pub created_at: DateTime<Utc>,
}
/// A user's favorite bookmark for a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Favorite {
pub user_id: UserId,
pub media_id: MediaId,
pub created_at: DateTime<Utc>,
pub user_id: UserId,
pub media_id: MediaId,
pub created_at: DateTime<Utc>,
}
/// A shareable link to a media item with optional password and expiration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShareLink {
pub id: Uuid,
pub media_id: MediaId,
pub created_by: UserId,
pub token: String,
#[serde(skip_serializing)]
pub password_hash: Option<String>,
pub expires_at: Option<DateTime<Utc>>,
pub view_count: u64,
pub created_at: DateTime<Utc>,
pub id: Uuid,
pub media_id: MediaId,
pub created_by: UserId,
pub token: String,
#[serde(skip_serializing)]
pub password_hash: Option<String>,
pub expires_at: Option<DateTime<Utc>>,
pub view_count: u64,
pub created_at: DateTime<Utc>,
}

View file

@ -1,26 +1,28 @@
use crate::error::{PinakesError, Result};
mod sqlite_migrations {
use refinery::embed_migrations;
embed_migrations!("../../migrations/sqlite");
use refinery::embed_migrations;
embed_migrations!("../../migrations/sqlite");
}
mod postgres_migrations {
use refinery::embed_migrations;
embed_migrations!("../../migrations/postgres");
use refinery::embed_migrations;
embed_migrations!("../../migrations/postgres");
}
pub fn run_sqlite_migrations(conn: &mut rusqlite::Connection) -> Result<()> {
sqlite_migrations::migrations::runner()
.run(conn)
.map_err(|e| PinakesError::Migration(e.to_string()))?;
Ok(())
sqlite_migrations::migrations::runner()
.run(conn)
.map_err(|e| PinakesError::Migration(e.to_string()))?;
Ok(())
}
pub async fn run_postgres_migrations(client: &mut tokio_postgres::Client) -> Result<()> {
postgres_migrations::migrations::runner()
.run_async(client)
.await
.map_err(|e| PinakesError::Migration(e.to_string()))?;
Ok(())
pub async fn run_postgres_migrations(
client: &mut tokio_postgres::Client,
) -> Result<()> {
postgres_migrations::migrations::runner()
.run_async(client)
.await
.map_err(|e| PinakesError::Migration(e.to_string()))?;
Ok(())
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -11,52 +11,52 @@ use crate::model::MediaId;
/// A subtitle track associated with a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Subtitle {
pub id: Uuid,
pub media_id: MediaId,
pub language: Option<String>,
pub format: SubtitleFormat,
pub file_path: Option<PathBuf>,
pub is_embedded: bool,
pub track_index: Option<usize>,
pub offset_ms: i64,
pub created_at: DateTime<Utc>,
pub id: Uuid,
pub media_id: MediaId,
pub language: Option<String>,
pub format: SubtitleFormat,
pub file_path: Option<PathBuf>,
pub is_embedded: bool,
pub track_index: Option<usize>,
pub offset_ms: i64,
pub created_at: DateTime<Utc>,
}
/// Supported subtitle formats.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum SubtitleFormat {
Srt,
Vtt,
Ass,
Ssa,
Pgs,
Srt,
Vtt,
Ass,
Ssa,
Pgs,
}
impl std::fmt::Display for SubtitleFormat {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
Self::Srt => "srt",
Self::Vtt => "vtt",
Self::Ass => "ass",
Self::Ssa => "ssa",
Self::Pgs => "pgs",
};
write!(f, "{s}")
}
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
Self::Srt => "srt",
Self::Vtt => "vtt",
Self::Ass => "ass",
Self::Ssa => "ssa",
Self::Pgs => "pgs",
};
write!(f, "{s}")
}
}
impl std::str::FromStr for SubtitleFormat {
type Err = String;
type Err = String;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"srt" => Ok(Self::Srt),
"vtt" => Ok(Self::Vtt),
"ass" => Ok(Self::Ass),
"ssa" => Ok(Self::Ssa),
"pgs" => Ok(Self::Pgs),
_ => Err(format!("unknown subtitle format: {s}")),
}
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"srt" => Ok(Self::Srt),
"vtt" => Ok(Self::Vtt),
"ass" => Ok(Self::Ass),
"ssa" => Ok(Self::Ssa),
"pgs" => Ok(Self::Pgs),
_ => Err(format!("unknown subtitle format: {s}")),
}
}
}

View file

@ -3,295 +3,297 @@
use std::path::{Path, PathBuf};
use chrono::Utc;
use tokio::fs;
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt};
use tokio::{
fs,
io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
};
use tracing::{debug, info};
use uuid::Uuid;
use crate::error::{PinakesError, Result};
use super::{ChunkInfo, UploadSession};
use crate::error::{PinakesError, Result};
/// Manager for chunked uploads.
#[derive(Debug, Clone)]
pub struct ChunkedUploadManager {
temp_dir: PathBuf,
temp_dir: PathBuf,
}
impl ChunkedUploadManager {
/// Create a new chunked upload manager.
pub fn new(temp_dir: PathBuf) -> Self {
Self { temp_dir }
/// Create a new chunked upload manager.
pub fn new(temp_dir: PathBuf) -> Self {
Self { temp_dir }
}
/// Initialize the temp directory.
pub async fn init(&self) -> Result<()> {
fs::create_dir_all(&self.temp_dir).await?;
Ok(())
}
/// Get the temp file path for an upload session.
pub fn temp_path(&self, session_id: Uuid) -> PathBuf {
self.temp_dir.join(format!("{}.upload", session_id))
}
/// Create the temp file for a new upload session.
pub async fn create_temp_file(&self, session: &UploadSession) -> Result<()> {
let path = self.temp_path(session.id);
// Create a sparse file of the expected size
let file = fs::File::create(&path).await?;
file.set_len(session.expected_size).await?;
debug!(
session_id = %session.id,
size = session.expected_size,
"created temp file for upload"
);
Ok(())
}
/// Write a chunk to the temp file.
pub async fn write_chunk(
&self,
session: &UploadSession,
chunk_index: u64,
data: &[u8],
) -> Result<ChunkInfo> {
let path = self.temp_path(session.id);
if !path.exists() {
return Err(PinakesError::UploadSessionNotFound(session.id.to_string()));
}
/// Initialize the temp directory.
pub async fn init(&self) -> Result<()> {
fs::create_dir_all(&self.temp_dir).await?;
Ok(())
// Calculate offset
let offset = chunk_index * session.chunk_size;
// Validate chunk
if offset >= session.expected_size {
return Err(PinakesError::ChunkOutOfOrder {
expected: session.chunk_count - 1,
actual: chunk_index,
});
}
/// Get the temp file path for an upload session.
pub fn temp_path(&self, session_id: Uuid) -> PathBuf {
self.temp_dir.join(format!("{}.upload", session_id))
// Calculate expected chunk size
let expected_size = if chunk_index == session.chunk_count - 1 {
// Last chunk may be smaller
session.expected_size - offset
} else {
session.chunk_size
};
if data.len() as u64 != expected_size {
return Err(PinakesError::InvalidData(format!(
"chunk {} has wrong size: expected {}, got {}",
chunk_index,
expected_size,
data.len()
)));
}
/// Create the temp file for a new upload session.
pub async fn create_temp_file(&self, session: &UploadSession) -> Result<()> {
let path = self.temp_path(session.id);
// Write chunk to file at offset
let mut file = fs::OpenOptions::new().write(true).open(&path).await?;
// Create a sparse file of the expected size
let file = fs::File::create(&path).await?;
file.set_len(session.expected_size).await?;
file.seek(std::io::SeekFrom::Start(offset)).await?;
file.write_all(data).await?;
file.flush().await?;
debug!(
session_id = %session.id,
size = session.expected_size,
"created temp file for upload"
);
// Compute chunk hash
let hash = blake3::hash(data).to_hex().to_string();
Ok(())
debug!(
session_id = %session.id,
chunk_index,
offset,
size = data.len(),
"wrote chunk"
);
Ok(ChunkInfo {
upload_id: session.id,
chunk_index,
offset,
size: data.len() as u64,
hash,
received_at: Utc::now(),
})
}
/// Verify and finalize the upload.
///
/// Checks that:
/// 1. All chunks are received
/// 2. File size matches expected
/// 3. Content hash matches expected
pub async fn finalize(
&self,
session: &UploadSession,
received_chunks: &[ChunkInfo],
) -> Result<PathBuf> {
let path = self.temp_path(session.id);
// Check all chunks received
if received_chunks.len() as u64 != session.chunk_count {
return Err(PinakesError::InvalidData(format!(
"missing chunks: expected {}, got {}",
session.chunk_count,
received_chunks.len()
)));
}
/// Write a chunk to the temp file.
pub async fn write_chunk(
&self,
session: &UploadSession,
chunk_index: u64,
data: &[u8],
) -> Result<ChunkInfo> {
let path = self.temp_path(session.id);
if !path.exists() {
return Err(PinakesError::UploadSessionNotFound(session.id.to_string()));
}
// Calculate offset
let offset = chunk_index * session.chunk_size;
// Validate chunk
if offset >= session.expected_size {
return Err(PinakesError::ChunkOutOfOrder {
expected: session.chunk_count - 1,
actual: chunk_index,
});
}
// Calculate expected chunk size
let expected_size = if chunk_index == session.chunk_count - 1 {
// Last chunk may be smaller
session.expected_size - offset
} else {
session.chunk_size
};
if data.len() as u64 != expected_size {
return Err(PinakesError::InvalidData(format!(
"chunk {} has wrong size: expected {}, got {}",
chunk_index,
expected_size,
data.len()
)));
}
// Write chunk to file at offset
let mut file = fs::OpenOptions::new().write(true).open(&path).await?;
file.seek(std::io::SeekFrom::Start(offset)).await?;
file.write_all(data).await?;
file.flush().await?;
// Compute chunk hash
let hash = blake3::hash(data).to_hex().to_string();
debug!(
session_id = %session.id,
chunk_index,
offset,
size = data.len(),
"wrote chunk"
);
Ok(ChunkInfo {
upload_id: session.id,
chunk_index,
offset,
size: data.len() as u64,
hash,
received_at: Utc::now(),
})
// Verify chunk indices
let mut indices: Vec<u64> =
received_chunks.iter().map(|c| c.chunk_index).collect();
indices.sort();
for (i, idx) in indices.iter().enumerate() {
if *idx != i as u64 {
return Err(PinakesError::InvalidData(format!(
"chunk {} missing or out of order",
i
)));
}
}
/// Verify and finalize the upload.
///
/// Checks that:
/// 1. All chunks are received
/// 2. File size matches expected
/// 3. Content hash matches expected
pub async fn finalize(
&self,
session: &UploadSession,
received_chunks: &[ChunkInfo],
) -> Result<PathBuf> {
let path = self.temp_path(session.id);
// Verify file size
let metadata = fs::metadata(&path).await?;
if metadata.len() != session.expected_size {
return Err(PinakesError::InvalidData(format!(
"file size mismatch: expected {}, got {}",
session.expected_size,
metadata.len()
)));
}
// Check all chunks received
if received_chunks.len() as u64 != session.chunk_count {
return Err(PinakesError::InvalidData(format!(
"missing chunks: expected {}, got {}",
session.chunk_count,
received_chunks.len()
)));
}
// Verify content hash
let computed_hash = compute_file_hash(&path).await?;
if computed_hash != session.expected_hash.0 {
return Err(PinakesError::StorageIntegrity(format!(
"hash mismatch: expected {}, computed {}",
session.expected_hash, computed_hash
)));
}
// Verify chunk indices
let mut indices: Vec<u64> = received_chunks.iter().map(|c| c.chunk_index).collect();
indices.sort();
for (i, idx) in indices.iter().enumerate() {
if *idx != i as u64 {
return Err(PinakesError::InvalidData(format!(
"chunk {} missing or out of order",
i
)));
info!(
session_id = %session.id,
hash = %session.expected_hash,
size = session.expected_size,
"finalized chunked upload"
);
Ok(path)
}
/// Cancel an upload and clean up temp file.
pub async fn cancel(&self, session_id: Uuid) -> Result<()> {
let path = self.temp_path(session_id);
if path.exists() {
fs::remove_file(&path).await?;
debug!(session_id = %session_id, "cancelled upload, removed temp file");
}
Ok(())
}
/// Clean up expired temp files.
pub async fn cleanup_expired(&self, max_age_hours: u64) -> Result<u64> {
let mut count = 0u64;
let max_age = std::time::Duration::from_secs(max_age_hours * 3600);
let mut entries = fs::read_dir(&self.temp_dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.extension().map(|e| e == "upload").unwrap_or(false) {
if let Ok(metadata) = fs::metadata(&path).await {
if let Ok(modified) = metadata.modified() {
let age = std::time::SystemTime::now()
.duration_since(modified)
.unwrap_or_default();
if age > max_age {
let _ = fs::remove_file(&path).await;
count += 1;
}
}
}
// Verify file size
let metadata = fs::metadata(&path).await?;
if metadata.len() != session.expected_size {
return Err(PinakesError::InvalidData(format!(
"file size mismatch: expected {}, got {}",
session.expected_size,
metadata.len()
)));
}
// Verify content hash
let computed_hash = compute_file_hash(&path).await?;
if computed_hash != session.expected_hash.0 {
return Err(PinakesError::StorageIntegrity(format!(
"hash mismatch: expected {}, computed {}",
session.expected_hash, computed_hash
)));
}
info!(
session_id = %session.id,
hash = %session.expected_hash,
size = session.expected_size,
"finalized chunked upload"
);
Ok(path)
}
}
/// Cancel an upload and clean up temp file.
pub async fn cancel(&self, session_id: Uuid) -> Result<()> {
let path = self.temp_path(session_id);
if path.exists() {
fs::remove_file(&path).await?;
debug!(session_id = %session_id, "cancelled upload, removed temp file");
}
Ok(())
}
/// Clean up expired temp files.
pub async fn cleanup_expired(&self, max_age_hours: u64) -> Result<u64> {
let mut count = 0u64;
let max_age = std::time::Duration::from_secs(max_age_hours * 3600);
let mut entries = fs::read_dir(&self.temp_dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.extension().map(|e| e == "upload").unwrap_or(false) {
if let Ok(metadata) = fs::metadata(&path).await {
if let Ok(modified) = metadata.modified() {
let age = std::time::SystemTime::now()
.duration_since(modified)
.unwrap_or_default();
if age > max_age {
let _ = fs::remove_file(&path).await;
count += 1;
}
}
}
}
}
if count > 0 {
info!(count, "cleaned up expired upload temp files");
}
Ok(count)
if count > 0 {
info!(count, "cleaned up expired upload temp files");
}
Ok(count)
}
}
/// Compute the BLAKE3 hash of a file.
async fn compute_file_hash(path: &Path) -> Result<String> {
let mut file = fs::File::open(path).await?;
let mut hasher = blake3::Hasher::new();
let mut buf = vec![0u8; 64 * 1024];
let mut file = fs::File::open(path).await?;
let mut hasher = blake3::Hasher::new();
let mut buf = vec![0u8; 64 * 1024];
loop {
let n = file.read(&mut buf).await?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
loop {
let n = file.read(&mut buf).await?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(hasher.finalize().to_hex().to_string())
Ok(hasher.finalize().to_hex().to_string())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::model::ContentHash;
use crate::sync::UploadStatus;
use tempfile::tempdir;
use tempfile::tempdir;
#[tokio::test]
async fn test_chunked_upload() {
let dir = tempdir().unwrap();
let manager = ChunkedUploadManager::new(dir.path().to_path_buf());
manager.init().await.unwrap();
use super::*;
use crate::{model::ContentHash, sync::UploadStatus};
// Create test data
let data = b"Hello, World! This is test data for chunked upload.";
let hash = blake3::hash(data).to_hex().to_string();
let chunk_size = 20u64;
#[tokio::test]
async fn test_chunked_upload() {
let dir = tempdir().unwrap();
let manager = ChunkedUploadManager::new(dir.path().to_path_buf());
manager.init().await.unwrap();
let session = UploadSession {
id: Uuid::now_v7(),
device_id: super::super::DeviceId::new(),
target_path: "/test/file.txt".to_string(),
expected_hash: ContentHash::new(hash.clone()),
expected_size: data.len() as u64,
chunk_size,
chunk_count: (data.len() as u64 + chunk_size - 1) / chunk_size,
status: UploadStatus::InProgress,
created_at: Utc::now(),
expires_at: Utc::now() + chrono::Duration::hours(24),
last_activity: Utc::now(),
};
// Create test data
let data = b"Hello, World! This is test data for chunked upload.";
let hash = blake3::hash(data).to_hex().to_string();
let chunk_size = 20u64;
manager.create_temp_file(&session).await.unwrap();
let session = UploadSession {
id: Uuid::now_v7(),
device_id: super::super::DeviceId::new(),
target_path: "/test/file.txt".to_string(),
expected_hash: ContentHash::new(hash.clone()),
expected_size: data.len() as u64,
chunk_size,
chunk_count: (data.len() as u64 + chunk_size - 1) / chunk_size,
status: UploadStatus::InProgress,
created_at: Utc::now(),
expires_at: Utc::now() + chrono::Duration::hours(24),
last_activity: Utc::now(),
};
// Write chunks
let mut chunks = Vec::new();
for i in 0..session.chunk_count {
let start = (i * chunk_size) as usize;
let end = ((i + 1) * chunk_size).min(data.len() as u64) as usize;
let chunk_data = &data[start..end];
manager.create_temp_file(&session).await.unwrap();
let chunk = manager.write_chunk(&session, i, chunk_data).await.unwrap();
chunks.push(chunk);
}
// Write chunks
let mut chunks = Vec::new();
for i in 0..session.chunk_count {
let start = (i * chunk_size) as usize;
let end = ((i + 1) * chunk_size).min(data.len() as u64) as usize;
let chunk_data = &data[start..end];
// Finalize
let final_path = manager.finalize(&session, &chunks).await.unwrap();
assert!(final_path.exists());
// Verify content
let content = fs::read(&final_path).await.unwrap();
assert_eq!(&content[..], data);
let chunk = manager.write_chunk(&session, i, chunk_data).await.unwrap();
chunks.push(chunk);
}
// Finalize
let final_path = manager.finalize(&session, &chunks).await.unwrap();
assert!(final_path.exists());
// Verify content
let content = fs::read(&final_path).await.unwrap();
assert_eq!(&content[..], data);
}
}

View file

@ -1,144 +1,144 @@
//! Conflict detection and resolution for sync.
use crate::config::ConflictResolution;
use super::DeviceSyncState;
use crate::config::ConflictResolution;
/// Detect if there's a conflict between local and server state.
pub fn detect_conflict(state: &DeviceSyncState) -> Option<ConflictInfo> {
// If either side has no hash, no conflict possible
let local_hash = state.local_hash.as_ref()?;
let server_hash = state.server_hash.as_ref()?;
// If either side has no hash, no conflict possible
let local_hash = state.local_hash.as_ref()?;
let server_hash = state.server_hash.as_ref()?;
// Same hash = no conflict
if local_hash == server_hash {
return None;
}
// Same hash = no conflict
if local_hash == server_hash {
return None;
}
// Both have different hashes = conflict
Some(ConflictInfo {
path: state.path.clone(),
local_hash: local_hash.clone(),
server_hash: server_hash.clone(),
local_mtime: state.local_mtime,
server_mtime: state.server_mtime,
})
// Both have different hashes = conflict
Some(ConflictInfo {
path: state.path.clone(),
local_hash: local_hash.clone(),
server_hash: server_hash.clone(),
local_mtime: state.local_mtime,
server_mtime: state.server_mtime,
})
}
/// Information about a detected conflict.
#[derive(Debug, Clone)]
pub struct ConflictInfo {
pub path: String,
pub local_hash: String,
pub server_hash: String,
pub local_mtime: Option<i64>,
pub server_mtime: Option<i64>,
pub path: String,
pub local_hash: String,
pub server_hash: String,
pub local_mtime: Option<i64>,
pub server_mtime: Option<i64>,
}
/// Result of resolving a conflict.
#[derive(Debug, Clone)]
pub enum ConflictOutcome {
/// Use the server version
UseServer,
/// Use the local version (upload it)
UseLocal,
/// Keep both versions (rename one)
KeepBoth { new_local_path: String },
/// Requires manual intervention
Manual,
/// Use the server version
UseServer,
/// Use the local version (upload it)
UseLocal,
/// Keep both versions (rename one)
KeepBoth { new_local_path: String },
/// Requires manual intervention
Manual,
}
/// Resolve a conflict based on the configured strategy.
pub fn resolve_conflict(
conflict: &ConflictInfo,
resolution: ConflictResolution,
conflict: &ConflictInfo,
resolution: ConflictResolution,
) -> ConflictOutcome {
match resolution {
ConflictResolution::ServerWins => ConflictOutcome::UseServer,
ConflictResolution::ClientWins => ConflictOutcome::UseLocal,
ConflictResolution::KeepBoth => {
let new_path = generate_conflict_path(&conflict.path, &conflict.local_hash);
ConflictOutcome::KeepBoth {
new_local_path: new_path,
}
}
ConflictResolution::Manual => ConflictOutcome::Manual,
}
match resolution {
ConflictResolution::ServerWins => ConflictOutcome::UseServer,
ConflictResolution::ClientWins => ConflictOutcome::UseLocal,
ConflictResolution::KeepBoth => {
let new_path =
generate_conflict_path(&conflict.path, &conflict.local_hash);
ConflictOutcome::KeepBoth {
new_local_path: new_path,
}
},
ConflictResolution::Manual => ConflictOutcome::Manual,
}
}
/// Generate a new path for the conflicting local file.
/// Format: filename.conflict-<short_hash>.ext
fn generate_conflict_path(original_path: &str, local_hash: &str) -> String {
let short_hash = &local_hash[..8.min(local_hash.len())];
let short_hash = &local_hash[..8.min(local_hash.len())];
if let Some((base, ext)) = original_path.rsplit_once('.') {
format!("{}.conflict-{}.{}", base, short_hash, ext)
} else {
format!("{}.conflict-{}", original_path, short_hash)
}
if let Some((base, ext)) = original_path.rsplit_once('.') {
format!("{}.conflict-{}.{}", base, short_hash, ext)
} else {
format!("{}.conflict-{}", original_path, short_hash)
}
}
/// Automatic conflict resolution based on modification times.
/// Useful when ConflictResolution is set to a time-based strategy.
pub fn resolve_by_mtime(conflict: &ConflictInfo) -> ConflictOutcome {
match (conflict.local_mtime, conflict.server_mtime) {
(Some(local), Some(server)) => {
if local > server {
ConflictOutcome::UseLocal
} else {
ConflictOutcome::UseServer
}
}
(Some(_), None) => ConflictOutcome::UseLocal,
(None, Some(_)) => ConflictOutcome::UseServer,
(None, None) => ConflictOutcome::UseServer, // Default to server
}
match (conflict.local_mtime, conflict.server_mtime) {
(Some(local), Some(server)) => {
if local > server {
ConflictOutcome::UseLocal
} else {
ConflictOutcome::UseServer
}
},
(Some(_), None) => ConflictOutcome::UseLocal,
(None, Some(_)) => ConflictOutcome::UseServer,
(None, None) => ConflictOutcome::UseServer, // Default to server
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::sync::FileSyncStatus;
use super::*;
use crate::sync::FileSyncStatus;
#[test]
fn test_generate_conflict_path() {
assert_eq!(
generate_conflict_path("/path/to/file.txt", "abc12345"),
"/path/to/file.conflict-abc12345.txt"
);
#[test]
fn test_generate_conflict_path() {
assert_eq!(
generate_conflict_path("/path/to/file.txt", "abc12345"),
"/path/to/file.conflict-abc12345.txt"
);
assert_eq!(
generate_conflict_path("/path/to/file", "abc12345"),
"/path/to/file.conflict-abc12345"
);
}
assert_eq!(
generate_conflict_path("/path/to/file", "abc12345"),
"/path/to/file.conflict-abc12345"
);
}
#[test]
fn test_detect_conflict() {
let state_no_conflict = DeviceSyncState {
device_id: super::super::DeviceId::new(),
path: "/test".to_string(),
local_hash: Some("abc".to_string()),
server_hash: Some("abc".to_string()),
local_mtime: None,
server_mtime: None,
sync_status: FileSyncStatus::Synced,
last_synced_at: None,
conflict_info_json: None,
};
assert!(detect_conflict(&state_no_conflict).is_none());
#[test]
fn test_detect_conflict() {
let state_no_conflict = DeviceSyncState {
device_id: super::super::DeviceId::new(),
path: "/test".to_string(),
local_hash: Some("abc".to_string()),
server_hash: Some("abc".to_string()),
local_mtime: None,
server_mtime: None,
sync_status: FileSyncStatus::Synced,
last_synced_at: None,
conflict_info_json: None,
};
assert!(detect_conflict(&state_no_conflict).is_none());
let state_conflict = DeviceSyncState {
device_id: super::super::DeviceId::new(),
path: "/test".to_string(),
local_hash: Some("abc".to_string()),
server_hash: Some("def".to_string()),
local_mtime: None,
server_mtime: None,
sync_status: FileSyncStatus::Conflict,
last_synced_at: None,
conflict_info_json: None,
};
assert!(detect_conflict(&state_conflict).is_some());
}
let state_conflict = DeviceSyncState {
device_id: super::super::DeviceId::new(),
path: "/test".to_string(),
local_hash: Some("abc".to_string()),
server_hash: Some("def".to_string()),
local_mtime: None,
server_mtime: None,
sync_status: FileSyncStatus::Conflict,
last_synced_at: None,
conflict_info_json: None,
};
assert!(detect_conflict(&state_conflict).is_some());
}
}

View file

@ -6,375 +6,377 @@ use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::config::ConflictResolution;
use crate::model::{ContentHash, MediaId};
use crate::users::UserId;
use crate::{
config::ConflictResolution,
model::{ContentHash, MediaId},
users::UserId,
};
/// Unique identifier for a sync device.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct DeviceId(pub Uuid);
impl DeviceId {
pub fn new() -> Self {
Self(Uuid::now_v7())
}
pub fn new() -> Self {
Self(Uuid::now_v7())
}
}
impl Default for DeviceId {
fn default() -> Self {
Self::new()
}
fn default() -> Self {
Self::new()
}
}
impl fmt::Display for DeviceId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
/// Type of sync device.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DeviceType {
Desktop,
Mobile,
Tablet,
Server,
Other,
Desktop,
Mobile,
Tablet,
Server,
Other,
}
impl Default for DeviceType {
fn default() -> Self {
Self::Other
}
fn default() -> Self {
Self::Other
}
}
impl fmt::Display for DeviceType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Desktop => write!(f, "desktop"),
Self::Mobile => write!(f, "mobile"),
Self::Tablet => write!(f, "tablet"),
Self::Server => write!(f, "server"),
Self::Other => write!(f, "other"),
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Desktop => write!(f, "desktop"),
Self::Mobile => write!(f, "mobile"),
Self::Tablet => write!(f, "tablet"),
Self::Server => write!(f, "server"),
Self::Other => write!(f, "other"),
}
}
}
impl std::str::FromStr for DeviceType {
type Err = String;
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"desktop" => Ok(Self::Desktop),
"mobile" => Ok(Self::Mobile),
"tablet" => Ok(Self::Tablet),
"server" => Ok(Self::Server),
"other" => Ok(Self::Other),
_ => Err(format!("unknown device type: {}", s)),
}
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"desktop" => Ok(Self::Desktop),
"mobile" => Ok(Self::Mobile),
"tablet" => Ok(Self::Tablet),
"server" => Ok(Self::Server),
"other" => Ok(Self::Other),
_ => Err(format!("unknown device type: {}", s)),
}
}
}
/// A registered sync device.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncDevice {
pub id: DeviceId,
pub user_id: UserId,
pub name: String,
pub device_type: DeviceType,
pub client_version: String,
pub os_info: Option<String>,
pub last_sync_at: Option<DateTime<Utc>>,
pub last_seen_at: DateTime<Utc>,
pub sync_cursor: Option<i64>,
pub enabled: bool,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub id: DeviceId,
pub user_id: UserId,
pub name: String,
pub device_type: DeviceType,
pub client_version: String,
pub os_info: Option<String>,
pub last_sync_at: Option<DateTime<Utc>>,
pub last_seen_at: DateTime<Utc>,
pub sync_cursor: Option<i64>,
pub enabled: bool,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
impl SyncDevice {
pub fn new(
user_id: UserId,
name: String,
device_type: DeviceType,
client_version: String,
) -> Self {
let now = Utc::now();
Self {
id: DeviceId::new(),
user_id,
name,
device_type,
client_version,
os_info: None,
last_sync_at: None,
last_seen_at: now,
sync_cursor: None,
enabled: true,
created_at: now,
updated_at: now,
}
pub fn new(
user_id: UserId,
name: String,
device_type: DeviceType,
client_version: String,
) -> Self {
let now = Utc::now();
Self {
id: DeviceId::new(),
user_id,
name,
device_type,
client_version,
os_info: None,
last_sync_at: None,
last_seen_at: now,
sync_cursor: None,
enabled: true,
created_at: now,
updated_at: now,
}
}
}
/// Type of change recorded in the sync log.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SyncChangeType {
Created,
Modified,
Deleted,
Moved,
MetadataUpdated,
Created,
Modified,
Deleted,
Moved,
MetadataUpdated,
}
impl fmt::Display for SyncChangeType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Created => write!(f, "created"),
Self::Modified => write!(f, "modified"),
Self::Deleted => write!(f, "deleted"),
Self::Moved => write!(f, "moved"),
Self::MetadataUpdated => write!(f, "metadata_updated"),
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Created => write!(f, "created"),
Self::Modified => write!(f, "modified"),
Self::Deleted => write!(f, "deleted"),
Self::Moved => write!(f, "moved"),
Self::MetadataUpdated => write!(f, "metadata_updated"),
}
}
}
impl std::str::FromStr for SyncChangeType {
type Err = String;
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"created" => Ok(Self::Created),
"modified" => Ok(Self::Modified),
"deleted" => Ok(Self::Deleted),
"moved" => Ok(Self::Moved),
"metadata_updated" => Ok(Self::MetadataUpdated),
_ => Err(format!("unknown sync change type: {}", s)),
}
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"created" => Ok(Self::Created),
"modified" => Ok(Self::Modified),
"deleted" => Ok(Self::Deleted),
"moved" => Ok(Self::Moved),
"metadata_updated" => Ok(Self::MetadataUpdated),
_ => Err(format!("unknown sync change type: {}", s)),
}
}
}
/// An entry in the sync log tracking a change.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncLogEntry {
pub id: Uuid,
pub sequence: i64,
pub change_type: SyncChangeType,
pub media_id: Option<MediaId>,
pub path: String,
pub content_hash: Option<ContentHash>,
pub file_size: Option<u64>,
pub metadata_json: Option<String>,
pub changed_by_device: Option<DeviceId>,
pub timestamp: DateTime<Utc>,
pub id: Uuid,
pub sequence: i64,
pub change_type: SyncChangeType,
pub media_id: Option<MediaId>,
pub path: String,
pub content_hash: Option<ContentHash>,
pub file_size: Option<u64>,
pub metadata_json: Option<String>,
pub changed_by_device: Option<DeviceId>,
pub timestamp: DateTime<Utc>,
}
impl SyncLogEntry {
pub fn new(
change_type: SyncChangeType,
path: String,
media_id: Option<MediaId>,
content_hash: Option<ContentHash>,
) -> Self {
Self {
id: Uuid::now_v7(),
sequence: 0, // Will be assigned by database
change_type,
media_id,
path,
content_hash,
file_size: None,
metadata_json: None,
changed_by_device: None,
timestamp: Utc::now(),
}
pub fn new(
change_type: SyncChangeType,
path: String,
media_id: Option<MediaId>,
content_hash: Option<ContentHash>,
) -> Self {
Self {
id: Uuid::now_v7(),
sequence: 0, // Will be assigned by database
change_type,
media_id,
path,
content_hash,
file_size: None,
metadata_json: None,
changed_by_device: None,
timestamp: Utc::now(),
}
}
}
/// Sync status for a file on a device.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileSyncStatus {
Synced,
PendingUpload,
PendingDownload,
Conflict,
Deleted,
Synced,
PendingUpload,
PendingDownload,
Conflict,
Deleted,
}
impl fmt::Display for FileSyncStatus {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Synced => write!(f, "synced"),
Self::PendingUpload => write!(f, "pending_upload"),
Self::PendingDownload => write!(f, "pending_download"),
Self::Conflict => write!(f, "conflict"),
Self::Deleted => write!(f, "deleted"),
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Synced => write!(f, "synced"),
Self::PendingUpload => write!(f, "pending_upload"),
Self::PendingDownload => write!(f, "pending_download"),
Self::Conflict => write!(f, "conflict"),
Self::Deleted => write!(f, "deleted"),
}
}
}
impl std::str::FromStr for FileSyncStatus {
type Err = String;
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"synced" => Ok(Self::Synced),
"pending_upload" => Ok(Self::PendingUpload),
"pending_download" => Ok(Self::PendingDownload),
"conflict" => Ok(Self::Conflict),
"deleted" => Ok(Self::Deleted),
_ => Err(format!("unknown file sync status: {}", s)),
}
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"synced" => Ok(Self::Synced),
"pending_upload" => Ok(Self::PendingUpload),
"pending_download" => Ok(Self::PendingDownload),
"conflict" => Ok(Self::Conflict),
"deleted" => Ok(Self::Deleted),
_ => Err(format!("unknown file sync status: {}", s)),
}
}
}
/// Sync state for a specific file on a specific device.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeviceSyncState {
pub device_id: DeviceId,
pub path: String,
pub local_hash: Option<String>,
pub server_hash: Option<String>,
pub local_mtime: Option<i64>,
pub server_mtime: Option<i64>,
pub sync_status: FileSyncStatus,
pub last_synced_at: Option<DateTime<Utc>>,
pub conflict_info_json: Option<String>,
pub device_id: DeviceId,
pub path: String,
pub local_hash: Option<String>,
pub server_hash: Option<String>,
pub local_mtime: Option<i64>,
pub server_mtime: Option<i64>,
pub sync_status: FileSyncStatus,
pub last_synced_at: Option<DateTime<Utc>>,
pub conflict_info_json: Option<String>,
}
/// A sync conflict that needs resolution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncConflict {
pub id: Uuid,
pub device_id: DeviceId,
pub path: String,
pub local_hash: String,
pub local_mtime: i64,
pub server_hash: String,
pub server_mtime: i64,
pub detected_at: DateTime<Utc>,
pub resolved_at: Option<DateTime<Utc>>,
pub resolution: Option<ConflictResolution>,
pub id: Uuid,
pub device_id: DeviceId,
pub path: String,
pub local_hash: String,
pub local_mtime: i64,
pub server_hash: String,
pub server_mtime: i64,
pub detected_at: DateTime<Utc>,
pub resolved_at: Option<DateTime<Utc>>,
pub resolution: Option<ConflictResolution>,
}
impl SyncConflict {
pub fn new(
device_id: DeviceId,
path: String,
local_hash: String,
local_mtime: i64,
server_hash: String,
server_mtime: i64,
) -> Self {
Self {
id: Uuid::now_v7(),
device_id,
path,
local_hash,
local_mtime,
server_hash,
server_mtime,
detected_at: Utc::now(),
resolved_at: None,
resolution: None,
}
pub fn new(
device_id: DeviceId,
path: String,
local_hash: String,
local_mtime: i64,
server_hash: String,
server_mtime: i64,
) -> Self {
Self {
id: Uuid::now_v7(),
device_id,
path,
local_hash,
local_mtime,
server_hash,
server_mtime,
detected_at: Utc::now(),
resolved_at: None,
resolution: None,
}
}
}
/// Status of an upload session.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum UploadStatus {
    /// Created but no chunks received yet.
    Pending,
    /// At least one chunk received.
    InProgress,
    /// All chunks received and assembled.
    Completed,
    Failed,
    /// Session outlived its deadline without completing.
    Expired,
    Cancelled,
}
impl fmt::Display for UploadStatus {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Pending => write!(f, "pending"),
Self::InProgress => write!(f, "in_progress"),
Self::Completed => write!(f, "completed"),
Self::Failed => write!(f, "failed"),
Self::Expired => write!(f, "expired"),
Self::Cancelled => write!(f, "cancelled"),
}
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Pending => write!(f, "pending"),
Self::InProgress => write!(f, "in_progress"),
Self::Completed => write!(f, "completed"),
Self::Failed => write!(f, "failed"),
Self::Expired => write!(f, "expired"),
Self::Cancelled => write!(f, "cancelled"),
}
}
}
impl std::str::FromStr for UploadStatus {
type Err = String;
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"pending" => Ok(Self::Pending),
"in_progress" => Ok(Self::InProgress),
"completed" => Ok(Self::Completed),
"failed" => Ok(Self::Failed),
"expired" => Ok(Self::Expired),
"cancelled" => Ok(Self::Cancelled),
_ => Err(format!("unknown upload status: {}", s)),
}
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"pending" => Ok(Self::Pending),
"in_progress" => Ok(Self::InProgress),
"completed" => Ok(Self::Completed),
"failed" => Ok(Self::Failed),
"expired" => Ok(Self::Expired),
"cancelled" => Ok(Self::Cancelled),
_ => Err(format!("unknown upload status: {}", s)),
}
}
}
/// A chunked upload session.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UploadSession {
pub id: Uuid,
pub device_id: DeviceId,
pub target_path: String,
pub expected_hash: ContentHash,
pub expected_size: u64,
pub chunk_size: u64,
pub chunk_count: u64,
pub status: UploadStatus,
pub created_at: DateTime<Utc>,
pub expires_at: DateTime<Utc>,
pub last_activity: DateTime<Utc>,
pub id: Uuid,
pub device_id: DeviceId,
pub target_path: String,
pub expected_hash: ContentHash,
pub expected_size: u64,
pub chunk_size: u64,
pub chunk_count: u64,
pub status: UploadStatus,
pub created_at: DateTime<Utc>,
pub expires_at: DateTime<Utc>,
pub last_activity: DateTime<Utc>,
}
impl UploadSession {
pub fn new(
device_id: DeviceId,
target_path: String,
expected_hash: ContentHash,
expected_size: u64,
chunk_size: u64,
timeout_hours: u64,
) -> Self {
let now = Utc::now();
let chunk_count = (expected_size + chunk_size - 1) / chunk_size;
Self {
id: Uuid::now_v7(),
device_id,
target_path,
expected_hash,
expected_size,
chunk_size,
chunk_count,
status: UploadStatus::Pending,
created_at: now,
expires_at: now + chrono::Duration::hours(timeout_hours as i64),
last_activity: now,
}
pub fn new(
device_id: DeviceId,
target_path: String,
expected_hash: ContentHash,
expected_size: u64,
chunk_size: u64,
timeout_hours: u64,
) -> Self {
let now = Utc::now();
let chunk_count = (expected_size + chunk_size - 1) / chunk_size;
Self {
id: Uuid::now_v7(),
device_id,
target_path,
expected_hash,
expected_size,
chunk_size,
chunk_count,
status: UploadStatus::Pending,
created_at: now,
expires_at: now + chrono::Duration::hours(timeout_hours as i64),
last_activity: now,
}
}
}
/// Information about an uploaded chunk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkInfo {
pub upload_id: Uuid,
pub chunk_index: u64,
pub offset: u64,
pub size: u64,
pub hash: String,
pub received_at: DateTime<Utc>,
pub upload_id: Uuid,
pub chunk_index: u64,
pub offset: u64,
pub size: u64,
pub hash: String,
pub received_at: DateTime<Utc>,
}

View file

@ -6,210 +6,219 @@ use chrono::Utc;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::error::Result;
use crate::model::{ContentHash, MediaId};
use crate::storage::DynStorageBackend;
use super::{DeviceId, DeviceSyncState, FileSyncStatus, SyncChangeType, SyncLogEntry};
use super::{
DeviceId,
DeviceSyncState,
FileSyncStatus,
SyncChangeType,
SyncLogEntry,
};
use crate::{
error::Result,
model::{ContentHash, MediaId},
storage::DynStorageBackend,
};
/// Request from client to get changes since a cursor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChangesRequest {
    /// Sequence number the client has already processed up to.
    pub cursor: i64,
    /// Maximum number of changes to return; server-capped if absent or large.
    pub limit: Option<u64>,
}
/// Response containing changes since the cursor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChangesResponse {
pub changes: Vec<SyncLogEntry>,
pub cursor: i64,
pub has_more: bool,
pub changes: Vec<SyncLogEntry>,
pub cursor: i64,
pub has_more: bool,
}
/// A change reported by the client.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClientChange {
pub path: String,
pub change_type: SyncChangeType,
pub content_hash: Option<String>,
pub file_size: Option<u64>,
pub local_mtime: Option<i64>,
pub metadata: Option<serde_json::Value>,
pub path: String,
pub change_type: SyncChangeType,
pub content_hash: Option<String>,
pub file_size: Option<u64>,
pub local_mtime: Option<i64>,
pub metadata: Option<serde_json::Value>,
}
/// Request from client to report local changes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReportChangesRequest {
pub device_id: String,
pub changes: Vec<ClientChange>,
pub device_id: String,
pub changes: Vec<ClientChange>,
}
/// Result of processing a client change.
///
/// Serialized with an internal `status` tag so clients can dispatch on the
/// outcome per path.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "snake_case")]
pub enum ChangeResult {
    /// Change accepted, no action needed
    Accepted { path: String },
    /// Conflict detected, needs resolution
    Conflict {
        path: String,
        server_hash: String,
        server_mtime: i64,
    },
    /// Upload required for new/modified file
    UploadRequired {
        path: String,
        upload_url: String,
        session_id: String,
    },
    /// Error processing change
    Error { path: String, message: String },
}
/// Response to a report changes request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReportChangesResponse {
pub results: Vec<ChangeResult>,
pub server_cursor: i64,
pub results: Vec<ChangeResult>,
pub server_cursor: i64,
}
/// Acknowledgment from client that changes have been processed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AckRequest {
    pub device_id: String,
    /// Highest sequence number the client has fully applied.
    pub cursor: i64,
    pub processed_paths: Vec<String>,
}
/// Get changes since a cursor position.
///
/// `limit` is capped at 1000. One extra row is fetched to decide `has_more`
/// without a second query; the returned cursor is the last entry's sequence,
/// or the input cursor when the page is empty.
pub async fn get_changes(
    storage: &DynStorageBackend,
    cursor: i64,
    limit: u64,
) -> Result<ChangesResponse> {
    let limit = limit.min(1000); // Cap at 1000
    // Fetch limit + 1 so we can tell whether another page exists.
    let changes = storage.get_changes_since(cursor, limit + 1).await?;

    let has_more = changes.len() > limit as usize;
    let changes: Vec<_> = changes.into_iter().take(limit as usize).collect();

    let new_cursor = changes.last().map_or(cursor, |c| c.sequence);

    Ok(ChangesResponse {
        changes,
        cursor: new_cursor,
        has_more,
    })
}
/// Record a change in the sync log.
///
/// Builds a [`SyncLogEntry`] with a fresh UUIDv7 id and the current
/// timestamp, persists it via the storage backend, and returns the entry.
/// The `sequence` field is 0 here; the database assigns the real value.
pub async fn record_change(
    storage: &DynStorageBackend,
    change_type: SyncChangeType,
    path: &str,
    media_id: Option<MediaId>,
    content_hash: Option<&ContentHash>,
    file_size: Option<u64>,
    changed_by_device: Option<DeviceId>,
) -> Result<SyncLogEntry> {
    let entry = SyncLogEntry {
        id: Uuid::now_v7(),
        sequence: 0, // Will be assigned by database
        change_type,
        media_id,
        path: path.to_string(),
        content_hash: content_hash.cloned(),
        file_size,
        metadata_json: None,
        changed_by_device,
        timestamp: Utc::now(),
    };

    storage.record_sync_change(&entry).await?;
    Ok(entry)
}
/// Update device cursor after processing changes.
///
/// Loads the device record, advances its sync cursor, and stamps both the
/// last-sync and updated-at times with now.
pub async fn update_device_cursor(
    storage: &DynStorageBackend,
    device_id: DeviceId,
    cursor: i64,
) -> Result<()> {
    let mut device = storage.get_device(device_id).await?;
    device.sync_cursor = Some(cursor);
    device.last_sync_at = Some(Utc::now());
    device.updated_at = Utc::now();
    storage.update_device(&device).await?;
    Ok(())
}
/// Mark a file as synced for a device.
///
/// Writes a per-device sync state where local and server hash/mtime are
/// identical, status is `Synced`, and any conflict info is cleared.
pub async fn mark_synced(
    storage: &DynStorageBackend,
    device_id: DeviceId,
    path: &str,
    hash: &str,
    mtime: Option<i64>,
) -> Result<()> {
    let state = DeviceSyncState {
        device_id,
        path: path.to_string(),
        local_hash: Some(hash.to_string()),
        server_hash: Some(hash.to_string()),
        local_mtime: mtime,
        server_mtime: mtime,
        sync_status: FileSyncStatus::Synced,
        last_synced_at: Some(Utc::now()),
        conflict_info_json: None,
    };

    storage.upsert_device_sync_state(&state).await?;
    Ok(())
}
/// Mark a file as pending download for a device.
///
/// Updates the existing per-device state with the new server hash/mtime if
/// one exists; otherwise creates a fresh state with no local knowledge of
/// the file. Either way the status becomes `PendingDownload`.
pub async fn mark_pending_download(
    storage: &DynStorageBackend,
    device_id: DeviceId,
    path: &str,
    server_hash: &str,
    server_mtime: Option<i64>,
) -> Result<()> {
    // Get existing state or create new
    let state = match storage.get_device_sync_state(device_id, path).await? {
        Some(mut s) => {
            s.server_hash = Some(server_hash.to_string());
            s.server_mtime = server_mtime;
            s.sync_status = FileSyncStatus::PendingDownload;
            s
        },
        None => DeviceSyncState {
            device_id,
            path: path.to_string(),
            local_hash: None,
            server_hash: Some(server_hash.to_string()),
            local_mtime: None,
            server_mtime,
            sync_status: FileSyncStatus::PendingDownload,
            last_synced_at: None,
            conflict_info_json: None,
        },
    };

    storage.upsert_device_sync_state(&state).await?;
    Ok(())
}
/// Generate a device token using UUIDs for randomness.
///
/// Returns 64 lowercase hex characters: two random v4 UUIDs in `simple`
/// form. Each v4 UUID contributes 122 random bits (6 bits are fixed
/// version/variant), so the token carries ~244 bits of entropy.
pub fn generate_device_token() -> String {
    let uuid1 = uuid::Uuid::new_v4();
    let uuid2 = uuid::Uuid::new_v4();
    format!("{}{}", uuid1.simple(), uuid2.simple())
}
/// Hash a device token for storage.
pub fn hash_device_token(token: &str) -> String {
blake3::hash(token.as_bytes()).to_hex().to_string()
blake3::hash(token.as_bytes()).to_hex().to_string()
}

View file

@ -1,43 +1,52 @@
use uuid::Uuid;
use crate::error::Result;
use crate::model::{AuditAction, MediaId, Tag};
use crate::storage::DynStorageBackend;
use crate::{
error::Result,
model::{AuditAction, MediaId, Tag},
storage::DynStorageBackend,
};
/// Create a tag named `name`, optionally nested under `parent_id`.
///
/// Thin wrapper delegating to the storage backend.
pub async fn create_tag(
    storage: &DynStorageBackend,
    name: &str,
    parent_id: Option<Uuid>,
) -> Result<Tag> {
    storage.create_tag(name, parent_id).await
}
pub async fn tag_media(storage: &DynStorageBackend, media_id: MediaId, tag_id: Uuid) -> Result<()> {
storage.tag_media(media_id, tag_id).await?;
crate::audit::record_action(
storage,
Some(media_id),
AuditAction::Tagged,
Some(format!("tag_id={tag_id}")),
)
.await
pub async fn tag_media(
storage: &DynStorageBackend,
media_id: MediaId,
tag_id: Uuid,
) -> Result<()> {
storage.tag_media(media_id, tag_id).await?;
crate::audit::record_action(
storage,
Some(media_id),
AuditAction::Tagged,
Some(format!("tag_id={tag_id}")),
)
.await
}
/// Detach `tag_id` from `media_id`, then record an `Untagged` audit entry.
///
/// Mirrors [`tag_media`]; the audit detail string identifies the tag.
pub async fn untag_media(
    storage: &DynStorageBackend,
    media_id: MediaId,
    tag_id: Uuid,
) -> Result<()> {
    storage.untag_media(media_id, tag_id).await?;
    crate::audit::record_action(
        storage,
        Some(media_id),
        AuditAction::Untagged,
        Some(format!("tag_id={tag_id}")),
    )
    .await
}
pub async fn get_tag_tree(storage: &DynStorageBackend, tag_id: Uuid) -> Result<Vec<Tag>> {
storage.get_tag_descendants(tag_id).await
pub async fn get_tag_tree(
storage: &DynStorageBackend,
tag_id: Uuid,
) -> Result<Vec<Tag>> {
storage.get_tag_descendants(tag_id).await
}

View file

@ -1,501 +1,565 @@
use std::path::{Path, PathBuf};
use std::process::Command;
use std::{
path::{Path, PathBuf},
process::Command,
};
use tracing::{info, warn};
use crate::config::ThumbnailConfig;
use crate::error::{PinakesError, Result};
use crate::media_type::{BuiltinMediaType, MediaCategory, MediaType};
use crate::model::MediaId;
use crate::{
config::ThumbnailConfig,
error::{PinakesError, Result},
media_type::{BuiltinMediaType, MediaCategory, MediaType},
model::MediaId,
};
/// Generate a thumbnail for a media file and return the path to the thumbnail.
///
/// Supports images (via `image` crate), videos (via ffmpeg), PDFs (via
/// pdftoppm), and EPUBs (via cover image extraction). Uses the default
/// [`ThumbnailConfig`]; see [`generate_thumbnail_with_config`] to customize.
pub fn generate_thumbnail(
    media_id: MediaId,
    source_path: &Path,
    media_type: MediaType,
    thumbnail_dir: &Path,
) -> Result<Option<PathBuf>> {
    generate_thumbnail_with_config(
        media_id,
        source_path,
        media_type,
        thumbnail_dir,
        &ThumbnailConfig::default(),
    )
}
pub fn generate_thumbnail_with_config(
media_id: MediaId,
source_path: &Path,
media_type: MediaType,
thumbnail_dir: &Path,
config: &ThumbnailConfig,
media_id: MediaId,
source_path: &Path,
media_type: MediaType,
thumbnail_dir: &Path,
config: &ThumbnailConfig,
) -> Result<Option<PathBuf>> {
std::fs::create_dir_all(thumbnail_dir)?;
let thumb_path = thumbnail_dir.join(format!("{}.jpg", media_id));
std::fs::create_dir_all(thumbnail_dir)?;
let thumb_path = thumbnail_dir.join(format!("{}.jpg", media_id));
let result = match media_type.category() {
MediaCategory::Image => {
if media_type.is_raw() {
generate_raw_thumbnail(source_path, &thumb_path, config)
} else if media_type == MediaType::Builtin(BuiltinMediaType::Heic) {
generate_heic_thumbnail(source_path, &thumb_path, config)
} else {
generate_image_thumbnail(source_path, &thumb_path, config)
}
}
MediaCategory::Video => generate_video_thumbnail(source_path, &thumb_path, config),
MediaCategory::Document => match media_type {
MediaType::Builtin(BuiltinMediaType::Pdf) => {
generate_pdf_thumbnail(source_path, &thumb_path, config)
}
MediaType::Builtin(BuiltinMediaType::Epub) => {
generate_epub_thumbnail(source_path, &thumb_path, config)
}
_ => return Ok(None),
let result = match media_type.category() {
MediaCategory::Image => {
if media_type.is_raw() {
generate_raw_thumbnail(source_path, &thumb_path, config)
} else if media_type == MediaType::Builtin(BuiltinMediaType::Heic) {
generate_heic_thumbnail(source_path, &thumb_path, config)
} else {
generate_image_thumbnail(source_path, &thumb_path, config)
}
},
MediaCategory::Video => {
generate_video_thumbnail(source_path, &thumb_path, config)
},
MediaCategory::Document => {
match media_type {
MediaType::Builtin(BuiltinMediaType::Pdf) => {
generate_pdf_thumbnail(source_path, &thumb_path, config)
},
MediaType::Builtin(BuiltinMediaType::Epub) => {
generate_epub_thumbnail(source_path, &thumb_path, config)
},
_ => return Ok(None),
};
}
},
_ => return Ok(None),
};
match result {
Ok(()) => {
info!(media_id = %media_id, category = ?media_type.category(), "generated thumbnail");
Ok(Some(thumb_path))
}
Err(e) => {
warn!(media_id = %media_id, error = %e, "failed to generate thumbnail");
Ok(None)
}
}
match result {
Ok(()) => {
info!(media_id = %media_id, category = ?media_type.category(), "generated thumbnail");
Ok(Some(thumb_path))
},
Err(e) => {
warn!(media_id = %media_id, error = %e, "failed to generate thumbnail");
Ok(None)
},
}
}
fn generate_image_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
let img = image::open(source)
.map_err(|e| PinakesError::MetadataExtraction(format!("image open: {e}")))?;
fn generate_image_thumbnail(
source: &Path,
dest: &Path,
config: &ThumbnailConfig,
) -> Result<()> {
let img = image::open(source).map_err(|e| {
PinakesError::MetadataExtraction(format!("image open: {e}"))
})?;
let thumb = img.thumbnail(config.size, config.size);
let mut output = std::fs::File::create(dest)?;
let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(
&mut output,
config.quality,
);
thumb.write_with_encoder(encoder).map_err(|e| {
PinakesError::MetadataExtraction(format!("thumbnail encode: {e}"))
})?;
Ok(())
}
/// Grab one video frame with ffmpeg and write it to `dest`.
///
/// Seeks to `config.video_seek_secs` before decoding, scales to the
/// configured size, and overwrites `dest` (`-y`). ffmpeg's own output is
/// discarded; a missing binary or nonzero exit becomes a
/// `MetadataExtraction` error.
fn generate_video_thumbnail(
    source: &Path,
    dest: &Path,
    config: &ThumbnailConfig,
) -> Result<()> {
    let ffmpeg = config.ffmpeg_path.as_deref().unwrap_or("ffmpeg");

    let status = Command::new(ffmpeg)
        .args(["-ss", &config.video_seek_secs.to_string(), "-i"])
        .arg(source)
        .args([
            "-vframes",
            "1",
            "-vf",
            &format!("scale={}:{}", config.size, config.size),
            "-y",
        ])
        .arg(dest)
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status()
        .map_err(|e| {
            PinakesError::MetadataExtraction(format!(
                "ffmpeg not found or failed to execute: {e}"
            ))
        })?;

    if !status.success() {
        return Err(PinakesError::MetadataExtraction(format!(
            "ffmpeg exited with status {status}"
        )));
    }
    Ok(())
}
/// Render the first PDF page with pdftoppm, then resize it into `dest`.
///
/// pdftoppm writes `<prefix>.jpg` (because of `-singlefile`); that
/// intermediate is decoded, thumbnailed, re-encoded as JPEG, and removed.
fn generate_pdf_thumbnail(
    source: &Path,
    dest: &Path,
    config: &ThumbnailConfig,
) -> Result<()> {
    // Use pdftoppm to render first page, then resize with image crate
    let temp_prefix = dest.with_extension("tmp");

    let status = Command::new("pdftoppm")
        .args(["-jpeg", "-f", "1", "-l", "1", "-singlefile"])
        .arg(source)
        .arg(&temp_prefix)
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status()
        .map_err(|e| {
            PinakesError::MetadataExtraction(format!(
                "pdftoppm not found or failed to execute: {e}"
            ))
        })?;

    if !status.success() {
        return Err(PinakesError::MetadataExtraction(format!(
            "pdftoppm exited with status {status}"
        )));
    }

    // pdftoppm outputs <prefix>.jpg
    let rendered = temp_prefix.with_extension("jpg");
    if rendered.exists() {
        // Resize to thumbnail size
        let img = image::open(&rendered)
            .map_err(|e| PinakesError::MetadataExtraction(format!("pdf thumbnail open: {e}")))?;
        let thumb = img.thumbnail(config.size, config.size);
        let mut output = std::fs::File::create(dest)?;
        let encoder =
            image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, config.quality);
        thumb.write_with_encoder(encoder).map_err(|e| {
            PinakesError::MetadataExtraction(format!("pdf thumbnail encode: {e}"))
        })?;
        let _ = std::fs::remove_file(&rendered);
        Ok(())
    } else {
        Err(PinakesError::MetadataExtraction(
            "pdftoppm did not produce output".to_string(),
        ))
    }
}
fn generate_video_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
let ffmpeg = config.ffmpeg_path.as_deref().unwrap_or("ffmpeg");
fn generate_epub_thumbnail(
source: &Path,
dest: &Path,
config: &ThumbnailConfig,
) -> Result<()> {
// Try to extract cover image from EPUB
let mut doc = epub::doc::EpubDoc::new(source)
.map_err(|e| PinakesError::MetadataExtraction(format!("epub open: {e}")))?;
let status = Command::new(ffmpeg)
.args(["-ss", &config.video_seek_secs.to_string(), "-i"])
.arg(source)
.args([
"-vframes",
"1",
"-vf",
&format!("scale={}:{}", config.size, config.size),
"-y",
])
.arg(dest)
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(|e| {
PinakesError::MetadataExtraction(format!("ffmpeg not found or failed to execute: {e}"))
})?;
if !status.success() {
return Err(PinakesError::MetadataExtraction(format!(
"ffmpeg exited with status {}",
status
)));
}
let cover_data = doc.get_cover().map(|(data, _mime)| data).or_else(|| {
// Fallback: try to find a cover image in the resources
doc
.get_resource("cover-image")
.map(|(data, _)| data)
.or_else(|| doc.get_resource("cover").map(|(data, _)| data))
});
if let Some(data) = cover_data {
let img = image::load_from_memory(&data).map_err(|e| {
PinakesError::MetadataExtraction(format!("epub cover decode: {e}"))
})?;
let thumb = img.thumbnail(config.size, config.size);
let mut output = std::fs::File::create(dest)?;
let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(
&mut output,
config.quality,
);
thumb.write_with_encoder(encoder).map_err(|e| {
PinakesError::MetadataExtraction(format!("epub thumbnail encode: {e}"))
})?;
Ok(())
} else {
Err(PinakesError::MetadataExtraction(
"no cover image found in epub".to_string(),
))
}
}
fn generate_pdf_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
// Use pdftoppm to render first page, then resize with image crate
let temp_prefix = dest.with_extension("tmp");
let status = Command::new("pdftoppm")
.args(["-jpeg", "-f", "1", "-l", "1", "-singlefile"])
.arg(source)
.arg(&temp_prefix)
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"pdftoppm not found or failed to execute: {e}"
))
})?;
fn generate_raw_thumbnail(
source: &Path,
dest: &Path,
config: &ThumbnailConfig,
) -> Result<()> {
// Try dcraw to extract embedded JPEG preview, then resize
let temp_ppm = dest.with_extension("ppm");
let status = Command::new("dcraw")
.args(["-e", "-c"])
.arg(source)
.stdout(std::fs::File::create(&temp_ppm).map_err(|e| {
PinakesError::MetadataExtraction(format!(
"failed to create temp file: {e}"
))
})?)
.stderr(std::process::Stdio::null())
.status()
.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"dcraw not found or failed: {e}"
))
})?;
if !status.success() {
return Err(PinakesError::MetadataExtraction(format!(
"pdftoppm exited with status {}",
status
)));
}
if !status.success() {
let _ = std::fs::remove_file(&temp_ppm);
return Err(PinakesError::MetadataExtraction(format!(
"dcraw exited with status {}",
status
)));
}
// pdftoppm outputs <prefix>.jpg
let rendered = temp_prefix.with_extension("jpg");
if rendered.exists() {
// Resize to thumbnail size
let img = image::open(&rendered)
.map_err(|e| PinakesError::MetadataExtraction(format!("pdf thumbnail open: {e}")))?;
let thumb = img.thumbnail(config.size, config.size);
let mut output = std::fs::File::create(dest)?;
let encoder =
image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, config.quality);
thumb
.write_with_encoder(encoder)
.map_err(|e| PinakesError::MetadataExtraction(format!("pdf thumbnail encode: {e}")))?;
let _ = std::fs::remove_file(&rendered);
Ok(())
} else {
Err(PinakesError::MetadataExtraction(
"pdftoppm did not produce output".to_string(),
))
}
// The extracted preview is typically a JPEG — try loading it
if temp_ppm.exists() {
let result = image::open(&temp_ppm);
let _ = std::fs::remove_file(&temp_ppm);
let img = result.map_err(|e| {
PinakesError::MetadataExtraction(format!("raw preview decode: {e}"))
})?;
let thumb = img.thumbnail(config.size, config.size);
let mut output = std::fs::File::create(dest)?;
let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(
&mut output,
config.quality,
);
thumb.write_with_encoder(encoder).map_err(|e| {
PinakesError::MetadataExtraction(format!("raw thumbnail encode: {e}"))
})?;
Ok(())
} else {
Err(PinakesError::MetadataExtraction(
"dcraw did not produce output".to_string(),
))
}
}
fn generate_epub_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
// Try to extract cover image from EPUB
let mut doc = epub::doc::EpubDoc::new(source)
.map_err(|e| PinakesError::MetadataExtraction(format!("epub open: {e}")))?;
fn generate_heic_thumbnail(
source: &Path,
dest: &Path,
config: &ThumbnailConfig,
) -> Result<()> {
// Use heif-convert to convert to JPEG, then resize
let temp_jpg = dest.with_extension("tmp.jpg");
let status = Command::new("heif-convert")
.arg(source)
.arg(&temp_jpg)
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"heif-convert not found or failed: {e}"
))
})?;
let cover_data = doc.get_cover().map(|(data, _mime)| data).or_else(|| {
// Fallback: try to find a cover image in the resources
doc.get_resource("cover-image")
.map(|(data, _)| data)
.or_else(|| doc.get_resource("cover").map(|(data, _)| data))
});
if !status.success() {
let _ = std::fs::remove_file(&temp_jpg);
return Err(PinakesError::MetadataExtraction(format!(
"heif-convert exited with status {}",
status
)));
}
if let Some(data) = cover_data {
let img = image::load_from_memory(&data)
.map_err(|e| PinakesError::MetadataExtraction(format!("epub cover decode: {e}")))?;
let thumb = img.thumbnail(config.size, config.size);
let mut output = std::fs::File::create(dest)?;
let encoder =
image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, config.quality);
thumb
.write_with_encoder(encoder)
.map_err(|e| PinakesError::MetadataExtraction(format!("epub thumbnail encode: {e}")))?;
Ok(())
} else {
Err(PinakesError::MetadataExtraction(
"no cover image found in epub".to_string(),
))
}
}
fn generate_raw_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
// Try dcraw to extract embedded JPEG preview, then resize
let temp_ppm = dest.with_extension("ppm");
let status = Command::new("dcraw")
.args(["-e", "-c"])
.arg(source)
.stdout(std::fs::File::create(&temp_ppm).map_err(|e| {
PinakesError::MetadataExtraction(format!("failed to create temp file: {e}"))
})?)
.stderr(std::process::Stdio::null())
.status()
.map_err(|e| PinakesError::MetadataExtraction(format!("dcraw not found or failed: {e}")))?;
if !status.success() {
let _ = std::fs::remove_file(&temp_ppm);
return Err(PinakesError::MetadataExtraction(format!(
"dcraw exited with status {}",
status
)));
}
// The extracted preview is typically a JPEG — try loading it
if temp_ppm.exists() {
let result = image::open(&temp_ppm);
let _ = std::fs::remove_file(&temp_ppm);
let img = result
.map_err(|e| PinakesError::MetadataExtraction(format!("raw preview decode: {e}")))?;
let thumb = img.thumbnail(config.size, config.size);
let mut output = std::fs::File::create(dest)?;
let encoder =
image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, config.quality);
thumb
.write_with_encoder(encoder)
.map_err(|e| PinakesError::MetadataExtraction(format!("raw thumbnail encode: {e}")))?;
Ok(())
} else {
Err(PinakesError::MetadataExtraction(
"dcraw did not produce output".to_string(),
))
}
}
fn generate_heic_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
// Use heif-convert to convert to JPEG, then resize
let temp_jpg = dest.with_extension("tmp.jpg");
let status = Command::new("heif-convert")
.arg(source)
.arg(&temp_jpg)
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(|e| {
PinakesError::MetadataExtraction(format!("heif-convert not found or failed: {e}"))
})?;
if !status.success() {
let _ = std::fs::remove_file(&temp_jpg);
return Err(PinakesError::MetadataExtraction(format!(
"heif-convert exited with status {}",
status
)));
}
if temp_jpg.exists() {
let result = image::open(&temp_jpg);
let _ = std::fs::remove_file(&temp_jpg);
let img =
result.map_err(|e| PinakesError::MetadataExtraction(format!("heic decode: {e}")))?;
let thumb = img.thumbnail(config.size, config.size);
let mut output = std::fs::File::create(dest)?;
let encoder =
image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, config.quality);
thumb
.write_with_encoder(encoder)
.map_err(|e| PinakesError::MetadataExtraction(format!("heic thumbnail encode: {e}")))?;
Ok(())
} else {
Err(PinakesError::MetadataExtraction(
"heif-convert did not produce output".to_string(),
))
}
if temp_jpg.exists() {
let result = image::open(&temp_jpg);
let _ = std::fs::remove_file(&temp_jpg);
let img = result.map_err(|e| {
PinakesError::MetadataExtraction(format!("heic decode: {e}"))
})?;
let thumb = img.thumbnail(config.size, config.size);
let mut output = std::fs::File::create(dest)?;
let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(
&mut output,
config.quality,
);
thumb.write_with_encoder(encoder).map_err(|e| {
PinakesError::MetadataExtraction(format!("heic thumbnail encode: {e}"))
})?;
Ok(())
} else {
Err(PinakesError::MetadataExtraction(
"heif-convert did not produce output".to_string(),
))
}
}
/// Cover size variants for book covers
#[derive(Debug, Clone, Copy)]
pub enum CoverSize {
Tiny, // 64x64 - for map markers, timeline
Grid, // 320x320 - for library grid view
Preview, // 1024x1024 - for quick fullscreen preview
Original, // Full size - original cover
Tiny, // 64x64 - for map markers, timeline
Grid, // 320x320 - for library grid view
Preview, // 1024x1024 - for quick fullscreen preview
Original, // Full size - original cover
}
impl CoverSize {
pub fn dimensions(&self) -> Option<(u32, u32)> {
match self {
CoverSize::Tiny => Some((64, 64)),
CoverSize::Grid => Some((320, 320)),
CoverSize::Preview => Some((1024, 1024)),
CoverSize::Original => None, // No resizing
}
pub fn dimensions(&self) -> Option<(u32, u32)> {
match self {
CoverSize::Tiny => Some((64, 64)),
CoverSize::Grid => Some((320, 320)),
CoverSize::Preview => Some((1024, 1024)),
CoverSize::Original => None, // No resizing
}
}
pub fn filename(&self) -> &'static str {
match self {
CoverSize::Tiny => "tiny.jpg",
CoverSize::Grid => "grid.jpg",
CoverSize::Preview => "preview.jpg",
CoverSize::Original => "original.jpg",
}
pub fn filename(&self) -> &'static str {
match self {
CoverSize::Tiny => "tiny.jpg",
CoverSize::Grid => "grid.jpg",
CoverSize::Preview => "preview.jpg",
CoverSize::Original => "original.jpg",
}
}
}
/// Generate multi-resolution covers for a book
pub fn generate_book_covers(
media_id: MediaId,
source_image: &[u8],
covers_dir: &Path,
media_id: MediaId,
source_image: &[u8],
covers_dir: &Path,
) -> Result<Vec<(CoverSize, PathBuf)>> {
// Create cover directory for this media item
let media_cover_dir = covers_dir.join(media_id.to_string());
std::fs::create_dir_all(&media_cover_dir)?;
// Create cover directory for this media item
let media_cover_dir = covers_dir.join(media_id.to_string());
std::fs::create_dir_all(&media_cover_dir)?;
let img = image::load_from_memory(source_image)
.map_err(|e| PinakesError::MetadataExtraction(format!("cover image load: {e}")))?;
let img = image::load_from_memory(source_image).map_err(|e| {
PinakesError::MetadataExtraction(format!("cover image load: {e}"))
})?;
let mut results = Vec::new();
let mut results = Vec::new();
// Generate each size variant
for size in [
CoverSize::Tiny,
CoverSize::Grid,
CoverSize::Preview,
CoverSize::Original,
] {
let cover_path = media_cover_dir.join(size.filename());
// Generate each size variant
for size in [
CoverSize::Tiny,
CoverSize::Grid,
CoverSize::Preview,
CoverSize::Original,
] {
let cover_path = media_cover_dir.join(size.filename());
match size.dimensions() {
Some((width, height)) => {
// Generate thumbnail
let thumb = img.thumbnail(width, height);
let mut output = std::fs::File::create(&cover_path)?;
let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, 90);
thumb
.write_with_encoder(encoder)
.map_err(|e| PinakesError::MetadataExtraction(format!("cover encode: {e}")))?;
}
None => {
// Save original
let mut output = std::fs::File::create(&cover_path)?;
let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, 95);
img.write_with_encoder(encoder)
.map_err(|e| PinakesError::MetadataExtraction(format!("cover encode: {e}")))?;
}
}
results.push((size, cover_path));
match size.dimensions() {
Some((width, height)) => {
// Generate thumbnail
let thumb = img.thumbnail(width, height);
let mut output = std::fs::File::create(&cover_path)?;
let encoder =
image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, 90);
thumb.write_with_encoder(encoder).map_err(|e| {
PinakesError::MetadataExtraction(format!("cover encode: {e}"))
})?;
},
None => {
// Save original
let mut output = std::fs::File::create(&cover_path)?;
let encoder =
image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, 95);
img.write_with_encoder(encoder).map_err(|e| {
PinakesError::MetadataExtraction(format!("cover encode: {e}"))
})?;
},
}
Ok(results)
results.push((size, cover_path));
}
Ok(results)
}
/// Extract full-size cover from an EPUB file
pub fn extract_epub_cover(epub_path: &Path) -> Result<Option<Vec<u8>>> {
let mut doc = epub::doc::EpubDoc::new(epub_path)
.map_err(|e| PinakesError::MetadataExtraction(format!("EPUB open: {e}")))?;
let mut doc = epub::doc::EpubDoc::new(epub_path)
.map_err(|e| PinakesError::MetadataExtraction(format!("EPUB open: {e}")))?;
// Try to get the cover image
if let Some(cover_id) = doc.get_cover_id()
&& let Some((cover_data, _mime)) = doc.get_resource(&cover_id)
{
return Ok(Some(cover_data));
// Try to get the cover image
if let Some(cover_id) = doc.get_cover_id()
&& let Some((cover_data, _mime)) = doc.get_resource(&cover_id)
{
return Ok(Some(cover_data));
}
// Fallback: look for common cover image filenames
let cover_names = [
"cover.jpg",
"cover.jpeg",
"cover.png",
"Cover.jpg",
"Cover.jpeg",
"Cover.png",
];
for name in &cover_names {
if let Some(data) = doc.get_resource_by_path(name) {
return Ok(Some(data));
}
}
// Fallback: look for common cover image filenames
let cover_names = [
"cover.jpg",
"cover.jpeg",
"cover.png",
"Cover.jpg",
"Cover.jpeg",
"Cover.png",
];
for name in &cover_names {
if let Some(data) = doc.get_resource_by_path(name) {
return Ok(Some(data));
}
}
Ok(None)
Ok(None)
}
/// Extract full-size cover from a PDF file (first page)
pub fn extract_pdf_cover(pdf_path: &Path) -> Result<Option<Vec<u8>>> {
// Use pdftoppm to extract the first page at high resolution
let pdftoppm = "pdftoppm";
// Use pdftoppm to extract the first page at high resolution
let pdftoppm = "pdftoppm";
let temp_dir = std::env::temp_dir();
let temp_prefix = temp_dir.join(format!("pdf_cover_{}", uuid::Uuid::new_v4()));
let temp_dir = std::env::temp_dir();
let temp_prefix =
temp_dir.join(format!("pdf_cover_{}", uuid::Uuid::new_v4()));
let status = Command::new(pdftoppm)
.args(["-jpeg", "-f", "1", "-l", "1", "-scale-to", "1200"])
.arg(pdf_path)
.arg(&temp_prefix)
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(|e| PinakesError::MetadataExtraction(format!("pdftoppm: {e}")))?;
let status = Command::new(pdftoppm)
.args(["-jpeg", "-f", "1", "-l", "1", "-scale-to", "1200"])
.arg(pdf_path)
.arg(&temp_prefix)
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(|e| PinakesError::MetadataExtraction(format!("pdftoppm: {e}")))?;
if !status.success() {
return Err(PinakesError::MetadataExtraction(format!(
"pdftoppm exited with status {}",
status
)));
}
if !status.success() {
return Err(PinakesError::MetadataExtraction(format!(
"pdftoppm exited with status {}",
status
)));
}
// pdftoppm outputs files like prefix-1.jpg
let output_path = format!("{}-1.jpg", temp_prefix.display());
let output_pathbuf = PathBuf::from(&output_path);
// pdftoppm outputs files like prefix-1.jpg
let output_path = format!("{}-1.jpg", temp_prefix.display());
let output_pathbuf = PathBuf::from(&output_path);
if output_pathbuf.exists() {
let data = std::fs::read(&output_pathbuf)?;
let _ = std::fs::remove_file(&output_pathbuf);
Ok(Some(data))
} else {
Ok(None)
}
if output_pathbuf.exists() {
let data = std::fs::read(&output_pathbuf)?;
let _ = std::fs::remove_file(&output_pathbuf);
Ok(Some(data))
} else {
Ok(None)
}
}
/// Returns the default covers directory under the data dir
pub fn default_covers_dir() -> PathBuf {
crate::config::Config::default_data_dir().join("covers")
crate::config::Config::default_data_dir().join("covers")
}
/// Returns the default thumbnail directory under the data dir.
pub fn default_thumbnail_dir() -> PathBuf {
crate::config::Config::default_data_dir().join("thumbnails")
crate::config::Config::default_data_dir().join("thumbnails")
}
/// Thumbnail size variant for multi-resolution support
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ThumbnailSize {
/// Tiny thumbnail for map markers and icons (64x64)
Tiny,
/// Grid thumbnail for library grid view (320x320)
Grid,
/// Preview thumbnail for quick fullscreen preview (1024x1024)
Preview,
/// Tiny thumbnail for map markers and icons (64x64)
Tiny,
/// Grid thumbnail for library grid view (320x320)
Grid,
/// Preview thumbnail for quick fullscreen preview (1024x1024)
Preview,
}
impl ThumbnailSize {
/// Get the pixel size for this thumbnail variant
pub fn pixels(&self) -> u32 {
match self {
ThumbnailSize::Tiny => 64,
ThumbnailSize::Grid => 320,
ThumbnailSize::Preview => 1024,
}
/// Get the pixel size for this thumbnail variant
pub fn pixels(&self) -> u32 {
match self {
ThumbnailSize::Tiny => 64,
ThumbnailSize::Grid => 320,
ThumbnailSize::Preview => 1024,
}
}
/// Get the subdirectory name for this size
pub fn subdir_name(&self) -> &'static str {
match self {
ThumbnailSize::Tiny => "tiny",
ThumbnailSize::Grid => "grid",
ThumbnailSize::Preview => "preview",
}
/// Get the subdirectory name for this size
pub fn subdir_name(&self) -> &'static str {
match self {
ThumbnailSize::Tiny => "tiny",
ThumbnailSize::Grid => "grid",
ThumbnailSize::Preview => "preview",
}
}
}
/// Generate all thumbnail sizes for a media file
/// Returns paths to the generated thumbnails (tiny, grid, preview)
pub fn generate_all_thumbnail_sizes(
media_id: MediaId,
source_path: &Path,
media_type: MediaType,
thumbnail_base_dir: &Path,
media_id: MediaId,
source_path: &Path,
media_type: MediaType,
thumbnail_base_dir: &Path,
) -> Result<(Option<PathBuf>, Option<PathBuf>, Option<PathBuf>)> {
let sizes = [
ThumbnailSize::Tiny,
ThumbnailSize::Grid,
ThumbnailSize::Preview,
];
let mut results = Vec::new();
let sizes = [
ThumbnailSize::Tiny,
ThumbnailSize::Grid,
ThumbnailSize::Preview,
];
let mut results = Vec::new();
for size in &sizes {
let size_dir = thumbnail_base_dir.join(size.subdir_name());
std::fs::create_dir_all(&size_dir)?;
for size in &sizes {
let size_dir = thumbnail_base_dir.join(size.subdir_name());
std::fs::create_dir_all(&size_dir)?;
let config = ThumbnailConfig {
size: size.pixels(),
..ThumbnailConfig::default()
};
let config = ThumbnailConfig {
size: size.pixels(),
..ThumbnailConfig::default()
};
let result = generate_thumbnail_with_config(
media_id,
source_path,
media_type.clone(),
&size_dir,
&config,
)?;
let result = generate_thumbnail_with_config(
media_id,
source_path,
media_type.clone(),
&size_dir,
&config,
)?;
results.push(result);
}
results.push(result);
}
Ok((results[0].clone(), results[1].clone(), results[2].clone()))
Ok((results[0].clone(), results[1].clone(), results[2].clone()))
}

File diff suppressed because it is too large Load diff

View file

@ -3,19 +3,20 @@
//! Handles file uploads, metadata extraction, and MediaItem creation
//! for files stored in managed content-addressable storage.
use std::collections::HashMap;
use std::path::Path;
use std::{collections::HashMap, path::Path};
use chrono::Utc;
use tokio::io::AsyncRead;
use tracing::{debug, info};
use crate::error::{PinakesError, Result};
use crate::managed_storage::ManagedStorageService;
use crate::media_type::MediaType;
use crate::metadata;
use crate::model::{MediaId, MediaItem, StorageMode, UploadResult};
use crate::storage::DynStorageBackend;
use crate::{
error::{PinakesError, Result},
managed_storage::ManagedStorageService,
media_type::MediaType,
metadata,
model::{MediaId, MediaItem, StorageMode, UploadResult},
storage::DynStorageBackend,
};
/// Process an upload from an async reader.
///
@ -25,196 +26,198 @@ use crate::storage::DynStorageBackend;
/// 3. Extracts metadata from the file
/// 4. Creates or updates the MediaItem
pub async fn process_upload<R: AsyncRead + Unpin>(
storage: &DynStorageBackend,
managed: &ManagedStorageService,
reader: R,
original_filename: &str,
mime_type: Option<&str>,
storage: &DynStorageBackend,
managed: &ManagedStorageService,
reader: R,
original_filename: &str,
mime_type: Option<&str>,
) -> Result<UploadResult> {
// Store the file
let (content_hash, file_size) = managed.store_stream(reader).await?;
// Store the file
let (content_hash, file_size) = managed.store_stream(reader).await?;
// Check if we already have a media item with this hash
if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
debug!(hash = %content_hash, media_id = %existing.id, "upload matched existing media item");
return Ok(UploadResult {
media_id: existing.id,
content_hash,
was_duplicate: true,
file_size,
});
}
// Check if we already have a media item with this hash
if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
debug!(hash = %content_hash, media_id = %existing.id, "upload matched existing media item");
return Ok(UploadResult {
media_id: existing.id,
content_hash,
was_duplicate: true,
file_size,
});
}
// Determine media type from filename
let media_type = MediaType::from_path(Path::new(original_filename))
.unwrap_or_else(|| MediaType::custom("unknown"));
// Determine media type from filename
let media_type = MediaType::from_path(Path::new(original_filename))
.unwrap_or_else(|| MediaType::custom("unknown"));
// Get the actual file path in managed storage for metadata extraction
let blob_path = managed.path(&content_hash);
// Get the actual file path in managed storage for metadata extraction
let blob_path = managed.path(&content_hash);
// Extract metadata
let extracted = metadata::extract_metadata(&blob_path, media_type.clone()).ok();
// Extract metadata
let extracted =
metadata::extract_metadata(&blob_path, media_type.clone()).ok();
// Create or get blob record
let mime = mime_type
.map(String::from)
.unwrap_or_else(|| media_type.mime_type().to_string());
let _blob = storage
.get_or_create_blob(&content_hash, file_size, &mime)
.await?;
// Create or get blob record
let mime = mime_type
.map(String::from)
.unwrap_or_else(|| media_type.mime_type().to_string());
let _blob = storage
.get_or_create_blob(&content_hash, file_size, &mime)
.await?;
// Create the media item
let now = Utc::now();
let media_id = MediaId::new();
// Create the media item
let now = Utc::now();
let media_id = MediaId::new();
let item = MediaItem {
id: media_id,
path: blob_path,
file_name: sanitize_filename(original_filename),
media_type,
content_hash: content_hash.clone(),
file_size,
title: extracted.as_ref().and_then(|m| m.title.clone()),
artist: extracted.as_ref().and_then(|m| m.artist.clone()),
album: extracted.as_ref().and_then(|m| m.album.clone()),
genre: extracted.as_ref().and_then(|m| m.genre.clone()),
year: extracted.as_ref().and_then(|m| m.year),
duration_secs: extracted.as_ref().and_then(|m| m.duration_secs),
description: extracted.as_ref().and_then(|m| m.description.clone()),
thumbnail_path: None,
custom_fields: HashMap::new(),
file_mtime: None,
date_taken: extracted.as_ref().and_then(|m| m.date_taken),
latitude: extracted.as_ref().and_then(|m| m.latitude),
longitude: extracted.as_ref().and_then(|m| m.longitude),
camera_make: extracted.as_ref().and_then(|m| m.camera_make.clone()),
camera_model: extracted.as_ref().and_then(|m| m.camera_model.clone()),
rating: None,
perceptual_hash: None,
storage_mode: StorageMode::Managed,
original_filename: Some(original_filename.to_string()),
uploaded_at: Some(now),
storage_key: Some(content_hash.0.clone()),
created_at: now,
updated_at: now,
deleted_at: None,
links_extracted_at: None,
};
let item = MediaItem {
id: media_id,
path: blob_path,
file_name: sanitize_filename(original_filename),
media_type,
content_hash: content_hash.clone(),
file_size,
title: extracted.as_ref().and_then(|m| m.title.clone()),
artist: extracted.as_ref().and_then(|m| m.artist.clone()),
album: extracted.as_ref().and_then(|m| m.album.clone()),
genre: extracted.as_ref().and_then(|m| m.genre.clone()),
year: extracted.as_ref().and_then(|m| m.year),
duration_secs: extracted.as_ref().and_then(|m| m.duration_secs),
description: extracted.as_ref().and_then(|m| m.description.clone()),
thumbnail_path: None,
custom_fields: HashMap::new(),
file_mtime: None,
date_taken: extracted.as_ref().and_then(|m| m.date_taken),
latitude: extracted.as_ref().and_then(|m| m.latitude),
longitude: extracted.as_ref().and_then(|m| m.longitude),
camera_make: extracted.as_ref().and_then(|m| m.camera_make.clone()),
camera_model: extracted.as_ref().and_then(|m| m.camera_model.clone()),
rating: None,
perceptual_hash: None,
storage_mode: StorageMode::Managed,
original_filename: Some(original_filename.to_string()),
uploaded_at: Some(now),
storage_key: Some(content_hash.0.clone()),
created_at: now,
updated_at: now,
deleted_at: None,
links_extracted_at: None,
};
// Store the media item
storage.insert_managed_media(&item).await?;
// Store the media item
storage.insert_managed_media(&item).await?;
info!(
media_id = %media_id,
hash = %content_hash,
filename = %original_filename,
size = file_size,
"processed upload"
);
info!(
media_id = %media_id,
hash = %content_hash,
filename = %original_filename,
size = file_size,
"processed upload"
);
Ok(UploadResult {
media_id,
content_hash,
was_duplicate: false,
file_size,
})
Ok(UploadResult {
media_id,
content_hash,
was_duplicate: false,
file_size,
})
}
/// Process an upload from bytes.
pub async fn process_upload_bytes(
storage: &DynStorageBackend,
managed: &ManagedStorageService,
data: &[u8],
original_filename: &str,
mime_type: Option<&str>,
storage: &DynStorageBackend,
managed: &ManagedStorageService,
data: &[u8],
original_filename: &str,
mime_type: Option<&str>,
) -> Result<UploadResult> {
use std::io::Cursor;
let cursor = Cursor::new(data);
process_upload(storage, managed, cursor, original_filename, mime_type).await
use std::io::Cursor;
let cursor = Cursor::new(data);
process_upload(storage, managed, cursor, original_filename, mime_type).await
}
/// Process an upload from a local file path.
///
/// This is useful for migrating existing external files to managed storage.
pub async fn process_upload_file(
storage: &DynStorageBackend,
managed: &ManagedStorageService,
path: &Path,
original_filename: Option<&str>,
storage: &DynStorageBackend,
managed: &ManagedStorageService,
path: &Path,
original_filename: Option<&str>,
) -> Result<UploadResult> {
let file = tokio::fs::File::open(path).await?;
let reader = tokio::io::BufReader::new(file);
let file = tokio::fs::File::open(path).await?;
let reader = tokio::io::BufReader::new(file);
let filename = original_filename.unwrap_or_else(|| {
path.file_name()
.and_then(|n| n.to_str())
.unwrap_or("unknown")
});
let filename = original_filename.unwrap_or_else(|| {
path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("unknown")
});
let mime = mime_guess::from_path(path).first().map(|m| m.to_string());
let mime = mime_guess::from_path(path).first().map(|m| m.to_string());
process_upload(storage, managed, reader, filename, mime.as_deref()).await
process_upload(storage, managed, reader, filename, mime.as_deref()).await
}
/// Migrate an existing external media item to managed storage.
pub async fn migrate_to_managed(
storage: &DynStorageBackend,
managed: &ManagedStorageService,
media_id: MediaId,
storage: &DynStorageBackend,
managed: &ManagedStorageService,
media_id: MediaId,
) -> Result<()> {
let item = storage.get_media(media_id).await?;
let item = storage.get_media(media_id).await?;
if item.storage_mode == StorageMode::Managed {
return Err(PinakesError::InvalidOperation(
"media item is already in managed storage".into(),
));
}
if item.storage_mode == StorageMode::Managed {
return Err(PinakesError::InvalidOperation(
"media item is already in managed storage".into(),
));
}
// Check if the external file exists
if !item.path.exists() {
return Err(PinakesError::FileNotFound(item.path.clone()));
}
// Check if the external file exists
if !item.path.exists() {
return Err(PinakesError::FileNotFound(item.path.clone()));
}
// Store the file in managed storage
let (new_hash, new_size) = managed.store_file(&item.path).await?;
// Store the file in managed storage
let (new_hash, new_size) = managed.store_file(&item.path).await?;
// Verify the hash matches (it should, unless the file changed)
if new_hash.0 != item.content_hash.0 {
return Err(PinakesError::StorageIntegrity(format!(
"hash changed during migration: {} -> {}",
item.content_hash, new_hash
)));
}
// Verify the hash matches (it should, unless the file changed)
if new_hash.0 != item.content_hash.0 {
return Err(PinakesError::StorageIntegrity(format!(
"hash changed during migration: {} -> {}",
item.content_hash, new_hash
)));
}
// Get or create blob record
let mime = item.media_type.mime_type().to_string();
let _blob = storage
.get_or_create_blob(&new_hash, new_size, &mime)
.await?;
// Get or create blob record
let mime = item.media_type.mime_type().to_string();
let _blob = storage
.get_or_create_blob(&new_hash, new_size, &mime)
.await?;
// Update the media item
let mut updated = item.clone();
updated.storage_mode = StorageMode::Managed;
updated.storage_key = Some(new_hash.0.clone());
updated.uploaded_at = Some(Utc::now());
updated.path = managed.path(&new_hash);
updated.updated_at = Utc::now();
// Update the media item
let mut updated = item.clone();
updated.storage_mode = StorageMode::Managed;
updated.storage_key = Some(new_hash.0.clone());
updated.uploaded_at = Some(Utc::now());
updated.path = managed.path(&new_hash);
updated.updated_at = Utc::now();
storage.update_media(&updated).await?;
storage.update_media(&updated).await?;
info!(
media_id = %media_id,
hash = %new_hash,
"migrated media item to managed storage"
);
info!(
media_id = %media_id,
hash = %new_hash,
"migrated media item to managed storage"
);
Ok(())
Ok(())
}
/// Sanitize a filename for storage.
fn sanitize_filename(name: &str) -> String {
// Remove path separators and null bytes
name.replace(['/', '\\', '\0'], "_")
// Remove path separators and null bytes
name.replace(['/', '\\', '\0'], "_")
// Trim whitespace
.trim()
// Truncate to reasonable length
@ -225,43 +228,43 @@ fn sanitize_filename(name: &str) -> String {
/// Delete a managed media item and clean up the blob if orphaned.
pub async fn delete_managed_media(
storage: &DynStorageBackend,
managed: &ManagedStorageService,
media_id: MediaId,
storage: &DynStorageBackend,
managed: &ManagedStorageService,
media_id: MediaId,
) -> Result<()> {
let item = storage.get_media(media_id).await?;
let item = storage.get_media(media_id).await?;
if item.storage_mode != StorageMode::Managed {
return Err(PinakesError::InvalidOperation(
"media item is not in managed storage".into(),
));
}
if item.storage_mode != StorageMode::Managed {
return Err(PinakesError::InvalidOperation(
"media item is not in managed storage".into(),
));
}
// Decrement blob reference count
let should_delete = storage.decrement_blob_ref(&item.content_hash).await?;
// Decrement blob reference count
let should_delete = storage.decrement_blob_ref(&item.content_hash).await?;
// Delete the media item
storage.delete_media(media_id).await?;
// Delete the media item
storage.delete_media(media_id).await?;
// If blob is orphaned, delete it from storage
if should_delete {
managed.delete(&item.content_hash).await?;
storage.delete_blob(&item.content_hash).await?;
info!(hash = %item.content_hash, "deleted orphaned blob");
}
// If blob is orphaned, delete it from storage
if should_delete {
managed.delete(&item.content_hash).await?;
storage.delete_blob(&item.content_hash).await?;
info!(hash = %item.content_hash, "deleted orphaned blob");
}
Ok(())
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use super::*;
#[test]
fn test_sanitize_filename() {
assert_eq!(sanitize_filename("test.txt"), "test.txt");
assert_eq!(sanitize_filename("path/to/file.txt"), "path_to_file.txt");
assert_eq!(sanitize_filename(" spaces "), "spaces");
assert_eq!(sanitize_filename("a".repeat(300).as_str()), "a".repeat(255));
}
#[test]
fn test_sanitize_filename() {
assert_eq!(sanitize_filename("test.txt"), "test.txt");
assert_eq!(sanitize_filename("path/to/file.txt"), "path_to_file.txt");
assert_eq!(sanitize_filename(" spaces "), "spaces");
assert_eq!(sanitize_filename("a".repeat(300).as_str()), "a".repeat(255));
}
}

View file

@ -1,210 +1,218 @@
//! User management and authentication
use std::collections::HashMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use uuid::Uuid;
use crate::config::UserRole;
use crate::error::{PinakesError, Result};
use crate::{
config::UserRole,
error::{PinakesError, Result},
};
/// User ID
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct UserId(pub Uuid);
impl UserId {
pub fn new() -> Self {
Self(Uuid::now_v7())
}
pub fn new() -> Self {
Self(Uuid::now_v7())
}
}
impl Default for UserId {
fn default() -> Self {
Self::new()
}
fn default() -> Self {
Self::new()
}
}
impl std::fmt::Display for UserId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl From<Uuid> for UserId {
fn from(id: Uuid) -> Self {
Self(id)
}
fn from(id: Uuid) -> Self {
Self(id)
}
}
/// User account with profile information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct User {
pub id: UserId,
pub username: String,
#[serde(skip_serializing)]
pub password_hash: String,
pub role: UserRole,
pub profile: UserProfile,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub id: UserId,
pub username: String,
#[serde(skip_serializing)]
pub password_hash: String,
pub role: UserRole,
pub profile: UserProfile,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
/// User profile information
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct UserProfile {
pub avatar_path: Option<String>,
pub bio: Option<String>,
pub preferences: UserPreferences,
pub avatar_path: Option<String>,
pub bio: Option<String>,
pub preferences: UserPreferences,
}
/// User-specific preferences
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct UserPreferences {
/// UI theme preference
pub theme: Option<String>,
/// UI theme preference
pub theme: Option<String>,
/// Language preference
pub language: Option<String>,
/// Language preference
pub language: Option<String>,
/// Default video quality preference for transcoding
pub default_video_quality: Option<String>,
/// Default video quality preference for transcoding
pub default_video_quality: Option<String>,
/// Whether to auto-play media
pub auto_play: bool,
/// Whether to auto-play media
pub auto_play: bool,
/// Custom preferences (extensible)
pub custom: HashMap<String, serde_json::Value>,
/// Custom preferences (extensible)
pub custom: HashMap<String, serde_json::Value>,
}
/// Library access permission
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum LibraryPermission {
/// Can only read/view media
Read,
/// Can only read/view media
Read,
/// Can read and modify media metadata
Write,
/// Can read and modify media metadata
Write,
/// Full control including deletion and sharing
Admin,
/// Full control including deletion and sharing
Admin,
}
impl LibraryPermission {
pub fn can_read(&self) -> bool {
true
}
pub fn can_read(&self) -> bool {
true
}
pub fn can_write(&self) -> bool {
matches!(self, Self::Write | Self::Admin)
}
pub fn can_write(&self) -> bool {
matches!(self, Self::Write | Self::Admin)
}
pub fn can_admin(&self) -> bool {
matches!(self, Self::Admin)
}
pub fn can_admin(&self) -> bool {
matches!(self, Self::Admin)
}
}
/// User's access to a specific library root
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserLibraryAccess {
pub user_id: UserId,
pub root_path: String,
pub permission: LibraryPermission,
pub granted_at: DateTime<Utc>,
pub user_id: UserId,
pub root_path: String,
pub permission: LibraryPermission,
pub granted_at: DateTime<Utc>,
}
/// User creation request
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreateUserRequest {
pub username: String,
#[serde(skip_serializing)]
pub password: String,
pub role: UserRole,
pub profile: Option<UserProfile>,
pub username: String,
#[serde(skip_serializing)]
pub password: String,
pub role: UserRole,
pub profile: Option<UserProfile>,
}
/// User update request
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UpdateUserRequest {
#[serde(skip_serializing)]
pub password: Option<String>,
pub role: Option<UserRole>,
pub profile: Option<UserProfile>,
#[serde(skip_serializing)]
pub password: Option<String>,
pub role: Option<UserRole>,
pub profile: Option<UserProfile>,
}
/// User authentication
pub mod auth {
use super::*;
use super::*;
/// Hash a password using Argon2
pub fn hash_password(password: &str) -> Result<String> {
use argon2::{
Argon2,
password_hash::{PasswordHasher, SaltString, rand_core::OsRng},
};
/// Hash a password using Argon2
pub fn hash_password(password: &str) -> Result<String> {
use argon2::{
Argon2,
password_hash::{PasswordHasher, SaltString, rand_core::OsRng},
};
let salt = SaltString::generate(&mut OsRng);
let argon2 = Argon2::default();
let salt = SaltString::generate(&mut OsRng);
let argon2 = Argon2::default();
argon2
.hash_password(password.as_bytes(), &salt)
.map(|hash| hash.to_string())
.map_err(|e| PinakesError::Authentication(format!("failed to hash password: {e}")))
}
argon2
.hash_password(password.as_bytes(), &salt)
.map(|hash| hash.to_string())
.map_err(|e| {
PinakesError::Authentication(format!("failed to hash password: {e}"))
})
}
/// Verify a password against a hash
pub fn verify_password(password: &str, hash: &str) -> Result<bool> {
use argon2::{
Argon2,
password_hash::{PasswordHash, PasswordVerifier},
};
/// Verify a password against a hash
pub fn verify_password(password: &str, hash: &str) -> Result<bool> {
use argon2::{
Argon2,
password_hash::{PasswordHash, PasswordVerifier},
};
let parsed_hash = PasswordHash::new(hash)
.map_err(|e| PinakesError::Authentication(format!("invalid password hash: {e}")))?;
let parsed_hash = PasswordHash::new(hash).map_err(|e| {
PinakesError::Authentication(format!("invalid password hash: {e}"))
})?;
Ok(Argon2::default()
.verify_password(password.as_bytes(), &parsed_hash)
.is_ok())
}
Ok(
Argon2::default()
.verify_password(password.as_bytes(), &parsed_hash)
.is_ok(),
)
}
}
#[cfg(test)]
mod tests {
use super::*;
use super::*;
#[test]
fn test_hash_and_verify_password() {
let password = "test_password_123";
let hash = auth::hash_password(password).unwrap();
#[test]
fn test_hash_and_verify_password() {
let password = "test_password_123";
let hash = auth::hash_password(password).unwrap();
assert!(auth::verify_password(password, &hash).unwrap());
assert!(!auth::verify_password("wrong_password", &hash).unwrap());
}
assert!(auth::verify_password(password, &hash).unwrap());
assert!(!auth::verify_password("wrong_password", &hash).unwrap());
}
#[test]
fn test_user_preferences_default() {
let prefs = UserPreferences::default();
assert_eq!(prefs.theme, None);
assert_eq!(prefs.language, None);
assert!(!prefs.auto_play);
assert!(prefs.custom.is_empty());
}
#[test]
fn test_user_preferences_default() {
let prefs = UserPreferences::default();
assert_eq!(prefs.theme, None);
assert_eq!(prefs.language, None);
assert!(!prefs.auto_play);
assert!(prefs.custom.is_empty());
}
#[test]
fn test_library_permission_levels() {
let read = LibraryPermission::Read;
assert!(read.can_read());
assert!(!read.can_write());
assert!(!read.can_admin());
#[test]
fn test_library_permission_levels() {
let read = LibraryPermission::Read;
assert!(read.can_read());
assert!(!read.can_write());
assert!(!read.can_admin());
let write = LibraryPermission::Write;
assert!(write.can_read());
assert!(write.can_write());
assert!(!write.can_admin());
let write = LibraryPermission::Write;
assert!(write.can_read());
assert!(write.can_write());
assert!(!write.can_admin());
let admin = LibraryPermission::Admin;
assert!(admin.can_read());
assert!(admin.can_write());
assert!(admin.can_admin());
}
let admin = LibraryPermission::Admin;
assert!(admin.can_read());
assert!(admin.can_write());
assert!(admin.can_admin());
}
}